]>
Commit | Line | Data |
---|---|---|
5526e819 | 1 | ///////////////////////////////////////////////////////////////////////////// |
69941f05 | 2 | // Name: htmlpars.h |
5526e819 VS |
3 | // Purpose: wxHtmlParser class (generic parser) |
4 | // Author: Vaclav Slavik | |
69941f05 | 5 | // RCS-ID: $Id$ |
5526e819 VS |
6 | // Copyright: (c) 1999 Vaclav Slavik |
7 | // Licence: wxWindows Licence | |
8 | ///////////////////////////////////////////////////////////////////////////// | |
9 | ||
10 | ||
69941f05 VS |
11 | #ifndef _WX_HTMLPARS_H_ |
12 | #define _WX_HTMLPARS_H_ | |
5526e819 | 13 | |
af49c4b8 | 14 | #if defined(__GNUG__) && !defined(__APPLE__) |
97494971 | 15 | #pragma interface "htmlpars.h" |
5526e819 VS |
16 | #endif |
17 | ||
18 | #include "wx/defs.h" | |
19 | #if wxUSE_HTML | |
20 | ||
69941f05 VS |
21 | #include "wx/html/htmltag.h" |
22 | #include "wx/filesys.h" | |
fc1f2125 VS |
23 | #include "wx/hash.h" |
24 | #include "wx/fontenc.h" | |
5526e819 | 25 | |
daa616fc VS |
26 | class WXDLLEXPORT wxMBConv; |
27 | class WXDLLEXPORT wxHtmlParser; | |
28 | class WXDLLEXPORT wxHtmlTagHandler; | |
29 | class WXDLLEXPORT wxHtmlEntitiesParser; | |
30 | ||
6c62a62b VS |
31 | class wxHtmlTextPieces; |
32 | class wxHtmlParserState; | |
33 | ||
6cc4e6b8 VS |
34 | |
// Kind of URL passed to wxHtmlParser::OpenURL() (judging by the names: a page, an image or any other resource)
35 | enum wxHtmlURLType | |
36 | { | |
37 | wxHTML_URL_PAGE, | |
38 | wxHTML_URL_IMAGE, | |
39 | wxHTML_URL_OTHER | |
40 | }; | |
41 | ||
daa616fc VS |
42 | // This class handles generic parsing of an HTML document: it scans
43 | // the document and divides it into blocks of tags (where one block | |
44 | // consists of a starting and an ending tag and of the text between these | |
45 | // 2 tags). | |
5526e819 VS |
46 | class WXDLLEXPORT wxHtmlParser : public wxObject
47 | { | |
48 | DECLARE_ABSTRACT_CLASS(wxHtmlParser) | |
49 | ||
1309ba6c | 50 | public: |
daa616fc | 51 | wxHtmlParser(); |
1309ba6c VS |
52 | virtual ~wxHtmlParser(); |
53 | ||
54 | // Sets the file system object which will be used for opening files | |
55 | void SetFS(wxFileSystem *fs) { m_FS = fs; } | |
56 | ||
57 | wxFileSystem* GetFS() const { return m_FS; } | |
58 | ||
6cc4e6b8 | 59 | // Opens the file if the parser is allowed to open the given URL (may be forbidden
04db5c3f | 60 | // for security reasons)
6cc4e6b8 | 61 | virtual wxFSFile *OpenURL(wxHtmlURLType type, const wxString& url) const; |
04db5c3f | 62 | |
1309ba6c VS |
63 | // You can simply call this method when you need parsed output.
64 | // This method does these things: | |
65 | // 1. call InitParser(source); | |
66 | // 2. call DoParsing(); | |
67 | // 3. call GetProduct(); (its return value is then returned) | |
68 | // 4. call DoneParser(); | |
69 | wxObject* Parse(const wxString& source); | |
70 | ||
71 | // Sets the source. This must be called before DoParsing() (Parse() does so itself). | |
72 | virtual void InitParser(const wxString& source); | |
73 | // This must be called after parsing is done (Parse() does so itself). | |
74 | virtual void DoneParser(); | |
75 | ||
76 | // Parses the m_Source from begin_pos to end_pos-1. | |
77 | // (the version without parameters parses the whole m_Source) | |
78 | void DoParsing(int begin_pos, int end_pos); | |
6c62a62b VS |
79 | void DoParsing(); |
80 | ||
81 | // Returns pointer to the tag at parser's current position | |
82 | wxHtmlTag *GetCurrentTag() const { return m_CurTag; } | |
1309ba6c VS |
83 | |
84 | // Returns product of parsing | |
85 | // Returned value is result of parsing of the part. The type of this result | |
86 | // depends on internal representation in derived parser | |
87 | // (see wxHtmlWinParser for details). | |
88 | virtual wxObject* GetProduct() = 0; | |
89 | ||
90 | // Adds handler to the list & hash table of handlers. | |
91 | virtual void AddTagHandler(wxHtmlTagHandler *handler); | |
92 | ||
93 | // Forces the handler to handle additional tags (not returned by GetSupportedTags). | |
94 | // The handler should already be in use by this parser. | |
95 | // Example: you want to parse following pseudo-html structure: | |
96 | // <myitems> | |
97 | // <it name="one" value="1"> | |
98 | // <it name="two" value="2"> | |
99 | // </myitems> | |
100 | // <it> This last it has different meaning, we don't want it to be parsed by myitems handler! | |
101 | // handler can handle only 'myitems' (e.g. its GetSupportedTags returns "MYITEMS") | |
102 | // you can call PushTagHandler(handler, "IT") when you find <myitems> | |
103 | // and call PopTagHandler() when you find </myitems> | |
104 | void PushTagHandler(wxHtmlTagHandler *handler, wxString tags); | |
105 | ||
106 | // Restores state before last call to PushTagHandler | |
107 | void PopTagHandler(); | |
108 | ||
109 | wxString* GetSource() {return &m_Source;} | |
110 | void SetSource(const wxString& src); | |
6c62a62b VS |
111 | |
112 | // Sets HTML source and remembers current parser's state so that it can | |
113 | // later be restored. This is useful for on-line modifications of | |
114 | // HTML source (for example, <pre> handler replaces spaces with &nbsp; | |
115 | // and newlines with <br>) | |
116 | virtual void SetSourceAndSaveState(const wxString& src); | |
117 | // Restores parser's state from stack or returns FALSE if the stack is | |
118 | // empty | |
119 | virtual bool RestoreState(); | |
1309ba6c VS |
120 | |
121 | protected: | |
6c62a62b VS |
122 | // DOM structure |
123 | void CreateDOMTree(); | |
124 | void DestroyDOMTree(); | |
125 | void CreateDOMSubTree(wxHtmlTag *cur, | |
126 | int begin_pos, int end_pos, | |
127 | wxHtmlTagsCache *cache); | |
128 | ||
1309ba6c VS |
129 | // Adds text to the output.
130 | // This is called from Parse() and must be overridden in derived classes. | |
131 | // txt is not guaranteed to be only one word. It is the largest continuous part of text | |
132 | // (= not broken by tags) | |
133 | // NOTE : using wxChar* because of speed improvements | |
6c62a62b | 134 | virtual void AddText(const wxChar* txt) = 0; |
1309ba6c VS |
135 | |
136 | // Adds tag and processes it. Parse() may be (and usually is) called from this method. | |
137 | // This is called from Parse() and may be overridden. | |
138 | // Default behavior is that it looks for proper handler in m_Handlers. The tag is | |
139 | // ignored if no handler is found. | |
140 | // Derived class is *responsible* for filling in m_Handlers table. | |
141 | virtual void AddTag(const wxHtmlTag& tag); | |
daa616fc VS |
142 | |
143 | // Returns entity parser object, used to substitute HTML &entities; | |
144 | wxHtmlEntitiesParser *GetEntitiesParser() const { return m_entitiesParser; } | |
1309ba6c VS |
145 | |
146 | protected: | |
6c62a62b VS |
147 | // DOM tree: |
148 | wxHtmlTag *m_CurTag; | |
149 | wxHtmlTag *m_Tags; | |
150 | wxHtmlTextPieces *m_TextPieces; | |
151 | size_t m_CurTextPiece; | |
1309ba6c | 152 | |
6c62a62b VS |
153 | wxString m_Source; |
154 | ||
155 | wxHtmlParserState *m_SavedStates; | |
156 | ||
1309ba6c VS |
157 | // handlers that handle particular tags. The table is accessed by |
158 | // key = tag's name. | |
159 | // This attribute MUST be filled by derived class otherwise it would | |
160 | // be empty and no tags would be recognized | |
161 | // (see wxHtmlWinParser for details about filling it) | |
162 | // m_HandlersHash is for random access based on knowledge of tag name (BR, P, etc.) | |
163 | // it may (and often does) contain more references to one object | |
164 | // m_HandlersList is list of all handlers and it is guaranteed to contain | |
165 | // only one reference to each handler instance. | |
166 | wxList m_HandlersList; | |
6c62a62b | 167 | wxHashTable m_HandlersHash; |
1309ba6c VS |
168 | |
169 | // class for opening files (file system) | |
170 | wxFileSystem *m_FS; | |
171 | // handlers stack used by PushTagHandler and PopTagHandler | |
172 | wxList *m_HandlersStack; | |
daa616fc VS |
173 | |
174 | // entity parser | |
175 | wxHtmlEntitiesParser *m_entitiesParser; | |
5526e819 VS |
176 | }; |
177 | ||
178 | ||
179 | ||
daa616fc VS |
180 | // This class (and derived classes) cooperates with wxHtmlParser. |
181 | // Each recognized tag is passed to handler which is capable | |
182 | // of handling it. Each tag is handled in 3 steps: | |
183 | // 1. Handler modifies the state of the parser | |
184 | // (using its public methods) | |
185 | // 2. Parser parses source between starting and ending tag | |
186 | // 3. Handler restores original state of the parser | |
5526e819 VS |
187 | class WXDLLEXPORT wxHtmlTagHandler : public wxObject
188 | { | |
189 | DECLARE_ABSTRACT_CLASS(wxHtmlTagHandler) | |
190 | ||
1309ba6c VS |
191 | public: |
192 | wxHtmlTagHandler() : wxObject () { m_Parser = NULL; } | |
193 | ||
194 | // Sets the parser. | |
195 | // NOTE : each _instance_ of handler is guaranteed to be called | |
196 | // only by one parser. This means you don't have to care about | |
197 | // reentrancy. | |
198 | virtual void SetParser(wxHtmlParser *parser) | |
199 | { m_Parser = parser; } | |
200 | ||
201 | // Returns list of supported tags. The list is in uppercase and | |
202 | // tags are delimited by ','. | |
203 | // Example : "I,B,FONT,P" | |
204 | // is capable of handling italic, bold, font and paragraph tags | |
205 | virtual wxString GetSupportedTags() = 0; | |
206 | ||
207 | // This is the core handling method. It does all the Steps 1-3. | |
208 | // To process step 2, you can call ParseInner() | |
209 | // returned value : TRUE if it called ParseInner(), | |
210 | // FALSE otherwise | |
211 | virtual bool HandleTag(const wxHtmlTag& tag) = 0; | |
212 | ||
213 | protected: | |
214 | // parses input between beginning and ending tag. | |
215 | // m_Parser must be set. | |
216 | void ParseInner(const wxHtmlTag& tag) | |
217 | { m_Parser->DoParsing(tag.GetBeginPos(), tag.GetEndPos1()); } | |
218 | ||
219 | wxHtmlParser *m_Parser; | |
5526e819 VS |
220 | }; |
221 | ||
222 | ||
daa616fc VS |
223 | // This class is used to parse HTML entities in strings. It can handle |
224 | // both named entities and &#xxxx; entities where xxxx is the Unicode code. | |
225 | class WXDLLEXPORT wxHtmlEntitiesParser : public wxObject | |
226 | { | |
227 | DECLARE_DYNAMIC_CLASS(wxHtmlEntitiesParser) | |
228 | ||
229 | public: | |
230 | wxHtmlEntitiesParser(); | |
231 | virtual ~wxHtmlEntitiesParser(); | |
232 | ||
233 | // Sets encoding of output string. | |
234 | // Has no effect if wxUSE_WCHAR_T==0 or wxUSE_UNICODE==1 | |
235 | void SetEncoding(wxFontEncoding encoding); | |
236 | ||
237 | // Parses entities in input and replaces them with respective characters | |
238 | // (with respect to output encoding) | |
239 | wxString Parse(const wxString& input); | |
61b50a43 | 240 | |
f5e6ed7c | 241 | // Returns character for given entity or 0 if the entity is unknown
daa616fc | 242 | wxChar GetEntityChar(const wxString& entity); |
61b50a43 VS |
243 | |
244 | // Returns character that represents the given Unicode code | |
daa616fc VS |
245 | wxChar GetCharForCode(unsigned code); |
246 | ||
61b50a43 | 247 | protected: |
daa616fc VS |
248 | #if wxUSE_WCHAR_T && !wxUSE_UNICODE |
249 | wxMBConv *m_conv; | |
250 | wxFontEncoding m_encoding; | |
251 | #endif | |
252 | }; | |
5526e819 VS |
253 | |
254 | ||
5526e819 | 255 | #endif |
69941f05 VS |
256 | |
257 | #endif // _WX_HTMLPARS_H_ |