]>
Commit | Line | Data |
---|---|---|
23324ae1 FM |
1 | ///////////////////////////////////////////////////////////////////////////// |
2 | // Name: html/htmlpars.h | |
e54c96f1 | 3 | // Purpose: interface of wxHtmlTagHandler |
23324ae1 FM |
4 | // Author: wxWidgets team |
5 | // RCS-ID: $Id$ | |
526954c5 | 6 | // Licence: wxWindows licence |
23324ae1 FM |
7 | ///////////////////////////////////////////////////////////////////////////// |
8 | ||
90f011dc RD |
9 | enum wxHtmlURLType |
10 | { | |
11 | wxHTML_URL_PAGE, | |
12 | wxHTML_URL_IMAGE, | |
13 | wxHTML_URL_OTHER | |
14 | }; | |
15 | ||
16 | ||
17 | ||
23324ae1 FM |
18 | /** |
19 | @class wxHtmlTagHandler | |
7c913512 | 20 | |
5bddd46d | 21 | @todo describe me |
7c913512 | 22 | |
23324ae1 FM |
23 | @library{wxhtml} |
24 | @category{html} | |
7c913512 | 25 | |
5bddd46d | 26 | @see @ref overview_html_handlers, wxHtmlTag |
23324ae1 FM |
27 | */ |
28 | class wxHtmlTagHandler : public wxObject | |
29 | { | |
30 | public: | |
31 | /** | |
32 | Constructor. | |
33 | */ | |
34 | wxHtmlTagHandler(); | |
35 | ||
36 | /** | |
5bddd46d FM |
37 | Returns list of supported tags. |
38 | The list is in uppercase and tags are delimited by ','. | |
39 | Example: @c "I,B,FONT,P" | |
23324ae1 | 40 | */ |
da1ed74c | 41 | virtual wxString GetSupportedTags() = 0; |
23324ae1 FM |
42 | |
43 | /** | |
44 | This is the core method of each handler. It is called each time | |
4cc4bfaf | 45 | one of the supported tags is detected. @a tag contains all necessary |
23324ae1 | 46 | info (see wxHtmlTag for details). |
5bddd46d FM |
47 | |
48 | Example: | |
49 | ||
50 | @code | |
51 | bool MyHandler::HandleTag(const wxHtmlTag& tag) | |
52 | { | |
53 | ... | |
54 | // change state of parser (e.g. set bold face) | |
55 | ParseInner(tag); | |
56 | ... | |
57 | // restore original state of parser | |
58 | } | |
59 | @endcode | |
60 | ||
61 | You shouldn't call ParseInner() if the tag is not paired with an ending one. | |
62 | ||
63 | @return @true if ParseInner() was called, @false otherwise. | |
23324ae1 | 64 | */ |
da1ed74c | 65 | virtual bool HandleTag(const wxHtmlTag& tag) = 0; |
23324ae1 | 66 | |
5e6e278d FM |
67 | /** |
68 | Assigns @a parser to this handler. Each @b instance of handler | |
047ea2e2 | 69 | is guaranteed to be called only from the one parser. |
5e6e278d FM |
70 | */ |
71 | virtual void SetParser(wxHtmlParser* parser); | |
72 | ||
047ea2e2 RD |
73 | /** |
74 | Returns the parser associated with this tag handler. | |
3f7564f2 VZ |
75 | |
76 | @since 2.9.5 | |
047ea2e2 | 77 | */ |
3f7564f2 | 78 | wxHtmlParser* GetParser() const; |
047ea2e2 RD |
79 | |
80 | ||
5e6e278d FM |
81 | protected: |
82 | ||
23324ae1 FM |
83 | /** |
84 | This method calls parser's wxHtmlParser::DoParsing method | |
85 | for the string between this tag and the paired ending tag: | |
5bddd46d FM |
86 | @code |
87 | ...<A HREF="x.htm">Hello, world!</A>... | |
88 | @endcode | |
89 | ||
90 | In this example, a call to ParseInner() (with @a tag pointing to A tag) | |
23324ae1 FM |
91 | will parse 'Hello, world!'. |
92 | */ | |
93 | void ParseInner(const wxHtmlTag& tag); | |
94 | ||
90f011dc RD |
95 | /** |
96 | Parses given source as if it was tag's inner code (see | |
97 | wxHtmlParser::GetInnerSource). Unlike ParseInner(), this method lets | |
98 | you specify the source code to parse. This is useful when you need to | |
99 | modify the inner text before parsing. | |
100 | */ | |
101 | void ParseInnerSource(const wxString& source); | |
102 | ||
23324ae1 | 103 | /** |
23324ae1 FM |
104 | This attribute is used to access parent parser. It is protected so that |
105 | it can't be accessed by user but can be accessed from derived classes. | |
106 | */ | |
5bddd46d | 107 | wxHtmlParser* m_Parser; |
23324ae1 FM |
108 | }; |
109 | ||
110 | ||
e54c96f1 | 111 | |
23324ae1 FM |
112 | /** |
113 | @class wxHtmlParser | |
7c913512 | 114 | |
23324ae1 | 115 | Classes derived from this handle the @b generic parsing of HTML documents: it |
5bddd46d FM |
116 | scans the document and divides it into blocks of tags (where one block consists |
117 | of beginning and ending tag and of text between these two tags). | |
7c913512 | 118 | |
5bddd46d | 119 | It is independent from wxHtmlWindow and can be used as a stand-alone parser. |
7c913512 | 120 | |
23324ae1 FM |
121 | It uses a system of tag handlers to parse the HTML document. Tag handlers |
122 | are not statically shared by all instances but are created for each | |
123 | wxHtmlParser instance. The reason is that the handler may contain | |
124 | document-specific temporary data used during parsing (e.g. complicated | |
125 | structures like tables). | |
7c913512 | 126 | |
23324ae1 | 127 | Typically the user calls only the wxHtmlParser::Parse method. |
7c913512 | 128 | |
23324ae1 FM |
129 | @library{wxhtml} |
130 | @category{html} | |
7c913512 | 131 | |
5bddd46d | 132 | @see @ref overview_html_cells, @ref overview_html_handlers, wxHtmlTag |
23324ae1 | 133 | */ |
7c913512 | 134 | class wxHtmlParser |
23324ae1 FM |
135 | { |
136 | public: | |
137 | /** | |
138 | Constructor. | |
139 | */ | |
140 | wxHtmlParser(); | |
141 | ||
23324ae1 | 142 | /** |
5bddd46d FM |
143 | Adds handler to the internal list (hash table) of handlers. |
144 | This method should not be called directly by user but rather by derived class' | |
23324ae1 | 145 | constructor. |
5bddd46d | 146 | |
23324ae1 | 147 | This adds the handler to this @b instance of wxHtmlParser, not to |
5bddd46d FM |
148 | all objects of this class! |
149 | (Static front-end to AddTagHandler is provided by wxHtmlWinParser). | |
150 | ||
23324ae1 FM |
151 | All handlers are deleted on object deletion. |
152 | */ | |
5267aefd | 153 | virtual void AddTagHandler(wxHtmlTagHandler* handler); |
23324ae1 FM |
154 | |
155 | /** | |
156 | Must be overridden in derived class. | |
5bddd46d FM |
157 | |
158 | This method is called by DoParsing() each time a part of text is parsed. | |
159 | @a txt is NOT only one word, it is a substring of the input. | |
160 | It is not formatted or preprocessed (so white spaces are unmodified). | |
23324ae1 FM |
161 | */ |
162 | virtual void AddWord(const wxString& txt); | |
163 | ||
23324ae1 | 164 | /** |
5bddd46d | 165 | Parses the m_Source from @a begin_pos to @a end_pos - 1. |
23324ae1 | 166 | */ |
a44f3b5a | 167 | void DoParsing(const const_iterator& begin_pos, const const_iterator& end_pos); |
5bddd46d FM |
168 | |
169 | /** | |
170 | Parses the whole m_Source. | |
171 | */ | |
7c913512 | 172 | void DoParsing(); |
23324ae1 FM |
173 | |
174 | /** | |
175 | This must be called after DoParsing(). | |
176 | */ | |
177 | virtual void DoneParser(); | |
178 | ||
179 | /** | |
180 | Returns pointer to the file system. Because each tag handler has | |
181 | reference to its parent parser, it can easily request the file by | |
5bddd46d FM |
182 | calling: |
183 | @code | |
184 | wxFSFile *f = m_Parser -> GetFS() -> OpenFile("image.jpg"); | |
185 | @endcode | |
23324ae1 | 186 | */ |
328f5751 | 187 | wxFileSystem* GetFS() const; |
23324ae1 FM |
188 | |
189 | /** | |
5bddd46d FM |
190 | Returns product of parsing. |
191 | Returned value is result of parsing of the document. | |
192 | ||
193 | The type of this result depends on internal representation in derived | |
194 | parser (but it must be derived from wxObject!). | |
23324ae1 FM |
195 | See wxHtmlWinParser for details. |
196 | */ | |
da1ed74c | 197 | virtual wxObject* GetProduct() = 0; |
23324ae1 FM |
198 | |
199 | /** | |
200 | Returns pointer to the source being parsed. | |
201 | */ | |
5267aefd | 202 | const wxString* GetSource(); |
23324ae1 FM |
203 | |
204 | /** | |
5bddd46d FM |
205 | Setups the parser for parsing the @a source string. |
206 | (Should be overridden in derived class) | |
23324ae1 FM |
207 | */ |
208 | virtual void InitParser(const wxString& source); | |
209 | ||
210 | /** | |
211 | Opens given URL and returns @c wxFSFile object that can be used to read data | |
212 | from it. This method may return @NULL in one of two cases: either the URL doesn't | |
213 | point to any valid resource or the URL is blocked by overridden implementation | |
214 | of @e OpenURL in derived class. | |
5bddd46d | 215 | |
7c913512 | 216 | @param type |
4cc4bfaf | 217 | Indicates type of the resource. Is one of: |
5bddd46d FM |
218 | - wxHTML_URL_PAGE: Opening a HTML page. |
219 | - wxHTML_URL_IMAGE: Opening an image. | |
220 | - wxHTML_URL_OTHER: Opening a resource that doesn't fall into | |
221 | any other category. | |
7c913512 | 222 | @param url |
4cc4bfaf | 223 | URL being opened. |
5bddd46d FM |
224 | |
225 | @note | |
226 | Always use this method in tag handlers instead of GetFS()->OpenFile() | |
227 | because it can block the URL and is thus more secure. | |
228 | Default behaviour is to call wxHtmlWindow::OnOpeningURL of the associated | |
229 | wxHtmlWindow object (which may decide to block the URL or redirect it to | |
230 | another one), if there's any, and always open the URL if the parser is not | |
231 | used with wxHtmlWindow. | |
232 | Returned wxFSFile object is not guaranteed to point to url, it might have | |
233 | been redirected! | |
23324ae1 | 234 | */ |
fadc2df6 | 235 | virtual wxFSFile* OpenURL(wxHtmlURLType type, const wxString& url) const; |
23324ae1 FM |
236 | |
237 | /** | |
238 | Proceeds parsing of the document. This is end-user method. You can simply | |
5bddd46d FM |
239 | call it when you need to obtain parsed output (which is parser-specific). |
240 | ||
23324ae1 | 241 | The method does these things: |
5bddd46d FM |
242 | -# calls InitParser(source) |
243 | -# calls DoParsing() | |
244 | -# calls GetProduct() | |
245 | -# calls DoneParser() | |
246 | -# returns value returned by GetProduct() | |
247 | ||
248 | You shouldn't use InitParser(), DoParsing(), GetProduct() or DoneParser() directly. | |
23324ae1 FM |
249 | */ |
250 | wxObject* Parse(const wxString& source); | |
251 | ||
252 | /** | |
5bddd46d | 253 | Restores parser's state before last call to PushTagHandler(). |
23324ae1 FM |
254 | */ |
255 | void PopTagHandler(); | |
256 | ||
257 | /** | |
7c913512 FM |
258 | Forces the handler to handle additional tags |
259 | (not returned by wxHtmlTagHandler::GetSupportedTags). | |
23324ae1 | 260 | The handler should already be added to this parser. |
5bddd46d | 261 | |
7c913512 | 262 | @param handler |
4cc4bfaf | 263 | the handler |
7c913512 | 264 | @param tags |
5bddd46d FM |
265 | List of tags (in same format as GetSupportedTags()'s return value). |
266 | The parser will redirect these tags to handler (until call to PopTagHandler()). | |
267 | ||
268 | Example: | |
269 | ||
270 | Imagine you want to parse following pseudo-html structure: | |
271 | @code | |
272 | <myitems> | |
273 | <param name="one" value="1"> | |
274 | <param name="two" value="2"> | |
275 | </myitems> | |
276 | ||
277 | <execute> | |
278 | <param program="text.exe"> | |
279 | </execute> | |
280 | @endcode | |
281 | ||
282 | It is obvious that you cannot use only one tag handler for \<param\> tag. | |
283 | Instead you must use context-sensitive handlers for \<param\> inside \<myitems\> | |
284 | and \<param\> inside \<execute\>. | |
285 | This is the preferred solution: | |
286 | ||
287 | @code | |
288 | TAG_HANDLER_BEGIN(MYITEM, "MYITEMS") | |
289 | TAG_HANDLER_PROC(tag) | |
290 | { | |
291 | // ...something... | |
292 | ||
293 | m_Parser -> PushTagHandler(this, "PARAM"); | |
294 | ParseInner(tag); | |
295 | m_Parser -> PopTagHandler(); | |
296 | ||
297 | // ...something... | |
298 | } | |
299 | TAG_HANDLER_END(MYITEM) | |
300 | @endcode | |
23324ae1 FM |
301 | */ |
302 | void PushTagHandler(wxHtmlTagHandler* handler, | |
303 | const wxString& tags); | |
304 | ||
305 | /** | |
5bddd46d FM |
306 | Sets the virtual file system that will be used to request additional files. |
307 | (For example @c IMG tag handler requests wxFSFile with the image data.) | |
23324ae1 | 308 | */ |
5267aefd | 309 | void SetFS(wxFileSystem* fs); |
23324ae1 FM |
310 | |
311 | /** | |
5bddd46d FM |
312 | Call this function to interrupt parsing from a tag handler. |
313 | No more tags will be parsed afterward. This function may only be called | |
314 | from Parse() or any function called by it (i.e. from tag handlers). | |
23324ae1 | 315 | */ |
adaaa686 | 316 | virtual void StopParsing(); |
5e6e278d FM |
317 | |
318 | protected: | |
319 | ||
320 | /** | |
321 | This may (but need not) be overridden in derived class. | |
322 | ||
323 | This method is called each time new tag is about to be added. | |
324 | @a tag contains information about the tag. (See wxHtmlTag for details.) | |
325 | ||
326 | Default (wxHtmlParser) behaviour is this: first it finds a handler capable | |
327 | of handling this tag and then it calls handler's HandleTag() method. | |
328 | */ | |
329 | virtual void AddTag(const wxHtmlTag& tag); | |
23324ae1 | 330 | }; |
e54c96f1 | 331 |