]> git.saurik.com Git - wxWidgets.git/blob - interface/wx/html/htmlpars.h
w*h interface revisions
[wxWidgets.git] / interface / wx / html / htmlpars.h
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: html/htmlpars.h
3 // Purpose: interface of wxHtmlTagHandler
4 // Author: wxWidgets team
5 // RCS-ID: $Id$
6 // Licence: wxWindows license
7 /////////////////////////////////////////////////////////////////////////////
8
9 /**
10 @class wxHtmlTagHandler
11 Base class for the handlers that a wxHtmlParser calls to process HTML tags.
12
13 @library{wxhtml}
14 @category{html}
15
16 @see @ref overview_handlers "Tag Handlers Overview", wxHtmlParser, wxHtmlTag
17 */
18 class wxHtmlTagHandler : public wxObject
19 {
20 public:
21 /**
22 Constructor.
23 */
24 wxHtmlTagHandler();
25
26 /**
27 Returns the list of supported tags. The list is in uppercase and tags
28 are delimited by ','. Example: @c "I,B,FONT,P"
29 */
30 virtual wxString GetSupportedTags();
31
32 /**
33 This is the core method of each handler. It is called each time
34 one of the supported tags is detected. @a tag contains all necessary
35 info (see wxHtmlTag for details).
36
37 @return @true if ParseInner() was called, @false otherwise.
38 */
39 virtual bool HandleTag(const wxHtmlTag& tag);
40
41 /**
42 This method calls the parser's wxHtmlParser::DoParsing() method
43 for the string between this tag and the paired ending tag:
44 @code ...<A HREF="x.htm">Hello, world!</A>... @endcode
45 In this example, a call to ParseInner() (with @a tag pointing to the A tag)
46 will parse 'Hello, world!'.
47 */
48 void ParseInner(const wxHtmlTag& tag);
49
50 /**
51 Assigns @a parser to this handler. Each @b instance of handler is guaranteed to be called only from the parser.
52 NOTE(review): @a parser is declared by value here although m_Parser is documented as wxHtmlParser* - presumably a pointer is intended; confirm.
53 */
54 virtual void SetParser(wxHtmlParser parser);
55
56 /**
57 @b wxHtmlParser* m_Parser
58 This attribute is used to access the parent parser. It is protected so that
59 it can't be accessed by user code but can be accessed from derived classes.
60 */
61 };
62
63
64
65 /**
66 @class wxHtmlParser
67
68 Classes derived from this handle the @b generic parsing of HTML documents:
69 the parser scans the document
70 and divides it into blocks of tags (where one block
71 consists of a beginning tag, an ending tag and the text between these
72 two tags).
73
74 It is independent from wxHtmlWindow and can be used as a stand-alone parser
75 (Julian Smart's idea of speech-only HTML viewer or wget-like utility -
76 see InetGet sample for example).
77
78 It uses a system of tag handlers to parse the HTML document. Tag handlers
79 are not statically shared by all instances but are created for each
80 wxHtmlParser instance. The reason is that the handler may contain
81 document-specific temporary data used during parsing (e.g. complicated
82 structures like tables).
83
84 Typically the user calls only the wxHtmlParser::Parse() method.
85
86 @library{wxhtml}
87 @category{html}
88
89 @see @ref overview_cells "Cells Overview",
90 @ref overview_handlers "Tag Handlers Overview", wxHtmlTag
91 */
92 class wxHtmlParser
93 {
94 public:
95 /**
96 Constructor.
97 */
98 wxHtmlParser();
99
100 /**
101 This may (but need not) be overridden in a derived class.
102 This method is called each time a new tag is about to be added.
103 @a tag contains information about the tag (see wxHtmlTag
104 for details).
105 Default (wxHtmlParser) behaviour is this: first it finds a handler capable
106 of handling this tag and then it calls the handler's HandleTag() method.
107 NOTE(review): declared without @c virtual here, which conflicts with "may be overridden" - confirm against the implementation header.
108 */
109 void AddTag(const wxHtmlTag& tag);
110
111 /**
112 Adds @a handler to the internal list (hash table) of handlers. This method
113 should not be called directly by user but rather by derived class' constructor.
114 This adds the handler to this @b instance of wxHtmlParser, not to
115 all objects of this class! (Static front-end to AddTagHandler() is provided
116 by wxHtmlWinParser.)
117 All handlers are deleted on object deletion.
118 NOTE(review): @a handler is declared by value here, but PushTagHandler() takes a wxHtmlTagHandler* and handlers are deleted by the parser - presumably a pointer is intended; confirm.
119 */
120 virtual void AddTagHandler(wxHtmlTagHandler handler);
121
122 /**
123 Must be overridden in derived class.
124 This method is called by DoParsing()
125 each time a part of text is parsed. @a txt is NOT only one word, it is
126 a substring of the input. It is not formatted or preprocessed (so white spaces
127 are unmodified).
128 */
129 virtual void AddWord(const wxString& txt);
130
131 //@{
132 /**
133 Parses the m_Source from @a begin_pos to @a end_pos - 1.
134 (The version without parameters parses the whole m_Source.)
135 */
136 void DoParsing(int begin_pos, int end_pos);
137 void DoParsing();
138 //@}
139
140 /**
141 This must be called after DoParsing().
142 */
143 virtual void DoneParser();
144
145 /**
146 Returns pointer to the file system. Because each tag handler has a
147 reference to its parent parser, it can easily request a file by calling,
148 for example, @c m_Parser->GetFS()->OpenFile("image.jpg") (see wxFileSystem::OpenFile).
149 */
150 wxFileSystem* GetFS() const;
151
152 /**
153 Returns product of parsing. Returned value is the result of parsing
154 of the document. The type of this result depends on the internal
155 representation in the derived parser (but it must be derived from wxObject!).
156 See wxHtmlWinParser for details.
157 */
158 virtual wxObject* GetProduct();
159
160 /**
161 Returns pointer to the source being parsed.
162 */
163 wxString* GetSource();
164
165 /**
166 Sets up the parser for parsing the @a source string. (Should be overridden
167 in derived class.)
168 */
169 virtual void InitParser(const wxString& source);
170
171 /**
172 Opens given URL and returns @c wxFSFile object that can be used to read data
173 from it. This method may return @NULL in one of two cases: either the URL doesn't
174 point to any valid resource or the URL is blocked by an overridden implementation
175 of OpenURL() in a derived class.
176
177 @param type
178 Indicates type of the resource. Is one of:
179
180
181
182
183
184
185 - @c wxHTML_URL_PAGE: Opening a HTML page.
186
187
188
189
190
191
192
193
194
195
196 - @c wxHTML_URL_IMAGE: Opening an image.
197
198
199
200
201
202
203
204
205
206
207 - @c wxHTML_URL_OTHER: Opening a resource that doesn't fall into any other category.
208
209
210
211
212
213
214 @param url
215 URL being opened.
216 */
217 virtual wxFSFile* OpenURL(wxHtmlURLType type,
218 const wxString& url);
219
220 /**
221 Proceeds parsing of the document. This is an end-user method. You can simply
222 call it when you need to obtain parsed output (which is parser-specific).
223 The method does these things:
224 -# calls InitParser(source)
225 -# calls DoParsing()
226 -# calls GetProduct()
227 -# calls DoneParser() and returns the value returned by GetProduct()
228
229 You shouldn't use InitParser(), DoParsing(), GetProduct() or DoneParser() directly.
230 */
231 wxObject* Parse(const wxString& source);
232
233 /**
234 Restores the parser's state to what it was before the last call to
235 PushTagHandler().
236 */
237 void PopTagHandler();
238
239 /**
240 Forces the handler to handle additional tags
241 (not returned by wxHtmlTagHandler::GetSupportedTags()).
242 The handler should already be added to this parser.
243
244 @param handler
245 the handler
246 @param tags
247 List of tags (in the same format as the return value of wxHtmlTagHandler::GetSupportedTags()).
248 The parser will redirect these tags to @a handler (until a call to PopTagHandler()).
249 */
250 void PushTagHandler(wxHtmlTagHandler* handler,
251 const wxString& tags);
252
253 /**
254 Sets the virtual file system that will be used to request additional files.
255 (For example @c IMG tag handler requests wxFSFile with the image data.)
256 NOTE(review): @a fs is declared by value here while GetFS() returns wxFileSystem* - presumably a pointer is intended; confirm.
257 */
258 void SetFS(wxFileSystem fs);
259
260 /**
261 Call this function to interrupt parsing from a tag handler. No more tags
262 will be parsed afterward. This function may only be called from
263 Parse() or any function called by it
264 (i.e. from tag handlers).
265 */
266 void StopParsing();
267 };
268