]>
Commit | Line | Data |
---|---|---|
704a4b75 VS |
1 | % |
2 | % automatically generated by HelpGen from | |
3 | % htmlparser.tex at 14/Mar/99 20:13:37 | |
4 | % | |
5 | ||
704a4b75 VS |
6 | \section{\class{wxHtmlParser}}\label{wxhtmlparser} |
7 | ||
f6e9a818 | 8 | Classes derived from this handle the {\bf generic} parsing of HTML documents: it scans |
704a4b75 | 9 | the document and divides it into blocks of tags (where one block |
2edb0bde | 10 | consists of a beginning and an ending tag and of the text between these |
22d6efa8 | 11 | two tags). |
704a4b75 VS |
12 | |
13 | It is independent from wxHtmlWindow and can be used as a stand-alone parser | |
14 | (Julian Smart's idea of speech-only HTML viewer or wget-like utility - | |
3660fc40 | 15 | see InetGet sample for example). |
704a4b75 VS |
16 | |
17 | It uses a system of tag handlers to parse the HTML document. Tag handlers | |
2edb0bde | 18 | are not statically shared by all instances but are created for each |
704a4b75 VS |
19 | wxHtmlParser instance. The reason is that the handler may contain |
20 | document-specific temporary data used during parsing (e.g. complicated | |
22d6efa8 | 21 | structures like tables). |
704a4b75 | 22 | |
22d6efa8 | 23 | Typically the user calls only the \helpref{Parse}{wxhtmlparserparse} method. |
704a4b75 VS |
24 | |
25 | \wxheading{Derived from} | |
26 | ||
27 | wxObject | |
28 | ||
9704b250 VS |
29 | \wxheading{Include files} |
30 | ||
31 | <wx/html/htmlpars.h> | |
32 | ||
a7af285d VZ |
33 | \wxheading{Library} |
34 | ||
35 | \helpref{wxHtml}{librarieslist} | |
36 | ||
704a4b75 VS |
37 | \wxheading{See also} |
38 | ||
39 | \helpref{Cells Overview}{cells}, | |
40 | \helpref{Tag Handlers Overview}{handlers}, | |
41 | \helpref{wxHtmlTag}{wxhtmltag} | |
42 | ||
43 | \latexignore{\rtfignore{\wxheading{Members}}} | |
44 | ||
704a4b75 VS |
45 | \membersection{wxHtmlParser::wxHtmlParser}\label{wxhtmlparserwxhtmlparser} |
46 | ||
47 | \func{}{wxHtmlParser}{\void} | |
48 | ||
3660fc40 | 49 | Constructor. |
704a4b75 | 50 | |
559fe022 | 51 | \membersection{wxHtmlParser::AddTag}\label{wxhtmlparseraddtag} |
704a4b75 | 52 | |
559fe022 | 53 | \func{void}{AddTag}{\param{const wxHtmlTag\& }{tag}} |
704a4b75 | 54 | |
2edb0bde | 55 | This may (but need not) be overridden in a derived class. |
704a4b75 | 56 | |
559fe022 VS |
57 | This method is called each time a new tag is about to be added. |
58 | {\it tag} contains information about the tag. (See \helpref{wxHtmlTag}{wxhtmltag} | |
59 | for details.) | |
704a4b75 | 60 | |
559fe022 VS |
61 | Default (wxHtmlParser) behaviour is this: |
62 | First it finds a handler capable of handling this tag and then it calls | |
63 | handler's HandleTag method. | |
704a4b75 | 64 | |
559fe022 | 65 | \membersection{wxHtmlParser::AddTagHandler}\label{wxhtmlparseraddtaghandler} |
704a4b75 | 66 | |
559fe022 | 67 | \func{virtual void}{AddTagHandler}{\param{wxHtmlTagHandler }{*handler}} |
704a4b75 | 68 | |
559fe022 VS |
69 | Adds handler to the internal list (\& hash table) of handlers. This |
70 | method should not be called directly by user but rather by derived class' | |
71 | constructor. | |
704a4b75 | 72 | |
559fe022 VS |
73 | This adds the handler to this {\bf instance} of wxHtmlParser, not to |
74 | all objects of this class! (Static front-end to AddTagHandler is provided | |
75 | by wxHtmlWinParser). | |
704a4b75 | 76 | |
559fe022 | 77 | All handlers are deleted on object deletion. |
704a4b75 | 78 | |
559fe022 | 79 | \membersection{wxHtmlParser::AddText}\label{wxhtmlparseraddword} |
704a4b75 | 80 | |
5bce3e6f | 81 | \func{virtual void}{AddText}{\param{const wxString\& }{txt}} |
704a4b75 | 82 | |
2edb0bde | 83 | Must be overridden in a derived class. |
704a4b75 | 84 | |
559fe022 VS |
85 | This method is called by \helpref{DoParsing}{wxhtmlparserdoparsing} |
86 | each time a part of text is parsed. {\it txt} is NOT only one word, it is | |
87 | substring of input. It is not formatted or preprocessed (so white spaces are | |
88 | unmodified). | |
704a4b75 | 89 | |
559fe022 | 90 | \membersection{wxHtmlParser::DoParsing}\label{wxhtmlparserdoparsing} |
704a4b75 | 91 | |
559fe022 VS |
92 | \func{void}{DoParsing}{\param{int }{begin\_pos}, \param{int }{end\_pos}} |
93 | ||
94 | \func{void}{DoParsing}{\void} | |
95 | ||
96 | Parses m\_Source from begin\_pos to end\_pos-1. | |
97 | (The version without parameters parses the whole m\_Source.) | |
704a4b75 | 98 | |
704a4b75 VS |
99 | \membersection{wxHtmlParser::DoneParser}\label{wxhtmlparserdoneparser} |
100 | ||
101 | \func{virtual void}{DoneParser}{\void} | |
102 | ||
103 | This must be called after DoParsing(). | |
104 | ||
559fe022 | 105 | \membersection{wxHtmlParser::GetFS}\label{wxhtmlparsergetfs} |
704a4b75 | 106 | |
559fe022 | 107 | \constfunc{wxFileSystem*}{GetFS}{\void} |
704a4b75 | 108 | |
559fe022 | 109 | Returns pointer to the file system. Because each tag handler has a |
f6bcfd97 | 110 | reference to its parent parser, it can easily request the file by |
559fe022 | 111 | calling |
704a4b75 | 112 | |
559fe022 VS |
113 | \begin{verbatim} |
114 | wxFSFile *f = m_Parser -> GetFS() -> OpenFile("image.jpg"); | |
115 | \end{verbatim} | |
704a4b75 VS |
116 | |
117 | \membersection{wxHtmlParser::GetProduct}\label{wxhtmlparsergetproduct} | |
118 | ||
119 | \func{virtual wxObject*}{GetProduct}{\void} | |
120 | ||
3660fc40 RD |
121 | Returns product of parsing. Returned value is result of parsing |
122 | of the document. The type of this result depends on internal | |
704a4b75 VS |
123 | representation in derived parser (but it must be derived from wxObject!). |
124 | ||
125 | See wxHtmlWinParser for details. | |
126 | ||
704a4b75 VS |
127 | \membersection{wxHtmlParser::GetSource}\label{wxhtmlparsergetsource} |
128 | ||
129 | \func{wxString*}{GetSource}{\void} | |
130 | ||
131 | Returns pointer to the source being parsed. | |
132 | ||
704a4b75 | 133 | |
559fe022 | 134 | \membersection{wxHtmlParser::InitParser}\label{wxhtmlparserinitparser} |
704a4b75 | 135 | |
559fe022 | 136 | \func{virtual void}{InitParser}{\param{const wxString\& }{source}} |
704a4b75 | 137 | |
f6bcfd97 | 138 | Sets up the parser for parsing the {\it source} string. (Should be overridden |
559fe022 | 139 | in derived class) |
704a4b75 | 140 | |
6cc4e6b8 VS |
141 | \membersection{wxHtmlParser::OpenURL}\label{wxhtmlparseropenurl} |
142 | ||
143 | \func{virtual wxFSFile*}{OpenURL}{\param{wxHtmlURLType }{type}, \param{const wxString\& }{url}} | |
144 | ||
145 | Opens given URL and returns {\tt wxFSFile} object that can be used to read data | |
146 | from it. This method may return NULL in one of two cases: either the URL doesn't | |
2edb0bde | 147 | point to any valid resource or the URL is blocked by overridden implementation |
6cc4e6b8 VS |
148 | of {\it OpenURL} in derived class. |
149 | ||
150 | \wxheading{Parameters} | |
151 | ||
d2c2afc9 JS |
152 | \docparam{type}{Indicates type of the resource. Is one of: |
153 | ||
6cc4e6b8 VS |
154 | \begin{twocollist}\itemsep=0pt |
155 | \twocolitem{{\bf wxHTML\_URL\_PAGE}}{Opening a HTML page.} | |
156 | \twocolitem{{\bf wxHTML\_URL\_IMAGE}}{Opening an image.} | |
157 | \twocolitem{{\bf wxHTML\_URL\_OTHER}}{Opening a resource that doesn't fall into | |
158 | any other category.} | |
159 | \end{twocollist}} | |
160 | ||
161 | \docparam{url}{URL being opened.} | |
162 | ||
163 | \wxheading{Notes} | |
164 | ||
165 | Always use this method in tag handlers instead of {\tt GetFS()->OpenFile()} | |
166 | because it can block the URL and is thus more secure. | |
167 | ||
168 | Default behaviour is to call \helpref{wxHtmlWindow::OnOpeningURL}{wxhtmlwindowonopeningurl} | |
169 | of the associated wxHtmlWindow object (which may decide to block the URL or | |
170 | redirect it to another one), if there's any, and always open the URL if the | |
171 | parser is not used with wxHtmlWindow. | |
172 | ||
173 | Returned {\tt wxFSFile} object is not guaranteed to point to {\it url}, it might | |
174 | have been redirected! | |
175 | ||
559fe022 | 176 | \membersection{wxHtmlParser::Parse}\label{wxhtmlparserparse} |
704a4b75 | 177 | |
559fe022 | 178 | \func{wxObject*}{Parse}{\param{const wxString\& }{source}} |
704a4b75 | 179 | |
559fe022 VS |
180 | Performs parsing of the document. This is an end-user method. You can simply |
181 | call it when you need to obtain parsed output (which is parser-specific). | |
704a4b75 | 182 | |
559fe022 | 183 | The method does these things: |
704a4b75 | 184 | |
448af9a4 | 185 | \begin{enumerate}\itemsep=0pt |
559fe022 VS |
186 | \item calls \helpref{InitParser(source)}{wxhtmlparserinitparser} |
187 | \item calls \helpref{DoParsing}{wxhtmlparserdoparsing} | |
188 | \item calls \helpref{GetProduct}{wxhtmlparsergetproduct} | |
189 | \item calls \helpref{DoneParser}{wxhtmlparserdoneparser} | |
190 | \item returns value returned by GetProduct | |
191 | \end{enumerate} | |
704a4b75 | 192 | |
559fe022 VS |
193 | You shouldn't use InitParser, DoParsing, GetProduct or DoneParser directly. |
194 | ||
0eb8c938 VS |
195 | \membersection{wxHtmlParser::PushTagHandler}\label{wxhtmlparserpushtaghandler} |
196 | ||
fbfb8bcc | 197 | \func{void}{PushTagHandler}{\param{wxHtmlTagHandler* }{handler}, \param{const wxString\& }{tags}} |
0eb8c938 VS |
198 | |
199 | Forces the handler to handle additional tags | |
200 | (not returned by \helpref{GetSupportedTags}{wxhtmltaghandlergetsupportedtags}). | |
201 | The handler should already be added to this parser. | |
202 | ||
203 | \wxheading{Parameters} | |
204 | ||
205 | \docparam{handler}{the handler} | |
206 | \docparam{tags}{List of tags (in same format as GetSupportedTags's return value). The parser | |
207 | will redirect these tags to {\it handler} (until call to \helpref{PopTagHandler}{wxhtmlparserpoptaghandler}). } | |
208 | ||
209 | \wxheading{Example} | |
210 | ||
211 | Imagine you want to parse following pseudo-html structure: | |
212 | ||
213 | \begin{verbatim} | |
214 | <myitems> | |
215 | <param name="one" value="1"> | |
216 | <param name="two" value="2"> | |
217 | </myitems> | |
218 | ||
219 | <execute> | |
220 | <param program="text.exe"> | |
221 | </execute> | |
222 | \end{verbatim} | |
223 | ||
224 | It is obvious that you cannot use only one tag handler for <param> tag. | |
225 | Instead you must use context-sensitive handlers for <param> inside <myitems> | |
226 | and <param> inside <execute>. | |
227 | ||
f6bcfd97 | 228 | This is the preferred solution: |
0eb8c938 VS |
229 | |
230 | \begin{verbatim} | |
231 | TAG_HANDLER_BEGIN(MYITEM, "MYITEMS") | |
232 | TAG_HANDLER_PROC(tag) | |
233 | { | |
234 | // ...something... | |
235 | ||
236 | m_Parser -> PushTagHandler(this, "PARAM"); | |
237 | ParseInner(tag); | |
238 | m_Parser -> PopTagHandler(); | |
239 | ||
240 | // ...something... | |
241 | } | |
242 | TAG_HANDLER_END(MYITEM) | |
243 | \end{verbatim} | |
244 | ||
245 | ||
246 | \membersection{wxHtmlParser::PopTagHandler}\label{wxhtmlparserpoptaghandler} | |
247 | ||
248 | \func{void}{PopTagHandler}{\void} | |
249 | ||
250 | Restores the parser's state to what it was before the last call to | |
251 | \helpref{PushTagHandler}{wxhtmlparserpushtaghandler}. | |
252 | ||
253 | ||
559fe022 VS |
254 | \membersection{wxHtmlParser::SetFS}\label{wxhtmlparsersetfs} |
255 | ||
256 | \func{void}{SetFS}{\param{wxFileSystem }{*fs}} | |
257 | ||
258 | Sets the virtual file system that will be used to request additional | |
259 | files. (For example {\tt <IMG>} tag handler requests wxFSFile with the | |
260 | image data.) | |
22d6efa8 | 261 | |
2b5f62a0 VZ |
262 | \membersection{wxHtmlParser::StopParsing}\label{wxhtmlparserstopparsing} |
263 | ||
264 | \func{void}{StopParsing}{\void} | |
265 | ||
266 | Call this function to interrupt parsing from a tag handler. No more tags | |
267 | will be parsed afterward. This function may only be called from | |
268 | \helpref{wxHtmlParser::Parse}{wxhtmlparserparse} or any function called | |
269 | by it (i.e. from tag handlers). | |
270 |