]>
Commit | Line | Data |
---|---|---|
1 | % | |
2 | % automatically generated by HelpGen from | |
3 | % htmlparser.tex at 14/Mar/99 20:13:37 | |
4 | % | |
5 | ||
6 | \section{\class{wxHtmlParser}}\label{wxhtmlparser} | |
7 | ||
8 | This class handles the {\bf generic} parsing of an HTML document: it scans | |
9 | the document and divides it into blocks of tags (where one block | |
10 | consists of a beginning and an ending tag and of the text between these | |
11 | two tags). | |
12 | ||
13 | It is independent from wxHtmlWindow and can be used as stand-alone parser | |
14 | (Julian Smart's idea of speech-only HTML viewer or wget-like utility - | |
15 | see InetGet sample for example). | |
16 | ||
17 | It uses a system of tag handlers to parse the HTML document. Tag handlers | |
18 | are not statically shared by all instances but are created for each | |
19 | wxHtmlParser instance. The reason is that a handler may contain | |
20 | document-specific temporary data used during parsing (e.g. complicated | |
21 | structures like tables). | |
22 | ||
23 | Typically the user calls only the \helpref{Parse}{wxhtmlparserparse} method. | |
24 | ||
25 | \wxheading{Derived from} | |
26 | ||
27 | wxObject | |
28 | ||
29 | \wxheading{Include files} | |
30 | ||
31 | <wx/html/htmlpars.h> | |
32 | ||
33 | ||
34 | \wxheading{See also} | |
35 | ||
36 | \helpref{Cells Overview}{cells}, | |
37 | \helpref{Tag Handlers Overview}{handlers}, | |
38 | \helpref{wxHtmlTag}{wxhtmltag} | |
39 | ||
40 | \latexignore{\rtfignore{\wxheading{Members}}} | |
41 | ||
42 | \membersection{wxHtmlParser::wxHtmlParser}\label{wxhtmlparserwxhtmlparser} | |
43 | ||
44 | \func{}{wxHtmlParser}{\void} | |
45 | ||
46 | Constructor. | |
47 | ||
48 | \membersection{wxHtmlParser::AddTag}\label{wxhtmlparseraddtag} | |
49 | ||
50 | \func{void}{AddTag}{\param{const wxHtmlTag\& }{tag}} | |
51 | ||
52 | This may (but need not) be overridden in a derived class. | |
53 | ||
54 | This method is called each time new tag is about to be added. | |
55 | {\it tag} contains information about the tag. (See \helpref{wxHtmlTag}{wxhtmltag} | |
56 | for details.) | |
57 | ||
58 | Default (wxHtmlParser) behaviour is this: | |
59 | First it finds a handler capable of handling this tag and then it calls | |
60 | handler's HandleTag method. | |
61 | ||
62 | \membersection{wxHtmlParser::AddTagHandler}\label{wxhtmlparseraddtaghandler} | |
63 | ||
64 | \func{virtual void}{AddTagHandler}{\param{wxHtmlTagHandler }{*handler}} | |
65 | ||
66 | Adds handler to the internal list (\& hash table) of handlers. This | |
67 | method should not be called directly by user but rather by derived class' | |
68 | constructor. | |
69 | ||
70 | This adds the handler to this {\bf instance} of wxHtmlParser, not to | |
71 | all objects of this class! (Static front-end to AddTagHandler is provided | |
72 | by wxHtmlWinParser). | |
73 | ||
74 | All handlers are deleted on object deletion. | |
75 | ||
76 | \membersection{wxHtmlParser::AddText}\label{wxhtmlparseraddword} | |
77 | ||
78 | \func{virtual void}{AddText}{\param{const char* }{txt}} | |
79 | ||
80 | Must be overridden in a derived class. | |
81 | ||
82 | This method is called by \helpref{DoParsing}{wxhtmlparserdoparsing} | |
83 | each time a part of text is parsed. {\it txt} is NOT just a single word; it is | |
84 | a substring of the input. It is not formatted or preprocessed (so white spaces are | |
85 | unmodified). | |
86 | ||
87 | \membersection{wxHtmlParser::DoParsing}\label{wxhtmlparserdoparsing} | |
88 | ||
89 | \func{void}{DoParsing}{\param{int }{begin\_pos}, \param{int }{end\_pos}} | |
90 | ||
91 | \func{void}{DoParsing}{\void} | |
92 | ||
93 | Parses the m\_Source from begin\_pos to end\_pos-1. | |
94 | (The version without parameters parses the whole m\_Source.) | |
95 | ||
96 | \membersection{wxHtmlParser::DoneParser}\label{wxhtmlparserdoneparser} | |
97 | ||
98 | \func{virtual void}{DoneParser}{\void} | |
99 | ||
100 | This must be called after DoParsing(). | |
101 | ||
102 | \membersection{wxHtmlParser::GetFS}\label{wxhtmlparsergetfs} | |
103 | ||
104 | \constfunc{wxFileSystem*}{GetFS}{\void} | |
105 | ||
106 | Returns a pointer to the file system. Because each tag handler has | |
107 | a reference to its parent parser, it can easily request a file by | |
108 | calling | |
109 | ||
110 | \begin{verbatim} | |
111 | wxFSFile *f = m_Parser -> GetFS() -> OpenFile("image.jpg"); | |
112 | \end{verbatim} | |
113 | ||
114 | \membersection{wxHtmlParser::GetProduct}\label{wxhtmlparsergetproduct} | |
115 | ||
116 | \func{virtual wxObject*}{GetProduct}{\void} | |
117 | ||
118 | Returns product of parsing. Returned value is result of parsing | |
119 | of the document. The type of this result depends on internal | |
120 | representation in derived parser (but it must be derived from wxObject!). | |
121 | ||
122 | See wxHtmlWinParser for details. | |
123 | ||
124 | \membersection{wxHtmlParser::GetSource}\label{wxhtmlparsergetsource} | |
125 | ||
126 | \func{wxString*}{GetSource}{\void} | |
127 | ||
128 | Returns pointer to the source being parsed. | |
129 | ||
130 | ||
131 | \membersection{wxHtmlParser::InitParser}\label{wxhtmlparserinitparser} | |
132 | ||
133 | \func{virtual void}{InitParser}{\param{const wxString\& }{source}} | |
134 | ||
135 | Sets up the parser for parsing the {\it source} string. (Should be overridden | |
136 | in a derived class.) | |
137 | ||
138 | \membersection{wxHtmlParser::Parse}\label{wxhtmlparserparse} | |
139 | ||
140 | \func{wxObject*}{Parse}{\param{const wxString\& }{source}} | |
141 | ||
142 | Performs the parsing of the document. This is an end-user method. You can simply | |
143 | call it when you need to obtain the parsed output (which is parser-specific). | |
144 | ||
145 | The method does these things: | |
146 | ||
147 | \begin{enumerate}\itemsep=0pt | |
148 | \item calls \helpref{InitParser(source)}{wxhtmlparserinitparser} | |
149 | \item calls \helpref{DoParsing}{wxhtmlparserdoparsing} | |
150 | \item calls \helpref{GetProduct}{wxhtmlparsergetproduct} | |
151 | \item calls \helpref{DoneParser}{wxhtmlparserdoneparser} | |
152 | \item returns value returned by GetProduct | |
153 | \end{enumerate} | |
154 | ||
155 | You shouldn't use InitParser, DoParsing, GetProduct or DoneParser directly. | |
156 | ||
157 | ||
158 | ||
159 | \membersection{wxHtmlParser::PushTagHandler}\label{wxhtmlparserpushtaghandler} | |
160 | ||
161 | \func{void}{PushTagHandler}{\param{wxHtmlTagHandler* }{handler}, \param{wxString }{tags}} | |
162 | ||
163 | Forces the handler to handle additional tags | |
164 | (not returned by \helpref{GetSupportedTags}{wxhtmltaghandlergetsupportedtags}). | |
165 | The handler should already be added to this parser. | |
166 | ||
167 | \wxheading{Parameters} | |
168 | ||
169 | \docparam{handler}{the handler} | |
170 | \docparam{tags}{List of tags (in same format as GetSupportedTags's return value). The parser | |
171 | will redirect these tags to {\it handler} (until call to \helpref{PopTagHandler}{wxhtmlparserpoptaghandler}). } | |
172 | ||
173 | \wxheading{Example} | |
174 | ||
175 | Imagine you want to parse following pseudo-html structure: | |
176 | ||
177 | \begin{verbatim} | |
178 | <myitems> | |
179 | <param name="one" value="1"> | |
180 | <param name="two" value="2"> | |
181 | </myitems> | |
182 | ||
183 | <execute> | |
184 | <param program="text.exe"> | |
185 | </execute> | |
186 | \end{verbatim} | |
187 | ||
188 | It is obvious that you cannot use only one tag handler for <param> tag. | |
189 | Instead you must use context-sensitive handlers for <param> inside <myitems> | |
190 | and <param> inside <execute>. | |
191 | ||
192 | This is the preferred solution: | |
193 | ||
194 | \begin{verbatim} | |
195 | TAG_HANDLER_BEGIN(MYITEM, "MYITEMS") | |
196 | TAG_HANDLER_PROC(tag) | |
197 | { | |
198 | // ...something... | |
199 | ||
200 | m_Parser -> PushTagHandler(this, "PARAM"); | |
201 | ParseInner(tag); | |
202 | m_Parser -> PopTagHandler(); | |
203 | ||
204 | // ...something... | |
205 | } | |
206 | TAG_HANDLER_END(MYITEM) | |
207 | \end{verbatim} | |
208 | ||
209 | ||
210 | \membersection{wxHtmlParser::PopTagHandler}\label{wxhtmlparserpoptaghandler} | |
211 | ||
212 | \func{void}{PopTagHandler}{\void} | |
213 | ||
214 | Restores parser's state before last call to | |
215 | \helpref{PushTagHandler}{wxhtmlparserpushtaghandler}. | |
216 | ||
217 | ||
218 | \membersection{wxHtmlParser::SetFS}\label{wxhtmlparsersetfs} | |
219 | ||
220 | \func{void}{SetFS}{\param{wxFileSystem }{*fs}} | |
221 | ||
222 | Sets the virtual file system that will be used to request additional | |
223 | files. (For example {\tt <IMG>} tag handler requests wxFSFile with the | |
224 | image data.) | |
225 |