]> git.saurik.com Git - wxWidgets.git/blame - docs/latex/wx/htparser.tex
more dnd/clipboard docs
[wxWidgets.git] / docs / latex / wx / htparser.tex
CommitLineData
704a4b75
VS
1%
2% automatically generated by HelpGen from
3% htmlparser.tex at 14/Mar/99 20:13:37
4%
5
704a4b75
VS
6\section{\class{wxHtmlParser}}\label{wxhtmlparser}
7
22d6efa8 8This class handles the {\bf generic} parsing of HTML document: it scans
704a4b75
VS
9the document and divides it into blocks of tags (where one block
10consists of a beginning and an ending tag and of the text between these
22d6efa8 11two tags).
704a4b75
VS
12
13It is independent from wxHtmlWindow and can be used as stand-alone parser
14(Julian Smart's idea of speech-only HTML viewer or wget-like utility -
3660fc40 15see InetGet sample for example).
704a4b75
VS
16
17It uses system of tag handlers to parse the HTML document. Tag handlers
3660fc40 18are not statically shared by all instances but are created for each
704a4b75
VS
19wxHtmlParser instance. The reason is that the handler may contain
20document-specific temporary data used during parsing (e.g. complicated
22d6efa8 21structures like tables).
704a4b75 22
22d6efa8 23Typically the user calls only the \helpref{Parse}{wxhtmlparserparse} method.
704a4b75
VS
24
25\wxheading{Derived from}
26
27wxObject
28
29\wxheading{See also}
30
31\helpref{Cells Overview}{cells},
32\helpref{Tag Handlers Overview}{handlers},
33\helpref{wxHtmlTag}{wxhtmltag}
34
35\latexignore{\rtfignore{\wxheading{Members}}}
36
704a4b75
VS
37\membersection{wxHtmlParser::wxHtmlParser}\label{wxhtmlparserwxhtmlparser}
38
39\func{}{wxHtmlParser}{\void}
40
3660fc40 41Constructor.
704a4b75 42
559fe022 43\membersection{wxHtmlParser::AddTag}\label{wxhtmlparseraddtag}
704a4b75 44
559fe022 45\func{void}{AddTag}{\param{const wxHtmlTag\& }{tag}}
704a4b75 46
559fe022 47This may (but need not) be overridden in a derived class.
704a4b75 48
559fe022
VS
49This method is called each time a new tag is about to be added.
50{\it tag} contains information about the tag. (See \helpref{wxHtmlTag}{wxhtmltag}
51for details.)
704a4b75 52
559fe022
VS
53Default (wxHtmlParser) behaviour is this:
54First it finds a handler capable of handling this tag and then it calls
55handler's HandleTag method.
704a4b75 56
559fe022 57\membersection{wxHtmlParser::AddTagHandler}\label{wxhtmlparseraddtaghandler}
704a4b75 58
559fe022 59\func{virtual void}{AddTagHandler}{\param{wxHtmlTagHandler }{*handler}}
704a4b75 60
559fe022
VS
61Adds the handler to the internal list (\& hash table) of handlers. This
62method should not be called directly by the user but rather by the derived class's
63constructor.
704a4b75 64
559fe022
VS
65This adds the handler to this {\bf instance} of wxHtmlParser, not to
66all objects of this class! (Static front-end to AddTagHandler is provided
67by wxHtmlWinParser).
704a4b75 68
559fe022 69All handlers are deleted on object deletion.
704a4b75 70
559fe022 71\membersection{wxHtmlParser::AddText}\label{wxhtmlparseraddword}
704a4b75 72
559fe022 73\func{virtual void}{AddText}{\param{const char* }{txt}}
704a4b75 74
559fe022 75Must be overridden in a derived class.
704a4b75 76
559fe022
VS
77This method is called by \helpref{DoParsing}{wxhtmlparserdoparsing}
78each time a part of text is parsed. {\it txt} is NOT only one word, it is
79substring of input. It is not formatted or preprocessed (so white spaces are
80unmodified).
704a4b75 81
559fe022 82\membersection{wxHtmlParser::DoParsing}\label{wxhtmlparserdoparsing}
704a4b75 83
559fe022
VS
84\func{void}{DoParsing}{\param{int }{begin\_pos}, \param{int }{end\_pos}}
85
86\func{void}{DoParsing}{\void}
87
88Parses the m\_Source from begin\_pos to end\_pos-1.
89(The version without parameters parses the whole m\_Source.)
704a4b75 90
704a4b75
VS
91\membersection{wxHtmlParser::DoneParser}\label{wxhtmlparserdoneparser}
92
93\func{virtual void}{DoneParser}{\void}
94
95This must be called after DoParsing().
96
559fe022 97\membersection{wxHtmlParser::GetFS}\label{wxhtmlparsergetfs}
704a4b75 98
559fe022 99\constfunc{wxFileSystem*}{GetFS}{\void}
704a4b75 100
559fe022
VS
101Returns a pointer to the file system. Because each tag handler has a
102reference to its parent parser, it can easily request the file by
103calling
704a4b75 104
559fe022
VS
105\begin{verbatim}
106wxFSFile *f = m_Parser -> GetFS() -> OpenFile("image.jpg");
107\end{verbatim}
704a4b75
VS
108
109\membersection{wxHtmlParser::GetProduct}\label{wxhtmlparsergetproduct}
110
111\func{virtual wxObject*}{GetProduct}{\void}
112
3660fc40
RD
113Returns product of parsing. Returned value is result of parsing
114of the document. The type of this result depends on internal
704a4b75
VS
115representation in derived parser (but it must be derived from wxObject!).
116
117See wxHtmlWinParser for details.
118
704a4b75
VS
119\membersection{wxHtmlParser::GetSource}\label{wxhtmlparsergetsource}
120
121\func{wxString*}{GetSource}{\void}
122
123Returns pointer to the source being parsed.
124
704a4b75
VS
125\membersection{wxHtmlParser::GetTempData}\label{wxhtmlparsergettempdata}
126
127\func{virtual wxList*}{GetTempData}{\void}
128
129This method returns list of wxObjects that represents
22d6efa8
JS
130all data allocated by the parser. These can't be freed
131by the destructor because they must be valid as long as
704a4b75
VS
132GetProduct's return value is valid - the caller must
133explicitly call
134
135\begin{verbatim}
136delete (MyParser -> GetTempData());
137\end{verbatim}
138
22d6efa8 139to free the memory (this method always sets the list to delete its contents).
704a4b75
VS
140
141\wxheading{Example}
142
22d6efa8
JS
143Why is this necessary? Imagine wxHtmlWinParser: when handling
144a FONT tag it creates some fonts. These fonts are then used by wxHtmlWindow
145to display the text. But the wxHtmlWinParser object is needed only when parsing
704a4b75 146the document - it may be deleted then. But fonts CAN'T be deleted - they
3660fc40 147must exist as long as the window is displaying text.
704a4b75
VS
148
149GetTempData() solves the problem.
150
559fe022 151\membersection{wxHtmlParser::InitParser}\label{wxhtmlparserinitparser}
704a4b75 152
559fe022 153\func{virtual void}{InitParser}{\param{const wxString\& }{source}}
704a4b75 154
559fe022
VS
155Sets up the parser for parsing the {\it source} string. (Should be overridden
156in a derived class.)
704a4b75 157
559fe022 158\membersection{wxHtmlParser::Parse}\label{wxhtmlparserparse}
704a4b75 159
559fe022 160\func{wxObject*}{Parse}{\param{const wxString\& }{source}}
704a4b75 161
559fe022
VS
162Performs parsing of the document. This is the end-user method. You can simply
163call it when you need to obtain parsed output (which is parser-specific).
704a4b75 164
559fe022 165The method does these things:
704a4b75 166
448af9a4 167\begin{enumerate}\itemsep=0pt
559fe022
VS
168\item calls \helpref{InitParser(source)}{wxhtmlparserinitparser}
169\item calls \helpref{DoParsing}{wxhtmlparserdoparsing}
170\item calls \helpref{GetProduct}{wxhtmlparsergetproduct}
171\item calls \helpref{DoneParser}{wxhtmlparserdoneparser}
172\item returns value returned by GetProduct
173\end{enumerate}
704a4b75 174
559fe022
VS
175You shouldn't use InitParser, DoParsing, GetProduct or DoneParser directly.
176
0eb8c938
VS
177
178
179\membersection{wxHtmlParser::PushTagHandler}\label{wxhtmlparserpushtaghandler}
180
181\func{void}{PushTagHandler}{\param{wxHtmlTagHandler* }{handler}, \param{wxString }{tags}}
182
183Forces the handler to handle additional tags
184(not returned by \helpref{GetSupportedTags}{wxhtmltaghandlergetsupportedtags}).
185The handler should already be added to this parser.
186
187\wxheading{Parameters}
188
189\docparam{handler}{the handler}
190\docparam{tags}{List of tags (in same format as GetSupportedTags's return value). The parser
191will redirect these tags to {\it handler} (until call to \helpref{PopTagHandler}{wxhtmlparserpoptaghandler}). }
192
193\wxheading{Example}
194
195Imagine you want to parse following pseudo-html structure:
196
197\begin{verbatim}
198<myitems>
199 <param name="one" value="1">
200 <param name="two" value="2">
201</myitems>
202
203<execute>
204 <param program="text.exe">
205</execute>
206\end{verbatim}
207
208It is obvious that you cannot use only one tag handler for the {\tt <param>} tag.
209Instead you must use context-sensitive handlers for {\tt <param>} inside {\tt <myitems>}
210and {\tt <param>} inside {\tt <execute>}.
211
212This is the preferred solution:
213
214\begin{verbatim}
215TAG_HANDLER_BEGIN(MYITEM, "MYITEMS")
216 TAG_HANDLER_PROC(tag)
217 {
218 // ...something...
219
220 m_Parser -> PushTagHandler(this, "PARAM");
221 ParseInner(tag);
222 m_Parser -> PopTagHandler();
223
224 // ...something...
225 }
226TAG_HANDLER_END(MYITEM)
227\end{verbatim}
228
229
230\membersection{wxHtmlParser::PopTagHandler}\label{wxhtmlparserpoptaghandler}
231
232\func{void}{PopTagHandler}{\void}
233
234Restores parser's state before last call to
235\helpref{PushTagHandler}{wxhtmlparserpushtaghandler}.
236
237
559fe022
VS
238\membersection{wxHtmlParser::SetFS}\label{wxhtmlparsersetfs}
239
240\func{void}{SetFS}{\param{wxFileSystem }{*fs}}
241
242Sets the virtual file system that will be used to request additional
243files. (For example {\tt <IMG>} tag handler requests wxFSFile with the
244image data.)
22d6efa8 245