]> git.saurik.com Git - wxWidgets.git/blob - docs/latex/wx/htparser.tex
Several changes and updates
[wxWidgets.git] / docs / latex / wx / htparser.tex
1 %
2 % automatically generated by HelpGen from
3 % htmlparser.tex at 14/Mar/99 20:13:37
4 %
5
6 \section{\class{wxHtmlParser}}\label{wxhtmlparser}
7
8 This class handles the {\bf generic} parsing of HTML document: it scans
9 the document and divides it into blocks of tags (where one block
10 consists of beginning and ending tag and of text between these
11 two tags).
12
13 It is independent from wxHtmlWindow and can be used as stand-alone parser
14 (Julian Smart's idea of speech-only HTML viewer or wget-like utility -
15 see InetGet sample for example).
16
17 It uses system of tag handlers to parse the HTML document. Tag handlers
18 are not statically shared by all instances but are created for each
19 wxHtmlParser instance. The reason is that the handler may contain
20 document-specific temporary data used during parsing (e.g. complicated
21 structures like tables).
22
23 Typically the user calls only the \helpref{Parse}{wxhtmlparserparse} method.
24
25 \wxheading{Derived from}
26
27 wxObject
28
29 \wxheading{See also}
30
31 \helpref{Cells Overview}{cells},
32 \helpref{Tag Handlers Overview}{handlers},
33 \helpref{wxHtmlTag}{wxhtmltag}
34
35 \latexignore{\rtfignore{\wxheading{Members}}}
36
37 \membersection{wxHtmlParser::wxHtmlParser}\label{wxhtmlparserwxhtmlparser}
38
39 \func{}{wxHtmlParser}{\void}
40
41 Constructor.
42
43 \membersection{wxHtmlParser::SetFS}\label{wxhtmlparsersetfs}
44
45 \func{void}{SetFS}{\param{wxFileSystem }{*fs}}
46
47 Sets the virtual file system that will be used to request additional
48 files. (For example {\tt <IMG>} tag handler requests wxFSFile with the
49 image data.)
50
51 \membersection{wxHtmlParser::GetFS}\label{wxhtmlparsergetfs}
52
53 \constfunc{wxFileSystem*}{GetFS}{\void}
54
55 Returns pointer to the file system. Because each tag handler has
56 reference to its parent parser it can easily request the file by
57 calling
58
59 \begin{verbatim}
60 wxFSFile *f = m_Parser -> GetFS() -> OpenFile("image.jpg");
61 \end{verbatim}
62
63 \membersection{wxHtmlParser::Parse}\label{wxhtmlparserparse}
64
65 \func{wxObject*}{Parse}{\param{const wxString\& }{source}}
66
67 Proceeds parsing of the document. This is an end-user method. You can simply
68 call it when you need to obtain parsed output (which is parser-specific).
69
70 The method does these things:
71
72 \begin{enumerate}
73 \item calls \helpref{InitParser(source)}{wxhtmlparserinitparser}
74 \item calls \helpref{DoParsing}{wxhtmlparserdoparsing}
75 \item calls \helpref{GetProduct}{wxhtmlparsergetproduct}
76 \item calls \helpref{DoneParser}{wxhtmlparserdoneparser}
77 \item returns value returned by GetProduct
78 \end{enumerate}
79
80 You shouldn't use InitParser, DoParsing, GetProduct or DoneParser directly.
81
82 \membersection{wxHtmlParser::InitParser}\label{wxhtmlparserinitparser}
83
84 \func{virtual void}{InitParser}{\param{const wxString\& }{source}}
85
86 Sets up the parser for parsing the {\it source} string. (Should be overridden
87 in derived class.)
88
89 \membersection{wxHtmlParser::DoneParser}\label{wxhtmlparserdoneparser}
90
91 \func{virtual void}{DoneParser}{\void}
92
93 This must be called after DoParsing().
94
95 \membersection{wxHtmlParser::DoParsing}\label{wxhtmlparserdoparsing}
96
97 \func{void}{DoParsing}{\param{int }{begin\_pos}, \param{int }{end\_pos}}
98
99 \func{void}{DoParsing}{\void}
100
101 Parses the m\_Source from begin\_pos to end\_pos-1.
102 (The version without parameters parses the whole m\_Source.)
103
104 \membersection{wxHtmlParser::GetProduct}\label{wxhtmlparsergetproduct}
105
106 \func{virtual wxObject*}{GetProduct}{\void}
107
108 Returns product of parsing. Returned value is result of parsing
109 of the document. The type of this result depends on internal
110 representation in derived parser (but it must be derived from wxObject!).
111
112 See wxHtmlWinParser for details.
113
114 \membersection{wxHtmlParser::AddTagHandler}\label{wxhtmlparseraddtaghandler}
115
116 \func{virtual void}{AddTagHandler}{\param{wxHtmlTagHandler }{*handler}}
117
118 Adds handler to the internal list (\& hash table) of handlers. This
119 method should not be called directly by user but rather by derived class'
120 constructor.
121
122 This adds the handler to this {\bf instance} of wxHtmlParser, not to
123 all objects of this class! (Static front-end to AddTagHandler is provided
124 by wxHtmlWinParser).
125
126 All handlers are deleted on object deletion.
127
128 \membersection{wxHtmlParser::GetSource}\label{wxhtmlparsergetsource}
129
130 \func{wxString*}{GetSource}{\void}
131
132 Returns pointer to the source being parsed.
133
134 \membersection{wxHtmlParser::GetTempData}\label{wxhtmlparsergettempdata}
135
136 \func{virtual wxList*}{GetTempData}{\void}
137
138 This method returns list of wxObjects that represent
139 all data allocated by the parser. These can't be freed
140 by the destructor because they must be valid as long as
141 GetProduct's return value is valid - the caller must
142 explicitly call
143
144 \begin{verbatim}
145 delete (MyParser -> GetTempData());
146 \end{verbatim}
147
148 to free the memory (this method always sets the list to delete its contents).
149
150 \wxheading{Example}
151
152 Why is this necessary? Imagine wxHtmlWinParser: when handling
153 a FONT tag it creates some fonts. These fonts are then used by wxHtmlWindow
154 to display the text. But the wxHtmlWinParser object is needed only when parsing
155 the document - it may be deleted then. But fonts CAN'T be deleted - they
156 must exist as long as the window is displaying text.
157
158 GetTempData() solves the problem.
159
160 \membersection{wxHtmlParser::AddText}\label{wxhtmlparseraddword}
161
162 \func{virtual void}{AddText}{\param{const char* }{txt}}
163
164 Must be overridden in derived class.
165
166 This method is called by \helpref{DoParsing}{wxhtmlparserdoparsing}
167 each time a part of text is parsed. {\it txt} is NOT only one word, it is
168 substring of input. It is not formatted or preprocessed (so white spaces are
169 unmodified).
170
171 \membersection{wxHtmlParser::AddTag}\label{wxhtmlparseraddtag}
172
173 \func{void}{AddTag}{\param{const wxHtmlTag\& }{tag}}
174
175 This may (but need not) be overridden in derived class.
176
177 This method is called each time new tag is about to be added.
178 {\it tag} contains information about the tag. (See \helpref{wxHtmlTag}{wxhtmltag}
179 for details.)
180
181 Default (wxHtmlParser) behaviour is this:
182 First it finds a handler capable of handling this tag and then it calls
183 handler's HandleTag method.
184