]> git.saurik.com Git - wxWidgets.git/blob - docs/latex/wx/htparser.tex
More asserts and stuff
[wxWidgets.git] / docs / latex / wx / htparser.tex
1 %
2 % automatically generated by HelpGen from
3 % htmlparser.tex at 14/Mar/99 20:13:37
4 %
5
6
7 \section{\class{wxHtmlParser}}\label{wxhtmlparser}
8
9 This class handles {\bf generic} parsing of an HTML document: it scans
10 the document and divides it into blocks of tags (where one block
11 consists of a beginning tag, an ending tag and the text between these
12 two tags).
13
14 It is independent of wxHtmlWindow and can be used as a stand-alone parser
15 (Julian Smart's idea of a speech-only HTML viewer or a wget-like utility -
16 see the InetGet sample for an example).
17
18 It uses a system of tag handlers to parse the HTML document. Tag handlers
19 are not statically shared by all instances but are created for each
20 wxHtmlParser instance. The reason is that the handler may contain
21 document-specific temporary data used during parsing (e.g. complicated
22 structures like tables).
23
24 Typically the user calls only the \helpref{Parse}{wxhtmlparserparse} method.
25
26 \wxheading{Derived from}
27
28 wxObject
29
30 \wxheading{See also}
31
32 \helpref{Cells Overview}{cells},
33 \helpref{Tag Handlers Overview}{handlers},
34 \helpref{wxHtmlTag}{wxhtmltag}
35
36 \latexignore{\rtfignore{\wxheading{Members}}}
37
38
39 \membersection{wxHtmlParser::wxHtmlParser}\label{wxhtmlparserwxhtmlparser}
40
41 \func{}{wxHtmlParser}{\void}
42
43 Constructor.
44
45
46 \membersection{wxHtmlParser::SetFS}\label{wxhtmlparsersetfs}
47
48 \func{void}{SetFS}{\param{wxFileSystem }{*fs}}
49
50 Sets the virtual file system that will be used to request additional
51 files. (For example {\tt <IMG>} tag handler requests wxFSFile with the
52 image data.)
53
54 \membersection{wxHtmlParser::GetFS}\label{wxhtmlparsergetfs}
55
56 \constfunc{wxFileSystem*}{GetFS}{\void}
57
58 Returns a pointer to the file system. Because each tag handler has
59 a reference to its parent parser, it can easily request the file by
60 calling
61
62 \begin{verbatim}
63 wxFSFile *f = m_Parser -> GetFS() -> OpenFile("image.jpg");
64 \end{verbatim}
65
66
67 \membersection{wxHtmlParser::Parse}\label{wxhtmlparserparse}
68
69 \func{wxObject*}{Parse}{\param{const wxString\& }{source}}
70
71 Proceeds with parsing of the document. This is the end-user method. You can simply
72 call it when you need to obtain parsed output (which is parser-specific).
73
74 The method does these things:
75
76 \begin{enumerate}
77 \item calls \helpref{InitParser(source)}{wxhtmlparserinitparser}
78 \item calls \helpref{DoParsing}{wxhtmlparserdoparsing}
79 \item calls \helpref{GetProduct}{wxhtmlparsergetproduct}
80 \item calls \helpref{DoneParser}{wxhtmlparserdoneparser}
81 \item returns value returned by GetProduct
82 \end{enumerate}
83
84 You shouldn't use InitParser, DoParsing, GetProduct or DoneParser directly.
85
86 \membersection{wxHtmlParser::InitParser}\label{wxhtmlparserinitparser}
87
88 \func{virtual void}{InitParser}{\param{const wxString\& }{source}}
89
90 Sets up the parser for parsing the {\it source} string. (Should be overridden
91 in derived class.)
92
93
94 \membersection{wxHtmlParser::DoneParser}\label{wxhtmlparserdoneparser}
95
96 \func{virtual void}{DoneParser}{\void}
97
98 This must be called after DoParsing().
99
100
101 \membersection{wxHtmlParser::DoParsing}\label{wxhtmlparserdoparsing}
102
103 \func{void}{DoParsing}{\param{int }{begin\_pos}, \param{int }{end\_pos}}
104
105 \func{void}{DoParsing}{\void}
106
107 Parses the m\_Source from begin\_pos to end\_pos-1.
108 (The version without parameters parses the whole m\_Source.)
109
110 \membersection{wxHtmlParser::GetProduct}\label{wxhtmlparsergetproduct}
111
112 \func{virtual wxObject*}{GetProduct}{\void}
113
114 Returns product of parsing. Returned value is result of parsing
115 of the document. The type of this result depends on internal
116 representation in derived parser (but it must be derived from wxObject!).
117
118 See wxHtmlWinParser for details.
119
120
121 \membersection{wxHtmlParser::AddTagHandler}\label{wxhtmlparseraddtaghandler}
122
123 \func{virtual void}{AddTagHandler}{\param{wxHtmlTagHandler }{*handler}}
124
125 Adds handler to the internal list (\& hash table) of handlers. This
126 method should not be called directly by user but rather by derived class'
127 constructor.
128
129 This adds the handler to this {\bf instance} of wxHtmlParser, not to
130 all objects of this class! (A static front-end to AddTagHandler is provided
131 by wxHtmlWinParser.)
132
133 All handlers are deleted on object deletion.
134
135 \membersection{wxHtmlParser::GetSource}\label{wxhtmlparsergetsource}
136
137 \func{wxString*}{GetSource}{\void}
138
139 Returns pointer to the source being parsed.
140
141
142 \membersection{wxHtmlParser::GetTempData}\label{wxhtmlparsergettempdata}
143
144 \func{virtual wxList*}{GetTempData}{\void}
145
146 This method returns a list of wxObjects that represents
147 all data allocated by the parser. These can't be freed
148 by the destructor because they must be valid as long as
149 GetProduct's return value is valid - the caller must
150 explicitly call
151
152 \begin{verbatim}
153 delete (MyParser -> GetTempData());
154 \end{verbatim}
155
156 to free the memory (this method always sets the list to delete its contents).
157
158 \wxheading{Example}
159
160 Why is this necessary? Imagine wxHtmlWinParser: when handling the
161 FONT tag it creates some fonts. These fonts are then used by wxHtmlWindow
162 to display the text. But the wxHtmlWinParser object is needed only when parsing
163 the document - it may be deleted then. But fonts CAN'T be deleted - they
164 must exist as long as the window is displaying text.
165
166 GetTempData() solves the problem.
167
168 \membersection{wxHtmlParser::AddText}\label{wxhtmlparseraddword}
169
170 \func{virtual void}{AddText}{\param{const char* }{txt}}
171
172 Must be overridden in derived class.
173
174 This method is called by \helpref{DoParsing}{wxhtmlparserdoparsing}
175 each time a part of text is parsed. {\it txt} is NOT only one word, it is
176 a substring of the input. It is not formatted or preprocessed (so white spaces are
177 unmodified).
178
179 \membersection{wxHtmlParser::AddTag}\label{wxhtmlparseraddtag}
180
181 \func{void}{AddTag}{\param{const wxHtmlTag\& }{tag}}
182
183 This may (and may not) be overridden in derived class.
184
185 This method is called each time new tag is about to be added.
186 {\it tag} contains information about the tag. (See \helpref{wxHtmlTag}{wxhtmltag}
187 for details.)
188
189 Default (wxHtmlParser) behaviour is this:
190 first it finds a handler capable of handling this tag and then it calls
191 the handler's HandleTag method.