| 1 | % |
| 2 | % automatically generated by HelpGen from |
| 3 | % htmlparser.tex at 14/Mar/99 20:13:37 |
| 4 | % |
| 5 | |
| 6 | \section{\class{wxHtmlParser}}\label{wxhtmlparser} |
| 7 | |
| 8 | Classes derived from this handle the {\bf generic} parsing of HTML documents: the parser scans |
| 9 | the document and divides it into blocks of tags (where one block |
| 10 | consists of a beginning and an ending tag and of the text between these |
| 11 | two tags). |
| 12 | |
| 13 | It is independent of wxHtmlWindow and can be used as a stand-alone parser |
| 14 | (Julian Smart's idea of a speech-only HTML viewer or a wget-like utility - |
| 15 | see the InetGet sample for an example). |
| 16 | |
| 17 | It uses a system of tag handlers to parse the HTML document. Tag handlers |
| 18 | are not statically shared by all instances but are created for each |
| 19 | wxHtmlParser instance. The reason is that the handler may contain |
| 20 | document-specific temporary data used during parsing (e.g. complicated |
| 21 | structures like tables). |
| 22 | |
| 23 | Typically the user calls only the \helpref{Parse}{wxhtmlparserparse} method. |
| 24 | |
| 25 | \wxheading{Derived from} |
| 26 | |
| 27 | wxObject |
| 28 | |
| 29 | \wxheading{Include files} |
| 30 | |
| 31 | <wx/html/htmlpars.h> |
| 32 | |
| 33 | \wxheading{See also} |
| 34 | |
| 35 | \helpref{Cells Overview}{cells}, |
| 36 | \helpref{Tag Handlers Overview}{handlers}, |
| 37 | \helpref{wxHtmlTag}{wxhtmltag} |
| 38 | |
| 39 | \latexignore{\rtfignore{\wxheading{Members}}} |
| 40 | |
| 41 | \membersection{wxHtmlParser::wxHtmlParser}\label{wxhtmlparserwxhtmlparser} |
| 42 | |
| 43 | \func{}{wxHtmlParser}{\void} |
| 44 | |
| 45 | Constructor. |
| 46 | |
| 47 | \membersection{wxHtmlParser::AddTag}\label{wxhtmlparseraddtag} |
| 48 | |
| 49 | \func{void}{AddTag}{\param{const wxHtmlTag\& }{tag}} |
| 50 | |
| 51 | This may (but need not) be overridden in a derived class. |
| 52 | |
| 53 | This method is called each time a new tag is about to be added. |
| 54 | {\it tag} contains information about the tag. (See \helpref{wxHtmlTag}{wxhtmltag} |
| 55 | for details.) |
| 56 | |
| 57 | Default (wxHtmlParser) behaviour is this: |
| 58 | First it finds a handler capable of handling this tag and then it calls |
| 59 | handler's HandleTag method. |
| 60 | |
| 61 | \membersection{wxHtmlParser::AddTagHandler}\label{wxhtmlparseraddtaghandler} |
| 62 | |
| 63 | \func{virtual void}{AddTagHandler}{\param{wxHtmlTagHandler }{*handler}} |
| 64 | |
| 65 | Adds the handler to the internal list (\& hash table) of handlers. This |
| 66 | method should not be called directly by the user but rather by the derived class' |
| 67 | constructor. |
| 68 | |
| 69 | This adds the handler to this {\bf instance} of wxHtmlParser, not to |
| 70 | all objects of this class! (Static front-end to AddTagHandler is provided |
| 71 | by wxHtmlWinParser). |
| 72 | |
| 73 | All handlers are deleted on object deletion. |
| 74 | |
| 75 | \membersection{wxHtmlParser::AddText}\label{wxhtmlparseraddword} |
| 76 | |
| 77 | \func{virtual void}{AddText}{\param{const char* }{txt}} |
| 78 | |
| 79 | Must be overridden in a derived class. |
| 80 | |
| 81 | This method is called by \helpref{DoParsing}{wxhtmlparserdoparsing} |
| 82 | each time a part of the text is parsed. {\it txt} is NOT only one word; it is |
| 83 | a substring of the input. It is not formatted or preprocessed (so white spaces are |
| 84 | unmodified). |
| 85 | |
| 86 | \membersection{wxHtmlParser::DoParsing}\label{wxhtmlparserdoparsing} |
| 87 | |
| 88 | \func{void}{DoParsing}{\param{int }{begin\_pos}, \param{int }{end\_pos}} |
| 89 | |
| 90 | \func{void}{DoParsing}{\void} |
| 91 | |
| 92 | Parses the m\_Source from begin\_pos to end\_pos-1. |
| 93 | (The version without parameters parses the whole m\_Source.) |
| 94 | |
| 95 | \membersection{wxHtmlParser::DoneParser}\label{wxhtmlparserdoneparser} |
| 96 | |
| 97 | \func{virtual void}{DoneParser}{\void} |
| 98 | |
| 99 | This must be called after DoParsing(). |
| 100 | |
| 101 | \membersection{wxHtmlParser::GetFS}\label{wxhtmlparsergetfs} |
| 102 | |
| 103 | \constfunc{wxFileSystem*}{GetFS}{\void} |
| 104 | |
| 105 | Returns a pointer to the file system. Because each tag handler has |
| 106 | a reference to its parent parser, it can easily request the file by |
| 107 | calling |
| 108 | |
| 109 | \begin{verbatim} |
| 110 | wxFSFile *f = m_Parser -> GetFS() -> OpenFile("image.jpg"); |
| 111 | \end{verbatim} |
| 112 | |
| 113 | \membersection{wxHtmlParser::GetProduct}\label{wxhtmlparsergetproduct} |
| 114 | |
| 115 | \func{virtual wxObject*}{GetProduct}{\void} |
| 116 | |
| 117 | Returns the product of parsing. The returned value is the result of parsing |
| 118 | the document. The type of this result depends on the internal |
| 119 | representation in the derived parser (but it must be derived from wxObject!). |
| 120 | |
| 121 | See wxHtmlWinParser for details. |
| 122 | |
| 123 | \membersection{wxHtmlParser::GetSource}\label{wxhtmlparsergetsource} |
| 124 | |
| 125 | \func{wxString*}{GetSource}{\void} |
| 126 | |
| 127 | Returns pointer to the source being parsed. |
| 128 | |
| 129 | |
| 130 | \membersection{wxHtmlParser::InitParser}\label{wxhtmlparserinitparser} |
| 131 | |
| 132 | \func{virtual void}{InitParser}{\param{const wxString\& }{source}} |
| 133 | |
| 134 | Sets up the parser for parsing the {\it source} string. (Should be overridden |
| 135 | in a derived class.) |
| 136 | |
| 137 | \membersection{wxHtmlParser::OpenURL}\label{wxhtmlparseropenurl} |
| 138 | |
| 139 | \func{virtual wxFSFile*}{OpenURL}{\param{wxHtmlURLType }{type}, \param{const wxString\& }{url}} |
| 140 | |
| 141 | Opens given URL and returns {\tt wxFSFile} object that can be used to read data |
| 142 | from it. This method may return NULL in one of two cases: either the URL doesn't |
| 143 | point to any valid resource or the URL is blocked by overridden implementation |
| 144 | of {\it OpenURL} in derived class. |
| 145 | |
| 146 | \wxheading{Parameters} |
| 147 | |
| 148 | \docparam{type}{Indicates type of the resource. Is one of: |
| 149 | |
| 150 | \begin{twocollist}\itemsep=0pt |
| 151 | \twocolitem{{\bf wxHTML\_URL\_PAGE}}{Opening an HTML page.} |
| 152 | \twocolitem{{\bf wxHTML\_URL\_IMAGE}}{Opening an image.} |
| 153 | \twocolitem{{\bf wxHTML\_URL\_OTHER}}{Opening a resource that doesn't fall into |
| 154 | any other category.} |
| 155 | \end{twocollist}} |
| 156 | |
| 157 | \docparam{url}{URL being opened.} |
| 158 | |
| 159 | \wxheading{Notes} |
| 160 | |
| 161 | Always use this method in tag handlers instead of {\tt GetFS()->OpenFile()} |
| 162 | because it can block the URL and is thus more secure. |
| 163 | |
| 164 | Default behaviour is to call \helpref{wxHtmlWindow::OnOpeningURL}{wxhtmlwindowonopeningurl} |
| 165 | of the associated wxHtmlWindow object, if there is one (it may decide to block the URL or |
| 166 | redirect it to another one), and to always open the URL if the |
| 167 | parser is not used with wxHtmlWindow. |
| 168 | |
| 169 | Returned {\tt wxFSFile} object is not guaranteed to point to {\it url}, it might |
| 170 | have been redirected! |
| 171 | |
| 172 | \membersection{wxHtmlParser::Parse}\label{wxhtmlparserparse} |
| 173 | |
| 174 | \func{wxObject*}{Parse}{\param{const wxString\& }{source}} |
| 175 | |
| 176 | Performs the parsing of the document. This is the end-user method. You can simply |
| 177 | call it when you need to obtain the parsed output (which is parser-specific). |
| 178 | |
| 179 | The method does these things: |
| 180 | |
| 181 | \begin{enumerate}\itemsep=0pt |
| 182 | \item calls \helpref{InitParser(source)}{wxhtmlparserinitparser} |
| 183 | \item calls \helpref{DoParsing}{wxhtmlparserdoparsing} |
| 184 | \item calls \helpref{GetProduct}{wxhtmlparsergetproduct} |
| 185 | \item calls \helpref{DoneParser}{wxhtmlparserdoneparser} |
| 186 | \item returns value returned by GetProduct |
| 187 | \end{enumerate} |
| 188 | |
| 189 | You shouldn't use InitParser, DoParsing, GetProduct or DoneParser directly. |
| 190 | |
| 191 | \membersection{wxHtmlParser::PushTagHandler}\label{wxhtmlparserpushtaghandler} |
| 192 | |
| 193 | \func{void}{PushTagHandler}{\param{wxHtmlTagHandler* }{handler}, \param{wxString }{tags}} |
| 194 | |
| 195 | Forces the handler to handle additional tags |
| 196 | (not returned by \helpref{GetSupportedTags}{wxhtmltaghandlergetsupportedtags}). |
| 197 | The handler should already be added to this parser. |
| 198 | |
| 199 | \wxheading{Parameters} |
| 200 | |
| 201 | \docparam{handler}{the handler} |
| 202 | \docparam{tags}{List of tags (in same format as GetSupportedTags's return value). The parser |
| 203 | will redirect these tags to {\it handler} (until call to \helpref{PopTagHandler}{wxhtmlparserpoptaghandler}). } |
| 204 | |
| 205 | \wxheading{Example} |
| 206 | |
| 207 | Imagine you want to parse the following pseudo-html structure: |
| 208 | |
| 209 | \begin{verbatim} |
| 210 | <myitems> |
| 211 | <param name="one" value="1"> |
| 212 | <param name="two" value="2"> |
| 213 | </myitems> |
| 214 | |
| 215 | <execute> |
| 216 | <param program="text.exe"> |
| 217 | </execute> |
| 218 | \end{verbatim} |
| 219 | |
| 220 | It is obvious that you cannot use only one tag handler for <param> tag. |
| 221 | Instead you must use context-sensitive handlers for <param> inside <myitems> |
| 222 | and <param> inside <execute>. |
| 223 | |
| 224 | This is the preferred solution: |
| 225 | |
| 226 | \begin{verbatim} |
| 227 | TAG_HANDLER_BEGIN(MYITEM, "MYITEMS") |
| 228 | TAG_HANDLER_PROC(tag) |
| 229 | { |
| 230 | // ...something... |
| 231 | |
| 232 | m_Parser -> PushTagHandler(this, "PARAM"); |
| 233 | ParseInner(tag); |
| 234 | m_Parser -> PopTagHandler(); |
| 235 | |
| 236 | // ...something... |
| 237 | } |
| 238 | TAG_HANDLER_END(MYITEM) |
| 239 | \end{verbatim} |
| 240 | |
| 241 | |
| 242 | \membersection{wxHtmlParser::PopTagHandler}\label{wxhtmlparserpoptaghandler} |
| 243 | |
| 244 | \func{void}{PopTagHandler}{\void} |
| 245 | |
| 246 | Restores the parser's state to what it was before the last call to |
| 247 | \helpref{PushTagHandler}{wxhtmlparserpushtaghandler}. |
| 248 | |
| 249 | |
| 250 | \membersection{wxHtmlParser::SetFS}\label{wxhtmlparsersetfs} |
| 251 | |
| 252 | \func{void}{SetFS}{\param{wxFileSystem }{*fs}} |
| 253 | |
| 254 | Sets the virtual file system that will be used to request additional |
| 255 | files. (For example {\tt <IMG>} tag handler requests wxFSFile with the |
| 256 | image data.) |
| 257 | |
| 258 | \membersection{wxHtmlParser::StopParsing}\label{wxhtmlparserstopparsing} |
| 259 | |
| 260 | \func{void}{StopParsing}{\void} |
| 261 | |
| 262 | Call this function to interrupt parsing from a tag handler. No more tags |
| 263 | will be parsed afterward. This function may only be called from |
| 264 | \helpref{wxHtmlParser::Parse}{wxhtmlparserparse} or any function called |
| 265 | by it (i.e. from tag handlers). |
| 266 | |