| 1 | ///////////////////////////////////////////////////////////////////////////// |
| 2 | // Name: html/htmlpars.h |
| 3 | // Purpose: interface of wxHtmlTagHandler |
| 4 | // Author: wxWidgets team |
| 5 | // RCS-ID: $Id$ |
| 6 | // Licence: wxWindows license |
| 7 | ///////////////////////////////////////////////////////////////////////////// |
| 8 | |
| 9 | /** |
| 10 | @class wxHtmlTagHandler |
| 11 | @headerfile htmlpars.h wx/html/htmlpars.h |
| 12 | |
| 13 | |
| 14 | @library{wxhtml} |
| 15 | @category{html} |
| 16 | |
| 17 | @see Overview(), wxHtmlTag |
| 18 | */ |
| 19 | class wxHtmlTagHandler : public wxObject |
| 20 | { |
| 21 | public: |
| 22 | /** |
| 23 | Constructor. |
| 24 | */ |
| 25 | wxHtmlTagHandler(); |
| 26 | |
| 27 | /** |
| 28 | Returns list of supported tags. The list is in uppercase and tags |
| 29 | are delimited by ','. Example : @c "I,B,FONT,P" |
| 30 | */ |
| 31 | virtual wxString GetSupportedTags(); |
| 32 | |
| 33 | /** |
| 34 | This is the core method of each handler. It is called each time |
| 35 | one of supported tags is detected. @a tag contains all necessary |
| 36 | info (see wxHtmlTag for details). |
| 37 | |
| 38 | @return @true if ParseInner was called, @false otherwise. |
| 39 | */ |
| 40 | virtual bool HandleTag(const wxHtmlTag& tag); |
| 41 | |
| 42 | /** |
| 43 | This method calls parser's wxHtmlParser::DoParsing method |
| 44 | for the string between this tag and the paired ending tag: |
| 45 | |
| 46 | In this example, a call to ParseInner (with @a tag pointing to A tag) |
| 47 | will parse 'Hello, world!'. |
| 48 | */ |
| 49 | void ParseInner(const wxHtmlTag& tag); |
| 50 | |
| 51 | /** |
| 52 | Assigns @a parser to this handler. Each @b instance of handler |
| 53 | is guaranteed to be called only from the parser. |
| 54 | */ |
| 55 | virtual void SetParser(wxHtmlParser parser); |
| 56 | |
| 57 | /** |
| 58 | @b wxHtmlParser* m_Parser |
| 59 | This attribute is used to access parent parser. It is protected so that |
| 60 | it can't be accessed by user but can be accessed from derived classes. |
| 61 | */ |
| 62 | }; |
| 63 | |
| 64 | |
| 65 | |
| 66 | /** |
| 67 | @class wxHtmlParser |
| 68 | @headerfile htmlpars.h wx/html/htmlpars.h |
| 69 | |
| 70 | Classes derived from this handle the @b generic parsing of HTML documents: it |
| 71 | scans |
| 72 | the document and divide it into blocks of tags (where one block |
| 73 | consists of beginning and ending tag and of text between these |
| 74 | two tags). |
| 75 | |
| 76 | It is independent from wxHtmlWindow and can be used as stand-alone parser |
| 77 | (Julian Smart's idea of speech-only HTML viewer or wget-like utility - |
| 78 | see InetGet sample for example). |
| 79 | |
| 80 | It uses system of tag handlers to parse the HTML document. Tag handlers |
| 81 | are not statically shared by all instances but are created for each |
| 82 | wxHtmlParser instance. The reason is that the handler may contain |
| 83 | document-specific temporary data used during parsing (e.g. complicated |
| 84 | structures like tables). |
| 85 | |
| 86 | Typically the user calls only the wxHtmlParser::Parse method. |
| 87 | |
| 88 | @library{wxhtml} |
| 89 | @category{html} |
| 90 | |
| 91 | @see @ref overview_cells "Cells Overview", @ref overview_handlers "Tag Handlers |
| 92 | Overview", wxHtmlTag |
| 93 | */ |
| 94 | class wxHtmlParser |
| 95 | { |
| 96 | public: |
| 97 | /** |
| 98 | Constructor. |
| 99 | */ |
| 100 | wxHtmlParser(); |
| 101 | |
| 102 | /** |
| 103 | This may (and may not) be overwritten in derived class. |
| 104 | This method is called each time new tag is about to be added. |
| 105 | @a tag contains information about the tag. (See wxHtmlTag |
| 106 | for details.) |
| 107 | Default (wxHtmlParser) behaviour is this: |
| 108 | First it finds a handler capable of handling this tag and then it calls |
| 109 | handler's HandleTag method. |
| 110 | */ |
| 111 | void AddTag(const wxHtmlTag& tag); |
| 112 | |
| 113 | /** |
| 114 | Adds handler to the internal list ( hash table) of handlers. This |
| 115 | method should not be called directly by user but rather by derived class' |
| 116 | constructor. |
| 117 | This adds the handler to this @b instance of wxHtmlParser, not to |
| 118 | all objects of this class! (Static front-end to AddTagHandler is provided |
| 119 | by wxHtmlWinParser). |
| 120 | All handlers are deleted on object deletion. |
| 121 | */ |
| 122 | virtual void AddTagHandler(wxHtmlTagHandler handler); |
| 123 | |
| 124 | /** |
| 125 | Must be overwritten in derived class. |
| 126 | This method is called by DoParsing() |
| 127 | each time a part of text is parsed. @a txt is NOT only one word, it is |
| 128 | substring of input. It is not formatted or preprocessed (so white spaces are |
| 129 | unmodified). |
| 130 | */ |
| 131 | virtual void AddWord(const wxString& txt); |
| 132 | |
| 133 | //@{ |
| 134 | /** |
| 135 | Parses the m_Source from begin_pos to end_pos-1. |
| 136 | (in noparams version it parses whole m_Source) |
| 137 | */ |
| 138 | void DoParsing(int begin_pos, int end_pos); |
| 139 | void DoParsing(); |
| 140 | //@} |
| 141 | |
| 142 | /** |
| 143 | This must be called after DoParsing(). |
| 144 | */ |
| 145 | virtual void DoneParser(); |
| 146 | |
| 147 | /** |
| 148 | Returns pointer to the file system. Because each tag handler has |
| 149 | reference to it is parent parser it can easily request the file by |
| 150 | calling |
| 151 | */ |
| 152 | wxFileSystem* GetFS() const; |
| 153 | |
| 154 | /** |
| 155 | Returns product of parsing. Returned value is result of parsing |
| 156 | of the document. The type of this result depends on internal |
| 157 | representation in derived parser (but it must be derived from wxObject!). |
| 158 | See wxHtmlWinParser for details. |
| 159 | */ |
| 160 | virtual wxObject* GetProduct(); |
| 161 | |
| 162 | /** |
| 163 | Returns pointer to the source being parsed. |
| 164 | */ |
| 165 | wxString* GetSource(); |
| 166 | |
| 167 | /** |
| 168 | Setups the parser for parsing the @a source string. (Should be overridden |
| 169 | in derived class) |
| 170 | */ |
| 171 | virtual void InitParser(const wxString& source); |
| 172 | |
| 173 | /** |
| 174 | Opens given URL and returns @c wxFSFile object that can be used to read data |
| 175 | from it. This method may return @NULL in one of two cases: either the URL doesn't |
| 176 | point to any valid resource or the URL is blocked by overridden implementation |
| 177 | of @e OpenURL in derived class. |
| 178 | |
| 179 | @param type |
| 180 | Indicates type of the resource. Is one of: |
| 181 | |
| 182 | |
| 183 | |
| 184 | |
| 185 | |
| 186 | |
| 187 | wxHTML_URL_PAGE |
| 188 | |
| 189 | |
| 190 | |
| 191 | |
| 192 | Opening a HTML page. |
| 193 | |
| 194 | |
| 195 | |
| 196 | |
| 197 | |
| 198 | wxHTML_URL_IMAGE |
| 199 | |
| 200 | |
| 201 | |
| 202 | |
| 203 | Opening an image. |
| 204 | |
| 205 | |
| 206 | |
| 207 | |
| 208 | |
| 209 | wxHTML_URL_OTHER |
| 210 | |
| 211 | |
| 212 | |
| 213 | |
| 214 | Opening a resource that doesn't fall into |
| 215 | any other category. |
| 216 | @param url |
| 217 | URL being opened. |
| 218 | */ |
| 219 | virtual wxFSFile* OpenURL(wxHtmlURLType type, |
| 220 | const wxString& url); |
| 221 | |
| 222 | /** |
| 223 | Proceeds parsing of the document. This is end-user method. You can simply |
| 224 | call it when you need to obtain parsed output (which is parser-specific) |
| 225 | The method does these things: |
| 226 | calls @ref initparser() InitParser(source) |
| 227 | calls DoParsing() |
| 228 | calls GetProduct() |
| 229 | calls DoneParser() |
| 230 | returns value returned by GetProduct |
| 231 | You shouldn't use InitParser, DoParsing, GetProduct or DoneParser directly. |
| 232 | */ |
| 233 | wxObject* Parse(const wxString& source); |
| 234 | |
| 235 | /** |
| 236 | Restores parser's state before last call to |
| 237 | PushTagHandler(). |
| 238 | */ |
| 239 | void PopTagHandler(); |
| 240 | |
| 241 | /** |
| 242 | Forces the handler to handle additional tags |
| 243 | (not returned by wxHtmlTagHandler::GetSupportedTags). |
| 244 | The handler should already be added to this parser. |
| 245 | |
| 246 | @param handler |
| 247 | the handler |
| 248 | @param tags |
| 249 | List of tags (in same format as GetSupportedTags's return value). The parser |
| 250 | will redirect these tags to handler (until call to PopTagHandler). |
| 251 | */ |
| 252 | void PushTagHandler(wxHtmlTagHandler* handler, |
| 253 | const wxString& tags); |
| 254 | |
| 255 | /** |
| 256 | Sets the virtual file system that will be used to request additional |
| 257 | files. (For example @c IMG tag handler requests wxFSFile with the |
| 258 | image data.) |
| 259 | */ |
| 260 | void SetFS(wxFileSystem fs); |
| 261 | |
| 262 | /** |
| 263 | Call this function to interrupt parsing from a tag handler. No more tags |
| 264 | will be parsed afterward. This function may only be called from |
| 265 | Parse() or any function called |
| 266 | by it (i.e. from tag handlers). |
| 267 | */ |
| 268 | void StopParsing(); |
| 269 | }; |
| 270 | |