1 ///////////////////////////////////////////////////////////////////////////// 
   3 // Purpose:     wxHtmlParser class (generic parser) 
   4 // Author:      Vaclav Slavik 
   6 // Copyright:   (c) 1999 Vaclav Slavik 
   7 // Licence:     wxWindows Licence 
   8 ///////////////////////////////////////////////////////////////////////////// 
  11 #ifndef _WX_HTMLPARS_H_ 
  12 #define _WX_HTMLPARS_H_ 
  21 #include "wx/html/htmltag.h" 
  22 #include "wx/filesys.h" 
  25 class wxHtmlTagHandler
; 
  27 //-------------------------------------------------------------------------------- 
//                  This class handles generic parsing of HTML document : it scans
//                  the document and divides it into blocks of tags (where one block
//                  consists of starting and ending tag and of text between these
  33 //-------------------------------------------------------------------------------- 
  35 class WXDLLEXPORT wxHtmlParser 
: public wxObject
 
  37     DECLARE_ABSTRACT_CLASS(wxHtmlParser
) 
  40         wxHtmlParser() : wxObject(), m_HandlersHash(wxKEY_STRING
) {m_FS 
= NULL
; m_Cache 
= NULL
; m_HandlersStack 
= NULL
;} 
  41         virtual ~wxHtmlParser(); 
  43         void SetFS(wxFileSystem 
*fs
) {m_FS 
= fs
;} 
  44                 // Sets the class which will be used for opening files 
  45         wxFileSystem
* GetFS() const {return m_FS
;} 
  47         wxObject
* Parse(const wxString
& source
); 
  48                 // You can simply call this method when you need parsed output. 
  49                 // This method does these things: 
  50                 // 1. call InitParser(source); 
  51                 // 2. call DoParsing(); 
  52                 // 3. call GetProduct(); (it's return value is then returned) 
  53                 // 4. call DoneParser(); 
  55         virtual void InitParser(const wxString
& source
); 
  56                 // Sets the source. This must be called before running Parse() method. 
  57         virtual void DoneParser(); 
  58                 // This must be called after Parse(). 
  60         void DoParsing(int begin_pos
, int end_pos
); 
  61         inline void DoParsing() {DoParsing(0, m_Source
.Length());}; 
  62                 // Parses the m_Source from begin_pos to end_pos-1. 
  63                 // (in noparams version it parses whole m_Source) 
  65         virtual wxObject
* GetProduct() = 0; 
  66                 // Returns product of parsing 
  67                 // Returned value is result of parsing of the part. The type of this result 
  68                 // depends on internal representation in derived parser 
  69                 // (see wxHtmlWinParser for details). 
  71         virtual void AddTagHandler(wxHtmlTagHandler 
*handler
); 
  72                 // adds handler to the list & hash table of handlers. 
  74         void PushTagHandler(wxHtmlTagHandler 
*handler
, wxString tags
); 
  75                 // Forces the handler to handle additional tags (not returned by GetSupportedTags).  
  76                 // The handler should already be in use by this parser. 
  77                 // Example: you want to parse following pseudo-html structure: 
  79                 //     <it name="one" value="1"> 
  80                 //     <it name="two" value="2"> 
  82                 //   <it> This last it has different meaning, we don't want it to be parsed by myitems handler! 
                // handler can handle only 'myitems' (e.g. its GetSupportedTags returns "MYITEMS")
  84                 // you can call PushTagHandler(handler, "IT") when you find <myitems> 
  85                 // and call PopTagHandler() when you find </myitems> 
  88                 // Restores state before last call to PushTagHandler 
  90         wxString
* GetSource() {return &m_Source
;} 
  94         virtual void AddText(const char* txt
) = 0; 
  95                 // Adds text to the output. 
  96                 // This is called from Parse() and must be overriden in derived classes. 
  97                 // txt is not guaranteed to be only one word. It is largest continuous part of text 
  98                 // (= not broken by tags) 
  99                 // NOTE : using char* because of speed improvements 
 101         virtual void AddTag(const wxHtmlTag
& tag
); 
 102                 // Adds tag and proceeds it. Parse() may (and usually is) called from this method. 
 103                 // This is called from Parse() and may be overriden. 
 104                 // Default behavior is that it looks for proper handler in m_Handlers. The tag is 
 105                 // ignored if no hander is found. 
 106                 // Derived class is *responsible* for filling in m_Handlers table. 
 111                 // source being parsed 
 112         wxHtmlTagsCache 
*m_Cache
; 
 113                 // tags cache, used during parsing. 
 114         wxHashTable m_HandlersHash
; 
 115         wxList m_HandlersList
; 
 116                 // handlers that handle particular tags. The table is accessed by 
 118                 // This attribute MUST be filled by derived class otherwise it would 
 119                 // be empty and no tags would be recognized 
 120                 // (see wxHtmlWinParser for details about filling it) 
 121                 // m_HandlersHash is for random access based on knowledge of tag name (BR, P, etc.) 
 122                 //      it may (and often does) contain more references to one object 
 123                 // m_HandlersList is list of all handlers and it is guaranteed to contain 
 124                 //      only one reference to each handler instance. 
 126                 // class for opening files (file system) 
 127         wxList 
*m_HandlersStack
; 
 128                 // handlers stack used by PushTagHandler and PopTagHandler 
 137 //-------------------------------------------------------------------------------- 
 139 //                  This class (and derived classes) cooperates with wxHtmlParser. 
 140 //                  Each recognized tag is passed to handler which is capable 
 141 //                  of handling it. Each tag is handled in 3 steps: 
//                  1. Handler modifies the state of the parser
//                    (using its public methods)
 144 //                  2. Parser parses source between starting and ending tag 
 145 //                  3. Handler restores original state of the parser 
 146 //-------------------------------------------------------------------------------- 
 148 class WXDLLEXPORT wxHtmlTagHandler 
: public wxObject
 
 150     DECLARE_ABSTRACT_CLASS(wxHtmlTagHandler
) 
 153         wxHtmlParser 
*m_Parser
; 
 156         wxHtmlTagHandler() : wxObject () {m_Parser 
= NULL
;}; 
 158         virtual void SetParser(wxHtmlParser 
*parser
) {m_Parser 
= parser
;} 
 160                 // NOTE : each _instance_ of handler is guaranteed to be called 
 161                 // only by one parser. This means you don't have to care about 
 164         virtual wxString 
GetSupportedTags() = 0; 
 165                 // Returns list of supported tags. The list is in uppercase and 
 166                 // tags are delimited by ','. 
 167                 // Example : "I,B,FONT,P" 
 168                 //   is capable of handling italic, bold, font and paragraph tags 
 170         virtual bool HandleTag(const wxHtmlTag
& tag
) = 0; 
 171                 // This is hadling core method. It does all the Steps 1-3. 
 172                 // To process step 2, you can call ParseInner() 
 173                 // returned value : TRUE if it called ParseInner(), 
 177         void ParseInner(const wxHtmlTag
& tag
) {m_Parser 
-> DoParsing(tag
.GetBeginPos(), tag
.GetEndPos1());} 
 178                 // parses input between beginning and ending tag. 
 179                 // m_Parser must be set. 
 187 #endif // _WX_HTMLPARS_H_