include/wx/html/htmlpars.h

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        htmlpars.h
   3 // Purpose:     wxHtmlParser class (generic parser)
   4 // Author:      Vaclav Slavik
   5 // RCS-ID:      $Id$
   6 // Copyright:   (c) 1999 Vaclav Slavik
   7 // Licence:     wxWindows Licence
   8 /////////////////////////////////////////////////////////////////////////////
   9
  10
  11 #ifndef _WX_HTMLPARS_H_
  12 #define _WX_HTMLPARS_H_
  13
  14 #ifdef __GNUG__
  15 #pragma interface
  16 #endif
  17
  18 #include "wx/defs.h"
  19 #if wxUSE_HTML
  20
  21 #include "wx/html/htmltag.h"
  22 #include "wx/filesys.h"
  23
  24 class wxHtmlParser;
  25 class wxHtmlTagHandler;
  26
  27 //--------------------------------------------------------------------------------
  28 // wxHtmlParser
  29 //                  This class handles generic parsing of an HTML document: it scans
  30 //                  the document and divides it into blocks of tags (where one block
  31 //                  consists of a starting and an ending tag and of the text between
  32 //                  these 2 tags).
  33 //--------------------------------------------------------------------------------
  34
  35 class WXDLLEXPORT wxHtmlParser : public wxObject
  36 {
  37     DECLARE_ABSTRACT_CLASS(wxHtmlParser)
  38
  39     public:
  40         wxHtmlParser() : wxObject(), m_HandlersHash(wxKEY_STRING) {m_FS = NULL; m_Cache = NULL;}
  41         virtual ~wxHtmlParser();
  42
  43         void SetFS(wxFileSystem *fs) {m_FS = fs;}
  44                 // Sets the class which will be used for opening files
  45         wxFileSystem* GetFS() const {return m_FS;}
  46
  47         wxObject* Parse(const wxString& source);
  48                 // You can simply call this method when you need parsed output.
  49                 // This method does these things:
  50                 // 1. call InitParser(source);
  51                 // 2. call DoParsing();
  52                 // 3. call GetProduct(); (it's return value is then returned)
  53                 // 4. call DoneParser();
  54
  55         virtual void InitParser(const wxString& source);
  56                 // Sets the source. This must be called before running Parse() method.
  57         virtual void DoneParser();
  58                 // This must be called after Parse().
  59
  60         void DoParsing(int begin_pos, int end_pos);
  61         inline void DoParsing() {DoParsing(0, m_Source.Length());};
  62                 // Parses the m_Source from begin_pos to end_pos-1.
  63                 // (in noparams version it parses whole m_Source)
  64
  65         virtual wxObject* GetProduct() = 0;
  66                 // Returns product of parsing
  67                 // Returned value is result of parsing of the part. The type of this result
  68                 // depends on internal representation in derived parser
  69                 // (see wxHtmlWinParser for details).
  70
  71         virtual void AddTagHandler(wxHtmlTagHandler *handler);
  72                 // adds handler to the list & hash table of handlers.
  73
  74         wxString* GetSource() {return &m_Source;}
  75
  76         virtual wxList* GetTempData() {return NULL;}
  77                 // this method returns list of wxObjects that represents
  78                 // all data allocated by the parser. These can't be freeded
  79                 // by destructor because they must be valid as long as
  80                 // GetProduct's return value is valid - the caller must
  81                 // explicitly call delete MyParser -> GetTempData() to free
  82                 // the memory
  83                 // (this method always sets the list to delete its contents)
  84
  85     protected:
  86
  87         virtual void AddText(const char* txt) = 0;
  88                 // Adds text to the output.
  89                 // This is called from Parse() and must be overriden in derived classes.
  90                 // txt is not guaranteed to be only one word. It is largest continuous part of text
  91                 // (= not broken by tags)
  92                 // NOTE : using char* because of speed improvements
  93
  94         virtual void AddTag(const wxHtmlTag& tag);
  95                 // Adds tag and proceeds it. Parse() may (and usually is) called from this method.
  96                 // This is called from Parse() and may be overriden.
  97                 // Default behavior is that it looks for proper handler in m_Handlers. The tag is
  98                 // ignored if no hander is found.
  99                 // Derived class is *responsible* for filling in m_Handlers table.
 100
 101
 102     protected:
 103         wxString m_Source;
 104                 // source being parsed
 105         wxHtmlTagsCache *m_Cache;
 106                 // tags cache, used during parsing.
 107         wxHashTable m_HandlersHash;
 108         wxList m_HandlersList;
 109                 // handlers that handle particular tags. The table is accessed by
 110                 // key = tag's name.
 111                 // This attribute MUST be filled by derived class otherwise it would
 112                 // be empty and no tags would be recognized
 113                 // (see wxHtmlWinParser for details about filling it)
 114                 // m_HandlersHash is for random access based on knowledge of tag name (BR, P, etc.)
 115                 //      it may (and often does) contain more references to one object
 116                 // m_HandlersList is list of all handlers and it is guaranteed to contain
 117                 //      only one reference to each handler instance.
 118         wxFileSystem *m_FS;
 119                 // class for opening files (file system)
 120
 121 };
 122
 123
 124
 125
 126
 127
 128 //--------------------------------------------------------------------------------
 129 // wxHtmlTagHandler
 130 //                  This class (and derived classes) cooperates with wxHtmlParser.
 131 //                  Each recognized tag is passed to handler which is capable
 132 //                  of handling it. Each tag is handled in 3 steps:
 133 //                  1. Handler modifies the state of the parser
 134 //                    (using its public methods)
 135 //                  2. Parser parses source between starting and ending tag
 136 //                  3. Handler restores original state of the parser
 137 //--------------------------------------------------------------------------------
 138
 139 class WXDLLEXPORT wxHtmlTagHandler : public wxObject
 140 {
 141     DECLARE_ABSTRACT_CLASS(wxHtmlTagHandler)
 142
 143     protected:
 144         wxHtmlParser *m_Parser;
 145
 146     public:
 147         wxHtmlTagHandler() : wxObject () {m_Parser = NULL;};
 148
 149         virtual void SetParser(wxHtmlParser *parser) {m_Parser = parser;}
 150                 // Sets the parser.
 151                 // NOTE : each _instance_ of handler is guaranteed to be called
 152                 // only by one parser. This means you don't have to care about
 153                 // reentrancy.
 154
 155         virtual wxString GetSupportedTags() = 0;
 156                 // Returns list of supported tags. The list is in uppercase and
 157                 // tags are delimited by ','.
 158                 // Example : "I,B,FONT,P"
 159                 //   is capable of handling italic, bold, font and paragraph tags
 160
 161         virtual bool HandleTag(const wxHtmlTag& tag) = 0;
 162                 // This is hadling core method. It does all the Steps 1-3.
 163                 // To process step 2, you can call ParseInner()
 164                 // returned value : TRUE if it called ParseInner(),
 165                 //                  FALSE etherwise
 166
 167     protected:
 168         void ParseInner(const wxHtmlTag& tag) {m_Parser -> DoParsing(tag.GetBeginPos(), tag.GetEndPos1());}
 169                 // parses input between beginning and ending tag.
 170                 // m_Parser must be set.
 171 };
 172
 173
 174
 175
 176 #endif
 177
 178 #endif // _WX_HTMLPARS_H_