]>
Commit | Line | Data |
---|---|---|
1 | ///////////////////////////////////////////////////////////////////////////// | |
2 | // Name: htmlparser.h | |
3 | // Purpose: wxHtmlParser class (generic parser) | |
4 | // Author: Vaclav Slavik | |
5 | // Copyright: (c) 1999 Vaclav Slavik | |
6 | // Licence: wxWindows Licence | |
7 | ///////////////////////////////////////////////////////////////////////////// | |
8 | ||
9 | ||
10 | #ifndef __HTMLPARSER_H__ | |
11 | #define __HTMLPARSER_H__ | |
12 | ||
13 | #ifdef __GNUG__ | |
14 | #pragma interface | |
15 | #endif | |
16 | ||
17 | #include "wx/defs.h" | |
18 | #if wxUSE_HTML | |
19 | ||
20 | #include <wx/html/htmltag.h> | |
21 | #include <wx/filesys.h> | |
22 | ||
23 | class wxHtmlParser; | |
24 | class wxHtmlTagHandler; | |
25 | ||
26 | //-------------------------------------------------------------------------------- | |
27 | // wxHtmlParser | |
28 | // This class handles generic parsing of an HTML document: it scans | |
29 | // the document and divides it into blocks of tags (where one block | |
30 | // consists of a starting and an ending tag and of the text between | |
31 | // these 2 tags). | |
32 | //-------------------------------------------------------------------------------- | |
33 | ||
34 | class WXDLLEXPORT wxHtmlParser : public wxObject | |
35 | { | |
36 | DECLARE_ABSTRACT_CLASS(wxHtmlParser) | |
37 | ||
38 | protected: | |
39 | wxString m_Source; | |
40 | // source text being parsed (exposed to callers through GetSource()) | |
41 | wxHtmlTagsCache *m_Cache; | |
42 | // tags cache, used during parsing. Initialized to NULL by the constructor. | |
43 | wxHashTable m_HandlersHash; | |
44 | wxList m_HandlersList; | |
45 | // Handlers that handle particular tags. The hash table is accessed by | |
46 | // key = tag's name. | |
47 | // These containers MUST be filled by the derived class, otherwise they would | |
48 | // be empty and no tags would be recognized | |
49 | // (see wxHtmlWinParser for details about filling them). | |
50 | // m_HandlersHash is for random access based on knowledge of the tag name (BR, P, etc.); | |
51 | // it may (and often does) contain several references to one handler object. | |
52 | // m_HandlersList is the list of all handlers and it is guaranteed to contain | |
53 | // only one reference to each handler instance. | |
54 | wxFileSystem *m_FS; | |
55 | // file system object used for opening files; NULL until SetFS() is called | |
56 | ||
57 | public: | |
58 | wxHtmlParser() : wxObject(), m_HandlersHash(wxKEY_STRING) {m_FS = NULL; m_Cache = NULL;} | |
59 | virtual ~wxHtmlParser(); | |
60 | ||
61 | void SetFS(wxFileSystem *fs) {m_FS = fs;} | |
62 | // Sets the file system object which will be used for opening files | |
63 | wxFileSystem* GetFS() const {return m_FS;} | |
64 | ||
65 | wxObject* Parse(const wxString& source); | |
66 | // You can simply call this method when you need parsed output. | |
67 | // This method does these things: | |
68 | // 1. calls InitParser(source); | |
69 | // 2. calls DoParsing(); | |
70 | // 3. calls GetProduct(); (its return value is then returned) | |
71 | // 4. calls DoneParser(); | |
72 | ||
73 | virtual void InitParser(const wxString& source); | |
74 | // Sets the source. This must be called before running Parse() method. NOTE(review): Parse() is documented above as calling InitParser() itself, so this presumably means DoParsing() - confirm. | |
75 | virtual void DoneParser(); | |
76 | // This must be called after Parse(). NOTE(review): presumably after DoParsing(), since Parse() is documented as calling DoneParser() itself - confirm. | |
77 | ||
78 | void DoParsing(int begin_pos, int end_pos); | |
79 | inline void DoParsing() {DoParsing(0, m_Source.Length());}; | |
80 | // Parses m_Source from begin_pos to end_pos-1. | |
81 | // (the parameterless version parses the whole m_Source, i.e. 0 .. m_Source.Length()) | |
82 | ||
83 | virtual wxObject* GetProduct() = 0; | |
84 | // Returns the product of parsing. | |
85 | // The returned value is the result of parsing of the part. The type of this result | |
86 | // depends on the internal representation in the derived parser | |
87 | // (see wxHtmlWinParser for details). | |
88 | ||
89 | virtual void AddTagHandler(wxHtmlTagHandler *handler); | |
90 | // adds the handler to the list & hash table of handlers. | |
91 | ||
92 | wxString* GetSource() {return &m_Source;} | |
93 | ||
94 | virtual wxList* GetTempData() {return NULL;} | |
95 | // This method returns a list of wxObjects that represent | |
96 | // all data allocated by the parser. These can't be freed | |
97 | // by the destructor because they must be valid as long as | |
98 | // GetProduct's return value is valid - the caller must | |
99 | // explicitly call delete MyParser -> GetTempData() to free | |
100 | // the memory. The base implementation returns NULL. | |
101 | // (this method always sets the list to delete its contents) | |
102 | ||
103 | protected: | |
104 | ||
105 | virtual void AddText(const char* txt) = 0; | |
106 | // Adds text to the output. | |
107 | // This is called from Parse() and must be overridden in derived classes. | |
108 | // txt is not guaranteed to be only one word. It is the largest continuous part of text | |
109 | // (= not broken by tags) | |
110 | // NOTE : using char* because of speed improvements | |
111 | ||
112 | virtual void AddTag(const wxHtmlTag& tag); | |
113 | // Adds a tag and processes it. Parse() may be (and usually is) called from this method. | |
114 | // This is called from Parse() and may be overridden. | |
115 | // Default behavior is that it looks for the proper handler in the handler tables. The tag is | |
116 | // ignored if no handler is found. | |
117 | // Derived class is *responsible* for filling in the handler tables (m_HandlersHash / m_HandlersList). | |
118 | }; | |
119 | ||
120 | ||
121 | ||
122 | ||
123 | ||
124 | ||
125 | //-------------------------------------------------------------------------------- | |
126 | // wxHtmlTagHandler | |
127 | // This class (and derived classes) cooperates with wxHtmlParser. | |
128 | // Each recognized tag is passed to handler which is capable | |
129 | // of handling it. Each tag is handled in 3 steps: | |
130 | // 1. Handler modifies the state of the parser | |
131 | // (using its public methods) | |
132 | // 2. Parser parses source between starting and ending tag | |
133 | // 3. Handler restores original state of the parser | |
134 | //-------------------------------------------------------------------------------- | |
135 | ||
136 | class WXDLLEXPORT wxHtmlTagHandler : public wxObject | |
137 | { | |
138 | DECLARE_ABSTRACT_CLASS(wxHtmlTagHandler) | |
139 | ||
140 | protected: | |
141 | wxHtmlParser *m_Parser; | |
142 | ||
143 | public: | |
144 | wxHtmlTagHandler() : wxObject () {m_Parser = NULL;}; | |
145 | ||
146 | virtual void SetParser(wxHtmlParser *parser) {m_Parser = parser;} | |
147 | // Sets the parser this handler works with (stored in m_Parser). | |
148 | // NOTE : each _instance_ of handler is guaranteed to be called | |
149 | // only by one parser. This means you don't have to care about | |
150 | // reentrancy. | |
151 | ||
152 | virtual wxString GetSupportedTags() = 0; | |
153 | // Returns the list of supported tags. The list is in uppercase and | |
154 | // tags are delimited by ','. | |
155 | // Example : a handler returning "I,B,FONT,P" | |
156 | // is capable of handling italic, bold, font and paragraph tags | |
157 | ||
158 | virtual bool HandleTag(const wxHtmlTag& tag) = 0; | |
159 | // This is the core handling method. It does all the Steps 1-3. | |
160 | // To process step 2, you can call ParseInner() | |
161 | // returned value : TRUE if it called ParseInner(), | |
162 | // FALSE otherwise | |
163 | ||
164 | protected: | |
165 | void ParseInner(const wxHtmlTag& tag) {m_Parser -> DoParsing(tag.GetBeginPos(), tag.GetEndPos1());} | |
166 | // Parses input between the beginning and ending tag by calling m_Parser->DoParsing(). | |
167 | // m_Parser must be set (it is NULL after construction, see SetParser()); it is dereferenced without a check. | |
168 | }; | |
169 | ||
170 | ||
171 | ||
172 | ||
173 | ||
174 | #endif // __HTMLPARSER_H__ | |
175 | ||
176 | #endif |