]> git.saurik.com Git - wxWidgets.git/blob - utils/configtool/src/htmlparser.h
Added first cut wxWindows Configuration Tool
[wxWidgets.git] / utils / configtool / src / htmlparser.h
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: htmlparser.h
3 // Purpose: Simple HTML parser
4 // Author: Julian Smart
5 // Modified by:
6 // Created: 2002-09-25
7 // RCS-ID: $Id$
8 // Copyright: (c) Julian Smart
9 // Licence: wxWindows license
10 /////////////////////////////////////////////////////////////////////////////
11
12 #ifndef _HTMLPARSER_H_
13 #define _HTMLPARSER_H_
14
15 #ifdef __GNUG__
16 #pragma interface "htmlparser.h"
17 #endif
18
19 //#include "wx/module.h"
20 #include "wx/stream.h"
21
22 /*
23 * wxSimpleHtmlAttribute
24 * Representation of an attribute
25 */
26
27 class wxSimpleHtmlAttribute
28 {
29 friend class wxSimpleHtmlTag;
30 public:
31 wxSimpleHtmlAttribute(const wxString& name, const wxString& value)
32 {
33 m_name = name; m_value = value; m_next = NULL;
34 }
35 //// Operations
36
37 // Write this attribute
38 void Write(wxOutputStream& stream);
39
40 //// Accessors
41 const wxString& GetName() const { return m_name; }
42 const wxString& GetValue() const { return m_value; }
43 void SetName(const wxString& name) { m_name = name; }
44 void SetValue(const wxString& value) { m_value = value; }
45
46 wxSimpleHtmlAttribute* GetNextAttribute() { return m_next; }
47 void SetNextAttribute(wxSimpleHtmlAttribute* attr) { m_next = attr; }
48
49 bool HasName(const wxString& name) const { return (0 == m_name.CmpNoCase(name)); }
50 bool HasValue(const wxString& val) const { return (0 == m_value.CmpNoCase(val)); }
51
52 private:
53 wxString m_name;
54 wxString m_value;
55 wxSimpleHtmlAttribute* m_next;
56 };
57
58
59 /*
60 * wxSimpleHtmlTag
61 * Representation of a tag or chunk of text
62 */
63
// Node kinds stored in a tag's type field. The enumerators rely on their
// implicit values (0..5 in this order), so do not reorder them.
enum
{
    wxSimpleHtmlTag_Text,           // A chunk of text
    wxSimpleHtmlTag_TopLevel,       // presumably the root node of a parsed document -- TODO confirm
    wxSimpleHtmlTag_Open,           // An opening tag
    wxSimpleHtmlTag_Close,          // A closing tag
    wxSimpleHtmlTag_Directive,      // A directive, e.g. <!DOCTYPE ....>
    wxSimpleHtmlTag_XMLDeclaration  // An XML declaration, e.g. <?xml .... ?>
};
65
66 class wxSimpleHtmlTag
67 {
68 public:
69 wxSimpleHtmlTag(const wxString& tagName, int tagType);
70 ~wxSimpleHtmlTag();
71
72 //// Operations
73 void ClearAttributes();
74 wxSimpleHtmlAttribute* FindAttribute(const wxString& name) const ;
75 void AppendAttribute(const wxString& name, const wxString& value);
76 void ClearChildren();
77 // Remove 1 tag from the child list.
78 void RemoveChild(wxSimpleHtmlTag *remove);
79 // Appaned tag to the end of the child list.
80 void AppendTag(wxSimpleHtmlTag* tag);
81 // Insert tag after ourself in the parents child list.
82 void AppendTagAfterUs(wxSimpleHtmlTag* tag);
83 // Write this tag
84 void Write(wxOutputStream& stream);
85
86 // Gets the text from this tag and its descendants
87 wxString GetTagText();
88
89 //// Accessors
90 const wxString& GetName() const { return m_name; }
91 void SetName(const wxString& name) { m_name = name; }
92
93 int GetType() const { return m_type; }
94 void SetType(int t) { m_type = t; }
95
96 // If type is wxSimpleHtmlTag_Text, m_text will contain some text.
97 const wxString& GetText() const { return m_text; }
98 void SetText(const wxString& text) { m_text = text; }
99
100 wxSimpleHtmlAttribute* GetFirstAttribute() { return m_attributes; }
101 void SetFirstAttribute(wxSimpleHtmlAttribute* attr) { m_attributes = attr; }
102
103 int GetAttributeCount() const ;
104 wxSimpleHtmlAttribute* GetAttribute(int i) const ;
105
106 wxSimpleHtmlTag* GetChildren() const { return m_children; }
107 void SetChildren(wxSimpleHtmlTag* children) { m_children = children; }
108
109 wxSimpleHtmlTag* GetParent() const { return m_parent; }
110 void SetParent(wxSimpleHtmlTag* parent) { m_parent = parent; }
111 int GetChildCount() const;
112 wxSimpleHtmlTag* GetChild(int i) const;
113 wxSimpleHtmlTag* GetNext() const { return m_next; }
114
115 //// Convenience accessors & search functions
116 bool NameIs(const wxString& name) { return (m_name.CmpNoCase(name) == 0); }
117 bool HasAttribute(const wxString& name, const wxString& value) const;
118 bool HasAttribute(const wxString& name) const;
119 bool GetAttributeValue(wxString& value, const wxString& attrName);
120
121 // Search forward from this tag until we find a tag with this name & optionally attribute
122 wxSimpleHtmlTag* FindTag(const wxString& tagName, const wxString& attrName = wxEmptyString);
123
124 // Gather the text until we hit the given close tag
125 bool FindTextUntilTagClose(wxString& text, const wxString& tagName);
126
127 private:
128 wxString m_name;
129 int m_type;
130 wxString m_text;
131 wxSimpleHtmlAttribute* m_attributes;
132
133 // List of children
134 wxSimpleHtmlTag* m_children;
135 wxSimpleHtmlTag* m_next; // Next sibling
136 wxSimpleHtmlTag* m_parent;
137 };
138
139 /*
140 * wxSimpleHtmlParser
141 * Simple HTML parser, for such tasks as scanning HTML for keywords, contents, etc.
142 */
143
144 class wxSimpleHtmlParser : public wxObject
145 {
146
147 public:
148 wxSimpleHtmlParser();
149 ~wxSimpleHtmlParser();
150
151 //// Operations
152 bool ParseFile(const wxString& filename);
153 bool ParseString(const wxString& str);
154 void Clear();
155 // Write this file
156 void Write(wxOutputStream& stream);
157 bool WriteFile(wxString& filename);
158
159 //// Helpers
160
161 // Main recursive parsing function
162 bool ParseHtml(wxSimpleHtmlTag* parent);
163
164 wxSimpleHtmlTag* ParseTagHeader();
165 wxSimpleHtmlTag* ParseTagClose();
166 bool ParseAttributes(wxSimpleHtmlTag* tag);
167 wxSimpleHtmlTag* ParseDirective(); // e.g. <!DOCTYPE ....>
168 wxSimpleHtmlTag* ParseXMLDeclaration(); // e.g. <?xml .... ?>
169 bool ParseComment(); // Throw away comments
170 // Plain text, up until an angled bracket
171 bool ParseText(wxString& text);
172
173 bool EatWhitespace(); // Throw away whitespace
174 bool EatWhitespace(int& pos); // Throw away whitespace: using 'pos'
175 bool ReadString(wxString& str, bool eatIt = FALSE);
176 bool ReadWord(wxString& str, bool eatIt = FALSE);
177 bool ReadNumber(wxString& str, bool eatIt = FALSE);
178 // Could be number, string, whatever, but read up until whitespace.
179 bool ReadLiteral(wxString& str, bool eatIt = FALSE);
180
181 bool IsComment();
182 bool IsDirective();
183 bool IsXMLDeclaration();
184 bool IsString();
185 bool IsWord();
186 bool IsTagClose();
187 bool IsTagStartBracket(int ch);
188 bool IsTagEndBracket(int ch);
189 bool IsWhitespace(int ch);
190 bool IsAlpha(int ch);
191 bool IsWordChar(int ch);
192 bool IsNumeric(int ch);
193 // Check if a specific tag needs a close tag. If not this function should return FALSE.
194 // If no close tag is needed the result will be that the tag will be insert in a none
195 // hierarchical way. i.e. if the function would return FALSE all the time we would get
196 // a flat list of all tags (like it used to be previously).
197 virtual bool IsCloseTagNeeded(const wxString &name);
198
199 // Encode/Decode Special Characters like:
200 // > Begins a tag. &gt;
201 // < Ends a tag. &lt;
202 // " Quotation mark. &quot;
203 // ' Apostrophe. &apos;
204 // & Ampersand. &amp;
205 static void DecodeSpecialChars(wxString &value);
206 static wxString EncodeSpecialChars(const wxString &value);
207
208 // Matches this string (case insensitive)
209 bool Matches(const wxString& tok, bool eatIt = FALSE) ;
210 bool Eof() const { return (m_pos >= m_length); }
211 bool Eof(int pos) const { return (pos >= m_length); }
212
213 void SetPosition(int pos) { m_pos = pos; }
214
215
216 //// Accessors
217 wxSimpleHtmlTag* GetTopLevelTag() const { return m_topLevel; }
218
219 // Safe way of getting a character
220 int GetChar(size_t i) const;
221
222 private:
223
224 wxSimpleHtmlTag* m_topLevel;
225 int m_pos; // Position in string
226 int m_length; // Length of string
227 wxString m_text; // The actual text
228
229 };
230
231 /*
232 * wxSimpleHtmlTagSpec
233 * Describes a tag, and what type it is.
234 * wxSimpleHtmlModule will initialise/cleanup a list of these, one per tag type
235 */
236
237 #if 0
238 class wxSimpleHtmlTagSpec : public wxObject
239 {
240
241 public:
242 wxSimpleHtmlTagSpec(const wxString& name, int type);
243
244 //// Operations
245 static void AddTagSpec(wxSimpleHtmlTagSpec* spec);
246 static void Clear();
247
248 //// Accessors
249 const wxString& GetName() const { return m_name; }
250 int GetType() const { return m_type; }
251
252 private:
253
254 wxString m_name;
255 int m_type;
256
257 static wxList* sm_tagSpecs;
258 };
259
260 /*
261 * wxSimpleHtmlModule
262 * Responsible for init/cleanup of appropriate data structures
263 */
264
265 class wxSimpleHtmlModule : public wxModule
266 {
267 DECLARE_DYNAMIC_CLASS(wxSimpleHtmlModule)
268
269 public:
270 wxSimpleHtmlModule() {};
271
272 bool OnInit() ;
273 void OnExit() ;
274 };
275 #endif
276
277 #endif