]> git.saurik.com Git - wxWidgets.git/blob - utils/configtool/src/htmlparser.h
Fixes tex2rtf to search for files on current folder too. This is basically to test...
[wxWidgets.git] / utils / configtool / src / htmlparser.h
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: htmlparser.h
3 // Purpose: Simple HTML parser
4 // Author: Julian Smart
5 // Modified by:
6 // Created: 2002-09-25
7 // RCS-ID: $Id$
8 // Copyright: (c) Julian Smart
9 // Licence: wxWindows license
10 /////////////////////////////////////////////////////////////////////////////
11
12 #ifndef _HTMLPARSER_H_
13 #define _HTMLPARSER_H_
14
15 //#include "wx/module.h"
16 #include "wx/stream.h"
17
18 /*
19 * wxSimpleHtmlAttribute
20 * Representation of an attribute
21 */
22
23 class wxSimpleHtmlAttribute
24 {
25 friend class wxSimpleHtmlTag;
26 public:
27 wxSimpleHtmlAttribute(const wxString& name, const wxString& value)
28 {
29 m_name = name; m_value = value; m_next = NULL;
30 }
31 //// Operations
32
33 // Write this attribute
34 void Write(wxOutputStream& stream);
35
36 //// Accessors
37 const wxString& GetName() const { return m_name; }
38 const wxString& GetValue() const { return m_value; }
39 void SetName(const wxString& name) { m_name = name; }
40 void SetValue(const wxString& value) { m_value = value; }
41
42 wxSimpleHtmlAttribute* GetNextAttribute() { return m_next; }
43 void SetNextAttribute(wxSimpleHtmlAttribute* attr) { m_next = attr; }
44
45 bool HasName(const wxString& name) const { return (0 == m_name.CmpNoCase(name)); }
46 bool HasValue(const wxString& val) const { return (0 == m_value.CmpNoCase(val)); }
47
48 private:
49 wxString m_name;
50 wxString m_value;
51 wxSimpleHtmlAttribute* m_next;
52 };
53
54
55 /*
56 * wxSimpleHtmlTag
57 * Representation of a tag or chunk of text
58 */
59
// Tag type codes, as passed to the wxSimpleHtmlTag constructor and returned
// by wxSimpleHtmlTag::GetType(). Values are implicit and start at 0.
enum
{
    // Chunk of plain text; the text itself is available via GetText().
    wxSimpleHtmlTag_Text,
    // NOTE(review): presumably the root tag owned by the parser
    // (wxSimpleHtmlParser::GetTopLevelTag()) - confirm in htmlparser.cpp.
    wxSimpleHtmlTag_TopLevel,
    // NOTE(review): presumably an opening tag - confirm in htmlparser.cpp.
    wxSimpleHtmlTag_Open,
    // NOTE(review): presumably a closing tag - confirm in htmlparser.cpp.
    wxSimpleHtmlTag_Close,
    // Directive, e.g. <!DOCTYPE ....>
    wxSimpleHtmlTag_Directive,
    // XML declaration, e.g. <?xml .... ?>
    wxSimpleHtmlTag_XMLDeclaration
};
61
62 class wxSimpleHtmlTag
63 {
64 public:
65 wxSimpleHtmlTag(const wxString& tagName, int tagType);
66 ~wxSimpleHtmlTag();
67
68 //// Operations
69 void ClearAttributes();
70 wxSimpleHtmlAttribute* FindAttribute(const wxString& name) const ;
71 void AppendAttribute(const wxString& name, const wxString& value);
72 void ClearChildren();
73 // Remove 1 tag from the child list.
74 void RemoveChild(wxSimpleHtmlTag *remove);
75 // Appaned tag to the end of the child list.
76 void AppendTag(wxSimpleHtmlTag* tag);
77 // Insert tag after ourself in the parents child list.
78 void AppendTagAfterUs(wxSimpleHtmlTag* tag);
79 // Write this tag
80 void Write(wxOutputStream& stream);
81
82 // Gets the text from this tag and its descendants
83 wxString GetTagText();
84
85 //// Accessors
86 const wxString& GetName() const { return m_name; }
87 void SetName(const wxString& name) { m_name = name; }
88
89 int GetType() const { return m_type; }
90 void SetType(int t) { m_type = t; }
91
92 // If type is wxSimpleHtmlTag_Text, m_text will contain some text.
93 const wxString& GetText() const { return m_text; }
94 void SetText(const wxString& text) { m_text = text; }
95
96 wxSimpleHtmlAttribute* GetFirstAttribute() { return m_attributes; }
97 void SetFirstAttribute(wxSimpleHtmlAttribute* attr) { m_attributes = attr; }
98
99 int GetAttributeCount() const ;
100 wxSimpleHtmlAttribute* GetAttribute(int i) const ;
101
102 wxSimpleHtmlTag* GetChildren() const { return m_children; }
103 void SetChildren(wxSimpleHtmlTag* children) { m_children = children; }
104
105 wxSimpleHtmlTag* GetParent() const { return m_parent; }
106 void SetParent(wxSimpleHtmlTag* parent) { m_parent = parent; }
107 int GetChildCount() const;
108 wxSimpleHtmlTag* GetChild(int i) const;
109 wxSimpleHtmlTag* GetNext() const { return m_next; }
110
111 //// Convenience accessors & search functions
112 bool NameIs(const wxString& name) { return (m_name.CmpNoCase(name) == 0); }
113 bool HasAttribute(const wxString& name, const wxString& value) const;
114 bool HasAttribute(const wxString& name) const;
115 bool GetAttributeValue(wxString& value, const wxString& attrName);
116
117 // Search forward from this tag until we find a tag with this name & optionally attribute
118 wxSimpleHtmlTag* FindTag(const wxString& tagName, const wxString& attrName = wxEmptyString);
119
120 // Gather the text until we hit the given close tag
121 bool FindTextUntilTagClose(wxString& text, const wxString& tagName);
122
123 private:
124 wxString m_name;
125 int m_type;
126 wxString m_text;
127 wxSimpleHtmlAttribute* m_attributes;
128
129 // List of children
130 wxSimpleHtmlTag* m_children;
131 wxSimpleHtmlTag* m_next; // Next sibling
132 wxSimpleHtmlTag* m_parent;
133 };
134
135 /*
136 * wxSimpleHtmlParser
137 * Simple HTML parser, for such tasks as scanning HTML for keywords, contents, etc.
138 */
139
140 class wxSimpleHtmlParser : public wxObject
141 {
142
143 public:
144 wxSimpleHtmlParser();
145 ~wxSimpleHtmlParser();
146
147 //// Operations
148 bool ParseFile(const wxString& filename);
149 bool ParseString(const wxString& str);
150 void Clear();
151 // Write this file
152 void Write(wxOutputStream& stream);
153 bool WriteFile(wxString& filename);
154
155 //// Helpers
156
157 // Main recursive parsing function
158 bool ParseHtml(wxSimpleHtmlTag* parent);
159
160 wxSimpleHtmlTag* ParseTagHeader();
161 wxSimpleHtmlTag* ParseTagClose();
162 bool ParseAttributes(wxSimpleHtmlTag* tag);
163 wxSimpleHtmlTag* ParseDirective(); // e.g. <!DOCTYPE ....>
164 wxSimpleHtmlTag* ParseXMLDeclaration(); // e.g. <?xml .... ?>
165 bool ParseComment(); // Throw away comments
166 // Plain text, up until an angled bracket
167 bool ParseText(wxString& text);
168
169 bool EatWhitespace(); // Throw away whitespace
170 bool EatWhitespace(int& pos); // Throw away whitespace: using 'pos'
171 bool ReadString(wxString& str, bool eatIt = false);
172 bool ReadWord(wxString& str, bool eatIt = false);
173 bool ReadNumber(wxString& str, bool eatIt = false);
174 // Could be number, string, whatever, but read up until whitespace.
175 bool ReadLiteral(wxString& str, bool eatIt = false);
176
177 bool IsComment();
178 bool IsDirective();
179 bool IsXMLDeclaration();
180 bool IsString();
181 bool IsWord();
182 bool IsTagClose();
183 bool IsTagStartBracket(int ch);
184 bool IsTagEndBracket(int ch);
185 bool IsWhitespace(int ch);
186 bool IsAlpha(int ch);
187 bool IsWordChar(int ch);
188 bool IsNumeric(int ch);
189 // Check if a specific tag needs a close tag. If not this function should return false.
190 // If no close tag is needed the result will be that the tag will be insert in a none
191 // hierarchical way. i.e. if the function would return false all the time we would get
192 // a flat list of all tags (like it used to be previously).
193 virtual bool IsCloseTagNeeded(const wxString &name);
194
195 // Encode/Decode Special Characters like:
196 // > Begins a tag. &gt;
197 // < Ends a tag. &lt;
198 // " Quotation mark. &quot;
199 // ' Apostrophe. &apos;
200 // & Ampersand. &amp;
201 static void DecodeSpecialChars(wxString &value);
202 static wxString EncodeSpecialChars(const wxString &value);
203
204 // Matches this string (case insensitive)
205 bool Matches(const wxString& tok, bool eatIt = false) ;
206 bool Eof() const { return (m_pos >= m_length); }
207 bool Eof(int pos) const { return (pos >= m_length); }
208
209 void SetPosition(int pos) { m_pos = pos; }
210
211
212 //// Accessors
213 wxSimpleHtmlTag* GetTopLevelTag() const { return m_topLevel; }
214
215 // Safe way of getting a character
216 int GetChar(size_t i) const;
217
218 private:
219
220 wxSimpleHtmlTag* m_topLevel;
221 int m_pos; // Position in string
222 int m_length; // Length of string
223 wxString m_text; // The actual text
224
225 };
226
227 /*
228 * wxSimpleHtmlTagSpec
229 * Describes a tag, and what type it is.
230 * wxSimpleHtmlModule will initialise/cleanup a list of these, one per tag type
231 */
232
233 #if 0
234 class wxSimpleHtmlTagSpec : public wxObject
235 {
236
237 public:
238 wxSimpleHtmlTagSpec(const wxString& name, int type);
239
240 //// Operations
241 static void AddTagSpec(wxSimpleHtmlTagSpec* spec);
242 static void Clear();
243
244 //// Accessors
245 const wxString& GetName() const { return m_name; }
246 int GetType() const { return m_type; }
247
248 private:
249
250 wxString m_name;
251 int m_type;
252
253 static wxList* sm_tagSpecs;
254 };
255
256 /*
257 * wxSimpleHtmlModule
258 * Responsible for init/cleanup of appropriate data structures
259 */
260
261 class wxSimpleHtmlModule : public wxModule
262 {
263 DECLARE_DYNAMIC_CLASS(wxSimpleHtmlModule)
264
265 public:
266 wxSimpleHtmlModule() {};
267
268 bool OnInit() ;
269 void OnExit() ;
270 };
271 #endif
272
273 #endif