]> git.saurik.com Git - wxWidgets.git/blame - utils/configtool/src/htmlparser.h
wxURI is enough for the basic url operations.
[wxWidgets.git] / utils / configtool / src / htmlparser.h
CommitLineData
d7463f75
JS
1/////////////////////////////////////////////////////////////////////////////
2// Name: htmlparser.h
3// Purpose: Simple HTML parser
4// Author: Julian Smart
5// Modified by:
6// Created: 2002-09-25
7// RCS-ID: $Id$
8// Copyright: (c) Julian Smart
9// Licence: wxWindows license
10/////////////////////////////////////////////////////////////////////////////
11
12#ifndef _HTMLPARSER_H_
13#define _HTMLPARSER_H_
14
71ada1a5 15#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
f8105809 16 #pragma interface "htmlparser.cpp"
d7463f75
JS
17#endif
18
19//#include "wx/module.h"
20#include "wx/stream.h"
21
22/*
23 * wxSimpleHtmlAttribute
24 * Representation of an attribute
25 */
26
27class wxSimpleHtmlAttribute
28{
29 friend class wxSimpleHtmlTag;
30public:
31 wxSimpleHtmlAttribute(const wxString& name, const wxString& value)
32 {
33 m_name = name; m_value = value; m_next = NULL;
34 }
35//// Operations
36
37 // Write this attribute
38 void Write(wxOutputStream& stream);
39
40//// Accessors
41 const wxString& GetName() const { return m_name; }
42 const wxString& GetValue() const { return m_value; }
43 void SetName(const wxString& name) { m_name = name; }
44 void SetValue(const wxString& value) { m_value = value; }
45
46 wxSimpleHtmlAttribute* GetNextAttribute() { return m_next; }
47 void SetNextAttribute(wxSimpleHtmlAttribute* attr) { m_next = attr; }
48
49 bool HasName(const wxString& name) const { return (0 == m_name.CmpNoCase(name)); }
50 bool HasValue(const wxString& val) const { return (0 == m_value.CmpNoCase(val)); }
51
52private:
53 wxString m_name;
54 wxString m_value;
55 wxSimpleHtmlAttribute* m_next;
56};
57
58
59/*
60 * wxSimpleHtmlTag
61 * Representation of a tag or chunk of text
62 */
63
// Values stored in wxSimpleHtmlTag::m_type. The explicit numbers are the same
// ones the enumerators receive implicitly from their declaration order.
enum
{
    wxSimpleHtmlTag_Text           = 0,
    wxSimpleHtmlTag_TopLevel       = 1,
    wxSimpleHtmlTag_Open           = 2,
    wxSimpleHtmlTag_Close          = 3,
    wxSimpleHtmlTag_Directive      = 4,
    wxSimpleHtmlTag_XMLDeclaration = 5
};
65
66class wxSimpleHtmlTag
67{
68public:
69 wxSimpleHtmlTag(const wxString& tagName, int tagType);
70 ~wxSimpleHtmlTag();
71
72//// Operations
73 void ClearAttributes();
74 wxSimpleHtmlAttribute* FindAttribute(const wxString& name) const ;
75 void AppendAttribute(const wxString& name, const wxString& value);
76 void ClearChildren();
77 // Remove 1 tag from the child list.
78 void RemoveChild(wxSimpleHtmlTag *remove);
79 // Appaned tag to the end of the child list.
80 void AppendTag(wxSimpleHtmlTag* tag);
81 // Insert tag after ourself in the parents child list.
82 void AppendTagAfterUs(wxSimpleHtmlTag* tag);
83 // Write this tag
84 void Write(wxOutputStream& stream);
85
86 // Gets the text from this tag and its descendants
87 wxString GetTagText();
88
89//// Accessors
90 const wxString& GetName() const { return m_name; }
91 void SetName(const wxString& name) { m_name = name; }
92
93 int GetType() const { return m_type; }
94 void SetType(int t) { m_type = t; }
95
96 // If type is wxSimpleHtmlTag_Text, m_text will contain some text.
97 const wxString& GetText() const { return m_text; }
98 void SetText(const wxString& text) { m_text = text; }
99
100 wxSimpleHtmlAttribute* GetFirstAttribute() { return m_attributes; }
101 void SetFirstAttribute(wxSimpleHtmlAttribute* attr) { m_attributes = attr; }
102
103 int GetAttributeCount() const ;
104 wxSimpleHtmlAttribute* GetAttribute(int i) const ;
105
106 wxSimpleHtmlTag* GetChildren() const { return m_children; }
107 void SetChildren(wxSimpleHtmlTag* children) { m_children = children; }
108
109 wxSimpleHtmlTag* GetParent() const { return m_parent; }
110 void SetParent(wxSimpleHtmlTag* parent) { m_parent = parent; }
111 int GetChildCount() const;
112 wxSimpleHtmlTag* GetChild(int i) const;
113 wxSimpleHtmlTag* GetNext() const { return m_next; }
114
115//// Convenience accessors & search functions
116 bool NameIs(const wxString& name) { return (m_name.CmpNoCase(name) == 0); }
117 bool HasAttribute(const wxString& name, const wxString& value) const;
118 bool HasAttribute(const wxString& name) const;
119 bool GetAttributeValue(wxString& value, const wxString& attrName);
120
121 // Search forward from this tag until we find a tag with this name & optionally attribute
122 wxSimpleHtmlTag* FindTag(const wxString& tagName, const wxString& attrName = wxEmptyString);
123
124 // Gather the text until we hit the given close tag
125 bool FindTextUntilTagClose(wxString& text, const wxString& tagName);
126
127private:
128 wxString m_name;
129 int m_type;
130 wxString m_text;
131 wxSimpleHtmlAttribute* m_attributes;
132
133 // List of children
134 wxSimpleHtmlTag* m_children;
135 wxSimpleHtmlTag* m_next; // Next sibling
136 wxSimpleHtmlTag* m_parent;
137};
138
139/*
140 * wxSimpleHtmlParser
141 * Simple HTML parser, for such tasks as scanning HTML for keywords, contents, etc.
142 */
143
144class wxSimpleHtmlParser : public wxObject
145{
146
147public:
148 wxSimpleHtmlParser();
149 ~wxSimpleHtmlParser();
150
151//// Operations
152 bool ParseFile(const wxString& filename);
153 bool ParseString(const wxString& str);
154 void Clear();
155 // Write this file
156 void Write(wxOutputStream& stream);
157 bool WriteFile(wxString& filename);
158
159//// Helpers
160
161 // Main recursive parsing function
162 bool ParseHtml(wxSimpleHtmlTag* parent);
163
164 wxSimpleHtmlTag* ParseTagHeader();
165 wxSimpleHtmlTag* ParseTagClose();
166 bool ParseAttributes(wxSimpleHtmlTag* tag);
167 wxSimpleHtmlTag* ParseDirective(); // e.g. <!DOCTYPE ....>
168 wxSimpleHtmlTag* ParseXMLDeclaration(); // e.g. <?xml .... ?>
169 bool ParseComment(); // Throw away comments
170 // Plain text, up until an angled bracket
171 bool ParseText(wxString& text);
172
173 bool EatWhitespace(); // Throw away whitespace
174 bool EatWhitespace(int& pos); // Throw away whitespace: using 'pos'
4fe30bce
WS
175 bool ReadString(wxString& str, bool eatIt = false);
176 bool ReadWord(wxString& str, bool eatIt = false);
177 bool ReadNumber(wxString& str, bool eatIt = false);
d7463f75 178 // Could be number, string, whatever, but read up until whitespace.
4fe30bce 179 bool ReadLiteral(wxString& str, bool eatIt = false);
d7463f75
JS
180
181 bool IsComment();
182 bool IsDirective();
183 bool IsXMLDeclaration();
184 bool IsString();
185 bool IsWord();
186 bool IsTagClose();
187 bool IsTagStartBracket(int ch);
188 bool IsTagEndBracket(int ch);
189 bool IsWhitespace(int ch);
190 bool IsAlpha(int ch);
191 bool IsWordChar(int ch);
192 bool IsNumeric(int ch);
4fe30bce 193 // Check if a specific tag needs a close tag. If not this function should return false.
d7463f75 194 // If no close tag is needed the result will be that the tag will be insert in a none
4fe30bce 195 // hierarchical way. i.e. if the function would return false all the time we would get
d7463f75
JS
196 // a flat list of all tags (like it used to be previously).
197 virtual bool IsCloseTagNeeded(const wxString &name);
198
199 // Encode/Decode Special Characters like:
200 // > Begins a tag. &gt;
201 // < Ends a tag. &lt;
202 // " Quotation mark. &quot;
203 // ' Apostrophe. &apos;
204 // & Ampersand. &amp;
205 static void DecodeSpecialChars(wxString &value);
206 static wxString EncodeSpecialChars(const wxString &value);
207
208 // Matches this string (case insensitive)
4fe30bce 209 bool Matches(const wxString& tok, bool eatIt = false) ;
d7463f75
JS
210 bool Eof() const { return (m_pos >= m_length); }
211 bool Eof(int pos) const { return (pos >= m_length); }
212
213 void SetPosition(int pos) { m_pos = pos; }
214
215
216//// Accessors
217 wxSimpleHtmlTag* GetTopLevelTag() const { return m_topLevel; }
218
219 // Safe way of getting a character
220 int GetChar(size_t i) const;
221
222private:
223
224 wxSimpleHtmlTag* m_topLevel;
225 int m_pos; // Position in string
226 int m_length; // Length of string
227 wxString m_text; // The actual text
228
229};
230
231/*
232 * wxSimpleHtmlTagSpec
233 * Describes a tag, and what type it is.
234 * wxSimpleHtmlModule will initialise/cleanup a list of these, one per tag type
235 */
236
237#if 0
238class wxSimpleHtmlTagSpec : public wxObject
239{
240
241public:
242 wxSimpleHtmlTagSpec(const wxString& name, int type);
243
244//// Operations
245 static void AddTagSpec(wxSimpleHtmlTagSpec* spec);
246 static void Clear();
247
248//// Accessors
249 const wxString& GetName() const { return m_name; }
250 int GetType() const { return m_type; }
251
252private:
253
254 wxString m_name;
255 int m_type;
256
257 static wxList* sm_tagSpecs;
258};
259
260/*
261 * wxSimpleHtmlModule
262 * Responsible for init/cleanup of appropriate data structures
263 */
264
265class wxSimpleHtmlModule : public wxModule
266{
267DECLARE_DYNAMIC_CLASS(wxSimpleHtmlModule)
268
269public:
270 wxSimpleHtmlModule() {};
271
272 bool OnInit() ;
273 void OnExit() ;
274};
275#endif
276
277#endif