]> git.saurik.com Git - wxWidgets.git/blame - utils/configtool/src/htmlparser.h
Fixes tex2rtf to search for files on current folder too. This is basically to test...
[wxWidgets.git] / utils / configtool / src / htmlparser.h
CommitLineData
d7463f75
JS
1/////////////////////////////////////////////////////////////////////////////
2// Name: htmlparser.h
3// Purpose: Simple HTML parser
4// Author: Julian Smart
5// Modified by:
6// Created: 2002-09-25
7// RCS-ID: $Id$
8// Copyright: (c) Julian Smart
9// Licence: wxWindows license
10/////////////////////////////////////////////////////////////////////////////
11
12#ifndef _HTMLPARSER_H_
13#define _HTMLPARSER_H_
14
d7463f75
JS
15//#include "wx/module.h"
16#include "wx/stream.h"
17
18/*
19 * wxSimpleHtmlAttribute
20 * Representation of an attribute
21 */
22
23class wxSimpleHtmlAttribute
24{
25 friend class wxSimpleHtmlTag;
26public:
27 wxSimpleHtmlAttribute(const wxString& name, const wxString& value)
28 {
29 m_name = name; m_value = value; m_next = NULL;
30 }
31//// Operations
32
33 // Write this attribute
34 void Write(wxOutputStream& stream);
35
36//// Accessors
37 const wxString& GetName() const { return m_name; }
38 const wxString& GetValue() const { return m_value; }
39 void SetName(const wxString& name) { m_name = name; }
40 void SetValue(const wxString& value) { m_value = value; }
41
42 wxSimpleHtmlAttribute* GetNextAttribute() { return m_next; }
43 void SetNextAttribute(wxSimpleHtmlAttribute* attr) { m_next = attr; }
44
45 bool HasName(const wxString& name) const { return (0 == m_name.CmpNoCase(name)); }
46 bool HasValue(const wxString& val) const { return (0 == m_value.CmpNoCase(val)); }
47
48private:
49 wxString m_name;
50 wxString m_value;
51 wxSimpleHtmlAttribute* m_next;
52};
53
54
55/*
56 * wxSimpleHtmlTag
57 * Representation of a tag or chunk of text
58 */
59
// Node kinds used as the integer "type" of a wxSimpleHtmlTag.
// The enumerators keep their implicit values 0..5; do not reorder.
enum
{
    wxSimpleHtmlTag_Text,           // chunk of plain text (the tag's text field holds it)
    wxSimpleHtmlTag_TopLevel,       // presumably the root node owned by the parser -- confirm in htmlparser.cpp
    wxSimpleHtmlTag_Open,           // presumably an opening tag, e.g. <b> -- confirm in htmlparser.cpp
    wxSimpleHtmlTag_Close,          // presumably a closing tag, e.g. </b> -- confirm in htmlparser.cpp
    wxSimpleHtmlTag_Directive,      // e.g. <!DOCTYPE ....>
    wxSimpleHtmlTag_XMLDeclaration  // e.g. <?xml .... ?>
};
61
62class wxSimpleHtmlTag
63{
64public:
65 wxSimpleHtmlTag(const wxString& tagName, int tagType);
66 ~wxSimpleHtmlTag();
67
68//// Operations
69 void ClearAttributes();
70 wxSimpleHtmlAttribute* FindAttribute(const wxString& name) const ;
71 void AppendAttribute(const wxString& name, const wxString& value);
72 void ClearChildren();
73 // Remove 1 tag from the child list.
74 void RemoveChild(wxSimpleHtmlTag *remove);
75 // Appaned tag to the end of the child list.
76 void AppendTag(wxSimpleHtmlTag* tag);
77 // Insert tag after ourself in the parents child list.
78 void AppendTagAfterUs(wxSimpleHtmlTag* tag);
79 // Write this tag
80 void Write(wxOutputStream& stream);
81
82 // Gets the text from this tag and its descendants
83 wxString GetTagText();
84
85//// Accessors
86 const wxString& GetName() const { return m_name; }
87 void SetName(const wxString& name) { m_name = name; }
88
89 int GetType() const { return m_type; }
90 void SetType(int t) { m_type = t; }
91
92 // If type is wxSimpleHtmlTag_Text, m_text will contain some text.
93 const wxString& GetText() const { return m_text; }
94 void SetText(const wxString& text) { m_text = text; }
95
96 wxSimpleHtmlAttribute* GetFirstAttribute() { return m_attributes; }
97 void SetFirstAttribute(wxSimpleHtmlAttribute* attr) { m_attributes = attr; }
98
99 int GetAttributeCount() const ;
100 wxSimpleHtmlAttribute* GetAttribute(int i) const ;
101
102 wxSimpleHtmlTag* GetChildren() const { return m_children; }
103 void SetChildren(wxSimpleHtmlTag* children) { m_children = children; }
104
105 wxSimpleHtmlTag* GetParent() const { return m_parent; }
106 void SetParent(wxSimpleHtmlTag* parent) { m_parent = parent; }
107 int GetChildCount() const;
108 wxSimpleHtmlTag* GetChild(int i) const;
109 wxSimpleHtmlTag* GetNext() const { return m_next; }
110
111//// Convenience accessors & search functions
112 bool NameIs(const wxString& name) { return (m_name.CmpNoCase(name) == 0); }
113 bool HasAttribute(const wxString& name, const wxString& value) const;
114 bool HasAttribute(const wxString& name) const;
115 bool GetAttributeValue(wxString& value, const wxString& attrName);
116
117 // Search forward from this tag until we find a tag with this name & optionally attribute
118 wxSimpleHtmlTag* FindTag(const wxString& tagName, const wxString& attrName = wxEmptyString);
119
120 // Gather the text until we hit the given close tag
121 bool FindTextUntilTagClose(wxString& text, const wxString& tagName);
122
123private:
124 wxString m_name;
125 int m_type;
126 wxString m_text;
127 wxSimpleHtmlAttribute* m_attributes;
128
129 // List of children
130 wxSimpleHtmlTag* m_children;
131 wxSimpleHtmlTag* m_next; // Next sibling
132 wxSimpleHtmlTag* m_parent;
133};
134
135/*
136 * wxSimpleHtmlParser
137 * Simple HTML parser, for such tasks as scanning HTML for keywords, contents, etc.
138 */
139
140class wxSimpleHtmlParser : public wxObject
141{
142
143public:
144 wxSimpleHtmlParser();
145 ~wxSimpleHtmlParser();
146
147//// Operations
148 bool ParseFile(const wxString& filename);
149 bool ParseString(const wxString& str);
150 void Clear();
151 // Write this file
152 void Write(wxOutputStream& stream);
153 bool WriteFile(wxString& filename);
154
155//// Helpers
156
157 // Main recursive parsing function
158 bool ParseHtml(wxSimpleHtmlTag* parent);
159
160 wxSimpleHtmlTag* ParseTagHeader();
161 wxSimpleHtmlTag* ParseTagClose();
162 bool ParseAttributes(wxSimpleHtmlTag* tag);
163 wxSimpleHtmlTag* ParseDirective(); // e.g. <!DOCTYPE ....>
164 wxSimpleHtmlTag* ParseXMLDeclaration(); // e.g. <?xml .... ?>
165 bool ParseComment(); // Throw away comments
166 // Plain text, up until an angled bracket
167 bool ParseText(wxString& text);
168
169 bool EatWhitespace(); // Throw away whitespace
170 bool EatWhitespace(int& pos); // Throw away whitespace: using 'pos'
4fe30bce
WS
171 bool ReadString(wxString& str, bool eatIt = false);
172 bool ReadWord(wxString& str, bool eatIt = false);
173 bool ReadNumber(wxString& str, bool eatIt = false);
d7463f75 174 // Could be number, string, whatever, but read up until whitespace.
4fe30bce 175 bool ReadLiteral(wxString& str, bool eatIt = false);
d7463f75
JS
176
177 bool IsComment();
178 bool IsDirective();
179 bool IsXMLDeclaration();
180 bool IsString();
181 bool IsWord();
182 bool IsTagClose();
183 bool IsTagStartBracket(int ch);
184 bool IsTagEndBracket(int ch);
185 bool IsWhitespace(int ch);
186 bool IsAlpha(int ch);
187 bool IsWordChar(int ch);
188 bool IsNumeric(int ch);
4fe30bce 189 // Check if a specific tag needs a close tag. If not this function should return false.
d7463f75 190 // If no close tag is needed the result will be that the tag will be insert in a none
4fe30bce 191 // hierarchical way. i.e. if the function would return false all the time we would get
d7463f75
JS
192 // a flat list of all tags (like it used to be previously).
193 virtual bool IsCloseTagNeeded(const wxString &name);
194
195 // Encode/Decode Special Characters like:
196 // > Begins a tag. &gt;
197 // < Ends a tag. &lt;
198 // " Quotation mark. &quot;
199 // ' Apostrophe. &apos;
200 // & Ampersand. &amp;
201 static void DecodeSpecialChars(wxString &value);
202 static wxString EncodeSpecialChars(const wxString &value);
203
204 // Matches this string (case insensitive)
4fe30bce 205 bool Matches(const wxString& tok, bool eatIt = false) ;
d7463f75
JS
206 bool Eof() const { return (m_pos >= m_length); }
207 bool Eof(int pos) const { return (pos >= m_length); }
208
209 void SetPosition(int pos) { m_pos = pos; }
210
211
212//// Accessors
213 wxSimpleHtmlTag* GetTopLevelTag() const { return m_topLevel; }
214
215 // Safe way of getting a character
216 int GetChar(size_t i) const;
217
218private:
219
220 wxSimpleHtmlTag* m_topLevel;
221 int m_pos; // Position in string
222 int m_length; // Length of string
223 wxString m_text; // The actual text
224
225};
226
227/*
228 * wxSimpleHtmlTagSpec
229 * Describes a tag, and what type it is.
230 * wxSimpleHtmlModule will initialise/cleanup a list of these, one per tag type
231 */
232
233#if 0
234class wxSimpleHtmlTagSpec : public wxObject
235{
236
237public:
238 wxSimpleHtmlTagSpec(const wxString& name, int type);
239
240//// Operations
241 static void AddTagSpec(wxSimpleHtmlTagSpec* spec);
242 static void Clear();
243
244//// Accessors
245 const wxString& GetName() const { return m_name; }
246 int GetType() const { return m_type; }
247
248private:
249
250 wxString m_name;
251 int m_type;
252
253 static wxList* sm_tagSpecs;
254};
255
256/*
257 * wxSimpleHtmlModule
258 * Responsible for init/cleanup of appropriate data structures
259 */
260
261class wxSimpleHtmlModule : public wxModule
262{
263DECLARE_DYNAMIC_CLASS(wxSimpleHtmlModule)
264
265public:
266 wxSimpleHtmlModule() {};
267
268 bool OnInit() ;
269 void OnExit() ;
270};
271#endif
272
273#endif