Fixed bug in parsing HTML strings longer than 1024 chars that do not contain tags
[wxWidgets.git] / src / html / htmlpars.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: htmlpars.cpp
3 // Purpose: wxHtmlParser class (generic parser)
4 // Author: Vaclav Slavik
5 // RCS-ID: $Id$
6 // Copyright: (c) 1999 Vaclav Slavik
7 // Licence: wxWindows Licence
8 /////////////////////////////////////////////////////////////////////////////
9
10
11 #ifdef __GNUG__
12 #pragma implementation
13 #endif
14
15 #include "wx/wxprec.h"
16
17 #include "wx/defs.h"
18 #if wxUSE_HTML && wxUSE_STREAMS
19
// Borland C++ only: headers included above this point are the ones eligible
// for the precompiled header. The guard must test __BORLANDC__ (the macro the
// Borland compiler actually predefines); the former spelling "__BORDLANDC__"
// was never defined, so the pragma was silently skipped.
#ifdef __BORLANDC__
#pragma hdrstop
#endif
23
24 #ifndef WXPRECOMP
25 #include "wx/wx.h"
26 #endif
27
28 #include "wx/tokenzr.h"
29 #include "wx/wfstream.h"
30 #include "wx/url.h"
31 #include "wx/html/htmldefs.h"
32 #include "wx/html/htmlpars.h"
33
34
35
36 //-----------------------------------------------------------------------------
37 // wxHtmlParser
38 //-----------------------------------------------------------------------------
39
40 IMPLEMENT_ABSTRACT_CLASS(wxHtmlParser,wxObject)
41
42
43 wxObject* wxHtmlParser::Parse(const wxString& source)
44 {
45 wxObject *result;
46
47 InitParser(source);
48 DoParsing();
49 result = GetProduct();
50 DoneParser();
51 return result;
52 }
53
54
55
56 void wxHtmlParser::InitParser(const wxString& source)
57 {
58 m_Source = source;
59 m_Cache = new wxHtmlTagsCache(m_Source);
60 }
61
62
63
64 void wxHtmlParser::DoneParser()
65 {
66 delete m_Cache;
67 m_Cache = NULL;
68 }
69
70
71
72
73 void wxHtmlParser::DoParsing(int begin_pos, int end_pos)
74 {
75 char c;
76 char *temp = new char[end_pos - begin_pos + 1];
77 int i;
78 int templen;
79
80 templen = 0;
81 i = begin_pos;
82
83 while (i < end_pos) {
84 c = m_Source[(unsigned int) i];
85
86 // continue building word:
87 if (c != '<') {
88 temp[templen++] = c;
89 i++;
90 }
91
92 else if (c == '<') {
93 wxHtmlTag tag(m_Source, i, end_pos, m_Cache);
94
95 if (templen) {
96 temp[templen] = 0;
97 AddText(temp);
98 templen = 0;
99 }
100 AddTag(tag);
101 if (tag.HasEnding()) i = tag.GetEndPos2();
102 else i = tag.GetBeginPos();
103 }
104 }
105
106 if (templen) { // last word of block :-(
107 temp[templen] = 0;
108 AddText(temp);
109 }
110 delete[] temp;
111 }
112
113
114
115 void wxHtmlParser::AddTag(const wxHtmlTag& tag)
116 {
117 wxHtmlTagHandler *h;
118 bool inner = FALSE;
119
120 h = (wxHtmlTagHandler*) m_HandlersHash.Get(tag.GetName());
121 if (h)
122 inner = h -> HandleTag(tag);
123 if (!inner) {
124 if (tag.HasEnding())
125 DoParsing(tag.GetBeginPos(), tag.GetEndPos1());
126 }
127 }
128
129
130
131 void wxHtmlParser::AddTagHandler(wxHtmlTagHandler *handler)
132 {
133 wxString s(handler -> GetSupportedTags());
134 wxStringTokenizer tokenizer(s, ", ");
135
136 while (tokenizer.HasMoreTokens())
137 m_HandlersHash.Put(tokenizer.NextToken(), handler);
138
139 if (m_HandlersList.IndexOf(handler) == wxNOT_FOUND)
140 m_HandlersList.Append(handler);
141
142 handler -> SetParser(this);
143 }
144
145
146
147 void wxHtmlParser::PushTagHandler(wxHtmlTagHandler *handler, wxString tags)
148 {
149 wxStringTokenizer tokenizer(tags, ", ");
150 wxString key;
151
152 if (m_HandlersStack == NULL) {
153 m_HandlersStack = new wxList;
154 m_HandlersStack -> DeleteContents(TRUE);
155 }
156
157 m_HandlersStack -> Insert(new wxHashTable(m_HandlersHash));
158
159 while (tokenizer.HasMoreTokens()) {
160 key = tokenizer.NextToken();
161 m_HandlersHash.Delete(key);
162 m_HandlersHash.Put(key, handler);
163 }
164 }
165
166
167
168 void wxHtmlParser::PopTagHandler()
169 {
170 wxNode *first;
171
172 if (m_HandlersStack == NULL ||
173 (first = m_HandlersStack -> GetFirst()) == NULL)
174 {
175 wxLogWarning(_("Warning: attempt to remove HTML tag handler from empty stack."));
176 return;
177 }
178 m_HandlersHash = *((wxHashTable*) first -> GetData());
179 m_HandlersStack -> DeleteNode(first);
180 }
181
182
183
184 wxHtmlParser::~wxHtmlParser()
185 {
186 if (m_HandlersStack) delete m_HandlersStack;
187 m_HandlersHash.Clear();
188 m_HandlersList.DeleteContents(TRUE);
189 m_HandlersList.Clear();
190 }
191
192
193
194 //-----------------------------------------------------------------------------
195 // wxHtmlTagHandler
196 //-----------------------------------------------------------------------------
197
198 IMPLEMENT_ABSTRACT_CLASS(wxHtmlTagHandler,wxObject)
199 #endif
200