fixed parsing of tags with whitespace around = sign
[wxWidgets.git] / src / html / htmltag.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: htmltag.cpp
3 // Purpose: wxHtmlTag class (represents single tag)
4 // Author: Vaclav Slavik
5 // RCS-ID: $Id$
6 // Copyright: (c) 1999 Vaclav Slavik
7 // Licence: wxWindows Licence
8 /////////////////////////////////////////////////////////////////////////////
9
10
11 #ifdef __GNUG__
12 #pragma implementation
13 #endif
14
15 #include "wx/wxprec.h"
16
17 #include "wx/defs.h"
18 #if wxUSE_HTML
19
20 #ifdef __BORDLANDC__
21 #pragma hdrstop
22 #endif
23
24 #ifndef WXPRECOMP
25 #include "wx/wx.h"
26 #endif
27
28 #include "wx/html/htmltag.h"
29 #include <stdio.h> // for vsscanf
30 #include <stdarg.h>
31
32
33
34
35 //-----------------------------------------------------------------------------
36 // wxHtmlTagsCache
37 //-----------------------------------------------------------------------------
38
39 IMPLEMENT_CLASS(wxHtmlTagsCache,wxObject)
40
41 #define CACHE_INCREMENT 64
42
43 wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
44 {
45 const wxChar *src = source.c_str();
46 int i, tg, pos, stpos;
47 int lng = source.Length();
48 wxChar dummy[256];
49
50 m_Cache = NULL;
51 m_CacheSize = 0;
52 m_CachePos = 0;
53
54 pos = 0;
55 while (pos < lng)
56 {
57 if (src[pos] == wxT('<')) // tag found:
58 {
59 if (m_CacheSize % CACHE_INCREMENT == 0)
60 m_Cache = (sCacheItem*) realloc(m_Cache, (m_CacheSize + CACHE_INCREMENT) * sizeof(sCacheItem));
61 tg = m_CacheSize++;
62 m_Cache[tg].Key = stpos = pos++;
63 dummy[0] = 0; i = 0;
64 while (pos < lng &&
65 src[pos] != wxT('>') &&
66 src[pos] != wxT(' ') && src[pos] != wxT('\r') &&
67 src[pos] != wxT('\n') && src[pos] != wxT('\t'))
68 {
69 dummy[i] = src[pos++];
70 if ((dummy[i] >= wxT('a')) && (dummy[i] <= wxT('z'))) dummy[i] -= (wxT('a') - wxT('A'));
71 i++;
72 }
73 dummy[i] = 0;
74 m_Cache[tg].Name = new wxChar[i+1];
75 memcpy(m_Cache[tg].Name, dummy, (i+1)*sizeof(wxChar));
76
77 while (pos < lng && src[pos] != wxT('>')) pos++;
78
79 if (src[stpos+1] == wxT('/')) // ending tag:
80 {
81 m_Cache[tg].End1 = m_Cache[tg].End2 = -2;
82 // find matching begin tag:
83 for (i = tg; i >= 0; i--)
84 if ((m_Cache[i].End1 == -1) && (wxStrcmp(m_Cache[i].Name, dummy+1) == 0))
85 {
86 m_Cache[i].End1 = stpos;
87 m_Cache[i].End2 = pos + 1;
88 break;
89 }
90 }
91 else
92 {
93 m_Cache[tg].End1 = m_Cache[tg].End2 = -1;
94 }
95 }
96
97 pos++;
98 }
99
100 // ok, we're done, now we'll free .Name members of cache - we don't need it anymore:
101 for (i = 0; i < m_CacheSize; i++)
102 {
103 delete[] m_Cache[i].Name;
104 m_Cache[i].Name = NULL;
105 }
106 }
107
108
109
110 void wxHtmlTagsCache::QueryTag(int at, int* end1, int* end2)
111 {
112 if (m_Cache == NULL) return;
113 if (m_Cache[m_CachePos].Key != at)
114 {
115 int delta = (at < m_Cache[m_CachePos].Key) ? -1 : 1;
116 do {m_CachePos += delta;} while (m_Cache[m_CachePos].Key != at);
117 }
118 *end1 = m_Cache[m_CachePos].End1;
119 *end2 = m_Cache[m_CachePos].End2;
120 }
121
122
123
124
125 //-----------------------------------------------------------------------------
126 // wxHtmlTag
127 //-----------------------------------------------------------------------------
128
129 IMPLEMENT_CLASS(wxHtmlTag,wxObject)
130
131 wxHtmlTag::wxHtmlTag(const wxString& source, int pos, int end_pos, wxHtmlTagsCache* cache) : wxObject()
132 {
133 int i;
134 char c;
135
136 // fill-in name, params and begin pos:
137 m_Name = m_Params = wxEmptyString;
138 i = pos+1;
139 if (source[i] == '/') { m_Ending = TRUE; i++; }
140 else m_Ending = FALSE;
141
142 // find tag's name and convert it to uppercase:
143 while ((i < end_pos) &&
144 ((c = source[i++]) != ' ' && c != '\r' && c != '\n' && c != '\t' &&
145 c != '>'))
146 {
147 if ((c >= 'a') && (c <= 'z')) c -= ('a' - 'A');
148 m_Name += c;
149 }
150
151 // if the tag has parameters, read them and "normalize" them,
152 // i.e. convert to uppercase, replace whitespaces by spaces and
153 // remove whitespaces around '=':
154 if (source[i-1] != '>')
155 while ((i < end_pos) && ((c = source[i++]) != '>'))
156 {
157 if ((c >= 'a') && (c <= 'z')) c -= ('a' - 'A');
158 if (c == '\r' || c == '\n' || c == '\t') c = ' '; // make future parsing a bit simpler
159 m_Params += c;
160 if (c == '"')
161 {
162 // remove spaces around the '=' character:
163 if (m_Params.Length() > 1 &&
164 m_Params[m_Params.Length()-2] == ' ')
165 {
166 m_Params.RemoveLast();
167 while (m_Params.Length() > 0 && m_Params.Last() == ' ')
168 m_Params.RemoveLast();
169 m_Params += '"';
170 }
171 while ((i < end_pos) && (source[i++] == ' ')) {}
172 if (i < end_pos) i--;
173
174 // ...and copy the value to m_Params:
175 while ((i < end_pos) && ((c = source[i++]) != '"')) m_Params += c;
176 m_Params += c;
177 }
178 else if (c == '\'')
179 {
180 while ((i < end_pos) && ((c = source[i++]) != '\'')) m_Params += c;
181 m_Params += c;
182 }
183 }
184 m_Begin = i;
185
186 cache->QueryTag(pos, &m_End1, &m_End2);
187 if (m_End1 > end_pos) m_End1 = end_pos;
188 if (m_End2 > end_pos) m_End2 = end_pos;
189 }
190
191
192
193 bool wxHtmlTag::HasParam(const wxString& par) const
194 {
195 const wxChar *st = m_Params, *p = par;
196 const wxChar *st2, *p2;
197
198 if (*st == 0) return FALSE;
199 if (*p == 0) return FALSE;
200 for (st2 = st, p2 = p; ; st2++)
201 {
202 if (*p2 == 0) return TRUE;
203 if (*st2 == 0) return FALSE;
204 if (*p2 != *st2) p2 = p;
205 if (*p2 == *st2) p2++;
206 if (*st2 == ' ') p2 = p;
207 else if (*st2 == '=')
208 {
209 p2 = p;
210 while (*st2 != ' ')
211 {
212 if (*st2 == '"')
213 {
214 st2++;
215 while (*st2 != '"') st2++;
216 }
217 st2++;
218 if (*st2 == 0) return FALSE;
219 }
220 }
221 }
222 }
223
224
225
226 wxString wxHtmlTag::GetParam(const wxString& par, bool with_commas) const
227 {
228 const wxChar *st = m_Params, *p = par;
229 const wxChar *st2, *p2;
230 bool comma;
231 char comma_char;
232
233 if (*st == 0) return "";
234 if (*p == 0) return "";
235 for (st2 = st, p2 = p; ; st2++)
236 {
237 if (*p2 == 0) // found
238 {
239 wxString fnd = "";
240 st2++; // '=' character
241 comma = FALSE;
242 comma_char = '\0';
243 if (!with_commas && (*(st2) == '"'))
244 {
245 st2++;
246 comma = TRUE;
247 comma_char = '"';
248 }
249 else if (!with_commas && (*(st2) == '\''))
250 {
251 st2++;
252 comma = TRUE;
253 comma_char = '\'';
254 }
255
256 while (*st2 != 0)
257 {
258 if (comma && *st2 == comma_char) comma = FALSE;
259 else if ((*st2 == ' ') && (!comma)) break;
260 fnd += (*(st2++));
261 }
262 if (!with_commas && (*(st2-1) == comma_char)) fnd.RemoveLast();
263 return fnd;
264 }
265 if (*st2 == 0) return "";
266 if (*p2 != *st2) p2 = p;
267 if (*p2 == *st2) p2++;
268 if (*st2 == ' ') p2 = p;
269 else if (*st2 == '=')
270 {
271 p2 = p;
272 while (*st2 != ' ')
273 {
274 if (*st2 == '"')
275 {
276 st2++;
277 while (*st2 != '"') st2++;
278 }
279 else if (*st2 == '\'')
280 {
281 st2++;
282 while (*st2 != '\'') st2++;
283 }
284 st2++;
285 }
286 }
287 }
288 }
289
290
291
292 int wxHtmlTag::ScanParam(const wxString& par, wxChar *format, void *param) const
293 {
294 wxString parval = GetParam(par);
295 return wxSscanf((const wxChar*)parval, format, param);
296 }
297
298 #endif