]> git.saurik.com Git - wxWidgets.git/blob - tests/benchmarks/htmlparser/htmltag.cpp
Further improve caching of locale-specific data in wxNumberFormatter.
[wxWidgets.git] / tests / benchmarks / htmlparser / htmltag.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/html/htmltag.cpp
3 // Purpose: wx28HtmlTag class (represents single tag)
4 // Author: Vaclav Slavik
5 // RCS-ID: $Id$
6 // Copyright: (c) 1999 Vaclav Slavik
7 // Licence: wxWindows licence
8 /////////////////////////////////////////////////////////////////////////////
9
10 #include "wx/wxprec.h"
11
12 #ifdef __BORLANDC__
13 #pragma hdrstop
14 #endif
15
16 #include "htmltag.h"
17
18 #ifndef WXPRECOMP
19 #include "wx/colour.h"
20 #endif
21
22 #include "htmlpars.h"
23 #include <stdio.h> // for vsscanf
24 #include <stdarg.h>
25
26
27 //-----------------------------------------------------------------------------
28 // wx28HtmlTagsCache
29 //-----------------------------------------------------------------------------
30
31 struct wx28HtmlCacheItem
32 {
33 // this is "pos" value passed to wx28HtmlTag's constructor.
34 // it is position of '<' character of the tag
35 int Key;
36
37 // end positions for the tag:
38 // end1 is '<' of ending tag,
39 // end2 is '>' or both are
40 // -1 if there is no ending tag for this one...
41 // or -2 if this is ending tag </...>
42 int End1, End2;
43
44 // name of this tag
45 wxChar *Name;
46 };
47
48
49 IMPLEMENT_CLASS(wx28HtmlTagsCache,wxObject)
50
51 #define CACHE_INCREMENT 64
52
53 bool wxIsCDATAElement(const wxChar *tag)
54 {
55 return (wxStrcmp(tag, wxT("SCRIPT")) == 0) ||
56 (wxStrcmp(tag, wxT("STYLE")) == 0);
57 }
58
59 wx28HtmlTagsCache::wx28HtmlTagsCache(const wxString& source)
60 {
61 const wxChar *src = source.c_str();
62 int lng = source.length();
63 wxChar tagBuffer[256];
64
65 m_Cache = NULL;
66 m_CacheSize = 0;
67 m_CachePos = 0;
68
69 int pos = 0;
70 while (pos < lng)
71 {
72 if (src[pos] == wxT('<')) // tag found:
73 {
74 if (m_CacheSize % CACHE_INCREMENT == 0)
75 m_Cache = (wx28HtmlCacheItem*) realloc(m_Cache, (m_CacheSize + CACHE_INCREMENT) * sizeof(wx28HtmlCacheItem));
76 int tg = m_CacheSize++;
77 int stpos = pos++;
78 m_Cache[tg].Key = stpos;
79
80 int i;
81 for ( i = 0;
82 pos < lng && i < (int)WXSIZEOF(tagBuffer) - 1 &&
83 src[pos] != wxT('>') && !wxIsspace(src[pos]);
84 i++, pos++ )
85 {
86 tagBuffer[i] = (wxChar)wxToupper(src[pos]);
87 }
88 tagBuffer[i] = wxT('\0');
89
90 m_Cache[tg].Name = new wxChar[i+1];
91 memcpy(m_Cache[tg].Name, tagBuffer, (i+1)*sizeof(wxChar));
92
93 while (pos < lng && src[pos] != wxT('>')) pos++;
94
95 if (src[stpos+1] == wxT('/')) // ending tag:
96 {
97 m_Cache[tg].End1 = m_Cache[tg].End2 = -2;
98 // find matching begin tag:
99 for (i = tg; i >= 0; i--)
100 if ((m_Cache[i].End1 == -1) && (wxStrcmp(m_Cache[i].Name, tagBuffer+1) == 0))
101 {
102 m_Cache[i].End1 = stpos;
103 m_Cache[i].End2 = pos + 1;
104 break;
105 }
106 }
107 else
108 {
109 m_Cache[tg].End1 = m_Cache[tg].End2 = -1;
110
111 if (wxIsCDATAElement(tagBuffer))
112 {
113 // store the orig pos in case we are missing the closing
114 // tag (see below)
115 wxInt32 old_pos = pos;
116 bool foundCloseTag = false;
117
118 // find next matching tag
119 int tag_len = wxStrlen(tagBuffer);
120 while (pos < lng)
121 {
122 // find the ending tag
123 while (pos + 1 < lng &&
124 (src[pos] != '<' || src[pos+1] != '/'))
125 ++pos;
126 if (src[pos] == '<')
127 ++pos;
128
129 // see if it matches
130 int match_pos = 0;
131 while (pos < lng && match_pos < tag_len && src[pos] != '>' && src[pos] != '<') {
132 // cast to wxChar needed to suppress warning in
133 // Unicode build
134 if ((wxChar)wxToupper(src[pos]) == tagBuffer[match_pos]) {
135 ++match_pos;
136 }
137 else if (src[pos] == wxT(' ') || src[pos] == wxT('\n') ||
138 src[pos] == wxT('\r') || src[pos] == wxT('\t')) {
139 // need to skip over these
140 }
141 else {
142 match_pos = 0;
143 }
144 ++pos;
145 }
146
147 // found a match
148 if (match_pos == tag_len)
149 {
150 pos = pos - tag_len - 3;
151 foundCloseTag = true;
152 break;
153 }
154 else // keep looking for the closing tag
155 {
156 ++pos;
157 }
158 }
159 if (!foundCloseTag)
160 {
161 // we didn't find closing tag; this means the markup
162 // is incorrect and the best thing we can do is to
163 // ignore the unclosed tag and continue parsing as if
164 // it didn't exist:
165 pos = old_pos;
166 }
167 }
168 }
169 }
170
171 pos++;
172 }
173
174 // ok, we're done, now we'll free .Name members of cache - we don't need it anymore:
175 for (int i = 0; i < m_CacheSize; i++)
176 {
177 delete[] m_Cache[i].Name;
178 m_Cache[i].Name = NULL;
179 }
180 }
181
182 void wx28HtmlTagsCache::QueryTag(int at, int* end1, int* end2)
183 {
184 if (m_Cache == NULL) return;
185 if (m_Cache[m_CachePos].Key != at)
186 {
187 int delta = (at < m_Cache[m_CachePos].Key) ? -1 : 1;
188 do
189 {
190 if ( m_CachePos < 0 || m_CachePos == m_CacheSize )
191 {
192 // something is very wrong with HTML, give up by returning an
193 // impossibly large value which is going to be ignored by the
194 // caller
195 *end1 =
196 *end2 = INT_MAX;
197 return;
198 }
199
200 m_CachePos += delta;
201 }
202 while (m_Cache[m_CachePos].Key != at);
203 }
204 *end1 = m_Cache[m_CachePos].End1;
205 *end2 = m_Cache[m_CachePos].End2;
206 }
207
208
209
210
211 //-----------------------------------------------------------------------------
212 // wx28HtmlTag
213 //-----------------------------------------------------------------------------
214
215 IMPLEMENT_CLASS(wx28HtmlTag,wxObject)
216
217 wx28HtmlTag::wx28HtmlTag(wx28HtmlTag *parent,
218 const wxString& source, int pos, int end_pos,
219 wx28HtmlTagsCache *cache,
220 wx28HtmlEntitiesParser *entParser) : wxObject()
221 {
222 /* Setup DOM relations */
223
224 m_Next = NULL;
225 m_FirstChild = m_LastChild = NULL;
226 m_Parent = parent;
227 if (parent)
228 {
229 m_Prev = m_Parent->m_LastChild;
230 if (m_Prev == NULL)
231 m_Parent->m_FirstChild = this;
232 else
233 m_Prev->m_Next = this;
234 m_Parent->m_LastChild = this;
235 }
236 else
237 m_Prev = NULL;
238
239 /* Find parameters and their values: */
240
241 int i;
242 wxChar c;
243
244 // fill-in name, params and begin pos:
245 i = pos+1;
246
247 // find tag's name and convert it to uppercase:
248 while ((i < end_pos) &&
249 ((c = source[i++]) != wxT(' ') && c != wxT('\r') &&
250 c != wxT('\n') && c != wxT('\t') &&
251 c != wxT('>')))
252 {
253 if ((c >= wxT('a')) && (c <= wxT('z')))
254 c -= (wxT('a') - wxT('A'));
255 m_Name << c;
256 }
257
258 // if the tag has parameters, read them and "normalize" them,
259 // i.e. convert to uppercase, replace whitespaces by spaces and
260 // remove whitespaces around '=':
261 if (source[i-1] != wxT('>'))
262 {
263 #define IS_WHITE(c) (c == wxT(' ') || c == wxT('\r') || \
264 c == wxT('\n') || c == wxT('\t'))
265 wxString pname, pvalue;
266 wxChar quote;
267 enum
268 {
269 ST_BEFORE_NAME = 1,
270 ST_NAME,
271 ST_BEFORE_EQ,
272 ST_BEFORE_VALUE,
273 ST_VALUE
274 } state;
275
276 quote = 0;
277 state = ST_BEFORE_NAME;
278 while (i < end_pos)
279 {
280 c = source[i++];
281
282 if (c == wxT('>') && !(state == ST_VALUE && quote != 0))
283 {
284 if (state == ST_BEFORE_EQ || state == ST_NAME)
285 {
286 m_ParamNames.Add(pname);
287 m_ParamValues.Add(wxEmptyString);
288 }
289 else if (state == ST_VALUE && quote == 0)
290 {
291 m_ParamNames.Add(pname);
292 if (entParser)
293 m_ParamValues.Add(entParser->Parse(pvalue));
294 else
295 m_ParamValues.Add(pvalue);
296 }
297 break;
298 }
299 switch (state)
300 {
301 case ST_BEFORE_NAME:
302 if (!IS_WHITE(c))
303 {
304 pname = c;
305 state = ST_NAME;
306 }
307 break;
308 case ST_NAME:
309 if (IS_WHITE(c))
310 state = ST_BEFORE_EQ;
311 else if (c == wxT('='))
312 state = ST_BEFORE_VALUE;
313 else
314 pname << c;
315 break;
316 case ST_BEFORE_EQ:
317 if (c == wxT('='))
318 state = ST_BEFORE_VALUE;
319 else if (!IS_WHITE(c))
320 {
321 m_ParamNames.Add(pname);
322 m_ParamValues.Add(wxEmptyString);
323 pname = c;
324 state = ST_NAME;
325 }
326 break;
327 case ST_BEFORE_VALUE:
328 if (!IS_WHITE(c))
329 {
330 if (c == wxT('"') || c == wxT('\''))
331 quote = c, pvalue = wxEmptyString;
332 else
333 quote = 0, pvalue = c;
334 state = ST_VALUE;
335 }
336 break;
337 case ST_VALUE:
338 if ((quote != 0 && c == quote) ||
339 (quote == 0 && IS_WHITE(c)))
340 {
341 m_ParamNames.Add(pname);
342 if (quote == 0)
343 {
344 // VS: backward compatibility, no real reason,
345 // but wxHTML code relies on this... :(
346 pvalue.MakeUpper();
347 }
348 if (entParser)
349 m_ParamValues.Add(entParser->Parse(pvalue));
350 else
351 m_ParamValues.Add(pvalue);
352 state = ST_BEFORE_NAME;
353 }
354 else
355 pvalue << c;
356 break;
357 }
358 }
359
360 #undef IS_WHITE
361 }
362 m_Begin = i;
363
364 cache->QueryTag(pos, &m_End1, &m_End2);
365 if (m_End1 > end_pos) m_End1 = end_pos;
366 if (m_End2 > end_pos) m_End2 = end_pos;
367 }
368
369 wx28HtmlTag::~wx28HtmlTag()
370 {
371 wx28HtmlTag *t1, *t2;
372 t1 = m_FirstChild;
373 while (t1)
374 {
375 t2 = t1->GetNextSibling();
376 delete t1;
377 t1 = t2;
378 }
379 }
380
381 bool wx28HtmlTag::HasParam(const wxString& par) const
382 {
383 return (m_ParamNames.Index(par, false) != wxNOT_FOUND);
384 }
385
386 wxString wx28HtmlTag::GetParam(const wxString& par, bool with_commas) const
387 {
388 int index = m_ParamNames.Index(par, false);
389 if (index == wxNOT_FOUND)
390 return wxEmptyString;
391 if (with_commas)
392 {
393 // VS: backward compatibility, seems to be never used by wxHTML...
394 wxString s;
395 s << wxT('"') << m_ParamValues[index] << wxT('"');
396 return s;
397 }
398 else
399 return m_ParamValues[index];
400 }
401
402 int wx28HtmlTag::ScanParam(const wxString& par,
403 const wxChar *format,
404 void *param) const
405 {
406 wxString parval = GetParam(par);
407 return wxSscanf(parval, format, param);
408 }
409
410 bool wx28HtmlTag::GetParamAsColour(const wxString& par, wxColour *clr) const
411 {
412 wxCHECK_MSG( clr, false, wxT("invalid colour argument") );
413
414 wxString str = GetParam(par);
415
416 // handle colours defined in HTML 4.0 first:
417 if (str.length() > 1 && str[0] != wxT('#'))
418 {
419 #define HTML_COLOUR(name, r, g, b) \
420 if (str.IsSameAs(wxT(name), false)) \
421 { clr->Set(r, g, b); return true; }
422 HTML_COLOUR("black", 0x00,0x00,0x00)
423 HTML_COLOUR("silver", 0xC0,0xC0,0xC0)
424 HTML_COLOUR("gray", 0x80,0x80,0x80)
425 HTML_COLOUR("white", 0xFF,0xFF,0xFF)
426 HTML_COLOUR("maroon", 0x80,0x00,0x00)
427 HTML_COLOUR("red", 0xFF,0x00,0x00)
428 HTML_COLOUR("purple", 0x80,0x00,0x80)
429 HTML_COLOUR("fuchsia", 0xFF,0x00,0xFF)
430 HTML_COLOUR("green", 0x00,0x80,0x00)
431 HTML_COLOUR("lime", 0x00,0xFF,0x00)
432 HTML_COLOUR("olive", 0x80,0x80,0x00)
433 HTML_COLOUR("yellow", 0xFF,0xFF,0x00)
434 HTML_COLOUR("navy", 0x00,0x00,0x80)
435 HTML_COLOUR("blue", 0x00,0x00,0xFF)
436 HTML_COLOUR("teal", 0x00,0x80,0x80)
437 HTML_COLOUR("aqua", 0x00,0xFF,0xFF)
438 #undef HTML_COLOUR
439 }
440
441 // then try to parse #rrggbb representations or set from other well
442 // known names (note that this doesn't strictly conform to HTML spec,
443 // but it doesn't do real harm -- but it *must* be done after the standard
444 // colors are handled above):
445 if (clr->Set(str))
446 return true;
447
448 return false;
449 }
450
451 bool wx28HtmlTag::GetParamAsInt(const wxString& par, int *clr) const
452 {
453 if ( !HasParam(par) )
454 return false;
455
456 long i;
457 if ( !GetParam(par).ToLong(&i) )
458 return false;
459
460 *clr = (int)i;
461 return true;
462 }
463
464 wxString wx28HtmlTag::GetAllParams() const
465 {
466 // VS: this function is for backward compatibility only,
467 // never used by wxHTML
468 wxString s;
469 size_t cnt = m_ParamNames.GetCount();
470 for (size_t i = 0; i < cnt; i++)
471 {
472 s << m_ParamNames[i];
473 s << wxT('=');
474 if (m_ParamValues[i].Find(wxT('"')) != wxNOT_FOUND)
475 s << wxT('\'') << m_ParamValues[i] << wxT('\'');
476 else
477 s << wxT('"') << m_ParamValues[i] << wxT('"');
478 }
479 return s;
480 }
481
482 wx28HtmlTag *wx28HtmlTag::GetFirstSibling() const
483 {
484 if (m_Parent)
485 return m_Parent->m_FirstChild;
486 else
487 {
488 wx28HtmlTag *cur = (wx28HtmlTag*)this;
489 while (cur->m_Prev)
490 cur = cur->m_Prev;
491 return cur;
492 }
493 }
494
495 wx28HtmlTag *wx28HtmlTag::GetLastSibling() const
496 {
497 if (m_Parent)
498 return m_Parent->m_LastChild;
499 else
500 {
501 wx28HtmlTag *cur = (wx28HtmlTag*)this;
502 while (cur->m_Next)
503 cur = cur->m_Next;
504 return cur;
505 }
506 }
507
508 wx28HtmlTag *wx28HtmlTag::GetNextTag() const
509 {
510 if (m_FirstChild) return m_FirstChild;
511 if (m_Next) return m_Next;
512 wx28HtmlTag *cur = m_Parent;
513 if (!cur) return NULL;
514 while (cur->m_Parent && !cur->m_Next)
515 cur = cur->m_Parent;
516 return cur->m_Next;
517 }