Remove all lines containing cvs/svn "$Id$" keyword.
[wxWidgets.git] / tests / benchmarks / htmlparser / htmltag.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/html/htmltag.cpp
3 // Purpose: wx28HtmlTag class (represents single tag)
4 // Author: Vaclav Slavik
5 // Copyright: (c) 1999 Vaclav Slavik
6 // Licence: wxWindows licence
7 /////////////////////////////////////////////////////////////////////////////
8
9 #include "wx/wxprec.h"
10
11 #ifdef __BORLANDC__
12 #pragma hdrstop
13 #endif
14
15 #include "htmltag.h"
16
17 #ifndef WXPRECOMP
18 #include "wx/colour.h"
19 #endif
20
21 #include "htmlpars.h"
22 #include <stdio.h> // for vsscanf
23 #include <stdarg.h>
24
25
26 //-----------------------------------------------------------------------------
27 // wx28HtmlTagsCache
28 //-----------------------------------------------------------------------------
29
30 struct wx28HtmlCacheItem
31 {
32 // this is "pos" value passed to wx28HtmlTag's constructor.
33 // it is position of '<' character of the tag
34 int Key;
35
36 // end positions for the tag:
37 // end1 is '<' of ending tag,
38 // end2 is '>' or both are
39 // -1 if there is no ending tag for this one...
40 // or -2 if this is ending tag </...>
41 int End1, End2;
42
43 // name of this tag
44 wxChar *Name;
45 };
46
47
48 IMPLEMENT_CLASS(wx28HtmlTagsCache,wxObject)
49 // Number of wx28HtmlCacheItem slots by which the cache array is grown each time it becomes full (used by the realloc() call in the wx28HtmlTagsCache constructor).
50 #define CACHE_INCREMENT 64
51
52 bool wxIsCDATAElement(const wxChar *tag)
53 {
54 return (wxStrcmp(tag, wxT("SCRIPT")) == 0) ||
55 (wxStrcmp(tag, wxT("STYLE")) == 0);
56 }
57
58 wx28HtmlTagsCache::wx28HtmlTagsCache(const wxString& source)
59 {
60 const wxChar *src = source.c_str();
61 int lng = source.length();
62 wxChar tagBuffer[256];
63
64 m_Cache = NULL;
65 m_CacheSize = 0;
66 m_CachePos = 0;
67
68 int pos = 0;
69 while (pos < lng)
70 {
71 if (src[pos] == wxT('<')) // tag found:
72 {
73 if (m_CacheSize % CACHE_INCREMENT == 0)
74 m_Cache = (wx28HtmlCacheItem*) realloc(m_Cache, (m_CacheSize + CACHE_INCREMENT) * sizeof(wx28HtmlCacheItem));
75 int tg = m_CacheSize++;
76 int stpos = pos++;
77 m_Cache[tg].Key = stpos;
78
79 int i;
80 for ( i = 0;
81 pos < lng && i < (int)WXSIZEOF(tagBuffer) - 1 &&
82 src[pos] != wxT('>') && !wxIsspace(src[pos]);
83 i++, pos++ )
84 {
85 tagBuffer[i] = (wxChar)wxToupper(src[pos]);
86 }
87 tagBuffer[i] = wxT('\0');
88
89 m_Cache[tg].Name = new wxChar[i+1];
90 memcpy(m_Cache[tg].Name, tagBuffer, (i+1)*sizeof(wxChar));
91
92 while (pos < lng && src[pos] != wxT('>')) pos++;
93
94 if (src[stpos+1] == wxT('/')) // ending tag:
95 {
96 m_Cache[tg].End1 = m_Cache[tg].End2 = -2;
97 // find matching begin tag:
98 for (i = tg; i >= 0; i--)
99 if ((m_Cache[i].End1 == -1) && (wxStrcmp(m_Cache[i].Name, tagBuffer+1) == 0))
100 {
101 m_Cache[i].End1 = stpos;
102 m_Cache[i].End2 = pos + 1;
103 break;
104 }
105 }
106 else
107 {
108 m_Cache[tg].End1 = m_Cache[tg].End2 = -1;
109
110 if (wxIsCDATAElement(tagBuffer))
111 {
112 // store the orig pos in case we are missing the closing
113 // tag (see below)
114 wxInt32 old_pos = pos;
115 bool foundCloseTag = false;
116
117 // find next matching tag
118 int tag_len = wxStrlen(tagBuffer);
119 while (pos < lng)
120 {
121 // find the ending tag
122 while (pos + 1 < lng &&
123 (src[pos] != '<' || src[pos+1] != '/'))
124 ++pos;
125 if (src[pos] == '<')
126 ++pos;
127
128 // see if it matches
129 int match_pos = 0;
130 while (pos < lng && match_pos < tag_len && src[pos] != '>' && src[pos] != '<') {
131 // cast to wxChar needed to suppress warning in
132 // Unicode build
133 if ((wxChar)wxToupper(src[pos]) == tagBuffer[match_pos]) {
134 ++match_pos;
135 }
136 else if (src[pos] == wxT(' ') || src[pos] == wxT('\n') ||
137 src[pos] == wxT('\r') || src[pos] == wxT('\t')) {
138 // need to skip over these
139 }
140 else {
141 match_pos = 0;
142 }
143 ++pos;
144 }
145
146 // found a match
147 if (match_pos == tag_len)
148 {
149 pos = pos - tag_len - 3;
150 foundCloseTag = true;
151 break;
152 }
153 else // keep looking for the closing tag
154 {
155 ++pos;
156 }
157 }
158 if (!foundCloseTag)
159 {
160 // we didn't find closing tag; this means the markup
161 // is incorrect and the best thing we can do is to
162 // ignore the unclosed tag and continue parsing as if
163 // it didn't exist:
164 pos = old_pos;
165 }
166 }
167 }
168 }
169
170 pos++;
171 }
172
173 // ok, we're done, now we'll free .Name members of cache - we don't need it anymore:
174 for (int i = 0; i < m_CacheSize; i++)
175 {
176 delete[] m_Cache[i].Name;
177 m_Cache[i].Name = NULL;
178 }
179 }
180
181 void wx28HtmlTagsCache::QueryTag(int at, int* end1, int* end2)
182 {
183 if (m_Cache == NULL) return;
184 if (m_Cache[m_CachePos].Key != at)
185 {
186 int delta = (at < m_Cache[m_CachePos].Key) ? -1 : 1;
187 do
188 {
189 if ( m_CachePos < 0 || m_CachePos == m_CacheSize )
190 {
191 // something is very wrong with HTML, give up by returning an
192 // impossibly large value which is going to be ignored by the
193 // caller
194 *end1 =
195 *end2 = INT_MAX;
196 return;
197 }
198
199 m_CachePos += delta;
200 }
201 while (m_Cache[m_CachePos].Key != at);
202 }
203 *end1 = m_Cache[m_CachePos].End1;
204 *end2 = m_Cache[m_CachePos].End2;
205 }
206
207
208
209
210 //-----------------------------------------------------------------------------
211 // wx28HtmlTag
212 //-----------------------------------------------------------------------------
213
214 IMPLEMENT_CLASS(wx28HtmlTag,wxObject) // wxWidgets RTTI macro; presumably declares wxObject as the base class of wx28HtmlTag -- confirm against wx/object.h
215
216 wx28HtmlTag::wx28HtmlTag(wx28HtmlTag *parent,
217 const wxString& source, int pos, int end_pos,
218 wx28HtmlTagsCache *cache,
219 wx28HtmlEntitiesParser *entParser) : wxObject()
220 {
221 /* Setup DOM relations */
222
223 m_Next = NULL;
224 m_FirstChild = m_LastChild = NULL;
225 m_Parent = parent;
226 if (parent)
227 {
228 m_Prev = m_Parent->m_LastChild;
229 if (m_Prev == NULL)
230 m_Parent->m_FirstChild = this;
231 else
232 m_Prev->m_Next = this;
233 m_Parent->m_LastChild = this;
234 }
235 else
236 m_Prev = NULL;
237
238 /* Find parameters and their values: */
239
240 int i;
241 wxChar c;
242
243 // fill-in name, params and begin pos:
244 i = pos+1;
245
246 // find tag's name and convert it to uppercase:
247 while ((i < end_pos) &&
248 ((c = source[i++]) != wxT(' ') && c != wxT('\r') &&
249 c != wxT('\n') && c != wxT('\t') &&
250 c != wxT('>')))
251 {
252 if ((c >= wxT('a')) && (c <= wxT('z')))
253 c -= (wxT('a') - wxT('A'));
254 m_Name << c;
255 }
256
257 // if the tag has parameters, read them and "normalize" them,
258 // i.e. convert to uppercase, replace whitespaces by spaces and
259 // remove whitespaces around '=':
260 if (source[i-1] != wxT('>'))
261 {
262 #define IS_WHITE(c) (c == wxT(' ') || c == wxT('\r') || \
263 c == wxT('\n') || c == wxT('\t'))
264 wxString pname, pvalue;
265 wxChar quote;
266 enum
267 {
268 ST_BEFORE_NAME = 1,
269 ST_NAME,
270 ST_BEFORE_EQ,
271 ST_BEFORE_VALUE,
272 ST_VALUE
273 } state;
274
275 quote = 0;
276 state = ST_BEFORE_NAME;
277 while (i < end_pos)
278 {
279 c = source[i++];
280
281 if (c == wxT('>') && !(state == ST_VALUE && quote != 0))
282 {
283 if (state == ST_BEFORE_EQ || state == ST_NAME)
284 {
285 m_ParamNames.Add(pname);
286 m_ParamValues.Add(wxEmptyString);
287 }
288 else if (state == ST_VALUE && quote == 0)
289 {
290 m_ParamNames.Add(pname);
291 if (entParser)
292 m_ParamValues.Add(entParser->Parse(pvalue));
293 else
294 m_ParamValues.Add(pvalue);
295 }
296 break;
297 }
298 switch (state)
299 {
300 case ST_BEFORE_NAME:
301 if (!IS_WHITE(c))
302 {
303 pname = c;
304 state = ST_NAME;
305 }
306 break;
307 case ST_NAME:
308 if (IS_WHITE(c))
309 state = ST_BEFORE_EQ;
310 else if (c == wxT('='))
311 state = ST_BEFORE_VALUE;
312 else
313 pname << c;
314 break;
315 case ST_BEFORE_EQ:
316 if (c == wxT('='))
317 state = ST_BEFORE_VALUE;
318 else if (!IS_WHITE(c))
319 {
320 m_ParamNames.Add(pname);
321 m_ParamValues.Add(wxEmptyString);
322 pname = c;
323 state = ST_NAME;
324 }
325 break;
326 case ST_BEFORE_VALUE:
327 if (!IS_WHITE(c))
328 {
329 if (c == wxT('"') || c == wxT('\''))
330 quote = c, pvalue = wxEmptyString;
331 else
332 quote = 0, pvalue = c;
333 state = ST_VALUE;
334 }
335 break;
336 case ST_VALUE:
337 if ((quote != 0 && c == quote) ||
338 (quote == 0 && IS_WHITE(c)))
339 {
340 m_ParamNames.Add(pname);
341 if (quote == 0)
342 {
343 // VS: backward compatibility, no real reason,
344 // but wxHTML code relies on this... :(
345 pvalue.MakeUpper();
346 }
347 if (entParser)
348 m_ParamValues.Add(entParser->Parse(pvalue));
349 else
350 m_ParamValues.Add(pvalue);
351 state = ST_BEFORE_NAME;
352 }
353 else
354 pvalue << c;
355 break;
356 }
357 }
358
359 #undef IS_WHITE
360 }
361 m_Begin = i;
362
363 cache->QueryTag(pos, &m_End1, &m_End2);
364 if (m_End1 > end_pos) m_End1 = end_pos;
365 if (m_End2 > end_pos) m_End2 = end_pos;
366 }
367
368 wx28HtmlTag::~wx28HtmlTag()
369 {
370 wx28HtmlTag *t1, *t2;
371 t1 = m_FirstChild;
372 while (t1)
373 {
374 t2 = t1->GetNextSibling();
375 delete t1;
376 t1 = t2;
377 }
378 }
379
380 bool wx28HtmlTag::HasParam(const wxString& par) const
381 {
382 return (m_ParamNames.Index(par, false) != wxNOT_FOUND);
383 }
384
385 wxString wx28HtmlTag::GetParam(const wxString& par, bool with_commas) const
386 {
387 int index = m_ParamNames.Index(par, false);
388 if (index == wxNOT_FOUND)
389 return wxEmptyString;
390 if (with_commas)
391 {
392 // VS: backward compatibility, seems to be never used by wxHTML...
393 wxString s;
394 s << wxT('"') << m_ParamValues[index] << wxT('"');
395 return s;
396 }
397 else
398 return m_ParamValues[index];
399 }
400
401 int wx28HtmlTag::ScanParam(const wxString& par,
402 const wxChar *format,
403 void *param) const
404 {
405 wxString parval = GetParam(par);
406 return wxSscanf(parval, format, param);
407 }
408
409 bool wx28HtmlTag::GetParamAsColour(const wxString& par, wxColour *clr) const
410 {
411 wxCHECK_MSG( clr, false, wxT("invalid colour argument") );
412
413 wxString str = GetParam(par);
414
415 // handle colours defined in HTML 4.0 first:
416 if (str.length() > 1 && str[0] != wxT('#'))
417 {
418 #define HTML_COLOUR(name, r, g, b) \
419 if (str.IsSameAs(wxT(name), false)) \
420 { clr->Set(r, g, b); return true; }
421 HTML_COLOUR("black", 0x00,0x00,0x00)
422 HTML_COLOUR("silver", 0xC0,0xC0,0xC0)
423 HTML_COLOUR("gray", 0x80,0x80,0x80)
424 HTML_COLOUR("white", 0xFF,0xFF,0xFF)
425 HTML_COLOUR("maroon", 0x80,0x00,0x00)
426 HTML_COLOUR("red", 0xFF,0x00,0x00)
427 HTML_COLOUR("purple", 0x80,0x00,0x80)
428 HTML_COLOUR("fuchsia", 0xFF,0x00,0xFF)
429 HTML_COLOUR("green", 0x00,0x80,0x00)
430 HTML_COLOUR("lime", 0x00,0xFF,0x00)
431 HTML_COLOUR("olive", 0x80,0x80,0x00)
432 HTML_COLOUR("yellow", 0xFF,0xFF,0x00)
433 HTML_COLOUR("navy", 0x00,0x00,0x80)
434 HTML_COLOUR("blue", 0x00,0x00,0xFF)
435 HTML_COLOUR("teal", 0x00,0x80,0x80)
436 HTML_COLOUR("aqua", 0x00,0xFF,0xFF)
437 #undef HTML_COLOUR
438 }
439
440 // then try to parse #rrggbb representations or set from other well
441 // known names (note that this doesn't strictly conform to HTML spec,
442 // but it doesn't do real harm -- but it *must* be done after the standard
443 // colors are handled above):
444 if (clr->Set(str))
445 return true;
446
447 return false;
448 }
449
450 bool wx28HtmlTag::GetParamAsInt(const wxString& par, int *clr) const
451 {
452 if ( !HasParam(par) )
453 return false;
454
455 long i;
456 if ( !GetParam(par).ToLong(&i) )
457 return false;
458
459 *clr = (int)i;
460 return true;
461 }
462
463 wxString wx28HtmlTag::GetAllParams() const
464 {
465 // VS: this function is for backward compatibility only,
466 // never used by wxHTML
467 wxString s;
468 size_t cnt = m_ParamNames.GetCount();
469 for (size_t i = 0; i < cnt; i++)
470 {
471 s << m_ParamNames[i];
472 s << wxT('=');
473 if (m_ParamValues[i].Find(wxT('"')) != wxNOT_FOUND)
474 s << wxT('\'') << m_ParamValues[i] << wxT('\'');
475 else
476 s << wxT('"') << m_ParamValues[i] << wxT('"');
477 }
478 return s;
479 }
480
481 wx28HtmlTag *wx28HtmlTag::GetFirstSibling() const
482 {
483 if (m_Parent)
484 return m_Parent->m_FirstChild;
485 else
486 {
487 wx28HtmlTag *cur = (wx28HtmlTag*)this;
488 while (cur->m_Prev)
489 cur = cur->m_Prev;
490 return cur;
491 }
492 }
493
494 wx28HtmlTag *wx28HtmlTag::GetLastSibling() const
495 {
496 if (m_Parent)
497 return m_Parent->m_LastChild;
498 else
499 {
500 wx28HtmlTag *cur = (wx28HtmlTag*)this;
501 while (cur->m_Next)
502 cur = cur->m_Next;
503 return cur;
504 }
505 }
506
507 wx28HtmlTag *wx28HtmlTag::GetNextTag() const
508 {
509 if (m_FirstChild) return m_FirstChild;
510 if (m_Next) return m_Next;
511 wx28HtmlTag *cur = m_Parent;
512 if (!cur) return NULL;
513 while (cur->m_Parent && !cur->m_Next)
514 cur = cur->m_Parent;
515 return cur->m_Next;
516 }