use wxVector<T> instead of homegrown growing array in wxHtmlTagsCache
[wxWidgets.git] / src / html / htmltag.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/html/htmltag.cpp
3 // Purpose: wxHtmlTag class (represents single tag)
4 // Author: Vaclav Slavik
5 // RCS-ID: $Id$
6 // Copyright: (c) 1999 Vaclav Slavik
7 // Licence: wxWindows licence
8 /////////////////////////////////////////////////////////////////////////////
9
10 #include "wx/wxprec.h"
11
12 #ifdef __BORLANDC__
13 #pragma hdrstop
14 #endif
15
16 #if wxUSE_HTML
17
18 #include "wx/html/htmltag.h"
19
20 #ifndef WX_PRECOMP
21 #include "wx/colour.h"
22 #include "wx/wxcrtvararg.h"
23 #endif
24
25 #include "wx/html/htmlpars.h"
26 #include "wx/vector.h"
27
28 #include <stdio.h> // for vsscanf
29 #include <stdarg.h>
30
31 //-----------------------------------------------------------------------------
32 // wxHtmlTagsCache
33 //-----------------------------------------------------------------------------
34
35 struct wxHtmlCacheItem
36 {
37 // this is "pos" value passed to wxHtmlTag's constructor.
38 // it is position of '<' character of the tag
39 int Key;
40
41 // end positions for the tag:
42 // end1 is '<' of ending tag,
43 // end2 is '>' or both are
44 // -1 if there is no ending tag for this one...
45 // or -2 if this is ending tag </...>
46 int End1, End2;
47
48 // name of this tag
49 wxChar *Name;
50 };
51
52 // NB: this is an empty class and not typedef because of forward declaration
53 class wxHtmlTagsCacheData : public wxVector<wxHtmlCacheItem>
54 {
55 };
56
// wxWidgets run-time type information for wxHtmlTagsCache, with wxObject
// given as its base class.
57 IMPLEMENT_CLASS(wxHtmlTagsCache,wxObject)
58
59 bool wxIsCDATAElement(const wxChar *tag)
60 {
61 return (wxStrcmp(tag, _T("SCRIPT")) == 0) ||
62 (wxStrcmp(tag, _T("STYLE")) == 0);
63 }
64
65 wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
66 {
67 m_Cache = new wxHtmlTagsCacheData;
68 m_CachePos = 0;
69
70 const wxChar *src = source.c_str();
71 int lng = source.length();
72 wxChar tagBuffer[256];
73
74 for ( int pos = 0; pos < lng; pos++ )
75 {
76 if (src[pos] == wxT('<')) // tag found:
77 {
78 // don't cache comment tags
79 wxString::const_iterator iter = source.begin() + pos;
80 if ( wxHtmlParser::SkipCommentTag(iter, source.end()) )
81 {
82 pos = iter - source.begin();
83 continue;
84 }
85
86 size_t tg = Cache().size();
87 Cache().push_back(wxHtmlCacheItem());
88
89 int stpos = pos++;
90 Cache()[tg].Key = stpos;
91
92 int i;
93 for ( i = 0;
94 pos < lng && i < (int)WXSIZEOF(tagBuffer) - 1 &&
95 src[pos] != wxT('>') && !wxIsspace(src[pos]);
96 i++, pos++ )
97 {
98 tagBuffer[i] = (wxChar)wxToupper(src[pos]);
99 }
100 tagBuffer[i] = _T('\0');
101
102 Cache()[tg].Name = new wxChar[i+1];
103 memcpy(Cache()[tg].Name, tagBuffer, (i+1)*sizeof(wxChar));
104
105 while (pos < lng && src[pos] != wxT('>')) pos++;
106
107 if (src[stpos+1] == wxT('/')) // ending tag:
108 {
109 Cache()[tg].End1 = Cache()[tg].End2 = -2;
110 // find matching begin tag:
111 for (i = tg; i >= 0; i--)
112 if ((Cache()[i].End1 == -1) && (wxStrcmp(Cache()[i].Name, tagBuffer+1) == 0))
113 {
114 Cache()[i].End1 = stpos;
115 Cache()[i].End2 = pos + 1;
116 break;
117 }
118 }
119 else
120 {
121 Cache()[tg].End1 = Cache()[tg].End2 = -1;
122
123 if (wxIsCDATAElement(tagBuffer))
124 {
125 // store the orig pos in case we are missing the closing
126 // tag (see below)
127 wxInt32 old_pos = pos;
128 bool foundCloseTag = false;
129
130 // find next matching tag
131 int tag_len = wxStrlen(tagBuffer);
132 while (pos < lng)
133 {
134 // find the ending tag
135 while (pos + 1 < lng &&
136 (src[pos] != '<' || src[pos+1] != '/'))
137 ++pos;
138 if (src[pos] == '<')
139 ++pos;
140
141 // see if it matches
142 int match_pos = 0;
143 while (pos < lng && match_pos < tag_len && src[pos] != '>' && src[pos] != '<') {
144 // cast to wxChar needed to suppress warning in
145 // Unicode build
146 if ((wxChar)wxToupper(src[pos]) == tagBuffer[match_pos]) {
147 ++match_pos;
148 }
149 else if (src[pos] == wxT(' ') || src[pos] == wxT('\n') ||
150 src[pos] == wxT('\r') || src[pos] == wxT('\t')) {
151 // need to skip over these
152 }
153 else {
154 match_pos = 0;
155 }
156 ++pos;
157 }
158
159 // found a match
160 if (match_pos == tag_len)
161 {
162 pos = pos - tag_len - 3;
163 foundCloseTag = true;
164 break;
165 }
166 else // keep looking for the closing tag
167 {
168 ++pos;
169 }
170 }
171 if (!foundCloseTag)
172 {
173 // we didn't find closing tag; this means the markup
174 // is incorrect and the best thing we can do is to
175 // ignore the unclosed tag and continue parsing as if
176 // it didn't exist:
177 pos = old_pos;
178 }
179 }
180 }
181 }
182 }
183
184 // ok, we're done, now we'll free .Name members of cache - we don't need it anymore:
185 for ( wxHtmlTagsCacheData::iterator i = Cache().begin();
186 i != Cache().end(); ++i )
187 {
188 delete[] i->Name;
189 i->Name = NULL;
190 }
191 }
192
193 wxHtmlTagsCache::~wxHtmlTagsCache()
194 {
195 delete m_Cache;
196 }
197
198 void wxHtmlTagsCache::QueryTag(int at, int* end1, int* end2)
199 {
200 if (Cache().empty())
201 return;
202
203 if (Cache()[m_CachePos].Key != at)
204 {
205 int delta = (at < Cache()[m_CachePos].Key) ? -1 : 1;
206 do
207 {
208 if ( m_CachePos < 0 || m_CachePos == Cache().size() )
209 {
210 // something is very wrong with HTML, give up by returning an
211 // impossibly large value which is going to be ignored by the
212 // caller
213 *end1 =
214 *end2 = INT_MAX;
215 return;
216 }
217
218 m_CachePos += delta;
219 }
220 while (Cache()[m_CachePos].Key != at);
221 }
222 *end1 = Cache()[m_CachePos].End1;
223 *end2 = Cache()[m_CachePos].End2;
224 }
225
226
227
228
229 //-----------------------------------------------------------------------------
230 // wxHtmlTag
231 //-----------------------------------------------------------------------------
232
// wxWidgets run-time type information for wxHtmlTag, with wxObject given as
// its base class.
233 IMPLEMENT_CLASS(wxHtmlTag,wxObject)
234
235 wxHtmlTag::wxHtmlTag(wxHtmlTag *parent,
236 const wxString& source, int pos, int end_pos,
237 wxHtmlTagsCache *cache,
238 wxHtmlEntitiesParser *entParser) : wxObject()
239 {
240 /* Setup DOM relations */
241
242 m_Next = NULL;
243 m_FirstChild = m_LastChild = NULL;
244 m_Parent = parent;
245 if (parent)
246 {
247 m_Prev = m_Parent->m_LastChild;
248 if (m_Prev == NULL)
249 m_Parent->m_FirstChild = this;
250 else
251 m_Prev->m_Next = this;
252 m_Parent->m_LastChild = this;
253 }
254 else
255 m_Prev = NULL;
256
257 /* Find parameters and their values: */
258
259 int i;
260 wxChar c;
261
262 // fill-in name, params and begin pos:
263 i = pos+1;
264
265 // find tag's name and convert it to uppercase:
266 while ((i < end_pos) &&
267 ((c = source[i++]) != wxT(' ') && c != wxT('\r') &&
268 c != wxT('\n') && c != wxT('\t') &&
269 c != wxT('>')))
270 {
271 if ((c >= wxT('a')) && (c <= wxT('z')))
272 c -= (wxT('a') - wxT('A'));
273 m_Name << c;
274 }
275
276 // if the tag has parameters, read them and "normalize" them,
277 // i.e. convert to uppercase, replace whitespaces by spaces and
278 // remove whitespaces around '=':
279 if (source[i-1] != wxT('>'))
280 {
281 #define IS_WHITE(c) (c == wxT(' ') || c == wxT('\r') || \
282 c == wxT('\n') || c == wxT('\t'))
283 wxString pname, pvalue;
284 wxChar quote;
285 enum
286 {
287 ST_BEFORE_NAME = 1,
288 ST_NAME,
289 ST_BEFORE_EQ,
290 ST_BEFORE_VALUE,
291 ST_VALUE
292 } state;
293
294 quote = 0;
295 state = ST_BEFORE_NAME;
296 while (i < end_pos)
297 {
298 c = source[i++];
299
300 if (c == wxT('>') && !(state == ST_VALUE && quote != 0))
301 {
302 if (state == ST_BEFORE_EQ || state == ST_NAME)
303 {
304 m_ParamNames.Add(pname);
305 m_ParamValues.Add(wxEmptyString);
306 }
307 else if (state == ST_VALUE && quote == 0)
308 {
309 m_ParamNames.Add(pname);
310 if (entParser)
311 m_ParamValues.Add(entParser->Parse(pvalue));
312 else
313 m_ParamValues.Add(pvalue);
314 }
315 break;
316 }
317 switch (state)
318 {
319 case ST_BEFORE_NAME:
320 if (!IS_WHITE(c))
321 {
322 pname = c;
323 state = ST_NAME;
324 }
325 break;
326 case ST_NAME:
327 if (IS_WHITE(c))
328 state = ST_BEFORE_EQ;
329 else if (c == wxT('='))
330 state = ST_BEFORE_VALUE;
331 else
332 pname << c;
333 break;
334 case ST_BEFORE_EQ:
335 if (c == wxT('='))
336 state = ST_BEFORE_VALUE;
337 else if (!IS_WHITE(c))
338 {
339 m_ParamNames.Add(pname);
340 m_ParamValues.Add(wxEmptyString);
341 pname = c;
342 state = ST_NAME;
343 }
344 break;
345 case ST_BEFORE_VALUE:
346 if (!IS_WHITE(c))
347 {
348 if (c == wxT('"') || c == wxT('\''))
349 quote = c, pvalue = wxEmptyString;
350 else
351 quote = 0, pvalue = c;
352 state = ST_VALUE;
353 }
354 break;
355 case ST_VALUE:
356 if ((quote != 0 && c == quote) ||
357 (quote == 0 && IS_WHITE(c)))
358 {
359 m_ParamNames.Add(pname);
360 if (quote == 0)
361 {
362 // VS: backward compatibility, no real reason,
363 // but wxHTML code relies on this... :(
364 pvalue.MakeUpper();
365 }
366 if (entParser)
367 m_ParamValues.Add(entParser->Parse(pvalue));
368 else
369 m_ParamValues.Add(pvalue);
370 state = ST_BEFORE_NAME;
371 }
372 else
373 pvalue << c;
374 break;
375 }
376 }
377
378 #undef IS_WHITE
379 }
380 m_Begin = i;
381
382 cache->QueryTag(pos, &m_End1, &m_End2);
383 if (m_End1 > end_pos) m_End1 = end_pos;
384 if (m_End2 > end_pos) m_End2 = end_pos;
385 }
386
387 wxHtmlTag::~wxHtmlTag()
388 {
389 wxHtmlTag *t1, *t2;
390 t1 = m_FirstChild;
391 while (t1)
392 {
393 t2 = t1->GetNextSibling();
394 delete t1;
395 t1 = t2;
396 }
397 }
398
399 bool wxHtmlTag::HasParam(const wxString& par) const
400 {
401 return (m_ParamNames.Index(par, false) != wxNOT_FOUND);
402 }
403
404 wxString wxHtmlTag::GetParam(const wxString& par, bool with_commas) const
405 {
406 int index = m_ParamNames.Index(par, false);
407 if (index == wxNOT_FOUND)
408 return wxEmptyString;
409 if (with_commas)
410 {
411 // VS: backward compatibility, seems to be never used by wxHTML...
412 wxString s;
413 s << wxT('"') << m_ParamValues[index] << wxT('"');
414 return s;
415 }
416 else
417 return m_ParamValues[index];
418 }
419
420 int wxHtmlTag::ScanParam(const wxString& par,
421 const char *format,
422 void *param) const
423 {
424 wxString parval = GetParam(par);
425 return wxSscanf(parval, format, param);
426 }
427
428 int wxHtmlTag::ScanParam(const wxString& par,
429 const wchar_t *format,
430 void *param) const
431 {
432 wxString parval = GetParam(par);
433 return wxSscanf(parval, format, param);
434 }
435
436 bool wxHtmlTag::GetParamAsColour(const wxString& par, wxColour *clr) const
437 {
438 wxCHECK_MSG( clr, false, _T("invalid colour argument") );
439
440 wxString str = GetParam(par);
441
442 // handle colours defined in HTML 4.0 first:
443 if (str.length() > 1 && str[0] != _T('#'))
444 {
445 #define HTML_COLOUR(name, r, g, b) \
446 if (str.IsSameAs(wxT(name), false)) \
447 { clr->Set(r, g, b); return true; }
448 HTML_COLOUR("black", 0x00,0x00,0x00)
449 HTML_COLOUR("silver", 0xC0,0xC0,0xC0)
450 HTML_COLOUR("gray", 0x80,0x80,0x80)
451 HTML_COLOUR("white", 0xFF,0xFF,0xFF)
452 HTML_COLOUR("maroon", 0x80,0x00,0x00)
453 HTML_COLOUR("red", 0xFF,0x00,0x00)
454 HTML_COLOUR("purple", 0x80,0x00,0x80)
455 HTML_COLOUR("fuchsia", 0xFF,0x00,0xFF)
456 HTML_COLOUR("green", 0x00,0x80,0x00)
457 HTML_COLOUR("lime", 0x00,0xFF,0x00)
458 HTML_COLOUR("olive", 0x80,0x80,0x00)
459 HTML_COLOUR("yellow", 0xFF,0xFF,0x00)
460 HTML_COLOUR("navy", 0x00,0x00,0x80)
461 HTML_COLOUR("blue", 0x00,0x00,0xFF)
462 HTML_COLOUR("teal", 0x00,0x80,0x80)
463 HTML_COLOUR("aqua", 0x00,0xFF,0xFF)
464 #undef HTML_COLOUR
465 }
466
467 // then try to parse #rrggbb representations or set from other well
468 // known names (note that this doesn't strictly conform to HTML spec,
469 // but it doesn't do real harm -- but it *must* be done after the standard
470 // colors are handled above):
471 if (clr->Set(str))
472 return true;
473
474 return false;
475 }
476
477 bool wxHtmlTag::GetParamAsInt(const wxString& par, int *clr) const
478 {
479 if (!HasParam(par)) return false;
480 long i;
481 bool succ = GetParam(par).ToLong(&i);
482 *clr = (int)i;
483 return succ;
484 }
485
486 wxString wxHtmlTag::GetAllParams() const
487 {
488 // VS: this function is for backward compatibility only,
489 // never used by wxHTML
490 wxString s;
491 size_t cnt = m_ParamNames.GetCount();
492 for (size_t i = 0; i < cnt; i++)
493 {
494 s << m_ParamNames[i];
495 s << wxT('=');
496 if (m_ParamValues[i].Find(wxT('"')) != wxNOT_FOUND)
497 s << wxT('\'') << m_ParamValues[i] << wxT('\'');
498 else
499 s << wxT('"') << m_ParamValues[i] << wxT('"');
500 }
501 return s;
502 }
503
504 wxHtmlTag *wxHtmlTag::GetFirstSibling() const
505 {
506 if (m_Parent)
507 return m_Parent->m_FirstChild;
508 else
509 {
510 wxHtmlTag *cur = (wxHtmlTag*)this;
511 while (cur->m_Prev)
512 cur = cur->m_Prev;
513 return cur;
514 }
515 }
516
517 wxHtmlTag *wxHtmlTag::GetLastSibling() const
518 {
519 if (m_Parent)
520 return m_Parent->m_LastChild;
521 else
522 {
523 wxHtmlTag *cur = (wxHtmlTag*)this;
524 while (cur->m_Next)
525 cur = cur->m_Next;
526 return cur;
527 }
528 }
529
530 wxHtmlTag *wxHtmlTag::GetNextTag() const
531 {
532 if (m_FirstChild) return m_FirstChild;
533 if (m_Next) return m_Next;
534 wxHtmlTag *cur = m_Parent;
535 if (!cur) return NULL;
536 while (cur->m_Parent && !cur->m_Next)
537 cur = cur->m_Parent;
538 return cur->m_Next;
539 }
540
541 #endif