Add a wxHtmlTag helper parsing both absolute values and percents.
[wxWidgets.git] / src / html / htmltag.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/html/htmltag.cpp
3 // Purpose: wxHtmlTag class (represents single tag)
4 // Author: Vaclav Slavik
5 // RCS-ID: $Id$
6 // Copyright: (c) 1999 Vaclav Slavik
7 // Licence: wxWindows licence
8 /////////////////////////////////////////////////////////////////////////////
9
10 #include "wx/wxprec.h"
11
12 #ifdef __BORLANDC__
13 #pragma hdrstop
14 #endif
15
16 #if wxUSE_HTML
17
18 #include "wx/html/htmltag.h"
19
20 #ifndef WX_PRECOMP
21 #include "wx/colour.h"
22 #include "wx/wxcrtvararg.h"
23 #endif
24
25 #include "wx/html/htmlpars.h"
26 #include "wx/html/styleparams.h"
27
28 #include "wx/vector.h"
29
#include <limits.h>     // for INT_MIN, INT_MAX
#include <stdarg.h>
#include <stdio.h>      // for vsscanf
32
33 //-----------------------------------------------------------------------------
34 // wxHtmlTagsCache
35 //-----------------------------------------------------------------------------
36
37 struct wxHtmlCacheItem
38 {
39 // this is "pos" value passed to wxHtmlTag's constructor.
40 // it is position of '<' character of the tag
41 wxString::const_iterator Key;
42
43 // Tag type
44 enum Type
45 {
46 Type_Normal, // normal tag with a matching ending tag
47 Type_NoMatchingEndingTag, // there's no ending tag for this tag
48 Type_EndingTag // this is ending tag </..>
49 };
50 Type type;
51
52 // end positions for the tag:
53 // end1 is '<' of ending tag,
54 // end2 is '>' or both are
55 wxString::const_iterator End1, End2;
56
57 // name of this tag
58 wxChar *Name;
59 };
60
61 // NB: this is an empty class and not typedef because of forward declaration
62 class wxHtmlTagsCacheData : public wxVector<wxHtmlCacheItem>
63 {
64 };
65
66 bool wxIsCDATAElement(const wxChar *tag)
67 {
68 return (wxStrcmp(tag, wxT("SCRIPT")) == 0) ||
69 (wxStrcmp(tag, wxT("STYLE")) == 0);
70 }
71
72 bool wxIsCDATAElement(const wxString& tag)
73 {
74 return (wxStrcmp(tag.wx_str(), wxS("SCRIPT")) == 0) ||
75 (wxStrcmp(tag.wx_str(), wxS("STYLE")) == 0);
76 }
77
78 wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
79 {
80 m_Cache = new wxHtmlTagsCacheData;
81 m_CachePos = 0;
82
83 wxChar tagBuffer[256];
84
85 const wxString::const_iterator end = source.end();
86 for ( wxString::const_iterator pos = source.begin(); pos < end; ++pos )
87 {
88 if (*pos != wxT('<'))
89 continue;
90
91 // possible tag start found:
92
93 // don't cache comment tags
94 if ( wxHtmlParser::SkipCommentTag(pos, end) )
95 continue;
96
97 // Remember the starting tag position.
98 wxString::const_iterator stpos = pos++;
99
100 // And look for the ending one.
101 int i;
102 for ( i = 0;
103 pos < end && i < (int)WXSIZEOF(tagBuffer) - 1 &&
104 *pos != wxT('>') && !wxIsspace(*pos);
105 ++i, ++pos )
106 {
107 tagBuffer[i] = (wxChar)wxToupper(*pos);
108 }
109 tagBuffer[i] = wxT('\0');
110
111 while (pos < end && *pos != wxT('>'))
112 ++pos;
113
114 if ( pos == end )
115 {
116 // We didn't find a closing bracket, this is not a valid tag after
117 // all. Notice that we need to roll back pos to avoid creating an
118 // invalid iterator when "++pos" is done in the loop statement.
119 --pos;
120
121 continue;
122 }
123
124 // We have a valid tag, add it to the cache.
125 size_t tg = Cache().size();
126 Cache().push_back(wxHtmlCacheItem());
127 Cache()[tg].Key = stpos;
128 Cache()[tg].Name = new wxChar[i+1];
129 memcpy(Cache()[tg].Name, tagBuffer, (i+1)*sizeof(wxChar));
130
131 if ((stpos+1) < end && *(stpos+1) == wxT('/')) // ending tag:
132 {
133 Cache()[tg].type = wxHtmlCacheItem::Type_EndingTag;
134 // find matching begin tag:
135 for (i = tg; i >= 0; i--)
136 {
137 if ((Cache()[i].type == wxHtmlCacheItem::Type_NoMatchingEndingTag) && (wxStrcmp(Cache()[i].Name, tagBuffer+1) == 0))
138 {
139 Cache()[i].type = wxHtmlCacheItem::Type_Normal;
140 Cache()[i].End1 = stpos;
141 Cache()[i].End2 = pos + 1;
142 break;
143 }
144 }
145 }
146 else
147 {
148 Cache()[tg].type = wxHtmlCacheItem::Type_NoMatchingEndingTag;
149
150 if (wxIsCDATAElement(tagBuffer))
151 {
152 // store the orig pos in case we are missing the closing
153 // tag (see below)
154 const wxString::const_iterator old_pos = pos;
155 bool foundCloseTag = false;
156
157 // find next matching tag
158 int tag_len = wxStrlen(tagBuffer);
159 while (pos < end)
160 {
161 // find the ending tag
162 while (pos + 1 < end &&
163 (*pos != '<' || *(pos+1) != '/'))
164 ++pos;
165 if (*pos == '<')
166 ++pos;
167
168 // see if it matches
169 int match_pos = 0;
170 while (pos < end && match_pos < tag_len )
171 {
172 wxChar c = *pos;
173 if ( c == '>' || c == '<' )
174 break;
175
176 // cast to wxChar needed to suppress warning in
177 // Unicode build
178 if ((wxChar)wxToupper(c) == tagBuffer[match_pos])
179 {
180 ++match_pos;
181 }
182 else if (c == wxT(' ') || c == wxT('\n') ||
183 c == wxT('\r') || c == wxT('\t'))
184 {
185 // need to skip over these
186 }
187 else
188 {
189 match_pos = 0;
190 }
191 ++pos;
192 }
193
194 // found a match
195 if (match_pos == tag_len)
196 {
197 pos = pos - tag_len - 3;
198 foundCloseTag = true;
199 break;
200 }
201 else // keep looking for the closing tag
202 {
203 ++pos;
204 }
205 }
206 if (!foundCloseTag)
207 {
208 // we didn't find closing tag; this means the markup
209 // is incorrect and the best thing we can do is to
210 // ignore the unclosed tag and continue parsing as if
211 // it didn't exist:
212 pos = old_pos;
213 }
214 }
215 }
216 }
217
218 // ok, we're done, now we'll free .Name members of cache - we don't need it anymore:
219 for ( wxHtmlTagsCacheData::iterator i = Cache().begin();
220 i != Cache().end(); ++i )
221 {
222 wxDELETEA(i->Name);
223 }
224 }
225
226 wxHtmlTagsCache::~wxHtmlTagsCache()
227 {
228 delete m_Cache;
229 }
230
231 void wxHtmlTagsCache::QueryTag(const wxString::const_iterator& at,
232 const wxString::const_iterator& inputEnd,
233 wxString::const_iterator *end1,
234 wxString::const_iterator *end2,
235 bool *hasEnding)
236 {
237 if (Cache().empty())
238 {
239 *end1 =
240 *end2 = inputEnd;
241 *hasEnding = true;
242 return;
243 }
244
245 if (Cache()[m_CachePos].Key != at)
246 {
247 int delta = (at < Cache()[m_CachePos].Key) ? -1 : 1;
248 do
249 {
250 m_CachePos += delta;
251
252 if ( m_CachePos < 0 || m_CachePos >= (int)Cache().size() )
253 {
254 if ( m_CachePos < 0 )
255 m_CachePos = 0;
256 else
257 m_CachePos = Cache().size() - 1;
258 // something is very wrong with HTML, give up by returning an
259 // impossibly large value which is going to be ignored by the
260 // caller
261 *end1 =
262 *end2 = inputEnd;
263 *hasEnding = true;
264 return;
265 }
266 }
267 while (Cache()[m_CachePos].Key != at);
268 }
269
270 switch ( Cache()[m_CachePos].type )
271 {
272 case wxHtmlCacheItem::Type_Normal:
273 *end1 = Cache()[m_CachePos].End1;
274 *end2 = Cache()[m_CachePos].End2;
275 *hasEnding = true;
276 break;
277
278 case wxHtmlCacheItem::Type_EndingTag:
279 wxFAIL_MSG("QueryTag called for ending tag - can't be");
280 // but if it does happen, fall through, better than crashing
281
282 case wxHtmlCacheItem::Type_NoMatchingEndingTag:
283 // If input HTML is invalid and there's no closing tag for this
284 // one, pretend that it runs all the way to the end of input
285 *end1 = inputEnd;
286 *end2 = inputEnd;
287 *hasEnding = false;
288 break;
289 }
290 }
291
292
293
294
295 //-----------------------------------------------------------------------------
296 // wxHtmlTag
297 //-----------------------------------------------------------------------------
298
299 wxHtmlTag::wxHtmlTag(wxHtmlTag *parent,
300 const wxString *source,
301 const wxString::const_iterator& pos,
302 const wxString::const_iterator& end_pos,
303 wxHtmlTagsCache *cache,
304 wxHtmlEntitiesParser *entParser)
305 {
306 /* Setup DOM relations */
307
308 m_Next = NULL;
309 m_FirstChild = m_LastChild = NULL;
310 m_Parent = parent;
311 if (parent)
312 {
313 m_Prev = m_Parent->m_LastChild;
314 if (m_Prev == NULL)
315 m_Parent->m_FirstChild = this;
316 else
317 m_Prev->m_Next = this;
318 m_Parent->m_LastChild = this;
319 }
320 else
321 m_Prev = NULL;
322
323 /* Find parameters and their values: */
324
325 wxChar c wxDUMMY_INITIALIZE(0);
326
327 // fill-in name, params and begin pos:
328 wxString::const_iterator i(pos+1);
329
330 // find tag's name and convert it to uppercase:
331 while ((i < end_pos) &&
332 ((c = *(i++)) != wxT(' ') && c != wxT('\r') &&
333 c != wxT('\n') && c != wxT('\t') &&
334 c != wxT('>') && c != wxT('/')))
335 {
336 if ((c >= wxT('a')) && (c <= wxT('z')))
337 c -= (wxT('a') - wxT('A'));
338 m_Name << c;
339 }
340
341 // if the tag has parameters, read them and "normalize" them,
342 // i.e. convert to uppercase, replace whitespaces by spaces and
343 // remove whitespaces around '=':
344 if (*(i-1) != wxT('>'))
345 {
346 #define IS_WHITE(c) (c == wxT(' ') || c == wxT('\r') || \
347 c == wxT('\n') || c == wxT('\t'))
348 wxString pname, pvalue;
349 wxChar quote;
350 enum
351 {
352 ST_BEFORE_NAME = 1,
353 ST_NAME,
354 ST_BEFORE_EQ,
355 ST_BEFORE_VALUE,
356 ST_VALUE
357 } state;
358
359 quote = 0;
360 state = ST_BEFORE_NAME;
361 while (i < end_pos)
362 {
363 c = *(i++);
364
365 if (c == wxT('>') && !(state == ST_VALUE && quote != 0))
366 {
367 if (state == ST_BEFORE_EQ || state == ST_NAME)
368 {
369 m_ParamNames.Add(pname);
370 m_ParamValues.Add(wxGetEmptyString());
371 }
372 else if (state == ST_VALUE && quote == 0)
373 {
374 m_ParamNames.Add(pname);
375 if (entParser)
376 m_ParamValues.Add(entParser->Parse(pvalue));
377 else
378 m_ParamValues.Add(pvalue);
379 }
380 break;
381 }
382 switch (state)
383 {
384 case ST_BEFORE_NAME:
385 if (!IS_WHITE(c))
386 {
387 pname = c;
388 state = ST_NAME;
389 }
390 break;
391 case ST_NAME:
392 if (IS_WHITE(c))
393 state = ST_BEFORE_EQ;
394 else if (c == wxT('='))
395 state = ST_BEFORE_VALUE;
396 else
397 pname << c;
398 break;
399 case ST_BEFORE_EQ:
400 if (c == wxT('='))
401 state = ST_BEFORE_VALUE;
402 else if (!IS_WHITE(c))
403 {
404 m_ParamNames.Add(pname);
405 m_ParamValues.Add(wxGetEmptyString());
406 pname = c;
407 state = ST_NAME;
408 }
409 break;
410 case ST_BEFORE_VALUE:
411 if (!IS_WHITE(c))
412 {
413 if (c == wxT('"') || c == wxT('\''))
414 quote = c, pvalue = wxGetEmptyString();
415 else
416 quote = 0, pvalue = c;
417 state = ST_VALUE;
418 }
419 break;
420 case ST_VALUE:
421 if ((quote != 0 && c == quote) ||
422 (quote == 0 && IS_WHITE(c)))
423 {
424 m_ParamNames.Add(pname);
425 if (quote == 0)
426 {
427 // VS: backward compatibility, no real reason,
428 // but wxHTML code relies on this... :(
429 pvalue.MakeUpper();
430 }
431 if (entParser)
432 m_ParamValues.Add(entParser->Parse(pvalue));
433 else
434 m_ParamValues.Add(pvalue);
435 state = ST_BEFORE_NAME;
436 }
437 else
438 pvalue << c;
439 break;
440 }
441 }
442
443 #undef IS_WHITE
444 }
445 m_Begin = i;
446 cache->QueryTag(pos, source->end(), &m_End1, &m_End2, &m_hasEnding);
447 if (m_End1 > end_pos) m_End1 = end_pos;
448 if (m_End2 > end_pos) m_End2 = end_pos;
449
450 #if WXWIN_COMPATIBILITY_2_8
451 m_sourceStart = source->begin();
452 #endif
453
454 // Try to parse any style parameters that can be handled simply by
455 // converting them to the equivalent HTML 3 attributes: this is a far cry
456 // from perfect but better than nothing.
457 static const struct EquivAttr
458 {
459 const char *style;
460 const char *attr;
461 } equivAttrs[] =
462 {
463 { "text-align", "ALIGN" },
464 { "width", "WIDTH" },
465 { "vertical-align", "VALIGN" },
466 { "background", "BGCOLOR" },
467 { "background-color", "BGCOLOR" },
468 };
469
470 wxHtmlStyleParams styleParams(*this);
471 for ( unsigned n = 0; n < WXSIZEOF(equivAttrs); n++ )
472 {
473 const EquivAttr& ea = equivAttrs[n];
474 if ( styleParams.HasParam(ea.style) && !HasParam(ea.attr) )
475 {
476 m_ParamNames.Add(ea.attr);
477 m_ParamValues.Add(styleParams.GetParam(ea.style));
478 }
479 }
480 }
481
482 wxHtmlTag::~wxHtmlTag()
483 {
484 wxHtmlTag *t1, *t2;
485 t1 = m_FirstChild;
486 while (t1)
487 {
488 t2 = t1->GetNextSibling();
489 delete t1;
490 t1 = t2;
491 }
492 }
493
494 bool wxHtmlTag::HasParam(const wxString& par) const
495 {
496 return (m_ParamNames.Index(par, false) != wxNOT_FOUND);
497 }
498
499 wxString wxHtmlTag::GetParam(const wxString& par, bool with_quotes) const
500 {
501 int index = m_ParamNames.Index(par, false);
502 if (index == wxNOT_FOUND)
503 return wxGetEmptyString();
504 if (with_quotes)
505 {
506 // VS: backward compatibility, seems to be never used by wxHTML...
507 wxString s;
508 s << wxT('"') << m_ParamValues[index] << wxT('"');
509 return s;
510 }
511 else
512 return m_ParamValues[index];
513 }
514
515 int wxHtmlTag::ScanParam(const wxString& par,
516 const char *format,
517 void *param) const
518 {
519 wxString parval = GetParam(par);
520 return wxSscanf(parval, format, param);
521 }
522
523 int wxHtmlTag::ScanParam(const wxString& par,
524 const wchar_t *format,
525 void *param) const
526 {
527 wxString parval = GetParam(par);
528 return wxSscanf(parval, format, param);
529 }
530
531 /* static */
532 bool wxHtmlTag::ParseAsColour(const wxString& str, wxColour *clr)
533 {
534 wxCHECK_MSG( clr, false, wxT("invalid colour argument") );
535
536 // handle colours defined in HTML 4.0 first:
537 if (str.length() > 1 && str[0] != wxT('#'))
538 {
539 #define HTML_COLOUR(name, r, g, b) \
540 if (str.IsSameAs(wxS(name), false)) \
541 { clr->Set(r, g, b); return true; }
542 HTML_COLOUR("black", 0x00,0x00,0x00)
543 HTML_COLOUR("silver", 0xC0,0xC0,0xC0)
544 HTML_COLOUR("gray", 0x80,0x80,0x80)
545 HTML_COLOUR("white", 0xFF,0xFF,0xFF)
546 HTML_COLOUR("maroon", 0x80,0x00,0x00)
547 HTML_COLOUR("red", 0xFF,0x00,0x00)
548 HTML_COLOUR("purple", 0x80,0x00,0x80)
549 HTML_COLOUR("fuchsia", 0xFF,0x00,0xFF)
550 HTML_COLOUR("green", 0x00,0x80,0x00)
551 HTML_COLOUR("lime", 0x00,0xFF,0x00)
552 HTML_COLOUR("olive", 0x80,0x80,0x00)
553 HTML_COLOUR("yellow", 0xFF,0xFF,0x00)
554 HTML_COLOUR("navy", 0x00,0x00,0x80)
555 HTML_COLOUR("blue", 0x00,0x00,0xFF)
556 HTML_COLOUR("teal", 0x00,0x80,0x80)
557 HTML_COLOUR("aqua", 0x00,0xFF,0xFF)
558 #undef HTML_COLOUR
559 }
560
561 // then try to parse #rrggbb representations or set from other well
562 // known names (note that this doesn't strictly conform to HTML spec,
563 // but it doesn't do real harm -- but it *must* be done after the standard
564 // colors are handled above):
565 if (clr->Set(str))
566 return true;
567
568 return false;
569 }
570
571 bool wxHtmlTag::GetParamAsColour(const wxString& par, wxColour *clr) const
572 {
573 const wxString str = GetParam(par);
574 return !str.empty() && ParseAsColour(str, clr);
575 }
576
577 bool wxHtmlTag::GetParamAsInt(const wxString& par, int *clr) const
578 {
579 if ( !HasParam(par) )
580 return false;
581
582 long i;
583 if ( !GetParam(par).ToLong(&i) )
584 return false;
585
586 *clr = (int)i;
587 return true;
588 }
589
590 bool
591 wxHtmlTag::GetParamAsIntOrPercent(const wxString& par,
592 int* value,
593 bool& isPercent) const
594 {
595 const wxString param = GetParam(par);
596 if ( param.empty() )
597 return false;
598
599 wxString num;
600 if ( param.EndsWith("%", &num) )
601 {
602 isPercent = true;
603 }
604 else
605 {
606 isPercent = false;
607 num = param;
608 }
609
610 long lValue;
611 if ( !num.ToLong(&lValue) )
612 return false;
613
614 if ( lValue > INT_MAX || lValue < INT_MIN )
615 return false;
616
617 *value = static_cast<int>(lValue);
618
619 return true;
620 }
621
622 wxString wxHtmlTag::GetAllParams() const
623 {
624 // VS: this function is for backward compatibility only,
625 // never used by wxHTML
626 wxString s;
627 size_t cnt = m_ParamNames.GetCount();
628 for (size_t i = 0; i < cnt; i++)
629 {
630 s << m_ParamNames[i];
631 s << wxT('=');
632 if (m_ParamValues[i].Find(wxT('"')) != wxNOT_FOUND)
633 s << wxT('\'') << m_ParamValues[i] << wxT('\'');
634 else
635 s << wxT('"') << m_ParamValues[i] << wxT('"');
636 }
637 return s;
638 }
639
640 wxHtmlTag *wxHtmlTag::GetFirstSibling() const
641 {
642 if (m_Parent)
643 return m_Parent->m_FirstChild;
644 else
645 {
646 wxHtmlTag *cur = (wxHtmlTag*)this;
647 while (cur->m_Prev)
648 cur = cur->m_Prev;
649 return cur;
650 }
651 }
652
653 wxHtmlTag *wxHtmlTag::GetLastSibling() const
654 {
655 if (m_Parent)
656 return m_Parent->m_LastChild;
657 else
658 {
659 wxHtmlTag *cur = (wxHtmlTag*)this;
660 while (cur->m_Next)
661 cur = cur->m_Next;
662 return cur;
663 }
664 }
665
666 wxHtmlTag *wxHtmlTag::GetNextTag() const
667 {
668 if (m_FirstChild) return m_FirstChild;
669 if (m_Next) return m_Next;
670 wxHtmlTag *cur = m_Parent;
671 if (!cur) return NULL;
672 while (cur->m_Parent && !cur->m_Next)
673 cur = cur->m_Parent;
674 return cur->m_Next;
675 }
676
677 #endif