]> git.saurik.com Git - wxWidgets.git/blob - src/html/htmltag.cpp
Fix another crash when conversion fails in Unix PostScript code.
[wxWidgets.git] / src / html / htmltag.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/html/htmltag.cpp
3 // Purpose: wxHtmlTag class (represents single tag)
4 // Author: Vaclav Slavik
5 // Copyright: (c) 1999 Vaclav Slavik
6 // Licence: wxWindows licence
7 /////////////////////////////////////////////////////////////////////////////
8
9 #include "wx/wxprec.h"
10
11 #ifdef __BORLANDC__
12 #pragma hdrstop
13 #endif
14
15 #if wxUSE_HTML
16
17 #include "wx/html/htmltag.h"
18
19 #ifndef WX_PRECOMP
20 #include "wx/colour.h"
21 #include "wx/wxcrtvararg.h"
22 #endif
23
24 #include "wx/html/htmlpars.h"
25 #include "wx/html/styleparams.h"
26
27 #include "wx/vector.h"
28
29 #include <stdio.h> // for vsscanf
30 #include <stdarg.h>
31
32 //-----------------------------------------------------------------------------
33 // wxHtmlTagsCache
34 //-----------------------------------------------------------------------------
35
36 struct wxHtmlCacheItem
37 {
38 // this is "pos" value passed to wxHtmlTag's constructor.
39 // it is position of '<' character of the tag
40 wxString::const_iterator Key;
41
42 // Tag type
43 enum Type
44 {
45 Type_Normal, // normal tag with a matching ending tag
46 Type_NoMatchingEndingTag, // there's no ending tag for this tag
47 Type_EndingTag // this is ending tag </..>
48 };
49 Type type;
50
51 // end positions for the tag:
52 // end1 is '<' of ending tag,
53 // end2 is '>' or both are
54 wxString::const_iterator End1, End2;
55
56 // name of this tag
57 wxChar *Name;
58 };
59
60 // NB: this is an empty class and not typedef because of forward declaration
61 class wxHtmlTagsCacheData : public wxVector<wxHtmlCacheItem>
62 {
63 };
64
65 bool wxIsCDATAElement(const wxChar *tag)
66 {
67 return (wxStrcmp(tag, wxT("SCRIPT")) == 0) ||
68 (wxStrcmp(tag, wxT("STYLE")) == 0);
69 }
70
71 bool wxIsCDATAElement(const wxString& tag)
72 {
73 return (wxStrcmp(tag.wx_str(), wxS("SCRIPT")) == 0) ||
74 (wxStrcmp(tag.wx_str(), wxS("STYLE")) == 0);
75 }
76
77 wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
78 {
79 m_Cache = new wxHtmlTagsCacheData;
80 m_CachePos = 0;
81
82 wxChar tagBuffer[256];
83
84 const wxString::const_iterator end = source.end();
85 for ( wxString::const_iterator pos = source.begin(); pos < end; ++pos )
86 {
87 if (*pos != wxT('<'))
88 continue;
89
90 // possible tag start found:
91
92 // don't cache comment tags
93 if ( wxHtmlParser::SkipCommentTag(pos, end) )
94 continue;
95
96 // Remember the starting tag position.
97 wxString::const_iterator stpos = pos++;
98
99 // And look for the ending one.
100 int i;
101 for ( i = 0;
102 pos < end && i < (int)WXSIZEOF(tagBuffer) - 1 &&
103 *pos != wxT('>') && !wxIsspace(*pos);
104 ++i, ++pos )
105 {
106 tagBuffer[i] = (wxChar)wxToupper(*pos);
107 }
108 tagBuffer[i] = wxT('\0');
109
110 while (pos < end && *pos != wxT('>'))
111 ++pos;
112
113 if ( pos == end )
114 {
115 // We didn't find a closing bracket, this is not a valid tag after
116 // all. Notice that we need to roll back pos to avoid creating an
117 // invalid iterator when "++pos" is done in the loop statement.
118 --pos;
119
120 continue;
121 }
122
123 // We have a valid tag, add it to the cache.
124 size_t tg = Cache().size();
125 Cache().push_back(wxHtmlCacheItem());
126 Cache()[tg].Key = stpos;
127 Cache()[tg].Name = new wxChar[i+1];
128 memcpy(Cache()[tg].Name, tagBuffer, (i+1)*sizeof(wxChar));
129
130 if ((stpos+1) < end && *(stpos+1) == wxT('/')) // ending tag:
131 {
132 Cache()[tg].type = wxHtmlCacheItem::Type_EndingTag;
133 // find matching begin tag:
134 for (i = tg; i >= 0; i--)
135 {
136 if ((Cache()[i].type == wxHtmlCacheItem::Type_NoMatchingEndingTag) && (wxStrcmp(Cache()[i].Name, tagBuffer+1) == 0))
137 {
138 Cache()[i].type = wxHtmlCacheItem::Type_Normal;
139 Cache()[i].End1 = stpos;
140 Cache()[i].End2 = pos + 1;
141 break;
142 }
143 }
144 }
145 else
146 {
147 Cache()[tg].type = wxHtmlCacheItem::Type_NoMatchingEndingTag;
148
149 if (wxIsCDATAElement(tagBuffer))
150 {
151 // store the orig pos in case we are missing the closing
152 // tag (see below)
153 const wxString::const_iterator old_pos = pos;
154 bool foundCloseTag = false;
155
156 // find next matching tag
157 int tag_len = wxStrlen(tagBuffer);
158 while (pos < end)
159 {
160 // find the ending tag
161 while (pos + 1 < end &&
162 (*pos != '<' || *(pos+1) != '/'))
163 ++pos;
164 if (*pos == '<')
165 ++pos;
166
167 // see if it matches
168 int match_pos = 0;
169 while (pos < end && match_pos < tag_len )
170 {
171 wxChar c = *pos;
172 if ( c == '>' || c == '<' )
173 break;
174
175 // cast to wxChar needed to suppress warning in
176 // Unicode build
177 if ((wxChar)wxToupper(c) == tagBuffer[match_pos])
178 {
179 ++match_pos;
180 }
181 else if (c == wxT(' ') || c == wxT('\n') ||
182 c == wxT('\r') || c == wxT('\t'))
183 {
184 // need to skip over these
185 }
186 else
187 {
188 match_pos = 0;
189 }
190 ++pos;
191 }
192
193 // found a match
194 if (match_pos == tag_len)
195 {
196 pos = pos - tag_len - 3;
197 foundCloseTag = true;
198 break;
199 }
200 else // keep looking for the closing tag
201 {
202 ++pos;
203 }
204 }
205 if (!foundCloseTag)
206 {
207 // we didn't find closing tag; this means the markup
208 // is incorrect and the best thing we can do is to
209 // ignore the unclosed tag and continue parsing as if
210 // it didn't exist:
211 pos = old_pos;
212 }
213 }
214 }
215 }
216
217 // ok, we're done, now we'll free .Name members of cache - we don't need it anymore:
218 for ( wxHtmlTagsCacheData::iterator i = Cache().begin();
219 i != Cache().end(); ++i )
220 {
221 wxDELETEA(i->Name);
222 }
223 }
224
225 wxHtmlTagsCache::~wxHtmlTagsCache()
226 {
227 delete m_Cache;
228 }
229
230 void wxHtmlTagsCache::QueryTag(const wxString::const_iterator& at,
231 const wxString::const_iterator& inputEnd,
232 wxString::const_iterator *end1,
233 wxString::const_iterator *end2,
234 bool *hasEnding)
235 {
236 if (Cache().empty())
237 {
238 *end1 =
239 *end2 = inputEnd;
240 *hasEnding = true;
241 return;
242 }
243
244 if (Cache()[m_CachePos].Key != at)
245 {
246 int delta = (at < Cache()[m_CachePos].Key) ? -1 : 1;
247 do
248 {
249 m_CachePos += delta;
250
251 if ( m_CachePos < 0 || m_CachePos >= (int)Cache().size() )
252 {
253 if ( m_CachePos < 0 )
254 m_CachePos = 0;
255 else
256 m_CachePos = Cache().size() - 1;
257 // something is very wrong with HTML, give up by returning an
258 // impossibly large value which is going to be ignored by the
259 // caller
260 *end1 =
261 *end2 = inputEnd;
262 *hasEnding = true;
263 return;
264 }
265 }
266 while (Cache()[m_CachePos].Key != at);
267 }
268
269 switch ( Cache()[m_CachePos].type )
270 {
271 case wxHtmlCacheItem::Type_Normal:
272 *end1 = Cache()[m_CachePos].End1;
273 *end2 = Cache()[m_CachePos].End2;
274 *hasEnding = true;
275 break;
276
277 case wxHtmlCacheItem::Type_EndingTag:
278 wxFAIL_MSG("QueryTag called for ending tag - can't be");
279 // but if it does happen, fall through, better than crashing
280
281 case wxHtmlCacheItem::Type_NoMatchingEndingTag:
282 // If input HTML is invalid and there's no closing tag for this
283 // one, pretend that it runs all the way to the end of input
284 *end1 = inputEnd;
285 *end2 = inputEnd;
286 *hasEnding = false;
287 break;
288 }
289 }
290
291
292
293
294 //-----------------------------------------------------------------------------
295 // wxHtmlTag
296 //-----------------------------------------------------------------------------
297
298 wxHtmlTag::wxHtmlTag(wxHtmlTag *parent,
299 const wxString *source,
300 const wxString::const_iterator& pos,
301 const wxString::const_iterator& end_pos,
302 wxHtmlTagsCache *cache,
303 wxHtmlEntitiesParser *entParser)
304 {
305 /* Setup DOM relations */
306
307 m_Next = NULL;
308 m_FirstChild = m_LastChild = NULL;
309 m_Parent = parent;
310 if (parent)
311 {
312 m_Prev = m_Parent->m_LastChild;
313 if (m_Prev == NULL)
314 m_Parent->m_FirstChild = this;
315 else
316 m_Prev->m_Next = this;
317 m_Parent->m_LastChild = this;
318 }
319 else
320 m_Prev = NULL;
321
322 /* Find parameters and their values: */
323
324 wxChar c wxDUMMY_INITIALIZE(0);
325
326 // fill-in name, params and begin pos:
327 wxString::const_iterator i(pos+1);
328
329 // find tag's name and convert it to uppercase:
330 while ((i < end_pos) &&
331 ((c = *(i++)) != wxT(' ') && c != wxT('\r') &&
332 c != wxT('\n') && c != wxT('\t') &&
333 c != wxT('>') && c != wxT('/')))
334 {
335 if ((c >= wxT('a')) && (c <= wxT('z')))
336 c -= (wxT('a') - wxT('A'));
337 m_Name << c;
338 }
339
340 // if the tag has parameters, read them and "normalize" them,
341 // i.e. convert to uppercase, replace whitespaces by spaces and
342 // remove whitespaces around '=':
343 if (*(i-1) != wxT('>'))
344 {
345 #define IS_WHITE(c) (c == wxT(' ') || c == wxT('\r') || \
346 c == wxT('\n') || c == wxT('\t'))
347 wxString pname, pvalue;
348 wxChar quote;
349 enum
350 {
351 ST_BEFORE_NAME = 1,
352 ST_NAME,
353 ST_BEFORE_EQ,
354 ST_BEFORE_VALUE,
355 ST_VALUE
356 } state;
357
358 quote = 0;
359 state = ST_BEFORE_NAME;
360 while (i < end_pos)
361 {
362 c = *(i++);
363
364 if (c == wxT('>') && !(state == ST_VALUE && quote != 0))
365 {
366 if (state == ST_BEFORE_EQ || state == ST_NAME)
367 {
368 m_ParamNames.Add(pname);
369 m_ParamValues.Add(wxGetEmptyString());
370 }
371 else if (state == ST_VALUE && quote == 0)
372 {
373 m_ParamNames.Add(pname);
374 if (entParser)
375 m_ParamValues.Add(entParser->Parse(pvalue));
376 else
377 m_ParamValues.Add(pvalue);
378 }
379 break;
380 }
381 switch (state)
382 {
383 case ST_BEFORE_NAME:
384 if (!IS_WHITE(c))
385 {
386 pname = c;
387 state = ST_NAME;
388 }
389 break;
390 case ST_NAME:
391 if (IS_WHITE(c))
392 state = ST_BEFORE_EQ;
393 else if (c == wxT('='))
394 state = ST_BEFORE_VALUE;
395 else
396 pname << c;
397 break;
398 case ST_BEFORE_EQ:
399 if (c == wxT('='))
400 state = ST_BEFORE_VALUE;
401 else if (!IS_WHITE(c))
402 {
403 m_ParamNames.Add(pname);
404 m_ParamValues.Add(wxGetEmptyString());
405 pname = c;
406 state = ST_NAME;
407 }
408 break;
409 case ST_BEFORE_VALUE:
410 if (!IS_WHITE(c))
411 {
412 if (c == wxT('"') || c == wxT('\''))
413 quote = c, pvalue = wxGetEmptyString();
414 else
415 quote = 0, pvalue = c;
416 state = ST_VALUE;
417 }
418 break;
419 case ST_VALUE:
420 if ((quote != 0 && c == quote) ||
421 (quote == 0 && IS_WHITE(c)))
422 {
423 m_ParamNames.Add(pname);
424 if (quote == 0)
425 {
426 // VS: backward compatibility, no real reason,
427 // but wxHTML code relies on this... :(
428 pvalue.MakeUpper();
429 }
430 if (entParser)
431 m_ParamValues.Add(entParser->Parse(pvalue));
432 else
433 m_ParamValues.Add(pvalue);
434 state = ST_BEFORE_NAME;
435 }
436 else
437 pvalue << c;
438 break;
439 }
440 }
441
442 #undef IS_WHITE
443 }
444 m_Begin = i;
445 cache->QueryTag(pos, source->end(), &m_End1, &m_End2, &m_hasEnding);
446 if (m_End1 > end_pos) m_End1 = end_pos;
447 if (m_End2 > end_pos) m_End2 = end_pos;
448
449 #if WXWIN_COMPATIBILITY_2_8
450 m_sourceStart = source->begin();
451 #endif
452
453 // Try to parse any style parameters that can be handled simply by
454 // converting them to the equivalent HTML 3 attributes: this is a far cry
455 // from perfect but better than nothing.
456 static const struct EquivAttr
457 {
458 const char *style;
459 const char *attr;
460 } equivAttrs[] =
461 {
462 { "text-align", "ALIGN" },
463 { "width", "WIDTH" },
464 { "vertical-align", "VALIGN" },
465 { "background", "BGCOLOR" },
466 { "background-color", "BGCOLOR" },
467 };
468
469 wxHtmlStyleParams styleParams(*this);
470 for ( unsigned n = 0; n < WXSIZEOF(equivAttrs); n++ )
471 {
472 const EquivAttr& ea = equivAttrs[n];
473 if ( styleParams.HasParam(ea.style) && !HasParam(ea.attr) )
474 {
475 m_ParamNames.Add(ea.attr);
476 m_ParamValues.Add(styleParams.GetParam(ea.style));
477 }
478 }
479 }
480
481 wxHtmlTag::~wxHtmlTag()
482 {
483 wxHtmlTag *t1, *t2;
484 t1 = m_FirstChild;
485 while (t1)
486 {
487 t2 = t1->GetNextSibling();
488 delete t1;
489 t1 = t2;
490 }
491 }
492
493 bool wxHtmlTag::HasParam(const wxString& par) const
494 {
495 return (m_ParamNames.Index(par, false) != wxNOT_FOUND);
496 }
497
498 wxString wxHtmlTag::GetParam(const wxString& par, bool with_quotes) const
499 {
500 int index = m_ParamNames.Index(par, false);
501 if (index == wxNOT_FOUND)
502 return wxGetEmptyString();
503 if (with_quotes)
504 {
505 // VS: backward compatibility, seems to be never used by wxHTML...
506 wxString s;
507 s << wxT('"') << m_ParamValues[index] << wxT('"');
508 return s;
509 }
510 else
511 return m_ParamValues[index];
512 }
513
514 int wxHtmlTag::ScanParam(const wxString& par,
515 const char *format,
516 void *param) const
517 {
518 wxString parval = GetParam(par);
519 return wxSscanf(parval, format, param);
520 }
521
522 int wxHtmlTag::ScanParam(const wxString& par,
523 const wchar_t *format,
524 void *param) const
525 {
526 wxString parval = GetParam(par);
527 return wxSscanf(parval, format, param);
528 }
529
530 /* static */
531 bool wxHtmlTag::ParseAsColour(const wxString& str, wxColour *clr)
532 {
533 wxCHECK_MSG( clr, false, wxT("invalid colour argument") );
534
535 // handle colours defined in HTML 4.0 first:
536 if (str.length() > 1 && str[0] != wxT('#'))
537 {
538 #define HTML_COLOUR(name, r, g, b) \
539 if (str.IsSameAs(wxS(name), false)) \
540 { clr->Set(r, g, b); return true; }
541 HTML_COLOUR("black", 0x00,0x00,0x00)
542 HTML_COLOUR("silver", 0xC0,0xC0,0xC0)
543 HTML_COLOUR("gray", 0x80,0x80,0x80)
544 HTML_COLOUR("white", 0xFF,0xFF,0xFF)
545 HTML_COLOUR("maroon", 0x80,0x00,0x00)
546 HTML_COLOUR("red", 0xFF,0x00,0x00)
547 HTML_COLOUR("purple", 0x80,0x00,0x80)
548 HTML_COLOUR("fuchsia", 0xFF,0x00,0xFF)
549 HTML_COLOUR("green", 0x00,0x80,0x00)
550 HTML_COLOUR("lime", 0x00,0xFF,0x00)
551 HTML_COLOUR("olive", 0x80,0x80,0x00)
552 HTML_COLOUR("yellow", 0xFF,0xFF,0x00)
553 HTML_COLOUR("navy", 0x00,0x00,0x80)
554 HTML_COLOUR("blue", 0x00,0x00,0xFF)
555 HTML_COLOUR("teal", 0x00,0x80,0x80)
556 HTML_COLOUR("aqua", 0x00,0xFF,0xFF)
557 #undef HTML_COLOUR
558 }
559
560 // then try to parse #rrggbb representations or set from other well
561 // known names (note that this doesn't strictly conform to HTML spec,
562 // but it doesn't do real harm -- but it *must* be done after the standard
563 // colors are handled above):
564 if (clr->Set(str))
565 return true;
566
567 return false;
568 }
569
570 bool wxHtmlTag::GetParamAsColour(const wxString& par, wxColour *clr) const
571 {
572 const wxString str = GetParam(par);
573 return !str.empty() && ParseAsColour(str, clr);
574 }
575
576 bool wxHtmlTag::GetParamAsInt(const wxString& par, int *clr) const
577 {
578 if ( !HasParam(par) )
579 return false;
580
581 long i;
582 if ( !GetParam(par).ToLong(&i) )
583 return false;
584
585 *clr = (int)i;
586 return true;
587 }
588
589 bool
590 wxHtmlTag::GetParamAsIntOrPercent(const wxString& par,
591 int* value,
592 bool& isPercent) const
593 {
594 const wxString param = GetParam(par);
595 if ( param.empty() )
596 return false;
597
598 wxString num;
599 if ( param.EndsWith("%", &num) )
600 {
601 isPercent = true;
602 }
603 else
604 {
605 isPercent = false;
606 num = param;
607 }
608
609 long lValue;
610 if ( !num.ToLong(&lValue) )
611 return false;
612
613 if ( lValue > INT_MAX || lValue < INT_MIN )
614 return false;
615
616 *value = static_cast<int>(lValue);
617
618 return true;
619 }
620
621 wxString wxHtmlTag::GetAllParams() const
622 {
623 // VS: this function is for backward compatibility only,
624 // never used by wxHTML
625 wxString s;
626 size_t cnt = m_ParamNames.GetCount();
627 for (size_t i = 0; i < cnt; i++)
628 {
629 s << m_ParamNames[i];
630 s << wxT('=');
631 if (m_ParamValues[i].Find(wxT('"')) != wxNOT_FOUND)
632 s << wxT('\'') << m_ParamValues[i] << wxT('\'');
633 else
634 s << wxT('"') << m_ParamValues[i] << wxT('"');
635 }
636 return s;
637 }
638
639 wxHtmlTag *wxHtmlTag::GetFirstSibling() const
640 {
641 if (m_Parent)
642 return m_Parent->m_FirstChild;
643 else
644 {
645 wxHtmlTag *cur = (wxHtmlTag*)this;
646 while (cur->m_Prev)
647 cur = cur->m_Prev;
648 return cur;
649 }
650 }
651
652 wxHtmlTag *wxHtmlTag::GetLastSibling() const
653 {
654 if (m_Parent)
655 return m_Parent->m_LastChild;
656 else
657 {
658 wxHtmlTag *cur = (wxHtmlTag*)this;
659 while (cur->m_Next)
660 cur = cur->m_Next;
661 return cur;
662 }
663 }
664
665 wxHtmlTag *wxHtmlTag::GetNextTag() const
666 {
667 if (m_FirstChild) return m_FirstChild;
668 if (m_Next) return m_Next;
669 wxHtmlTag *cur = m_Parent;
670 if (!cur) return NULL;
671 while (cur->m_Parent && !cur->m_Next)
672 cur = cur->m_Parent;
673 return cur->m_Next;
674 }
675
676 #endif