]> git.saurik.com Git - wxWidgets.git/blame - src/html/htmltag.cpp
really correct detecting the end of UTF-7-encoded strings
[wxWidgets.git] / src / html / htmltag.cpp
CommitLineData
/////////////////////////////////////////////////////////////////////////////
// Name:        src/html/htmltag.cpp
// Purpose:     wxHtmlTag class (represents single tag)
// Author:      Vaclav Slavik
// RCS-ID:      $Id$
// Copyright:   (c) 1999 Vaclav Slavik
// Licence:     wxWindows licence
/////////////////////////////////////////////////////////////////////////////
9
3096bd2f 10#include "wx/wxprec.h"
5526e819 11
2b5f62a0 12#ifdef __BORLANDC__
93763ad5 13 #pragma hdrstop
5526e819
VS
14#endif
15
93763ad5
WS
16#if wxUSE_HTML
17
40989e46
WS
18#include "wx/html/htmltag.h"
19
b4f4d3dd 20#ifndef WX_PRECOMP
7cf41a5d 21 #include "wx/colour.h"
193d0c93 22 #include "wx/wxcrtvararg.h"
5526e819
VS
23#endif
24
daa616fc 25#include "wx/html/htmlpars.h"
4fe7567d
VS
26#include "wx/vector.h"
27
7e1e0960 28#include <stdio.h> // for vsscanf
5526e819
VS
29#include <stdarg.h>
30
5526e819
VS
//-----------------------------------------------------------------------------
// wxHtmlTagsCache
//-----------------------------------------------------------------------------
34
5e8e25e7
VS
35struct wxHtmlCacheItem
36{
37 // this is "pos" value passed to wxHtmlTag's constructor.
38 // it is position of '<' character of the tag
b1a3a964
VS
39 wxString::const_iterator Key;
40
41 // Tag type
42 enum Type
43 {
44 Type_Normal, // normal tag with a matching ending tag
45 Type_NoMatchingEndingTag, // there's no ending tag for this tag
46 Type_EndingTag // this is ending tag </..>
47 };
48 Type type;
5e8e25e7
VS
49
50 // end positions for the tag:
51 // end1 is '<' of ending tag,
52 // end2 is '>' or both are
b1a3a964 53 wxString::const_iterator End1, End2;
5e8e25e7
VS
54
55 // name of this tag
56 wxChar *Name;
57};
58
4fe7567d
VS
59// NB: this is an empty class and not typedef because of forward declaration
60class wxHtmlTagsCacheData : public wxVector<wxHtmlCacheItem>
61{
62};
5e8e25e7 63
07cc7ddc 64bool wxIsCDATAElement(const wxChar *tag)
7c6cd4a8
VS
65{
66 return (wxStrcmp(tag, _T("SCRIPT")) == 0) ||
67 (wxStrcmp(tag, _T("STYLE")) == 0);
68}
69
b1a3a964
VS
70bool wxIsCDATAElement(const wxString& tag)
71{
d9359369
VS
72 return (wxStrcmp(tag.wx_str(), wxS("SCRIPT")) == 0) ||
73 (wxStrcmp(tag.wx_str(), wxS("STYLE")) == 0);
b1a3a964
VS
74}
75
5526e819
VS
76wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
77{
4fe7567d
VS
78 m_Cache = new wxHtmlTagsCacheData;
79 m_CachePos = 0;
80
8cd82622 81 wxChar tagBuffer[256];
5526e819 82
b1a3a964
VS
83 const wxString::const_iterator end = source.end();
84 for ( wxString::const_iterator pos = source.begin(); pos < end; ++pos )
4f9297b0 85 {
b1a3a964 86 if (*pos == wxT('<')) // tag found:
a914db0f 87 {
4609ee2e 88 // don't cache comment tags
b1a3a964 89 if ( wxHtmlParser::SkipCommentTag(pos, source.end()) )
4609ee2e 90 continue;
4609ee2e 91
4fe7567d
VS
92 size_t tg = Cache().size();
93 Cache().push_back(wxHtmlCacheItem());
94
b1a3a964 95 wxString::const_iterator stpos = pos++;
4fe7567d 96 Cache()[tg].Key = stpos;
8cd82622 97
4f22f506 98 int i;
8cd82622 99 for ( i = 0;
b1a3a964
VS
100 pos < end && i < (int)WXSIZEOF(tagBuffer) - 1 &&
101 *pos != wxT('>') && !wxIsspace(*pos);
102 ++i, ++pos )
a914db0f 103 {
b1a3a964 104 tagBuffer[i] = (wxChar)wxToupper(*pos);
5526e819 105 }
8cd82622
VZ
106 tagBuffer[i] = _T('\0');
107
4fe7567d
VS
108 Cache()[tg].Name = new wxChar[i+1];
109 memcpy(Cache()[tg].Name, tagBuffer, (i+1)*sizeof(wxChar));
5526e819 110
b1a3a964
VS
111 while (pos < end && *pos != wxT('>'))
112 ++pos;
5526e819 113
b1a3a964 114 if ((stpos+1) < end && *(stpos+1) == wxT('/')) // ending tag:
a914db0f 115 {
b1a3a964 116 Cache()[tg].type = wxHtmlCacheItem::Type_EndingTag;
5526e819
VS
117 // find matching begin tag:
118 for (i = tg; i >= 0; i--)
b1a3a964
VS
119 {
120 if ((Cache()[i].type == wxHtmlCacheItem::Type_NoMatchingEndingTag) && (wxStrcmp(Cache()[i].Name, tagBuffer+1) == 0))
a914db0f 121 {
b1a3a964 122 Cache()[i].type = wxHtmlCacheItem::Type_Normal;
4fe7567d
VS
123 Cache()[i].End1 = stpos;
124 Cache()[i].End2 = pos + 1;
5526e819
VS
125 break;
126 }
b1a3a964 127 }
5526e819 128 }
8cd82622 129 else
a914db0f 130 {
b1a3a964 131 Cache()[tg].type = wxHtmlCacheItem::Type_NoMatchingEndingTag;
7448de8d 132
7c6cd4a8
VS
133 if (wxIsCDATAElement(tagBuffer))
134 {
313ffa19
VS
135 // store the orig pos in case we are missing the closing
136 // tag (see below)
b1a3a964 137 const wxString::const_iterator old_pos = pos;
313ffa19 138 bool foundCloseTag = false;
7448de8d 139
7c6cd4a8
VS
140 // find next matching tag
141 int tag_len = wxStrlen(tagBuffer);
b1a3a964 142 while (pos < end)
7c6cd4a8
VS
143 {
144 // find the ending tag
b1a3a964
VS
145 while (pos + 1 < end &&
146 (*pos != '<' || *(pos+1) != '/'))
7c6cd4a8 147 ++pos;
b1a3a964 148 if (*pos == '<')
7c6cd4a8 149 ++pos;
d1da8872 150
7c6cd4a8
VS
151 // see if it matches
152 int match_pos = 0;
b1a3a964
VS
153 while (pos < end && match_pos < tag_len )
154 {
155 wxChar c = *pos;
156 if ( c == '>' || c == '<' )
157 break;
158
5447d1b4
VZ
159 // cast to wxChar needed to suppress warning in
160 // Unicode build
b1a3a964
VS
161 if ((wxChar)wxToupper(c) == tagBuffer[match_pos])
162 {
7c6cd4a8 163 ++match_pos;
d1da8872 164 }
b1a3a964
VS
165 else if (c == wxT(' ') || c == wxT('\n') ||
166 c == wxT('\r') || c == wxT('\t'))
167 {
7c6cd4a8
VS
168 // need to skip over these
169 }
b1a3a964
VS
170 else
171 {
7c6cd4a8
VS
172 match_pos = 0;
173 }
174 ++pos;
175 }
176
177 // found a match
7448de8d 178 if (match_pos == tag_len)
313ffa19 179 {
b5d464b9 180 pos = pos - tag_len - 3;
313ffa19 181 foundCloseTag = true;
7c6cd4a8
VS
182 break;
183 }
313ffa19
VS
184 else // keep looking for the closing tag
185 {
7c6cd4a8
VS
186 ++pos;
187 }
188 }
313ffa19
VS
189 if (!foundCloseTag)
190 {
191 // we didn't find closing tag; this means the markup
192 // is incorrect and the best thing we can do is to
193 // ignore the unclosed tag and continue parsing as if
194 // it didn't exist:
195 pos = old_pos;
196 }
7c6cd4a8 197 }
5526e819
VS
198 }
199 }
5526e819
VS
200 }
201
202 // ok, we're done, now we'll free .Name members of cache - we don't need it anymore:
4fe7567d
VS
203 for ( wxHtmlTagsCacheData::iterator i = Cache().begin();
204 i != Cache().end(); ++i )
4f9297b0 205 {
4fe7567d
VS
206 delete[] i->Name;
207 i->Name = NULL;
5526e819
VS
208 }
209}
210
4fe7567d
VS
211wxHtmlTagsCache::~wxHtmlTagsCache()
212{
213 delete m_Cache;
214}
215
b1a3a964
VS
216void wxHtmlTagsCache::QueryTag(const wxString::const_iterator& at,
217 const wxString::const_iterator& inputEnd,
218 wxString::const_iterator *end1,
219 wxString::const_iterator *end2,
220 bool *hasEnding)
5526e819 221{
4fe7567d
VS
222 if (Cache().empty())
223 return;
224
225 if (Cache()[m_CachePos].Key != at)
4f9297b0 226 {
4fe7567d 227 int delta = (at < Cache()[m_CachePos].Key) ? -1 : 1;
8cd82622
VZ
228 do
229 {
b1a3a964
VS
230 m_CachePos += delta;
231
232 if ( m_CachePos < 0 || m_CachePos >= (int)Cache().size() )
10b9be32 233 {
b1a3a964
VS
234 if ( m_CachePos < 0 )
235 m_CachePos = 0;
236 else
237 m_CachePos = Cache().size() - 1;
10b9be32
VZ
238 // something is very wrong with HTML, give up by returning an
239 // impossibly large value which is going to be ignored by the
240 // caller
241 *end1 =
b1a3a964
VS
242 *end2 = inputEnd;
243 *hasEnding = true;
10b9be32
VZ
244 return;
245 }
daa616fc 246 }
4fe7567d 247 while (Cache()[m_CachePos].Key != at);
5526e819 248 }
61679695
VS
249
250 switch ( Cache()[m_CachePos].type )
251 {
252 case wxHtmlCacheItem::Type_Normal:
253 *end1 = Cache()[m_CachePos].End1;
254 *end2 = Cache()[m_CachePos].End2;
255 *hasEnding = true;
256 break;
257
258 case wxHtmlCacheItem::Type_EndingTag:
259 wxFAIL_MSG("QueryTag called for ending tag - can't be");
260 // but if it does happen, fall through, better than crashing
261
262 case wxHtmlCacheItem::Type_NoMatchingEndingTag:
263 // If input HTML is invalid and there's no closing tag for this
264 // one, pretend that it runs all the way to the end of input
265 *end1 = inputEnd;
266 *end2 = inputEnd;
267 *hasEnding = false;
268 break;
269 }
5526e819
VS
270}
271
272
273
274
//-----------------------------------------------------------------------------
// wxHtmlTag
//-----------------------------------------------------------------------------
278
211dfedd 279wxHtmlTag::wxHtmlTag(wxHtmlTag *parent,
b1a3a964
VS
280 const wxString *source,
281 const wxString::const_iterator& pos,
282 const wxString::const_iterator& end_pos,
daa616fc 283 wxHtmlTagsCache *cache,
7da48d49 284 wxHtmlEntitiesParser *entParser)
5526e819 285{
211dfedd
VS
286 /* Setup DOM relations */
287
288 m_Next = NULL;
289 m_FirstChild = m_LastChild = NULL;
290 m_Parent = parent;
291 if (parent)
292 {
293 m_Prev = m_Parent->m_LastChild;
294 if (m_Prev == NULL)
295 m_Parent->m_FirstChild = this;
296 else
297 m_Prev->m_Next = this;
298 m_Parent->m_LastChild = this;
299 }
300 else
301 m_Prev = NULL;
302
303 /* Find parameters and their values: */
8cd82622 304
daa616fc 305 wxChar c;
5526e819
VS
306
307 // fill-in name, params and begin pos:
b1a3a964 308 wxString::const_iterator i(pos+1);
5526e819 309
b076dc01 310 // find tag's name and convert it to uppercase:
8cd82622 311 while ((i < end_pos) &&
b1a3a964 312 ((c = *(i++)) != wxT(' ') && c != wxT('\r') &&
daa616fc 313 c != wxT('\n') && c != wxT('\t') &&
ad20c567 314 c != wxT('>') && c != wxT('/')))
a914db0f 315 {
8cd82622 316 if ((c >= wxT('a')) && (c <= wxT('z')))
daa616fc
VS
317 c -= (wxT('a') - wxT('A'));
318 m_Name << c;
5526e819
VS
319 }
320
b076dc01 321 // if the tag has parameters, read them and "normalize" them,
8cd82622 322 // i.e. convert to uppercase, replace whitespaces by spaces and
b076dc01 323 // remove whitespaces around '=':
b1a3a964 324 if (*(i-1) != wxT('>'))
daa616fc
VS
325 {
326 #define IS_WHITE(c) (c == wxT(' ') || c == wxT('\r') || \
327 c == wxT('\n') || c == wxT('\t'))
328 wxString pname, pvalue;
329 wxChar quote;
8cd82622 330 enum
a914db0f 331 {
8cd82622 332 ST_BEFORE_NAME = 1,
daa616fc
VS
333 ST_NAME,
334 ST_BEFORE_EQ,
335 ST_BEFORE_VALUE,
336 ST_VALUE
337 } state;
8cd82622 338
daa616fc
VS
339 quote = 0;
340 state = ST_BEFORE_NAME;
341 while (i < end_pos)
342 {
b1a3a964 343 c = *(i++);
daa616fc 344
8cd82622 345 if (c == wxT('>') && !(state == ST_VALUE && quote != 0))
a914db0f 346 {
daa616fc 347 if (state == ST_BEFORE_EQ || state == ST_NAME)
b076dc01 348 {
daa616fc 349 m_ParamNames.Add(pname);
b1a3a964 350 m_ParamValues.Add(wxGetEmptyString());
b076dc01 351 }
daa616fc
VS
352 else if (state == ST_VALUE && quote == 0)
353 {
354 m_ParamNames.Add(pname);
367c84b9
VS
355 if (entParser)
356 m_ParamValues.Add(entParser->Parse(pvalue));
357 else
358 m_ParamValues.Add(pvalue);
daa616fc
VS
359 }
360 break;
5526e819 361 }
daa616fc 362 switch (state)
a914db0f 363 {
daa616fc
VS
364 case ST_BEFORE_NAME:
365 if (!IS_WHITE(c))
366 {
367 pname = c;
368 state = ST_NAME;
369 }
370 break;
371 case ST_NAME:
372 if (IS_WHITE(c))
373 state = ST_BEFORE_EQ;
374 else if (c == wxT('='))
375 state = ST_BEFORE_VALUE;
376 else
377 pname << c;
378 break;
379 case ST_BEFORE_EQ:
380 if (c == wxT('='))
381 state = ST_BEFORE_VALUE;
382 else if (!IS_WHITE(c))
383 {
384 m_ParamNames.Add(pname);
b1a3a964 385 m_ParamValues.Add(wxGetEmptyString());
daa616fc
VS
386 pname = c;
387 state = ST_NAME;
388 }
389 break;
390 case ST_BEFORE_VALUE:
391 if (!IS_WHITE(c))
392 {
393 if (c == wxT('"') || c == wxT('\''))
b1a3a964 394 quote = c, pvalue = wxGetEmptyString();
daa616fc
VS
395 else
396 quote = 0, pvalue = c;
397 state = ST_VALUE;
398 }
399 break;
400 case ST_VALUE:
401 if ((quote != 0 && c == quote) ||
402 (quote == 0 && IS_WHITE(c)))
403 {
404 m_ParamNames.Add(pname);
405 if (quote == 0)
406 {
407 // VS: backward compatibility, no real reason,
408 // but wxHTML code relies on this... :(
409 pvalue.MakeUpper();
410 }
367c84b9
VS
411 if (entParser)
412 m_ParamValues.Add(entParser->Parse(pvalue));
413 else
414 m_ParamValues.Add(pvalue);
daa616fc
VS
415 state = ST_BEFORE_NAME;
416 }
417 else
418 pvalue << c;
419 break;
72aa4a98 420 }
5526e819 421 }
8cd82622 422
daa616fc 423 #undef IS_WHITE
7448de8d
WS
424 }
425 m_Begin = i;
b1a3a964 426 cache->QueryTag(pos, source->end(), &m_End1, &m_End2, &m_hasEnding);
7448de8d
WS
427 if (m_End1 > end_pos) m_End1 = end_pos;
428 if (m_End2 > end_pos) m_End2 = end_pos;
b1a3a964
VS
429
430#if WXWIN_COMPATIBILITY_2_8
431 m_sourceStart = source->begin();
432#endif
5526e819
VS
433}
434
211dfedd
VS
435wxHtmlTag::~wxHtmlTag()
436{
0d58bb65
VS
437 wxHtmlTag *t1, *t2;
438 t1 = m_FirstChild;
439 while (t1)
440 {
441 t2 = t1->GetNextSibling();
442 delete t1;
443 t1 = t2;
444 }
211dfedd
VS
445}
446
5526e819
VS
447bool wxHtmlTag::HasParam(const wxString& par) const
448{
8703bc01 449 return (m_ParamNames.Index(par, false) != wxNOT_FOUND);
5526e819
VS
450}
451
614f9713 452wxString wxHtmlTag::GetParam(const wxString& par, bool with_quotes) const
5526e819 453{
8703bc01 454 int index = m_ParamNames.Index(par, false);
daa616fc 455 if (index == wxNOT_FOUND)
b1a3a964 456 return wxGetEmptyString();
614f9713 457 if (with_quotes)
4f9297b0 458 {
daa616fc
VS
459 // VS: backward compatibility, seems to be never used by wxHTML...
460 wxString s;
461 s << wxT('"') << m_ParamValues[index] << wxT('"');
462 return s;
5526e819 463 }
daa616fc
VS
464 else
465 return m_ParamValues[index];
5526e819
VS
466}
467
90350682 468int wxHtmlTag::ScanParam(const wxString& par,
d7640339
VS
469 const char *format,
470 void *param) const
471{
472 wxString parval = GetParam(par);
473 return wxSscanf(parval, format, param);
474}
475
476int wxHtmlTag::ScanParam(const wxString& par,
477 const wchar_t *format,
90350682 478 void *param) const
5526e819 479{
5526e819 480 wxString parval = GetParam(par);
161f4f73 481 return wxSscanf(parval, format, param);
5526e819
VS
482}
483
8bd72d90
VS
484bool wxHtmlTag::GetParamAsColour(const wxString& par, wxColour *clr) const
485{
86766dfd 486 wxCHECK_MSG( clr, false, _T("invalid colour argument") );
8cd82622 487
86766dfd 488 wxString str = GetParam(par);
40989e46 489
86766dfd
VS
490 // handle colours defined in HTML 4.0 first:
491 if (str.length() > 1 && str[0] != _T('#'))
8bd72d90 492 {
d9359369
VS
493 #define HTML_COLOUR(name, r, g, b) \
494 if (str.IsSameAs(wxS(name), false)) \
86766dfd 495 { clr->Set(r, g, b); return true; }
8bd72d90
VS
496 HTML_COLOUR("black", 0x00,0x00,0x00)
497 HTML_COLOUR("silver", 0xC0,0xC0,0xC0)
498 HTML_COLOUR("gray", 0x80,0x80,0x80)
499 HTML_COLOUR("white", 0xFF,0xFF,0xFF)
500 HTML_COLOUR("maroon", 0x80,0x00,0x00)
501 HTML_COLOUR("red", 0xFF,0x00,0x00)
502 HTML_COLOUR("purple", 0x80,0x00,0x80)
503 HTML_COLOUR("fuchsia", 0xFF,0x00,0xFF)
504 HTML_COLOUR("green", 0x00,0x80,0x00)
505 HTML_COLOUR("lime", 0x00,0xFF,0x00)
506 HTML_COLOUR("olive", 0x80,0x80,0x00)
507 HTML_COLOUR("yellow", 0xFF,0xFF,0x00)
508 HTML_COLOUR("navy", 0x00,0x00,0x80)
509 HTML_COLOUR("blue", 0x00,0x00,0xFF)
510 HTML_COLOUR("teal", 0x00,0x80,0x80)
511 HTML_COLOUR("aqua", 0x00,0xFF,0xFF)
512 #undef HTML_COLOUR
8bd72d90 513 }
5716a1ab 514
86766dfd
VS
515 // then try to parse #rrggbb representations or set from other well
516 // known names (note that this doesn't strictly conform to HTML spec,
517 // but it doesn't do real harm -- but it *must* be done after the standard
518 // colors are handled above):
519 if (clr->Set(str))
520 return true;
521
8703bc01 522 return false;
8bd72d90
VS
523}
524
525bool wxHtmlTag::GetParamAsInt(const wxString& par, int *clr) const
526{
3f6901ad
VZ
527 if ( !HasParam(par) )
528 return false;
529
8bd72d90 530 long i;
3f6901ad
VZ
531 if ( !GetParam(par).ToLong(&i) )
532 return false;
533
8bd72d90 534 *clr = (int)i;
3f6901ad 535 return true;
8bd72d90
VS
536}
537
daa616fc
VS
538wxString wxHtmlTag::GetAllParams() const
539{
3103e8a9 540 // VS: this function is for backward compatibility only,
daa616fc
VS
541 // never used by wxHTML
542 wxString s;
543 size_t cnt = m_ParamNames.GetCount();
544 for (size_t i = 0; i < cnt; i++)
545 {
546 s << m_ParamNames[i];
547 s << wxT('=');
548 if (m_ParamValues[i].Find(wxT('"')) != wxNOT_FOUND)
549 s << wxT('\'') << m_ParamValues[i] << wxT('\'');
550 else
551 s << wxT('"') << m_ParamValues[i] << wxT('"');
552 }
553 return s;
554}
555
211dfedd
VS
556wxHtmlTag *wxHtmlTag::GetFirstSibling() const
557{
558 if (m_Parent)
559 return m_Parent->m_FirstChild;
560 else
561 {
562 wxHtmlTag *cur = (wxHtmlTag*)this;
8cd82622 563 while (cur->m_Prev)
211dfedd
VS
564 cur = cur->m_Prev;
565 return cur;
566 }
567}
568
569wxHtmlTag *wxHtmlTag::GetLastSibling() const
570{
571 if (m_Parent)
572 return m_Parent->m_LastChild;
573 else
574 {
575 wxHtmlTag *cur = (wxHtmlTag*)this;
8cd82622 576 while (cur->m_Next)
211dfedd
VS
577 cur = cur->m_Next;
578 return cur;
579 }
580}
581
582wxHtmlTag *wxHtmlTag::GetNextTag() const
583{
584 if (m_FirstChild) return m_FirstChild;
585 if (m_Next) return m_Next;
586 wxHtmlTag *cur = m_Parent;
587 if (!cur) return NULL;
8cd82622 588 while (cur->m_Parent && !cur->m_Next)
211dfedd
VS
589 cur = cur->m_Parent;
590 return cur->m_Next;
591}
592
4d223b67 593#endif