]> git.saurik.com Git - wxWidgets.git/blame - src/html/htmltag.cpp
Corrected bug in revision 47973
[wxWidgets.git] / src / html / htmltag.cpp
CommitLineData
5526e819 1/////////////////////////////////////////////////////////////////////////////
93763ad5 2// Name: src/html/htmltag.cpp
5526e819
VS
3// Purpose: wxHtmlTag class (represents single tag)
4// Author: Vaclav Slavik
69941f05 5// RCS-ID: $Id$
5526e819 6// Copyright: (c) 1999 Vaclav Slavik
65571936 7// Licence: wxWindows licence
5526e819
VS
8/////////////////////////////////////////////////////////////////////////////
9
3096bd2f 10#include "wx/wxprec.h"
5526e819 11
2b5f62a0 12#ifdef __BORLANDC__
93763ad5 13 #pragma hdrstop
5526e819
VS
14#endif
15
93763ad5
WS
16#if wxUSE_HTML
17
40989e46
WS
18#include "wx/html/htmltag.h"
19
b4f4d3dd 20#ifndef WX_PRECOMP
7cf41a5d 21 #include "wx/colour.h"
193d0c93 22 #include "wx/wxcrtvararg.h"
5526e819
VS
23#endif
24
daa616fc 25#include "wx/html/htmlpars.h"
4fe7567d
VS
26#include "wx/vector.h"
27
7e1e0960 28#include <stdio.h> // for vsscanf
5526e819
VS
29#include <stdarg.h>
30
5526e819
VS
31//-----------------------------------------------------------------------------
32// wxHtmlTagsCache
33//-----------------------------------------------------------------------------
34
5e8e25e7
VS
35struct wxHtmlCacheItem
36{
37 // this is "pos" value passed to wxHtmlTag's constructor.
38 // it is position of '<' character of the tag
b1a3a964
VS
39 wxString::const_iterator Key;
40
41 // Tag type
42 enum Type
43 {
44 Type_Normal, // normal tag with a matching ending tag
45 Type_NoMatchingEndingTag, // there's no ending tag for this tag
46 Type_EndingTag // this is ending tag </..>
47 };
48 Type type;
5e8e25e7
VS
49
50 // end positions for the tag:
51 // end1 is '<' of ending tag,
52 // end2 is '>' or both are
b1a3a964 53 wxString::const_iterator End1, End2;
5e8e25e7
VS
54
55 // name of this tag
56 wxChar *Name;
57};
58
4fe7567d
VS
59// NB: this is an empty class and not typedef because of forward declaration
60class wxHtmlTagsCacheData : public wxVector<wxHtmlCacheItem>
61{
62};
5e8e25e7 63
07cc7ddc 64bool wxIsCDATAElement(const wxChar *tag)
7c6cd4a8
VS
65{
66 return (wxStrcmp(tag, _T("SCRIPT")) == 0) ||
67 (wxStrcmp(tag, _T("STYLE")) == 0);
68}
69
b1a3a964
VS
70bool wxIsCDATAElement(const wxString& tag)
71{
72 return (wxStrcmp(tag.wx_str(), wxSTRING_TEXT("SCRIPT")) == 0) ||
73 (wxStrcmp(tag.wx_str(), wxSTRING_TEXT("STYLE")) == 0);
74}
75
5526e819
VS
76wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
77{
4fe7567d
VS
78 m_Cache = new wxHtmlTagsCacheData;
79 m_CachePos = 0;
80
8cd82622 81 wxChar tagBuffer[256];
5526e819 82
b1a3a964
VS
83 const wxString::const_iterator end = source.end();
84 for ( wxString::const_iterator pos = source.begin(); pos < end; ++pos )
4f9297b0 85 {
b1a3a964 86 if (*pos == wxT('<')) // tag found:
a914db0f 87 {
4609ee2e 88 // don't cache comment tags
b1a3a964 89 if ( wxHtmlParser::SkipCommentTag(pos, source.end()) )
4609ee2e 90 continue;
4609ee2e 91
4fe7567d
VS
92 size_t tg = Cache().size();
93 Cache().push_back(wxHtmlCacheItem());
94
b1a3a964 95 wxString::const_iterator stpos = pos++;
4fe7567d 96 Cache()[tg].Key = stpos;
8cd82622 97
4f22f506 98 int i;
8cd82622 99 for ( i = 0;
b1a3a964
VS
100 pos < end && i < (int)WXSIZEOF(tagBuffer) - 1 &&
101 *pos != wxT('>') && !wxIsspace(*pos);
102 ++i, ++pos )
a914db0f 103 {
b1a3a964 104 tagBuffer[i] = (wxChar)wxToupper(*pos);
5526e819 105 }
8cd82622
VZ
106 tagBuffer[i] = _T('\0');
107
4fe7567d
VS
108 Cache()[tg].Name = new wxChar[i+1];
109 memcpy(Cache()[tg].Name, tagBuffer, (i+1)*sizeof(wxChar));
5526e819 110
b1a3a964
VS
111 while (pos < end && *pos != wxT('>'))
112 ++pos;
5526e819 113
b1a3a964 114 if ((stpos+1) < end && *(stpos+1) == wxT('/')) // ending tag:
a914db0f 115 {
b1a3a964 116 Cache()[tg].type = wxHtmlCacheItem::Type_EndingTag;
5526e819
VS
117 // find matching begin tag:
118 for (i = tg; i >= 0; i--)
b1a3a964
VS
119 {
120 if ((Cache()[i].type == wxHtmlCacheItem::Type_NoMatchingEndingTag) && (wxStrcmp(Cache()[i].Name, tagBuffer+1) == 0))
a914db0f 121 {
b1a3a964 122 Cache()[i].type = wxHtmlCacheItem::Type_Normal;
4fe7567d
VS
123 Cache()[i].End1 = stpos;
124 Cache()[i].End2 = pos + 1;
5526e819
VS
125 break;
126 }
b1a3a964 127 }
5526e819 128 }
8cd82622 129 else
a914db0f 130 {
b1a3a964 131 Cache()[tg].type = wxHtmlCacheItem::Type_NoMatchingEndingTag;
7448de8d 132
7c6cd4a8
VS
133 if (wxIsCDATAElement(tagBuffer))
134 {
313ffa19
VS
135 // store the orig pos in case we are missing the closing
136 // tag (see below)
b1a3a964 137 const wxString::const_iterator old_pos = pos;
313ffa19 138 bool foundCloseTag = false;
7448de8d 139
7c6cd4a8
VS
140 // find next matching tag
141 int tag_len = wxStrlen(tagBuffer);
b1a3a964 142 while (pos < end)
7c6cd4a8
VS
143 {
144 // find the ending tag
b1a3a964
VS
145 while (pos + 1 < end &&
146 (*pos != '<' || *(pos+1) != '/'))
7c6cd4a8 147 ++pos;
b1a3a964 148 if (*pos == '<')
7c6cd4a8 149 ++pos;
d1da8872 150
7c6cd4a8
VS
151 // see if it matches
152 int match_pos = 0;
b1a3a964
VS
153 while (pos < end && match_pos < tag_len )
154 {
155 wxChar c = *pos;
156 if ( c == '>' || c == '<' )
157 break;
158
5447d1b4
VZ
159 // cast to wxChar needed to suppress warning in
160 // Unicode build
b1a3a964
VS
161 if ((wxChar)wxToupper(c) == tagBuffer[match_pos])
162 {
7c6cd4a8 163 ++match_pos;
d1da8872 164 }
b1a3a964
VS
165 else if (c == wxT(' ') || c == wxT('\n') ||
166 c == wxT('\r') || c == wxT('\t'))
167 {
7c6cd4a8
VS
168 // need to skip over these
169 }
b1a3a964
VS
170 else
171 {
7c6cd4a8
VS
172 match_pos = 0;
173 }
174 ++pos;
175 }
176
177 // found a match
7448de8d 178 if (match_pos == tag_len)
313ffa19 179 {
b5d464b9 180 pos = pos - tag_len - 3;
313ffa19 181 foundCloseTag = true;
7c6cd4a8
VS
182 break;
183 }
313ffa19
VS
184 else // keep looking for the closing tag
185 {
7c6cd4a8
VS
186 ++pos;
187 }
188 }
313ffa19
VS
189 if (!foundCloseTag)
190 {
191 // we didn't find closing tag; this means the markup
192 // is incorrect and the best thing we can do is to
193 // ignore the unclosed tag and continue parsing as if
194 // it didn't exist:
195 pos = old_pos;
196 }
7c6cd4a8 197 }
5526e819
VS
198 }
199 }
5526e819
VS
200 }
201
202 // ok, we're done, now we'll free .Name members of cache - we don't need it anymore:
4fe7567d
VS
203 for ( wxHtmlTagsCacheData::iterator i = Cache().begin();
204 i != Cache().end(); ++i )
4f9297b0 205 {
4fe7567d
VS
206 delete[] i->Name;
207 i->Name = NULL;
5526e819
VS
208 }
209}
210
4fe7567d
VS
211wxHtmlTagsCache::~wxHtmlTagsCache()
212{
213 delete m_Cache;
214}
215
b1a3a964
VS
216void wxHtmlTagsCache::QueryTag(const wxString::const_iterator& at,
217 const wxString::const_iterator& inputEnd,
218 wxString::const_iterator *end1,
219 wxString::const_iterator *end2,
220 bool *hasEnding)
5526e819 221{
4fe7567d
VS
222 if (Cache().empty())
223 return;
224
225 if (Cache()[m_CachePos].Key != at)
4f9297b0 226 {
4fe7567d 227 int delta = (at < Cache()[m_CachePos].Key) ? -1 : 1;
8cd82622
VZ
228 do
229 {
b1a3a964
VS
230 m_CachePos += delta;
231
232 if ( m_CachePos < 0 || m_CachePos >= (int)Cache().size() )
10b9be32 233 {
b1a3a964
VS
234 if ( m_CachePos < 0 )
235 m_CachePos = 0;
236 else
237 m_CachePos = Cache().size() - 1;
10b9be32
VZ
238 // something is very wrong with HTML, give up by returning an
239 // impossibly large value which is going to be ignored by the
240 // caller
241 *end1 =
b1a3a964
VS
242 *end2 = inputEnd;
243 *hasEnding = true;
10b9be32
VZ
244 return;
245 }
daa616fc 246 }
4fe7567d 247 while (Cache()[m_CachePos].Key != at);
5526e819 248 }
4fe7567d
VS
249 *end1 = Cache()[m_CachePos].End1;
250 *end2 = Cache()[m_CachePos].End2;
b1a3a964 251 *hasEnding = (Cache()[m_CachePos].type == wxHtmlCacheItem::Type_Normal);
5526e819
VS
252}
253
254
255
256
257//-----------------------------------------------------------------------------
258// wxHtmlTag
259//-----------------------------------------------------------------------------
260
211dfedd 261wxHtmlTag::wxHtmlTag(wxHtmlTag *parent,
b1a3a964
VS
262 const wxString *source,
263 const wxString::const_iterator& pos,
264 const wxString::const_iterator& end_pos,
daa616fc 265 wxHtmlTagsCache *cache,
7da48d49 266 wxHtmlEntitiesParser *entParser)
5526e819 267{
211dfedd
VS
268 /* Setup DOM relations */
269
270 m_Next = NULL;
271 m_FirstChild = m_LastChild = NULL;
272 m_Parent = parent;
273 if (parent)
274 {
275 m_Prev = m_Parent->m_LastChild;
276 if (m_Prev == NULL)
277 m_Parent->m_FirstChild = this;
278 else
279 m_Prev->m_Next = this;
280 m_Parent->m_LastChild = this;
281 }
282 else
283 m_Prev = NULL;
284
285 /* Find parameters and their values: */
8cd82622 286
daa616fc 287 wxChar c;
5526e819
VS
288
289 // fill-in name, params and begin pos:
b1a3a964 290 wxString::const_iterator i(pos+1);
5526e819 291
b076dc01 292 // find tag's name and convert it to uppercase:
8cd82622 293 while ((i < end_pos) &&
b1a3a964 294 ((c = *(i++)) != wxT(' ') && c != wxT('\r') &&
daa616fc 295 c != wxT('\n') && c != wxT('\t') &&
8cd82622 296 c != wxT('>')))
a914db0f 297 {
8cd82622 298 if ((c >= wxT('a')) && (c <= wxT('z')))
daa616fc
VS
299 c -= (wxT('a') - wxT('A'));
300 m_Name << c;
5526e819
VS
301 }
302
b076dc01 303 // if the tag has parameters, read them and "normalize" them,
8cd82622 304 // i.e. convert to uppercase, replace whitespaces by spaces and
b076dc01 305 // remove whitespaces around '=':
b1a3a964 306 if (*(i-1) != wxT('>'))
daa616fc
VS
307 {
308 #define IS_WHITE(c) (c == wxT(' ') || c == wxT('\r') || \
309 c == wxT('\n') || c == wxT('\t'))
310 wxString pname, pvalue;
311 wxChar quote;
8cd82622 312 enum
a914db0f 313 {
8cd82622 314 ST_BEFORE_NAME = 1,
daa616fc
VS
315 ST_NAME,
316 ST_BEFORE_EQ,
317 ST_BEFORE_VALUE,
318 ST_VALUE
319 } state;
8cd82622 320
daa616fc
VS
321 quote = 0;
322 state = ST_BEFORE_NAME;
323 while (i < end_pos)
324 {
b1a3a964 325 c = *(i++);
daa616fc 326
8cd82622 327 if (c == wxT('>') && !(state == ST_VALUE && quote != 0))
a914db0f 328 {
daa616fc 329 if (state == ST_BEFORE_EQ || state == ST_NAME)
b076dc01 330 {
daa616fc 331 m_ParamNames.Add(pname);
b1a3a964 332 m_ParamValues.Add(wxGetEmptyString());
b076dc01 333 }
daa616fc
VS
334 else if (state == ST_VALUE && quote == 0)
335 {
336 m_ParamNames.Add(pname);
367c84b9
VS
337 if (entParser)
338 m_ParamValues.Add(entParser->Parse(pvalue));
339 else
340 m_ParamValues.Add(pvalue);
daa616fc
VS
341 }
342 break;
5526e819 343 }
daa616fc 344 switch (state)
a914db0f 345 {
daa616fc
VS
346 case ST_BEFORE_NAME:
347 if (!IS_WHITE(c))
348 {
349 pname = c;
350 state = ST_NAME;
351 }
352 break;
353 case ST_NAME:
354 if (IS_WHITE(c))
355 state = ST_BEFORE_EQ;
356 else if (c == wxT('='))
357 state = ST_BEFORE_VALUE;
358 else
359 pname << c;
360 break;
361 case ST_BEFORE_EQ:
362 if (c == wxT('='))
363 state = ST_BEFORE_VALUE;
364 else if (!IS_WHITE(c))
365 {
366 m_ParamNames.Add(pname);
b1a3a964 367 m_ParamValues.Add(wxGetEmptyString());
daa616fc
VS
368 pname = c;
369 state = ST_NAME;
370 }
371 break;
372 case ST_BEFORE_VALUE:
373 if (!IS_WHITE(c))
374 {
375 if (c == wxT('"') || c == wxT('\''))
b1a3a964 376 quote = c, pvalue = wxGetEmptyString();
daa616fc
VS
377 else
378 quote = 0, pvalue = c;
379 state = ST_VALUE;
380 }
381 break;
382 case ST_VALUE:
383 if ((quote != 0 && c == quote) ||
384 (quote == 0 && IS_WHITE(c)))
385 {
386 m_ParamNames.Add(pname);
387 if (quote == 0)
388 {
389 // VS: backward compatibility, no real reason,
390 // but wxHTML code relies on this... :(
391 pvalue.MakeUpper();
392 }
367c84b9
VS
393 if (entParser)
394 m_ParamValues.Add(entParser->Parse(pvalue));
395 else
396 m_ParamValues.Add(pvalue);
daa616fc
VS
397 state = ST_BEFORE_NAME;
398 }
399 else
400 pvalue << c;
401 break;
72aa4a98 402 }
5526e819 403 }
8cd82622 404
daa616fc 405 #undef IS_WHITE
7448de8d
WS
406 }
407 m_Begin = i;
b1a3a964 408 cache->QueryTag(pos, source->end(), &m_End1, &m_End2, &m_hasEnding);
7448de8d
WS
409 if (m_End1 > end_pos) m_End1 = end_pos;
410 if (m_End2 > end_pos) m_End2 = end_pos;
b1a3a964
VS
411
412#if WXWIN_COMPATIBILITY_2_8
413 m_sourceStart = source->begin();
414#endif
5526e819
VS
415}
416
211dfedd
VS
417wxHtmlTag::~wxHtmlTag()
418{
0d58bb65
VS
419 wxHtmlTag *t1, *t2;
420 t1 = m_FirstChild;
421 while (t1)
422 {
423 t2 = t1->GetNextSibling();
424 delete t1;
425 t1 = t2;
426 }
211dfedd
VS
427}
428
5526e819
VS
429bool wxHtmlTag::HasParam(const wxString& par) const
430{
8703bc01 431 return (m_ParamNames.Index(par, false) != wxNOT_FOUND);
5526e819
VS
432}
433
614f9713 434wxString wxHtmlTag::GetParam(const wxString& par, bool with_quotes) const
5526e819 435{
8703bc01 436 int index = m_ParamNames.Index(par, false);
daa616fc 437 if (index == wxNOT_FOUND)
b1a3a964 438 return wxGetEmptyString();
614f9713 439 if (with_quotes)
4f9297b0 440 {
daa616fc
VS
441 // VS: backward compatibility, seems to be never used by wxHTML...
442 wxString s;
443 s << wxT('"') << m_ParamValues[index] << wxT('"');
444 return s;
5526e819 445 }
daa616fc
VS
446 else
447 return m_ParamValues[index];
5526e819
VS
448}
449
90350682 450int wxHtmlTag::ScanParam(const wxString& par,
d7640339
VS
451 const char *format,
452 void *param) const
453{
454 wxString parval = GetParam(par);
455 return wxSscanf(parval, format, param);
456}
457
458int wxHtmlTag::ScanParam(const wxString& par,
459 const wchar_t *format,
90350682 460 void *param) const
5526e819 461{
5526e819 462 wxString parval = GetParam(par);
161f4f73 463 return wxSscanf(parval, format, param);
5526e819
VS
464}
465
8bd72d90
VS
466bool wxHtmlTag::GetParamAsColour(const wxString& par, wxColour *clr) const
467{
86766dfd 468 wxCHECK_MSG( clr, false, _T("invalid colour argument") );
8cd82622 469
86766dfd 470 wxString str = GetParam(par);
40989e46 471
86766dfd
VS
472 // handle colours defined in HTML 4.0 first:
473 if (str.length() > 1 && str[0] != _T('#'))
8bd72d90 474 {
b1a3a964
VS
475 #define HTML_COLOUR(name, r, g, b) \
476 if (str.IsSameAs(wxSTRING_TEXT(name), false)) \
86766dfd 477 { clr->Set(r, g, b); return true; }
8bd72d90
VS
478 HTML_COLOUR("black", 0x00,0x00,0x00)
479 HTML_COLOUR("silver", 0xC0,0xC0,0xC0)
480 HTML_COLOUR("gray", 0x80,0x80,0x80)
481 HTML_COLOUR("white", 0xFF,0xFF,0xFF)
482 HTML_COLOUR("maroon", 0x80,0x00,0x00)
483 HTML_COLOUR("red", 0xFF,0x00,0x00)
484 HTML_COLOUR("purple", 0x80,0x00,0x80)
485 HTML_COLOUR("fuchsia", 0xFF,0x00,0xFF)
486 HTML_COLOUR("green", 0x00,0x80,0x00)
487 HTML_COLOUR("lime", 0x00,0xFF,0x00)
488 HTML_COLOUR("olive", 0x80,0x80,0x00)
489 HTML_COLOUR("yellow", 0xFF,0xFF,0x00)
490 HTML_COLOUR("navy", 0x00,0x00,0x80)
491 HTML_COLOUR("blue", 0x00,0x00,0xFF)
492 HTML_COLOUR("teal", 0x00,0x80,0x80)
493 HTML_COLOUR("aqua", 0x00,0xFF,0xFF)
494 #undef HTML_COLOUR
8bd72d90 495 }
5716a1ab 496
86766dfd
VS
497 // then try to parse #rrggbb representations or set from other well
498 // known names (note that this doesn't strictly conform to HTML spec,
499 // but it doesn't do real harm -- but it *must* be done after the standard
500 // colors are handled above):
501 if (clr->Set(str))
502 return true;
503
8703bc01 504 return false;
8bd72d90
VS
505}
506
507bool wxHtmlTag::GetParamAsInt(const wxString& par, int *clr) const
508{
8703bc01 509 if (!HasParam(par)) return false;
8bd72d90
VS
510 long i;
511 bool succ = GetParam(par).ToLong(&i);
512 *clr = (int)i;
513 return succ;
514}
515
daa616fc
VS
516wxString wxHtmlTag::GetAllParams() const
517{
3103e8a9 518 // VS: this function is for backward compatibility only,
daa616fc
VS
519 // never used by wxHTML
520 wxString s;
521 size_t cnt = m_ParamNames.GetCount();
522 for (size_t i = 0; i < cnt; i++)
523 {
524 s << m_ParamNames[i];
525 s << wxT('=');
526 if (m_ParamValues[i].Find(wxT('"')) != wxNOT_FOUND)
527 s << wxT('\'') << m_ParamValues[i] << wxT('\'');
528 else
529 s << wxT('"') << m_ParamValues[i] << wxT('"');
530 }
531 return s;
532}
533
211dfedd
VS
534wxHtmlTag *wxHtmlTag::GetFirstSibling() const
535{
536 if (m_Parent)
537 return m_Parent->m_FirstChild;
538 else
539 {
540 wxHtmlTag *cur = (wxHtmlTag*)this;
8cd82622 541 while (cur->m_Prev)
211dfedd
VS
542 cur = cur->m_Prev;
543 return cur;
544 }
545}
546
547wxHtmlTag *wxHtmlTag::GetLastSibling() const
548{
549 if (m_Parent)
550 return m_Parent->m_LastChild;
551 else
552 {
553 wxHtmlTag *cur = (wxHtmlTag*)this;
8cd82622 554 while (cur->m_Next)
211dfedd
VS
555 cur = cur->m_Next;
556 return cur;
557 }
558}
559
560wxHtmlTag *wxHtmlTag::GetNextTag() const
561{
562 if (m_FirstChild) return m_FirstChild;
563 if (m_Next) return m_Next;
564 wxHtmlTag *cur = m_Parent;
565 if (!cur) return NULL;
8cd82622 566 while (cur->m_Parent && !cur->m_Next)
211dfedd
VS
567 cur = cur->m_Parent;
568 return cur->m_Next;
569}
570
4d223b67 571#endif