]> git.saurik.com Git - wxWidgets.git/blame - src/html/htmltag.cpp
Fix processing of events for MRU entries #10 and more in docview.
[wxWidgets.git] / src / html / htmltag.cpp
CommitLineData
5526e819 1/////////////////////////////////////////////////////////////////////////////
93763ad5 2// Name: src/html/htmltag.cpp
5526e819
VS
3// Purpose: wxHtmlTag class (represents single tag)
4// Author: Vaclav Slavik
69941f05 5// RCS-ID: $Id$
5526e819 6// Copyright: (c) 1999 Vaclav Slavik
65571936 7// Licence: wxWindows licence
5526e819
VS
8/////////////////////////////////////////////////////////////////////////////
9
3096bd2f 10#include "wx/wxprec.h"
5526e819 11
2b5f62a0 12#ifdef __BORLANDC__
93763ad5 13 #pragma hdrstop
5526e819
VS
14#endif
15
93763ad5
WS
16#if wxUSE_HTML
17
40989e46
WS
18#include "wx/html/htmltag.h"
19
b4f4d3dd 20#ifndef WX_PRECOMP
7cf41a5d 21 #include "wx/colour.h"
193d0c93 22 #include "wx/wxcrtvararg.h"
5526e819
VS
23#endif
24
daa616fc 25#include "wx/html/htmlpars.h"
f68e16c5
VZ
26#include "wx/html/styleparams.h"
27
4fe7567d
VS
28#include "wx/vector.h"
29
7e1e0960 30#include <stdio.h> // for vsscanf
5526e819
VS
31#include <stdarg.h>
32
5526e819
VS
33//-----------------------------------------------------------------------------
34// wxHtmlTagsCache
35//-----------------------------------------------------------------------------
36
5e8e25e7
VS
37struct wxHtmlCacheItem
38{
39 // this is "pos" value passed to wxHtmlTag's constructor.
40 // it is position of '<' character of the tag
b1a3a964
VS
41 wxString::const_iterator Key;
42
43 // Tag type
44 enum Type
45 {
46 Type_Normal, // normal tag with a matching ending tag
47 Type_NoMatchingEndingTag, // there's no ending tag for this tag
48 Type_EndingTag // this is ending tag </..>
49 };
50 Type type;
5e8e25e7
VS
51
52 // end positions for the tag:
53 // end1 is '<' of ending tag,
54 // end2 is '>' or both are
b1a3a964 55 wxString::const_iterator End1, End2;
5e8e25e7
VS
56
57 // name of this tag
58 wxChar *Name;
59};
60
4fe7567d
VS
61// NB: this is an empty class and not typedef because of forward declaration
62class wxHtmlTagsCacheData : public wxVector<wxHtmlCacheItem>
63{
64};
5e8e25e7 65
07cc7ddc 66bool wxIsCDATAElement(const wxChar *tag)
7c6cd4a8 67{
9a83f860
VZ
68 return (wxStrcmp(tag, wxT("SCRIPT")) == 0) ||
69 (wxStrcmp(tag, wxT("STYLE")) == 0);
7c6cd4a8
VS
70}
71
b1a3a964
VS
72bool wxIsCDATAElement(const wxString& tag)
73{
d9359369
VS
74 return (wxStrcmp(tag.wx_str(), wxS("SCRIPT")) == 0) ||
75 (wxStrcmp(tag.wx_str(), wxS("STYLE")) == 0);
b1a3a964
VS
76}
77
5526e819
VS
78wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
79{
4fe7567d
VS
80 m_Cache = new wxHtmlTagsCacheData;
81 m_CachePos = 0;
82
8cd82622 83 wxChar tagBuffer[256];
5526e819 84
b1a3a964
VS
85 const wxString::const_iterator end = source.end();
86 for ( wxString::const_iterator pos = source.begin(); pos < end; ++pos )
4f9297b0 87 {
b1a3a964 88 if (*pos == wxT('<')) // tag found:
a914db0f 89 {
4609ee2e 90 // don't cache comment tags
b1a3a964 91 if ( wxHtmlParser::SkipCommentTag(pos, source.end()) )
4609ee2e 92 continue;
4609ee2e 93
4fe7567d
VS
94 size_t tg = Cache().size();
95 Cache().push_back(wxHtmlCacheItem());
96
b1a3a964 97 wxString::const_iterator stpos = pos++;
4fe7567d 98 Cache()[tg].Key = stpos;
8cd82622 99
4f22f506 100 int i;
8cd82622 101 for ( i = 0;
b1a3a964
VS
102 pos < end && i < (int)WXSIZEOF(tagBuffer) - 1 &&
103 *pos != wxT('>') && !wxIsspace(*pos);
104 ++i, ++pos )
a914db0f 105 {
b1a3a964 106 tagBuffer[i] = (wxChar)wxToupper(*pos);
5526e819 107 }
9a83f860 108 tagBuffer[i] = wxT('\0');
8cd82622 109
4fe7567d
VS
110 Cache()[tg].Name = new wxChar[i+1];
111 memcpy(Cache()[tg].Name, tagBuffer, (i+1)*sizeof(wxChar));
5526e819 112
b1a3a964
VS
113 while (pos < end && *pos != wxT('>'))
114 ++pos;
5526e819 115
b1a3a964 116 if ((stpos+1) < end && *(stpos+1) == wxT('/')) // ending tag:
a914db0f 117 {
b1a3a964 118 Cache()[tg].type = wxHtmlCacheItem::Type_EndingTag;
5526e819
VS
119 // find matching begin tag:
120 for (i = tg; i >= 0; i--)
b1a3a964
VS
121 {
122 if ((Cache()[i].type == wxHtmlCacheItem::Type_NoMatchingEndingTag) && (wxStrcmp(Cache()[i].Name, tagBuffer+1) == 0))
a914db0f 123 {
b1a3a964 124 Cache()[i].type = wxHtmlCacheItem::Type_Normal;
4fe7567d
VS
125 Cache()[i].End1 = stpos;
126 Cache()[i].End2 = pos + 1;
5526e819
VS
127 break;
128 }
b1a3a964 129 }
5526e819 130 }
8cd82622 131 else
a914db0f 132 {
b1a3a964 133 Cache()[tg].type = wxHtmlCacheItem::Type_NoMatchingEndingTag;
7448de8d 134
7c6cd4a8
VS
135 if (wxIsCDATAElement(tagBuffer))
136 {
313ffa19
VS
137 // store the orig pos in case we are missing the closing
138 // tag (see below)
b1a3a964 139 const wxString::const_iterator old_pos = pos;
313ffa19 140 bool foundCloseTag = false;
7448de8d 141
7c6cd4a8
VS
142 // find next matching tag
143 int tag_len = wxStrlen(tagBuffer);
b1a3a964 144 while (pos < end)
7c6cd4a8
VS
145 {
146 // find the ending tag
b1a3a964
VS
147 while (pos + 1 < end &&
148 (*pos != '<' || *(pos+1) != '/'))
7c6cd4a8 149 ++pos;
b1a3a964 150 if (*pos == '<')
7c6cd4a8 151 ++pos;
d1da8872 152
7c6cd4a8
VS
153 // see if it matches
154 int match_pos = 0;
b1a3a964
VS
155 while (pos < end && match_pos < tag_len )
156 {
157 wxChar c = *pos;
158 if ( c == '>' || c == '<' )
159 break;
160
5447d1b4
VZ
161 // cast to wxChar needed to suppress warning in
162 // Unicode build
b1a3a964
VS
163 if ((wxChar)wxToupper(c) == tagBuffer[match_pos])
164 {
7c6cd4a8 165 ++match_pos;
d1da8872 166 }
b1a3a964
VS
167 else if (c == wxT(' ') || c == wxT('\n') ||
168 c == wxT('\r') || c == wxT('\t'))
169 {
7c6cd4a8
VS
170 // need to skip over these
171 }
b1a3a964
VS
172 else
173 {
7c6cd4a8
VS
174 match_pos = 0;
175 }
176 ++pos;
177 }
178
179 // found a match
7448de8d 180 if (match_pos == tag_len)
313ffa19 181 {
b5d464b9 182 pos = pos - tag_len - 3;
313ffa19 183 foundCloseTag = true;
7c6cd4a8
VS
184 break;
185 }
313ffa19
VS
186 else // keep looking for the closing tag
187 {
7c6cd4a8
VS
188 ++pos;
189 }
190 }
313ffa19
VS
191 if (!foundCloseTag)
192 {
193 // we didn't find closing tag; this means the markup
194 // is incorrect and the best thing we can do is to
195 // ignore the unclosed tag and continue parsing as if
196 // it didn't exist:
197 pos = old_pos;
198 }
7c6cd4a8 199 }
5526e819
VS
200 }
201 }
5526e819
VS
202 }
203
204 // ok, we're done, now we'll free .Name members of cache - we don't need it anymore:
4fe7567d
VS
205 for ( wxHtmlTagsCacheData::iterator i = Cache().begin();
206 i != Cache().end(); ++i )
4f9297b0 207 {
4fe7567d
VS
208 delete[] i->Name;
209 i->Name = NULL;
5526e819
VS
210 }
211}
212
4fe7567d
VS
213wxHtmlTagsCache::~wxHtmlTagsCache()
214{
215 delete m_Cache;
216}
217
b1a3a964
VS
218void wxHtmlTagsCache::QueryTag(const wxString::const_iterator& at,
219 const wxString::const_iterator& inputEnd,
220 wxString::const_iterator *end1,
221 wxString::const_iterator *end2,
222 bool *hasEnding)
5526e819 223{
4fe7567d
VS
224 if (Cache().empty())
225 return;
226
227 if (Cache()[m_CachePos].Key != at)
4f9297b0 228 {
4fe7567d 229 int delta = (at < Cache()[m_CachePos].Key) ? -1 : 1;
8cd82622
VZ
230 do
231 {
b1a3a964
VS
232 m_CachePos += delta;
233
234 if ( m_CachePos < 0 || m_CachePos >= (int)Cache().size() )
10b9be32 235 {
b1a3a964
VS
236 if ( m_CachePos < 0 )
237 m_CachePos = 0;
238 else
239 m_CachePos = Cache().size() - 1;
10b9be32
VZ
240 // something is very wrong with HTML, give up by returning an
241 // impossibly large value which is going to be ignored by the
242 // caller
243 *end1 =
b1a3a964
VS
244 *end2 = inputEnd;
245 *hasEnding = true;
10b9be32
VZ
246 return;
247 }
daa616fc 248 }
4fe7567d 249 while (Cache()[m_CachePos].Key != at);
5526e819 250 }
61679695
VS
251
252 switch ( Cache()[m_CachePos].type )
253 {
254 case wxHtmlCacheItem::Type_Normal:
255 *end1 = Cache()[m_CachePos].End1;
256 *end2 = Cache()[m_CachePos].End2;
257 *hasEnding = true;
258 break;
259
260 case wxHtmlCacheItem::Type_EndingTag:
261 wxFAIL_MSG("QueryTag called for ending tag - can't be");
262 // but if it does happen, fall through, better than crashing
263
264 case wxHtmlCacheItem::Type_NoMatchingEndingTag:
265 // If input HTML is invalid and there's no closing tag for this
266 // one, pretend that it runs all the way to the end of input
267 *end1 = inputEnd;
268 *end2 = inputEnd;
269 *hasEnding = false;
270 break;
271 }
5526e819
VS
272}
273
274
275
276
277//-----------------------------------------------------------------------------
278// wxHtmlTag
279//-----------------------------------------------------------------------------
280
211dfedd 281wxHtmlTag::wxHtmlTag(wxHtmlTag *parent,
b1a3a964
VS
282 const wxString *source,
283 const wxString::const_iterator& pos,
284 const wxString::const_iterator& end_pos,
daa616fc 285 wxHtmlTagsCache *cache,
7da48d49 286 wxHtmlEntitiesParser *entParser)
5526e819 287{
211dfedd
VS
288 /* Setup DOM relations */
289
290 m_Next = NULL;
291 m_FirstChild = m_LastChild = NULL;
292 m_Parent = parent;
293 if (parent)
294 {
295 m_Prev = m_Parent->m_LastChild;
296 if (m_Prev == NULL)
297 m_Parent->m_FirstChild = this;
298 else
299 m_Prev->m_Next = this;
300 m_Parent->m_LastChild = this;
301 }
302 else
303 m_Prev = NULL;
304
305 /* Find parameters and their values: */
8cd82622 306
76de2296 307 wxChar c wxDUMMY_INITIALIZE(0);
5526e819
VS
308
309 // fill-in name, params and begin pos:
b1a3a964 310 wxString::const_iterator i(pos+1);
5526e819 311
b076dc01 312 // find tag's name and convert it to uppercase:
8cd82622 313 while ((i < end_pos) &&
b1a3a964 314 ((c = *(i++)) != wxT(' ') && c != wxT('\r') &&
daa616fc 315 c != wxT('\n') && c != wxT('\t') &&
ad20c567 316 c != wxT('>') && c != wxT('/')))
a914db0f 317 {
8cd82622 318 if ((c >= wxT('a')) && (c <= wxT('z')))
daa616fc
VS
319 c -= (wxT('a') - wxT('A'));
320 m_Name << c;
5526e819
VS
321 }
322
b076dc01 323 // if the tag has parameters, read them and "normalize" them,
8cd82622 324 // i.e. convert to uppercase, replace whitespaces by spaces and
b076dc01 325 // remove whitespaces around '=':
b1a3a964 326 if (*(i-1) != wxT('>'))
daa616fc
VS
327 {
328 #define IS_WHITE(c) (c == wxT(' ') || c == wxT('\r') || \
329 c == wxT('\n') || c == wxT('\t'))
330 wxString pname, pvalue;
331 wxChar quote;
8cd82622 332 enum
a914db0f 333 {
8cd82622 334 ST_BEFORE_NAME = 1,
daa616fc
VS
335 ST_NAME,
336 ST_BEFORE_EQ,
337 ST_BEFORE_VALUE,
338 ST_VALUE
339 } state;
8cd82622 340
daa616fc
VS
341 quote = 0;
342 state = ST_BEFORE_NAME;
343 while (i < end_pos)
344 {
b1a3a964 345 c = *(i++);
daa616fc 346
8cd82622 347 if (c == wxT('>') && !(state == ST_VALUE && quote != 0))
a914db0f 348 {
daa616fc 349 if (state == ST_BEFORE_EQ || state == ST_NAME)
b076dc01 350 {
daa616fc 351 m_ParamNames.Add(pname);
b1a3a964 352 m_ParamValues.Add(wxGetEmptyString());
b076dc01 353 }
daa616fc
VS
354 else if (state == ST_VALUE && quote == 0)
355 {
356 m_ParamNames.Add(pname);
367c84b9
VS
357 if (entParser)
358 m_ParamValues.Add(entParser->Parse(pvalue));
359 else
360 m_ParamValues.Add(pvalue);
daa616fc
VS
361 }
362 break;
5526e819 363 }
daa616fc 364 switch (state)
a914db0f 365 {
daa616fc
VS
366 case ST_BEFORE_NAME:
367 if (!IS_WHITE(c))
368 {
369 pname = c;
370 state = ST_NAME;
371 }
372 break;
373 case ST_NAME:
374 if (IS_WHITE(c))
375 state = ST_BEFORE_EQ;
376 else if (c == wxT('='))
377 state = ST_BEFORE_VALUE;
378 else
379 pname << c;
380 break;
381 case ST_BEFORE_EQ:
382 if (c == wxT('='))
383 state = ST_BEFORE_VALUE;
384 else if (!IS_WHITE(c))
385 {
386 m_ParamNames.Add(pname);
b1a3a964 387 m_ParamValues.Add(wxGetEmptyString());
daa616fc
VS
388 pname = c;
389 state = ST_NAME;
390 }
391 break;
392 case ST_BEFORE_VALUE:
393 if (!IS_WHITE(c))
394 {
395 if (c == wxT('"') || c == wxT('\''))
b1a3a964 396 quote = c, pvalue = wxGetEmptyString();
daa616fc
VS
397 else
398 quote = 0, pvalue = c;
399 state = ST_VALUE;
400 }
401 break;
402 case ST_VALUE:
403 if ((quote != 0 && c == quote) ||
404 (quote == 0 && IS_WHITE(c)))
405 {
406 m_ParamNames.Add(pname);
407 if (quote == 0)
408 {
409 // VS: backward compatibility, no real reason,
410 // but wxHTML code relies on this... :(
411 pvalue.MakeUpper();
412 }
367c84b9
VS
413 if (entParser)
414 m_ParamValues.Add(entParser->Parse(pvalue));
415 else
416 m_ParamValues.Add(pvalue);
daa616fc
VS
417 state = ST_BEFORE_NAME;
418 }
419 else
420 pvalue << c;
421 break;
72aa4a98 422 }
5526e819 423 }
8cd82622 424
daa616fc 425 #undef IS_WHITE
7448de8d
WS
426 }
427 m_Begin = i;
b1a3a964 428 cache->QueryTag(pos, source->end(), &m_End1, &m_End2, &m_hasEnding);
7448de8d
WS
429 if (m_End1 > end_pos) m_End1 = end_pos;
430 if (m_End2 > end_pos) m_End2 = end_pos;
b1a3a964
VS
431
432#if WXWIN_COMPATIBILITY_2_8
433 m_sourceStart = source->begin();
434#endif
f68e16c5
VZ
435
436 // Try to parse any style parameters that can be handled simply by
437 // converting them to the equivalent HTML 3 attributes: this is a far cry
438 // from perfect but better than nothing.
439 static const struct EquivAttr
440 {
441 const char *style;
442 const char *attr;
443 } equivAttrs[] =
444 {
445 { "text-align", "ALIGN" },
446 { "width", "WIDTH" },
447 { "vertical-align", "VALIGN" },
448 { "background", "BGCOLOR" },
449 };
450
451 wxHtmlStyleParams styleParams(*this);
452 for ( unsigned n = 0; n < WXSIZEOF(equivAttrs); n++ )
453 {
454 const EquivAttr& ea = equivAttrs[n];
455 if ( styleParams.HasParam(ea.style) && !HasParam(ea.attr) )
456 {
457 m_ParamNames.Add(ea.attr);
458 m_ParamValues.Add(styleParams.GetParam(ea.style));
459 }
460 }
5526e819
VS
461}
462
211dfedd
VS
463wxHtmlTag::~wxHtmlTag()
464{
0d58bb65
VS
465 wxHtmlTag *t1, *t2;
466 t1 = m_FirstChild;
467 while (t1)
468 {
469 t2 = t1->GetNextSibling();
470 delete t1;
471 t1 = t2;
472 }
211dfedd
VS
473}
474
5526e819
VS
475bool wxHtmlTag::HasParam(const wxString& par) const
476{
8703bc01 477 return (m_ParamNames.Index(par, false) != wxNOT_FOUND);
5526e819
VS
478}
479
614f9713 480wxString wxHtmlTag::GetParam(const wxString& par, bool with_quotes) const
5526e819 481{
8703bc01 482 int index = m_ParamNames.Index(par, false);
daa616fc 483 if (index == wxNOT_FOUND)
b1a3a964 484 return wxGetEmptyString();
614f9713 485 if (with_quotes)
4f9297b0 486 {
daa616fc
VS
487 // VS: backward compatibility, seems to be never used by wxHTML...
488 wxString s;
489 s << wxT('"') << m_ParamValues[index] << wxT('"');
490 return s;
5526e819 491 }
daa616fc
VS
492 else
493 return m_ParamValues[index];
5526e819
VS
494}
495
90350682 496int wxHtmlTag::ScanParam(const wxString& par,
d7640339
VS
497 const char *format,
498 void *param) const
499{
500 wxString parval = GetParam(par);
501 return wxSscanf(parval, format, param);
502}
503
504int wxHtmlTag::ScanParam(const wxString& par,
505 const wchar_t *format,
90350682 506 void *param) const
5526e819 507{
5526e819 508 wxString parval = GetParam(par);
161f4f73 509 return wxSscanf(parval, format, param);
5526e819
VS
510}
511
f68e16c5
VZ
512/* static */
513bool wxHtmlTag::ParseAsColour(const wxString& str, wxColour *clr)
8bd72d90 514{
9a83f860 515 wxCHECK_MSG( clr, false, wxT("invalid colour argument") );
8cd82622 516
86766dfd 517 // handle colours defined in HTML 4.0 first:
9a83f860 518 if (str.length() > 1 && str[0] != wxT('#'))
8bd72d90 519 {
d9359369
VS
520 #define HTML_COLOUR(name, r, g, b) \
521 if (str.IsSameAs(wxS(name), false)) \
86766dfd 522 { clr->Set(r, g, b); return true; }
8bd72d90
VS
523 HTML_COLOUR("black", 0x00,0x00,0x00)
524 HTML_COLOUR("silver", 0xC0,0xC0,0xC0)
525 HTML_COLOUR("gray", 0x80,0x80,0x80)
526 HTML_COLOUR("white", 0xFF,0xFF,0xFF)
527 HTML_COLOUR("maroon", 0x80,0x00,0x00)
528 HTML_COLOUR("red", 0xFF,0x00,0x00)
529 HTML_COLOUR("purple", 0x80,0x00,0x80)
530 HTML_COLOUR("fuchsia", 0xFF,0x00,0xFF)
531 HTML_COLOUR("green", 0x00,0x80,0x00)
532 HTML_COLOUR("lime", 0x00,0xFF,0x00)
533 HTML_COLOUR("olive", 0x80,0x80,0x00)
534 HTML_COLOUR("yellow", 0xFF,0xFF,0x00)
535 HTML_COLOUR("navy", 0x00,0x00,0x80)
536 HTML_COLOUR("blue", 0x00,0x00,0xFF)
537 HTML_COLOUR("teal", 0x00,0x80,0x80)
538 HTML_COLOUR("aqua", 0x00,0xFF,0xFF)
539 #undef HTML_COLOUR
8bd72d90 540 }
5716a1ab 541
86766dfd
VS
542 // then try to parse #rrggbb representations or set from other well
543 // known names (note that this doesn't strictly conform to HTML spec,
544 // but it doesn't do real harm -- but it *must* be done after the standard
545 // colors are handled above):
546 if (clr->Set(str))
547 return true;
548
8703bc01 549 return false;
8bd72d90
VS
550}
551
f68e16c5
VZ
552bool wxHtmlTag::GetParamAsColour(const wxString& par, wxColour *clr) const
553{
554 const wxString str = GetParam(par);
555 return !str.empty() && ParseAsColour(str, clr);
556}
557
8bd72d90
VS
558bool wxHtmlTag::GetParamAsInt(const wxString& par, int *clr) const
559{
3f6901ad
VZ
560 if ( !HasParam(par) )
561 return false;
562
8bd72d90 563 long i;
3f6901ad
VZ
564 if ( !GetParam(par).ToLong(&i) )
565 return false;
566
8bd72d90 567 *clr = (int)i;
3f6901ad 568 return true;
8bd72d90
VS
569}
570
daa616fc
VS
571wxString wxHtmlTag::GetAllParams() const
572{
3103e8a9 573 // VS: this function is for backward compatibility only,
daa616fc
VS
574 // never used by wxHTML
575 wxString s;
576 size_t cnt = m_ParamNames.GetCount();
577 for (size_t i = 0; i < cnt; i++)
578 {
579 s << m_ParamNames[i];
580 s << wxT('=');
581 if (m_ParamValues[i].Find(wxT('"')) != wxNOT_FOUND)
582 s << wxT('\'') << m_ParamValues[i] << wxT('\'');
583 else
584 s << wxT('"') << m_ParamValues[i] << wxT('"');
585 }
586 return s;
587}
588
211dfedd
VS
589wxHtmlTag *wxHtmlTag::GetFirstSibling() const
590{
591 if (m_Parent)
592 return m_Parent->m_FirstChild;
593 else
594 {
595 wxHtmlTag *cur = (wxHtmlTag*)this;
8cd82622 596 while (cur->m_Prev)
211dfedd
VS
597 cur = cur->m_Prev;
598 return cur;
599 }
600}
601
602wxHtmlTag *wxHtmlTag::GetLastSibling() const
603{
604 if (m_Parent)
605 return m_Parent->m_LastChild;
606 else
607 {
608 wxHtmlTag *cur = (wxHtmlTag*)this;
8cd82622 609 while (cur->m_Next)
211dfedd
VS
610 cur = cur->m_Next;
611 return cur;
612 }
613}
614
615wxHtmlTag *wxHtmlTag::GetNextTag() const
616{
617 if (m_FirstChild) return m_FirstChild;
618 if (m_Next) return m_Next;
619 wxHtmlTag *cur = m_Parent;
620 if (!cur) return NULL;
8cd82622 621 while (cur->m_Parent && !cur->m_Next)
211dfedd
VS
622 cur = cur->m_Parent;
623 return cur->m_Next;
624}
625
4d223b67 626#endif