No changes, just remove a level of indentation in wxHtmlTagsCache ctor.
[wxWidgets.git] / src / html / htmltag.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/html/htmltag.cpp
3 // Purpose: wxHtmlTag class (represents single tag)
4 // Author: Vaclav Slavik
5 // RCS-ID: $Id$
6 // Copyright: (c) 1999 Vaclav Slavik
7 // Licence: wxWindows licence
8 /////////////////////////////////////////////////////////////////////////////
9
10 #include "wx/wxprec.h"
11
12 #ifdef __BORLANDC__
13 #pragma hdrstop
14 #endif
15
16 #if wxUSE_HTML
17
18 #include "wx/html/htmltag.h"
19
20 #ifndef WX_PRECOMP
21 #include "wx/colour.h"
22 #include "wx/wxcrtvararg.h"
23 #endif
24
25 #include "wx/html/htmlpars.h"
26 #include "wx/html/styleparams.h"
27
28 #include "wx/vector.h"
29
30 #include <stdio.h> // for vsscanf
31 #include <stdarg.h>
32
33 //-----------------------------------------------------------------------------
34 // wxHtmlTagsCache
35 //-----------------------------------------------------------------------------
36
37 struct wxHtmlCacheItem
38 {
39 // this is "pos" value passed to wxHtmlTag's constructor.
40 // it is position of '<' character of the tag
41 wxString::const_iterator Key;
42
43 // Tag type
44 enum Type
45 {
46 Type_Normal, // normal tag with a matching ending tag
47 Type_NoMatchingEndingTag, // there's no ending tag for this tag
48 Type_EndingTag // this is ending tag </..>
49 };
50 Type type;
51
52 // end positions for the tag:
53 // end1 is '<' of ending tag,
54 // end2 is '>' or both are
55 wxString::const_iterator End1, End2;
56
57 // name of this tag
58 wxChar *Name;
59 };
60
61 // NB: this is an empty class and not typedef because of forward declaration
62 class wxHtmlTagsCacheData : public wxVector<wxHtmlCacheItem>
63 {
64 };
65
66 bool wxIsCDATAElement(const wxChar *tag)
67 {
68 return (wxStrcmp(tag, wxT("SCRIPT")) == 0) ||
69 (wxStrcmp(tag, wxT("STYLE")) == 0);
70 }
71
72 bool wxIsCDATAElement(const wxString& tag)
73 {
74 return (wxStrcmp(tag.wx_str(), wxS("SCRIPT")) == 0) ||
75 (wxStrcmp(tag.wx_str(), wxS("STYLE")) == 0);
76 }
77
78 wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
79 {
80 m_Cache = new wxHtmlTagsCacheData;
81 m_CachePos = 0;
82
83 wxChar tagBuffer[256];
84
85 const wxString::const_iterator end = source.end();
86 for ( wxString::const_iterator pos = source.begin(); pos < end; ++pos )
87 {
88 if (*pos != wxT('<'))
89 continue;
90
91 // possible tag start found:
92
93 // don't cache comment tags
94 if ( wxHtmlParser::SkipCommentTag(pos, end) )
95 continue;
96
97 size_t tg = Cache().size();
98 Cache().push_back(wxHtmlCacheItem());
99
100 wxString::const_iterator stpos = pos++;
101 Cache()[tg].Key = stpos;
102
103 int i;
104 for ( i = 0;
105 pos < end && i < (int)WXSIZEOF(tagBuffer) - 1 &&
106 *pos != wxT('>') && !wxIsspace(*pos);
107 ++i, ++pos )
108 {
109 tagBuffer[i] = (wxChar)wxToupper(*pos);
110 }
111 tagBuffer[i] = wxT('\0');
112
113 Cache()[tg].Name = new wxChar[i+1];
114 memcpy(Cache()[tg].Name, tagBuffer, (i+1)*sizeof(wxChar));
115
116 while (pos < end && *pos != wxT('>'))
117 ++pos;
118
119 if ((stpos+1) < end && *(stpos+1) == wxT('/')) // ending tag:
120 {
121 Cache()[tg].type = wxHtmlCacheItem::Type_EndingTag;
122 // find matching begin tag:
123 for (i = tg; i >= 0; i--)
124 {
125 if ((Cache()[i].type == wxHtmlCacheItem::Type_NoMatchingEndingTag) && (wxStrcmp(Cache()[i].Name, tagBuffer+1) == 0))
126 {
127 Cache()[i].type = wxHtmlCacheItem::Type_Normal;
128 Cache()[i].End1 = stpos;
129 Cache()[i].End2 = pos + 1;
130 break;
131 }
132 }
133 }
134 else
135 {
136 Cache()[tg].type = wxHtmlCacheItem::Type_NoMatchingEndingTag;
137
138 if (wxIsCDATAElement(tagBuffer))
139 {
140 // store the orig pos in case we are missing the closing
141 // tag (see below)
142 const wxString::const_iterator old_pos = pos;
143 bool foundCloseTag = false;
144
145 // find next matching tag
146 int tag_len = wxStrlen(tagBuffer);
147 while (pos < end)
148 {
149 // find the ending tag
150 while (pos + 1 < end &&
151 (*pos != '<' || *(pos+1) != '/'))
152 ++pos;
153 if (*pos == '<')
154 ++pos;
155
156 // see if it matches
157 int match_pos = 0;
158 while (pos < end && match_pos < tag_len )
159 {
160 wxChar c = *pos;
161 if ( c == '>' || c == '<' )
162 break;
163
164 // cast to wxChar needed to suppress warning in
165 // Unicode build
166 if ((wxChar)wxToupper(c) == tagBuffer[match_pos])
167 {
168 ++match_pos;
169 }
170 else if (c == wxT(' ') || c == wxT('\n') ||
171 c == wxT('\r') || c == wxT('\t'))
172 {
173 // need to skip over these
174 }
175 else
176 {
177 match_pos = 0;
178 }
179 ++pos;
180 }
181
182 // found a match
183 if (match_pos == tag_len)
184 {
185 pos = pos - tag_len - 3;
186 foundCloseTag = true;
187 break;
188 }
189 else // keep looking for the closing tag
190 {
191 ++pos;
192 }
193 }
194 if (!foundCloseTag)
195 {
196 // we didn't find closing tag; this means the markup
197 // is incorrect and the best thing we can do is to
198 // ignore the unclosed tag and continue parsing as if
199 // it didn't exist:
200 pos = old_pos;
201 }
202 }
203 }
204 }
205
206 // ok, we're done, now we'll free .Name members of cache - we don't need it anymore:
207 for ( wxHtmlTagsCacheData::iterator i = Cache().begin();
208 i != Cache().end(); ++i )
209 {
210 wxDELETEA(i->Name);
211 }
212 }
213
214 wxHtmlTagsCache::~wxHtmlTagsCache()
215 {
216 delete m_Cache;
217 }
218
219 void wxHtmlTagsCache::QueryTag(const wxString::const_iterator& at,
220 const wxString::const_iterator& inputEnd,
221 wxString::const_iterator *end1,
222 wxString::const_iterator *end2,
223 bool *hasEnding)
224 {
225 if (Cache().empty())
226 return;
227
228 if (Cache()[m_CachePos].Key != at)
229 {
230 int delta = (at < Cache()[m_CachePos].Key) ? -1 : 1;
231 do
232 {
233 m_CachePos += delta;
234
235 if ( m_CachePos < 0 || m_CachePos >= (int)Cache().size() )
236 {
237 if ( m_CachePos < 0 )
238 m_CachePos = 0;
239 else
240 m_CachePos = Cache().size() - 1;
241 // something is very wrong with HTML, give up by returning an
242 // impossibly large value which is going to be ignored by the
243 // caller
244 *end1 =
245 *end2 = inputEnd;
246 *hasEnding = true;
247 return;
248 }
249 }
250 while (Cache()[m_CachePos].Key != at);
251 }
252
253 switch ( Cache()[m_CachePos].type )
254 {
255 case wxHtmlCacheItem::Type_Normal:
256 *end1 = Cache()[m_CachePos].End1;
257 *end2 = Cache()[m_CachePos].End2;
258 *hasEnding = true;
259 break;
260
261 case wxHtmlCacheItem::Type_EndingTag:
262 wxFAIL_MSG("QueryTag called for ending tag - can't be");
263 // but if it does happen, fall through, better than crashing
264
265 case wxHtmlCacheItem::Type_NoMatchingEndingTag:
266 // If input HTML is invalid and there's no closing tag for this
267 // one, pretend that it runs all the way to the end of input
268 *end1 = inputEnd;
269 *end2 = inputEnd;
270 *hasEnding = false;
271 break;
272 }
273 }
274
275
276
277
278 //-----------------------------------------------------------------------------
279 // wxHtmlTag
280 //-----------------------------------------------------------------------------
281
282 wxHtmlTag::wxHtmlTag(wxHtmlTag *parent,
283 const wxString *source,
284 const wxString::const_iterator& pos,
285 const wxString::const_iterator& end_pos,
286 wxHtmlTagsCache *cache,
287 wxHtmlEntitiesParser *entParser)
288 {
289 /* Setup DOM relations */
290
291 m_Next = NULL;
292 m_FirstChild = m_LastChild = NULL;
293 m_Parent = parent;
294 if (parent)
295 {
296 m_Prev = m_Parent->m_LastChild;
297 if (m_Prev == NULL)
298 m_Parent->m_FirstChild = this;
299 else
300 m_Prev->m_Next = this;
301 m_Parent->m_LastChild = this;
302 }
303 else
304 m_Prev = NULL;
305
306 /* Find parameters and their values: */
307
308 wxChar c wxDUMMY_INITIALIZE(0);
309
310 // fill-in name, params and begin pos:
311 wxString::const_iterator i(pos+1);
312
313 // find tag's name and convert it to uppercase:
314 while ((i < end_pos) &&
315 ((c = *(i++)) != wxT(' ') && c != wxT('\r') &&
316 c != wxT('\n') && c != wxT('\t') &&
317 c != wxT('>') && c != wxT('/')))
318 {
319 if ((c >= wxT('a')) && (c <= wxT('z')))
320 c -= (wxT('a') - wxT('A'));
321 m_Name << c;
322 }
323
324 // if the tag has parameters, read them and "normalize" them,
325 // i.e. convert to uppercase, replace whitespaces by spaces and
326 // remove whitespaces around '=':
327 if (*(i-1) != wxT('>'))
328 {
329 #define IS_WHITE(c) (c == wxT(' ') || c == wxT('\r') || \
330 c == wxT('\n') || c == wxT('\t'))
331 wxString pname, pvalue;
332 wxChar quote;
333 enum
334 {
335 ST_BEFORE_NAME = 1,
336 ST_NAME,
337 ST_BEFORE_EQ,
338 ST_BEFORE_VALUE,
339 ST_VALUE
340 } state;
341
342 quote = 0;
343 state = ST_BEFORE_NAME;
344 while (i < end_pos)
345 {
346 c = *(i++);
347
348 if (c == wxT('>') && !(state == ST_VALUE && quote != 0))
349 {
350 if (state == ST_BEFORE_EQ || state == ST_NAME)
351 {
352 m_ParamNames.Add(pname);
353 m_ParamValues.Add(wxGetEmptyString());
354 }
355 else if (state == ST_VALUE && quote == 0)
356 {
357 m_ParamNames.Add(pname);
358 if (entParser)
359 m_ParamValues.Add(entParser->Parse(pvalue));
360 else
361 m_ParamValues.Add(pvalue);
362 }
363 break;
364 }
365 switch (state)
366 {
367 case ST_BEFORE_NAME:
368 if (!IS_WHITE(c))
369 {
370 pname = c;
371 state = ST_NAME;
372 }
373 break;
374 case ST_NAME:
375 if (IS_WHITE(c))
376 state = ST_BEFORE_EQ;
377 else if (c == wxT('='))
378 state = ST_BEFORE_VALUE;
379 else
380 pname << c;
381 break;
382 case ST_BEFORE_EQ:
383 if (c == wxT('='))
384 state = ST_BEFORE_VALUE;
385 else if (!IS_WHITE(c))
386 {
387 m_ParamNames.Add(pname);
388 m_ParamValues.Add(wxGetEmptyString());
389 pname = c;
390 state = ST_NAME;
391 }
392 break;
393 case ST_BEFORE_VALUE:
394 if (!IS_WHITE(c))
395 {
396 if (c == wxT('"') || c == wxT('\''))
397 quote = c, pvalue = wxGetEmptyString();
398 else
399 quote = 0, pvalue = c;
400 state = ST_VALUE;
401 }
402 break;
403 case ST_VALUE:
404 if ((quote != 0 && c == quote) ||
405 (quote == 0 && IS_WHITE(c)))
406 {
407 m_ParamNames.Add(pname);
408 if (quote == 0)
409 {
410 // VS: backward compatibility, no real reason,
411 // but wxHTML code relies on this... :(
412 pvalue.MakeUpper();
413 }
414 if (entParser)
415 m_ParamValues.Add(entParser->Parse(pvalue));
416 else
417 m_ParamValues.Add(pvalue);
418 state = ST_BEFORE_NAME;
419 }
420 else
421 pvalue << c;
422 break;
423 }
424 }
425
426 #undef IS_WHITE
427 }
428 m_Begin = i;
429 cache->QueryTag(pos, source->end(), &m_End1, &m_End2, &m_hasEnding);
430 if (m_End1 > end_pos) m_End1 = end_pos;
431 if (m_End2 > end_pos) m_End2 = end_pos;
432
433 #if WXWIN_COMPATIBILITY_2_8
434 m_sourceStart = source->begin();
435 #endif
436
437 // Try to parse any style parameters that can be handled simply by
438 // converting them to the equivalent HTML 3 attributes: this is a far cry
439 // from perfect but better than nothing.
440 static const struct EquivAttr
441 {
442 const char *style;
443 const char *attr;
444 } equivAttrs[] =
445 {
446 { "text-align", "ALIGN" },
447 { "width", "WIDTH" },
448 { "vertical-align", "VALIGN" },
449 { "background", "BGCOLOR" },
450 };
451
452 wxHtmlStyleParams styleParams(*this);
453 for ( unsigned n = 0; n < WXSIZEOF(equivAttrs); n++ )
454 {
455 const EquivAttr& ea = equivAttrs[n];
456 if ( styleParams.HasParam(ea.style) && !HasParam(ea.attr) )
457 {
458 m_ParamNames.Add(ea.attr);
459 m_ParamValues.Add(styleParams.GetParam(ea.style));
460 }
461 }
462 }
463
464 wxHtmlTag::~wxHtmlTag()
465 {
466 wxHtmlTag *t1, *t2;
467 t1 = m_FirstChild;
468 while (t1)
469 {
470 t2 = t1->GetNextSibling();
471 delete t1;
472 t1 = t2;
473 }
474 }
475
476 bool wxHtmlTag::HasParam(const wxString& par) const
477 {
478 return (m_ParamNames.Index(par, false) != wxNOT_FOUND);
479 }
480
481 wxString wxHtmlTag::GetParam(const wxString& par, bool with_quotes) const
482 {
483 int index = m_ParamNames.Index(par, false);
484 if (index == wxNOT_FOUND)
485 return wxGetEmptyString();
486 if (with_quotes)
487 {
488 // VS: backward compatibility, seems to be never used by wxHTML...
489 wxString s;
490 s << wxT('"') << m_ParamValues[index] << wxT('"');
491 return s;
492 }
493 else
494 return m_ParamValues[index];
495 }
496
497 int wxHtmlTag::ScanParam(const wxString& par,
498 const char *format,
499 void *param) const
500 {
501 wxString parval = GetParam(par);
502 return wxSscanf(parval, format, param);
503 }
504
505 int wxHtmlTag::ScanParam(const wxString& par,
506 const wchar_t *format,
507 void *param) const
508 {
509 wxString parval = GetParam(par);
510 return wxSscanf(parval, format, param);
511 }
512
513 /* static */
514 bool wxHtmlTag::ParseAsColour(const wxString& str, wxColour *clr)
515 {
516 wxCHECK_MSG( clr, false, wxT("invalid colour argument") );
517
518 // handle colours defined in HTML 4.0 first:
519 if (str.length() > 1 && str[0] != wxT('#'))
520 {
521 #define HTML_COLOUR(name, r, g, b) \
522 if (str.IsSameAs(wxS(name), false)) \
523 { clr->Set(r, g, b); return true; }
524 HTML_COLOUR("black", 0x00,0x00,0x00)
525 HTML_COLOUR("silver", 0xC0,0xC0,0xC0)
526 HTML_COLOUR("gray", 0x80,0x80,0x80)
527 HTML_COLOUR("white", 0xFF,0xFF,0xFF)
528 HTML_COLOUR("maroon", 0x80,0x00,0x00)
529 HTML_COLOUR("red", 0xFF,0x00,0x00)
530 HTML_COLOUR("purple", 0x80,0x00,0x80)
531 HTML_COLOUR("fuchsia", 0xFF,0x00,0xFF)
532 HTML_COLOUR("green", 0x00,0x80,0x00)
533 HTML_COLOUR("lime", 0x00,0xFF,0x00)
534 HTML_COLOUR("olive", 0x80,0x80,0x00)
535 HTML_COLOUR("yellow", 0xFF,0xFF,0x00)
536 HTML_COLOUR("navy", 0x00,0x00,0x80)
537 HTML_COLOUR("blue", 0x00,0x00,0xFF)
538 HTML_COLOUR("teal", 0x00,0x80,0x80)
539 HTML_COLOUR("aqua", 0x00,0xFF,0xFF)
540 #undef HTML_COLOUR
541 }
542
543 // then try to parse #rrggbb representations or set from other well
544 // known names (note that this doesn't strictly conform to HTML spec,
545 // but it doesn't do real harm -- but it *must* be done after the standard
546 // colors are handled above):
547 if (clr->Set(str))
548 return true;
549
550 return false;
551 }
552
553 bool wxHtmlTag::GetParamAsColour(const wxString& par, wxColour *clr) const
554 {
555 const wxString str = GetParam(par);
556 return !str.empty() && ParseAsColour(str, clr);
557 }
558
559 bool wxHtmlTag::GetParamAsInt(const wxString& par, int *clr) const
560 {
561 if ( !HasParam(par) )
562 return false;
563
564 long i;
565 if ( !GetParam(par).ToLong(&i) )
566 return false;
567
568 *clr = (int)i;
569 return true;
570 }
571
572 wxString wxHtmlTag::GetAllParams() const
573 {
574 // VS: this function is for backward compatibility only,
575 // never used by wxHTML
576 wxString s;
577 size_t cnt = m_ParamNames.GetCount();
578 for (size_t i = 0; i < cnt; i++)
579 {
580 s << m_ParamNames[i];
581 s << wxT('=');
582 if (m_ParamValues[i].Find(wxT('"')) != wxNOT_FOUND)
583 s << wxT('\'') << m_ParamValues[i] << wxT('\'');
584 else
585 s << wxT('"') << m_ParamValues[i] << wxT('"');
586 }
587 return s;
588 }
589
590 wxHtmlTag *wxHtmlTag::GetFirstSibling() const
591 {
592 if (m_Parent)
593 return m_Parent->m_FirstChild;
594 else
595 {
596 wxHtmlTag *cur = (wxHtmlTag*)this;
597 while (cur->m_Prev)
598 cur = cur->m_Prev;
599 return cur;
600 }
601 }
602
603 wxHtmlTag *wxHtmlTag::GetLastSibling() const
604 {
605 if (m_Parent)
606 return m_Parent->m_LastChild;
607 else
608 {
609 wxHtmlTag *cur = (wxHtmlTag*)this;
610 while (cur->m_Next)
611 cur = cur->m_Next;
612 return cur;
613 }
614 }
615
616 wxHtmlTag *wxHtmlTag::GetNextTag() const
617 {
618 if (m_FirstChild) return m_FirstChild;
619 if (m_Next) return m_Next;
620 wxHtmlTag *cur = m_Parent;
621 if (!cur) return NULL;
622 while (cur->m_Parent && !cur->m_Next)
623 cur = cur->m_Parent;
624 return cur->m_Next;
625 }
626
627 #endif