]> git.saurik.com Git - wxWidgets.git/blob - src/html/htmltag.cpp
Use _UNICODE instead of UNICODE in wx/msw/winundef.h.
[wxWidgets.git] / src / html / htmltag.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/html/htmltag.cpp
3 // Purpose: wxHtmlTag class (represents single tag)
4 // Author: Vaclav Slavik
5 // RCS-ID: $Id$
6 // Copyright: (c) 1999 Vaclav Slavik
7 // Licence: wxWindows licence
8 /////////////////////////////////////////////////////////////////////////////
9
10 #include "wx/wxprec.h"
11
12 #ifdef __BORLANDC__
13 #pragma hdrstop
14 #endif
15
16 #if wxUSE_HTML
17
18 #include "wx/html/htmltag.h"
19
20 #ifndef WX_PRECOMP
21 #include "wx/colour.h"
22 #include "wx/wxcrtvararg.h"
23 #endif
24
25 #include "wx/html/htmlpars.h"
26 #include "wx/vector.h"
27
28 #include <stdio.h> // for vsscanf
29 #include <stdarg.h>
30
31 //-----------------------------------------------------------------------------
32 // wxHtmlTagsCache
33 //-----------------------------------------------------------------------------
34
35 struct wxHtmlCacheItem
36 {
37 // this is "pos" value passed to wxHtmlTag's constructor.
38 // it is position of '<' character of the tag
39 wxString::const_iterator Key;
40
41 // Tag type
42 enum Type
43 {
44 Type_Normal, // normal tag with a matching ending tag
45 Type_NoMatchingEndingTag, // there's no ending tag for this tag
46 Type_EndingTag // this is ending tag </..>
47 };
48 Type type;
49
50 // end positions for the tag:
51 // end1 is '<' of ending tag,
52 // end2 is '>' or both are
53 wxString::const_iterator End1, End2;
54
55 // name of this tag
56 wxChar *Name;
57 };
58
59 // NB: this is an empty class and not typedef because of forward declaration
60 class wxHtmlTagsCacheData : public wxVector<wxHtmlCacheItem>
61 {
62 };
63
64 bool wxIsCDATAElement(const wxChar *tag)
65 {
66 return (wxStrcmp(tag, wxT("SCRIPT")) == 0) ||
67 (wxStrcmp(tag, wxT("STYLE")) == 0);
68 }
69
70 bool wxIsCDATAElement(const wxString& tag)
71 {
72 return (wxStrcmp(tag.wx_str(), wxS("SCRIPT")) == 0) ||
73 (wxStrcmp(tag.wx_str(), wxS("STYLE")) == 0);
74 }
75
76 wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
77 {
78 m_Cache = new wxHtmlTagsCacheData;
79 m_CachePos = 0;
80
81 wxChar tagBuffer[256];
82
83 const wxString::const_iterator end = source.end();
84 for ( wxString::const_iterator pos = source.begin(); pos < end; ++pos )
85 {
86 if (*pos == wxT('<')) // tag found:
87 {
88 // don't cache comment tags
89 if ( wxHtmlParser::SkipCommentTag(pos, source.end()) )
90 continue;
91
92 size_t tg = Cache().size();
93 Cache().push_back(wxHtmlCacheItem());
94
95 wxString::const_iterator stpos = pos++;
96 Cache()[tg].Key = stpos;
97
98 int i;
99 for ( i = 0;
100 pos < end && i < (int)WXSIZEOF(tagBuffer) - 1 &&
101 *pos != wxT('>') && !wxIsspace(*pos);
102 ++i, ++pos )
103 {
104 tagBuffer[i] = (wxChar)wxToupper(*pos);
105 }
106 tagBuffer[i] = wxT('\0');
107
108 Cache()[tg].Name = new wxChar[i+1];
109 memcpy(Cache()[tg].Name, tagBuffer, (i+1)*sizeof(wxChar));
110
111 while (pos < end && *pos != wxT('>'))
112 ++pos;
113
114 if ((stpos+1) < end && *(stpos+1) == wxT('/')) // ending tag:
115 {
116 Cache()[tg].type = wxHtmlCacheItem::Type_EndingTag;
117 // find matching begin tag:
118 for (i = tg; i >= 0; i--)
119 {
120 if ((Cache()[i].type == wxHtmlCacheItem::Type_NoMatchingEndingTag) && (wxStrcmp(Cache()[i].Name, tagBuffer+1) == 0))
121 {
122 Cache()[i].type = wxHtmlCacheItem::Type_Normal;
123 Cache()[i].End1 = stpos;
124 Cache()[i].End2 = pos + 1;
125 break;
126 }
127 }
128 }
129 else
130 {
131 Cache()[tg].type = wxHtmlCacheItem::Type_NoMatchingEndingTag;
132
133 if (wxIsCDATAElement(tagBuffer))
134 {
135 // store the orig pos in case we are missing the closing
136 // tag (see below)
137 const wxString::const_iterator old_pos = pos;
138 bool foundCloseTag = false;
139
140 // find next matching tag
141 int tag_len = wxStrlen(tagBuffer);
142 while (pos < end)
143 {
144 // find the ending tag
145 while (pos + 1 < end &&
146 (*pos != '<' || *(pos+1) != '/'))
147 ++pos;
148 if (*pos == '<')
149 ++pos;
150
151 // see if it matches
152 int match_pos = 0;
153 while (pos < end && match_pos < tag_len )
154 {
155 wxChar c = *pos;
156 if ( c == '>' || c == '<' )
157 break;
158
159 // cast to wxChar needed to suppress warning in
160 // Unicode build
161 if ((wxChar)wxToupper(c) == tagBuffer[match_pos])
162 {
163 ++match_pos;
164 }
165 else if (c == wxT(' ') || c == wxT('\n') ||
166 c == wxT('\r') || c == wxT('\t'))
167 {
168 // need to skip over these
169 }
170 else
171 {
172 match_pos = 0;
173 }
174 ++pos;
175 }
176
177 // found a match
178 if (match_pos == tag_len)
179 {
180 pos = pos - tag_len - 3;
181 foundCloseTag = true;
182 break;
183 }
184 else // keep looking for the closing tag
185 {
186 ++pos;
187 }
188 }
189 if (!foundCloseTag)
190 {
191 // we didn't find closing tag; this means the markup
192 // is incorrect and the best thing we can do is to
193 // ignore the unclosed tag and continue parsing as if
194 // it didn't exist:
195 pos = old_pos;
196 }
197 }
198 }
199 }
200 }
201
202 // ok, we're done, now we'll free .Name members of cache - we don't need it anymore:
203 for ( wxHtmlTagsCacheData::iterator i = Cache().begin();
204 i != Cache().end(); ++i )
205 {
206 delete[] i->Name;
207 i->Name = NULL;
208 }
209 }
210
211 wxHtmlTagsCache::~wxHtmlTagsCache()
212 {
213 delete m_Cache;
214 }
215
216 void wxHtmlTagsCache::QueryTag(const wxString::const_iterator& at,
217 const wxString::const_iterator& inputEnd,
218 wxString::const_iterator *end1,
219 wxString::const_iterator *end2,
220 bool *hasEnding)
221 {
222 if (Cache().empty())
223 return;
224
225 if (Cache()[m_CachePos].Key != at)
226 {
227 int delta = (at < Cache()[m_CachePos].Key) ? -1 : 1;
228 do
229 {
230 m_CachePos += delta;
231
232 if ( m_CachePos < 0 || m_CachePos >= (int)Cache().size() )
233 {
234 if ( m_CachePos < 0 )
235 m_CachePos = 0;
236 else
237 m_CachePos = Cache().size() - 1;
238 // something is very wrong with HTML, give up by returning an
239 // impossibly large value which is going to be ignored by the
240 // caller
241 *end1 =
242 *end2 = inputEnd;
243 *hasEnding = true;
244 return;
245 }
246 }
247 while (Cache()[m_CachePos].Key != at);
248 }
249
250 switch ( Cache()[m_CachePos].type )
251 {
252 case wxHtmlCacheItem::Type_Normal:
253 *end1 = Cache()[m_CachePos].End1;
254 *end2 = Cache()[m_CachePos].End2;
255 *hasEnding = true;
256 break;
257
258 case wxHtmlCacheItem::Type_EndingTag:
259 wxFAIL_MSG("QueryTag called for ending tag - can't be");
260 // but if it does happen, fall through, better than crashing
261
262 case wxHtmlCacheItem::Type_NoMatchingEndingTag:
263 // If input HTML is invalid and there's no closing tag for this
264 // one, pretend that it runs all the way to the end of input
265 *end1 = inputEnd;
266 *end2 = inputEnd;
267 *hasEnding = false;
268 break;
269 }
270 }
271
272
273
274
275 //-----------------------------------------------------------------------------
276 // wxHtmlTag
277 //-----------------------------------------------------------------------------
278
279 wxHtmlTag::wxHtmlTag(wxHtmlTag *parent,
280 const wxString *source,
281 const wxString::const_iterator& pos,
282 const wxString::const_iterator& end_pos,
283 wxHtmlTagsCache *cache,
284 wxHtmlEntitiesParser *entParser)
285 {
286 /* Setup DOM relations */
287
288 m_Next = NULL;
289 m_FirstChild = m_LastChild = NULL;
290 m_Parent = parent;
291 if (parent)
292 {
293 m_Prev = m_Parent->m_LastChild;
294 if (m_Prev == NULL)
295 m_Parent->m_FirstChild = this;
296 else
297 m_Prev->m_Next = this;
298 m_Parent->m_LastChild = this;
299 }
300 else
301 m_Prev = NULL;
302
303 /* Find parameters and their values: */
304
305 wxChar c wxDUMMY_INITIALIZE(0);
306
307 // fill-in name, params and begin pos:
308 wxString::const_iterator i(pos+1);
309
310 // find tag's name and convert it to uppercase:
311 while ((i < end_pos) &&
312 ((c = *(i++)) != wxT(' ') && c != wxT('\r') &&
313 c != wxT('\n') && c != wxT('\t') &&
314 c != wxT('>') && c != wxT('/')))
315 {
316 if ((c >= wxT('a')) && (c <= wxT('z')))
317 c -= (wxT('a') - wxT('A'));
318 m_Name << c;
319 }
320
321 // if the tag has parameters, read them and "normalize" them,
322 // i.e. convert to uppercase, replace whitespaces by spaces and
323 // remove whitespaces around '=':
324 if (*(i-1) != wxT('>'))
325 {
326 #define IS_WHITE(c) (c == wxT(' ') || c == wxT('\r') || \
327 c == wxT('\n') || c == wxT('\t'))
328 wxString pname, pvalue;
329 wxChar quote;
330 enum
331 {
332 ST_BEFORE_NAME = 1,
333 ST_NAME,
334 ST_BEFORE_EQ,
335 ST_BEFORE_VALUE,
336 ST_VALUE
337 } state;
338
339 quote = 0;
340 state = ST_BEFORE_NAME;
341 while (i < end_pos)
342 {
343 c = *(i++);
344
345 if (c == wxT('>') && !(state == ST_VALUE && quote != 0))
346 {
347 if (state == ST_BEFORE_EQ || state == ST_NAME)
348 {
349 m_ParamNames.Add(pname);
350 m_ParamValues.Add(wxGetEmptyString());
351 }
352 else if (state == ST_VALUE && quote == 0)
353 {
354 m_ParamNames.Add(pname);
355 if (entParser)
356 m_ParamValues.Add(entParser->Parse(pvalue));
357 else
358 m_ParamValues.Add(pvalue);
359 }
360 break;
361 }
362 switch (state)
363 {
364 case ST_BEFORE_NAME:
365 if (!IS_WHITE(c))
366 {
367 pname = c;
368 state = ST_NAME;
369 }
370 break;
371 case ST_NAME:
372 if (IS_WHITE(c))
373 state = ST_BEFORE_EQ;
374 else if (c == wxT('='))
375 state = ST_BEFORE_VALUE;
376 else
377 pname << c;
378 break;
379 case ST_BEFORE_EQ:
380 if (c == wxT('='))
381 state = ST_BEFORE_VALUE;
382 else if (!IS_WHITE(c))
383 {
384 m_ParamNames.Add(pname);
385 m_ParamValues.Add(wxGetEmptyString());
386 pname = c;
387 state = ST_NAME;
388 }
389 break;
390 case ST_BEFORE_VALUE:
391 if (!IS_WHITE(c))
392 {
393 if (c == wxT('"') || c == wxT('\''))
394 quote = c, pvalue = wxGetEmptyString();
395 else
396 quote = 0, pvalue = c;
397 state = ST_VALUE;
398 }
399 break;
400 case ST_VALUE:
401 if ((quote != 0 && c == quote) ||
402 (quote == 0 && IS_WHITE(c)))
403 {
404 m_ParamNames.Add(pname);
405 if (quote == 0)
406 {
407 // VS: backward compatibility, no real reason,
408 // but wxHTML code relies on this... :(
409 pvalue.MakeUpper();
410 }
411 if (entParser)
412 m_ParamValues.Add(entParser->Parse(pvalue));
413 else
414 m_ParamValues.Add(pvalue);
415 state = ST_BEFORE_NAME;
416 }
417 else
418 pvalue << c;
419 break;
420 }
421 }
422
423 #undef IS_WHITE
424 }
425 m_Begin = i;
426 cache->QueryTag(pos, source->end(), &m_End1, &m_End2, &m_hasEnding);
427 if (m_End1 > end_pos) m_End1 = end_pos;
428 if (m_End2 > end_pos) m_End2 = end_pos;
429
430 #if WXWIN_COMPATIBILITY_2_8
431 m_sourceStart = source->begin();
432 #endif
433 }
434
435 wxHtmlTag::~wxHtmlTag()
436 {
437 wxHtmlTag *t1, *t2;
438 t1 = m_FirstChild;
439 while (t1)
440 {
441 t2 = t1->GetNextSibling();
442 delete t1;
443 t1 = t2;
444 }
445 }
446
447 bool wxHtmlTag::HasParam(const wxString& par) const
448 {
449 return (m_ParamNames.Index(par, false) != wxNOT_FOUND);
450 }
451
452 wxString wxHtmlTag::GetParam(const wxString& par, bool with_quotes) const
453 {
454 int index = m_ParamNames.Index(par, false);
455 if (index == wxNOT_FOUND)
456 return wxGetEmptyString();
457 if (with_quotes)
458 {
459 // VS: backward compatibility, seems to be never used by wxHTML...
460 wxString s;
461 s << wxT('"') << m_ParamValues[index] << wxT('"');
462 return s;
463 }
464 else
465 return m_ParamValues[index];
466 }
467
468 int wxHtmlTag::ScanParam(const wxString& par,
469 const char *format,
470 void *param) const
471 {
472 wxString parval = GetParam(par);
473 return wxSscanf(parval, format, param);
474 }
475
476 int wxHtmlTag::ScanParam(const wxString& par,
477 const wchar_t *format,
478 void *param) const
479 {
480 wxString parval = GetParam(par);
481 return wxSscanf(parval, format, param);
482 }
483
484 bool wxHtmlTag::GetParamAsColour(const wxString& par, wxColour *clr) const
485 {
486 wxCHECK_MSG( clr, false, wxT("invalid colour argument") );
487
488 wxString str = GetParam(par);
489
490 // handle colours defined in HTML 4.0 first:
491 if (str.length() > 1 && str[0] != wxT('#'))
492 {
493 #define HTML_COLOUR(name, r, g, b) \
494 if (str.IsSameAs(wxS(name), false)) \
495 { clr->Set(r, g, b); return true; }
496 HTML_COLOUR("black", 0x00,0x00,0x00)
497 HTML_COLOUR("silver", 0xC0,0xC0,0xC0)
498 HTML_COLOUR("gray", 0x80,0x80,0x80)
499 HTML_COLOUR("white", 0xFF,0xFF,0xFF)
500 HTML_COLOUR("maroon", 0x80,0x00,0x00)
501 HTML_COLOUR("red", 0xFF,0x00,0x00)
502 HTML_COLOUR("purple", 0x80,0x00,0x80)
503 HTML_COLOUR("fuchsia", 0xFF,0x00,0xFF)
504 HTML_COLOUR("green", 0x00,0x80,0x00)
505 HTML_COLOUR("lime", 0x00,0xFF,0x00)
506 HTML_COLOUR("olive", 0x80,0x80,0x00)
507 HTML_COLOUR("yellow", 0xFF,0xFF,0x00)
508 HTML_COLOUR("navy", 0x00,0x00,0x80)
509 HTML_COLOUR("blue", 0x00,0x00,0xFF)
510 HTML_COLOUR("teal", 0x00,0x80,0x80)
511 HTML_COLOUR("aqua", 0x00,0xFF,0xFF)
512 #undef HTML_COLOUR
513 }
514
515 // then try to parse #rrggbb representations or set from other well
516 // known names (note that this doesn't strictly conform to HTML spec,
517 // but it doesn't do real harm -- but it *must* be done after the standard
518 // colors are handled above):
519 if (clr->Set(str))
520 return true;
521
522 return false;
523 }
524
525 bool wxHtmlTag::GetParamAsInt(const wxString& par, int *clr) const
526 {
527 if ( !HasParam(par) )
528 return false;
529
530 long i;
531 if ( !GetParam(par).ToLong(&i) )
532 return false;
533
534 *clr = (int)i;
535 return true;
536 }
537
538 wxString wxHtmlTag::GetAllParams() const
539 {
540 // VS: this function is for backward compatibility only,
541 // never used by wxHTML
542 wxString s;
543 size_t cnt = m_ParamNames.GetCount();
544 for (size_t i = 0; i < cnt; i++)
545 {
546 s << m_ParamNames[i];
547 s << wxT('=');
548 if (m_ParamValues[i].Find(wxT('"')) != wxNOT_FOUND)
549 s << wxT('\'') << m_ParamValues[i] << wxT('\'');
550 else
551 s << wxT('"') << m_ParamValues[i] << wxT('"');
552 }
553 return s;
554 }
555
556 wxHtmlTag *wxHtmlTag::GetFirstSibling() const
557 {
558 if (m_Parent)
559 return m_Parent->m_FirstChild;
560 else
561 {
562 wxHtmlTag *cur = (wxHtmlTag*)this;
563 while (cur->m_Prev)
564 cur = cur->m_Prev;
565 return cur;
566 }
567 }
568
569 wxHtmlTag *wxHtmlTag::GetLastSibling() const
570 {
571 if (m_Parent)
572 return m_Parent->m_LastChild;
573 else
574 {
575 wxHtmlTag *cur = (wxHtmlTag*)this;
576 while (cur->m_Next)
577 cur = cur->m_Next;
578 return cur;
579 }
580 }
581
582 wxHtmlTag *wxHtmlTag::GetNextTag() const
583 {
584 if (m_FirstChild) return m_FirstChild;
585 if (m_Next) return m_Next;
586 wxHtmlTag *cur = m_Parent;
587 if (!cur) return NULL;
588 while (cur->m_Parent && !cur->m_Next)
589 cur = cur->m_Parent;
590 return cur->m_Next;
591 }
592
593 #endif