don't derive wxHtmlTag and wxHtmlTagsCache from wxObject, it's useless
[wxWidgets.git] / src / html / htmltag.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/html/htmltag.cpp
3 // Purpose: wxHtmlTag class (represents single tag)
4 // Author: Vaclav Slavik
5 // RCS-ID: $Id$
6 // Copyright: (c) 1999 Vaclav Slavik
7 // Licence: wxWindows licence
8 /////////////////////////////////////////////////////////////////////////////
9
10 #include "wx/wxprec.h"
11
12 #ifdef __BORLANDC__
13 #pragma hdrstop
14 #endif
15
16 #if wxUSE_HTML
17
18 #include "wx/html/htmltag.h"
19
20 #ifndef WX_PRECOMP
21 #include "wx/colour.h"
22 #include "wx/wxcrtvararg.h"
23 #endif
24
25 #include "wx/html/htmlpars.h"
26 #include "wx/vector.h"
27
28 #include <stdio.h> // for vsscanf
29 #include <stdarg.h>
30
31 //-----------------------------------------------------------------------------
32 // wxHtmlTagsCache
33 //-----------------------------------------------------------------------------
34
35 struct wxHtmlCacheItem
36 {
37 // this is "pos" value passed to wxHtmlTag's constructor.
38 // it is position of '<' character of the tag
39 int Key;
40
41 // end positions for the tag:
42 // end1 is '<' of ending tag,
43 // end2 is '>' or both are
44 // -1 if there is no ending tag for this one...
45 // or -2 if this is ending tag </...>
46 int End1, End2;
47
48 // name of this tag
49 wxChar *Name;
50 };
51
52 // NB: this is an empty class and not typedef because of forward declaration
53 class wxHtmlTagsCacheData : public wxVector<wxHtmlCacheItem>
54 {
55 };
56
57 bool wxIsCDATAElement(const wxChar *tag)
58 {
59 return (wxStrcmp(tag, _T("SCRIPT")) == 0) ||
60 (wxStrcmp(tag, _T("STYLE")) == 0);
61 }
62
63 wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
64 {
65 m_Cache = new wxHtmlTagsCacheData;
66 m_CachePos = 0;
67
68 const wxChar *src = source.c_str();
69 int lng = source.length();
70 wxChar tagBuffer[256];
71
72 for ( int pos = 0; pos < lng; pos++ )
73 {
74 if (src[pos] == wxT('<')) // tag found:
75 {
76 // don't cache comment tags
77 wxString::const_iterator iter = source.begin() + pos;
78 if ( wxHtmlParser::SkipCommentTag(iter, source.end()) )
79 {
80 pos = iter - source.begin();
81 continue;
82 }
83
84 size_t tg = Cache().size();
85 Cache().push_back(wxHtmlCacheItem());
86
87 int stpos = pos++;
88 Cache()[tg].Key = stpos;
89
90 int i;
91 for ( i = 0;
92 pos < lng && i < (int)WXSIZEOF(tagBuffer) - 1 &&
93 src[pos] != wxT('>') && !wxIsspace(src[pos]);
94 i++, pos++ )
95 {
96 tagBuffer[i] = (wxChar)wxToupper(src[pos]);
97 }
98 tagBuffer[i] = _T('\0');
99
100 Cache()[tg].Name = new wxChar[i+1];
101 memcpy(Cache()[tg].Name, tagBuffer, (i+1)*sizeof(wxChar));
102
103 while (pos < lng && src[pos] != wxT('>')) pos++;
104
105 if (src[stpos+1] == wxT('/')) // ending tag:
106 {
107 Cache()[tg].End1 = Cache()[tg].End2 = -2;
108 // find matching begin tag:
109 for (i = tg; i >= 0; i--)
110 if ((Cache()[i].End1 == -1) && (wxStrcmp(Cache()[i].Name, tagBuffer+1) == 0))
111 {
112 Cache()[i].End1 = stpos;
113 Cache()[i].End2 = pos + 1;
114 break;
115 }
116 }
117 else
118 {
119 Cache()[tg].End1 = Cache()[tg].End2 = -1;
120
121 if (wxIsCDATAElement(tagBuffer))
122 {
123 // store the orig pos in case we are missing the closing
124 // tag (see below)
125 wxInt32 old_pos = pos;
126 bool foundCloseTag = false;
127
128 // find next matching tag
129 int tag_len = wxStrlen(tagBuffer);
130 while (pos < lng)
131 {
132 // find the ending tag
133 while (pos + 1 < lng &&
134 (src[pos] != '<' || src[pos+1] != '/'))
135 ++pos;
136 if (src[pos] == '<')
137 ++pos;
138
139 // see if it matches
140 int match_pos = 0;
141 while (pos < lng && match_pos < tag_len && src[pos] != '>' && src[pos] != '<') {
142 // cast to wxChar needed to suppress warning in
143 // Unicode build
144 if ((wxChar)wxToupper(src[pos]) == tagBuffer[match_pos]) {
145 ++match_pos;
146 }
147 else if (src[pos] == wxT(' ') || src[pos] == wxT('\n') ||
148 src[pos] == wxT('\r') || src[pos] == wxT('\t')) {
149 // need to skip over these
150 }
151 else {
152 match_pos = 0;
153 }
154 ++pos;
155 }
156
157 // found a match
158 if (match_pos == tag_len)
159 {
160 pos = pos - tag_len - 3;
161 foundCloseTag = true;
162 break;
163 }
164 else // keep looking for the closing tag
165 {
166 ++pos;
167 }
168 }
169 if (!foundCloseTag)
170 {
171 // we didn't find closing tag; this means the markup
172 // is incorrect and the best thing we can do is to
173 // ignore the unclosed tag and continue parsing as if
174 // it didn't exist:
175 pos = old_pos;
176 }
177 }
178 }
179 }
180 }
181
182 // ok, we're done, now we'll free .Name members of cache - we don't need it anymore:
183 for ( wxHtmlTagsCacheData::iterator i = Cache().begin();
184 i != Cache().end(); ++i )
185 {
186 delete[] i->Name;
187 i->Name = NULL;
188 }
189 }
190
191 wxHtmlTagsCache::~wxHtmlTagsCache()
192 {
193 delete m_Cache;
194 }
195
196 void wxHtmlTagsCache::QueryTag(int at, int* end1, int* end2)
197 {
198 if (Cache().empty())
199 return;
200
201 if (Cache()[m_CachePos].Key != at)
202 {
203 int delta = (at < Cache()[m_CachePos].Key) ? -1 : 1;
204 do
205 {
206 if ( m_CachePos < 0 || m_CachePos == Cache().size() )
207 {
208 // something is very wrong with HTML, give up by returning an
209 // impossibly large value which is going to be ignored by the
210 // caller
211 *end1 =
212 *end2 = INT_MAX;
213 return;
214 }
215
216 m_CachePos += delta;
217 }
218 while (Cache()[m_CachePos].Key != at);
219 }
220 *end1 = Cache()[m_CachePos].End1;
221 *end2 = Cache()[m_CachePos].End2;
222 }
223
224
225
226
227 //-----------------------------------------------------------------------------
228 // wxHtmlTag
229 //-----------------------------------------------------------------------------
230
231 wxHtmlTag::wxHtmlTag(wxHtmlTag *parent,
232 const wxString& source, int pos, int end_pos,
233 wxHtmlTagsCache *cache,
234 wxHtmlEntitiesParser *entParser)
235 {
236 /* Setup DOM relations */
237
238 m_Next = NULL;
239 m_FirstChild = m_LastChild = NULL;
240 m_Parent = parent;
241 if (parent)
242 {
243 m_Prev = m_Parent->m_LastChild;
244 if (m_Prev == NULL)
245 m_Parent->m_FirstChild = this;
246 else
247 m_Prev->m_Next = this;
248 m_Parent->m_LastChild = this;
249 }
250 else
251 m_Prev = NULL;
252
253 /* Find parameters and their values: */
254
255 int i;
256 wxChar c;
257
258 // fill-in name, params and begin pos:
259 i = pos+1;
260
261 // find tag's name and convert it to uppercase:
262 while ((i < end_pos) &&
263 ((c = source[i++]) != wxT(' ') && c != wxT('\r') &&
264 c != wxT('\n') && c != wxT('\t') &&
265 c != wxT('>')))
266 {
267 if ((c >= wxT('a')) && (c <= wxT('z')))
268 c -= (wxT('a') - wxT('A'));
269 m_Name << c;
270 }
271
272 // if the tag has parameters, read them and "normalize" them,
273 // i.e. convert to uppercase, replace whitespaces by spaces and
274 // remove whitespaces around '=':
275 if (source[i-1] != wxT('>'))
276 {
277 #define IS_WHITE(c) (c == wxT(' ') || c == wxT('\r') || \
278 c == wxT('\n') || c == wxT('\t'))
279 wxString pname, pvalue;
280 wxChar quote;
281 enum
282 {
283 ST_BEFORE_NAME = 1,
284 ST_NAME,
285 ST_BEFORE_EQ,
286 ST_BEFORE_VALUE,
287 ST_VALUE
288 } state;
289
290 quote = 0;
291 state = ST_BEFORE_NAME;
292 while (i < end_pos)
293 {
294 c = source[i++];
295
296 if (c == wxT('>') && !(state == ST_VALUE && quote != 0))
297 {
298 if (state == ST_BEFORE_EQ || state == ST_NAME)
299 {
300 m_ParamNames.Add(pname);
301 m_ParamValues.Add(wxEmptyString);
302 }
303 else if (state == ST_VALUE && quote == 0)
304 {
305 m_ParamNames.Add(pname);
306 if (entParser)
307 m_ParamValues.Add(entParser->Parse(pvalue));
308 else
309 m_ParamValues.Add(pvalue);
310 }
311 break;
312 }
313 switch (state)
314 {
315 case ST_BEFORE_NAME:
316 if (!IS_WHITE(c))
317 {
318 pname = c;
319 state = ST_NAME;
320 }
321 break;
322 case ST_NAME:
323 if (IS_WHITE(c))
324 state = ST_BEFORE_EQ;
325 else if (c == wxT('='))
326 state = ST_BEFORE_VALUE;
327 else
328 pname << c;
329 break;
330 case ST_BEFORE_EQ:
331 if (c == wxT('='))
332 state = ST_BEFORE_VALUE;
333 else if (!IS_WHITE(c))
334 {
335 m_ParamNames.Add(pname);
336 m_ParamValues.Add(wxEmptyString);
337 pname = c;
338 state = ST_NAME;
339 }
340 break;
341 case ST_BEFORE_VALUE:
342 if (!IS_WHITE(c))
343 {
344 if (c == wxT('"') || c == wxT('\''))
345 quote = c, pvalue = wxEmptyString;
346 else
347 quote = 0, pvalue = c;
348 state = ST_VALUE;
349 }
350 break;
351 case ST_VALUE:
352 if ((quote != 0 && c == quote) ||
353 (quote == 0 && IS_WHITE(c)))
354 {
355 m_ParamNames.Add(pname);
356 if (quote == 0)
357 {
358 // VS: backward compatibility, no real reason,
359 // but wxHTML code relies on this... :(
360 pvalue.MakeUpper();
361 }
362 if (entParser)
363 m_ParamValues.Add(entParser->Parse(pvalue));
364 else
365 m_ParamValues.Add(pvalue);
366 state = ST_BEFORE_NAME;
367 }
368 else
369 pvalue << c;
370 break;
371 }
372 }
373
374 #undef IS_WHITE
375 }
376 m_Begin = i;
377
378 cache->QueryTag(pos, &m_End1, &m_End2);
379 if (m_End1 > end_pos) m_End1 = end_pos;
380 if (m_End2 > end_pos) m_End2 = end_pos;
381 }
382
383 wxHtmlTag::~wxHtmlTag()
384 {
385 wxHtmlTag *t1, *t2;
386 t1 = m_FirstChild;
387 while (t1)
388 {
389 t2 = t1->GetNextSibling();
390 delete t1;
391 t1 = t2;
392 }
393 }
394
395 bool wxHtmlTag::HasParam(const wxString& par) const
396 {
397 return (m_ParamNames.Index(par, false) != wxNOT_FOUND);
398 }
399
400 wxString wxHtmlTag::GetParam(const wxString& par, bool with_commas) const
401 {
402 int index = m_ParamNames.Index(par, false);
403 if (index == wxNOT_FOUND)
404 return wxEmptyString;
405 if (with_commas)
406 {
407 // VS: backward compatibility, seems to be never used by wxHTML...
408 wxString s;
409 s << wxT('"') << m_ParamValues[index] << wxT('"');
410 return s;
411 }
412 else
413 return m_ParamValues[index];
414 }
415
416 int wxHtmlTag::ScanParam(const wxString& par,
417 const char *format,
418 void *param) const
419 {
420 wxString parval = GetParam(par);
421 return wxSscanf(parval, format, param);
422 }
423
424 int wxHtmlTag::ScanParam(const wxString& par,
425 const wchar_t *format,
426 void *param) const
427 {
428 wxString parval = GetParam(par);
429 return wxSscanf(parval, format, param);
430 }
431
432 bool wxHtmlTag::GetParamAsColour(const wxString& par, wxColour *clr) const
433 {
434 wxCHECK_MSG( clr, false, _T("invalid colour argument") );
435
436 wxString str = GetParam(par);
437
438 // handle colours defined in HTML 4.0 first:
439 if (str.length() > 1 && str[0] != _T('#'))
440 {
441 #define HTML_COLOUR(name, r, g, b) \
442 if (str.IsSameAs(wxT(name), false)) \
443 { clr->Set(r, g, b); return true; }
444 HTML_COLOUR("black", 0x00,0x00,0x00)
445 HTML_COLOUR("silver", 0xC0,0xC0,0xC0)
446 HTML_COLOUR("gray", 0x80,0x80,0x80)
447 HTML_COLOUR("white", 0xFF,0xFF,0xFF)
448 HTML_COLOUR("maroon", 0x80,0x00,0x00)
449 HTML_COLOUR("red", 0xFF,0x00,0x00)
450 HTML_COLOUR("purple", 0x80,0x00,0x80)
451 HTML_COLOUR("fuchsia", 0xFF,0x00,0xFF)
452 HTML_COLOUR("green", 0x00,0x80,0x00)
453 HTML_COLOUR("lime", 0x00,0xFF,0x00)
454 HTML_COLOUR("olive", 0x80,0x80,0x00)
455 HTML_COLOUR("yellow", 0xFF,0xFF,0x00)
456 HTML_COLOUR("navy", 0x00,0x00,0x80)
457 HTML_COLOUR("blue", 0x00,0x00,0xFF)
458 HTML_COLOUR("teal", 0x00,0x80,0x80)
459 HTML_COLOUR("aqua", 0x00,0xFF,0xFF)
460 #undef HTML_COLOUR
461 }
462
463 // then try to parse #rrggbb representations or set from other well
464 // known names (note that this doesn't strictly conform to HTML spec,
465 // but it doesn't do real harm -- but it *must* be done after the standard
466 // colors are handled above):
467 if (clr->Set(str))
468 return true;
469
470 return false;
471 }
472
473 bool wxHtmlTag::GetParamAsInt(const wxString& par, int *clr) const
474 {
475 if (!HasParam(par)) return false;
476 long i;
477 bool succ = GetParam(par).ToLong(&i);
478 *clr = (int)i;
479 return succ;
480 }
481
482 wxString wxHtmlTag::GetAllParams() const
483 {
484 // VS: this function is for backward compatibility only,
485 // never used by wxHTML
486 wxString s;
487 size_t cnt = m_ParamNames.GetCount();
488 for (size_t i = 0; i < cnt; i++)
489 {
490 s << m_ParamNames[i];
491 s << wxT('=');
492 if (m_ParamValues[i].Find(wxT('"')) != wxNOT_FOUND)
493 s << wxT('\'') << m_ParamValues[i] << wxT('\'');
494 else
495 s << wxT('"') << m_ParamValues[i] << wxT('"');
496 }
497 return s;
498 }
499
500 wxHtmlTag *wxHtmlTag::GetFirstSibling() const
501 {
502 if (m_Parent)
503 return m_Parent->m_FirstChild;
504 else
505 {
506 wxHtmlTag *cur = (wxHtmlTag*)this;
507 while (cur->m_Prev)
508 cur = cur->m_Prev;
509 return cur;
510 }
511 }
512
513 wxHtmlTag *wxHtmlTag::GetLastSibling() const
514 {
515 if (m_Parent)
516 return m_Parent->m_LastChild;
517 else
518 {
519 wxHtmlTag *cur = (wxHtmlTag*)this;
520 while (cur->m_Next)
521 cur = cur->m_Next;
522 return cur;
523 }
524 }
525
526 wxHtmlTag *wxHtmlTag::GetNextTag() const
527 {
528 if (m_FirstChild) return m_FirstChild;
529 if (m_Next) return m_Next;
530 wxHtmlTag *cur = m_Parent;
531 if (!cur) return NULL;
532 while (cur->m_Parent && !cur->m_Next)
533 cur = cur->m_Parent;
534 return cur->m_Next;
535 }
536
537 #endif