]> git.saurik.com Git - wxWidgets.git/blame - src/html/htmltag.cpp
correction to last commit: don't test unsetenv() return value, it's void under Darwin
[wxWidgets.git] / src / html / htmltag.cpp
CommitLineData
5526e819 1/////////////////////////////////////////////////////////////////////////////
93763ad5 2// Name: src/html/htmltag.cpp
5526e819
VS
3// Purpose: wxHtmlTag class (represents single tag)
4// Author: Vaclav Slavik
69941f05 5// RCS-ID: $Id$
5526e819 6// Copyright: (c) 1999 Vaclav Slavik
65571936 7// Licence: wxWindows licence
5526e819
VS
8/////////////////////////////////////////////////////////////////////////////
9
3096bd2f 10#include "wx/wxprec.h"
5526e819 11
2b5f62a0 12#ifdef __BORLANDC__
93763ad5 13 #pragma hdrstop
5526e819
VS
14#endif
15
93763ad5
WS
16#if wxUSE_HTML
17
40989e46
WS
18#include "wx/html/htmltag.h"
19
b4f4d3dd 20#ifndef WX_PRECOMP
7cf41a5d 21 #include "wx/colour.h"
193d0c93 22 #include "wx/wxcrtvararg.h"
5526e819
VS
23#endif
24
daa616fc 25#include "wx/html/htmlpars.h"
7e1e0960 26#include <stdio.h> // for vsscanf
5526e819
VS
27#include <stdarg.h>
28
5526e819
VS
29//-----------------------------------------------------------------------------
30// wxHtmlTagsCache
31//-----------------------------------------------------------------------------
32
5e8e25e7
VS
33struct wxHtmlCacheItem
34{
35 // this is "pos" value passed to wxHtmlTag's constructor.
36 // it is position of '<' character of the tag
37 int Key;
38
39 // end positions for the tag:
40 // end1 is '<' of ending tag,
41 // end2 is '>' or both are
42 // -1 if there is no ending tag for this one...
43 // or -2 if this is ending tag </...>
44 int End1, End2;
45
46 // name of this tag
47 wxChar *Name;
48};
49
50
5526e819
VS
51IMPLEMENT_CLASS(wxHtmlTagsCache,wxObject)
52
53#define CACHE_INCREMENT 64
54
07cc7ddc 55bool wxIsCDATAElement(const wxChar *tag)
7c6cd4a8
VS
56{
57 return (wxStrcmp(tag, _T("SCRIPT")) == 0) ||
58 (wxStrcmp(tag, _T("STYLE")) == 0);
59}
60
5526e819
VS
61wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
62{
66a77a74 63 const wxChar *src = source.c_str();
93763ad5 64 int lng = source.length();
8cd82622 65 wxChar tagBuffer[256];
5526e819
VS
66
67 m_Cache = NULL;
68 m_CacheSize = 0;
69 m_CachePos = 0;
70
4609ee2e 71 for ( int pos = 0; pos < lng; pos++ )
4f9297b0
VS
72 {
73 if (src[pos] == wxT('<')) // tag found:
a914db0f 74 {
4609ee2e
VZ
75 // don't cache comment tags
76 wxString::const_iterator iter = source.begin() + pos;
77 if ( wxHtmlParser::SkipCommentTag(iter, source.end()) )
78 {
79 pos = iter - source.begin();
80 continue;
81 }
82
5526e819 83 if (m_CacheSize % CACHE_INCREMENT == 0)
5e8e25e7 84 m_Cache = (wxHtmlCacheItem*) realloc(m_Cache, (m_CacheSize + CACHE_INCREMENT) * sizeof(wxHtmlCacheItem));
999836aa
VZ
85 int tg = m_CacheSize++;
86 int stpos = pos++;
87 m_Cache[tg].Key = stpos;
8cd82622 88
4f22f506 89 int i;
8cd82622 90 for ( i = 0;
4f22f506 91 pos < lng && i < (int)WXSIZEOF(tagBuffer) - 1 &&
8cd82622
VZ
92 src[pos] != wxT('>') && !wxIsspace(src[pos]);
93 i++, pos++ )
a914db0f 94 {
42841dfc 95 tagBuffer[i] = (wxChar)wxToupper(src[pos]);
5526e819 96 }
8cd82622
VZ
97 tagBuffer[i] = _T('\0');
98
66a77a74 99 m_Cache[tg].Name = new wxChar[i+1];
8cd82622 100 memcpy(m_Cache[tg].Name, tagBuffer, (i+1)*sizeof(wxChar));
5526e819 101
15db3cf5 102 while (pos < lng && src[pos] != wxT('>')) pos++;
5526e819 103
4f9297b0 104 if (src[stpos+1] == wxT('/')) // ending tag:
a914db0f 105 {
5526e819
VS
106 m_Cache[tg].End1 = m_Cache[tg].End2 = -2;
107 // find matching begin tag:
108 for (i = tg; i >= 0; i--)
8cd82622 109 if ((m_Cache[i].End1 == -1) && (wxStrcmp(m_Cache[i].Name, tagBuffer+1) == 0))
a914db0f 110 {
5526e819
VS
111 m_Cache[i].End1 = stpos;
112 m_Cache[i].End2 = pos + 1;
113 break;
114 }
115 }
8cd82622 116 else
a914db0f 117 {
5526e819 118 m_Cache[tg].End1 = m_Cache[tg].End2 = -1;
7448de8d 119
7c6cd4a8
VS
120 if (wxIsCDATAElement(tagBuffer))
121 {
313ffa19
VS
122 // store the orig pos in case we are missing the closing
123 // tag (see below)
7448de8d 124 wxInt32 old_pos = pos;
313ffa19 125 bool foundCloseTag = false;
7448de8d 126
7c6cd4a8
VS
127 // find next matching tag
128 int tag_len = wxStrlen(tagBuffer);
129 while (pos < lng)
130 {
131 // find the ending tag
132 while (pos + 1 < lng &&
133 (src[pos] != '<' || src[pos+1] != '/'))
134 ++pos;
135 if (src[pos] == '<')
136 ++pos;
d1da8872 137
7c6cd4a8
VS
138 // see if it matches
139 int match_pos = 0;
140 while (pos < lng && match_pos < tag_len && src[pos] != '>' && src[pos] != '<') {
5447d1b4
VZ
141 // cast to wxChar needed to suppress warning in
142 // Unicode build
143 if ((wxChar)wxToupper(src[pos]) == tagBuffer[match_pos]) {
7c6cd4a8 144 ++match_pos;
d1da8872 145 }
7c6cd4a8
VS
146 else if (src[pos] == wxT(' ') || src[pos] == wxT('\n') ||
147 src[pos] == wxT('\r') || src[pos] == wxT('\t')) {
148 // need to skip over these
149 }
150 else {
151 match_pos = 0;
152 }
153 ++pos;
154 }
155
156 // found a match
7448de8d 157 if (match_pos == tag_len)
313ffa19 158 {
b5d464b9 159 pos = pos - tag_len - 3;
313ffa19 160 foundCloseTag = true;
7c6cd4a8
VS
161 break;
162 }
313ffa19
VS
163 else // keep looking for the closing tag
164 {
7c6cd4a8
VS
165 ++pos;
166 }
167 }
313ffa19
VS
168 if (!foundCloseTag)
169 {
170 // we didn't find closing tag; this means the markup
171 // is incorrect and the best thing we can do is to
172 // ignore the unclosed tag and continue parsing as if
173 // it didn't exist:
174 pos = old_pos;
175 }
7c6cd4a8 176 }
5526e819
VS
177 }
178 }
5526e819
VS
179 }
180
181 // ok, we're done, now we'll free .Name members of cache - we don't need it anymore:
14d36de8 182 for (int i = 0; i < m_CacheSize; i++)
4f9297b0 183 {
2776d7c3 184 delete[] m_Cache[i].Name;
5526e819
VS
185 m_Cache[i].Name = NULL;
186 }
187}
188
5526e819
VS
189void wxHtmlTagsCache::QueryTag(int at, int* end1, int* end2)
190{
191 if (m_Cache == NULL) return;
8cd82622 192 if (m_Cache[m_CachePos].Key != at)
4f9297b0 193 {
5526e819 194 int delta = (at < m_Cache[m_CachePos].Key) ? -1 : 1;
8cd82622
VZ
195 do
196 {
10b9be32
VZ
197 if ( m_CachePos < 0 || m_CachePos == m_CacheSize )
198 {
199 // something is very wrong with HTML, give up by returning an
200 // impossibly large value which is going to be ignored by the
201 // caller
202 *end1 =
203 *end2 = INT_MAX;
204 return;
205 }
206
8cd82622 207 m_CachePos += delta;
daa616fc
VS
208 }
209 while (m_Cache[m_CachePos].Key != at);
5526e819
VS
210 }
211 *end1 = m_Cache[m_CachePos].End1;
212 *end2 = m_Cache[m_CachePos].End2;
213}
214
215
216
217
218//-----------------------------------------------------------------------------
219// wxHtmlTag
220//-----------------------------------------------------------------------------
221
222IMPLEMENT_CLASS(wxHtmlTag,wxObject)
223
211dfedd 224wxHtmlTag::wxHtmlTag(wxHtmlTag *parent,
8cd82622 225 const wxString& source, int pos, int end_pos,
daa616fc
VS
226 wxHtmlTagsCache *cache,
227 wxHtmlEntitiesParser *entParser) : wxObject()
5526e819 228{
211dfedd
VS
229 /* Setup DOM relations */
230
231 m_Next = NULL;
232 m_FirstChild = m_LastChild = NULL;
233 m_Parent = parent;
234 if (parent)
235 {
236 m_Prev = m_Parent->m_LastChild;
237 if (m_Prev == NULL)
238 m_Parent->m_FirstChild = this;
239 else
240 m_Prev->m_Next = this;
241 m_Parent->m_LastChild = this;
242 }
243 else
244 m_Prev = NULL;
245
246 /* Find parameters and their values: */
8cd82622 247
5526e819 248 int i;
daa616fc 249 wxChar c;
5526e819
VS
250
251 // fill-in name, params and begin pos:
5526e819 252 i = pos+1;
5526e819 253
b076dc01 254 // find tag's name and convert it to uppercase:
8cd82622
VZ
255 while ((i < end_pos) &&
256 ((c = source[i++]) != wxT(' ') && c != wxT('\r') &&
daa616fc 257 c != wxT('\n') && c != wxT('\t') &&
8cd82622 258 c != wxT('>')))
a914db0f 259 {
8cd82622 260 if ((c >= wxT('a')) && (c <= wxT('z')))
daa616fc
VS
261 c -= (wxT('a') - wxT('A'));
262 m_Name << c;
5526e819
VS
263 }
264
b076dc01 265 // if the tag has parameters, read them and "normalize" them,
8cd82622 266 // i.e. convert to uppercase, replace whitespaces by spaces and
b076dc01 267 // remove whitespaces around '=':
c9893146 268 if (source[i-1] != wxT('>'))
daa616fc
VS
269 {
270 #define IS_WHITE(c) (c == wxT(' ') || c == wxT('\r') || \
271 c == wxT('\n') || c == wxT('\t'))
272 wxString pname, pvalue;
273 wxChar quote;
8cd82622 274 enum
a914db0f 275 {
8cd82622 276 ST_BEFORE_NAME = 1,
daa616fc
VS
277 ST_NAME,
278 ST_BEFORE_EQ,
279 ST_BEFORE_VALUE,
280 ST_VALUE
281 } state;
8cd82622 282
daa616fc
VS
283 quote = 0;
284 state = ST_BEFORE_NAME;
285 while (i < end_pos)
286 {
287 c = source[i++];
288
8cd82622 289 if (c == wxT('>') && !(state == ST_VALUE && quote != 0))
a914db0f 290 {
daa616fc 291 if (state == ST_BEFORE_EQ || state == ST_NAME)
b076dc01 292 {
daa616fc
VS
293 m_ParamNames.Add(pname);
294 m_ParamValues.Add(wxEmptyString);
b076dc01 295 }
daa616fc
VS
296 else if (state == ST_VALUE && quote == 0)
297 {
298 m_ParamNames.Add(pname);
367c84b9
VS
299 if (entParser)
300 m_ParamValues.Add(entParser->Parse(pvalue));
301 else
302 m_ParamValues.Add(pvalue);
daa616fc
VS
303 }
304 break;
5526e819 305 }
daa616fc 306 switch (state)
a914db0f 307 {
daa616fc
VS
308 case ST_BEFORE_NAME:
309 if (!IS_WHITE(c))
310 {
311 pname = c;
312 state = ST_NAME;
313 }
314 break;
315 case ST_NAME:
316 if (IS_WHITE(c))
317 state = ST_BEFORE_EQ;
318 else if (c == wxT('='))
319 state = ST_BEFORE_VALUE;
320 else
321 pname << c;
322 break;
323 case ST_BEFORE_EQ:
324 if (c == wxT('='))
325 state = ST_BEFORE_VALUE;
326 else if (!IS_WHITE(c))
327 {
328 m_ParamNames.Add(pname);
329 m_ParamValues.Add(wxEmptyString);
330 pname = c;
331 state = ST_NAME;
332 }
333 break;
334 case ST_BEFORE_VALUE:
335 if (!IS_WHITE(c))
336 {
337 if (c == wxT('"') || c == wxT('\''))
338 quote = c, pvalue = wxEmptyString;
339 else
340 quote = 0, pvalue = c;
341 state = ST_VALUE;
342 }
343 break;
344 case ST_VALUE:
345 if ((quote != 0 && c == quote) ||
346 (quote == 0 && IS_WHITE(c)))
347 {
348 m_ParamNames.Add(pname);
349 if (quote == 0)
350 {
351 // VS: backward compatibility, no real reason,
352 // but wxHTML code relies on this... :(
353 pvalue.MakeUpper();
354 }
367c84b9
VS
355 if (entParser)
356 m_ParamValues.Add(entParser->Parse(pvalue));
357 else
358 m_ParamValues.Add(pvalue);
daa616fc
VS
359 state = ST_BEFORE_NAME;
360 }
361 else
362 pvalue << c;
363 break;
72aa4a98 364 }
5526e819 365 }
8cd82622 366
daa616fc 367 #undef IS_WHITE
7448de8d
WS
368 }
369 m_Begin = i;
5526e819 370
7448de8d
WS
371 cache->QueryTag(pos, &m_End1, &m_End2);
372 if (m_End1 > end_pos) m_End1 = end_pos;
373 if (m_End2 > end_pos) m_End2 = end_pos;
5526e819
VS
374}
375
211dfedd
VS
376wxHtmlTag::~wxHtmlTag()
377{
0d58bb65
VS
378 wxHtmlTag *t1, *t2;
379 t1 = m_FirstChild;
380 while (t1)
381 {
382 t2 = t1->GetNextSibling();
383 delete t1;
384 t1 = t2;
385 }
211dfedd
VS
386}
387
5526e819
VS
388bool wxHtmlTag::HasParam(const wxString& par) const
389{
8703bc01 390 return (m_ParamNames.Index(par, false) != wxNOT_FOUND);
5526e819
VS
391}
392
5526e819
VS
393wxString wxHtmlTag::GetParam(const wxString& par, bool with_commas) const
394{
8703bc01 395 int index = m_ParamNames.Index(par, false);
daa616fc
VS
396 if (index == wxNOT_FOUND)
397 return wxEmptyString;
398 if (with_commas)
4f9297b0 399 {
daa616fc
VS
400 // VS: backward compatibility, seems to be never used by wxHTML...
401 wxString s;
402 s << wxT('"') << m_ParamValues[index] << wxT('"');
403 return s;
5526e819 404 }
daa616fc
VS
405 else
406 return m_ParamValues[index];
5526e819
VS
407}
408
90350682 409int wxHtmlTag::ScanParam(const wxString& par,
d7640339
VS
410 const char *format,
411 void *param) const
412{
413 wxString parval = GetParam(par);
414 return wxSscanf(parval, format, param);
415}
416
417int wxHtmlTag::ScanParam(const wxString& par,
418 const wchar_t *format,
90350682 419 void *param) const
5526e819 420{
5526e819 421 wxString parval = GetParam(par);
161f4f73 422 return wxSscanf(parval, format, param);
5526e819
VS
423}
424
8bd72d90
VS
425bool wxHtmlTag::GetParamAsColour(const wxString& par, wxColour *clr) const
426{
86766dfd 427 wxCHECK_MSG( clr, false, _T("invalid colour argument") );
8cd82622 428
86766dfd 429 wxString str = GetParam(par);
40989e46 430
86766dfd
VS
431 // handle colours defined in HTML 4.0 first:
432 if (str.length() > 1 && str[0] != _T('#'))
8bd72d90 433 {
86766dfd 434 #define HTML_COLOUR(name, r, g, b) \
8703bc01 435 if (str.IsSameAs(wxT(name), false)) \
86766dfd 436 { clr->Set(r, g, b); return true; }
8bd72d90
VS
437 HTML_COLOUR("black", 0x00,0x00,0x00)
438 HTML_COLOUR("silver", 0xC0,0xC0,0xC0)
439 HTML_COLOUR("gray", 0x80,0x80,0x80)
440 HTML_COLOUR("white", 0xFF,0xFF,0xFF)
441 HTML_COLOUR("maroon", 0x80,0x00,0x00)
442 HTML_COLOUR("red", 0xFF,0x00,0x00)
443 HTML_COLOUR("purple", 0x80,0x00,0x80)
444 HTML_COLOUR("fuchsia", 0xFF,0x00,0xFF)
445 HTML_COLOUR("green", 0x00,0x80,0x00)
446 HTML_COLOUR("lime", 0x00,0xFF,0x00)
447 HTML_COLOUR("olive", 0x80,0x80,0x00)
448 HTML_COLOUR("yellow", 0xFF,0xFF,0x00)
449 HTML_COLOUR("navy", 0x00,0x00,0x80)
450 HTML_COLOUR("blue", 0x00,0x00,0xFF)
451 HTML_COLOUR("teal", 0x00,0x80,0x80)
452 HTML_COLOUR("aqua", 0x00,0xFF,0xFF)
453 #undef HTML_COLOUR
8bd72d90 454 }
5716a1ab 455
86766dfd
VS
456 // then try to parse #rrggbb representations or set from other well
457 // known names (note that this doesn't strictly conform to HTML spec,
458 // but it doesn't do real harm -- but it *must* be done after the standard
459 // colors are handled above):
460 if (clr->Set(str))
461 return true;
462
8703bc01 463 return false;
8bd72d90
VS
464}
465
466bool wxHtmlTag::GetParamAsInt(const wxString& par, int *clr) const
467{
8703bc01 468 if (!HasParam(par)) return false;
8bd72d90
VS
469 long i;
470 bool succ = GetParam(par).ToLong(&i);
471 *clr = (int)i;
472 return succ;
473}
474
daa616fc
VS
475wxString wxHtmlTag::GetAllParams() const
476{
3103e8a9 477 // VS: this function is for backward compatibility only,
daa616fc
VS
478 // never used by wxHTML
479 wxString s;
480 size_t cnt = m_ParamNames.GetCount();
481 for (size_t i = 0; i < cnt; i++)
482 {
483 s << m_ParamNames[i];
484 s << wxT('=');
485 if (m_ParamValues[i].Find(wxT('"')) != wxNOT_FOUND)
486 s << wxT('\'') << m_ParamValues[i] << wxT('\'');
487 else
488 s << wxT('"') << m_ParamValues[i] << wxT('"');
489 }
490 return s;
491}
492
211dfedd
VS
493wxHtmlTag *wxHtmlTag::GetFirstSibling() const
494{
495 if (m_Parent)
496 return m_Parent->m_FirstChild;
497 else
498 {
499 wxHtmlTag *cur = (wxHtmlTag*)this;
8cd82622 500 while (cur->m_Prev)
211dfedd
VS
501 cur = cur->m_Prev;
502 return cur;
503 }
504}
505
506wxHtmlTag *wxHtmlTag::GetLastSibling() const
507{
508 if (m_Parent)
509 return m_Parent->m_LastChild;
510 else
511 {
512 wxHtmlTag *cur = (wxHtmlTag*)this;
8cd82622 513 while (cur->m_Next)
211dfedd
VS
514 cur = cur->m_Next;
515 return cur;
516 }
517}
518
519wxHtmlTag *wxHtmlTag::GetNextTag() const
520{
521 if (m_FirstChild) return m_FirstChild;
522 if (m_Next) return m_Next;
523 wxHtmlTag *cur = m_Parent;
524 if (!cur) return NULL;
8cd82622 525 while (cur->m_Parent && !cur->m_Next)
211dfedd
VS
526 cur = cur->m_Parent;
527 return cur->m_Next;
528}
529
4d223b67 530#endif