]> git.saurik.com Git - wxWidgets.git/blame - src/html/htmltag.cpp
added UTF-16/32-[LB]E conversions; got rid of wxCharacterSet and simplified and fixed...
[wxWidgets.git] / src / html / htmltag.cpp
CommitLineData
5526e819
VS
1/////////////////////////////////////////////////////////////////////////////
2// Name: htmltag.cpp
3// Purpose: wxHtmlTag class (represents single tag)
4// Author: Vaclav Slavik
69941f05 5// RCS-ID: $Id$
5526e819
VS
6// Copyright: (c) 1999 Vaclav Slavik
7// Licence: wxWindows Licence
8/////////////////////////////////////////////////////////////////////////////
9
10
14f355c2 11#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
1aedb1dd 12#pragma implementation "htmltag.h"
5526e819
VS
13#endif
14
3096bd2f 15#include "wx/wxprec.h"
5526e819
VS
16
17#include "wx/defs.h"
18#if wxUSE_HTML
19
2b5f62a0 20#ifdef __BORLANDC__
5526e819
VS
21#pragma hdrstop
22#endif
23
24#ifndef WXPRECOMP
5526e819
VS
25#endif
26
69941f05 27#include "wx/html/htmltag.h"
daa616fc 28#include "wx/html/htmlpars.h"
fc1f2125 29#include "wx/colour.h"
7e1e0960 30#include <stdio.h> // for vsscanf
5526e819
VS
31#include <stdarg.h>
32
33
5526e819
VS
34//-----------------------------------------------------------------------------
35// wxHtmlTagsCache
36//-----------------------------------------------------------------------------
37
5e8e25e7
VS
38struct wxHtmlCacheItem
39{
40 // this is "pos" value passed to wxHtmlTag's constructor.
41 // it is position of '<' character of the tag
42 int Key;
43
44 // end positions for the tag:
45 // end1 is '<' of ending tag,
46 // end2 is '>' or both are
47 // -1 if there is no ending tag for this one...
48 // or -2 if this is ending tag </...>
49 int End1, End2;
50
51 // name of this tag
52 wxChar *Name;
53};
54
55
5526e819
VS
56IMPLEMENT_CLASS(wxHtmlTagsCache,wxObject)
57
58#define CACHE_INCREMENT 64
59
07cc7ddc 60bool wxIsCDATAElement(const wxChar *tag)
7c6cd4a8
VS
61{
62 return (wxStrcmp(tag, _T("SCRIPT")) == 0) ||
63 (wxStrcmp(tag, _T("STYLE")) == 0);
64}
65
5526e819
VS
66wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
67{
66a77a74 68 const wxChar *src = source.c_str();
14d36de8 69 int tg, stpos;
5526e819 70 int lng = source.Length();
8cd82622 71 wxChar tagBuffer[256];
5526e819
VS
72
73 m_Cache = NULL;
74 m_CacheSize = 0;
75 m_CachePos = 0;
76
14d36de8 77 int pos = 0;
8cd82622 78 while (pos < lng)
4f9297b0
VS
79 {
80 if (src[pos] == wxT('<')) // tag found:
a914db0f 81 {
5526e819 82 if (m_CacheSize % CACHE_INCREMENT == 0)
5e8e25e7 83 m_Cache = (wxHtmlCacheItem*) realloc(m_Cache, (m_CacheSize + CACHE_INCREMENT) * sizeof(wxHtmlCacheItem));
5526e819
VS
84 tg = m_CacheSize++;
85 m_Cache[tg].Key = stpos = pos++;
8cd82622 86
4f22f506 87 int i;
8cd82622 88 for ( i = 0;
4f22f506 89 pos < lng && i < (int)WXSIZEOF(tagBuffer) - 1 &&
8cd82622
VZ
90 src[pos] != wxT('>') && !wxIsspace(src[pos]);
91 i++, pos++ )
a914db0f 92 {
8cd82622 93 tagBuffer[i] = wxToupper(src[pos]);
5526e819 94 }
8cd82622
VZ
95 tagBuffer[i] = _T('\0');
96
66a77a74 97 m_Cache[tg].Name = new wxChar[i+1];
8cd82622 98 memcpy(m_Cache[tg].Name, tagBuffer, (i+1)*sizeof(wxChar));
5526e819 99
15db3cf5 100 while (pos < lng && src[pos] != wxT('>')) pos++;
5526e819 101
4f9297b0 102 if (src[stpos+1] == wxT('/')) // ending tag:
a914db0f 103 {
5526e819
VS
104 m_Cache[tg].End1 = m_Cache[tg].End2 = -2;
105 // find matching begin tag:
106 for (i = tg; i >= 0; i--)
8cd82622 107 if ((m_Cache[i].End1 == -1) && (wxStrcmp(m_Cache[i].Name, tagBuffer+1) == 0))
a914db0f 108 {
5526e819
VS
109 m_Cache[i].End1 = stpos;
110 m_Cache[i].End2 = pos + 1;
111 break;
112 }
113 }
8cd82622 114 else
a914db0f 115 {
5526e819 116 m_Cache[tg].End1 = m_Cache[tg].End2 = -1;
7c6cd4a8
VS
117
118 if (wxIsCDATAElement(tagBuffer))
119 {
120 // find next matching tag
121 int tag_len = wxStrlen(tagBuffer);
122 while (pos < lng)
123 {
124 // find the ending tag
125 while (pos + 1 < lng &&
126 (src[pos] != '<' || src[pos+1] != '/'))
127 ++pos;
128 if (src[pos] == '<')
129 ++pos;
130
131 // see if it matches
132 int match_pos = 0;
133 while (pos < lng && match_pos < tag_len && src[pos] != '>' && src[pos] != '<') {
5447d1b4
VZ
134 // cast to wxChar needed to suppress warning in
135 // Unicode build
136 if ((wxChar)wxToupper(src[pos]) == tagBuffer[match_pos]) {
7c6cd4a8
VS
137 ++match_pos;
138 }
139 else if (src[pos] == wxT(' ') || src[pos] == wxT('\n') ||
140 src[pos] == wxT('\r') || src[pos] == wxT('\t')) {
141 // need to skip over these
142 }
143 else {
144 match_pos = 0;
145 }
146 ++pos;
147 }
148
149 // found a match
150 if (match_pos == tag_len) {
151 pos = pos - tag_len - 3;
152 stpos = pos;
153 break;
154 }
155 else {
156 ++pos;
157 }
158 }
159 }
5526e819
VS
160 }
161 }
162
163 pos++;
164 }
165
166 // ok, we're done, now we'll free .Name members of cache - we don't need it anymore:
14d36de8 167 for (int i = 0; i < m_CacheSize; i++)
4f9297b0 168 {
2776d7c3 169 delete[] m_Cache[i].Name;
5526e819
VS
170 m_Cache[i].Name = NULL;
171 }
172}
173
5526e819
VS
174void wxHtmlTagsCache::QueryTag(int at, int* end1, int* end2)
175{
176 if (m_Cache == NULL) return;
8cd82622 177 if (m_Cache[m_CachePos].Key != at)
4f9297b0 178 {
5526e819 179 int delta = (at < m_Cache[m_CachePos].Key) ? -1 : 1;
8cd82622
VZ
180 do
181 {
182 m_CachePos += delta;
daa616fc
VS
183 }
184 while (m_Cache[m_CachePos].Key != at);
5526e819
VS
185 }
186 *end1 = m_Cache[m_CachePos].End1;
187 *end2 = m_Cache[m_CachePos].End2;
188}
189
190
191
192
193//-----------------------------------------------------------------------------
194// wxHtmlTag
195//-----------------------------------------------------------------------------
196
197IMPLEMENT_CLASS(wxHtmlTag,wxObject)
198
211dfedd 199wxHtmlTag::wxHtmlTag(wxHtmlTag *parent,
8cd82622 200 const wxString& source, int pos, int end_pos,
daa616fc
VS
201 wxHtmlTagsCache *cache,
202 wxHtmlEntitiesParser *entParser) : wxObject()
5526e819 203{
211dfedd
VS
204 /* Setup DOM relations */
205
206 m_Next = NULL;
207 m_FirstChild = m_LastChild = NULL;
208 m_Parent = parent;
209 if (parent)
210 {
211 m_Prev = m_Parent->m_LastChild;
212 if (m_Prev == NULL)
213 m_Parent->m_FirstChild = this;
214 else
215 m_Prev->m_Next = this;
216 m_Parent->m_LastChild = this;
217 }
218 else
219 m_Prev = NULL;
220
221 /* Find parameters and their values: */
8cd82622 222
5526e819 223 int i;
daa616fc 224 wxChar c;
5526e819
VS
225
226 // fill-in name, params and begin pos:
5526e819 227 i = pos+1;
5526e819 228
b076dc01 229 // find tag's name and convert it to uppercase:
8cd82622
VZ
230 while ((i < end_pos) &&
231 ((c = source[i++]) != wxT(' ') && c != wxT('\r') &&
daa616fc 232 c != wxT('\n') && c != wxT('\t') &&
8cd82622 233 c != wxT('>')))
a914db0f 234 {
8cd82622 235 if ((c >= wxT('a')) && (c <= wxT('z')))
daa616fc
VS
236 c -= (wxT('a') - wxT('A'));
237 m_Name << c;
5526e819
VS
238 }
239
b076dc01 240 // if the tag has parameters, read them and "normalize" them,
8cd82622 241 // i.e. convert to uppercase, replace whitespaces by spaces and
b076dc01 242 // remove whitespaces around '=':
c9893146 243 if (source[i-1] != wxT('>'))
daa616fc
VS
244 {
245 #define IS_WHITE(c) (c == wxT(' ') || c == wxT('\r') || \
246 c == wxT('\n') || c == wxT('\t'))
247 wxString pname, pvalue;
248 wxChar quote;
8cd82622 249 enum
a914db0f 250 {
8cd82622 251 ST_BEFORE_NAME = 1,
daa616fc
VS
252 ST_NAME,
253 ST_BEFORE_EQ,
254 ST_BEFORE_VALUE,
255 ST_VALUE
256 } state;
8cd82622 257
daa616fc
VS
258 quote = 0;
259 state = ST_BEFORE_NAME;
260 while (i < end_pos)
261 {
262 c = source[i++];
263
8cd82622 264 if (c == wxT('>') && !(state == ST_VALUE && quote != 0))
a914db0f 265 {
daa616fc 266 if (state == ST_BEFORE_EQ || state == ST_NAME)
b076dc01 267 {
daa616fc
VS
268 m_ParamNames.Add(pname);
269 m_ParamValues.Add(wxEmptyString);
b076dc01 270 }
daa616fc
VS
271 else if (state == ST_VALUE && quote == 0)
272 {
273 m_ParamNames.Add(pname);
367c84b9
VS
274 if (entParser)
275 m_ParamValues.Add(entParser->Parse(pvalue));
276 else
277 m_ParamValues.Add(pvalue);
daa616fc
VS
278 }
279 break;
5526e819 280 }
daa616fc 281 switch (state)
a914db0f 282 {
daa616fc
VS
283 case ST_BEFORE_NAME:
284 if (!IS_WHITE(c))
285 {
286 pname = c;
287 state = ST_NAME;
288 }
289 break;
290 case ST_NAME:
291 if (IS_WHITE(c))
292 state = ST_BEFORE_EQ;
293 else if (c == wxT('='))
294 state = ST_BEFORE_VALUE;
295 else
296 pname << c;
297 break;
298 case ST_BEFORE_EQ:
299 if (c == wxT('='))
300 state = ST_BEFORE_VALUE;
301 else if (!IS_WHITE(c))
302 {
303 m_ParamNames.Add(pname);
304 m_ParamValues.Add(wxEmptyString);
305 pname = c;
306 state = ST_NAME;
307 }
308 break;
309 case ST_BEFORE_VALUE:
310 if (!IS_WHITE(c))
311 {
312 if (c == wxT('"') || c == wxT('\''))
313 quote = c, pvalue = wxEmptyString;
314 else
315 quote = 0, pvalue = c;
316 state = ST_VALUE;
317 }
318 break;
319 case ST_VALUE:
320 if ((quote != 0 && c == quote) ||
321 (quote == 0 && IS_WHITE(c)))
322 {
323 m_ParamNames.Add(pname);
324 if (quote == 0)
325 {
326 // VS: backward compatibility, no real reason,
327 // but wxHTML code relies on this... :(
328 pvalue.MakeUpper();
329 }
367c84b9
VS
330 if (entParser)
331 m_ParamValues.Add(entParser->Parse(pvalue));
332 else
333 m_ParamValues.Add(pvalue);
daa616fc
VS
334 state = ST_BEFORE_NAME;
335 }
336 else
337 pvalue << c;
338 break;
72aa4a98 339 }
5526e819 340 }
8cd82622 341
daa616fc
VS
342 #undef IS_WHITE
343 }
5526e819
VS
344 m_Begin = i;
345
4f9297b0 346 cache->QueryTag(pos, &m_End1, &m_End2);
5526e819
VS
347 if (m_End1 > end_pos) m_End1 = end_pos;
348 if (m_End2 > end_pos) m_End2 = end_pos;
349}
350
211dfedd
VS
351wxHtmlTag::~wxHtmlTag()
352{
0d58bb65
VS
353 wxHtmlTag *t1, *t2;
354 t1 = m_FirstChild;
355 while (t1)
356 {
357 t2 = t1->GetNextSibling();
358 delete t1;
359 t1 = t2;
360 }
211dfedd
VS
361}
362
5526e819
VS
363bool wxHtmlTag::HasParam(const wxString& par) const
364{
daa616fc 365 return (m_ParamNames.Index(par, FALSE) != wxNOT_FOUND);
5526e819
VS
366}
367
5526e819
VS
368wxString wxHtmlTag::GetParam(const wxString& par, bool with_commas) const
369{
daa616fc
VS
370 int index = m_ParamNames.Index(par, FALSE);
371 if (index == wxNOT_FOUND)
372 return wxEmptyString;
373 if (with_commas)
4f9297b0 374 {
daa616fc
VS
375 // VS: backward compatibility, seems to be never used by wxHTML...
376 wxString s;
377 s << wxT('"') << m_ParamValues[index] << wxT('"');
378 return s;
5526e819 379 }
daa616fc
VS
380 else
381 return m_ParamValues[index];
5526e819
VS
382}
383
90350682
VZ
384int wxHtmlTag::ScanParam(const wxString& par,
385 const wxChar *format,
386 void *param) const
5526e819 387{
5526e819 388 wxString parval = GetParam(par);
161f4f73 389 return wxSscanf(parval, format, param);
5526e819
VS
390}
391
8bd72d90
VS
392bool wxHtmlTag::GetParamAsColour(const wxString& par, wxColour *clr) const
393{
394 wxString str = GetParam(par);
8cd82622 395
8bd72d90
VS
396 if (str.IsEmpty()) return FALSE;
397 if (str.GetChar(0) == wxT('#'))
398 {
399 unsigned long tmp;
400 if (ScanParam(par, wxT("#%lX"), &tmp) != 1)
401 return FALSE;
402 *clr = wxColour((unsigned char)((tmp & 0xFF0000) >> 16),
403 (unsigned char)((tmp & 0x00FF00) >> 8),
404 (unsigned char)(tmp & 0x0000FF));
405 return TRUE;
406 }
407 else
408 {
409 // Handle colours defined in HTML 4.0:
410 #define HTML_COLOUR(name,r,g,b) \
411 if (str.IsSameAs(wxT(name), FALSE)) \
412 { *clr = wxColour(r,g,b); return TRUE; }
413 HTML_COLOUR("black", 0x00,0x00,0x00)
414 HTML_COLOUR("silver", 0xC0,0xC0,0xC0)
415 HTML_COLOUR("gray", 0x80,0x80,0x80)
416 HTML_COLOUR("white", 0xFF,0xFF,0xFF)
417 HTML_COLOUR("maroon", 0x80,0x00,0x00)
418 HTML_COLOUR("red", 0xFF,0x00,0x00)
419 HTML_COLOUR("purple", 0x80,0x00,0x80)
420 HTML_COLOUR("fuchsia", 0xFF,0x00,0xFF)
421 HTML_COLOUR("green", 0x00,0x80,0x00)
422 HTML_COLOUR("lime", 0x00,0xFF,0x00)
423 HTML_COLOUR("olive", 0x80,0x80,0x00)
424 HTML_COLOUR("yellow", 0xFF,0xFF,0x00)
425 HTML_COLOUR("navy", 0x00,0x00,0x80)
426 HTML_COLOUR("blue", 0x00,0x00,0xFF)
427 HTML_COLOUR("teal", 0x00,0x80,0x80)
428 HTML_COLOUR("aqua", 0x00,0xFF,0xFF)
429 #undef HTML_COLOUR
8bd72d90 430 }
5716a1ab
VZ
431
432 return FALSE;
8bd72d90
VS
433}
434
435bool wxHtmlTag::GetParamAsInt(const wxString& par, int *clr) const
436{
437 if (!HasParam(par)) return FALSE;
438 long i;
439 bool succ = GetParam(par).ToLong(&i);
440 *clr = (int)i;
441 return succ;
442}
443
daa616fc
VS
444wxString wxHtmlTag::GetAllParams() const
445{
8cd82622 446 // VS: this function is for backward compatiblity only,
daa616fc
VS
447 // never used by wxHTML
448 wxString s;
449 size_t cnt = m_ParamNames.GetCount();
450 for (size_t i = 0; i < cnt; i++)
451 {
452 s << m_ParamNames[i];
453 s << wxT('=');
454 if (m_ParamValues[i].Find(wxT('"')) != wxNOT_FOUND)
455 s << wxT('\'') << m_ParamValues[i] << wxT('\'');
456 else
457 s << wxT('"') << m_ParamValues[i] << wxT('"');
458 }
459 return s;
460}
461
211dfedd
VS
462wxHtmlTag *wxHtmlTag::GetFirstSibling() const
463{
464 if (m_Parent)
465 return m_Parent->m_FirstChild;
466 else
467 {
468 wxHtmlTag *cur = (wxHtmlTag*)this;
8cd82622 469 while (cur->m_Prev)
211dfedd
VS
470 cur = cur->m_Prev;
471 return cur;
472 }
473}
474
475wxHtmlTag *wxHtmlTag::GetLastSibling() const
476{
477 if (m_Parent)
478 return m_Parent->m_LastChild;
479 else
480 {
481 wxHtmlTag *cur = (wxHtmlTag*)this;
8cd82622 482 while (cur->m_Next)
211dfedd
VS
483 cur = cur->m_Next;
484 return cur;
485 }
486}
487
488wxHtmlTag *wxHtmlTag::GetNextTag() const
489{
490 if (m_FirstChild) return m_FirstChild;
491 if (m_Next) return m_Next;
492 wxHtmlTag *cur = m_Parent;
493 if (!cur) return NULL;
8cd82622 494 while (cur->m_Parent && !cur->m_Next)
211dfedd
VS
495 cur = cur->m_Parent;
496 return cur->m_Next;
497}
498
4d223b67 499#endif