wx/wxprec.h already includes wx/defs.h (with other minor cleaning).
[wxWidgets.git] / src / html / htmltag.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/html/htmltag.cpp
3 // Purpose: wxHtmlTag class (represents single tag)
4 // Author: Vaclav Slavik
5 // RCS-ID: $Id$
6 // Copyright: (c) 1999 Vaclav Slavik
7 // Licence: wxWindows licence
8 /////////////////////////////////////////////////////////////////////////////
9
10 #include "wx/wxprec.h"
11
12 #ifdef __BORLANDC__
13 #pragma hdrstop
14 #endif
15
16 #if wxUSE_HTML
17
18 #ifndef WXPRECOMP
19 #endif
20
21 #include "wx/html/htmltag.h"
22 #include "wx/html/htmlpars.h"
23 #include "wx/colour.h"
24 #include <stdio.h> // for vsscanf
25 #include <stdarg.h>
26
27
28 //-----------------------------------------------------------------------------
29 // wxHtmlTagsCache
30 //-----------------------------------------------------------------------------
31
32 struct wxHtmlCacheItem
33 {
34 // this is "pos" value passed to wxHtmlTag's constructor.
35 // it is position of '<' character of the tag
36 int Key;
37
38 // end positions for the tag:
39 // end1 is '<' of ending tag,
40 // end2 is '>' or both are
41 // -1 if there is no ending tag for this one...
42 // or -2 if this is ending tag </...>
43 int End1, End2;
44
45 // name of this tag
46 wxChar *Name;
47 };
48
49
// NOTE(review): IMPLEMENT_CLASS is defined outside this file; presumably it
// emits the wxWidgets run-time class information that ties wxHtmlTagsCache
// to its base class wxObject - confirm against the wx/object.h documentation.
50 IMPLEMENT_CLASS(wxHtmlTagsCache,wxObject)
51
// Growth step of the m_Cache array: the wxHtmlTagsCache constructor
// reallocates the array with room for this many additional items each time
// m_CacheSize reaches a multiple of this value.
52 #define CACHE_INCREMENT 64
53
54 bool wxIsCDATAElement(const wxChar *tag)
55 {
56 return (wxStrcmp(tag, _T("SCRIPT")) == 0) ||
57 (wxStrcmp(tag, _T("STYLE")) == 0);
58 }
59
60 wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
61 {
62 const wxChar *src = source.c_str();
63 int lng = source.length();
64 wxChar tagBuffer[256];
65
66 m_Cache = NULL;
67 m_CacheSize = 0;
68 m_CachePos = 0;
69
70 int pos = 0;
71 while (pos < lng)
72 {
73 if (src[pos] == wxT('<')) // tag found:
74 {
75 if (m_CacheSize % CACHE_INCREMENT == 0)
76 m_Cache = (wxHtmlCacheItem*) realloc(m_Cache, (m_CacheSize + CACHE_INCREMENT) * sizeof(wxHtmlCacheItem));
77 int tg = m_CacheSize++;
78 int stpos = pos++;
79 m_Cache[tg].Key = stpos;
80
81 int i;
82 for ( i = 0;
83 pos < lng && i < (int)WXSIZEOF(tagBuffer) - 1 &&
84 src[pos] != wxT('>') && !wxIsspace(src[pos]);
85 i++, pos++ )
86 {
87 tagBuffer[i] = (wxChar)wxToupper(src[pos]);
88 }
89 tagBuffer[i] = _T('\0');
90
91 m_Cache[tg].Name = new wxChar[i+1];
92 memcpy(m_Cache[tg].Name, tagBuffer, (i+1)*sizeof(wxChar));
93
94 while (pos < lng && src[pos] != wxT('>')) pos++;
95
96 if (src[stpos+1] == wxT('/')) // ending tag:
97 {
98 m_Cache[tg].End1 = m_Cache[tg].End2 = -2;
99 // find matching begin tag:
100 for (i = tg; i >= 0; i--)
101 if ((m_Cache[i].End1 == -1) && (wxStrcmp(m_Cache[i].Name, tagBuffer+1) == 0))
102 {
103 m_Cache[i].End1 = stpos;
104 m_Cache[i].End2 = pos + 1;
105 break;
106 }
107 }
108 else
109 {
110 m_Cache[tg].End1 = m_Cache[tg].End2 = -1;
111
112 if (wxIsCDATAElement(tagBuffer))
113 {
114 // store the orig pos in case we are missing the closing
115 // tag (see below)
116 wxInt32 old_pos = pos;
117 bool foundCloseTag = false;
118
119 // find next matching tag
120 int tag_len = wxStrlen(tagBuffer);
121 while (pos < lng)
122 {
123 // find the ending tag
124 while (pos + 1 < lng &&
125 (src[pos] != '<' || src[pos+1] != '/'))
126 ++pos;
127 if (src[pos] == '<')
128 ++pos;
129
130 // see if it matches
131 int match_pos = 0;
132 while (pos < lng && match_pos < tag_len && src[pos] != '>' && src[pos] != '<') {
133 // cast to wxChar needed to suppress warning in
134 // Unicode build
135 if ((wxChar)wxToupper(src[pos]) == tagBuffer[match_pos]) {
136 ++match_pos;
137 }
138 else if (src[pos] == wxT(' ') || src[pos] == wxT('\n') ||
139 src[pos] == wxT('\r') || src[pos] == wxT('\t')) {
140 // need to skip over these
141 }
142 else {
143 match_pos = 0;
144 }
145 ++pos;
146 }
147
148 // found a match
149 if (match_pos == tag_len)
150 {
151 pos = pos - tag_len - 3;
152 foundCloseTag = true;
153 break;
154 }
155 else // keep looking for the closing tag
156 {
157 ++pos;
158 }
159 }
160 if (!foundCloseTag)
161 {
162 // we didn't find closing tag; this means the markup
163 // is incorrect and the best thing we can do is to
164 // ignore the unclosed tag and continue parsing as if
165 // it didn't exist:
166 pos = old_pos;
167 }
168 }
169 }
170 }
171
172 pos++;
173 }
174
175 // ok, we're done, now we'll free .Name members of cache - we don't need it anymore:
176 for (int i = 0; i < m_CacheSize; i++)
177 {
178 delete[] m_Cache[i].Name;
179 m_Cache[i].Name = NULL;
180 }
181 }
182
183 void wxHtmlTagsCache::QueryTag(int at, int* end1, int* end2)
184 {
185 if (m_Cache == NULL) return;
186 if (m_Cache[m_CachePos].Key != at)
187 {
188 int delta = (at < m_Cache[m_CachePos].Key) ? -1 : 1;
189 do
190 {
191 if ( m_CachePos < 0 || m_CachePos == m_CacheSize )
192 {
193 // something is very wrong with HTML, give up by returning an
194 // impossibly large value which is going to be ignored by the
195 // caller
196 *end1 =
197 *end2 = INT_MAX;
198 return;
199 }
200
201 m_CachePos += delta;
202 }
203 while (m_Cache[m_CachePos].Key != at);
204 }
205 *end1 = m_Cache[m_CachePos].End1;
206 *end2 = m_Cache[m_CachePos].End2;
207 }
208
209
210
211
212 //-----------------------------------------------------------------------------
213 // wxHtmlTag
214 //-----------------------------------------------------------------------------
215
// NOTE(review): IMPLEMENT_CLASS is defined outside this file; presumably it
// emits the wxWidgets run-time class information that ties wxHtmlTag to its
// base class wxObject - confirm against the wx/object.h documentation.
216 IMPLEMENT_CLASS(wxHtmlTag,wxObject)
217
218 wxHtmlTag::wxHtmlTag(wxHtmlTag *parent,
219 const wxString& source, int pos, int end_pos,
220 wxHtmlTagsCache *cache,
221 wxHtmlEntitiesParser *entParser) : wxObject()
222 {
223 /* Setup DOM relations */
224
225 m_Next = NULL;
226 m_FirstChild = m_LastChild = NULL;
227 m_Parent = parent;
228 if (parent)
229 {
230 m_Prev = m_Parent->m_LastChild;
231 if (m_Prev == NULL)
232 m_Parent->m_FirstChild = this;
233 else
234 m_Prev->m_Next = this;
235 m_Parent->m_LastChild = this;
236 }
237 else
238 m_Prev = NULL;
239
240 /* Find parameters and their values: */
241
242 int i;
243 wxChar c;
244
245 // fill-in name, params and begin pos:
246 i = pos+1;
247
248 // find tag's name and convert it to uppercase:
249 while ((i < end_pos) &&
250 ((c = source[i++]) != wxT(' ') && c != wxT('\r') &&
251 c != wxT('\n') && c != wxT('\t') &&
252 c != wxT('>')))
253 {
254 if ((c >= wxT('a')) && (c <= wxT('z')))
255 c -= (wxT('a') - wxT('A'));
256 m_Name << c;
257 }
258
259 // if the tag has parameters, read them and "normalize" them,
260 // i.e. convert to uppercase, replace whitespaces by spaces and
261 // remove whitespaces around '=':
262 if (source[i-1] != wxT('>'))
263 {
264 #define IS_WHITE(c) (c == wxT(' ') || c == wxT('\r') || \
265 c == wxT('\n') || c == wxT('\t'))
266 wxString pname, pvalue;
267 wxChar quote;
268 enum
269 {
270 ST_BEFORE_NAME = 1,
271 ST_NAME,
272 ST_BEFORE_EQ,
273 ST_BEFORE_VALUE,
274 ST_VALUE
275 } state;
276
277 quote = 0;
278 state = ST_BEFORE_NAME;
279 while (i < end_pos)
280 {
281 c = source[i++];
282
283 if (c == wxT('>') && !(state == ST_VALUE && quote != 0))
284 {
285 if (state == ST_BEFORE_EQ || state == ST_NAME)
286 {
287 m_ParamNames.Add(pname);
288 m_ParamValues.Add(wxEmptyString);
289 }
290 else if (state == ST_VALUE && quote == 0)
291 {
292 m_ParamNames.Add(pname);
293 if (entParser)
294 m_ParamValues.Add(entParser->Parse(pvalue));
295 else
296 m_ParamValues.Add(pvalue);
297 }
298 break;
299 }
300 switch (state)
301 {
302 case ST_BEFORE_NAME:
303 if (!IS_WHITE(c))
304 {
305 pname = c;
306 state = ST_NAME;
307 }
308 break;
309 case ST_NAME:
310 if (IS_WHITE(c))
311 state = ST_BEFORE_EQ;
312 else if (c == wxT('='))
313 state = ST_BEFORE_VALUE;
314 else
315 pname << c;
316 break;
317 case ST_BEFORE_EQ:
318 if (c == wxT('='))
319 state = ST_BEFORE_VALUE;
320 else if (!IS_WHITE(c))
321 {
322 m_ParamNames.Add(pname);
323 m_ParamValues.Add(wxEmptyString);
324 pname = c;
325 state = ST_NAME;
326 }
327 break;
328 case ST_BEFORE_VALUE:
329 if (!IS_WHITE(c))
330 {
331 if (c == wxT('"') || c == wxT('\''))
332 quote = c, pvalue = wxEmptyString;
333 else
334 quote = 0, pvalue = c;
335 state = ST_VALUE;
336 }
337 break;
338 case ST_VALUE:
339 if ((quote != 0 && c == quote) ||
340 (quote == 0 && IS_WHITE(c)))
341 {
342 m_ParamNames.Add(pname);
343 if (quote == 0)
344 {
345 // VS: backward compatibility, no real reason,
346 // but wxHTML code relies on this... :(
347 pvalue.MakeUpper();
348 }
349 if (entParser)
350 m_ParamValues.Add(entParser->Parse(pvalue));
351 else
352 m_ParamValues.Add(pvalue);
353 state = ST_BEFORE_NAME;
354 }
355 else
356 pvalue << c;
357 break;
358 }
359 }
360
361 #undef IS_WHITE
362 }
363 m_Begin = i;
364
365 cache->QueryTag(pos, &m_End1, &m_End2);
366 if (m_End1 > end_pos) m_End1 = end_pos;
367 if (m_End2 > end_pos) m_End2 = end_pos;
368 }
369
370 wxHtmlTag::~wxHtmlTag()
371 {
372 wxHtmlTag *t1, *t2;
373 t1 = m_FirstChild;
374 while (t1)
375 {
376 t2 = t1->GetNextSibling();
377 delete t1;
378 t1 = t2;
379 }
380 }
381
382 bool wxHtmlTag::HasParam(const wxString& par) const
383 {
384 return (m_ParamNames.Index(par, false) != wxNOT_FOUND);
385 }
386
387 wxString wxHtmlTag::GetParam(const wxString& par, bool with_commas) const
388 {
389 int index = m_ParamNames.Index(par, false);
390 if (index == wxNOT_FOUND)
391 return wxEmptyString;
392 if (with_commas)
393 {
394 // VS: backward compatibility, seems to be never used by wxHTML...
395 wxString s;
396 s << wxT('"') << m_ParamValues[index] << wxT('"');
397 return s;
398 }
399 else
400 return m_ParamValues[index];
401 }
402
403 int wxHtmlTag::ScanParam(const wxString& par,
404 const wxChar *format,
405 void *param) const
406 {
407 wxString parval = GetParam(par);
408 return wxSscanf(parval, format, param);
409 }
410
411 bool wxHtmlTag::GetParamAsColour(const wxString& par, wxColour *clr) const
412 {
413 wxString str = GetParam(par);
414
415 if (str.empty()) return false;
416 if (str.GetChar(0) == wxT('#'))
417 {
418 unsigned long tmp;
419 if (ScanParam(par, wxT("#%lX"), &tmp) != 1)
420 return false;
421 *clr = wxColour((unsigned char)((tmp & 0xFF0000) >> 16),
422 (unsigned char)((tmp & 0x00FF00) >> 8),
423 (unsigned char)(tmp & 0x0000FF));
424 return true;
425 }
426 else
427 {
428 // Handle colours defined in HTML 4.0:
429 #define HTML_COLOUR(name,r,g,b) \
430 if (str.IsSameAs(wxT(name), false)) \
431 { *clr = wxColour(r,g,b); return true; }
432 HTML_COLOUR("black", 0x00,0x00,0x00)
433 HTML_COLOUR("silver", 0xC0,0xC0,0xC0)
434 HTML_COLOUR("gray", 0x80,0x80,0x80)
435 HTML_COLOUR("white", 0xFF,0xFF,0xFF)
436 HTML_COLOUR("maroon", 0x80,0x00,0x00)
437 HTML_COLOUR("red", 0xFF,0x00,0x00)
438 HTML_COLOUR("purple", 0x80,0x00,0x80)
439 HTML_COLOUR("fuchsia", 0xFF,0x00,0xFF)
440 HTML_COLOUR("green", 0x00,0x80,0x00)
441 HTML_COLOUR("lime", 0x00,0xFF,0x00)
442 HTML_COLOUR("olive", 0x80,0x80,0x00)
443 HTML_COLOUR("yellow", 0xFF,0xFF,0x00)
444 HTML_COLOUR("navy", 0x00,0x00,0x80)
445 HTML_COLOUR("blue", 0x00,0x00,0xFF)
446 HTML_COLOUR("teal", 0x00,0x80,0x80)
447 HTML_COLOUR("aqua", 0x00,0xFF,0xFF)
448 #undef HTML_COLOUR
449 }
450
451 return false;
452 }
453
454 bool wxHtmlTag::GetParamAsInt(const wxString& par, int *clr) const
455 {
456 if (!HasParam(par)) return false;
457 long i;
458 bool succ = GetParam(par).ToLong(&i);
459 *clr = (int)i;
460 return succ;
461 }
462
463 wxString wxHtmlTag::GetAllParams() const
464 {
465 // VS: this function is for backward compatibility only,
466 // never used by wxHTML
467 wxString s;
468 size_t cnt = m_ParamNames.GetCount();
469 for (size_t i = 0; i < cnt; i++)
470 {
471 s << m_ParamNames[i];
472 s << wxT('=');
473 if (m_ParamValues[i].Find(wxT('"')) != wxNOT_FOUND)
474 s << wxT('\'') << m_ParamValues[i] << wxT('\'');
475 else
476 s << wxT('"') << m_ParamValues[i] << wxT('"');
477 }
478 return s;
479 }
480
481 wxHtmlTag *wxHtmlTag::GetFirstSibling() const
482 {
483 if (m_Parent)
484 return m_Parent->m_FirstChild;
485 else
486 {
487 wxHtmlTag *cur = (wxHtmlTag*)this;
488 while (cur->m_Prev)
489 cur = cur->m_Prev;
490 return cur;
491 }
492 }
493
494 wxHtmlTag *wxHtmlTag::GetLastSibling() const
495 {
496 if (m_Parent)
497 return m_Parent->m_LastChild;
498 else
499 {
500 wxHtmlTag *cur = (wxHtmlTag*)this;
501 while (cur->m_Next)
502 cur = cur->m_Next;
503 return cur;
504 }
505 }
506
507 wxHtmlTag *wxHtmlTag::GetNextTag() const
508 {
509 if (m_FirstChild) return m_FirstChild;
510 if (m_Next) return m_Next;
511 wxHtmlTag *cur = m_Parent;
512 if (!cur) return NULL;
513 while (cur->m_Parent && !cur->m_Next)
514 cur = cur->m_Parent;
515 return cur->m_Next;
516 }
517
518 #endif