]> git.saurik.com Git - wxWidgets.git/blob - src/html/htmltag.cpp
added wxHtmlWindow::ToText
[wxWidgets.git] / src / html / htmltag.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: htmltag.cpp
3 // Purpose: wxHtmlTag class (represents single tag)
4 // Author: Vaclav Slavik
5 // RCS-ID: $Id$
6 // Copyright: (c) 1999 Vaclav Slavik
7 // Licence: wxWindows licence
8 /////////////////////////////////////////////////////////////////////////////
9
10
11 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
12 #pragma implementation "htmltag.h"
13 #endif
14
15 #include "wx/wxprec.h"
16
17 #include "wx/defs.h"
18 #if wxUSE_HTML
19
20 #ifdef __BORLANDC__
21 #pragma hdrstop
22 #endif
23
24 #ifndef WXPRECOMP
25 #endif
26
27 #include "wx/html/htmltag.h"
28 #include "wx/html/htmlpars.h"
29 #include "wx/colour.h"
30 #include <stdio.h> // for vsscanf
31 #include <stdarg.h>
32
33
34 //-----------------------------------------------------------------------------
35 // wxHtmlTagsCache
36 //-----------------------------------------------------------------------------
37
38 struct wxHtmlCacheItem
39 {
40 // this is "pos" value passed to wxHtmlTag's constructor.
41 // it is position of '<' character of the tag
42 int Key;
43
44 // end positions for the tag:
45 // end1 is '<' of ending tag,
46 // end2 is '>' or both are
47 // -1 if there is no ending tag for this one...
48 // or -2 if this is ending tag </...>
49 int End1, End2;
50
51 // name of this tag
52 wxChar *Name;
53 };
54
55
56 IMPLEMENT_CLASS(wxHtmlTagsCache,wxObject)
57
58 #define CACHE_INCREMENT 64
59
60 bool wxIsCDATAElement(const wxChar *tag)
61 {
62 return (wxStrcmp(tag, _T("SCRIPT")) == 0) ||
63 (wxStrcmp(tag, _T("STYLE")) == 0);
64 }
65
66 wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
67 {
68 const wxChar *src = source.c_str();
69 int lng = source.Length();
70 wxChar tagBuffer[256];
71
72 m_Cache = NULL;
73 m_CacheSize = 0;
74 m_CachePos = 0;
75
76 int pos = 0;
77 while (pos < lng)
78 {
79 if (src[pos] == wxT('<')) // tag found:
80 {
81 if (m_CacheSize % CACHE_INCREMENT == 0)
82 m_Cache = (wxHtmlCacheItem*) realloc(m_Cache, (m_CacheSize + CACHE_INCREMENT) * sizeof(wxHtmlCacheItem));
83 int tg = m_CacheSize++;
84 int stpos = pos++;
85 m_Cache[tg].Key = stpos;
86
87 int i;
88 for ( i = 0;
89 pos < lng && i < (int)WXSIZEOF(tagBuffer) - 1 &&
90 src[pos] != wxT('>') && !wxIsspace(src[pos]);
91 i++, pos++ )
92 {
93 tagBuffer[i] = wxToupper(src[pos]);
94 }
95 tagBuffer[i] = _T('\0');
96
97 m_Cache[tg].Name = new wxChar[i+1];
98 memcpy(m_Cache[tg].Name, tagBuffer, (i+1)*sizeof(wxChar));
99
100 while (pos < lng && src[pos] != wxT('>')) pos++;
101
102 if (src[stpos+1] == wxT('/')) // ending tag:
103 {
104 m_Cache[tg].End1 = m_Cache[tg].End2 = -2;
105 // find matching begin tag:
106 for (i = tg; i >= 0; i--)
107 if ((m_Cache[i].End1 == -1) && (wxStrcmp(m_Cache[i].Name, tagBuffer+1) == 0))
108 {
109 m_Cache[i].End1 = stpos;
110 m_Cache[i].End2 = pos + 1;
111 break;
112 }
113 }
114 else
115 {
116 m_Cache[tg].End1 = m_Cache[tg].End2 = -1;
117
118 if (wxIsCDATAElement(tagBuffer))
119 {
120 // find next matching tag
121 int tag_len = wxStrlen(tagBuffer);
122 while (pos < lng)
123 {
124 // find the ending tag
125 while (pos + 1 < lng &&
126 (src[pos] != '<' || src[pos+1] != '/'))
127 ++pos;
128 if (src[pos] == '<')
129 ++pos;
130
131 // see if it matches
132 int match_pos = 0;
133 while (pos < lng && match_pos < tag_len && src[pos] != '>' && src[pos] != '<') {
134 // cast to wxChar needed to suppress warning in
135 // Unicode build
136 if ((wxChar)wxToupper(src[pos]) == tagBuffer[match_pos]) {
137 ++match_pos;
138 }
139 else if (src[pos] == wxT(' ') || src[pos] == wxT('\n') ||
140 src[pos] == wxT('\r') || src[pos] == wxT('\t')) {
141 // need to skip over these
142 }
143 else {
144 match_pos = 0;
145 }
146 ++pos;
147 }
148
149 // found a match
150 if (match_pos == tag_len) {
151 pos = pos - tag_len - 3;
152 break;
153 }
154 else {
155 ++pos;
156 }
157 }
158 }
159 }
160 }
161
162 pos++;
163 }
164
165 // ok, we're done, now we'll free .Name members of cache - we don't need it anymore:
166 for (int i = 0; i < m_CacheSize; i++)
167 {
168 delete[] m_Cache[i].Name;
169 m_Cache[i].Name = NULL;
170 }
171 }
172
173 void wxHtmlTagsCache::QueryTag(int at, int* end1, int* end2)
174 {
175 if (m_Cache == NULL) return;
176 if (m_Cache[m_CachePos].Key != at)
177 {
178 int delta = (at < m_Cache[m_CachePos].Key) ? -1 : 1;
179 do
180 {
181 m_CachePos += delta;
182 }
183 while (m_Cache[m_CachePos].Key != at);
184 }
185 *end1 = m_Cache[m_CachePos].End1;
186 *end2 = m_Cache[m_CachePos].End2;
187 }
188
189
190
191
192 //-----------------------------------------------------------------------------
193 // wxHtmlTag
194 //-----------------------------------------------------------------------------
195
196 IMPLEMENT_CLASS(wxHtmlTag,wxObject)
197
198 wxHtmlTag::wxHtmlTag(wxHtmlTag *parent,
199 const wxString& source, int pos, int end_pos,
200 wxHtmlTagsCache *cache,
201 wxHtmlEntitiesParser *entParser) : wxObject()
202 {
203 /* Setup DOM relations */
204
205 m_Next = NULL;
206 m_FirstChild = m_LastChild = NULL;
207 m_Parent = parent;
208 if (parent)
209 {
210 m_Prev = m_Parent->m_LastChild;
211 if (m_Prev == NULL)
212 m_Parent->m_FirstChild = this;
213 else
214 m_Prev->m_Next = this;
215 m_Parent->m_LastChild = this;
216 }
217 else
218 m_Prev = NULL;
219
220 /* Find parameters and their values: */
221
222 int i;
223 wxChar c;
224
225 // fill-in name, params and begin pos:
226 i = pos+1;
227
228 // find tag's name and convert it to uppercase:
229 while ((i < end_pos) &&
230 ((c = source[i++]) != wxT(' ') && c != wxT('\r') &&
231 c != wxT('\n') && c != wxT('\t') &&
232 c != wxT('>')))
233 {
234 if ((c >= wxT('a')) && (c <= wxT('z')))
235 c -= (wxT('a') - wxT('A'));
236 m_Name << c;
237 }
238
239 // if the tag has parameters, read them and "normalize" them,
240 // i.e. convert to uppercase, replace whitespaces by spaces and
241 // remove whitespaces around '=':
242 if (source[i-1] != wxT('>'))
243 {
244 #define IS_WHITE(c) (c == wxT(' ') || c == wxT('\r') || \
245 c == wxT('\n') || c == wxT('\t'))
246 wxString pname, pvalue;
247 wxChar quote;
248 enum
249 {
250 ST_BEFORE_NAME = 1,
251 ST_NAME,
252 ST_BEFORE_EQ,
253 ST_BEFORE_VALUE,
254 ST_VALUE
255 } state;
256
257 quote = 0;
258 state = ST_BEFORE_NAME;
259 while (i < end_pos)
260 {
261 c = source[i++];
262
263 if (c == wxT('>') && !(state == ST_VALUE && quote != 0))
264 {
265 if (state == ST_BEFORE_EQ || state == ST_NAME)
266 {
267 m_ParamNames.Add(pname);
268 m_ParamValues.Add(wxEmptyString);
269 }
270 else if (state == ST_VALUE && quote == 0)
271 {
272 m_ParamNames.Add(pname);
273 if (entParser)
274 m_ParamValues.Add(entParser->Parse(pvalue));
275 else
276 m_ParamValues.Add(pvalue);
277 }
278 break;
279 }
280 switch (state)
281 {
282 case ST_BEFORE_NAME:
283 if (!IS_WHITE(c))
284 {
285 pname = c;
286 state = ST_NAME;
287 }
288 break;
289 case ST_NAME:
290 if (IS_WHITE(c))
291 state = ST_BEFORE_EQ;
292 else if (c == wxT('='))
293 state = ST_BEFORE_VALUE;
294 else
295 pname << c;
296 break;
297 case ST_BEFORE_EQ:
298 if (c == wxT('='))
299 state = ST_BEFORE_VALUE;
300 else if (!IS_WHITE(c))
301 {
302 m_ParamNames.Add(pname);
303 m_ParamValues.Add(wxEmptyString);
304 pname = c;
305 state = ST_NAME;
306 }
307 break;
308 case ST_BEFORE_VALUE:
309 if (!IS_WHITE(c))
310 {
311 if (c == wxT('"') || c == wxT('\''))
312 quote = c, pvalue = wxEmptyString;
313 else
314 quote = 0, pvalue = c;
315 state = ST_VALUE;
316 }
317 break;
318 case ST_VALUE:
319 if ((quote != 0 && c == quote) ||
320 (quote == 0 && IS_WHITE(c)))
321 {
322 m_ParamNames.Add(pname);
323 if (quote == 0)
324 {
325 // VS: backward compatibility, no real reason,
326 // but wxHTML code relies on this... :(
327 pvalue.MakeUpper();
328 }
329 if (entParser)
330 m_ParamValues.Add(entParser->Parse(pvalue));
331 else
332 m_ParamValues.Add(pvalue);
333 state = ST_BEFORE_NAME;
334 }
335 else
336 pvalue << c;
337 break;
338 }
339 }
340
341 #undef IS_WHITE
342 }
343 m_Begin = i;
344
345 cache->QueryTag(pos, &m_End1, &m_End2);
346 if (m_End1 > end_pos) m_End1 = end_pos;
347 if (m_End2 > end_pos) m_End2 = end_pos;
348 }
349
350 wxHtmlTag::~wxHtmlTag()
351 {
352 wxHtmlTag *t1, *t2;
353 t1 = m_FirstChild;
354 while (t1)
355 {
356 t2 = t1->GetNextSibling();
357 delete t1;
358 t1 = t2;
359 }
360 }
361
362 bool wxHtmlTag::HasParam(const wxString& par) const
363 {
364 return (m_ParamNames.Index(par, false) != wxNOT_FOUND);
365 }
366
367 wxString wxHtmlTag::GetParam(const wxString& par, bool with_commas) const
368 {
369 int index = m_ParamNames.Index(par, false);
370 if (index == wxNOT_FOUND)
371 return wxEmptyString;
372 if (with_commas)
373 {
374 // VS: backward compatibility, seems to be never used by wxHTML...
375 wxString s;
376 s << wxT('"') << m_ParamValues[index] << wxT('"');
377 return s;
378 }
379 else
380 return m_ParamValues[index];
381 }
382
383 int wxHtmlTag::ScanParam(const wxString& par,
384 const wxChar *format,
385 void *param) const
386 {
387 wxString parval = GetParam(par);
388 return wxSscanf(parval, format, param);
389 }
390
391 bool wxHtmlTag::GetParamAsColour(const wxString& par, wxColour *clr) const
392 {
393 wxString str = GetParam(par);
394
395 if (str.IsEmpty()) return false;
396 if (str.GetChar(0) == wxT('#'))
397 {
398 unsigned long tmp;
399 if (ScanParam(par, wxT("#%lX"), &tmp) != 1)
400 return false;
401 *clr = wxColour((unsigned char)((tmp & 0xFF0000) >> 16),
402 (unsigned char)((tmp & 0x00FF00) >> 8),
403 (unsigned char)(tmp & 0x0000FF));
404 return true;
405 }
406 else
407 {
408 // Handle colours defined in HTML 4.0:
409 #define HTML_COLOUR(name,r,g,b) \
410 if (str.IsSameAs(wxT(name), false)) \
411 { *clr = wxColour(r,g,b); return true; }
412 HTML_COLOUR("black", 0x00,0x00,0x00)
413 HTML_COLOUR("silver", 0xC0,0xC0,0xC0)
414 HTML_COLOUR("gray", 0x80,0x80,0x80)
415 HTML_COLOUR("white", 0xFF,0xFF,0xFF)
416 HTML_COLOUR("maroon", 0x80,0x00,0x00)
417 HTML_COLOUR("red", 0xFF,0x00,0x00)
418 HTML_COLOUR("purple", 0x80,0x00,0x80)
419 HTML_COLOUR("fuchsia", 0xFF,0x00,0xFF)
420 HTML_COLOUR("green", 0x00,0x80,0x00)
421 HTML_COLOUR("lime", 0x00,0xFF,0x00)
422 HTML_COLOUR("olive", 0x80,0x80,0x00)
423 HTML_COLOUR("yellow", 0xFF,0xFF,0x00)
424 HTML_COLOUR("navy", 0x00,0x00,0x80)
425 HTML_COLOUR("blue", 0x00,0x00,0xFF)
426 HTML_COLOUR("teal", 0x00,0x80,0x80)
427 HTML_COLOUR("aqua", 0x00,0xFF,0xFF)
428 #undef HTML_COLOUR
429 }
430
431 return false;
432 }
433
434 bool wxHtmlTag::GetParamAsInt(const wxString& par, int *clr) const
435 {
436 if (!HasParam(par)) return false;
437 long i;
438 bool succ = GetParam(par).ToLong(&i);
439 *clr = (int)i;
440 return succ;
441 }
442
443 wxString wxHtmlTag::GetAllParams() const
444 {
445 // VS: this function is for backward compatiblity only,
446 // never used by wxHTML
447 wxString s;
448 size_t cnt = m_ParamNames.GetCount();
449 for (size_t i = 0; i < cnt; i++)
450 {
451 s << m_ParamNames[i];
452 s << wxT('=');
453 if (m_ParamValues[i].Find(wxT('"')) != wxNOT_FOUND)
454 s << wxT('\'') << m_ParamValues[i] << wxT('\'');
455 else
456 s << wxT('"') << m_ParamValues[i] << wxT('"');
457 }
458 return s;
459 }
460
461 wxHtmlTag *wxHtmlTag::GetFirstSibling() const
462 {
463 if (m_Parent)
464 return m_Parent->m_FirstChild;
465 else
466 {
467 wxHtmlTag *cur = (wxHtmlTag*)this;
468 while (cur->m_Prev)
469 cur = cur->m_Prev;
470 return cur;
471 }
472 }
473
474 wxHtmlTag *wxHtmlTag::GetLastSibling() const
475 {
476 if (m_Parent)
477 return m_Parent->m_LastChild;
478 else
479 {
480 wxHtmlTag *cur = (wxHtmlTag*)this;
481 while (cur->m_Next)
482 cur = cur->m_Next;
483 return cur;
484 }
485 }
486
487 wxHtmlTag *wxHtmlTag::GetNextTag() const
488 {
489 if (m_FirstChild) return m_FirstChild;
490 if (m_Next) return m_Next;
491 wxHtmlTag *cur = m_Parent;
492 if (!cur) return NULL;
493 while (cur->m_Parent && !cur->m_Next)
494 cur = cur->m_Parent;
495 return cur->m_Next;
496 }
497
498 #endif