]>
Commit | Line | Data |
---|---|---|
1 | ///////////////////////////////////////////////////////////////////////////// | |
2 | // Name: src/html/htmltag.cpp | |
3 | // Purpose: wxHtmlTag class (represents single tag) | |
4 | // Author: Vaclav Slavik | |
5 | // RCS-ID: $Id$ | |
6 | // Copyright: (c) 1999 Vaclav Slavik | |
7 | // Licence: wxWindows licence | |
8 | ///////////////////////////////////////////////////////////////////////////// | |
9 | ||
10 | #include "wx/wxprec.h" | |
11 | ||
12 | #ifdef __BORLANDC__ | |
13 | #pragma hdrstop | |
14 | #endif | |
15 | ||
16 | #if wxUSE_HTML | |
17 | ||
18 | #include "wx/html/htmltag.h" | |
19 | ||
20 | #ifndef WX_PRECOMP | |
21 | #include "wx/colour.h" | |
22 | #include "wx/wxcrtvararg.h" | |
23 | #endif | |
24 | ||
25 | #include "wx/html/htmlpars.h" | |
26 | #include "wx/vector.h" | |
27 | ||
28 | #include <stdio.h> // for vsscanf | |
29 | #include <stdarg.h> | |
30 | ||
31 | //----------------------------------------------------------------------------- | |
32 | // wxHtmlTagsCache | |
33 | //----------------------------------------------------------------------------- | |
34 | ||
// One entry of the tags cache: describes a single (non-comment) tag found in
// the HTML source while wxHtmlTagsCache's constructor scans it.
struct wxHtmlCacheItem
{
    // this is "pos" value passed to wxHtmlTag's constructor.
    // it is position of '<' character of the tag
    wxString::const_iterator Key;

    // Tag type
    enum Type
    {
        Type_Normal, // normal tag with a matching ending tag
        Type_NoMatchingEndingTag, // there's no ending tag for this tag
        Type_EndingTag // this is ending tag </..>
    };
    Type type;

    // end positions for the tag:
    // End1 is '<' of the ending tag,
    // End2 is the position just after '>' of the ending tag.
    // Both are only assigned once a matching ending tag has been found,
    // i.e. when type is Type_Normal.
    wxString::const_iterator End1, End2;

    // name of this tag, upper-cased (for an ending tag it starts with '/');
    // heap-allocated while the cache is being built, then freed and reset to
    // NULL at the end of wxHtmlTagsCache's constructor
    wxChar *Name;
};
58 | ||
// Storage used by wxHtmlTagsCache: a vector of cache items, one per tag.
//
// NB: this is an empty class and not typedef because of forward declaration
class wxHtmlTagsCacheData : public wxVector<wxHtmlCacheItem>
{
};
63 | ||
64 | bool wxIsCDATAElement(const wxChar *tag) | |
65 | { | |
66 | return (wxStrcmp(tag, _T("SCRIPT")) == 0) || | |
67 | (wxStrcmp(tag, _T("STYLE")) == 0); | |
68 | } | |
69 | ||
70 | bool wxIsCDATAElement(const wxString& tag) | |
71 | { | |
72 | return (wxStrcmp(tag.wx_str(), wxS("SCRIPT")) == 0) || | |
73 | (wxStrcmp(tag.wx_str(), wxS("STYLE")) == 0); | |
74 | } | |
75 | ||
76 | wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source) | |
77 | { | |
78 | m_Cache = new wxHtmlTagsCacheData; | |
79 | m_CachePos = 0; | |
80 | ||
81 | wxChar tagBuffer[256]; | |
82 | ||
83 | const wxString::const_iterator end = source.end(); | |
84 | for ( wxString::const_iterator pos = source.begin(); pos < end; ++pos ) | |
85 | { | |
86 | if (*pos == wxT('<')) // tag found: | |
87 | { | |
88 | // don't cache comment tags | |
89 | if ( wxHtmlParser::SkipCommentTag(pos, source.end()) ) | |
90 | continue; | |
91 | ||
92 | size_t tg = Cache().size(); | |
93 | Cache().push_back(wxHtmlCacheItem()); | |
94 | ||
95 | wxString::const_iterator stpos = pos++; | |
96 | Cache()[tg].Key = stpos; | |
97 | ||
98 | int i; | |
99 | for ( i = 0; | |
100 | pos < end && i < (int)WXSIZEOF(tagBuffer) - 1 && | |
101 | *pos != wxT('>') && !wxIsspace(*pos); | |
102 | ++i, ++pos ) | |
103 | { | |
104 | tagBuffer[i] = (wxChar)wxToupper(*pos); | |
105 | } | |
106 | tagBuffer[i] = _T('\0'); | |
107 | ||
108 | Cache()[tg].Name = new wxChar[i+1]; | |
109 | memcpy(Cache()[tg].Name, tagBuffer, (i+1)*sizeof(wxChar)); | |
110 | ||
111 | while (pos < end && *pos != wxT('>')) | |
112 | ++pos; | |
113 | ||
114 | if ((stpos+1) < end && *(stpos+1) == wxT('/')) // ending tag: | |
115 | { | |
116 | Cache()[tg].type = wxHtmlCacheItem::Type_EndingTag; | |
117 | // find matching begin tag: | |
118 | for (i = tg; i >= 0; i--) | |
119 | { | |
120 | if ((Cache()[i].type == wxHtmlCacheItem::Type_NoMatchingEndingTag) && (wxStrcmp(Cache()[i].Name, tagBuffer+1) == 0)) | |
121 | { | |
122 | Cache()[i].type = wxHtmlCacheItem::Type_Normal; | |
123 | Cache()[i].End1 = stpos; | |
124 | Cache()[i].End2 = pos + 1; | |
125 | break; | |
126 | } | |
127 | } | |
128 | } | |
129 | else | |
130 | { | |
131 | Cache()[tg].type = wxHtmlCacheItem::Type_NoMatchingEndingTag; | |
132 | ||
133 | if (wxIsCDATAElement(tagBuffer)) | |
134 | { | |
135 | // store the orig pos in case we are missing the closing | |
136 | // tag (see below) | |
137 | const wxString::const_iterator old_pos = pos; | |
138 | bool foundCloseTag = false; | |
139 | ||
140 | // find next matching tag | |
141 | int tag_len = wxStrlen(tagBuffer); | |
142 | while (pos < end) | |
143 | { | |
144 | // find the ending tag | |
145 | while (pos + 1 < end && | |
146 | (*pos != '<' || *(pos+1) != '/')) | |
147 | ++pos; | |
148 | if (*pos == '<') | |
149 | ++pos; | |
150 | ||
151 | // see if it matches | |
152 | int match_pos = 0; | |
153 | while (pos < end && match_pos < tag_len ) | |
154 | { | |
155 | wxChar c = *pos; | |
156 | if ( c == '>' || c == '<' ) | |
157 | break; | |
158 | ||
159 | // cast to wxChar needed to suppress warning in | |
160 | // Unicode build | |
161 | if ((wxChar)wxToupper(c) == tagBuffer[match_pos]) | |
162 | { | |
163 | ++match_pos; | |
164 | } | |
165 | else if (c == wxT(' ') || c == wxT('\n') || | |
166 | c == wxT('\r') || c == wxT('\t')) | |
167 | { | |
168 | // need to skip over these | |
169 | } | |
170 | else | |
171 | { | |
172 | match_pos = 0; | |
173 | } | |
174 | ++pos; | |
175 | } | |
176 | ||
177 | // found a match | |
178 | if (match_pos == tag_len) | |
179 | { | |
180 | pos = pos - tag_len - 3; | |
181 | foundCloseTag = true; | |
182 | break; | |
183 | } | |
184 | else // keep looking for the closing tag | |
185 | { | |
186 | ++pos; | |
187 | } | |
188 | } | |
189 | if (!foundCloseTag) | |
190 | { | |
191 | // we didn't find closing tag; this means the markup | |
192 | // is incorrect and the best thing we can do is to | |
193 | // ignore the unclosed tag and continue parsing as if | |
194 | // it didn't exist: | |
195 | pos = old_pos; | |
196 | } | |
197 | } | |
198 | } | |
199 | } | |
200 | } | |
201 | ||
202 | // ok, we're done, now we'll free .Name members of cache - we don't need it anymore: | |
203 | for ( wxHtmlTagsCacheData::iterator i = Cache().begin(); | |
204 | i != Cache().end(); ++i ) | |
205 | { | |
206 | delete[] i->Name; | |
207 | i->Name = NULL; | |
208 | } | |
209 | } | |
210 | ||
// Frees the cache storage allocated in the constructor.
wxHtmlTagsCache::~wxHtmlTagsCache()
{
    delete m_Cache;
}
215 | ||
216 | void wxHtmlTagsCache::QueryTag(const wxString::const_iterator& at, | |
217 | const wxString::const_iterator& inputEnd, | |
218 | wxString::const_iterator *end1, | |
219 | wxString::const_iterator *end2, | |
220 | bool *hasEnding) | |
221 | { | |
222 | if (Cache().empty()) | |
223 | return; | |
224 | ||
225 | if (Cache()[m_CachePos].Key != at) | |
226 | { | |
227 | int delta = (at < Cache()[m_CachePos].Key) ? -1 : 1; | |
228 | do | |
229 | { | |
230 | m_CachePos += delta; | |
231 | ||
232 | if ( m_CachePos < 0 || m_CachePos >= (int)Cache().size() ) | |
233 | { | |
234 | if ( m_CachePos < 0 ) | |
235 | m_CachePos = 0; | |
236 | else | |
237 | m_CachePos = Cache().size() - 1; | |
238 | // something is very wrong with HTML, give up by returning an | |
239 | // impossibly large value which is going to be ignored by the | |
240 | // caller | |
241 | *end1 = | |
242 | *end2 = inputEnd; | |
243 | *hasEnding = true; | |
244 | return; | |
245 | } | |
246 | } | |
247 | while (Cache()[m_CachePos].Key != at); | |
248 | } | |
249 | ||
250 | switch ( Cache()[m_CachePos].type ) | |
251 | { | |
252 | case wxHtmlCacheItem::Type_Normal: | |
253 | *end1 = Cache()[m_CachePos].End1; | |
254 | *end2 = Cache()[m_CachePos].End2; | |
255 | *hasEnding = true; | |
256 | break; | |
257 | ||
258 | case wxHtmlCacheItem::Type_EndingTag: | |
259 | wxFAIL_MSG("QueryTag called for ending tag - can't be"); | |
260 | // but if it does happen, fall through, better than crashing | |
261 | ||
262 | case wxHtmlCacheItem::Type_NoMatchingEndingTag: | |
263 | // If input HTML is invalid and there's no closing tag for this | |
264 | // one, pretend that it runs all the way to the end of input | |
265 | *end1 = inputEnd; | |
266 | *end2 = inputEnd; | |
267 | *hasEnding = false; | |
268 | break; | |
269 | } | |
270 | } | |
271 | ||
272 | ||
273 | ||
274 | ||
275 | //----------------------------------------------------------------------------- | |
276 | // wxHtmlTag | |
277 | //----------------------------------------------------------------------------- | |
278 | ||
279 | wxHtmlTag::wxHtmlTag(wxHtmlTag *parent, | |
280 | const wxString *source, | |
281 | const wxString::const_iterator& pos, | |
282 | const wxString::const_iterator& end_pos, | |
283 | wxHtmlTagsCache *cache, | |
284 | wxHtmlEntitiesParser *entParser) | |
285 | { | |
286 | /* Setup DOM relations */ | |
287 | ||
288 | m_Next = NULL; | |
289 | m_FirstChild = m_LastChild = NULL; | |
290 | m_Parent = parent; | |
291 | if (parent) | |
292 | { | |
293 | m_Prev = m_Parent->m_LastChild; | |
294 | if (m_Prev == NULL) | |
295 | m_Parent->m_FirstChild = this; | |
296 | else | |
297 | m_Prev->m_Next = this; | |
298 | m_Parent->m_LastChild = this; | |
299 | } | |
300 | else | |
301 | m_Prev = NULL; | |
302 | ||
303 | /* Find parameters and their values: */ | |
304 | ||
305 | wxChar c; | |
306 | ||
307 | // fill-in name, params and begin pos: | |
308 | wxString::const_iterator i(pos+1); | |
309 | ||
310 | // find tag's name and convert it to uppercase: | |
311 | while ((i < end_pos) && | |
312 | ((c = *(i++)) != wxT(' ') && c != wxT('\r') && | |
313 | c != wxT('\n') && c != wxT('\t') && | |
314 | c != wxT('>'))) | |
315 | { | |
316 | if ((c >= wxT('a')) && (c <= wxT('z'))) | |
317 | c -= (wxT('a') - wxT('A')); | |
318 | m_Name << c; | |
319 | } | |
320 | ||
321 | // if the tag has parameters, read them and "normalize" them, | |
322 | // i.e. convert to uppercase, replace whitespaces by spaces and | |
323 | // remove whitespaces around '=': | |
324 | if (*(i-1) != wxT('>')) | |
325 | { | |
326 | #define IS_WHITE(c) (c == wxT(' ') || c == wxT('\r') || \ | |
327 | c == wxT('\n') || c == wxT('\t')) | |
328 | wxString pname, pvalue; | |
329 | wxChar quote; | |
330 | enum | |
331 | { | |
332 | ST_BEFORE_NAME = 1, | |
333 | ST_NAME, | |
334 | ST_BEFORE_EQ, | |
335 | ST_BEFORE_VALUE, | |
336 | ST_VALUE | |
337 | } state; | |
338 | ||
339 | quote = 0; | |
340 | state = ST_BEFORE_NAME; | |
341 | while (i < end_pos) | |
342 | { | |
343 | c = *(i++); | |
344 | ||
345 | if (c == wxT('>') && !(state == ST_VALUE && quote != 0)) | |
346 | { | |
347 | if (state == ST_BEFORE_EQ || state == ST_NAME) | |
348 | { | |
349 | m_ParamNames.Add(pname); | |
350 | m_ParamValues.Add(wxGetEmptyString()); | |
351 | } | |
352 | else if (state == ST_VALUE && quote == 0) | |
353 | { | |
354 | m_ParamNames.Add(pname); | |
355 | if (entParser) | |
356 | m_ParamValues.Add(entParser->Parse(pvalue)); | |
357 | else | |
358 | m_ParamValues.Add(pvalue); | |
359 | } | |
360 | break; | |
361 | } | |
362 | switch (state) | |
363 | { | |
364 | case ST_BEFORE_NAME: | |
365 | if (!IS_WHITE(c)) | |
366 | { | |
367 | pname = c; | |
368 | state = ST_NAME; | |
369 | } | |
370 | break; | |
371 | case ST_NAME: | |
372 | if (IS_WHITE(c)) | |
373 | state = ST_BEFORE_EQ; | |
374 | else if (c == wxT('=')) | |
375 | state = ST_BEFORE_VALUE; | |
376 | else | |
377 | pname << c; | |
378 | break; | |
379 | case ST_BEFORE_EQ: | |
380 | if (c == wxT('=')) | |
381 | state = ST_BEFORE_VALUE; | |
382 | else if (!IS_WHITE(c)) | |
383 | { | |
384 | m_ParamNames.Add(pname); | |
385 | m_ParamValues.Add(wxGetEmptyString()); | |
386 | pname = c; | |
387 | state = ST_NAME; | |
388 | } | |
389 | break; | |
390 | case ST_BEFORE_VALUE: | |
391 | if (!IS_WHITE(c)) | |
392 | { | |
393 | if (c == wxT('"') || c == wxT('\'')) | |
394 | quote = c, pvalue = wxGetEmptyString(); | |
395 | else | |
396 | quote = 0, pvalue = c; | |
397 | state = ST_VALUE; | |
398 | } | |
399 | break; | |
400 | case ST_VALUE: | |
401 | if ((quote != 0 && c == quote) || | |
402 | (quote == 0 && IS_WHITE(c))) | |
403 | { | |
404 | m_ParamNames.Add(pname); | |
405 | if (quote == 0) | |
406 | { | |
407 | // VS: backward compatibility, no real reason, | |
408 | // but wxHTML code relies on this... :( | |
409 | pvalue.MakeUpper(); | |
410 | } | |
411 | if (entParser) | |
412 | m_ParamValues.Add(entParser->Parse(pvalue)); | |
413 | else | |
414 | m_ParamValues.Add(pvalue); | |
415 | state = ST_BEFORE_NAME; | |
416 | } | |
417 | else | |
418 | pvalue << c; | |
419 | break; | |
420 | } | |
421 | } | |
422 | ||
423 | #undef IS_WHITE | |
424 | } | |
425 | m_Begin = i; | |
426 | cache->QueryTag(pos, source->end(), &m_End1, &m_End2, &m_hasEnding); | |
427 | if (m_End1 > end_pos) m_End1 = end_pos; | |
428 | if (m_End2 > end_pos) m_End2 = end_pos; | |
429 | ||
430 | #if WXWIN_COMPATIBILITY_2_8 | |
431 | m_sourceStart = source->begin(); | |
432 | #endif | |
433 | } | |
434 | ||
435 | wxHtmlTag::~wxHtmlTag() | |
436 | { | |
437 | wxHtmlTag *t1, *t2; | |
438 | t1 = m_FirstChild; | |
439 | while (t1) | |
440 | { | |
441 | t2 = t1->GetNextSibling(); | |
442 | delete t1; | |
443 | t1 = t2; | |
444 | } | |
445 | } | |
446 | ||
447 | bool wxHtmlTag::HasParam(const wxString& par) const | |
448 | { | |
449 | return (m_ParamNames.Index(par, false) != wxNOT_FOUND); | |
450 | } | |
451 | ||
452 | wxString wxHtmlTag::GetParam(const wxString& par, bool with_quotes) const | |
453 | { | |
454 | int index = m_ParamNames.Index(par, false); | |
455 | if (index == wxNOT_FOUND) | |
456 | return wxGetEmptyString(); | |
457 | if (with_quotes) | |
458 | { | |
459 | // VS: backward compatibility, seems to be never used by wxHTML... | |
460 | wxString s; | |
461 | s << wxT('"') << m_ParamValues[index] << wxT('"'); | |
462 | return s; | |
463 | } | |
464 | else | |
465 | return m_ParamValues[index]; | |
466 | } | |
467 | ||
468 | int wxHtmlTag::ScanParam(const wxString& par, | |
469 | const char *format, | |
470 | void *param) const | |
471 | { | |
472 | wxString parval = GetParam(par); | |
473 | return wxSscanf(parval, format, param); | |
474 | } | |
475 | ||
476 | int wxHtmlTag::ScanParam(const wxString& par, | |
477 | const wchar_t *format, | |
478 | void *param) const | |
479 | { | |
480 | wxString parval = GetParam(par); | |
481 | return wxSscanf(parval, format, param); | |
482 | } | |
483 | ||
484 | bool wxHtmlTag::GetParamAsColour(const wxString& par, wxColour *clr) const | |
485 | { | |
486 | wxCHECK_MSG( clr, false, _T("invalid colour argument") ); | |
487 | ||
488 | wxString str = GetParam(par); | |
489 | ||
490 | // handle colours defined in HTML 4.0 first: | |
491 | if (str.length() > 1 && str[0] != _T('#')) | |
492 | { | |
493 | #define HTML_COLOUR(name, r, g, b) \ | |
494 | if (str.IsSameAs(wxS(name), false)) \ | |
495 | { clr->Set(r, g, b); return true; } | |
496 | HTML_COLOUR("black", 0x00,0x00,0x00) | |
497 | HTML_COLOUR("silver", 0xC0,0xC0,0xC0) | |
498 | HTML_COLOUR("gray", 0x80,0x80,0x80) | |
499 | HTML_COLOUR("white", 0xFF,0xFF,0xFF) | |
500 | HTML_COLOUR("maroon", 0x80,0x00,0x00) | |
501 | HTML_COLOUR("red", 0xFF,0x00,0x00) | |
502 | HTML_COLOUR("purple", 0x80,0x00,0x80) | |
503 | HTML_COLOUR("fuchsia", 0xFF,0x00,0xFF) | |
504 | HTML_COLOUR("green", 0x00,0x80,0x00) | |
505 | HTML_COLOUR("lime", 0x00,0xFF,0x00) | |
506 | HTML_COLOUR("olive", 0x80,0x80,0x00) | |
507 | HTML_COLOUR("yellow", 0xFF,0xFF,0x00) | |
508 | HTML_COLOUR("navy", 0x00,0x00,0x80) | |
509 | HTML_COLOUR("blue", 0x00,0x00,0xFF) | |
510 | HTML_COLOUR("teal", 0x00,0x80,0x80) | |
511 | HTML_COLOUR("aqua", 0x00,0xFF,0xFF) | |
512 | #undef HTML_COLOUR | |
513 | } | |
514 | ||
515 | // then try to parse #rrggbb representations or set from other well | |
516 | // known names (note that this doesn't strictly conform to HTML spec, | |
517 | // but it doesn't do real harm -- but it *must* be done after the standard | |
518 | // colors are handled above): | |
519 | if (clr->Set(str)) | |
520 | return true; | |
521 | ||
522 | return false; | |
523 | } | |
524 | ||
525 | bool wxHtmlTag::GetParamAsInt(const wxString& par, int *clr) const | |
526 | { | |
527 | if (!HasParam(par)) return false; | |
528 | long i; | |
529 | bool succ = GetParam(par).ToLong(&i); | |
530 | *clr = (int)i; | |
531 | return succ; | |
532 | } | |
533 | ||
534 | wxString wxHtmlTag::GetAllParams() const | |
535 | { | |
536 | // VS: this function is for backward compatibility only, | |
537 | // never used by wxHTML | |
538 | wxString s; | |
539 | size_t cnt = m_ParamNames.GetCount(); | |
540 | for (size_t i = 0; i < cnt; i++) | |
541 | { | |
542 | s << m_ParamNames[i]; | |
543 | s << wxT('='); | |
544 | if (m_ParamValues[i].Find(wxT('"')) != wxNOT_FOUND) | |
545 | s << wxT('\'') << m_ParamValues[i] << wxT('\''); | |
546 | else | |
547 | s << wxT('"') << m_ParamValues[i] << wxT('"'); | |
548 | } | |
549 | return s; | |
550 | } | |
551 | ||
552 | wxHtmlTag *wxHtmlTag::GetFirstSibling() const | |
553 | { | |
554 | if (m_Parent) | |
555 | return m_Parent->m_FirstChild; | |
556 | else | |
557 | { | |
558 | wxHtmlTag *cur = (wxHtmlTag*)this; | |
559 | while (cur->m_Prev) | |
560 | cur = cur->m_Prev; | |
561 | return cur; | |
562 | } | |
563 | } | |
564 | ||
565 | wxHtmlTag *wxHtmlTag::GetLastSibling() const | |
566 | { | |
567 | if (m_Parent) | |
568 | return m_Parent->m_LastChild; | |
569 | else | |
570 | { | |
571 | wxHtmlTag *cur = (wxHtmlTag*)this; | |
572 | while (cur->m_Next) | |
573 | cur = cur->m_Next; | |
574 | return cur; | |
575 | } | |
576 | } | |
577 | ||
578 | wxHtmlTag *wxHtmlTag::GetNextTag() const | |
579 | { | |
580 | if (m_FirstChild) return m_FirstChild; | |
581 | if (m_Next) return m_Next; | |
582 | wxHtmlTag *cur = m_Parent; | |
583 | if (!cur) return NULL; | |
584 | while (cur->m_Parent && !cur->m_Next) | |
585 | cur = cur->m_Parent; | |
586 | return cur->m_Next; | |
587 | } | |
588 | ||
589 | #endif |