1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/html/htmltag.cpp
3 // Purpose: wxHtmlTag class (represents single tag)
4 // Author: Vaclav Slavik
6 // Copyright: (c) 1999 Vaclav Slavik
7 // Licence: wxWindows licence
8 /////////////////////////////////////////////////////////////////////////////
10 #include "wx/wxprec.h"
18 #include "wx/html/htmltag.h"
21 #include "wx/colour.h"
22 #include "wx/wxcrtvararg.h"
25 #include "wx/html/htmlpars.h"
26 #include "wx/vector.h"
28 #include <stdio.h> // for vsscanf
31 //-----------------------------------------------------------------------------
33 //-----------------------------------------------------------------------------
// One record of the tags cache. The members visible here are:
//  - Key: position of the tag's '<' character in the source string;
//  - the Type_* constants describing whether a tag has a matching ending
//    tag, has none, or is itself an ending tag;
//  - End1/End2: cached positions belonging to the tag's ending tag.
// NOTE(review): other code in this file also accesses `.type` and `.Name` on
// these records; their declarations are not visible in this listing.
35 struct wxHtmlCacheItem
37 // this is "pos" value passed to wxHtmlTag's constructor.
38 // it is position of '<' character of the tag
39 wxString::const_iterator Key
;
44 Type_Normal
, // normal tag with a matching ending tag
45 Type_NoMatchingEndingTag
, // there's no ending tag for this tag
46 Type_EndingTag
// this is ending tag </..>
50 // end positions for the tag:
51 // end1 is '<' of ending tag,
52 // end2 is '>' or both are
53 wxString::const_iterator End1
, End2
;
59 // NB: this is an empty class and not typedef because of forward declaration
60 class wxHtmlTagsCacheData
: public wxVector
<wxHtmlCacheItem
>
64 bool wxIsCDATAElement(const wxChar
*tag
)
66 return (wxStrcmp(tag
, _T("SCRIPT")) == 0) ||
67 (wxStrcmp(tag
, _T("STYLE")) == 0);
70 bool wxIsCDATAElement(const wxString
& tag
)
72 return (wxStrcmp(tag
.wx_str(), wxSTRING_TEXT("SCRIPT")) == 0) ||
73 (wxStrcmp(tag
.wx_str(), wxSTRING_TEXT("STYLE")) == 0);
// Builds the tags cache by scanning `source` from begin to end.
//
// For every '<' that does not start a comment (wxHtmlParser::SkipCommentTag)
// a new wxHtmlCacheItem is appended and its Key is set to the position of
// that '<'. The tag name is collected upper-cased into tagBuffer (bounded by
// the buffer size, stopping at '>' or whitespace) and copied into the item's
// Name.
//
// If the character after '<' is '/', the item is marked Type_EndingTag and
// the cache is walked backwards looking for an entry that is still
// Type_NoMatchingEndingTag and whose Name equals tagBuffer+1 (the name
// without the leading '/'); such an entry becomes Type_Normal and gets
// End1 = position of this '<' and End2 = pos + 1.
//
// Otherwise the item is marked Type_NoMatchingEndingTag. For CDATA elements
// (wxIsCDATAElement) the scan additionally looks ahead for "</" followed by
// the same tag name, skipping blanks; when found, foundCloseTag is set and
// pos is moved back by tag_len + 3. The original position is kept in old_pos
// for the case where no closing tag exists (see the comments below).
//
// Per the final comment, the Name members are only needed during this scan
// and are freed at the end.
76 wxHtmlTagsCache::wxHtmlTagsCache(const wxString
& source
)
78 m_Cache
= new wxHtmlTagsCacheData
;
81 wxChar tagBuffer
[256];
83 const wxString::const_iterator end
= source
.end();
84 for ( wxString::const_iterator pos
= source
.begin(); pos
< end
; ++pos
)
86 if (*pos
== wxT('<')) // tag found:
88 // don't cache comment tags
89 if ( wxHtmlParser::SkipCommentTag(pos
, source
.end()) )
92 size_t tg
= Cache().size();
93 Cache().push_back(wxHtmlCacheItem());
95 wxString::const_iterator stpos
= pos
++;
96 Cache()[tg
].Key
= stpos
;
100 pos
< end
&& i
< (int)WXSIZEOF(tagBuffer
) - 1 &&
101 *pos
!= wxT('>') && !wxIsspace(*pos
);
104 tagBuffer
[i
] = (wxChar
)wxToupper(*pos
);
106 tagBuffer
[i
] = _T('\0');
108 Cache()[tg
].Name
= new wxChar
[i
+1];
109 memcpy(Cache()[tg
].Name
, tagBuffer
, (i
+1)*sizeof(wxChar
));
111 while (pos
< end
&& *pos
!= wxT('>'))
114 if ((stpos
+1) < end
&& *(stpos
+1) == wxT('/')) // ending tag:
116 Cache()[tg
].type
= wxHtmlCacheItem::Type_EndingTag
;
117 // find matching begin tag:
118 for (i
= tg
; i
>= 0; i
--)
120 if ((Cache()[i
].type
== wxHtmlCacheItem::Type_NoMatchingEndingTag
) && (wxStrcmp(Cache()[i
].Name
, tagBuffer
+1) == 0))
122 Cache()[i
].type
= wxHtmlCacheItem::Type_Normal
;
123 Cache()[i
].End1
= stpos
;
124 Cache()[i
].End2
= pos
+ 1;
131 Cache()[tg
].type
= wxHtmlCacheItem::Type_NoMatchingEndingTag
;
133 if (wxIsCDATAElement(tagBuffer
))
135 // store the orig pos in case we are missing the closing
137 const wxString::const_iterator old_pos
= pos
;
138 bool foundCloseTag
= false;
140 // find next matching tag
141 int tag_len
= wxStrlen(tagBuffer
);
144 // find the ending tag
145 while (pos
+ 1 < end
&&
146 (*pos
!= '<' || *(pos
+1) != '/'))
153 while (pos
< end
&& match_pos
< tag_len
)
156 if ( c
== '>' || c
== '<' )
159 // cast to wxChar needed to suppress warning in
161 if ((wxChar
)wxToupper(c
) == tagBuffer
[match_pos
])
165 else if (c
== wxT(' ') || c
== wxT('\n') ||
166 c
== wxT('\r') || c
== wxT('\t'))
168 // need to skip over these
178 if (match_pos
== tag_len
)
180 pos
= pos
- tag_len
- 3;
181 foundCloseTag
= true;
184 else // keep looking for the closing tag
191 // we didn't find closing tag; this means the markup
192 // is incorrect and the best thing we can do is to
193 // ignore the unclosed tag and continue parsing as if
202 // ok, we're done, now we'll free .Name members of cache - we don't need it anymore:
203 for ( wxHtmlTagsCacheData::iterator i
= Cache().begin();
204 i
!= Cache().end(); ++i
)
// Destructor of the tags cache.
// NOTE(review): the body is not visible in this listing; presumably it
// releases m_Cache, which the constructor allocates with new -- confirm.
211 wxHtmlTagsCache::~wxHtmlTagsCache()
// Looks up the cached record for the tag starting at `at` and reports its
// ending-tag information.
//
// The search starts at the record remembered in m_CachePos. If that record's
// Key differs from `at`, the direction is chosen by comparing `at` with the
// Key (delta is -1 when `at` lies before it, +1 otherwise) and records are
// examined until one with Key == at is reached. When m_CachePos leaves the
// valid index range the HTML is treated as badly broken (see the comment
// below) and the search is given up.
//
// On the normal path:
//   *end1, *end2  receive the record's End1 and End2;
//   *hasEnding    is true exactly when the record's type is Type_Normal.
//
// NOTE(review): the declaration of the hasEnding parameter and the use of
// inputEnd are not visible in this listing.
216 void wxHtmlTagsCache::QueryTag(const wxString::const_iterator
& at
,
217 const wxString::const_iterator
& inputEnd
,
218 wxString::const_iterator
*end1
,
219 wxString::const_iterator
*end2
,
225 if (Cache()[m_CachePos
].Key
!= at
)
227 int delta
= (at
< Cache()[m_CachePos
].Key
) ? -1 : 1;
232 if ( m_CachePos
< 0 || m_CachePos
>= (int)Cache().size() )
234 if ( m_CachePos
< 0 )
237 m_CachePos
= Cache().size() - 1;
238 // something is very wrong with HTML, give up by returning an
239 // impossibly large value which is going to be ignored by the
247 while (Cache()[m_CachePos
].Key
!= at
);
249 *end1
= Cache()[m_CachePos
].End1
;
250 *end2
= Cache()[m_CachePos
].End2
;
251 *hasEnding
= (Cache()[m_CachePos
].type
== wxHtmlCacheItem::Type_Normal
);
257 //-----------------------------------------------------------------------------
259 //-----------------------------------------------------------------------------
// Constructs the tag whose '<' character is at `pos` in *source.
//
// Steps visible below:
//  1. DOM links: the child pointers start out NULL and the new tag is
//     attached to the parent's child list (as m_FirstChild and/or after the
//     previous m_LastChild, then it becomes m_LastChild).
//  2. Tag name: characters after '<' are read up to whitespace (further
//     terminators are not visible here), converting a-z to upper case.
//  3. Parameters: if the name was not terminated by '>', the rest of the tag
//     is processed with a small state machine (ST_BEFORE_NAME, ST_NAME,
//     ST_BEFORE_EQ, ST_BEFORE_VALUE, ST_VALUE). Each parameter name is added
//     to m_ParamNames and its value to m_ParamValues; a parameter without a
//     value gets an empty string. Values may be quoted with '"' or '\'';
//     an unquoted value ends at whitespace. In some branches the value is
//     first passed through entParser->Parse().
//  4. Ending tag: cache->QueryTag() fills m_End1, m_End2 and m_hasEnding;
//     m_End1/m_End2 are then clamped so they never exceed end_pos.
//  5. With WXWIN_COMPATIBILITY_2_8, m_sourceStart records source->begin().
261 wxHtmlTag::wxHtmlTag(wxHtmlTag
*parent
,
262 const wxString
*source
,
263 const wxString::const_iterator
& pos
,
264 const wxString::const_iterator
& end_pos
,
265 wxHtmlTagsCache
*cache
,
266 wxHtmlEntitiesParser
*entParser
)
268 /* Setup DOM relations */
271 m_FirstChild
= m_LastChild
= NULL
;
275 m_Prev
= m_Parent
->m_LastChild
;
277 m_Parent
->m_FirstChild
= this;
279 m_Prev
->m_Next
= this;
280 m_Parent
->m_LastChild
= this;
285 /* Find parameters and their values: */
289 // fill-in name, params and begin pos:
290 wxString::const_iterator
i(pos
+1);
292 // find tag's name and convert it to uppercase:
293 while ((i
< end_pos
) &&
294 ((c
= *(i
++)) != wxT(' ') && c
!= wxT('\r') &&
295 c
!= wxT('\n') && c
!= wxT('\t') &&
298 if ((c
>= wxT('a')) && (c
<= wxT('z')))
299 c
-= (wxT('a') - wxT('A'));
303 // if the tag has parameters, read them and "normalize" them,
304 // i.e. convert to uppercase, replace whitespaces by spaces and
305 // remove whitespaces around '=':
306 if (*(i
-1) != wxT('>'))
308 #define IS_WHITE(c) (c == wxT(' ') || c == wxT('\r') || \
309 c == wxT('\n') || c == wxT('\t'))
310 wxString pname
, pvalue
;
322 state
= ST_BEFORE_NAME
;
327 if (c
== wxT('>') && !(state
== ST_VALUE
&& quote
!= 0))
329 if (state
== ST_BEFORE_EQ
|| state
== ST_NAME
)
331 m_ParamNames
.Add(pname
);
332 m_ParamValues
.Add(wxGetEmptyString());
334 else if (state
== ST_VALUE
&& quote
== 0)
336 m_ParamNames
.Add(pname
);
338 m_ParamValues
.Add(entParser
->Parse(pvalue
));
340 m_ParamValues
.Add(pvalue
);
355 state
= ST_BEFORE_EQ
;
356 else if (c
== wxT('='))
357 state
= ST_BEFORE_VALUE
;
363 state
= ST_BEFORE_VALUE
;
364 else if (!IS_WHITE(c
))
366 m_ParamNames
.Add(pname
);
367 m_ParamValues
.Add(wxGetEmptyString());
372 case ST_BEFORE_VALUE
:
375 if (c
== wxT('"') || c
== wxT('\''))
376 quote
= c
, pvalue
= wxGetEmptyString();
378 quote
= 0, pvalue
= c
;
383 if ((quote
!= 0 && c
== quote
) ||
384 (quote
== 0 && IS_WHITE(c
)))
386 m_ParamNames
.Add(pname
);
389 // VS: backward compatibility, no real reason,
390 // but wxHTML code relies on this... :(
394 m_ParamValues
.Add(entParser
->Parse(pvalue
));
396 m_ParamValues
.Add(pvalue
);
397 state
= ST_BEFORE_NAME
;
408 cache
->QueryTag(pos
, source
->end(), &m_End1
, &m_End2
, &m_hasEnding
);
409 if (m_End1
> end_pos
) m_End1
= end_pos
;
410 if (m_End2
> end_pos
) m_End2
= end_pos
;
412 #if WXWIN_COMPATIBILITY_2_8
413 m_sourceStart
= source
->begin();
// Destructor. The visible statement steps from one tag (t1) to its next
// sibling (t2) via GetNextSibling().
// NOTE(review): presumably this is part of a loop destroying the child tags;
// the rest of the body is not visible in this listing -- confirm.
417 wxHtmlTag::~wxHtmlTag()
423 t2
= t1
->GetNextSibling();
429 bool wxHtmlTag::HasParam(const wxString
& par
) const
431 return (m_ParamNames
.Index(par
, false) != wxNOT_FOUND
);
// Returns the value of the parameter `par`.
//
// The parameter is located with m_ParamNames.Index(par, false); when it is
// not found an empty string is returned. Otherwise the matching entry of
// m_ParamValues is returned, either as is or wrapped in double quotes.
// NOTE(review): presumably the quoted form is the one selected by
// with_quotes; the condition itself is not visible in this listing -- confirm.
434 wxString
wxHtmlTag::GetParam(const wxString
& par
, bool with_quotes
) const
436 int index
= m_ParamNames
.Index(par
, false);
437 if (index
== wxNOT_FOUND
)
438 return wxGetEmptyString();
441 // VS: backward compatibility, seems to be never used by wxHTML...
443 s
<< wxT('"') << m_ParamValues
[index
] << wxT('"');
447 return m_ParamValues
[index
];
// Reads the value of parameter `par` with GetParam() and parses it with
// wxSscanf(parval, format, param). The result of wxSscanf() is returned
// unchanged.
// NOTE(review): the declarations of `format` and `param` are not visible in
// this listing.
450 int wxHtmlTag::ScanParam(const wxString
& par
,
454 wxString parval
= GetParam(par
);
455 return wxSscanf(parval
, format
, param
);
// Overload taking a wide-character format string: reads the value of
// parameter `par` with GetParam() and parses it with
// wxSscanf(parval, format, param). The result of wxSscanf() is returned
// unchanged.
// NOTE(review): the declaration of `param` is not visible in this listing.
458 int wxHtmlTag::ScanParam(const wxString
& par
,
459 const wchar_t *format
,
462 wxString parval
= GetParam(par
);
463 return wxSscanf(parval
, format
, param
);
// Interprets the value of parameter `par` as a colour and stores it in *clr.
//
// `clr` must not be NULL: wxCHECK_MSG makes the function return false in that
// case. If the value is longer than one character and does not start with
// '#', it is compared (IsSameAs(name, false)) against the sixteen colour
// names of HTML 4.0 listed below; on a match *clr is set to the corresponding
// RGB triple and true is returned.
//
// According to the trailing comment, "#rrggbb" values and other well known
// names are handled afterwards; that code is not visible in this listing.
466 bool wxHtmlTag::GetParamAsColour(const wxString
& par
, wxColour
*clr
) const
468 wxCHECK_MSG( clr
, false, _T("invalid colour argument") );
470 wxString str
= GetParam(par
);
472 // handle colours defined in HTML 4.0 first:
473 if (str
.length() > 1 && str
[0] != _T('#'))
475 #define HTML_COLOUR(name, r, g, b) \
476 if (str.IsSameAs(wxSTRING_TEXT(name), false)) \
477 { clr->Set(r, g, b); return true; }
478 HTML_COLOUR("black", 0x00,0x00,0x00)
479 HTML_COLOUR("silver", 0xC0,0xC0,0xC0)
480 HTML_COLOUR("gray", 0x80,0x80,0x80)
481 HTML_COLOUR("white", 0xFF,0xFF,0xFF)
482 HTML_COLOUR("maroon", 0x80,0x00,0x00)
483 HTML_COLOUR("red", 0xFF,0x00,0x00)
484 HTML_COLOUR("purple", 0x80,0x00,0x80)
485 HTML_COLOUR("fuchsia", 0xFF,0x00,0xFF)
486 HTML_COLOUR("green", 0x00,0x80,0x00)
487 HTML_COLOUR("lime", 0x00,0xFF,0x00)
488 HTML_COLOUR("olive", 0x80,0x80,0x00)
489 HTML_COLOUR("yellow", 0xFF,0xFF,0x00)
490 HTML_COLOUR("navy", 0x00,0x00,0x80)
491 HTML_COLOUR("blue", 0x00,0x00,0xFF)
492 HTML_COLOUR("teal", 0x00,0x80,0x80)
493 HTML_COLOUR("aqua", 0x00,0xFF,0xFF)
497 // then try to parse #rrggbb representations or set from other well
498 // known names (note that this doesn't strictly conform to HTML spec,
499 // but it doesn't do real harm -- but it *must* be done after the standard
500 // colors are handled above):
// Interprets the value of parameter `par` as an integer.
//
// Returns false immediately if the tag has no such parameter; otherwise the
// value is converted with wxString::ToLong() and the conversion result is
// kept in `succ`.
// NOTE(review): the statements storing the number through `clr` and returning
// are not visible in this listing; presumably `succ` is the return value --
// confirm.
507 bool wxHtmlTag::GetParamAsInt(const wxString
& par
, int *clr
) const
509 if (!HasParam(par
)) return false;
511 bool succ
= GetParam(par
).ToLong(&i
);
// Builds a single string out of all parameters of the tag.
//
// For every index in m_ParamNames the name is appended, followed by the
// corresponding value from m_ParamValues: the value is wrapped in single
// quotes when it contains a double quote character and in double quotes
// otherwise.
// NOTE(review): the declaration of `s`, any separators written between the
// pieces and the return statement are not visible in this listing.
516 wxString
wxHtmlTag::GetAllParams() const
518 // VS: this function is for backward compatibility only,
519 // never used by wxHTML
521 size_t cnt
= m_ParamNames
.GetCount();
522 for (size_t i
= 0; i
< cnt
; i
++)
524 s
<< m_ParamNames
[i
];
526 if (m_ParamValues
[i
].Find(wxT('"')) != wxNOT_FOUND
)
527 s
<< wxT('\'') << m_ParamValues
[i
] << wxT('\'');
529 s
<< wxT('"') << m_ParamValues
[i
] << wxT('"');
// Returns the first tag among this tag's siblings.
//
// One path simply returns the parent's m_FirstChild; the other starts from
// this tag itself (`cur`).
// NOTE(review): presumably the first path is taken when m_Parent is set and
// the second one walks backwards through the previous-sibling links; neither
// the condition nor the loop is visible in this listing -- confirm.
534 wxHtmlTag
*wxHtmlTag::GetFirstSibling() const
537 return m_Parent
->m_FirstChild
;
540 wxHtmlTag
*cur
= (wxHtmlTag
*)this;
// Returns the last tag among this tag's siblings.
//
// One path simply returns the parent's m_LastChild; the other starts from
// this tag itself (`cur`).
// NOTE(review): presumably the first path is taken when m_Parent is set and
// the second one walks forwards through the next-sibling links; neither the
// condition nor the loop is visible in this listing -- confirm.
547 wxHtmlTag
*wxHtmlTag::GetLastSibling() const
550 return m_Parent
->m_LastChild
;
553 wxHtmlTag
*cur
= (wxHtmlTag
*)this;
560 wxHtmlTag
*wxHtmlTag::GetNextTag() const
562 if (m_FirstChild
) return m_FirstChild
;
563 if (m_Next
) return m_Next
;
564 wxHtmlTag
*cur
= m_Parent
;
565 if (!cur
) return NULL
;
566 while (cur
->m_Parent
&& !cur
->m_Next
)