]>
git.saurik.com Git - wxWidgets.git/blob - src/html/htmltag.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/html/htmltag.cpp
3 // Purpose: wxHtmlTag class (represents single tag)
4 // Author: Vaclav Slavik
5 // Copyright: (c) 1999 Vaclav Slavik
6 // Licence: wxWindows licence
7 /////////////////////////////////////////////////////////////////////////////
17 #include "wx/html/htmltag.h"
20 #include "wx/colour.h"
21 #include "wx/wxcrtvararg.h"
24 #include "wx/html/htmlpars.h"
25 #include "wx/html/styleparams.h"
27 #include "wx/vector.h"
29 #include <stdio.h> // for vsscanf
32 //-----------------------------------------------------------------------------
34 //-----------------------------------------------------------------------------
36 struct wxHtmlCacheItem
38 // this is "pos" value passed to wxHtmlTag's constructor.
39 // it is position of '<' character of the tag
40 wxString::const_iterator Key
;
45 Type_Normal
, // normal tag with a matching ending tag
46 Type_NoMatchingEndingTag
, // there's no ending tag for this tag
47 Type_EndingTag
// this is ending tag </..>
51 // end positions for the tag:
52 // end1 is '<' of ending tag,
53 // end2 is '>' or both are
54 wxString::const_iterator End1
, End2
;
60 // NB: this is an empty class and not typedef because of forward declaration
61 class wxHtmlTagsCacheData
: public wxVector
<wxHtmlCacheItem
>
65 bool wxIsCDATAElement(const wxChar
*tag
)
67 return (wxStrcmp(tag
, wxT("SCRIPT")) == 0) ||
68 (wxStrcmp(tag
, wxT("STYLE")) == 0);
71 bool wxIsCDATAElement(const wxString
& tag
)
73 return (wxStrcmp(tag
.wx_str(), wxS("SCRIPT")) == 0) ||
74 (wxStrcmp(tag
.wx_str(), wxS("STYLE")) == 0);
77 wxHtmlTagsCache::wxHtmlTagsCache(const wxString
& source
)
79 m_Cache
= new wxHtmlTagsCacheData
;
82 wxChar tagBuffer
[256];
84 const wxString::const_iterator end
= source
.end();
85 for ( wxString::const_iterator pos
= source
.begin(); pos
< end
; ++pos
)
90 // possible tag start found:
92 // don't cache comment tags
93 if ( wxHtmlParser::SkipCommentTag(pos
, end
) )
96 // Remember the starting tag position.
97 wxString::const_iterator stpos
= pos
++;
99 // And look for the ending one.
102 pos
< end
&& i
< (int)WXSIZEOF(tagBuffer
) - 1 &&
103 *pos
!= wxT('>') && !wxIsspace(*pos
);
106 tagBuffer
[i
] = (wxChar
)wxToupper(*pos
);
108 tagBuffer
[i
] = wxT('\0');
110 while (pos
< end
&& *pos
!= wxT('>'))
115 // We didn't find a closing bracket, this is not a valid tag after
116 // all. Notice that we need to roll back pos to avoid creating an
117 // invalid iterator when "++pos" is done in the loop statement.
123 // We have a valid tag, add it to the cache.
124 size_t tg
= Cache().size();
125 Cache().push_back(wxHtmlCacheItem());
126 Cache()[tg
].Key
= stpos
;
127 Cache()[tg
].Name
= new wxChar
[i
+1];
128 memcpy(Cache()[tg
].Name
, tagBuffer
, (i
+1)*sizeof(wxChar
));
130 if ((stpos
+1) < end
&& *(stpos
+1) == wxT('/')) // ending tag:
132 Cache()[tg
].type
= wxHtmlCacheItem::Type_EndingTag
;
133 // find matching begin tag:
134 for (i
= tg
; i
>= 0; i
--)
136 if ((Cache()[i
].type
== wxHtmlCacheItem::Type_NoMatchingEndingTag
) && (wxStrcmp(Cache()[i
].Name
, tagBuffer
+1) == 0))
138 Cache()[i
].type
= wxHtmlCacheItem::Type_Normal
;
139 Cache()[i
].End1
= stpos
;
140 Cache()[i
].End2
= pos
+ 1;
147 Cache()[tg
].type
= wxHtmlCacheItem::Type_NoMatchingEndingTag
;
149 if (wxIsCDATAElement(tagBuffer
))
151 // store the orig pos in case we are missing the closing
153 const wxString::const_iterator old_pos
= pos
;
154 bool foundCloseTag
= false;
156 // find next matching tag
157 int tag_len
= wxStrlen(tagBuffer
);
160 // find the ending tag
161 while (pos
+ 1 < end
&&
162 (*pos
!= '<' || *(pos
+1) != '/'))
169 while (pos
< end
&& match_pos
< tag_len
)
172 if ( c
== '>' || c
== '<' )
175 // cast to wxChar needed to suppress warning in
177 if ((wxChar
)wxToupper(c
) == tagBuffer
[match_pos
])
181 else if (c
== wxT(' ') || c
== wxT('\n') ||
182 c
== wxT('\r') || c
== wxT('\t'))
184 // need to skip over these
194 if (match_pos
== tag_len
)
196 pos
= pos
- tag_len
- 3;
197 foundCloseTag
= true;
200 else // keep looking for the closing tag
207 // we didn't find closing tag; this means the markup
208 // is incorrect and the best thing we can do is to
209 // ignore the unclosed tag and continue parsing as if
217 // ok, we're done, now we'll free .Name members of cache - we don't need it anymore:
218 for ( wxHtmlTagsCacheData::iterator i
= Cache().begin();
219 i
!= Cache().end(); ++i
)
225 wxHtmlTagsCache::~wxHtmlTagsCache()
230 void wxHtmlTagsCache::QueryTag(const wxString::const_iterator
& at
,
231 const wxString::const_iterator
& inputEnd
,
232 wxString::const_iterator
*end1
,
233 wxString::const_iterator
*end2
,
244 if (Cache()[m_CachePos
].Key
!= at
)
246 int delta
= (at
< Cache()[m_CachePos
].Key
) ? -1 : 1;
251 if ( m_CachePos
< 0 || m_CachePos
>= (int)Cache().size() )
253 if ( m_CachePos
< 0 )
256 m_CachePos
= Cache().size() - 1;
257 // something is very wrong with HTML, give up by returning an
258 // impossibly large value which is going to be ignored by the
266 while (Cache()[m_CachePos
].Key
!= at
);
269 switch ( Cache()[m_CachePos
].type
)
271 case wxHtmlCacheItem::Type_Normal
:
272 *end1
= Cache()[m_CachePos
].End1
;
273 *end2
= Cache()[m_CachePos
].End2
;
277 case wxHtmlCacheItem::Type_EndingTag
:
278 wxFAIL_MSG("QueryTag called for ending tag - can't be");
279 // but if it does happen, fall through, better than crashing
281 case wxHtmlCacheItem::Type_NoMatchingEndingTag
:
282 // If input HTML is invalid and there's no closing tag for this
283 // one, pretend that it runs all the way to the end of input
294 //-----------------------------------------------------------------------------
296 //-----------------------------------------------------------------------------
298 wxHtmlTag::wxHtmlTag(wxHtmlTag
*parent
,
299 const wxString
*source
,
300 const wxString::const_iterator
& pos
,
301 const wxString::const_iterator
& end_pos
,
302 wxHtmlTagsCache
*cache
,
303 wxHtmlEntitiesParser
*entParser
)
305 /* Setup DOM relations */
308 m_FirstChild
= m_LastChild
= NULL
;
312 m_Prev
= m_Parent
->m_LastChild
;
314 m_Parent
->m_FirstChild
= this;
316 m_Prev
->m_Next
= this;
317 m_Parent
->m_LastChild
= this;
322 /* Find parameters and their values: */
324 wxChar c
wxDUMMY_INITIALIZE(0);
326 // fill-in name, params and begin pos:
327 wxString::const_iterator
i(pos
+1);
329 // find tag's name and convert it to uppercase:
330 while ((i
< end_pos
) &&
331 ((c
= *(i
++)) != wxT(' ') && c
!= wxT('\r') &&
332 c
!= wxT('\n') && c
!= wxT('\t') &&
333 c
!= wxT('>') && c
!= wxT('/')))
335 if ((c
>= wxT('a')) && (c
<= wxT('z')))
336 c
-= (wxT('a') - wxT('A'));
340 // if the tag has parameters, read them and "normalize" them,
341 // i.e. convert to uppercase, replace whitespaces by spaces and
342 // remove whitespaces around '=':
343 if (*(i
-1) != wxT('>'))
345 #define IS_WHITE(c) (c == wxT(' ') || c == wxT('\r') || \
346 c == wxT('\n') || c == wxT('\t'))
347 wxString pname
, pvalue
;
359 state
= ST_BEFORE_NAME
;
364 if (c
== wxT('>') && !(state
== ST_VALUE
&& quote
!= 0))
366 if (state
== ST_BEFORE_EQ
|| state
== ST_NAME
)
368 m_ParamNames
.Add(pname
);
369 m_ParamValues
.Add(wxGetEmptyString());
371 else if (state
== ST_VALUE
&& quote
== 0)
373 m_ParamNames
.Add(pname
);
375 m_ParamValues
.Add(entParser
->Parse(pvalue
));
377 m_ParamValues
.Add(pvalue
);
392 state
= ST_BEFORE_EQ
;
393 else if (c
== wxT('='))
394 state
= ST_BEFORE_VALUE
;
400 state
= ST_BEFORE_VALUE
;
401 else if (!IS_WHITE(c
))
403 m_ParamNames
.Add(pname
);
404 m_ParamValues
.Add(wxGetEmptyString());
409 case ST_BEFORE_VALUE
:
412 if (c
== wxT('"') || c
== wxT('\''))
413 quote
= c
, pvalue
= wxGetEmptyString();
415 quote
= 0, pvalue
= c
;
420 if ((quote
!= 0 && c
== quote
) ||
421 (quote
== 0 && IS_WHITE(c
)))
423 m_ParamNames
.Add(pname
);
426 // VS: backward compatibility, no real reason,
427 // but wxHTML code relies on this... :(
431 m_ParamValues
.Add(entParser
->Parse(pvalue
));
433 m_ParamValues
.Add(pvalue
);
434 state
= ST_BEFORE_NAME
;
445 cache
->QueryTag(pos
, source
->end(), &m_End1
, &m_End2
, &m_hasEnding
);
446 if (m_End1
> end_pos
) m_End1
= end_pos
;
447 if (m_End2
> end_pos
) m_End2
= end_pos
;
449 #if WXWIN_COMPATIBILITY_2_8
450 m_sourceStart
= source
->begin();
453 // Try to parse any style parameters that can be handled simply by
454 // converting them to the equivalent HTML 3 attributes: this is a far cry
455 // from perfect but better than nothing.
456 static const struct EquivAttr
462 { "text-align", "ALIGN" },
463 { "width", "WIDTH" },
464 { "vertical-align", "VALIGN" },
465 { "background", "BGCOLOR" },
466 { "background-color", "BGCOLOR" },
469 wxHtmlStyleParams
styleParams(*this);
470 for ( unsigned n
= 0; n
< WXSIZEOF(equivAttrs
); n
++ )
472 const EquivAttr
& ea
= equivAttrs
[n
];
473 if ( styleParams
.HasParam(ea
.style
) && !HasParam(ea
.attr
) )
475 m_ParamNames
.Add(ea
.attr
);
476 m_ParamValues
.Add(styleParams
.GetParam(ea
.style
));
481 wxHtmlTag::~wxHtmlTag()
487 t2
= t1
->GetNextSibling();
493 bool wxHtmlTag::HasParam(const wxString
& par
) const
495 return (m_ParamNames
.Index(par
, false) != wxNOT_FOUND
);
498 wxString
wxHtmlTag::GetParam(const wxString
& par
, bool with_quotes
) const
500 int index
= m_ParamNames
.Index(par
, false);
501 if (index
== wxNOT_FOUND
)
502 return wxGetEmptyString();
505 // VS: backward compatibility, seems to be never used by wxHTML...
507 s
<< wxT('"') << m_ParamValues
[index
] << wxT('"');
511 return m_ParamValues
[index
];
514 bool wxHtmlTag::GetParamAsString(const wxString
& par
, wxString
*str
) const
516 wxCHECK_MSG( str
, false, wxT("NULL output string argument") );
518 int index
= m_ParamNames
.Index(par
, false);
519 if (index
== wxNOT_FOUND
)
522 *str
= m_ParamValues
[index
];
527 int wxHtmlTag::ScanParam(const wxString
& par
,
531 wxString parval
= GetParam(par
);
532 return wxSscanf(parval
, format
, param
);
535 int wxHtmlTag::ScanParam(const wxString
& par
,
536 const wchar_t *format
,
539 wxString parval
= GetParam(par
);
540 return wxSscanf(parval
, format
, param
);
544 bool wxHtmlTag::ParseAsColour(const wxString
& str
, wxColour
*clr
)
546 wxCHECK_MSG( clr
, false, wxT("invalid colour argument") );
548 // handle colours defined in HTML 4.0 first:
549 if (str
.length() > 1 && str
[0] != wxT('#'))
551 #define HTML_COLOUR(name, r, g, b) \
552 if (str.IsSameAs(wxS(name), false)) \
553 { clr->Set(r, g, b); return true; }
554 HTML_COLOUR("black", 0x00,0x00,0x00)
555 HTML_COLOUR("silver", 0xC0,0xC0,0xC0)
556 HTML_COLOUR("gray", 0x80,0x80,0x80)
557 HTML_COLOUR("white", 0xFF,0xFF,0xFF)
558 HTML_COLOUR("maroon", 0x80,0x00,0x00)
559 HTML_COLOUR("red", 0xFF,0x00,0x00)
560 HTML_COLOUR("purple", 0x80,0x00,0x80)
561 HTML_COLOUR("fuchsia", 0xFF,0x00,0xFF)
562 HTML_COLOUR("green", 0x00,0x80,0x00)
563 HTML_COLOUR("lime", 0x00,0xFF,0x00)
564 HTML_COLOUR("olive", 0x80,0x80,0x00)
565 HTML_COLOUR("yellow", 0xFF,0xFF,0x00)
566 HTML_COLOUR("navy", 0x00,0x00,0x80)
567 HTML_COLOUR("blue", 0x00,0x00,0xFF)
568 HTML_COLOUR("teal", 0x00,0x80,0x80)
569 HTML_COLOUR("aqua", 0x00,0xFF,0xFF)
573 // then try to parse #rrggbb representations or set from other well
574 // known names (note that this doesn't strictly conform to HTML spec,
575 // but it doesn't do real harm -- but it *must* be done after the standard
576 // colors are handled above):
583 bool wxHtmlTag::GetParamAsColour(const wxString
& par
, wxColour
*clr
) const
585 const wxString str
= GetParam(par
);
586 return !str
.empty() && ParseAsColour(str
, clr
);
589 bool wxHtmlTag::GetParamAsInt(const wxString
& par
, int *clr
) const
591 if ( !HasParam(par
) )
595 if ( !GetParam(par
).ToLong(&i
) )
603 wxHtmlTag::GetParamAsIntOrPercent(const wxString
& par
,
605 bool& isPercent
) const
607 const wxString param
= GetParam(par
);
612 if ( param
.EndsWith("%", &num
) )
623 if ( !num
.ToLong(&lValue
) )
626 if ( lValue
> INT_MAX
|| lValue
< INT_MIN
)
629 *value
= static_cast<int>(lValue
);
634 wxString
wxHtmlTag::GetAllParams() const
636 // VS: this function is for backward compatibility only,
637 // never used by wxHTML
639 size_t cnt
= m_ParamNames
.GetCount();
640 for (size_t i
= 0; i
< cnt
; i
++)
642 s
<< m_ParamNames
[i
];
644 if (m_ParamValues
[i
].Find(wxT('"')) != wxNOT_FOUND
)
645 s
<< wxT('\'') << m_ParamValues
[i
] << wxT('\'');
647 s
<< wxT('"') << m_ParamValues
[i
] << wxT('"');
652 wxHtmlTag
*wxHtmlTag::GetFirstSibling() const
655 return m_Parent
->m_FirstChild
;
658 wxHtmlTag
*cur
= (wxHtmlTag
*)this;
665 wxHtmlTag
*wxHtmlTag::GetLastSibling() const
668 return m_Parent
->m_LastChild
;
671 wxHtmlTag
*cur
= (wxHtmlTag
*)this;
678 wxHtmlTag
*wxHtmlTag::GetNextTag() const
680 if (m_FirstChild
) return m_FirstChild
;
681 if (m_Next
) return m_Next
;
682 wxHtmlTag
*cur
= m_Parent
;
683 if (!cur
) return NULL
;
684 while (cur
->m_Parent
&& !cur
->m_Next
)