]>
git.saurik.com Git - wxWidgets.git/blob - tests/benchmarks/htmlparser/htmltag.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/html/htmltag.cpp
3 // Purpose: wx28HtmlTag class (represents single tag)
4 // Author: Vaclav Slavik
6 // Copyright: (c) 1999 Vaclav Slavik
7 // Licence: wxWindows licence
8 /////////////////////////////////////////////////////////////////////////////
10 #include "wx/wxprec.h"
19 #include "wx/colour.h"
23 #include <stdio.h> // for vsscanf
27 //-----------------------------------------------------------------------------
29 //-----------------------------------------------------------------------------
// One entry of the array built by wx28HtmlTagsCache's constructor: it records
// where a tag starts and where the matching closing tag (if any) is located.
// NOTE(review): the member declarations are not visible in this listing; the
// code in this file accesses Key, End1, End2 and Name on these items.
31 struct wx28HtmlCacheItem
33 // Key: the "pos" value passed to wx28HtmlTag's constructor,
34 // i.e. the position of the '<' character that opens the tag.
37 // End positions for the tag:
38 // End1 is the position of '<' of the ending tag,
39 // End2 is stored as (position reached when skipping to '>') + 1; or both are
40 // -1 if there is no ending tag for this one...
41 // or -2 if this item is itself an ending tag </...>
// Class-information macro tying wx28HtmlTagsCache to its base wxObject
// (presumably the wxWidgets RTTI implementation macro -- confirm against wx docs).
49 IMPLEMENT_CLASS(wx28HtmlTagsCache
,wxObject
)
// Growth step of the m_Cache array: the constructor reallocates room for this
// many additional items each time m_CacheSize reaches a multiple of it.
51 #define CACHE_INCREMENT 64
53 bool wxIsCDATAElement(const wxChar
*tag
)
55 return (wxStrcmp(tag
, _T("SCRIPT")) == 0) ||
56 (wxStrcmp(tag
, _T("STYLE")) == 0);
// Builds the tag cache for `source`: for every '<' encountered, one
// wx28HtmlCacheItem is appended to m_Cache recording where the tag starts
// (Key) and, once it is known, where its closing tag lies (End1/End2).
// NOTE(review): this listing is missing lines of the function (braces, the
// outer scan loop, the declarations of pos/stpos/i/match_pos and several
// statements), so the comments below describe only the visible statements.
59 wx28HtmlTagsCache::wx28HtmlTagsCache(const wxString
& source
)
// raw character pointer and length of the text being scanned
61 const wxChar
*src
= source
.c_str();
62 int lng
= source
.length();
// scratch buffer receiving the upper-cased name of the tag being examined
63 wxChar tagBuffer
[256];
72 if (src
[pos
] == wxT('<')) // tag found:
// grow the array by CACHE_INCREMENT items whenever the current size is a
// multiple of CACHE_INCREMENT (this includes the very first item).
// NOTE(review): the realloc() result is assigned without a NULL check.
74 if (m_CacheSize
% CACHE_INCREMENT
== 0)
75 m_Cache
= (wx28HtmlCacheItem
*) realloc(m_Cache
, (m_CacheSize
+ CACHE_INCREMENT
) * sizeof(wx28HtmlCacheItem
));
// tg is the index of the new item
76 int tg
= m_CacheSize
++;
78 m_Cache
[tg
].Key
= stpos
;
// (condition of a loop whose header is not visible) keep copying while input
// remains, tagBuffer still has room for a terminator, and the current
// character is neither '>' nor whitespace
82 pos
< lng
&& i
< (int)WXSIZEOF(tagBuffer
) - 1 &&
83 src
[pos
] != wxT('>') && !wxIsspace(src
[pos
]);
// the name is stored upper-cased
86 tagBuffer
[i
] = (wxChar
)wxToupper(src
[pos
]);
88 tagBuffer
[i
] = _T('\0');
// keep a heap copy of the name (i characters plus terminator) in the item;
// these copies are released again at the end of this constructor
90 m_Cache
[tg
].Name
= new wxChar
[i
+1];
91 memcpy(m_Cache
[tg
].Name
, tagBuffer
, (i
+1)*sizeof(wxChar
));
// advance to the '>' closing this tag (or to the end of input)
93 while (pos
< lng
&& src
[pos
] != wxT('>')) pos
++;
95 if (src
[stpos
+1] == wxT('/')) // ending tag:
// an ending tag's own item is marked with -2 in both end fields
97 m_Cache
[tg
].End1
= m_Cache
[tg
].End2
= -2;
98 // find matching begin tag:
// walk backwards over the items for one that is still unmatched (End1 == -1)
// and has the same name; tagBuffer+1 presumably skips the leading '/'
99 for (i
= tg
; i
>= 0; i
--)
100 if ((m_Cache
[i
].End1
== -1) && (wxStrcmp(m_Cache
[i
].Name
, tagBuffer
+1) == 0))
// record the closing tag's '<' position and pos + 1 in the opening tag's item
102 m_Cache
[i
].End1
= stpos
;
103 m_Cache
[i
].End2
= pos
+ 1;
// (not an ending tag) no closing tag is known yet for this item
109 m_Cache
[tg
].End1
= m_Cache
[tg
].End2
= -1;
// SCRIPT and STYLE get a dedicated look-ahead for their closing tag
111 if (wxIsCDATAElement(tagBuffer
))
113 // store the orig pos in case we are missing the closing
115 wxInt32 old_pos
= pos
;
116 bool foundCloseTag
= false;
118 // find next matching tag
119 int tag_len
= wxStrlen(tagBuffer
);
122 // find the ending tag
// (loop condition) continue until the two characters at pos are "</"
123 while (pos
+ 1 < lng
&&
124 (src
[pos
] != '<' || src
[pos
+1] != '/'))
// compare the characters that follow against tagBuffer, stopping at '>' or '<'
// or once all tag_len characters have matched
131 while (pos
< lng
&& match_pos
< tag_len
&& src
[pos
] != '>' && src
[pos
] != '<') {
132 // cast to wxChar needed to suppress warning in
// the comparison upper-cases the input character first
134 if ((wxChar
)wxToupper(src
[pos
]) == tagBuffer
[match_pos
]) {
137 else if (src
[pos
] == wxT(' ') || src
[pos
] == wxT('\n') ||
138 src
[pos
] == wxT('\r') || src
[pos
] == wxT('\t')) {
139 // need to skip over these
// every character of the name matched: this is the closing tag
148 if (match_pos
== tag_len
)
// rewind pos by the name length plus 3; presumably this puts the scan back
// just before the "</" so the closing tag is processed normally -- TODO confirm
150 pos
= pos
- tag_len
- 3;
151 foundCloseTag
= true;
154 else // keep looking for the closing tag
161 // we didn't find closing tag; this means the markup
162 // is incorrect and the best thing we can do is to
163 // ignore the unclosed tag and continue parsing as if
174 // ok, we're done, now we'll free .Name members of cache - we don't need it anymore:
175 for (int i
= 0; i
< m_CacheSize
; i
++)
177 delete[] m_Cache
[i
].Name
;
178 m_Cache
[i
].Name
= NULL
;
// Looks up the cache item whose Key equals `at` and stores that item's End1
// and End2 into *end1 and *end2.
//   at    position to look up (compared against the items' Key)
//   end1  receives the item's End1
//   end2  receives the item's End2
// NOTE(review): parts of this function (the loop body that moves m_CachePos
// and the error path) are not visible in this listing.
182 void wx28HtmlTagsCache::QueryTag(int at
, int* end1
, int* end2
)
// without a cache nothing is written to the output arguments
184 if (m_Cache
== NULL
) return;
// the search starts from m_CachePos, the member that is also left pointing at
// the matched item below
185 if (m_Cache
[m_CachePos
].Key
!= at
)
// search direction: -1 when `at` is below the current item's Key, +1 otherwise
187 int delta
= (at
< m_Cache
[m_CachePos
].Key
) ? -1 : 1;
// m_CachePos is outside the valid index range: no item has this Key
190 if ( m_CachePos
< 0 || m_CachePos
== m_CacheSize
)
192 // something is very wrong with HTML, give up by returning an
193 // impossibly large value which is going to be ignored by the
202 while (m_Cache
[m_CachePos
].Key
!= at
);
204 *end1
= m_Cache
[m_CachePos
].End1
;
205 *end2
= m_Cache
[m_CachePos
].End2
;
211 //-----------------------------------------------------------------------------
213 //-----------------------------------------------------------------------------
// Class-information macro tying wx28HtmlTag to its base wxObject
// (presumably the wxWidgets RTTI implementation macro -- confirm against wx docs).
215 IMPLEMENT_CLASS(wx28HtmlTag
,wxObject
)
// Constructs a tag object from the text of `source`, links it into the tree
// under `parent`, collects the tag's parameters into m_ParamNames /
// m_ParamValues, and obtains the positions of the closing tag from `cache`.
//   parent     used for the sibling/child links (stored as m_Parent, presumably)
//   source     text the tag is read from
//   pos        passed to cache->QueryTag() as the tag's lookup key
//   end_pos    upper bound for the scan and for m_End1 / m_End2
//   cache      queried for this tag's End1 / End2
//   entParser  applied to parameter values in some branches (see below)
// NOTE(review): much of this function (declarations, conditions, most of the
// state machine's switch) is missing from this listing; the comments describe
// only what the visible statements do.
217 wx28HtmlTag::wx28HtmlTag(wx28HtmlTag
*parent
,
218 const wxString
& source
, int pos
, int end_pos
,
219 wx28HtmlTagsCache
*cache
,
220 wx28HtmlEntitiesParser
*entParser
) : wxObject()
222 /* Setup DOM relations */
// a new tag starts without children
225 m_FirstChild
= m_LastChild
= NULL
;
// the parent's current last child becomes this tag's previous sibling
229 m_Prev
= m_Parent
->m_LastChild
;
// presumably taken when there is no previous sibling -- condition not visible
231 m_Parent
->m_FirstChild
= this;
// presumably taken when a previous sibling exists -- condition not visible
233 m_Prev
->m_Next
= this;
234 m_Parent
->m_LastChild
= this;
239 /* Find parameters and their values: */
244 // fill-in name, params and begin pos:
247 // find tag's name and convert it to uppercase:
// the name ends at end_pos or at the first space, CR, LF or TAB; the rest of
// the condition is not visible here
248 while ((i
< end_pos
) &&
249 ((c
= source
[i
++]) != wxT(' ') && c
!= wxT('\r') &&
250 c
!= wxT('\n') && c
!= wxT('\t') &&
// upper-casing is applied only to the range 'a'..'z'
253 if ((c
>= wxT('a')) && (c
<= wxT('z')))
254 c
-= (wxT('a') - wxT('A'));
258 // if the tag has parameters, read them and "normalize" them,
259 // i.e. convert to uppercase, replace whitespaces by spaces and
260 // remove whitespaces around '=':
// the character consumed last was not '>', so the tag text continues
261 if (source
[i
-1] != wxT('>'))
// IS_WHITE: true for space, CR, LF and TAB
263 #define IS_WHITE(c) (c == wxT(' ') || c == wxT('\r') || \
264 c == wxT('\n') || c == wxT('\t'))
// name and value of the parameter currently being read
265 wxString pname
, pvalue
;
277 state
= ST_BEFORE_NAME
;
// a '>' ends the tag unless it occurs inside a quoted value
282 if (c
== wxT('>') && !(state
== ST_VALUE
&& quote
!= 0))
// a name was read but no value: store it with an empty value
284 if (state
== ST_BEFORE_EQ
|| state
== ST_NAME
)
286 m_ParamNames
.Add(pname
);
287 m_ParamValues
.Add(wxEmptyString
);
// an unquoted value was being read: store the pending name/value pair
289 else if (state
== ST_VALUE
&& quote
== 0)
291 m_ParamNames
.Add(pname
);
// the value is stored either after entParser->Parse() or unchanged; the
// condition choosing between the two is not visible in this listing
293 m_ParamValues
.Add(entParser
->Parse(pvalue
));
295 m_ParamValues
.Add(pvalue
);
310 state
= ST_BEFORE_EQ
;
311 else if (c
== wxT('='))
312 state
= ST_BEFORE_VALUE
;
318 state
= ST_BEFORE_VALUE
;
// a non-white character where a different one was expected: the pending name
// is stored with an empty value
319 else if (!IS_WHITE(c
))
321 m_ParamNames
.Add(pname
);
322 m_ParamValues
.Add(wxEmptyString
);
327 case ST_BEFORE_VALUE
:
// a value starts either with a quote character (remembered in `quote`, value
// starts empty) or directly with its first character (quote == 0)
330 if (c
== wxT('"') || c
== wxT('\''))
331 quote
= c
, pvalue
= wxEmptyString
;
333 quote
= 0, pvalue
= c
;
// a value ends at its matching quote, or at whitespace when it is unquoted
338 if ((quote
!= 0 && c
== quote
) ||
339 (quote
== 0 && IS_WHITE(c
)))
341 m_ParamNames
.Add(pname
);
344 // VS: backward compatibility, no real reason,
345 // but wxHTML code relies on this... :(
349 m_ParamValues
.Add(entParser
->Parse(pvalue
));
351 m_ParamValues
.Add(pvalue
);
352 state
= ST_BEFORE_NAME
;
// fetch the closing tag positions for this tag and clamp them to end_pos
364 cache
->QueryTag(pos
, &m_End1
, &m_End2
);
365 if (m_End1
> end_pos
) m_End1
= end_pos
;
366 if (m_End2
> end_pos
) m_End2
= end_pos
;
// Destructor.
// NOTE(review): only fragments are visible here. The code uses two tag
// pointers and reads t1's next sibling into t2; presumably this walks a
// sibling chain, saving the next node before disposing of the current one --
// the loop and the disposal itself are not visible, confirm against the
// full source.
369 wx28HtmlTag::~wx28HtmlTag()
371 wx28HtmlTag
*t1
, *t2
;
375 t2
= t1
->GetNextSibling();
381 bool wx28HtmlTag::HasParam(const wxString
& par
) const
383 return (m_ParamNames
.Index(par
, false) != wxNOT_FOUND
);
// Returns the value stored for parameter `par`, or wxEmptyString when no
// parameter of that name exists.
//   par          parameter name, looked up in m_ParamNames with Index(par, false)
//   with_commas  presumably selects the variant that wraps the value in double
//                quotes -- the branch condition is not visible in this listing
386 wxString
wx28HtmlTag::GetParam(const wxString
& par
, bool with_commas
) const
388 int index
= m_ParamNames
.Index(par
, false);
// unknown parameter: empty result
389 if (index
== wxNOT_FOUND
)
390 return wxEmptyString
;
393 // VS: backward compatibility, seems to be never used by wxHTML...
// quoted variant: the value surrounded by '"' characters
395 s
<< wxT('"') << m_ParamValues
[index
] << wxT('"');
// plain variant: the stored value as is
399 return m_ParamValues
[index
];
// Fetches the value of parameter `par` via GetParam() and runs wxSscanf()
// over it with the supplied `format`, returning wxSscanf()'s result.
// NOTE(review): the remainder of the parameter list (the declaration of
// `param`, which is forwarded to wxSscanf) is not visible in this listing.
402 int wx28HtmlTag::ScanParam(const wxString
& par
,
403 const wxChar
*format
,
// a missing parameter yields an empty string here (see GetParam)
406 wxString parval
= GetParam(par
);
407 return wxSscanf(parval
, format
, param
);
// Interprets the value of parameter `par` as a colour and stores it in *clr.
// Returns true from the visible code when the value is one of the sixteen
// colour names listed below.
// NOTE(review): the tail of the function (the "#rrggbb" / other-names handling
// announced by the last comment, and the final return) is not visible here.
410 bool wx28HtmlTag::GetParamAsColour(const wxString
& par
, wxColour
*clr
) const
// guard on the output pointer; presumably returns false when clr is NULL
// (wxCHECK_MSG semantics -- confirm against wx docs)
412 wxCHECK_MSG( clr
, false, _T("invalid colour argument") );
414 wxString str
= GetParam(par
);
416 // handle colours defined in HTML 4.0 first:
// names are only tried for values longer than one character that do not
// start with '#'
417 if (str
.length() > 1 && str
[0] != _T('#'))
// HTML_COLOUR(name, r, g, b): if str equals `name` (IsSameAs with `false`,
// presumably case-insensitive), set *clr to (r, g, b) and return true
419 #define HTML_COLOUR(name, r, g, b) \
420 if (str.IsSameAs(wxT(name), false)) \
421 { clr->Set(r, g, b); return true; }
422 HTML_COLOUR("black", 0x00,0x00,0x00)
423 HTML_COLOUR("silver", 0xC0,0xC0,0xC0)
424 HTML_COLOUR("gray", 0x80,0x80,0x80)
425 HTML_COLOUR("white", 0xFF,0xFF,0xFF)
426 HTML_COLOUR("maroon", 0x80,0x00,0x00)
427 HTML_COLOUR("red", 0xFF,0x00,0x00)
428 HTML_COLOUR("purple", 0x80,0x00,0x80)
429 HTML_COLOUR("fuchsia", 0xFF,0x00,0xFF)
430 HTML_COLOUR("green", 0x00,0x80,0x00)
431 HTML_COLOUR("lime", 0x00,0xFF,0x00)
432 HTML_COLOUR("olive", 0x80,0x80,0x00)
433 HTML_COLOUR("yellow", 0xFF,0xFF,0x00)
434 HTML_COLOUR("navy", 0x00,0x00,0x80)
435 HTML_COLOUR("blue", 0x00,0x00,0xFF)
436 HTML_COLOUR("teal", 0x00,0x80,0x80)
437 HTML_COLOUR("aqua", 0x00,0xFF,0xFF)
441 // then try to parse #rrggbb representations or set from other well
442 // known names (note that this doesn't strictly conform to HTML spec,
443 // but it doesn't do real harm -- but it *must* be done after the standard
444 // colors are handled above):
// Reads parameter `par` as an integer.
//   par  parameter name
//   clr  output pointer (despite its name it is an int*); presumably receives
//        the converted value -- the assignment is not visible in this listing
// NOTE(review): the statements executed by the two checks below and the final
// return are not visible; presumably both checks lead to returning false.
451 bool wx28HtmlTag::GetParamAsInt(const wxString
& par
, int *clr
) const
// the parameter must exist ...
453 if ( !HasParam(par
) )
// ... and its value must convert with wxString::ToLong()
457 if ( !GetParam(par
).ToLong(&i
) )
// Builds one string out of all parameter names and values of this tag.
// NOTE(review): the declaration of `s`, whatever is written between a name
// and its value, the else keyword and the final return are not visible in
// this listing.
464 wxString
wx28HtmlTag::GetAllParams() const
466 // VS: this function is for backward compatibility only,
467 // never used by wxHTML
469 size_t cnt
= m_ParamNames
.GetCount();
// one name/value pair per iteration, in stored order
470 for (size_t i
= 0; i
< cnt
; i
++)
472 s
<< m_ParamNames
[i
];
// a value that itself contains a double quote is wrapped in single quotes ...
474 if (m_ParamValues
[i
].Find(wxT('"')) != wxNOT_FOUND
)
475 s
<< wxT('\'') << m_ParamValues
[i
] << wxT('\'');
// ... otherwise (presumably the else branch) it is wrapped in double quotes
477 s
<< wxT('"') << m_ParamValues
[i
] << wxT('"');
// Returns the first tag in this tag's sibling list.
// NOTE(review): the condition and the loop are not visible in this listing.
// Presumably the parent's m_FirstChild is returned when a parent exists, and
// otherwise the chain is walked starting from this tag -- confirm against the
// full source.
482 wx28HtmlTag
*wx28HtmlTag::GetFirstSibling() const
485 return m_Parent
->m_FirstChild
;
// the C-style cast removes const from `this` (the method is const)
488 wx28HtmlTag
*cur
= (wx28HtmlTag
*)this;
// Returns the last tag in this tag's sibling list.
// NOTE(review): the condition and the loop are not visible in this listing.
// Presumably the parent's m_LastChild is returned when a parent exists, and
// otherwise the chain is walked starting from this tag -- confirm against the
// full source.
495 wx28HtmlTag
*wx28HtmlTag::GetLastSibling() const
498 return m_Parent
->m_LastChild
;
// the C-style cast removes const from `this` (the method is const)
501 wx28HtmlTag
*cur
= (wx28HtmlTag
*)this;
508 wx28HtmlTag
*wx28HtmlTag::GetNextTag() const
510 if (m_FirstChild
) return m_FirstChild
;
511 if (m_Next
) return m_Next
;
512 wx28HtmlTag
*cur
= m_Parent
;
513 if (!cur
) return NULL
;
514 while (cur
->m_Parent
&& !cur
->m_Next
)