]>
git.saurik.com Git - wxWidgets.git/blob - tests/benchmarks/htmlparser/htmltag.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/html/htmltag.cpp
3 // Purpose: wx28HtmlTag class (represents single tag)
4 // Author: Vaclav Slavik
5 // Copyright: (c) 1999 Vaclav Slavik
6 // Licence: wxWindows licence
7 /////////////////////////////////////////////////////////////////////////////
18 #include "wx/colour.h"
22 #include <stdio.h> // for vsscanf
26 //-----------------------------------------------------------------------------
28 //-----------------------------------------------------------------------------
// One entry of the tags cache. The wx28HtmlTagsCache constructor in this file
// writes the Key, Name, End1 and End2 members of every item, and QueryTag()
// reads Key, End1 and End2 back.
// NOTE(review): the member declarations themselves are not visible in this
// copy of the file -- confirm their types against the full source.
30 struct wx28HtmlCacheItem
32 // this is "pos" value passed to wx28HtmlTag's constructor.
33 // it is position of '<' character of the tag
36 // end positions for the tag:
37 // end1 is '<' of ending tag,
38 // end2 is '>' or both are
39 // -1 if there is no ending tag for this one...
40 // or -2 if this is ending tag </...>
// NOTE(review): presumably the wxWidgets RTTI implementation macro, naming
// wxObject as the base class of wx28HtmlTagsCache -- confirm in the wx headers.
48 IMPLEMENT_CLASS(wx28HtmlTagsCache
,wxObject
)
// Growth step of the cache array: the constructor below calls realloc() for
// this many additional items each time m_CacheSize is a multiple of it.
50 #define CACHE_INCREMENT 64
52 bool wxIsCDATAElement(const wxChar
*tag
)
54 return (wxStrcmp(tag
, wxT("SCRIPT")) == 0) ||
55 (wxStrcmp(tag
, wxT("STYLE")) == 0);
// Scans the text of `source` and records one wx28HtmlCacheItem for every
// opening angle bracket found: Key receives the position of that bracket and
// Name a heap copy of the upper-cased tag name. End1/End2 are set to -2 for an
// ending tag, to -1 for any other tag, and are overwritten with the position
// of the ending tag once a matching one is met. For SCRIPT and STYLE elements
// the scan searches ahead for the matching closing tag. The Name copies are
// released again before the constructor returns.
// NOTE(review): several statements (loop headers, braces and the declarations
// of pos, stpos, i and match_pos) are not visible in this copy of the file;
// the comments added here describe only the visible lines.
58 wx28HtmlTagsCache::wx28HtmlTagsCache(const wxString
& source
)
60 const wxChar
*src
= source
.c_str();
61 int lng
= source
.length();
// scratch buffer for the upper-cased name of the tag being examined
62 wxChar tagBuffer
[256];
71 if (src
[pos
] == wxT('<')) // tag found:
// grow the array in CACHE_INCREMENT-sized steps
// NOTE(review): no check of the realloc() result is visible here
73 if (m_CacheSize
% CACHE_INCREMENT
== 0)
74 m_Cache
= (wx28HtmlCacheItem
*) realloc(m_Cache
, (m_CacheSize
+ CACHE_INCREMENT
) * sizeof(wx28HtmlCacheItem
));
// tg is the index of the new item
75 int tg
= m_CacheSize
++;
77 m_Cache
[tg
].Key
= stpos
;
// copy condition for the tag name: stop at end of input, at a closing angle
// bracket, at whitespace, or when only the terminator still fits in tagBuffer
81 pos
< lng
&& i
< (int)WXSIZEOF(tagBuffer
) - 1 &&
82 src
[pos
] != wxT('>') && !wxIsspace(src
[pos
]);
85 tagBuffer
[i
] = (wxChar
)wxToupper(src
[pos
]);
87 tagBuffer
[i
] = wxT('\0');
// keep a heap copy of the name (including the terminator) in the cache item
89 m_Cache
[tg
].Name
= new wxChar
[i
+1];
90 memcpy(m_Cache
[tg
].Name
, tagBuffer
, (i
+1)*sizeof(wxChar
));
// skip the remainder of the tag up to its closing angle bracket
92 while (pos
< lng
&& src
[pos
] != wxT('>')) pos
++;
94 if (src
[stpos
+1] == wxT('/')) // ending tag:
96 m_Cache
[tg
].End1
= m_Cache
[tg
].End2
= -2;
97 // find matching begin tag:
// walks backwards from tg looking for an item that has no end yet (End1 == -1)
// and whose Name equals tagBuffer without its first character (presumably the
// slash of the ending tag)
98 for (i
= tg
; i
>= 0; i
--)
99 if ((m_Cache
[i
].End1
== -1) && (wxStrcmp(m_Cache
[i
].Name
, tagBuffer
+1) == 0))
101 m_Cache
[i
].End1
= stpos
;
102 m_Cache
[i
].End2
= pos
+ 1;
// any other tag starts out with no known ending tag
108 m_Cache
[tg
].End1
= m_Cache
[tg
].End2
= -1;
// SCRIPT and STYLE: search ahead for the matching closing tag
110 if (wxIsCDATAElement(tagBuffer
))
112 // store the orig pos in case we are missing the closing
114 wxInt32 old_pos
= pos
;
115 bool foundCloseTag
= false;
117 // find next matching tag
118 int tag_len
= wxStrlen(tagBuffer
);
121 // find the ending tag
122 while (pos
+ 1 < lng
&&
123 (src
[pos
] != '<' || src
[pos
+1] != '/'))
// compare the following characters, upper-cased, with tagBuffer; match_pos
// counts how many characters of the name have matched so far
130 while (pos
< lng
&& match_pos
< tag_len
&& src
[pos
] != '>' && src
[pos
] != '<') {
131 // cast to wxChar needed to suppress warning in
133 if ((wxChar
)wxToupper(src
[pos
]) == tagBuffer
[match_pos
]) {
136 else if (src
[pos
] == wxT(' ') || src
[pos
] == wxT('\n') ||
137 src
[pos
] == wxT('\r') || src
[pos
] == wxT('\t')) {
138 // need to skip over these
// the whole name matched
147 if (match_pos
== tag_len
)
// rewind pos by the name length plus three characters
149 pos
= pos
- tag_len
- 3;
150 foundCloseTag
= true;
153 else // keep looking for the closing tag
160 // we didn't find closing tag; this means the markup
161 // is incorrect and the best thing we can do is to
162 // ignore the unclosed tag and continue parsing as if
173 // ok, we're done, now we'll free .Name members of cache - we don't need it anymore:
174 for (int i
= 0; i
< m_CacheSize
; i
++)
176 delete[] m_Cache
[i
].Name
;
177 m_Cache
[i
].Name
= NULL
;
// Looks up the cache item whose Key equals `at` and stores its End1 and End2
// members in *end1 and *end2. Returns without touching the outputs when
// m_Cache is NULL. When the item at m_CachePos is not the wanted one, the
// search direction (delta) is -1 if `at` is below the current Key and +1
// otherwise, and the loop runs until an item with Key == at is reached.
// NOTE(review): the loop body and the code handling an out-of-range
// m_CachePos are not visible in this copy of the file.
181 void wx28HtmlTagsCache::QueryTag(int at
, int* end1
, int* end2
)
183 if (m_Cache
== NULL
) return;
184 if (m_Cache
[m_CachePos
].Key
!= at
)
186 int delta
= (at
< m_Cache
[m_CachePos
].Key
) ? -1 : 1;
// m_CachePos has left the valid index range
189 if ( m_CachePos
< 0 || m_CachePos
== m_CacheSize
)
191 // something is very wrong with HTML, give up by returning an
192 // impossibly large value which is going to be ignored by the
201 while (m_Cache
[m_CachePos
].Key
!= at
);
// report the end positions of the item found
203 *end1
= m_Cache
[m_CachePos
].End1
;
204 *end2
= m_Cache
[m_CachePos
].End2
;
210 //-----------------------------------------------------------------------------
212 //-----------------------------------------------------------------------------
// NOTE(review): presumably the wxWidgets RTTI implementation macro, naming
// wxObject as the base class of wx28HtmlTag -- confirm in the wx headers.
214 IMPLEMENT_CLASS(wx28HtmlTag
,wxObject
)
// Builds a tag object from the text in `source`. The visible code does three
// things: it links the new object into the child list of its parent, it reads
// the tag name and the name/value pairs of its parameters into m_ParamNames
// and m_ParamValues with a small state machine (states ST_BEFORE_NAME,
// ST_NAME, ST_BEFORE_EQ, ST_BEFORE_VALUE and ST_VALUE appear below), and it
// asks `cache` for the end positions of the tag, limiting both to `end_pos`.
// Parameter values are added either raw or after entParser->Parse().
// NOTE(review): many statements (declarations, the switch header, most case
// labels and the conditions choosing between raw and parsed values) are not
// visible in this copy of the file; the comments added here describe only the
// visible lines.
216 wx28HtmlTag::wx28HtmlTag(wx28HtmlTag
*parent
,
217 const wxString
& source
, int pos
, int end_pos
,
218 wx28HtmlTagsCache
*cache
,
219 wx28HtmlEntitiesParser
*entParser
) : wxObject()
221 /* Setup DOM relations */
// a new tag has no children yet
224 m_FirstChild
= m_LastChild
= NULL
;
// the previous sibling is whatever was the last child of the parent so far
228 m_Prev
= m_Parent
->m_LastChild
;
230 m_Parent
->m_FirstChild
= this;
232 m_Prev
->m_Next
= this;
// this tag becomes the last child of its parent
233 m_Parent
->m_LastChild
= this;
238 /* Find parameters and their values: */
243 // fill-in name, params and begin pos:
246 // find tag's name and convert it to uppercase:
247 while ((i
< end_pos
) &&
248 ((c
= source
[i
++]) != wxT(' ') && c
!= wxT('\r') &&
249 c
!= wxT('\n') && c
!= wxT('\t') &&
// map a lower-case ASCII letter to its upper-case counterpart
252 if ((c
>= wxT('a')) && (c
<= wxT('z')))
253 c
-= (wxT('a') - wxT('A'));
257 // if the tag has parameters, read them and "normalize" them,
258 // i.e. convert to uppercase, replace whitespaces by spaces and
259 // remove whitespaces around '=':
// only done when the last character consumed above was not the closing bracket
260 if (source
[i
-1] != wxT('>'))
262 #define IS_WHITE(c) (c == wxT(' ') || c == wxT('\r') || \
263 c == wxT('\n') || c == wxT('\t'))
264 wxString pname
, pvalue
;
276 state
= ST_BEFORE_NAME
;
// a closing bracket ends the parameter list unless it is inside a quoted value
281 if (c
== wxT('>') && !(state
== ST_VALUE
&& quote
!= 0))
// a name that never got a value is stored with an empty value
283 if (state
== ST_BEFORE_EQ
|| state
== ST_NAME
)
285 m_ParamNames
.Add(pname
);
286 m_ParamValues
.Add(wxEmptyString
);
// an unquoted value was still being read: store it as well
288 else if (state
== ST_VALUE
&& quote
== 0)
290 m_ParamNames
.Add(pname
);
292 m_ParamValues
.Add(entParser
->Parse(pvalue
));
294 m_ParamValues
.Add(pvalue
);
309 state
= ST_BEFORE_EQ
;
310 else if (c
== wxT('='))
311 state
= ST_BEFORE_VALUE
;
317 state
= ST_BEFORE_VALUE
;
// a non-white character here means the previous name had no value
318 else if (!IS_WHITE(c
))
320 m_ParamNames
.Add(pname
);
321 m_ParamValues
.Add(wxEmptyString
);
326 case ST_BEFORE_VALUE
:
// a quote character starts a quoted value; it is remembered in quote
329 if (c
== wxT('"') || c
== wxT('\''))
330 quote
= c
, pvalue
= wxEmptyString
;
// otherwise c is already the first character of an unquoted value
332 quote
= 0, pvalue
= c
;
// the value ends at the matching quote or, when unquoted, at whitespace
337 if ((quote
!= 0 && c
== quote
) ||
338 (quote
== 0 && IS_WHITE(c
)))
340 m_ParamNames
.Add(pname
);
343 // VS: backward compatibility, no real reason,
344 // but wxHTML code relies on this... :(
348 m_ParamValues
.Add(entParser
->Parse(pvalue
));
350 m_ParamValues
.Add(pvalue
);
351 state
= ST_BEFORE_NAME
;
// fetch the end positions recorded for this tag and clamp them to end_pos
363 cache
->QueryTag(pos
, &m_End1
, &m_End2
);
364 if (m_End1
> end_pos
) m_End1
= end_pos
;
365 if (m_End2
> end_pos
) m_End2
= end_pos
;
// Destructor. The only visible statement fetches the next sibling of t1 into
// t2.
// NOTE(review): presumably this walks over the child tags, remembering the
// next one before the current one is disposed of -- the loop and the disposal
// are not visible in this copy of the file; confirm against the full source.
368 wx28HtmlTag::~wx28HtmlTag()
370 wx28HtmlTag
*t1
, *t2
;
374 t2
= t1
->GetNextSibling();
380 bool wx28HtmlTag::HasParam(const wxString
& par
) const
382 return (m_ParamNames
.Index(par
, false) != wxNOT_FOUND
);
// Returns the value stored for parameter `par`, or wxEmptyString when
// m_ParamNames.Index() does not find it. One visible path builds the value
// wrapped in double quotes, the other returns the stored value unchanged.
// NOTE(review): the condition choosing between the two paths is not visible
// in this copy of the file; presumably it is `with_commas` -- confirm.
385 wxString
wx28HtmlTag::GetParam(const wxString
& par
, bool with_commas
) const
387 int index
= m_ParamNames
.Index(par
, false);
388 if (index
== wxNOT_FOUND
)
389 return wxEmptyString
;
392 // VS: backward compatibility, seems to be never used by wxHTML...
394 s
<< wxT('"') << m_ParamValues
[index
] << wxT('"');
398 return m_ParamValues
[index
];
// Fetches the value of parameter `par` with GetParam() and hands it to
// wxSscanf() together with `format` and `param`; the result of wxSscanf() is
// returned unchanged.
// NOTE(review): the declaration of `param` (last line of the signature) is
// not visible in this copy of the file.
401 int wx28HtmlTag::ScanParam(const wxString
& par
,
402 const wxChar
*format
,
405 wxString parval
= GetParam(par
);
406 return wxSscanf(parval
, format
, param
);
// Interprets the value of parameter `par` as a colour and stores it in *clr.
// A NULL `clr` makes wxCHECK_MSG return false. A value longer than one
// character that does not start with a hash sign is compared with the sixteen
// colour names listed below; on a match the colour is set and true returned.
// NOTE(review): IsSameAs(..., false) is, per the wxString documentation, a
// case-insensitive comparison -- confirm. The handling of the #rrggbb form and
// the final return are not visible in this copy of the file.
409 bool wx28HtmlTag::GetParamAsColour(const wxString
& par
, wxColour
*clr
) const
411 wxCHECK_MSG( clr
, false, wxT("invalid colour argument") );
413 wxString str
= GetParam(par
);
415 // handle colours defined in HTML 4.0 first:
416 if (str
.length() > 1 && str
[0] != wxT('#'))
// helper macro: on a name match set the colour and return true at once
418 #define HTML_COLOUR(name, r, g, b) \
419 if (str.IsSameAs(wxT(name), false)) \
420 { clr->Set(r, g, b); return true; }
421 HTML_COLOUR("black", 0x00,0x00,0x00)
422 HTML_COLOUR("silver", 0xC0,0xC0,0xC0)
423 HTML_COLOUR("gray", 0x80,0x80,0x80)
424 HTML_COLOUR("white", 0xFF,0xFF,0xFF)
425 HTML_COLOUR("maroon", 0x80,0x00,0x00)
426 HTML_COLOUR("red", 0xFF,0x00,0x00)
427 HTML_COLOUR("purple", 0x80,0x00,0x80)
428 HTML_COLOUR("fuchsia", 0xFF,0x00,0xFF)
429 HTML_COLOUR("green", 0x00,0x80,0x00)
430 HTML_COLOUR("lime", 0x00,0xFF,0x00)
431 HTML_COLOUR("olive", 0x80,0x80,0x00)
432 HTML_COLOUR("yellow", 0xFF,0xFF,0x00)
433 HTML_COLOUR("navy", 0x00,0x00,0x80)
434 HTML_COLOUR("blue", 0x00,0x00,0xFF)
435 HTML_COLOUR("teal", 0x00,0x80,0x80)
436 HTML_COLOUR("aqua", 0x00,0xFF,0xFF)
440 // then try to parse #rrggbb representations or set from other well
441 // known names (note that this doesn't strictly conform to HTML spec,
442 // but it doesn't do real harm -- but it *must* be done after the standard
443 // colors are handled above):
// Reads parameter `par` as an integer: the visible code first tests
// HasParam(par) and then converts the value with wxString::ToLong().
// Note that the output pointer is named `clr` although it receives an int.
// NOTE(review): the statements executed when either test fails, and the store
// through `clr`, are not visible in this copy of the file.
450 bool wx28HtmlTag::GetParamAsInt(const wxString
& par
, int *clr
) const
452 if ( !HasParam(par
) )
456 if ( !GetParam(par
).ToLong(&i
) )
// Appends every parameter name from m_ParamNames, with its value from
// m_ParamValues, to the string s. A value that contains a double quote is
// wrapped in single quotes; the other visible statement wraps the value in
// double quotes.
// NOTE(review): the declaration of s, the separators written between the
// parts, the else joining the two quoting statements and the return are not
// visible in this copy of the file.
463 wxString
wx28HtmlTag::GetAllParams() const
465 // VS: this function is for backward compatibility only,
466 // never used by wxHTML
468 size_t cnt
= m_ParamNames
.GetCount();
469 for (size_t i
= 0; i
< cnt
; i
++)
471 s
<< m_ParamNames
[i
];
473 if (m_ParamValues
[i
].Find(wxT('"')) != wxNOT_FOUND
)
474 s
<< wxT('\'') << m_ParamValues
[i
] << wxT('\'');
476 s
<< wxT('"') << m_ParamValues
[i
] << wxT('"');
// Returns the first sibling of this tag. One visible path returns
// m_Parent->m_FirstChild; the other starts from this object itself, with
// constness cast away.
// NOTE(review): the condition selecting the path (presumably whether m_Parent
// is set) and the walk performed from `cur` are not visible in this copy of
// the file.
481 wx28HtmlTag
*wx28HtmlTag::GetFirstSibling() const
484 return m_Parent
->m_FirstChild
;
487 wx28HtmlTag
*cur
= (wx28HtmlTag
*)this;
// Returns the last sibling of this tag. One visible path returns
// m_Parent->m_LastChild; the other starts from this object itself, with
// constness cast away.
// NOTE(review): the condition selecting the path (presumably whether m_Parent
// is set) and the walk performed from `cur` are not visible in this copy of
// the file.
494 wx28HtmlTag
*wx28HtmlTag::GetLastSibling() const
497 return m_Parent
->m_LastChild
;
500 wx28HtmlTag
*cur
= (wx28HtmlTag
*)this;
507 wx28HtmlTag
*wx28HtmlTag::GetNextTag() const
509 if (m_FirstChild
) return m_FirstChild
;
510 if (m_Next
) return m_Next
;
511 wx28HtmlTag
*cur
= m_Parent
;
512 if (!cur
) return NULL
;
513 while (cur
->m_Parent
&& !cur
->m_Next
)