// Origin: wxWidgets.git (git.saurik.com), blob tests/benchmarks/htmlparser/htmltag.cpp
/////////////////////////////////////////////////////////////////////////////
// Name:        src/html/htmltag.cpp
// Purpose:     wx28HtmlTag class (represents a single tag)
// Author:      Vaclav Slavik
// Copyright:   (c) 1999 Vaclav Slavik
// Licence:     wxWindows licence
/////////////////////////////////////////////////////////////////////////////
18 #include <stdio.h> // for vsscanf
22 //-----------------------------------------------------------------------------
24 //-----------------------------------------------------------------------------
26 struct wx28HtmlCacheItem
28 // this is "pos" value passed to wx28HtmlTag's constructor.
29 // it is position of '<' character of the tag
32 // end positions for the tag:
33 // end1 is '<' of ending tag,
34 // end2 is '>' or both are
35 // -1 if there is no ending tag for this one...
36 // or -2 if this is ending tag </...>
44 IMPLEMENT_CLASS(wx28HtmlTagsCache
,wxObject
)
46 #define CACHE_INCREMENT 64
48 bool wxIsCDATAElement(const wxChar
*tag
)
50 return (wxStrcmp(tag
, wxT("SCRIPT")) == 0) ||
51 (wxStrcmp(tag
, wxT("STYLE")) == 0);
54 wx28HtmlTagsCache::wx28HtmlTagsCache(const wxString
& source
)
56 const wxChar
*src
= source
.c_str();
57 int lng
= source
.length();
58 wxChar tagBuffer
[256];
67 if (src
[pos
] == wxT('<')) // tag found:
69 if (m_CacheSize
% CACHE_INCREMENT
== 0)
70 m_Cache
= (wx28HtmlCacheItem
*) realloc(m_Cache
, (m_CacheSize
+ CACHE_INCREMENT
) * sizeof(wx28HtmlCacheItem
));
71 int tg
= m_CacheSize
++;
73 m_Cache
[tg
].Key
= stpos
;
77 pos
< lng
&& i
< (int)WXSIZEOF(tagBuffer
) - 1 &&
78 src
[pos
] != wxT('>') && !wxIsspace(src
[pos
]);
81 tagBuffer
[i
] = (wxChar
)wxToupper(src
[pos
]);
83 tagBuffer
[i
] = wxT('\0');
85 m_Cache
[tg
].Name
= new wxChar
[i
+1];
86 memcpy(m_Cache
[tg
].Name
, tagBuffer
, (i
+1)*sizeof(wxChar
));
88 while (pos
< lng
&& src
[pos
] != wxT('>')) pos
++;
90 if (src
[stpos
+1] == wxT('/')) // ending tag:
92 m_Cache
[tg
].End1
= m_Cache
[tg
].End2
= -2;
93 // find matching begin tag:
94 for (i
= tg
; i
>= 0; i
--)
95 if ((m_Cache
[i
].End1
== -1) && (wxStrcmp(m_Cache
[i
].Name
, tagBuffer
+1) == 0))
97 m_Cache
[i
].End1
= stpos
;
98 m_Cache
[i
].End2
= pos
+ 1;
104 m_Cache
[tg
].End1
= m_Cache
[tg
].End2
= -1;
106 if (wxIsCDATAElement(tagBuffer
))
108 // store the orig pos in case we are missing the closing
110 wxInt32 old_pos
= pos
;
111 bool foundCloseTag
= false;
113 // find next matching tag
114 int tag_len
= wxStrlen(tagBuffer
);
117 // find the ending tag
118 while (pos
+ 1 < lng
&&
119 (src
[pos
] != '<' || src
[pos
+1] != '/'))
126 while (pos
< lng
&& match_pos
< tag_len
&& src
[pos
] != '>' && src
[pos
] != '<') {
127 // cast to wxChar needed to suppress warning in
129 if ((wxChar
)wxToupper(src
[pos
]) == tagBuffer
[match_pos
]) {
132 else if (src
[pos
] == wxT(' ') || src
[pos
] == wxT('\n') ||
133 src
[pos
] == wxT('\r') || src
[pos
] == wxT('\t')) {
134 // need to skip over these
143 if (match_pos
== tag_len
)
145 pos
= pos
- tag_len
- 3;
146 foundCloseTag
= true;
149 else // keep looking for the closing tag
156 // we didn't find closing tag; this means the markup
157 // is incorrect and the best thing we can do is to
158 // ignore the unclosed tag and continue parsing as if
169 // ok, we're done, now we'll free .Name members of cache - we don't need it anymore:
170 for (int i
= 0; i
< m_CacheSize
; i
++)
172 delete[] m_Cache
[i
].Name
;
173 m_Cache
[i
].Name
= NULL
;
177 void wx28HtmlTagsCache::QueryTag(int at
, int* end1
, int* end2
)
179 if (m_Cache
== NULL
) return;
180 if (m_Cache
[m_CachePos
].Key
!= at
)
182 int delta
= (at
< m_Cache
[m_CachePos
].Key
) ? -1 : 1;
185 if ( m_CachePos
< 0 || m_CachePos
== m_CacheSize
)
187 // something is very wrong with HTML, give up by returning an
188 // impossibly large value which is going to be ignored by the
197 while (m_Cache
[m_CachePos
].Key
!= at
);
199 *end1
= m_Cache
[m_CachePos
].End1
;
200 *end2
= m_Cache
[m_CachePos
].End2
;
206 //-----------------------------------------------------------------------------
208 //-----------------------------------------------------------------------------
210 IMPLEMENT_CLASS(wx28HtmlTag
,wxObject
)
212 wx28HtmlTag::wx28HtmlTag(wx28HtmlTag
*parent
,
213 const wxString
& source
, int pos
, int end_pos
,
214 wx28HtmlTagsCache
*cache
,
215 wx28HtmlEntitiesParser
*entParser
) : wxObject()
217 /* Setup DOM relations */
220 m_FirstChild
= m_LastChild
= NULL
;
224 m_Prev
= m_Parent
->m_LastChild
;
226 m_Parent
->m_FirstChild
= this;
228 m_Prev
->m_Next
= this;
229 m_Parent
->m_LastChild
= this;
234 /* Find parameters and their values: */
239 // fill-in name, params and begin pos:
242 // find tag's name and convert it to uppercase:
243 while ((i
< end_pos
) &&
244 ((c
= source
[i
++]) != wxT(' ') && c
!= wxT('\r') &&
245 c
!= wxT('\n') && c
!= wxT('\t') &&
248 if ((c
>= wxT('a')) && (c
<= wxT('z')))
249 c
-= (wxT('a') - wxT('A'));
253 // if the tag has parameters, read them and "normalize" them,
254 // i.e. convert to uppercase, replace whitespaces by spaces and
255 // remove whitespaces around '=':
256 if (source
[i
-1] != wxT('>'))
258 #define IS_WHITE(c) (c == wxT(' ') || c == wxT('\r') || \
259 c == wxT('\n') || c == wxT('\t'))
260 wxString pname
, pvalue
;
272 state
= ST_BEFORE_NAME
;
277 if (c
== wxT('>') && !(state
== ST_VALUE
&& quote
!= 0))
279 if (state
== ST_BEFORE_EQ
|| state
== ST_NAME
)
281 m_ParamNames
.Add(pname
);
282 m_ParamValues
.Add(wxEmptyString
);
284 else if (state
== ST_VALUE
&& quote
== 0)
286 m_ParamNames
.Add(pname
);
288 m_ParamValues
.Add(entParser
->Parse(pvalue
));
290 m_ParamValues
.Add(pvalue
);
305 state
= ST_BEFORE_EQ
;
306 else if (c
== wxT('='))
307 state
= ST_BEFORE_VALUE
;
313 state
= ST_BEFORE_VALUE
;
314 else if (!IS_WHITE(c
))
316 m_ParamNames
.Add(pname
);
317 m_ParamValues
.Add(wxEmptyString
);
322 case ST_BEFORE_VALUE
:
325 if (c
== wxT('"') || c
== wxT('\''))
326 quote
= c
, pvalue
= wxEmptyString
;
328 quote
= 0, pvalue
= c
;
333 if ((quote
!= 0 && c
== quote
) ||
334 (quote
== 0 && IS_WHITE(c
)))
336 m_ParamNames
.Add(pname
);
339 // VS: backward compatibility, no real reason,
340 // but wxHTML code relies on this... :(
344 m_ParamValues
.Add(entParser
->Parse(pvalue
));
346 m_ParamValues
.Add(pvalue
);
347 state
= ST_BEFORE_NAME
;
359 cache
->QueryTag(pos
, &m_End1
, &m_End2
);
360 if (m_End1
> end_pos
) m_End1
= end_pos
;
361 if (m_End2
> end_pos
) m_End2
= end_pos
;
364 wx28HtmlTag::~wx28HtmlTag()
366 wx28HtmlTag
*t1
, *t2
;
370 t2
= t1
->GetNextSibling();
376 bool wx28HtmlTag::HasParam(const wxString
& par
) const
378 return (m_ParamNames
.Index(par
, false) != wxNOT_FOUND
);
381 wxString
wx28HtmlTag::GetParam(const wxString
& par
, bool with_commas
) const
383 int index
= m_ParamNames
.Index(par
, false);
384 if (index
== wxNOT_FOUND
)
385 return wxEmptyString
;
388 // VS: backward compatibility, seems to be never used by wxHTML...
390 s
<< wxT('"') << m_ParamValues
[index
] << wxT('"');
394 return m_ParamValues
[index
];
397 int wx28HtmlTag::ScanParam(const wxString
& par
,
398 const wxChar
*format
,
401 wxString parval
= GetParam(par
);
402 return wxSscanf(parval
, format
, param
);
405 bool wx28HtmlTag::GetParamAsInt(const wxString
& par
, int *clr
) const
407 if ( !HasParam(par
) )
411 if ( !GetParam(par
).ToLong(&i
) )
418 wxString
wx28HtmlTag::GetAllParams() const
420 // VS: this function is for backward compatibility only,
421 // never used by wxHTML
423 size_t cnt
= m_ParamNames
.GetCount();
424 for (size_t i
= 0; i
< cnt
; i
++)
426 s
<< m_ParamNames
[i
];
428 if (m_ParamValues
[i
].Find(wxT('"')) != wxNOT_FOUND
)
429 s
<< wxT('\'') << m_ParamValues
[i
] << wxT('\'');
431 s
<< wxT('"') << m_ParamValues
[i
] << wxT('"');
436 wx28HtmlTag
*wx28HtmlTag::GetFirstSibling() const
439 return m_Parent
->m_FirstChild
;
442 wx28HtmlTag
*cur
= (wx28HtmlTag
*)this;
449 wx28HtmlTag
*wx28HtmlTag::GetLastSibling() const
452 return m_Parent
->m_LastChild
;
455 wx28HtmlTag
*cur
= (wx28HtmlTag
*)this;
462 wx28HtmlTag
*wx28HtmlTag::GetNextTag() const
464 if (m_FirstChild
) return m_FirstChild
;
465 if (m_Next
) return m_Next
;
466 wx28HtmlTag
*cur
= m_Parent
;
467 if (!cur
) return NULL
;
468 while (cur
->m_Parent
&& !cur
->m_Next
)