]> git.saurik.com Git - wxWidgets.git/blob - tests/benchmarks/htmlparser/htmltag.cpp
OSX_ARCH_OPTS also needs to be added to CPPFLAGS.
[wxWidgets.git] / tests / benchmarks / htmlparser / htmltag.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/html/htmltag.cpp
3 // Purpose: wx28HtmlTag class (represents single tag)
4 // Author: Vaclav Slavik
5 // Copyright: (c) 1999 Vaclav Slavik
6 // Licence: wxWindows licence
7 /////////////////////////////////////////////////////////////////////////////
8
9 #include "wx/wxprec.h"
10
11 #ifdef __BORLANDC__
12 #pragma hdrstop
13 #endif
14
15 #include "htmltag.h"
16
17 #include "htmlpars.h"
18 #include <stdio.h> // for vsscanf
19 #include <stdarg.h>
20
21
22 //-----------------------------------------------------------------------------
23 // wx28HtmlTagsCache
24 //-----------------------------------------------------------------------------
25
26 struct wx28HtmlCacheItem
27 {
28 // this is "pos" value passed to wx28HtmlTag's constructor.
29 // it is position of '<' character of the tag
30 int Key;
31
32 // end positions for the tag:
33 // end1 is '<' of ending tag,
34 // end2 is '>' or both are
35 // -1 if there is no ending tag for this one...
36 // or -2 if this is ending tag </...>
37 int End1, End2;
38
39 // name of this tag
40 wxChar *Name;
41 };
42
43
// NOTE(review): IMPLEMENT_CLASS is a wxWidgets macro defined outside this
// file; presumably it emits the run-time class information that ties
// wx28HtmlTagsCache to its base wxObject - confirm against the wx headers.
44 IMPLEMENT_CLASS(wx28HtmlTagsCache,wxObject)
45
// Growth step of the m_Cache array: the wx28HtmlTagsCache constructor
// reallocates room for this many additional wx28HtmlCacheItem entries
// whenever the current entry count is a multiple of it.
46 #define CACHE_INCREMENT 64
47
48 bool wxIsCDATAElement(const wxChar *tag)
49 {
50 return (wxStrcmp(tag, wxT("SCRIPT")) == 0) ||
51 (wxStrcmp(tag, wxT("STYLE")) == 0);
52 }
53
// Builds the tags cache for `source` in a single left-to-right scan.
//
// For every '<' in the text one wx28HtmlCacheItem is appended to m_Cache:
//  - Key is the offset of that '<';
//  - End1/End2 are -2 for a closing tag (</...>), and for any other tag they
//    start as -1 and are later overwritten with the position of the matching
//    closing tag, if one is found further on.
// The content of SCRIPT and STYLE elements (see wxIsCDATAElement) is skipped
// so that '<' characters inside it do not produce cache entries.
//
// The tag names collected along the way are only needed for the matching and
// are freed again before the constructor returns.
54 wx28HtmlTagsCache::wx28HtmlTagsCache(const wxString& source)
55 {
56 const wxChar *src = source.c_str();
57 int lng = source.length();
// Scratch buffer for the upper-cased name of the tag currently being
// scanned; longer names are truncated to fit (see the copy loop below).
58 wxChar tagBuffer[256];
59
60 m_Cache = NULL;
61 m_CacheSize = 0;
62 m_CachePos = 0;
63
64 int pos = 0;
65 while (pos < lng)
66 {
67 if (src[pos] == wxT('<')) // tag found:
68 {
// Grow the array by CACHE_INCREMENT entries each time the current count
// reaches a multiple of it (this also covers the very first allocation).
// NOTE(review): the realloc() result is not checked for NULL.
69 if (m_CacheSize % CACHE_INCREMENT == 0)
70 m_Cache = (wx28HtmlCacheItem*) realloc(m_Cache, (m_CacheSize + CACHE_INCREMENT) * sizeof(wx28HtmlCacheItem));
// tg: index of the new cache entry; stpos: offset of this tag's '<'.
71 int tg = m_CacheSize++;
72 int stpos = pos++;
73 m_Cache[tg].Key = stpos;
74
// Copy the upper-cased tag name into tagBuffer, stopping at '>', at
// whitespace, at the end of the text or when the buffer is full. For a
// closing tag the leading '/' is part of the copied name.
75 int i;
76 for ( i = 0;
77 pos < lng && i < (int)WXSIZEOF(tagBuffer) - 1 &&
78 src[pos] != wxT('>') && !wxIsspace(src[pos]);
79 i++, pos++ )
80 {
81 tagBuffer[i] = (wxChar)wxToupper(src[pos]);
82 }
83 tagBuffer[i] = wxT('\0');
84
// Keep a private copy of the name (including the terminator) in the entry.
85 m_Cache[tg].Name = new wxChar[i+1];
86 memcpy(m_Cache[tg].Name, tagBuffer, (i+1)*sizeof(wxChar));
87
// Skip the rest of the tag (its parameters) up to the closing '>'.
88 while (pos < lng && src[pos] != wxT('>')) pos++;
89
// NOTE(review): when '<' is the last character, stpos+1 == lng; this
// presumably reads the terminator of the c_str() buffer - confirm.
90 if (src[stpos+1] == wxT('/')) // ending tag:
91 {
92 m_Cache[tg].End1 = m_Cache[tg].End2 = -2;
// find matching begin tag: walk backwards to the nearest entry that is
// still open (End1 == -1) and has the same name; tagBuffer+1 skips the
// leading '/'. The entry at tg itself can never match because its End1
// was just set to -2.
93 // find matching begin tag:
94 for (i = tg; i >= 0; i--)
95 if ((m_Cache[i].End1 == -1) && (wxStrcmp(m_Cache[i].Name, tagBuffer+1) == 0))
96 {
// End1 is the '<' of this closing tag; End2 is one past the position
// where the scan for its '>' stopped.
97 m_Cache[i].End1 = stpos;
98 m_Cache[i].End2 = pos + 1;
99 break;
100 }
101 }
102 else
103 {
// Opening (or stand-alone) tag: no closing tag known yet.
104 m_Cache[tg].End1 = m_Cache[tg].End2 = -1;
105
106 if (wxIsCDATAElement(tagBuffer))
107 {
108 // store the orig pos in case we are missing the closing
109 // tag (see below)
110 wxInt32 old_pos = pos;
111 bool foundCloseTag = false;
112
113 // find next matching tag
114 int tag_len = wxStrlen(tagBuffer);
115 while (pos < lng)
116 {
117 // find the ending tag: advance to the next "</" (or to the last
// character of the text), then step over the '<' if there is one
118 while (pos + 1 < lng &&
119 (src[pos] != '<' || src[pos+1] != '/'))
120 ++pos;
121 if (src[pos] == '<')
122 ++pos;
123
124 // see if it matches: consume characters up to the next '>' or '<'
// and count how many consecutive ones equal the element name. Blanks
// are ignored, any other mismatch restarts the count.
// NOTE(review): because a mismatch only resets the counter, the name
// may be matched anywhere before the next '>' or '<', not only
// directly after "</" - confirm this leniency is intended.
125 int match_pos = 0;
126 while (pos < lng && match_pos < tag_len && src[pos] != '>' && src[pos] != '<') {
127 // cast to wxChar needed to suppress warning in
128 // Unicode build
129 if ((wxChar)wxToupper(src[pos]) == tagBuffer[match_pos]) {
130 ++match_pos;
131 }
132 else if (src[pos] == wxT(' ') || src[pos] == wxT('\n') ||
133 src[pos] == wxT('\r') || src[pos] == wxT('\t')) {
134 // need to skip over these
135 }
136 else {
137 match_pos = 0;
138 }
139 ++pos;
140 }
141
142 // found a match
143 if (match_pos == tag_len)
144 {
// Rewind: pos is just past the matched name. Assuming the name directly
// followed "</", this lands one character before that '<', so the
// pos++ at the bottom of the outer loop puts the scanner on the '<' and
// the closing tag is then processed like any other tag.
145 pos = pos - tag_len - 3;
146 foundCloseTag = true;
147 break;
148 }
149 else // keep looking for the closing tag
150 {
151 ++pos;
152 }
153 }
154 if (!foundCloseTag)
155 {
156 // we didn't find closing tag; this means the markup
157 // is incorrect and the best thing we can do is to
158 // ignore the unclosed tag and continue parsing as if
159 // it didn't exist:
160 pos = old_pos;
161 }
162 }
163 }
164 }
165
// Advance past the current character (for a tag: past the position where
// the scan for its '>' stopped).
166 pos++;
167 }
168
169 // ok, we're done, now we'll free .Name members of cache - we don't need it anymore:
170 for (int i = 0; i < m_CacheSize; i++)
171 {
172 delete[] m_Cache[i].Name;
173 m_Cache[i].Name = NULL;
174 }
175 }
176
177 void wx28HtmlTagsCache::QueryTag(int at, int* end1, int* end2)
178 {
179 if (m_Cache == NULL) return;
180 if (m_Cache[m_CachePos].Key != at)
181 {
182 int delta = (at < m_Cache[m_CachePos].Key) ? -1 : 1;
183 do
184 {
185 if ( m_CachePos < 0 || m_CachePos == m_CacheSize )
186 {
187 // something is very wrong with HTML, give up by returning an
188 // impossibly large value which is going to be ignored by the
189 // caller
190 *end1 =
191 *end2 = INT_MAX;
192 return;
193 }
194
195 m_CachePos += delta;
196 }
197 while (m_Cache[m_CachePos].Key != at);
198 }
199 *end1 = m_Cache[m_CachePos].End1;
200 *end2 = m_Cache[m_CachePos].End2;
201 }
202
203
204
205
206 //-----------------------------------------------------------------------------
207 // wx28HtmlTag
208 //-----------------------------------------------------------------------------
209
// NOTE(review): IMPLEMENT_CLASS is a wxWidgets macro defined outside this
// file; presumably it emits the run-time class information that ties
// wx28HtmlTag to its base wxObject - confirm against the wx headers.
210 IMPLEMENT_CLASS(wx28HtmlTag,wxObject)
211
// Constructs the tag that starts at source[pos] (the '<' character).
//
// parent    - enclosing tag or NULL; when given, the new tag is appended to
//             the end of the parent's list of children.
// source    - text being parsed.
// pos       - offset of this tag's '<' in source; also the key used for the
//             cache lookup.
// end_pos   - offset at which parsing of this tag must stop; nothing at or
//             beyond it is read, and the closing-tag positions are clamped
//             to it.
// cache     - tags cache built for the same source; dereferenced
//             unconditionally, so it must not be NULL.
// entParser - optional; when non-NULL every parameter value is passed
//             through its Parse() before being stored (presumably to decode
//             HTML entities - confirm against wx28HtmlEntitiesParser).
//
// On return m_Name holds the tag name, m_ParamNames/m_ParamValues hold the
// parameters in order of appearance, m_Begin is the offset at which parsing
// of the tag stopped (just past its '>' when one was found) and m_End1/m_End2
// hold the closing-tag positions reported by the cache.
212 wx28HtmlTag::wx28HtmlTag(wx28HtmlTag *parent,
213 const wxString& source, int pos, int end_pos,
214 wx28HtmlTagsCache *cache,
215 wx28HtmlEntitiesParser *entParser) : wxObject()
216 {
217 /* Setup DOM relations */
218
219 m_Next = NULL;
220 m_FirstChild = m_LastChild = NULL;
221 m_Parent = parent;
222 if (parent)
223 {
// Append this tag after the parent's current last child (or make it the
// first child if the parent had none).
224 m_Prev = m_Parent->m_LastChild;
225 if (m_Prev == NULL)
226 m_Parent->m_FirstChild = this;
227 else
228 m_Prev->m_Next = this;
229 m_Parent->m_LastChild = this;
230 }
231 else
232 m_Prev = NULL;
233
234 /* Find parameters and their values: */
235
236 int i;
237 wxChar c;
238
239 // fill-in name, params and begin pos:
240 i = pos+1;
241
242 // find tag's name and convert it to uppercase: characters are read until
// a blank, CR, LF, tab or '>' is met or end_pos is reached. The terminating
// character is consumed too, so afterwards source[i-1] is the character
// that ended the name. Only 'a'..'z' are converted.
243 while ((i < end_pos) &&
244 ((c = source[i++]) != wxT(' ') && c != wxT('\r') &&
245 c != wxT('\n') && c != wxT('\t') &&
246 c != wxT('>')))
247 {
248 if ((c >= wxT('a')) && (c <= wxT('z')))
249 c -= (wxT('a') - wxT('A'));
250 m_Name << c;
251 }
252
253 // if the name was not ended by '>', the tag may have parameters: read
254 // them with a small state machine. Parameter names are stored as written;
255 // whitespace around '=' is skipped; values may be quoted with " or '.
256 if (source[i-1] != wxT('>'))
257 {
258 #define IS_WHITE(c) (c == wxT(' ') || c == wxT('\r') || \
259 c == wxT('\n') || c == wxT('\t'))
260 wxString pname, pvalue;
// quote: the quote character that opened the current value, 0 if unquoted.
261 wxChar quote;
// Parser states:
//   ST_BEFORE_NAME  - skipping whitespace before a parameter name
//   ST_NAME         - collecting the name into pname
//   ST_BEFORE_EQ    - name finished by whitespace, '=' may still follow
//   ST_BEFORE_VALUE - '=' seen, skipping whitespace before the value
//   ST_VALUE        - collecting the value into pvalue
262 enum
263 {
264 ST_BEFORE_NAME = 1,
265 ST_NAME,
266 ST_BEFORE_EQ,
267 ST_BEFORE_VALUE,
268 ST_VALUE
269 } state;
270
271 quote = 0;
272 state = ST_BEFORE_NAME;
// NOTE: if end_pos is reached before a terminating '>', a parameter that
// is still being collected at that point is not stored.
273 while (i < end_pos)
274 {
275 c = source[i++];
276
// '>' ends the tag unless it appears inside a quoted value.
277 if (c == wxT('>') && !(state == ST_VALUE && quote != 0))
278 {
279 if (state == ST_BEFORE_EQ || state == ST_NAME)
280 {
// a name without a value: store it with an empty value
281 m_ParamNames.Add(pname);
282 m_ParamValues.Add(wxEmptyString);
283 }
284 else if (state == ST_VALUE && quote == 0)
285 {
// an unquoted value terminated by '>'.
// NOTE(review): unlike the whitespace-terminated case in ST_VALUE
// below, the value is not upper-cased here.
286 m_ParamNames.Add(pname);
287 if (entParser)
288 m_ParamValues.Add(entParser->Parse(pvalue));
289 else
290 m_ParamValues.Add(pvalue);
291 }
292 break;
293 }
294 switch (state)
295 {
296 case ST_BEFORE_NAME:
297 if (!IS_WHITE(c))
298 {
299 pname = c;
300 state = ST_NAME;
301 }
302 break;
303 case ST_NAME:
304 if (IS_WHITE(c))
305 state = ST_BEFORE_EQ;
306 else if (c == wxT('='))
307 state = ST_BEFORE_VALUE;
308 else
309 pname << c;
310 break;
311 case ST_BEFORE_EQ:
312 if (c == wxT('='))
313 state = ST_BEFORE_VALUE;
314 else if (!IS_WHITE(c))
315 {
// a new name starts: the previous one had no value
316 m_ParamNames.Add(pname);
317 m_ParamValues.Add(wxEmptyString);
318 pname = c;
319 state = ST_NAME;
320 }
321 break;
322 case ST_BEFORE_VALUE:
323 if (!IS_WHITE(c))
324 {
// a quote character opens a quoted value and is not part of it;
// any other character is the first character of an unquoted value
325 if (c == wxT('"') || c == wxT('\''))
326 quote = c, pvalue = wxEmptyString;
327 else
328 quote = 0, pvalue = c;
329 state = ST_VALUE;
330 }
331 break;
332 case ST_VALUE:
// a quoted value ends at the matching quote, an unquoted one at the
// first whitespace character
333 if ((quote != 0 && c == quote) ||
334 (quote == 0 && IS_WHITE(c)))
335 {
336 m_ParamNames.Add(pname);
337 if (quote == 0)
338 {
339 // VS: backward compatibility, no real reason,
340 // but wxHTML code relies on this... :(
341 pvalue.MakeUpper();
342 }
343 if (entParser)
344 m_ParamValues.Add(entParser->Parse(pvalue));
345 else
346 m_ParamValues.Add(pvalue);
347 state = ST_BEFORE_NAME;
348 }
349 else
350 pvalue << c;
351 break;
352 }
353 }
354
355 #undef IS_WHITE
356 }
// i is now the offset at which reading the tag stopped.
357 m_Begin = i;
358
// Fetch the closing-tag positions and make sure they do not exceed end_pos
// (this also absorbs the INT_MAX "not found" value QueryTag may report).
359 cache->QueryTag(pos, &m_End1, &m_End2);
360 if (m_End1 > end_pos) m_End1 = end_pos;
361 if (m_End2 > end_pos) m_End2 = end_pos;
362 }
363
364 wx28HtmlTag::~wx28HtmlTag()
365 {
366 wx28HtmlTag *t1, *t2;
367 t1 = m_FirstChild;
368 while (t1)
369 {
370 t2 = t1->GetNextSibling();
371 delete t1;
372 t1 = t2;
373 }
374 }
375
376 bool wx28HtmlTag::HasParam(const wxString& par) const
377 {
378 return (m_ParamNames.Index(par, false) != wxNOT_FOUND);
379 }
380
381 wxString wx28HtmlTag::GetParam(const wxString& par, bool with_commas) const
382 {
383 int index = m_ParamNames.Index(par, false);
384 if (index == wxNOT_FOUND)
385 return wxEmptyString;
386 if (with_commas)
387 {
388 // VS: backward compatibility, seems to be never used by wxHTML...
389 wxString s;
390 s << wxT('"') << m_ParamValues[index] << wxT('"');
391 return s;
392 }
393 else
394 return m_ParamValues[index];
395 }
396
397 int wx28HtmlTag::ScanParam(const wxString& par,
398 const wxChar *format,
399 void *param) const
400 {
401 wxString parval = GetParam(par);
402 return wxSscanf(parval, format, param);
403 }
404
405 bool wx28HtmlTag::GetParamAsInt(const wxString& par, int *clr) const
406 {
407 if ( !HasParam(par) )
408 return false;
409
410 long i;
411 if ( !GetParam(par).ToLong(&i) )
412 return false;
413
414 *clr = (int)i;
415 return true;
416 }
417
418 wxString wx28HtmlTag::GetAllParams() const
419 {
420 // VS: this function is for backward compatibility only,
421 // never used by wxHTML
422 wxString s;
423 size_t cnt = m_ParamNames.GetCount();
424 for (size_t i = 0; i < cnt; i++)
425 {
426 s << m_ParamNames[i];
427 s << wxT('=');
428 if (m_ParamValues[i].Find(wxT('"')) != wxNOT_FOUND)
429 s << wxT('\'') << m_ParamValues[i] << wxT('\'');
430 else
431 s << wxT('"') << m_ParamValues[i] << wxT('"');
432 }
433 return s;
434 }
435
436 wx28HtmlTag *wx28HtmlTag::GetFirstSibling() const
437 {
438 if (m_Parent)
439 return m_Parent->m_FirstChild;
440 else
441 {
442 wx28HtmlTag *cur = (wx28HtmlTag*)this;
443 while (cur->m_Prev)
444 cur = cur->m_Prev;
445 return cur;
446 }
447 }
448
449 wx28HtmlTag *wx28HtmlTag::GetLastSibling() const
450 {
451 if (m_Parent)
452 return m_Parent->m_LastChild;
453 else
454 {
455 wx28HtmlTag *cur = (wx28HtmlTag*)this;
456 while (cur->m_Next)
457 cur = cur->m_Next;
458 return cur;
459 }
460 }
461
462 wx28HtmlTag *wx28HtmlTag::GetNextTag() const
463 {
464 if (m_FirstChild) return m_FirstChild;
465 if (m_Next) return m_Next;
466 wx28HtmlTag *cur = m_Parent;
467 if (!cur) return NULL;
468 while (cur->m_Parent && !cur->m_Next)
469 cur = cur->m_Parent;
470 return cur->m_Next;
471 }