]>
Commit | Line | Data |
---|---|---|
c21faa0a VS |
1 | ///////////////////////////////////////////////////////////////////////////// |
2 | // Name: src/html/htmltag.cpp | |
3 | // Purpose: wx28HtmlTag class (represents single tag) | |
4 | // Author: Vaclav Slavik | |
c21faa0a VS |
5 | // Copyright: (c) 1999 Vaclav Slavik |
6 | // Licence: wxWindows licence | |
7 | ///////////////////////////////////////////////////////////////////////////// | |
8 | ||
9 | #include "wx/wxprec.h" | |
10 | ||
11 | #ifdef __BORLANDC__ | |
12 | #pragma hdrstop | |
13 | #endif | |
14 | ||
15 | #include "htmltag.h" | |
16 | ||
c21faa0a VS |
17 | #include "htmlpars.h" |
18 | #include <stdio.h> // for vsscanf | |
19 | #include <stdarg.h> | |
20 | ||
21 | ||
22 | //----------------------------------------------------------------------------- | |
23 | // wx28HtmlTagsCache | |
24 | //----------------------------------------------------------------------------- | |
25 | ||
26 | struct wx28HtmlCacheItem | |
27 | { | |
28 | // this is "pos" value passed to wx28HtmlTag's constructor. | |
29 | // it is position of '<' character of the tag | |
30 | int Key; | |
31 | ||
32 | // end positions for the tag: | |
33 | // end1 is '<' of ending tag, | |
34 | // end2 is '>' or both are | |
35 | // -1 if there is no ending tag for this one... | |
36 | // or -2 if this is ending tag </...> | |
37 | int End1, End2; | |
38 | ||
39 | // name of this tag | |
40 | wxChar *Name; | |
41 | }; | |
42 | ||
43 | ||
44 | IMPLEMENT_CLASS(wx28HtmlTagsCache,wxObject) | |
45 | ||
46 | #define CACHE_INCREMENT 64 | |
47 | ||
48 | bool wxIsCDATAElement(const wxChar *tag) | |
49 | { | |
9a83f860 VZ |
50 | return (wxStrcmp(tag, wxT("SCRIPT")) == 0) || |
51 | (wxStrcmp(tag, wxT("STYLE")) == 0); | |
c21faa0a VS |
52 | } |
53 | ||
54 | wx28HtmlTagsCache::wx28HtmlTagsCache(const wxString& source) | |
55 | { | |
56 | const wxChar *src = source.c_str(); | |
57 | int lng = source.length(); | |
58 | wxChar tagBuffer[256]; | |
59 | ||
60 | m_Cache = NULL; | |
61 | m_CacheSize = 0; | |
62 | m_CachePos = 0; | |
63 | ||
64 | int pos = 0; | |
65 | while (pos < lng) | |
66 | { | |
67 | if (src[pos] == wxT('<')) // tag found: | |
68 | { | |
69 | if (m_CacheSize % CACHE_INCREMENT == 0) | |
70 | m_Cache = (wx28HtmlCacheItem*) realloc(m_Cache, (m_CacheSize + CACHE_INCREMENT) * sizeof(wx28HtmlCacheItem)); | |
71 | int tg = m_CacheSize++; | |
72 | int stpos = pos++; | |
73 | m_Cache[tg].Key = stpos; | |
74 | ||
75 | int i; | |
76 | for ( i = 0; | |
77 | pos < lng && i < (int)WXSIZEOF(tagBuffer) - 1 && | |
78 | src[pos] != wxT('>') && !wxIsspace(src[pos]); | |
79 | i++, pos++ ) | |
80 | { | |
81 | tagBuffer[i] = (wxChar)wxToupper(src[pos]); | |
82 | } | |
9a83f860 | 83 | tagBuffer[i] = wxT('\0'); |
c21faa0a VS |
84 | |
85 | m_Cache[tg].Name = new wxChar[i+1]; | |
86 | memcpy(m_Cache[tg].Name, tagBuffer, (i+1)*sizeof(wxChar)); | |
87 | ||
88 | while (pos < lng && src[pos] != wxT('>')) pos++; | |
89 | ||
90 | if (src[stpos+1] == wxT('/')) // ending tag: | |
91 | { | |
92 | m_Cache[tg].End1 = m_Cache[tg].End2 = -2; | |
93 | // find matching begin tag: | |
94 | for (i = tg; i >= 0; i--) | |
95 | if ((m_Cache[i].End1 == -1) && (wxStrcmp(m_Cache[i].Name, tagBuffer+1) == 0)) | |
96 | { | |
97 | m_Cache[i].End1 = stpos; | |
98 | m_Cache[i].End2 = pos + 1; | |
99 | break; | |
100 | } | |
101 | } | |
102 | else | |
103 | { | |
104 | m_Cache[tg].End1 = m_Cache[tg].End2 = -1; | |
105 | ||
106 | if (wxIsCDATAElement(tagBuffer)) | |
107 | { | |
108 | // store the orig pos in case we are missing the closing | |
109 | // tag (see below) | |
110 | wxInt32 old_pos = pos; | |
111 | bool foundCloseTag = false; | |
112 | ||
113 | // find next matching tag | |
114 | int tag_len = wxStrlen(tagBuffer); | |
115 | while (pos < lng) | |
116 | { | |
117 | // find the ending tag | |
118 | while (pos + 1 < lng && | |
119 | (src[pos] != '<' || src[pos+1] != '/')) | |
120 | ++pos; | |
121 | if (src[pos] == '<') | |
122 | ++pos; | |
123 | ||
124 | // see if it matches | |
125 | int match_pos = 0; | |
126 | while (pos < lng && match_pos < tag_len && src[pos] != '>' && src[pos] != '<') { | |
127 | // cast to wxChar needed to suppress warning in | |
128 | // Unicode build | |
129 | if ((wxChar)wxToupper(src[pos]) == tagBuffer[match_pos]) { | |
130 | ++match_pos; | |
131 | } | |
132 | else if (src[pos] == wxT(' ') || src[pos] == wxT('\n') || | |
133 | src[pos] == wxT('\r') || src[pos] == wxT('\t')) { | |
134 | // need to skip over these | |
135 | } | |
136 | else { | |
137 | match_pos = 0; | |
138 | } | |
139 | ++pos; | |
140 | } | |
141 | ||
142 | // found a match | |
143 | if (match_pos == tag_len) | |
144 | { | |
145 | pos = pos - tag_len - 3; | |
146 | foundCloseTag = true; | |
147 | break; | |
148 | } | |
149 | else // keep looking for the closing tag | |
150 | { | |
151 | ++pos; | |
152 | } | |
153 | } | |
154 | if (!foundCloseTag) | |
155 | { | |
156 | // we didn't find closing tag; this means the markup | |
157 | // is incorrect and the best thing we can do is to | |
158 | // ignore the unclosed tag and continue parsing as if | |
159 | // it didn't exist: | |
160 | pos = old_pos; | |
161 | } | |
162 | } | |
163 | } | |
164 | } | |
165 | ||
166 | pos++; | |
167 | } | |
168 | ||
169 | // ok, we're done, now we'll free .Name members of cache - we don't need it anymore: | |
170 | for (int i = 0; i < m_CacheSize; i++) | |
171 | { | |
172 | delete[] m_Cache[i].Name; | |
173 | m_Cache[i].Name = NULL; | |
174 | } | |
175 | } | |
176 | ||
177 | void wx28HtmlTagsCache::QueryTag(int at, int* end1, int* end2) | |
178 | { | |
179 | if (m_Cache == NULL) return; | |
180 | if (m_Cache[m_CachePos].Key != at) | |
181 | { | |
182 | int delta = (at < m_Cache[m_CachePos].Key) ? -1 : 1; | |
183 | do | |
184 | { | |
185 | if ( m_CachePos < 0 || m_CachePos == m_CacheSize ) | |
186 | { | |
187 | // something is very wrong with HTML, give up by returning an | |
188 | // impossibly large value which is going to be ignored by the | |
189 | // caller | |
190 | *end1 = | |
191 | *end2 = INT_MAX; | |
192 | return; | |
193 | } | |
194 | ||
195 | m_CachePos += delta; | |
196 | } | |
197 | while (m_Cache[m_CachePos].Key != at); | |
198 | } | |
199 | *end1 = m_Cache[m_CachePos].End1; | |
200 | *end2 = m_Cache[m_CachePos].End2; | |
201 | } | |
202 | ||
203 | ||
204 | ||
205 | ||
206 | //----------------------------------------------------------------------------- | |
207 | // wx28HtmlTag | |
208 | //----------------------------------------------------------------------------- | |
209 | ||
210 | IMPLEMENT_CLASS(wx28HtmlTag,wxObject) | |
211 | ||
212 | wx28HtmlTag::wx28HtmlTag(wx28HtmlTag *parent, | |
213 | const wxString& source, int pos, int end_pos, | |
214 | wx28HtmlTagsCache *cache, | |
215 | wx28HtmlEntitiesParser *entParser) : wxObject() | |
216 | { | |
217 | /* Setup DOM relations */ | |
218 | ||
219 | m_Next = NULL; | |
220 | m_FirstChild = m_LastChild = NULL; | |
221 | m_Parent = parent; | |
222 | if (parent) | |
223 | { | |
224 | m_Prev = m_Parent->m_LastChild; | |
225 | if (m_Prev == NULL) | |
226 | m_Parent->m_FirstChild = this; | |
227 | else | |
228 | m_Prev->m_Next = this; | |
229 | m_Parent->m_LastChild = this; | |
230 | } | |
231 | else | |
232 | m_Prev = NULL; | |
233 | ||
234 | /* Find parameters and their values: */ | |
235 | ||
236 | int i; | |
237 | wxChar c; | |
238 | ||
239 | // fill-in name, params and begin pos: | |
240 | i = pos+1; | |
241 | ||
242 | // find tag's name and convert it to uppercase: | |
243 | while ((i < end_pos) && | |
244 | ((c = source[i++]) != wxT(' ') && c != wxT('\r') && | |
245 | c != wxT('\n') && c != wxT('\t') && | |
246 | c != wxT('>'))) | |
247 | { | |
248 | if ((c >= wxT('a')) && (c <= wxT('z'))) | |
249 | c -= (wxT('a') - wxT('A')); | |
250 | m_Name << c; | |
251 | } | |
252 | ||
253 | // if the tag has parameters, read them and "normalize" them, | |
254 | // i.e. convert to uppercase, replace whitespaces by spaces and | |
255 | // remove whitespaces around '=': | |
256 | if (source[i-1] != wxT('>')) | |
257 | { | |
258 | #define IS_WHITE(c) (c == wxT(' ') || c == wxT('\r') || \ | |
259 | c == wxT('\n') || c == wxT('\t')) | |
260 | wxString pname, pvalue; | |
261 | wxChar quote; | |
262 | enum | |
263 | { | |
264 | ST_BEFORE_NAME = 1, | |
265 | ST_NAME, | |
266 | ST_BEFORE_EQ, | |
267 | ST_BEFORE_VALUE, | |
268 | ST_VALUE | |
269 | } state; | |
270 | ||
271 | quote = 0; | |
272 | state = ST_BEFORE_NAME; | |
273 | while (i < end_pos) | |
274 | { | |
275 | c = source[i++]; | |
276 | ||
277 | if (c == wxT('>') && !(state == ST_VALUE && quote != 0)) | |
278 | { | |
279 | if (state == ST_BEFORE_EQ || state == ST_NAME) | |
280 | { | |
281 | m_ParamNames.Add(pname); | |
282 | m_ParamValues.Add(wxEmptyString); | |
283 | } | |
284 | else if (state == ST_VALUE && quote == 0) | |
285 | { | |
286 | m_ParamNames.Add(pname); | |
287 | if (entParser) | |
288 | m_ParamValues.Add(entParser->Parse(pvalue)); | |
289 | else | |
290 | m_ParamValues.Add(pvalue); | |
291 | } | |
292 | break; | |
293 | } | |
294 | switch (state) | |
295 | { | |
296 | case ST_BEFORE_NAME: | |
297 | if (!IS_WHITE(c)) | |
298 | { | |
299 | pname = c; | |
300 | state = ST_NAME; | |
301 | } | |
302 | break; | |
303 | case ST_NAME: | |
304 | if (IS_WHITE(c)) | |
305 | state = ST_BEFORE_EQ; | |
306 | else if (c == wxT('=')) | |
307 | state = ST_BEFORE_VALUE; | |
308 | else | |
309 | pname << c; | |
310 | break; | |
311 | case ST_BEFORE_EQ: | |
312 | if (c == wxT('=')) | |
313 | state = ST_BEFORE_VALUE; | |
314 | else if (!IS_WHITE(c)) | |
315 | { | |
316 | m_ParamNames.Add(pname); | |
317 | m_ParamValues.Add(wxEmptyString); | |
318 | pname = c; | |
319 | state = ST_NAME; | |
320 | } | |
321 | break; | |
322 | case ST_BEFORE_VALUE: | |
323 | if (!IS_WHITE(c)) | |
324 | { | |
325 | if (c == wxT('"') || c == wxT('\'')) | |
326 | quote = c, pvalue = wxEmptyString; | |
327 | else | |
328 | quote = 0, pvalue = c; | |
329 | state = ST_VALUE; | |
330 | } | |
331 | break; | |
332 | case ST_VALUE: | |
333 | if ((quote != 0 && c == quote) || | |
334 | (quote == 0 && IS_WHITE(c))) | |
335 | { | |
336 | m_ParamNames.Add(pname); | |
337 | if (quote == 0) | |
338 | { | |
339 | // VS: backward compatibility, no real reason, | |
340 | // but wxHTML code relies on this... :( | |
341 | pvalue.MakeUpper(); | |
342 | } | |
343 | if (entParser) | |
344 | m_ParamValues.Add(entParser->Parse(pvalue)); | |
345 | else | |
346 | m_ParamValues.Add(pvalue); | |
347 | state = ST_BEFORE_NAME; | |
348 | } | |
349 | else | |
350 | pvalue << c; | |
351 | break; | |
352 | } | |
353 | } | |
354 | ||
355 | #undef IS_WHITE | |
356 | } | |
357 | m_Begin = i; | |
358 | ||
359 | cache->QueryTag(pos, &m_End1, &m_End2); | |
360 | if (m_End1 > end_pos) m_End1 = end_pos; | |
361 | if (m_End2 > end_pos) m_End2 = end_pos; | |
362 | } | |
363 | ||
364 | wx28HtmlTag::~wx28HtmlTag() | |
365 | { | |
366 | wx28HtmlTag *t1, *t2; | |
367 | t1 = m_FirstChild; | |
368 | while (t1) | |
369 | { | |
370 | t2 = t1->GetNextSibling(); | |
371 | delete t1; | |
372 | t1 = t2; | |
373 | } | |
374 | } | |
375 | ||
376 | bool wx28HtmlTag::HasParam(const wxString& par) const | |
377 | { | |
378 | return (m_ParamNames.Index(par, false) != wxNOT_FOUND); | |
379 | } | |
380 | ||
381 | wxString wx28HtmlTag::GetParam(const wxString& par, bool with_commas) const | |
382 | { | |
383 | int index = m_ParamNames.Index(par, false); | |
384 | if (index == wxNOT_FOUND) | |
385 | return wxEmptyString; | |
386 | if (with_commas) | |
387 | { | |
388 | // VS: backward compatibility, seems to be never used by wxHTML... | |
389 | wxString s; | |
390 | s << wxT('"') << m_ParamValues[index] << wxT('"'); | |
391 | return s; | |
392 | } | |
393 | else | |
394 | return m_ParamValues[index]; | |
395 | } | |
396 | ||
397 | int wx28HtmlTag::ScanParam(const wxString& par, | |
398 | const wxChar *format, | |
399 | void *param) const | |
400 | { | |
401 | wxString parval = GetParam(par); | |
402 | return wxSscanf(parval, format, param); | |
403 | } | |
404 | ||
c21faa0a VS |
405 | bool wx28HtmlTag::GetParamAsInt(const wxString& par, int *clr) const |
406 | { | |
407 | if ( !HasParam(par) ) | |
408 | return false; | |
409 | ||
410 | long i; | |
411 | if ( !GetParam(par).ToLong(&i) ) | |
412 | return false; | |
413 | ||
414 | *clr = (int)i; | |
415 | return true; | |
416 | } | |
417 | ||
418 | wxString wx28HtmlTag::GetAllParams() const | |
419 | { | |
420 | // VS: this function is for backward compatibility only, | |
421 | // never used by wxHTML | |
422 | wxString s; | |
423 | size_t cnt = m_ParamNames.GetCount(); | |
424 | for (size_t i = 0; i < cnt; i++) | |
425 | { | |
426 | s << m_ParamNames[i]; | |
427 | s << wxT('='); | |
428 | if (m_ParamValues[i].Find(wxT('"')) != wxNOT_FOUND) | |
429 | s << wxT('\'') << m_ParamValues[i] << wxT('\''); | |
430 | else | |
431 | s << wxT('"') << m_ParamValues[i] << wxT('"'); | |
432 | } | |
433 | return s; | |
434 | } | |
435 | ||
436 | wx28HtmlTag *wx28HtmlTag::GetFirstSibling() const | |
437 | { | |
438 | if (m_Parent) | |
439 | return m_Parent->m_FirstChild; | |
440 | else | |
441 | { | |
442 | wx28HtmlTag *cur = (wx28HtmlTag*)this; | |
443 | while (cur->m_Prev) | |
444 | cur = cur->m_Prev; | |
445 | return cur; | |
446 | } | |
447 | } | |
448 | ||
449 | wx28HtmlTag *wx28HtmlTag::GetLastSibling() const | |
450 | { | |
451 | if (m_Parent) | |
452 | return m_Parent->m_LastChild; | |
453 | else | |
454 | { | |
455 | wx28HtmlTag *cur = (wx28HtmlTag*)this; | |
456 | while (cur->m_Next) | |
457 | cur = cur->m_Next; | |
458 | return cur; | |
459 | } | |
460 | } | |
461 | ||
462 | wx28HtmlTag *wx28HtmlTag::GetNextTag() const | |
463 | { | |
464 | if (m_FirstChild) return m_FirstChild; | |
465 | if (m_Next) return m_Next; | |
466 | wx28HtmlTag *cur = m_Parent; | |
467 | if (!cur) return NULL; | |
468 | while (cur->m_Parent && !cur->m_Next) | |
469 | cur = cur->m_Parent; | |
470 | return cur->m_Next; | |
471 | } |