]>
Commit | Line | Data |
---|---|---|
d7463f75 JS |
1 | ///////////////////////////////////////////////////////////////////////////// |
2 | // Name: htmlparser.cpp | |
3 | // Purpose: Simple HTML parser | |
4 | // Author: Julian Smart | |
5 | // Modified by: | |
6 | // Created: 2002-09-25 | |
7 | // RCS-ID: $Id$ | |
8 | // Copyright: (c) Julian Smart | |
9 | // Licence: wxWindows license | |
10 | ///////////////////////////////////////////////////////////////////////////// | |
11 | ||
12 | // ---------------------------------------------------------------------------- | |
13 | // headers | |
14 | // ---------------------------------------------------------------------------- | |
d7463f75 | 15 | |
d9ab621e WS |
16 | // For compilers that support precompilation, includes "wx/wx.h". |
17 | #include "wx/wxprec.h" | |
d7463f75 JS |
18 | |
19 | #ifdef __BORLANDC__ | |
20 | #pragma hdrstop | |
21 | #endif | |
22 | ||
d7463f75 | 23 | #include "wx/wfstream.h" |
d9ab621e | 24 | #include "wx/textfile.h" |
d7463f75 | 25 | #include "wx/txtstrm.h" |
d7463f75 JS |
26 | #include "htmlparser.h" |
27 | ||
28 | /// Useful insertion operators for wxOutputStream. | |
29 | static wxOutputStream& operator <<(wxOutputStream& stream, const wxString& s) | |
30 | { | |
31 | wxTextOutputStream txt(stream); // This is to make sure the line-ending is native! | |
32 | ||
33 | txt.WriteString(s); | |
34 | return stream; | |
35 | } | |
36 | ||
#if 0 // Gives warning because not used...
// Writes a long in decimal form by formatting it into a wxString first.
static wxOutputStream& operator <<(wxOutputStream& stream, long l)
{
    wxString formatted;
    formatted.Printf("%ld", l);
    stream << formatted;
    return stream;
}

// Writes a single character by formatting it into a wxString first.
static wxOutputStream& operator <<(wxOutputStream& stream, const char c)
{
    wxString formatted;
    formatted.Printf("%c", c);
    stream << formatted;
    return stream;
}
#endif // 0
52 | ||
/*
 * wxSimpleHtmlParser
 * Simple HTML parser
 */
57 | ||
58 | wxSimpleHtmlParser::wxSimpleHtmlParser() | |
59 | { | |
60 | m_topLevel = NULL; | |
61 | m_pos = 0; | |
62 | } | |
63 | ||
64 | ||
65 | wxSimpleHtmlParser::~wxSimpleHtmlParser() | |
66 | { | |
67 | Clear(); | |
68 | } | |
69 | ||
70 | bool wxSimpleHtmlParser::ParseFile(const wxString& filename) | |
71 | { | |
72 | wxTextFile textFile; | |
73 | ||
74 | if (textFile.Open(filename)) | |
75 | { | |
76 | wxString text; | |
77 | wxString line; | |
78 | int i; | |
79 | int count = textFile.GetLineCount(); | |
80 | for (i = 0; i < count; i++) | |
81 | { | |
82 | if (i == 0) | |
83 | line = textFile.GetFirstLine(); | |
84 | else | |
85 | line = textFile.GetNextLine(); | |
86 | ||
87 | text += line; | |
88 | if (i != (count - 1)) | |
89 | text += wxT("\n"); | |
90 | } | |
91 | ||
92 | #if 0 | |
93 | for ( line = textFile.GetFirstLine(); !textFile.Eof(); line = textFile.GetNextLine() ) | |
94 | { | |
95 | text += line; | |
96 | if (!textFile.Eof()) | |
97 | text += wxT("\n"); | |
98 | } | |
99 | #endif | |
100 | ||
101 | return ParseString(text); | |
102 | } | |
103 | else | |
4fe30bce | 104 | return false; |
d7463f75 JS |
105 | } |
106 | ||
107 | bool wxSimpleHtmlParser::ParseString(const wxString& str) | |
108 | { | |
109 | Clear(); | |
110 | ||
111 | m_pos = 0; | |
112 | m_text = str; | |
113 | m_length = str.Length(); | |
114 | ||
115 | m_topLevel = new wxSimpleHtmlTag(wxT("TOPLEVEL"), wxSimpleHtmlTag_TopLevel); | |
116 | ||
117 | bool bResult = ParseHtml(m_topLevel); | |
118 | ||
254a2129 | 119 | wxASSERT(bResult); // Failed to parse the TAGs. |
d7463f75 JS |
120 | // Hint: Check if every open tag has a close tag! |
121 | ||
122 | return bResult; | |
123 | } | |
124 | ||
125 | // Main recursive parsing function | |
126 | bool wxSimpleHtmlParser::ParseHtml(wxSimpleHtmlTag* parent) | |
127 | { | |
128 | if (!parent) | |
4fe30bce | 129 | return false; |
d7463f75 JS |
130 | |
131 | while (!Eof()) | |
132 | { | |
133 | EatWhitespace(); | |
134 | if (IsComment()) | |
135 | { | |
136 | ParseComment(); | |
137 | } | |
138 | else if (IsDirective()) | |
139 | { | |
140 | wxSimpleHtmlTag* tag = ParseDirective(); | |
141 | if (tag) | |
142 | parent->AppendTag(tag); | |
143 | } | |
144 | else if (IsXMLDeclaration()) | |
145 | { | |
146 | wxSimpleHtmlTag* tag = ParseXMLDeclaration(); | |
147 | if (tag) | |
148 | parent->AppendTag(tag); | |
149 | } | |
150 | else if (IsTagClose()) | |
151 | { | |
152 | wxSimpleHtmlTag* tag = ParseTagClose(); | |
153 | if (tag) | |
154 | { | |
155 | if (IsCloseTagNeeded(tag->GetName())) | |
156 | { | |
157 | if (!parent->GetParent()) | |
4fe30bce | 158 | return false; |
d7463f75 | 159 | parent->GetParent()->AppendTag(tag); |
4fe30bce | 160 | return true; |
d7463f75 JS |
161 | } |
162 | else | |
163 | parent->AppendTag(tag); | |
254a2129 | 164 | } |
d7463f75 JS |
165 | } |
166 | else if (IsTagStartBracket(GetChar(m_pos))) | |
167 | { | |
168 | wxSimpleHtmlTag* tag = ParseTagHeader(); | |
169 | if (tag) | |
170 | parent->AppendTag(tag); | |
171 | ||
172 | if (IsCloseTagNeeded(tag->GetName())) | |
173 | { | |
174 | if (!ParseHtml(tag)) | |
4fe30bce | 175 | return false; // Something didn't go ok, so don't continue. |
d7463f75 JS |
176 | } |
177 | } | |
178 | else | |
179 | { | |
180 | // Just a text string | |
181 | wxString text; | |
182 | ParseText(text); | |
183 | ||
184 | wxSimpleHtmlTag* tag = new wxSimpleHtmlTag(wxT("TEXT"), wxSimpleHtmlTag_Text); | |
185 | tag->SetText(text); | |
186 | if(parent->GetParent()) | |
187 | parent->GetParent()->AppendTag(tag); | |
188 | else | |
254a2129 | 189 | parent->AppendTag(tag); // When this occurs it is probably the |
d7463f75 JS |
190 | // empty lines at the end of the file... |
191 | } | |
192 | } | |
4fe30bce | 193 | return true; |
d7463f75 JS |
194 | } |
195 | ||
196 | // Plain text, up until an angled bracket | |
197 | bool wxSimpleHtmlParser::ParseText(wxString& text) | |
198 | { | |
199 | while (!Eof() && GetChar(m_pos) != wxT('<')) | |
200 | { | |
254a2129 | 201 | text += (wxChar)GetChar(m_pos); |
d7463f75 JS |
202 | m_pos ++; |
203 | } | |
204 | DecodeSpecialChars(text); | |
4fe30bce | 205 | return true; |
d7463f75 JS |
206 | } |
207 | ||
208 | wxSimpleHtmlTag* wxSimpleHtmlParser::ParseTagHeader() | |
209 | { | |
210 | if (IsTagStartBracket(GetChar(m_pos))) | |
211 | { | |
212 | m_pos ++; | |
213 | EatWhitespace(); | |
214 | ||
215 | wxString word; | |
4fe30bce | 216 | ReadWord(word, true); |
d7463f75 JS |
217 | |
218 | EatWhitespace(); | |
219 | ||
220 | wxSimpleHtmlTag* tag = new wxSimpleHtmlTag(word, wxSimpleHtmlTag_Open); | |
221 | ||
222 | ParseAttributes(tag); | |
223 | ||
224 | EatWhitespace(); | |
225 | ||
226 | if (IsTagEndBracket(GetChar(m_pos))) | |
227 | m_pos ++; | |
228 | ||
229 | return tag; | |
230 | } | |
231 | else | |
232 | return NULL; | |
233 | } | |
234 | ||
235 | wxSimpleHtmlTag* wxSimpleHtmlParser::ParseTagClose() | |
236 | { | |
4fe30bce | 237 | Matches(wxT("</"), true); |
d7463f75 JS |
238 | |
239 | EatWhitespace(); | |
240 | ||
241 | wxString word; | |
4fe30bce | 242 | ReadWord(word, true); |
d7463f75 JS |
243 | |
244 | EatWhitespace(); | |
245 | m_pos ++; | |
246 | ||
247 | wxSimpleHtmlTag* tag = new wxSimpleHtmlTag(word, wxSimpleHtmlTag_Close); | |
248 | return tag; | |
249 | } | |
250 | ||
251 | bool wxSimpleHtmlParser::ParseAttributes(wxSimpleHtmlTag* tag) | |
252 | { | |
253 | // Parse attributes of a tag header until we reach > | |
254 | while (!IsTagEndBracket(GetChar(m_pos)) && !Eof()) | |
255 | { | |
256 | EatWhitespace(); | |
257 | ||
258 | wxString attrName, attrValue; | |
259 | ||
260 | if (IsString()) | |
261 | { | |
4fe30bce | 262 | ReadString(attrName, true); |
d7463f75 JS |
263 | tag->AppendAttribute(attrName, wxEmptyString); |
264 | } | |
265 | else if (IsNumeric(GetChar(m_pos))) | |
266 | { | |
4fe30bce | 267 | ReadNumber(attrName, true); |
d7463f75 JS |
268 | tag->AppendAttribute(attrName, wxEmptyString); |
269 | } | |
270 | else | |
271 | { | |
272 | // Try to read an attribute name/value pair, or at least a name | |
273 | // without the value | |
4fe30bce | 274 | ReadLiteral(attrName, true); |
d7463f75 JS |
275 | EatWhitespace(); |
276 | ||
277 | if (GetChar(m_pos) == wxT('=')) | |
278 | { | |
279 | m_pos ++; | |
280 | EatWhitespace(); | |
281 | ||
282 | if (IsString()) | |
4fe30bce | 283 | ReadString(attrValue, true); |
d7463f75 | 284 | else if (!Eof() && !IsTagEndBracket(GetChar(m_pos))) |
4fe30bce | 285 | ReadLiteral(attrValue, true); |
d7463f75 JS |
286 | } |
287 | if (!attrName.IsEmpty()) | |
288 | tag->AppendAttribute(attrName, attrValue); | |
289 | } | |
290 | } | |
4fe30bce | 291 | return true; |
d7463f75 JS |
292 | } |
293 | ||
294 | // e.g. <!DOCTYPE ....> | |
295 | wxSimpleHtmlTag* wxSimpleHtmlParser::ParseDirective() | |
296 | { | |
4fe30bce | 297 | Matches(wxT("<!"), true); |
d7463f75 JS |
298 | |
299 | EatWhitespace(); | |
300 | ||
301 | wxString word; | |
4fe30bce | 302 | ReadWord(word, true); |
d7463f75 JS |
303 | |
304 | EatWhitespace(); | |
305 | ||
306 | wxSimpleHtmlTag* tag = new wxSimpleHtmlTag(word, wxSimpleHtmlTag_Directive); | |
307 | ||
308 | ParseAttributes(tag); | |
309 | ||
310 | EatWhitespace(); | |
311 | ||
312 | if (IsTagEndBracket(GetChar(m_pos))) | |
313 | m_pos ++; | |
314 | ||
315 | return tag; | |
316 | } | |
317 | ||
318 | // e.g. <?xml .... ?> | |
319 | wxSimpleHtmlTag* wxSimpleHtmlParser::ParseXMLDeclaration() | |
320 | { | |
4fe30bce | 321 | Matches(wxT("<?"), true); |
d7463f75 JS |
322 | |
323 | EatWhitespace(); | |
324 | ||
325 | wxString word; | |
4fe30bce | 326 | ReadWord(word, true); |
d7463f75 JS |
327 | |
328 | EatWhitespace(); | |
329 | ||
330 | wxSimpleHtmlTag* tag = new wxSimpleHtmlTag(word, wxSimpleHtmlTag_XMLDeclaration); | |
331 | ||
332 | ParseAttributes(tag); | |
333 | ||
334 | EatWhitespace(); | |
335 | ||
336 | if (IsTagEndBracket(GetChar(m_pos))) | |
337 | m_pos ++; | |
338 | ||
339 | return tag; | |
340 | } | |
341 | ||
342 | bool wxSimpleHtmlParser::ParseComment() | |
343 | { | |
344 | // Eat the comment tag start | |
4fe30bce | 345 | Matches(wxT("<!--"), true); |
d7463f75 | 346 | |
4fe30bce | 347 | while (!Eof() && !Matches(wxT("-->"), true)) |
d7463f75 JS |
348 | { |
349 | m_pos ++; | |
350 | } | |
351 | ||
4fe30bce | 352 | return true; |
d7463f75 JS |
353 | } |
354 | ||
355 | bool wxSimpleHtmlParser::EatWhitespace() | |
356 | { | |
357 | while (!Eof() && IsWhitespace(GetChar(m_pos))) | |
358 | m_pos ++; | |
4fe30bce | 359 | return true; |
d7463f75 JS |
360 | } |
361 | ||
362 | bool wxSimpleHtmlParser::EatWhitespace(int& pos) | |
363 | { | |
364 | while (!Eof(pos) && IsWhitespace(GetChar(pos))) | |
365 | pos ++; | |
4fe30bce | 366 | return true; |
d7463f75 JS |
367 | } |
368 | ||
369 | bool wxSimpleHtmlParser::ReadString(wxString& str, bool eatIt) | |
370 | { | |
371 | int pos = m_pos; | |
372 | if (GetChar(pos) == (int) '"') | |
373 | { | |
374 | pos ++; | |
375 | while (!Eof(pos) && GetChar(pos) != (int) '"') | |
376 | { | |
377 | // TODO: how are quotes escaped in HTML? | |
378 | str += (wxChar) GetChar(pos); | |
379 | pos ++; | |
380 | } | |
381 | if (GetChar(pos) == (int) '"') | |
382 | pos ++; | |
383 | if (eatIt) | |
384 | m_pos = pos; | |
385 | DecodeSpecialChars(str); | |
4fe30bce | 386 | return true; |
d7463f75 JS |
387 | } |
388 | else | |
4fe30bce | 389 | return false; |
d7463f75 JS |
390 | } |
391 | ||
392 | bool wxSimpleHtmlParser::ReadWord(wxString& str, bool eatIt) | |
393 | { | |
394 | int pos = m_pos; | |
395 | ||
396 | if (!IsAlpha(GetChar(pos))) | |
4fe30bce | 397 | return false; |
d7463f75 JS |
398 | |
399 | str += (wxChar) GetChar(pos) ; | |
400 | pos ++; | |
401 | ||
402 | while (!Eof(pos) && IsWordChar(GetChar(pos))) | |
403 | { | |
404 | str += (wxChar) GetChar(pos); | |
405 | pos ++; | |
406 | } | |
407 | if (eatIt) | |
408 | m_pos = pos; | |
409 | DecodeSpecialChars(str); | |
4fe30bce | 410 | return true; |
d7463f75 JS |
411 | } |
412 | ||
413 | bool wxSimpleHtmlParser::ReadNumber(wxString& str, bool eatIt) | |
414 | { | |
415 | int pos = m_pos; | |
416 | ||
417 | if (!IsNumeric(GetChar(pos))) | |
4fe30bce | 418 | return false; |
d7463f75 JS |
419 | |
420 | str += (wxChar) GetChar(pos) ; | |
421 | pos ++; | |
422 | ||
423 | while (!Eof(pos) && IsNumeric(GetChar(pos))) | |
424 | { | |
425 | str += (wxChar) GetChar(pos); | |
426 | pos ++; | |
427 | } | |
428 | if (eatIt) | |
429 | m_pos = pos; | |
430 | DecodeSpecialChars(str); | |
4fe30bce | 431 | return true; |
d7463f75 JS |
432 | } |
433 | ||
434 | // Could be number, string, whatever, but read up until whitespace or end of tag (but not a quoted string) | |
435 | bool wxSimpleHtmlParser::ReadLiteral(wxString& str, bool eatIt) | |
436 | { | |
437 | int pos = m_pos; | |
438 | ||
439 | while (!Eof(pos) && !IsWhitespace(GetChar(pos)) && !IsTagEndBracket(GetChar(pos)) && GetChar(pos) != wxT('=')) | |
440 | { | |
254a2129 | 441 | str += (wxChar)GetChar(pos); |
d7463f75 JS |
442 | pos ++; |
443 | } | |
444 | if (eatIt) | |
445 | m_pos = pos; | |
446 | DecodeSpecialChars(str); | |
4fe30bce | 447 | return true; |
d7463f75 JS |
448 | } |
449 | ||
450 | bool wxSimpleHtmlParser::IsComment() | |
451 | { | |
452 | return Matches(wxT("<!--")); | |
453 | } | |
454 | ||
455 | bool wxSimpleHtmlParser::IsDirective() | |
456 | { | |
457 | return Matches(wxT("<!")); | |
458 | } | |
459 | ||
460 | bool wxSimpleHtmlParser::IsXMLDeclaration() | |
461 | { | |
462 | return Matches(wxT("<?xml")); | |
463 | } | |
464 | ||
465 | bool wxSimpleHtmlParser::IsString() | |
466 | { | |
467 | return (GetChar(m_pos) == (int) '"') ; | |
468 | } | |
469 | ||
470 | bool wxSimpleHtmlParser::IsWord() | |
471 | { | |
472 | return (IsAlpha(GetChar(m_pos))); | |
473 | } | |
474 | ||
475 | bool wxSimpleHtmlParser::IsTagClose() | |
476 | { | |
477 | return Matches(wxT("</")); | |
478 | } | |
479 | ||
480 | bool wxSimpleHtmlParser::IsTagStartBracket(int ch) | |
481 | { | |
482 | return (ch == wxT('<')); | |
483 | } | |
484 | ||
485 | bool wxSimpleHtmlParser::IsTagEndBracket(int ch) | |
486 | { | |
487 | return (ch == wxT('>')); | |
488 | } | |
489 | ||
490 | bool wxSimpleHtmlParser::IsWhitespace(int ch) | |
491 | { | |
492 | return ((ch == 13) || (ch == 10) || (ch == 32) || (ch == (int) '\t')) ; | |
493 | } | |
494 | ||
495 | bool wxSimpleHtmlParser::IsAlpha(int ch) | |
496 | { | |
497 | return (wxIsalpha((wxChar) ch) != 0); | |
498 | } | |
499 | ||
500 | bool wxSimpleHtmlParser::IsWordChar(int ch) | |
501 | { | |
502 | return (wxIsalpha((wxChar) ch) != 0 || ch == wxT('-') || ch == wxT('_') || IsNumeric(ch)); | |
503 | } | |
504 | ||
505 | bool wxSimpleHtmlParser::IsNumeric(int ch) | |
506 | { | |
507 | return (wxIsdigit((wxChar) ch) != 0 || ch == wxT('-') || ch == wxT('.')) ; | |
508 | } | |
509 | ||
510 | bool wxSimpleHtmlParser::IsCloseTagNeeded(const wxString &name) | |
511 | { | |
4fe30bce WS |
512 | if (name.IsSameAs(wxT("P"), false)) // e.g <P> |
513 | return false; | |
d7463f75 JS |
514 | |
515 | // ToDo add more items here. | |
516 | ||
4fe30bce | 517 | return true; |
d7463f75 JS |
518 | } |
519 | ||
520 | // Encode/Decode Special Characters. | |
521 | // See here for the used table: http://msdn.microsoft.com/library/default.asp?url=/library/en-us/xmlsql/ac_xml1_1nqk.asp | |
522 | /* static */ void wxSimpleHtmlParser::DecodeSpecialChars(wxString &value) | |
523 | { | |
254a2129 | 524 | // XML translation |
4fe30bce WS |
525 | value.Replace(wxT(">"), wxT(">"), true); |
526 | value.Replace(wxT("<"), wxT("<"), true); | |
527 | value.Replace(wxT("""), wxT("\""), true); | |
528 | value.Replace(wxT("'"), wxT("'"), true); | |
529 | value.Replace(wxT("&"), wxT("&"), true); // Note: do this as last to prevent replace problems. | |
d7463f75 JS |
530 | } |
531 | ||
532 | /* static */ wxString wxSimpleHtmlParser::EncodeSpecialChars(const wxString &value) | |
533 | { | |
534 | wxString newvalue = value; | |
535 | ||
254a2129 | 536 | // XML translation |
4fe30bce WS |
537 | newvalue.Replace(wxT("&"), wxT("&"), true); // Note: do this as first to prevent replace problems. |
538 | newvalue.Replace(wxT(">"), wxT(">"), true); | |
539 | newvalue.Replace(wxT("<"), wxT("<"), true); | |
540 | newvalue.Replace(wxT("\""),wxT("""), true); | |
541 | newvalue.Replace(wxT("'"), wxT("'"), true); | |
254a2129 | 542 | |
d7463f75 JS |
543 | return newvalue; |
544 | } | |
545 | ||
546 | // Matches this string (case insensitive) | |
547 | bool wxSimpleHtmlParser::Matches(const wxString& tok, bool eatIt) | |
548 | { | |
549 | wxString text(m_text.Mid(m_pos, tok.Length())); | |
550 | bool success = (text.CmpNoCase(tok) == 0) ; | |
551 | if (success && eatIt) | |
552 | { | |
553 | m_pos += tok.Length(); | |
554 | } | |
555 | return success; | |
556 | } | |
557 | ||
558 | // Safe way of getting a character | |
559 | int wxSimpleHtmlParser::GetChar(size_t i) const | |
560 | { | |
561 | if (i >= (size_t) m_length) | |
562 | return -1; | |
563 | return m_text[i]; | |
564 | } | |
565 | ||
566 | void wxSimpleHtmlParser::Clear() | |
567 | { | |
568 | if (m_topLevel) | |
569 | delete m_topLevel; | |
570 | m_topLevel = NULL; | |
571 | m_text = wxEmptyString; | |
572 | m_pos = 0; | |
573 | m_length = 0; | |
574 | } | |
575 | ||
576 | // Write this file | |
577 | void wxSimpleHtmlParser::Write(wxOutputStream& stream) | |
578 | { | |
579 | if (m_topLevel) | |
580 | m_topLevel->Write(stream); | |
581 | } | |
582 | ||
583 | bool wxSimpleHtmlParser::WriteFile(wxString& filename) | |
584 | { | |
585 | wxFileOutputStream fstream(filename); | |
586 | if (fstream.Ok()) | |
587 | { | |
588 | Write(fstream); | |
4fe30bce | 589 | return true; |
d7463f75 JS |
590 | } |
591 | else | |
4fe30bce | 592 | return false; |
d7463f75 JS |
593 | } |
594 | ||
595 | /* | |
596 | * wxSimpleHtmlTag | |
597 | * Representation of a tag or chunk of text | |
598 | */ | |
599 | ||
600 | wxSimpleHtmlTag::wxSimpleHtmlTag(const wxString& tagName, int tagType) | |
601 | { | |
602 | m_name = tagName; | |
603 | m_type = tagType; | |
604 | m_attributes = NULL; | |
605 | m_children = NULL; | |
606 | m_parent = NULL; | |
607 | m_next = NULL; | |
608 | } | |
609 | ||
610 | wxSimpleHtmlTag::~wxSimpleHtmlTag() | |
611 | { | |
612 | ClearAttributes(); | |
613 | ClearChildren(); | |
614 | } | |
615 | ||
616 | //// Operations | |
617 | void wxSimpleHtmlTag::ClearAttributes() | |
618 | { | |
619 | if (m_attributes) | |
620 | { | |
621 | wxSimpleHtmlAttribute* attr = m_attributes; | |
622 | while (attr) | |
623 | { | |
624 | wxSimpleHtmlAttribute* next = attr->m_next; | |
625 | ||
626 | attr->m_next = NULL; | |
627 | delete attr; | |
628 | attr = next; | |
629 | } | |
630 | m_attributes = NULL; | |
631 | } | |
632 | } | |
633 | ||
634 | wxSimpleHtmlAttribute* wxSimpleHtmlTag::FindAttribute(const wxString& name) const | |
635 | { | |
636 | wxSimpleHtmlAttribute* attr = m_attributes; | |
637 | while (attr) | |
638 | { | |
639 | if (attr->GetName().CmpNoCase(name) == 0) | |
640 | { | |
641 | return attr; | |
642 | } | |
643 | attr = attr->m_next; | |
644 | } | |
645 | return NULL; | |
646 | } | |
647 | ||
648 | void wxSimpleHtmlTag::AppendAttribute(const wxString& name, const wxString& value) | |
649 | { | |
650 | wxSimpleHtmlAttribute* attr = new wxSimpleHtmlAttribute(name, value); | |
651 | if (m_attributes) | |
652 | { | |
653 | // Find tail | |
654 | wxSimpleHtmlAttribute* last = m_attributes; | |
655 | while (last->m_next) | |
656 | last = last->m_next; | |
657 | ||
658 | last->m_next = attr; | |
659 | } | |
660 | else | |
661 | m_attributes = attr; | |
662 | } | |
663 | ||
664 | void wxSimpleHtmlTag::ClearChildren() | |
665 | { | |
666 | if (m_children) | |
667 | { | |
668 | wxSimpleHtmlTag* child = m_children; | |
669 | while (child) | |
670 | { | |
671 | wxSimpleHtmlTag* next = child->m_next; | |
672 | ||
673 | child->m_next = NULL; | |
674 | delete child; | |
675 | child = next; | |
676 | } | |
677 | m_children = NULL; | |
678 | } | |
679 | } | |
680 | ||
681 | void wxSimpleHtmlTag::RemoveChild(wxSimpleHtmlTag *remove) | |
682 | { | |
683 | if (m_children) | |
684 | { | |
685 | wxSimpleHtmlTag* child = m_children; | |
686 | wxSimpleHtmlTag* prev = NULL; | |
687 | while (child) | |
688 | { | |
689 | wxSimpleHtmlTag* next = child->m_next; | |
690 | ||
691 | if (child == remove) | |
692 | { | |
693 | child->m_next = NULL; | |
694 | delete child; | |
254a2129 | 695 | |
d7463f75 JS |
696 | if (prev != NULL) |
697 | prev->m_next = next; | |
698 | else | |
699 | m_children = next; | |
700 | ||
701 | return; | |
702 | } | |
703 | prev = child; | |
704 | child = next; | |
705 | } | |
706 | } | |
707 | } | |
708 | ||
709 | void wxSimpleHtmlTag::AppendTag(wxSimpleHtmlTag* tag) | |
710 | { | |
711 | if (!tag) | |
712 | return; | |
713 | ||
714 | if (m_children) | |
715 | { | |
716 | // Find tail | |
717 | wxSimpleHtmlTag* last = m_children; | |
718 | while (last->m_next) | |
719 | last = last->m_next; | |
720 | ||
254a2129 | 721 | last->m_next = tag; |
d7463f75 JS |
722 | } |
723 | else | |
724 | { | |
725 | m_children = tag; | |
726 | } | |
727 | ||
728 | tag->m_parent = this; | |
729 | } | |
730 | ||
731 | void wxSimpleHtmlTag::AppendTagAfterUs(wxSimpleHtmlTag* tag) | |
732 | { | |
733 | if (!tag) | |
734 | return; | |
735 | ||
736 | tag->m_parent = m_parent; | |
737 | tag->m_next = m_next; | |
738 | m_next = tag; | |
739 | } | |
740 | ||
741 | // Gets the text from this tag and its descendants | |
742 | wxString wxSimpleHtmlTag::GetTagText() | |
743 | { | |
744 | wxString text; | |
745 | if (m_children) | |
746 | { | |
747 | wxSimpleHtmlTag* tag = m_children; | |
748 | while (tag) | |
749 | { | |
750 | text += tag->GetTagText(); | |
751 | tag = tag->m_next; | |
752 | } | |
753 | return text; | |
754 | } | |
755 | else if (GetType() == wxSimpleHtmlTag_Text) | |
756 | return GetText(); | |
757 | else | |
758 | return wxEmptyString; | |
759 | } | |
760 | ||
761 | int wxSimpleHtmlTag::GetAttributeCount() const | |
762 | { | |
763 | int count = 0; | |
764 | wxSimpleHtmlAttribute* attr = m_attributes; | |
765 | while (attr) | |
766 | { | |
767 | count ++; | |
768 | attr = attr->m_next; | |
769 | } | |
770 | return count; | |
771 | } | |
772 | ||
773 | wxSimpleHtmlAttribute* wxSimpleHtmlTag::GetAttribute(int i) const | |
774 | { | |
775 | int count = 0; | |
776 | wxSimpleHtmlAttribute* attr = m_attributes; | |
777 | while (attr) | |
778 | { | |
779 | if (count == i) | |
780 | return attr; | |
781 | count ++; | |
782 | attr = attr->m_next; | |
783 | } | |
784 | return NULL; | |
785 | } | |
786 | ||
787 | int wxSimpleHtmlTag::GetChildCount() const | |
788 | { | |
789 | int count = 0; | |
790 | wxSimpleHtmlTag* tag = m_children; | |
791 | while (tag) | |
792 | { | |
793 | count ++; | |
794 | tag = tag->m_next; | |
795 | } | |
796 | return count; | |
797 | } | |
798 | ||
799 | bool wxSimpleHtmlTag::HasAttribute(const wxString& name, const wxString& value) const | |
800 | { | |
801 | wxSimpleHtmlAttribute* attr = FindAttribute(name); | |
802 | ||
803 | return (attr && (attr->GetValue().CmpNoCase(value) == 0)) ; | |
804 | } | |
805 | ||
806 | bool wxSimpleHtmlTag::HasAttribute(const wxString& name) const | |
807 | { | |
808 | return FindAttribute(name) != NULL ; | |
809 | } | |
810 | ||
811 | bool wxSimpleHtmlTag::GetAttributeValue(wxString& value, const wxString& attrName) | |
812 | { | |
813 | wxSimpleHtmlAttribute* attr = FindAttribute(attrName); | |
814 | if (attr) | |
815 | { | |
816 | value = attr->GetValue(); | |
4fe30bce | 817 | return true; |
d7463f75 JS |
818 | } |
819 | else | |
4fe30bce | 820 | return false; |
d7463f75 JS |
821 | } |
822 | ||
254a2129 | 823 | // Search forward from this tag until we find a tag with this name & attribute |
d7463f75 JS |
824 | wxSimpleHtmlTag* wxSimpleHtmlTag::FindTag(const wxString& tagName, const wxString& attrName) |
825 | { | |
826 | wxSimpleHtmlTag* tag = m_next; | |
827 | while (tag) | |
828 | { | |
829 | if (tag->NameIs(tagName) && (attrName.IsEmpty() || tag->FindAttribute(attrName))) | |
830 | return tag; | |
831 | ||
832 | tag = tag->m_next; | |
833 | } | |
834 | return NULL; | |
835 | } | |
836 | ||
837 | bool wxSimpleHtmlTag::FindTextUntilTagClose(wxString& text, const wxString& tagName) | |
838 | { | |
839 | wxSimpleHtmlTag* tag = this; | |
840 | while (tag) | |
841 | { | |
842 | if (tag->GetType() == wxSimpleHtmlTag_Close && tag->NameIs(tagName)) | |
4fe30bce | 843 | return true; |
d7463f75 JS |
844 | |
845 | if (tag->GetType() == wxSimpleHtmlTag_Text) | |
846 | text += tag->GetText(); | |
847 | ||
848 | tag = tag->m_next; | |
849 | } | |
4fe30bce | 850 | return true; |
d7463f75 JS |
851 | } |
852 | ||
853 | ||
854 | wxSimpleHtmlTag* wxSimpleHtmlTag::GetChild(int i) const | |
855 | { | |
856 | int count = 0; | |
857 | wxSimpleHtmlTag* tag = m_children; | |
858 | while (tag) | |
859 | { | |
860 | if (count == i) | |
861 | return tag; | |
862 | ||
863 | count ++; | |
864 | tag = tag->m_next; | |
865 | } | |
866 | return NULL; | |
867 | } | |
868 | ||
869 | void wxSimpleHtmlTag::Write(wxOutputStream& stream) | |
870 | { | |
871 | // Some helpers to layout the open and close tags. | |
4fe30bce | 872 | static bool sbUseTab = true; |
d7463f75 JS |
873 | static size_t snTabLevel = 0; |
874 | ||
875 | #if 0 // Enable if no tabs should be used to align the tags. | |
876 | snTabLevel = 0; | |
877 | #endif | |
878 | ||
879 | // Handle the different types of tags we can write. | |
880 | switch (GetType()) | |
881 | { | |
882 | case wxSimpleHtmlTag_Text: | |
883 | { | |
884 | stream << wxSimpleHtmlParser::EncodeSpecialChars(m_text); | |
885 | break; | |
886 | } | |
887 | case wxSimpleHtmlTag_Open: | |
888 | { | |
889 | size_t tab; | |
890 | for(tab = 0; tab < snTabLevel; tab++) | |
891 | stream << wxT("\t"); | |
892 | stream << wxT("<") << wxSimpleHtmlParser::EncodeSpecialChars(m_name); | |
893 | if (GetAttributeCount() > 0) | |
894 | stream << wxT(" "); | |
895 | int i; | |
896 | for (i = 0; i < GetAttributeCount(); i++) | |
897 | { | |
898 | wxSimpleHtmlAttribute* attr = GetAttribute(i); | |
899 | attr->Write(stream); | |
900 | if (i < GetAttributeCount() - 1) | |
901 | stream << wxT(" "); | |
254a2129 | 902 | } |
d7463f75 JS |
903 | if(!m_children) |
904 | { | |
254a2129 | 905 | sbUseTab = false; // We're putting the open a close tag on the same line, |
d7463f75 JS |
906 | // so we don't wan't any tabs |
907 | stream << wxT(">"); | |
908 | } | |
909 | else | |
910 | { | |
4fe30bce | 911 | // sbUseTab = true; |
d7463f75 JS |
912 | stream << wxT(">\n"); |
913 | } | |
914 | snTabLevel++; | |
915 | break; | |
916 | } | |
917 | case wxSimpleHtmlTag_Directive: | |
918 | { | |
919 | stream << wxT("<!") << wxSimpleHtmlParser::EncodeSpecialChars(m_name) << wxT(" "); | |
920 | int i; | |
921 | for (i = 0; i < GetAttributeCount(); i++) | |
922 | { | |
923 | wxSimpleHtmlAttribute* attr = GetAttribute(i); | |
924 | attr->Write(stream); | |
925 | if (i < GetAttributeCount() - 1) | |
926 | stream << wxT(" "); | |
927 | } | |
928 | stream << wxT(">\n"); | |
929 | break; | |
930 | } | |
931 | case wxSimpleHtmlTag_XMLDeclaration: | |
932 | { | |
933 | stream << wxT("<?") << wxSimpleHtmlParser::EncodeSpecialChars(m_name) << wxT(" "); | |
934 | int i; | |
935 | for (i = 0; i < GetAttributeCount(); i++) | |
936 | { | |
937 | wxSimpleHtmlAttribute* attr = GetAttribute(i); | |
938 | attr->Write(stream); | |
939 | if (i < GetAttributeCount() - 1) | |
940 | stream << wxT(" "); | |
941 | } | |
942 | stream << wxT(">\n\n"); | |
943 | break; | |
944 | } | |
945 | case wxSimpleHtmlTag_Close: | |
946 | { | |
947 | if (snTabLevel) // Safety to prevent going around... | |
948 | snTabLevel--; // Reduce the tab level | |
949 | if (sbUseTab) // Do we write the open tag and close tag on a other line? | |
950 | { | |
951 | size_t tab; | |
952 | for(tab = 0; tab < snTabLevel; tab++) | |
953 | stream << wxT("\t"); | |
954 | } | |
955 | stream << wxT("</") << wxSimpleHtmlParser::EncodeSpecialChars(m_name) << wxT(">\n"); | |
4fe30bce | 956 | sbUseTab = true; |
d7463f75 JS |
957 | break; |
958 | } | |
959 | default: | |
960 | { | |
961 | break; | |
962 | } | |
963 | } | |
964 | wxSimpleHtmlTag* tag = m_children; | |
965 | while (tag) | |
966 | { | |
967 | tag->Write(stream); | |
968 | tag = tag->m_next; | |
969 | } | |
970 | ||
971 | } | |
972 | ||
973 | void wxSimpleHtmlAttribute::Write(wxOutputStream& stream) | |
974 | { | |
975 | if (m_value.IsEmpty()) | |
976 | stream << wxSimpleHtmlParser::EncodeSpecialChars(m_name); | |
977 | else | |
978 | { | |
979 | stream << wxSimpleHtmlParser::EncodeSpecialChars(m_name); | |
980 | stream << wxT("=\""); | |
981 | stream << wxSimpleHtmlParser::EncodeSpecialChars(m_value); | |
982 | stream << wxT("\""); | |
983 | } | |
984 | } |