]>
Commit | Line | Data |
---|---|---|
d7463f75 JS |
1 | ///////////////////////////////////////////////////////////////////////////// |
2 | // Name: htmlparser.cpp | |
3 | // Purpose: Simple HTML parser | |
4 | // Author: Julian Smart | |
5 | // Modified by: | |
6 | // Created: 2002-09-25 | |
7 | // RCS-ID: $Id$ | |
8 | // Copyright: (c) Julian Smart | |
9 | // Licence: wxWindows license | |
10 | ///////////////////////////////////////////////////////////////////////////// | |
11 | ||
12 | // ---------------------------------------------------------------------------- | |
13 | // headers | |
14 | // ---------------------------------------------------------------------------- | |
71ada1a5 | 15 | #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA) |
d7463f75 JS |
16 | #pragma implementation "htmlparser.h" |
17 | #endif | |
18 | ||
19 | #include "wx/wx.h" | |
20 | ||
21 | #ifdef __BORLANDC__ | |
22 | #pragma hdrstop | |
23 | #endif | |
24 | ||
25 | #include "wx/textfile.h" | |
26 | #include "wx/wfstream.h" | |
27 | #include "wx/txtstrm.h" | |
28 | ||
29 | #include <ctype.h> | |
30 | ||
31 | #include "htmlparser.h" | |
32 | ||
33 | /// Useful insertion operators for wxOutputStream. | |
34 | static wxOutputStream& operator <<(wxOutputStream& stream, const wxString& s) | |
35 | { | |
36 | wxTextOutputStream txt(stream); // This is to make sure the line-ending is native! | |
37 | ||
38 | txt.WriteString(s); | |
39 | return stream; | |
40 | } | |
41 | ||
#if 0 // Gives warning because not used...
/// Writes a long integer to the stream, formatted with "%ld".
static wxOutputStream& operator <<(wxOutputStream& stream, long l)
{
    wxString formatted;
    formatted.Printf("%ld", l);
    return stream << formatted;
}

/// Writes a single character to the stream, formatted with "%c".
static wxOutputStream& operator <<(wxOutputStream& stream, const char c)
{
    wxString formatted;
    formatted.Printf("%c", c);
    return stream << formatted;
}
#endif // 0
57 | ||
58 | /* | |
59 | * wxSimpleHtmlParser | |
60 | * Simple HTML parser implementation | |
61 | */ | |
62 | ||
63 | wxSimpleHtmlParser::wxSimpleHtmlParser() | |
64 | { | |
65 | m_topLevel = NULL; | |
66 | m_pos = 0; | |
67 | } | |
68 | ||
69 | ||
70 | wxSimpleHtmlParser::~wxSimpleHtmlParser() | |
71 | { | |
72 | Clear(); | |
73 | } | |
74 | ||
75 | bool wxSimpleHtmlParser::ParseFile(const wxString& filename) | |
76 | { | |
77 | wxTextFile textFile; | |
78 | ||
79 | if (textFile.Open(filename)) | |
80 | { | |
81 | wxString text; | |
82 | wxString line; | |
83 | int i; | |
84 | int count = textFile.GetLineCount(); | |
85 | for (i = 0; i < count; i++) | |
86 | { | |
87 | if (i == 0) | |
88 | line = textFile.GetFirstLine(); | |
89 | else | |
90 | line = textFile.GetNextLine(); | |
91 | ||
92 | text += line; | |
93 | if (i != (count - 1)) | |
94 | text += wxT("\n"); | |
95 | } | |
96 | ||
97 | #if 0 | |
98 | for ( line = textFile.GetFirstLine(); !textFile.Eof(); line = textFile.GetNextLine() ) | |
99 | { | |
100 | text += line; | |
101 | if (!textFile.Eof()) | |
102 | text += wxT("\n"); | |
103 | } | |
104 | #endif | |
105 | ||
106 | return ParseString(text); | |
107 | } | |
108 | else | |
109 | return FALSE; | |
110 | } | |
111 | ||
112 | bool wxSimpleHtmlParser::ParseString(const wxString& str) | |
113 | { | |
114 | Clear(); | |
115 | ||
116 | m_pos = 0; | |
117 | m_text = str; | |
118 | m_length = str.Length(); | |
119 | ||
120 | m_topLevel = new wxSimpleHtmlTag(wxT("TOPLEVEL"), wxSimpleHtmlTag_TopLevel); | |
121 | ||
122 | bool bResult = ParseHtml(m_topLevel); | |
123 | ||
124 | wxASSERT(bResult); // Failed to parse the TAGs. | |
125 | // Hint: Check if every open tag has a close tag! | |
126 | ||
127 | return bResult; | |
128 | } | |
129 | ||
130 | // Main recursive parsing function | |
131 | bool wxSimpleHtmlParser::ParseHtml(wxSimpleHtmlTag* parent) | |
132 | { | |
133 | if (!parent) | |
134 | return FALSE; | |
135 | ||
136 | while (!Eof()) | |
137 | { | |
138 | EatWhitespace(); | |
139 | if (IsComment()) | |
140 | { | |
141 | ParseComment(); | |
142 | } | |
143 | else if (IsDirective()) | |
144 | { | |
145 | wxSimpleHtmlTag* tag = ParseDirective(); | |
146 | if (tag) | |
147 | parent->AppendTag(tag); | |
148 | } | |
149 | else if (IsXMLDeclaration()) | |
150 | { | |
151 | wxSimpleHtmlTag* tag = ParseXMLDeclaration(); | |
152 | if (tag) | |
153 | parent->AppendTag(tag); | |
154 | } | |
155 | else if (IsTagClose()) | |
156 | { | |
157 | wxSimpleHtmlTag* tag = ParseTagClose(); | |
158 | if (tag) | |
159 | { | |
160 | if (IsCloseTagNeeded(tag->GetName())) | |
161 | { | |
162 | if (!parent->GetParent()) | |
163 | return FALSE; | |
164 | parent->GetParent()->AppendTag(tag); | |
165 | return TRUE; | |
166 | } | |
167 | else | |
168 | parent->AppendTag(tag); | |
169 | } | |
170 | } | |
171 | else if (IsTagStartBracket(GetChar(m_pos))) | |
172 | { | |
173 | wxSimpleHtmlTag* tag = ParseTagHeader(); | |
174 | if (tag) | |
175 | parent->AppendTag(tag); | |
176 | ||
177 | if (IsCloseTagNeeded(tag->GetName())) | |
178 | { | |
179 | if (!ParseHtml(tag)) | |
180 | return FALSE; // Something didn't go ok, so don't continue. | |
181 | } | |
182 | } | |
183 | else | |
184 | { | |
185 | // Just a text string | |
186 | wxString text; | |
187 | ParseText(text); | |
188 | ||
189 | wxSimpleHtmlTag* tag = new wxSimpleHtmlTag(wxT("TEXT"), wxSimpleHtmlTag_Text); | |
190 | tag->SetText(text); | |
191 | if(parent->GetParent()) | |
192 | parent->GetParent()->AppendTag(tag); | |
193 | else | |
194 | parent->AppendTag(tag); // When this occurs it is probably the | |
195 | // empty lines at the end of the file... | |
196 | } | |
197 | } | |
198 | return TRUE; | |
199 | } | |
200 | ||
201 | // Plain text, up until an angled bracket | |
202 | bool wxSimpleHtmlParser::ParseText(wxString& text) | |
203 | { | |
204 | while (!Eof() && GetChar(m_pos) != wxT('<')) | |
205 | { | |
206 | text += GetChar(m_pos); | |
207 | m_pos ++; | |
208 | } | |
209 | DecodeSpecialChars(text); | |
210 | return TRUE; | |
211 | } | |
212 | ||
213 | wxSimpleHtmlTag* wxSimpleHtmlParser::ParseTagHeader() | |
214 | { | |
215 | if (IsTagStartBracket(GetChar(m_pos))) | |
216 | { | |
217 | m_pos ++; | |
218 | EatWhitespace(); | |
219 | ||
220 | wxString word; | |
221 | ReadWord(word, TRUE); | |
222 | ||
223 | EatWhitespace(); | |
224 | ||
225 | wxSimpleHtmlTag* tag = new wxSimpleHtmlTag(word, wxSimpleHtmlTag_Open); | |
226 | ||
227 | ParseAttributes(tag); | |
228 | ||
229 | EatWhitespace(); | |
230 | ||
231 | if (IsTagEndBracket(GetChar(m_pos))) | |
232 | m_pos ++; | |
233 | ||
234 | return tag; | |
235 | } | |
236 | else | |
237 | return NULL; | |
238 | } | |
239 | ||
240 | wxSimpleHtmlTag* wxSimpleHtmlParser::ParseTagClose() | |
241 | { | |
242 | Matches(wxT("</"), TRUE); | |
243 | ||
244 | EatWhitespace(); | |
245 | ||
246 | wxString word; | |
247 | ReadWord(word, TRUE); | |
248 | ||
249 | EatWhitespace(); | |
250 | m_pos ++; | |
251 | ||
252 | wxSimpleHtmlTag* tag = new wxSimpleHtmlTag(word, wxSimpleHtmlTag_Close); | |
253 | return tag; | |
254 | } | |
255 | ||
256 | bool wxSimpleHtmlParser::ParseAttributes(wxSimpleHtmlTag* tag) | |
257 | { | |
258 | // Parse attributes of a tag header until we reach > | |
259 | while (!IsTagEndBracket(GetChar(m_pos)) && !Eof()) | |
260 | { | |
261 | EatWhitespace(); | |
262 | ||
263 | wxString attrName, attrValue; | |
264 | ||
265 | if (IsString()) | |
266 | { | |
267 | ReadString(attrName, TRUE); | |
268 | tag->AppendAttribute(attrName, wxEmptyString); | |
269 | } | |
270 | else if (IsNumeric(GetChar(m_pos))) | |
271 | { | |
272 | ReadNumber(attrName, TRUE); | |
273 | tag->AppendAttribute(attrName, wxEmptyString); | |
274 | } | |
275 | else | |
276 | { | |
277 | // Try to read an attribute name/value pair, or at least a name | |
278 | // without the value | |
279 | ReadLiteral(attrName, TRUE); | |
280 | EatWhitespace(); | |
281 | ||
282 | if (GetChar(m_pos) == wxT('=')) | |
283 | { | |
284 | m_pos ++; | |
285 | EatWhitespace(); | |
286 | ||
287 | if (IsString()) | |
288 | ReadString(attrValue, TRUE); | |
289 | else if (!Eof() && !IsTagEndBracket(GetChar(m_pos))) | |
290 | ReadLiteral(attrValue, TRUE); | |
291 | } | |
292 | if (!attrName.IsEmpty()) | |
293 | tag->AppendAttribute(attrName, attrValue); | |
294 | } | |
295 | } | |
296 | return TRUE; | |
297 | } | |
298 | ||
299 | // e.g. <!DOCTYPE ....> | |
300 | wxSimpleHtmlTag* wxSimpleHtmlParser::ParseDirective() | |
301 | { | |
302 | Matches(wxT("<!"), TRUE); | |
303 | ||
304 | EatWhitespace(); | |
305 | ||
306 | wxString word; | |
307 | ReadWord(word, TRUE); | |
308 | ||
309 | EatWhitespace(); | |
310 | ||
311 | wxSimpleHtmlTag* tag = new wxSimpleHtmlTag(word, wxSimpleHtmlTag_Directive); | |
312 | ||
313 | ParseAttributes(tag); | |
314 | ||
315 | EatWhitespace(); | |
316 | ||
317 | if (IsTagEndBracket(GetChar(m_pos))) | |
318 | m_pos ++; | |
319 | ||
320 | return tag; | |
321 | } | |
322 | ||
323 | // e.g. <?xml .... ?> | |
324 | wxSimpleHtmlTag* wxSimpleHtmlParser::ParseXMLDeclaration() | |
325 | { | |
326 | Matches(wxT("<?"), TRUE); | |
327 | ||
328 | EatWhitespace(); | |
329 | ||
330 | wxString word; | |
331 | ReadWord(word, TRUE); | |
332 | ||
333 | EatWhitespace(); | |
334 | ||
335 | wxSimpleHtmlTag* tag = new wxSimpleHtmlTag(word, wxSimpleHtmlTag_XMLDeclaration); | |
336 | ||
337 | ParseAttributes(tag); | |
338 | ||
339 | EatWhitespace(); | |
340 | ||
341 | if (IsTagEndBracket(GetChar(m_pos))) | |
342 | m_pos ++; | |
343 | ||
344 | return tag; | |
345 | } | |
346 | ||
347 | bool wxSimpleHtmlParser::ParseComment() | |
348 | { | |
349 | // Eat the comment tag start | |
350 | Matches(wxT("<!--"), TRUE); | |
351 | ||
352 | while (!Eof() && !Matches(wxT("-->"), TRUE)) | |
353 | { | |
354 | m_pos ++; | |
355 | } | |
356 | ||
357 | return TRUE; | |
358 | } | |
359 | ||
360 | bool wxSimpleHtmlParser::EatWhitespace() | |
361 | { | |
362 | while (!Eof() && IsWhitespace(GetChar(m_pos))) | |
363 | m_pos ++; | |
364 | return TRUE; | |
365 | } | |
366 | ||
367 | bool wxSimpleHtmlParser::EatWhitespace(int& pos) | |
368 | { | |
369 | while (!Eof(pos) && IsWhitespace(GetChar(pos))) | |
370 | pos ++; | |
371 | return TRUE; | |
372 | } | |
373 | ||
374 | bool wxSimpleHtmlParser::ReadString(wxString& str, bool eatIt) | |
375 | { | |
376 | int pos = m_pos; | |
377 | if (GetChar(pos) == (int) '"') | |
378 | { | |
379 | pos ++; | |
380 | while (!Eof(pos) && GetChar(pos) != (int) '"') | |
381 | { | |
382 | // TODO: how are quotes escaped in HTML? | |
383 | str += (wxChar) GetChar(pos); | |
384 | pos ++; | |
385 | } | |
386 | if (GetChar(pos) == (int) '"') | |
387 | pos ++; | |
388 | if (eatIt) | |
389 | m_pos = pos; | |
390 | DecodeSpecialChars(str); | |
391 | return TRUE; | |
392 | } | |
393 | else | |
394 | return FALSE; | |
395 | } | |
396 | ||
397 | bool wxSimpleHtmlParser::ReadWord(wxString& str, bool eatIt) | |
398 | { | |
399 | int pos = m_pos; | |
400 | ||
401 | if (!IsAlpha(GetChar(pos))) | |
402 | return FALSE; | |
403 | ||
404 | str += (wxChar) GetChar(pos) ; | |
405 | pos ++; | |
406 | ||
407 | while (!Eof(pos) && IsWordChar(GetChar(pos))) | |
408 | { | |
409 | str += (wxChar) GetChar(pos); | |
410 | pos ++; | |
411 | } | |
412 | if (eatIt) | |
413 | m_pos = pos; | |
414 | DecodeSpecialChars(str); | |
415 | return TRUE; | |
416 | } | |
417 | ||
418 | bool wxSimpleHtmlParser::ReadNumber(wxString& str, bool eatIt) | |
419 | { | |
420 | int pos = m_pos; | |
421 | ||
422 | if (!IsNumeric(GetChar(pos))) | |
423 | return FALSE; | |
424 | ||
425 | str += (wxChar) GetChar(pos) ; | |
426 | pos ++; | |
427 | ||
428 | while (!Eof(pos) && IsNumeric(GetChar(pos))) | |
429 | { | |
430 | str += (wxChar) GetChar(pos); | |
431 | pos ++; | |
432 | } | |
433 | if (eatIt) | |
434 | m_pos = pos; | |
435 | DecodeSpecialChars(str); | |
436 | return TRUE; | |
437 | } | |
438 | ||
439 | // Could be number, string, whatever, but read up until whitespace or end of tag (but not a quoted string) | |
440 | bool wxSimpleHtmlParser::ReadLiteral(wxString& str, bool eatIt) | |
441 | { | |
442 | int pos = m_pos; | |
443 | ||
444 | while (!Eof(pos) && !IsWhitespace(GetChar(pos)) && !IsTagEndBracket(GetChar(pos)) && GetChar(pos) != wxT('=')) | |
445 | { | |
446 | str += GetChar(pos); | |
447 | pos ++; | |
448 | } | |
449 | if (eatIt) | |
450 | m_pos = pos; | |
451 | DecodeSpecialChars(str); | |
452 | return TRUE; | |
453 | } | |
454 | ||
455 | bool wxSimpleHtmlParser::IsComment() | |
456 | { | |
457 | return Matches(wxT("<!--")); | |
458 | } | |
459 | ||
460 | bool wxSimpleHtmlParser::IsDirective() | |
461 | { | |
462 | return Matches(wxT("<!")); | |
463 | } | |
464 | ||
465 | bool wxSimpleHtmlParser::IsXMLDeclaration() | |
466 | { | |
467 | return Matches(wxT("<?xml")); | |
468 | } | |
469 | ||
470 | bool wxSimpleHtmlParser::IsString() | |
471 | { | |
472 | return (GetChar(m_pos) == (int) '"') ; | |
473 | } | |
474 | ||
475 | bool wxSimpleHtmlParser::IsWord() | |
476 | { | |
477 | return (IsAlpha(GetChar(m_pos))); | |
478 | } | |
479 | ||
480 | bool wxSimpleHtmlParser::IsTagClose() | |
481 | { | |
482 | return Matches(wxT("</")); | |
483 | } | |
484 | ||
485 | bool wxSimpleHtmlParser::IsTagStartBracket(int ch) | |
486 | { | |
487 | return (ch == wxT('<')); | |
488 | } | |
489 | ||
490 | bool wxSimpleHtmlParser::IsTagEndBracket(int ch) | |
491 | { | |
492 | return (ch == wxT('>')); | |
493 | } | |
494 | ||
495 | bool wxSimpleHtmlParser::IsWhitespace(int ch) | |
496 | { | |
497 | return ((ch == 13) || (ch == 10) || (ch == 32) || (ch == (int) '\t')) ; | |
498 | } | |
499 | ||
500 | bool wxSimpleHtmlParser::IsAlpha(int ch) | |
501 | { | |
502 | return (wxIsalpha((wxChar) ch) != 0); | |
503 | } | |
504 | ||
505 | bool wxSimpleHtmlParser::IsWordChar(int ch) | |
506 | { | |
507 | return (wxIsalpha((wxChar) ch) != 0 || ch == wxT('-') || ch == wxT('_') || IsNumeric(ch)); | |
508 | } | |
509 | ||
510 | bool wxSimpleHtmlParser::IsNumeric(int ch) | |
511 | { | |
512 | return (wxIsdigit((wxChar) ch) != 0 || ch == wxT('-') || ch == wxT('.')) ; | |
513 | } | |
514 | ||
515 | bool wxSimpleHtmlParser::IsCloseTagNeeded(const wxString &name) | |
516 | { | |
517 | if (name.IsSameAs(wxT("P"), FALSE)) // e.g <P> | |
518 | return FALSE; | |
519 | ||
520 | // ToDo add more items here. | |
521 | ||
522 | return TRUE; | |
523 | } | |
524 | ||
525 | // Encode/Decode Special Characters. | |
526 | // See here for the used table: http://msdn.microsoft.com/library/default.asp?url=/library/en-us/xmlsql/ac_xml1_1nqk.asp | |
527 | /* static */ void wxSimpleHtmlParser::DecodeSpecialChars(wxString &value) | |
528 | { | |
529 | // XML translation | |
530 | value.Replace(wxT(">"), wxT(">"), TRUE); | |
531 | value.Replace(wxT("<"), wxT("<"), TRUE); | |
532 | value.Replace(wxT("""), wxT("\""), TRUE); | |
533 | value.Replace(wxT("'"), wxT("'"), TRUE); | |
534 | value.Replace(wxT("&"), wxT("&"), TRUE); // Note: do this as last to prevent replace problems. | |
535 | } | |
536 | ||
537 | /* static */ wxString wxSimpleHtmlParser::EncodeSpecialChars(const wxString &value) | |
538 | { | |
539 | wxString newvalue = value; | |
540 | ||
541 | // XML translation | |
542 | newvalue.Replace(wxT("&"), wxT("&"), TRUE); // Note: do this as first to prevent replace problems. | |
543 | newvalue.Replace(wxT(">"), wxT(">"), TRUE); | |
544 | newvalue.Replace(wxT("<"), wxT("<"), TRUE); | |
545 | newvalue.Replace(wxT("\""),wxT("""), TRUE); | |
546 | newvalue.Replace(wxT("'"), wxT("'"), TRUE); | |
547 | ||
548 | return newvalue; | |
549 | } | |
550 | ||
551 | // Matches this string (case insensitive) | |
552 | bool wxSimpleHtmlParser::Matches(const wxString& tok, bool eatIt) | |
553 | { | |
554 | wxString text(m_text.Mid(m_pos, tok.Length())); | |
555 | bool success = (text.CmpNoCase(tok) == 0) ; | |
556 | if (success && eatIt) | |
557 | { | |
558 | m_pos += tok.Length(); | |
559 | } | |
560 | return success; | |
561 | } | |
562 | ||
563 | // Safe way of getting a character | |
564 | int wxSimpleHtmlParser::GetChar(size_t i) const | |
565 | { | |
566 | if (i >= (size_t) m_length) | |
567 | return -1; | |
568 | return m_text[i]; | |
569 | } | |
570 | ||
571 | void wxSimpleHtmlParser::Clear() | |
572 | { | |
573 | if (m_topLevel) | |
574 | delete m_topLevel; | |
575 | m_topLevel = NULL; | |
576 | m_text = wxEmptyString; | |
577 | m_pos = 0; | |
578 | m_length = 0; | |
579 | } | |
580 | ||
581 | // Write this file | |
582 | void wxSimpleHtmlParser::Write(wxOutputStream& stream) | |
583 | { | |
584 | if (m_topLevel) | |
585 | m_topLevel->Write(stream); | |
586 | } | |
587 | ||
588 | bool wxSimpleHtmlParser::WriteFile(wxString& filename) | |
589 | { | |
590 | wxFileOutputStream fstream(filename); | |
591 | if (fstream.Ok()) | |
592 | { | |
593 | Write(fstream); | |
594 | return TRUE; | |
595 | } | |
596 | else | |
597 | return FALSE; | |
598 | } | |
599 | ||
600 | /* | |
601 | * wxSimpleHtmlTag | |
602 | * Representation of a tag or chunk of text | |
603 | */ | |
604 | ||
605 | wxSimpleHtmlTag::wxSimpleHtmlTag(const wxString& tagName, int tagType) | |
606 | { | |
607 | m_name = tagName; | |
608 | m_type = tagType; | |
609 | m_attributes = NULL; | |
610 | m_children = NULL; | |
611 | m_parent = NULL; | |
612 | m_next = NULL; | |
613 | } | |
614 | ||
615 | wxSimpleHtmlTag::~wxSimpleHtmlTag() | |
616 | { | |
617 | ClearAttributes(); | |
618 | ClearChildren(); | |
619 | } | |
620 | ||
621 | //// Operations | |
622 | void wxSimpleHtmlTag::ClearAttributes() | |
623 | { | |
624 | if (m_attributes) | |
625 | { | |
626 | wxSimpleHtmlAttribute* attr = m_attributes; | |
627 | while (attr) | |
628 | { | |
629 | wxSimpleHtmlAttribute* next = attr->m_next; | |
630 | ||
631 | attr->m_next = NULL; | |
632 | delete attr; | |
633 | attr = next; | |
634 | } | |
635 | m_attributes = NULL; | |
636 | } | |
637 | } | |
638 | ||
639 | wxSimpleHtmlAttribute* wxSimpleHtmlTag::FindAttribute(const wxString& name) const | |
640 | { | |
641 | wxSimpleHtmlAttribute* attr = m_attributes; | |
642 | while (attr) | |
643 | { | |
644 | if (attr->GetName().CmpNoCase(name) == 0) | |
645 | { | |
646 | return attr; | |
647 | } | |
648 | attr = attr->m_next; | |
649 | } | |
650 | return NULL; | |
651 | } | |
652 | ||
653 | void wxSimpleHtmlTag::AppendAttribute(const wxString& name, const wxString& value) | |
654 | { | |
655 | wxSimpleHtmlAttribute* attr = new wxSimpleHtmlAttribute(name, value); | |
656 | if (m_attributes) | |
657 | { | |
658 | // Find tail | |
659 | wxSimpleHtmlAttribute* last = m_attributes; | |
660 | while (last->m_next) | |
661 | last = last->m_next; | |
662 | ||
663 | last->m_next = attr; | |
664 | } | |
665 | else | |
666 | m_attributes = attr; | |
667 | } | |
668 | ||
669 | void wxSimpleHtmlTag::ClearChildren() | |
670 | { | |
671 | if (m_children) | |
672 | { | |
673 | wxSimpleHtmlTag* child = m_children; | |
674 | while (child) | |
675 | { | |
676 | wxSimpleHtmlTag* next = child->m_next; | |
677 | ||
678 | child->m_next = NULL; | |
679 | delete child; | |
680 | child = next; | |
681 | } | |
682 | m_children = NULL; | |
683 | } | |
684 | } | |
685 | ||
686 | void wxSimpleHtmlTag::RemoveChild(wxSimpleHtmlTag *remove) | |
687 | { | |
688 | if (m_children) | |
689 | { | |
690 | wxSimpleHtmlTag* child = m_children; | |
691 | wxSimpleHtmlTag* prev = NULL; | |
692 | while (child) | |
693 | { | |
694 | wxSimpleHtmlTag* next = child->m_next; | |
695 | ||
696 | if (child == remove) | |
697 | { | |
698 | child->m_next = NULL; | |
699 | delete child; | |
700 | ||
701 | if (prev != NULL) | |
702 | prev->m_next = next; | |
703 | else | |
704 | m_children = next; | |
705 | ||
706 | return; | |
707 | } | |
708 | prev = child; | |
709 | child = next; | |
710 | } | |
711 | } | |
712 | } | |
713 | ||
714 | void wxSimpleHtmlTag::AppendTag(wxSimpleHtmlTag* tag) | |
715 | { | |
716 | if (!tag) | |
717 | return; | |
718 | ||
719 | if (m_children) | |
720 | { | |
721 | // Find tail | |
722 | wxSimpleHtmlTag* last = m_children; | |
723 | while (last->m_next) | |
724 | last = last->m_next; | |
725 | ||
726 | last->m_next = tag; | |
727 | } | |
728 | else | |
729 | { | |
730 | m_children = tag; | |
731 | } | |
732 | ||
733 | tag->m_parent = this; | |
734 | } | |
735 | ||
736 | void wxSimpleHtmlTag::AppendTagAfterUs(wxSimpleHtmlTag* tag) | |
737 | { | |
738 | if (!tag) | |
739 | return; | |
740 | ||
741 | tag->m_parent = m_parent; | |
742 | tag->m_next = m_next; | |
743 | m_next = tag; | |
744 | } | |
745 | ||
746 | // Gets the text from this tag and its descendants | |
747 | wxString wxSimpleHtmlTag::GetTagText() | |
748 | { | |
749 | wxString text; | |
750 | if (m_children) | |
751 | { | |
752 | wxSimpleHtmlTag* tag = m_children; | |
753 | while (tag) | |
754 | { | |
755 | text += tag->GetTagText(); | |
756 | tag = tag->m_next; | |
757 | } | |
758 | return text; | |
759 | } | |
760 | else if (GetType() == wxSimpleHtmlTag_Text) | |
761 | return GetText(); | |
762 | else | |
763 | return wxEmptyString; | |
764 | } | |
765 | ||
766 | int wxSimpleHtmlTag::GetAttributeCount() const | |
767 | { | |
768 | int count = 0; | |
769 | wxSimpleHtmlAttribute* attr = m_attributes; | |
770 | while (attr) | |
771 | { | |
772 | count ++; | |
773 | attr = attr->m_next; | |
774 | } | |
775 | return count; | |
776 | } | |
777 | ||
778 | wxSimpleHtmlAttribute* wxSimpleHtmlTag::GetAttribute(int i) const | |
779 | { | |
780 | int count = 0; | |
781 | wxSimpleHtmlAttribute* attr = m_attributes; | |
782 | while (attr) | |
783 | { | |
784 | if (count == i) | |
785 | return attr; | |
786 | count ++; | |
787 | attr = attr->m_next; | |
788 | } | |
789 | return NULL; | |
790 | } | |
791 | ||
792 | int wxSimpleHtmlTag::GetChildCount() const | |
793 | { | |
794 | int count = 0; | |
795 | wxSimpleHtmlTag* tag = m_children; | |
796 | while (tag) | |
797 | { | |
798 | count ++; | |
799 | tag = tag->m_next; | |
800 | } | |
801 | return count; | |
802 | } | |
803 | ||
804 | bool wxSimpleHtmlTag::HasAttribute(const wxString& name, const wxString& value) const | |
805 | { | |
806 | wxSimpleHtmlAttribute* attr = FindAttribute(name); | |
807 | ||
808 | return (attr && (attr->GetValue().CmpNoCase(value) == 0)) ; | |
809 | } | |
810 | ||
811 | bool wxSimpleHtmlTag::HasAttribute(const wxString& name) const | |
812 | { | |
813 | return FindAttribute(name) != NULL ; | |
814 | } | |
815 | ||
816 | bool wxSimpleHtmlTag::GetAttributeValue(wxString& value, const wxString& attrName) | |
817 | { | |
818 | wxSimpleHtmlAttribute* attr = FindAttribute(attrName); | |
819 | if (attr) | |
820 | { | |
821 | value = attr->GetValue(); | |
822 | return TRUE; | |
823 | } | |
824 | else | |
825 | return FALSE; | |
826 | } | |
827 | ||
828 | // Search forward from this tag until we find a tag with this name & attribute | |
829 | wxSimpleHtmlTag* wxSimpleHtmlTag::FindTag(const wxString& tagName, const wxString& attrName) | |
830 | { | |
831 | wxSimpleHtmlTag* tag = m_next; | |
832 | while (tag) | |
833 | { | |
834 | if (tag->NameIs(tagName) && (attrName.IsEmpty() || tag->FindAttribute(attrName))) | |
835 | return tag; | |
836 | ||
837 | tag = tag->m_next; | |
838 | } | |
839 | return NULL; | |
840 | } | |
841 | ||
842 | bool wxSimpleHtmlTag::FindTextUntilTagClose(wxString& text, const wxString& tagName) | |
843 | { | |
844 | wxSimpleHtmlTag* tag = this; | |
845 | while (tag) | |
846 | { | |
847 | if (tag->GetType() == wxSimpleHtmlTag_Close && tag->NameIs(tagName)) | |
848 | return TRUE; | |
849 | ||
850 | if (tag->GetType() == wxSimpleHtmlTag_Text) | |
851 | text += tag->GetText(); | |
852 | ||
853 | tag = tag->m_next; | |
854 | } | |
855 | return TRUE; | |
856 | } | |
857 | ||
858 | ||
859 | wxSimpleHtmlTag* wxSimpleHtmlTag::GetChild(int i) const | |
860 | { | |
861 | int count = 0; | |
862 | wxSimpleHtmlTag* tag = m_children; | |
863 | while (tag) | |
864 | { | |
865 | if (count == i) | |
866 | return tag; | |
867 | ||
868 | count ++; | |
869 | tag = tag->m_next; | |
870 | } | |
871 | return NULL; | |
872 | } | |
873 | ||
874 | void wxSimpleHtmlTag::Write(wxOutputStream& stream) | |
875 | { | |
876 | // Some helpers to layout the open and close tags. | |
877 | static bool sbUseTab = TRUE; | |
878 | static size_t snTabLevel = 0; | |
879 | ||
880 | #if 0 // Enable if no tabs should be used to align the tags. | |
881 | snTabLevel = 0; | |
882 | #endif | |
883 | ||
884 | // Handle the different types of tags we can write. | |
885 | switch (GetType()) | |
886 | { | |
887 | case wxSimpleHtmlTag_Text: | |
888 | { | |
889 | stream << wxSimpleHtmlParser::EncodeSpecialChars(m_text); | |
890 | break; | |
891 | } | |
892 | case wxSimpleHtmlTag_Open: | |
893 | { | |
894 | size_t tab; | |
895 | for(tab = 0; tab < snTabLevel; tab++) | |
896 | stream << wxT("\t"); | |
897 | stream << wxT("<") << wxSimpleHtmlParser::EncodeSpecialChars(m_name); | |
898 | if (GetAttributeCount() > 0) | |
899 | stream << wxT(" "); | |
900 | int i; | |
901 | for (i = 0; i < GetAttributeCount(); i++) | |
902 | { | |
903 | wxSimpleHtmlAttribute* attr = GetAttribute(i); | |
904 | attr->Write(stream); | |
905 | if (i < GetAttributeCount() - 1) | |
906 | stream << wxT(" "); | |
907 | } | |
908 | if(!m_children) | |
909 | { | |
910 | sbUseTab = FALSE; // We're putting the open a close tag on the same line, | |
911 | // so we don't wan't any tabs | |
912 | stream << wxT(">"); | |
913 | } | |
914 | else | |
915 | { | |
916 | // sbUseTab = TRUE; | |
917 | stream << wxT(">\n"); | |
918 | } | |
919 | snTabLevel++; | |
920 | break; | |
921 | } | |
922 | case wxSimpleHtmlTag_Directive: | |
923 | { | |
924 | stream << wxT("<!") << wxSimpleHtmlParser::EncodeSpecialChars(m_name) << wxT(" "); | |
925 | int i; | |
926 | for (i = 0; i < GetAttributeCount(); i++) | |
927 | { | |
928 | wxSimpleHtmlAttribute* attr = GetAttribute(i); | |
929 | attr->Write(stream); | |
930 | if (i < GetAttributeCount() - 1) | |
931 | stream << wxT(" "); | |
932 | } | |
933 | stream << wxT(">\n"); | |
934 | break; | |
935 | } | |
936 | case wxSimpleHtmlTag_XMLDeclaration: | |
937 | { | |
938 | stream << wxT("<?") << wxSimpleHtmlParser::EncodeSpecialChars(m_name) << wxT(" "); | |
939 | int i; | |
940 | for (i = 0; i < GetAttributeCount(); i++) | |
941 | { | |
942 | wxSimpleHtmlAttribute* attr = GetAttribute(i); | |
943 | attr->Write(stream); | |
944 | if (i < GetAttributeCount() - 1) | |
945 | stream << wxT(" "); | |
946 | } | |
947 | stream << wxT(">\n\n"); | |
948 | break; | |
949 | } | |
950 | case wxSimpleHtmlTag_Close: | |
951 | { | |
952 | if (snTabLevel) // Safety to prevent going around... | |
953 | snTabLevel--; // Reduce the tab level | |
954 | if (sbUseTab) // Do we write the open tag and close tag on a other line? | |
955 | { | |
956 | size_t tab; | |
957 | for(tab = 0; tab < snTabLevel; tab++) | |
958 | stream << wxT("\t"); | |
959 | } | |
960 | stream << wxT("</") << wxSimpleHtmlParser::EncodeSpecialChars(m_name) << wxT(">\n"); | |
961 | sbUseTab = TRUE; | |
962 | break; | |
963 | } | |
964 | default: | |
965 | { | |
966 | break; | |
967 | } | |
968 | } | |
969 | wxSimpleHtmlTag* tag = m_children; | |
970 | while (tag) | |
971 | { | |
972 | tag->Write(stream); | |
973 | tag = tag->m_next; | |
974 | } | |
975 | ||
976 | } | |
977 | ||
978 | void wxSimpleHtmlAttribute::Write(wxOutputStream& stream) | |
979 | { | |
980 | if (m_value.IsEmpty()) | |
981 | stream << wxSimpleHtmlParser::EncodeSpecialChars(m_name); | |
982 | else | |
983 | { | |
984 | stream << wxSimpleHtmlParser::EncodeSpecialChars(m_name); | |
985 | stream << wxT("=\""); | |
986 | stream << wxSimpleHtmlParser::EncodeSpecialChars(m_value); | |
987 | stream << wxT("\""); | |
988 | } | |
989 | } |