]>
Commit | Line | Data |
---|---|---|
d7463f75 JS |
1 | ///////////////////////////////////////////////////////////////////////////// |
2 | // Name: htmlparser.cpp | |
3 | // Purpose: Simple HTML parser | |
4 | // Author: Julian Smart | |
5 | // Modified by: | |
6 | // Created: 2002-09-25 | |
7 | // RCS-ID: $Id$ | |
8 | // Copyright: (c) Julian Smart | |
9 | // Licence: wxWindows license | |
10 | ///////////////////////////////////////////////////////////////////////////// | |
11 | ||
12 | // ---------------------------------------------------------------------------- | |
13 | // headers | |
14 | // ---------------------------------------------------------------------------- | |
71ada1a5 | 15 | #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA) |
d7463f75 JS |
16 | #pragma implementation "htmlparser.h" |
17 | #endif | |
18 | ||
d9ab621e WS |
19 | // For compilers that support precompilation, includes "wx/wx.h". |
20 | #include "wx/wxprec.h" | |
d7463f75 JS |
21 | |
22 | #ifdef __BORLANDC__ | |
23 | #pragma hdrstop | |
24 | #endif | |
25 | ||
d7463f75 | 26 | #include "wx/wfstream.h" |
d9ab621e | 27 | #include "wx/textfile.h" |
d7463f75 | 28 | #include "wx/txtstrm.h" |
d7463f75 JS |
29 | #include "htmlparser.h" |
30 | ||
31 | /// Useful insertion operators for wxOutputStream. | |
32 | static wxOutputStream& operator <<(wxOutputStream& stream, const wxString& s) | |
33 | { | |
34 | wxTextOutputStream txt(stream); // This is to make sure the line-ending is native! | |
35 | ||
36 | txt.WriteString(s); | |
37 | return stream; | |
38 | } | |
39 | ||
#if 0 // Gives warning because not used...
// Inserts a long, formatted as decimal text.
// The format literal is wrapped in wxT() like every other literal in this
// file, so the code still builds if it is re-enabled in a Unicode build.
static wxOutputStream& operator <<(wxOutputStream& stream, long l)
{
    wxString str;
    str.Printf(wxT("%ld"), l);
    return stream << str;
}

// Inserts a single character.
static wxOutputStream& operator <<(wxOutputStream& stream, const char c)
{
    wxString str;
    str.Printf(wxT("%c"), c);
    return stream << str;
}
#endif // 0
55 | ||
56 | /* | |
57 | * wxSimpleHtmlParser | |
58 | * Simple HTML parser | |
59 | */ | |
60 | ||
61 | wxSimpleHtmlParser::wxSimpleHtmlParser() | |
62 | { | |
63 | m_topLevel = NULL; | |
64 | m_pos = 0; | |
65 | } | |
66 | ||
67 | ||
68 | wxSimpleHtmlParser::~wxSimpleHtmlParser() | |
69 | { | |
70 | Clear(); | |
71 | } | |
72 | ||
73 | bool wxSimpleHtmlParser::ParseFile(const wxString& filename) | |
74 | { | |
75 | wxTextFile textFile; | |
76 | ||
77 | if (textFile.Open(filename)) | |
78 | { | |
79 | wxString text; | |
80 | wxString line; | |
81 | int i; | |
82 | int count = textFile.GetLineCount(); | |
83 | for (i = 0; i < count; i++) | |
84 | { | |
85 | if (i == 0) | |
86 | line = textFile.GetFirstLine(); | |
87 | else | |
88 | line = textFile.GetNextLine(); | |
89 | ||
90 | text += line; | |
91 | if (i != (count - 1)) | |
92 | text += wxT("\n"); | |
93 | } | |
94 | ||
95 | #if 0 | |
96 | for ( line = textFile.GetFirstLine(); !textFile.Eof(); line = textFile.GetNextLine() ) | |
97 | { | |
98 | text += line; | |
99 | if (!textFile.Eof()) | |
100 | text += wxT("\n"); | |
101 | } | |
102 | #endif | |
103 | ||
104 | return ParseString(text); | |
105 | } | |
106 | else | |
4fe30bce | 107 | return false; |
d7463f75 JS |
108 | } |
109 | ||
110 | bool wxSimpleHtmlParser::ParseString(const wxString& str) | |
111 | { | |
112 | Clear(); | |
113 | ||
114 | m_pos = 0; | |
115 | m_text = str; | |
116 | m_length = str.Length(); | |
117 | ||
118 | m_topLevel = new wxSimpleHtmlTag(wxT("TOPLEVEL"), wxSimpleHtmlTag_TopLevel); | |
119 | ||
120 | bool bResult = ParseHtml(m_topLevel); | |
121 | ||
122 | wxASSERT(bResult); // Failed to parse the TAGs. | |
123 | // Hint: Check if every open tag has a close tag! | |
124 | ||
125 | return bResult; | |
126 | } | |
127 | ||
128 | // Main recursive parsing function | |
129 | bool wxSimpleHtmlParser::ParseHtml(wxSimpleHtmlTag* parent) | |
130 | { | |
131 | if (!parent) | |
4fe30bce | 132 | return false; |
d7463f75 JS |
133 | |
134 | while (!Eof()) | |
135 | { | |
136 | EatWhitespace(); | |
137 | if (IsComment()) | |
138 | { | |
139 | ParseComment(); | |
140 | } | |
141 | else if (IsDirective()) | |
142 | { | |
143 | wxSimpleHtmlTag* tag = ParseDirective(); | |
144 | if (tag) | |
145 | parent->AppendTag(tag); | |
146 | } | |
147 | else if (IsXMLDeclaration()) | |
148 | { | |
149 | wxSimpleHtmlTag* tag = ParseXMLDeclaration(); | |
150 | if (tag) | |
151 | parent->AppendTag(tag); | |
152 | } | |
153 | else if (IsTagClose()) | |
154 | { | |
155 | wxSimpleHtmlTag* tag = ParseTagClose(); | |
156 | if (tag) | |
157 | { | |
158 | if (IsCloseTagNeeded(tag->GetName())) | |
159 | { | |
160 | if (!parent->GetParent()) | |
4fe30bce | 161 | return false; |
d7463f75 | 162 | parent->GetParent()->AppendTag(tag); |
4fe30bce | 163 | return true; |
d7463f75 JS |
164 | } |
165 | else | |
166 | parent->AppendTag(tag); | |
167 | } | |
168 | } | |
169 | else if (IsTagStartBracket(GetChar(m_pos))) | |
170 | { | |
171 | wxSimpleHtmlTag* tag = ParseTagHeader(); | |
172 | if (tag) | |
173 | parent->AppendTag(tag); | |
174 | ||
175 | if (IsCloseTagNeeded(tag->GetName())) | |
176 | { | |
177 | if (!ParseHtml(tag)) | |
4fe30bce | 178 | return false; // Something didn't go ok, so don't continue. |
d7463f75 JS |
179 | } |
180 | } | |
181 | else | |
182 | { | |
183 | // Just a text string | |
184 | wxString text; | |
185 | ParseText(text); | |
186 | ||
187 | wxSimpleHtmlTag* tag = new wxSimpleHtmlTag(wxT("TEXT"), wxSimpleHtmlTag_Text); | |
188 | tag->SetText(text); | |
189 | if(parent->GetParent()) | |
190 | parent->GetParent()->AppendTag(tag); | |
191 | else | |
192 | parent->AppendTag(tag); // When this occurs it is probably the | |
193 | // empty lines at the end of the file... | |
194 | } | |
195 | } | |
4fe30bce | 196 | return true; |
d7463f75 JS |
197 | } |
198 | ||
199 | // Plain text, up until an angled bracket | |
200 | bool wxSimpleHtmlParser::ParseText(wxString& text) | |
201 | { | |
202 | while (!Eof() && GetChar(m_pos) != wxT('<')) | |
203 | { | |
204 | text += GetChar(m_pos); | |
205 | m_pos ++; | |
206 | } | |
207 | DecodeSpecialChars(text); | |
4fe30bce | 208 | return true; |
d7463f75 JS |
209 | } |
210 | ||
211 | wxSimpleHtmlTag* wxSimpleHtmlParser::ParseTagHeader() | |
212 | { | |
213 | if (IsTagStartBracket(GetChar(m_pos))) | |
214 | { | |
215 | m_pos ++; | |
216 | EatWhitespace(); | |
217 | ||
218 | wxString word; | |
4fe30bce | 219 | ReadWord(word, true); |
d7463f75 JS |
220 | |
221 | EatWhitespace(); | |
222 | ||
223 | wxSimpleHtmlTag* tag = new wxSimpleHtmlTag(word, wxSimpleHtmlTag_Open); | |
224 | ||
225 | ParseAttributes(tag); | |
226 | ||
227 | EatWhitespace(); | |
228 | ||
229 | if (IsTagEndBracket(GetChar(m_pos))) | |
230 | m_pos ++; | |
231 | ||
232 | return tag; | |
233 | } | |
234 | else | |
235 | return NULL; | |
236 | } | |
237 | ||
238 | wxSimpleHtmlTag* wxSimpleHtmlParser::ParseTagClose() | |
239 | { | |
4fe30bce | 240 | Matches(wxT("</"), true); |
d7463f75 JS |
241 | |
242 | EatWhitespace(); | |
243 | ||
244 | wxString word; | |
4fe30bce | 245 | ReadWord(word, true); |
d7463f75 JS |
246 | |
247 | EatWhitespace(); | |
248 | m_pos ++; | |
249 | ||
250 | wxSimpleHtmlTag* tag = new wxSimpleHtmlTag(word, wxSimpleHtmlTag_Close); | |
251 | return tag; | |
252 | } | |
253 | ||
254 | bool wxSimpleHtmlParser::ParseAttributes(wxSimpleHtmlTag* tag) | |
255 | { | |
256 | // Parse attributes of a tag header until we reach > | |
257 | while (!IsTagEndBracket(GetChar(m_pos)) && !Eof()) | |
258 | { | |
259 | EatWhitespace(); | |
260 | ||
261 | wxString attrName, attrValue; | |
262 | ||
263 | if (IsString()) | |
264 | { | |
4fe30bce | 265 | ReadString(attrName, true); |
d7463f75 JS |
266 | tag->AppendAttribute(attrName, wxEmptyString); |
267 | } | |
268 | else if (IsNumeric(GetChar(m_pos))) | |
269 | { | |
4fe30bce | 270 | ReadNumber(attrName, true); |
d7463f75 JS |
271 | tag->AppendAttribute(attrName, wxEmptyString); |
272 | } | |
273 | else | |
274 | { | |
275 | // Try to read an attribute name/value pair, or at least a name | |
276 | // without the value | |
4fe30bce | 277 | ReadLiteral(attrName, true); |
d7463f75 JS |
278 | EatWhitespace(); |
279 | ||
280 | if (GetChar(m_pos) == wxT('=')) | |
281 | { | |
282 | m_pos ++; | |
283 | EatWhitespace(); | |
284 | ||
285 | if (IsString()) | |
4fe30bce | 286 | ReadString(attrValue, true); |
d7463f75 | 287 | else if (!Eof() && !IsTagEndBracket(GetChar(m_pos))) |
4fe30bce | 288 | ReadLiteral(attrValue, true); |
d7463f75 JS |
289 | } |
290 | if (!attrName.IsEmpty()) | |
291 | tag->AppendAttribute(attrName, attrValue); | |
292 | } | |
293 | } | |
4fe30bce | 294 | return true; |
d7463f75 JS |
295 | } |
296 | ||
297 | // e.g. <!DOCTYPE ....> | |
298 | wxSimpleHtmlTag* wxSimpleHtmlParser::ParseDirective() | |
299 | { | |
4fe30bce | 300 | Matches(wxT("<!"), true); |
d7463f75 JS |
301 | |
302 | EatWhitespace(); | |
303 | ||
304 | wxString word; | |
4fe30bce | 305 | ReadWord(word, true); |
d7463f75 JS |
306 | |
307 | EatWhitespace(); | |
308 | ||
309 | wxSimpleHtmlTag* tag = new wxSimpleHtmlTag(word, wxSimpleHtmlTag_Directive); | |
310 | ||
311 | ParseAttributes(tag); | |
312 | ||
313 | EatWhitespace(); | |
314 | ||
315 | if (IsTagEndBracket(GetChar(m_pos))) | |
316 | m_pos ++; | |
317 | ||
318 | return tag; | |
319 | } | |
320 | ||
321 | // e.g. <?xml .... ?> | |
322 | wxSimpleHtmlTag* wxSimpleHtmlParser::ParseXMLDeclaration() | |
323 | { | |
4fe30bce | 324 | Matches(wxT("<?"), true); |
d7463f75 JS |
325 | |
326 | EatWhitespace(); | |
327 | ||
328 | wxString word; | |
4fe30bce | 329 | ReadWord(word, true); |
d7463f75 JS |
330 | |
331 | EatWhitespace(); | |
332 | ||
333 | wxSimpleHtmlTag* tag = new wxSimpleHtmlTag(word, wxSimpleHtmlTag_XMLDeclaration); | |
334 | ||
335 | ParseAttributes(tag); | |
336 | ||
337 | EatWhitespace(); | |
338 | ||
339 | if (IsTagEndBracket(GetChar(m_pos))) | |
340 | m_pos ++; | |
341 | ||
342 | return tag; | |
343 | } | |
344 | ||
345 | bool wxSimpleHtmlParser::ParseComment() | |
346 | { | |
347 | // Eat the comment tag start | |
4fe30bce | 348 | Matches(wxT("<!--"), true); |
d7463f75 | 349 | |
4fe30bce | 350 | while (!Eof() && !Matches(wxT("-->"), true)) |
d7463f75 JS |
351 | { |
352 | m_pos ++; | |
353 | } | |
354 | ||
4fe30bce | 355 | return true; |
d7463f75 JS |
356 | } |
357 | ||
358 | bool wxSimpleHtmlParser::EatWhitespace() | |
359 | { | |
360 | while (!Eof() && IsWhitespace(GetChar(m_pos))) | |
361 | m_pos ++; | |
4fe30bce | 362 | return true; |
d7463f75 JS |
363 | } |
364 | ||
365 | bool wxSimpleHtmlParser::EatWhitespace(int& pos) | |
366 | { | |
367 | while (!Eof(pos) && IsWhitespace(GetChar(pos))) | |
368 | pos ++; | |
4fe30bce | 369 | return true; |
d7463f75 JS |
370 | } |
371 | ||
372 | bool wxSimpleHtmlParser::ReadString(wxString& str, bool eatIt) | |
373 | { | |
374 | int pos = m_pos; | |
375 | if (GetChar(pos) == (int) '"') | |
376 | { | |
377 | pos ++; | |
378 | while (!Eof(pos) && GetChar(pos) != (int) '"') | |
379 | { | |
380 | // TODO: how are quotes escaped in HTML? | |
381 | str += (wxChar) GetChar(pos); | |
382 | pos ++; | |
383 | } | |
384 | if (GetChar(pos) == (int) '"') | |
385 | pos ++; | |
386 | if (eatIt) | |
387 | m_pos = pos; | |
388 | DecodeSpecialChars(str); | |
4fe30bce | 389 | return true; |
d7463f75 JS |
390 | } |
391 | else | |
4fe30bce | 392 | return false; |
d7463f75 JS |
393 | } |
394 | ||
395 | bool wxSimpleHtmlParser::ReadWord(wxString& str, bool eatIt) | |
396 | { | |
397 | int pos = m_pos; | |
398 | ||
399 | if (!IsAlpha(GetChar(pos))) | |
4fe30bce | 400 | return false; |
d7463f75 JS |
401 | |
402 | str += (wxChar) GetChar(pos) ; | |
403 | pos ++; | |
404 | ||
405 | while (!Eof(pos) && IsWordChar(GetChar(pos))) | |
406 | { | |
407 | str += (wxChar) GetChar(pos); | |
408 | pos ++; | |
409 | } | |
410 | if (eatIt) | |
411 | m_pos = pos; | |
412 | DecodeSpecialChars(str); | |
4fe30bce | 413 | return true; |
d7463f75 JS |
414 | } |
415 | ||
416 | bool wxSimpleHtmlParser::ReadNumber(wxString& str, bool eatIt) | |
417 | { | |
418 | int pos = m_pos; | |
419 | ||
420 | if (!IsNumeric(GetChar(pos))) | |
4fe30bce | 421 | return false; |
d7463f75 JS |
422 | |
423 | str += (wxChar) GetChar(pos) ; | |
424 | pos ++; | |
425 | ||
426 | while (!Eof(pos) && IsNumeric(GetChar(pos))) | |
427 | { | |
428 | str += (wxChar) GetChar(pos); | |
429 | pos ++; | |
430 | } | |
431 | if (eatIt) | |
432 | m_pos = pos; | |
433 | DecodeSpecialChars(str); | |
4fe30bce | 434 | return true; |
d7463f75 JS |
435 | } |
436 | ||
437 | // Could be number, string, whatever, but read up until whitespace or end of tag (but not a quoted string) | |
438 | bool wxSimpleHtmlParser::ReadLiteral(wxString& str, bool eatIt) | |
439 | { | |
440 | int pos = m_pos; | |
441 | ||
442 | while (!Eof(pos) && !IsWhitespace(GetChar(pos)) && !IsTagEndBracket(GetChar(pos)) && GetChar(pos) != wxT('=')) | |
443 | { | |
444 | str += GetChar(pos); | |
445 | pos ++; | |
446 | } | |
447 | if (eatIt) | |
448 | m_pos = pos; | |
449 | DecodeSpecialChars(str); | |
4fe30bce | 450 | return true; |
d7463f75 JS |
451 | } |
452 | ||
453 | bool wxSimpleHtmlParser::IsComment() | |
454 | { | |
455 | return Matches(wxT("<!--")); | |
456 | } | |
457 | ||
458 | bool wxSimpleHtmlParser::IsDirective() | |
459 | { | |
460 | return Matches(wxT("<!")); | |
461 | } | |
462 | ||
463 | bool wxSimpleHtmlParser::IsXMLDeclaration() | |
464 | { | |
465 | return Matches(wxT("<?xml")); | |
466 | } | |
467 | ||
468 | bool wxSimpleHtmlParser::IsString() | |
469 | { | |
470 | return (GetChar(m_pos) == (int) '"') ; | |
471 | } | |
472 | ||
473 | bool wxSimpleHtmlParser::IsWord() | |
474 | { | |
475 | return (IsAlpha(GetChar(m_pos))); | |
476 | } | |
477 | ||
478 | bool wxSimpleHtmlParser::IsTagClose() | |
479 | { | |
480 | return Matches(wxT("</")); | |
481 | } | |
482 | ||
483 | bool wxSimpleHtmlParser::IsTagStartBracket(int ch) | |
484 | { | |
485 | return (ch == wxT('<')); | |
486 | } | |
487 | ||
488 | bool wxSimpleHtmlParser::IsTagEndBracket(int ch) | |
489 | { | |
490 | return (ch == wxT('>')); | |
491 | } | |
492 | ||
493 | bool wxSimpleHtmlParser::IsWhitespace(int ch) | |
494 | { | |
495 | return ((ch == 13) || (ch == 10) || (ch == 32) || (ch == (int) '\t')) ; | |
496 | } | |
497 | ||
498 | bool wxSimpleHtmlParser::IsAlpha(int ch) | |
499 | { | |
500 | return (wxIsalpha((wxChar) ch) != 0); | |
501 | } | |
502 | ||
503 | bool wxSimpleHtmlParser::IsWordChar(int ch) | |
504 | { | |
505 | return (wxIsalpha((wxChar) ch) != 0 || ch == wxT('-') || ch == wxT('_') || IsNumeric(ch)); | |
506 | } | |
507 | ||
508 | bool wxSimpleHtmlParser::IsNumeric(int ch) | |
509 | { | |
510 | return (wxIsdigit((wxChar) ch) != 0 || ch == wxT('-') || ch == wxT('.')) ; | |
511 | } | |
512 | ||
513 | bool wxSimpleHtmlParser::IsCloseTagNeeded(const wxString &name) | |
514 | { | |
4fe30bce WS |
515 | if (name.IsSameAs(wxT("P"), false)) // e.g <P> |
516 | return false; | |
d7463f75 JS |
517 | |
518 | // ToDo add more items here. | |
519 | ||
4fe30bce | 520 | return true; |
d7463f75 JS |
521 | } |
522 | ||
523 | // Encode/Decode Special Characters. | |
524 | // See here for the used table: http://msdn.microsoft.com/library/default.asp?url=/library/en-us/xmlsql/ac_xml1_1nqk.asp | |
525 | /* static */ void wxSimpleHtmlParser::DecodeSpecialChars(wxString &value) | |
526 | { | |
527 | // XML translation | |
4fe30bce WS |
528 | value.Replace(wxT(">"), wxT(">"), true); |
529 | value.Replace(wxT("<"), wxT("<"), true); | |
530 | value.Replace(wxT("""), wxT("\""), true); | |
531 | value.Replace(wxT("'"), wxT("'"), true); | |
532 | value.Replace(wxT("&"), wxT("&"), true); // Note: do this as last to prevent replace problems. | |
d7463f75 JS |
533 | } |
534 | ||
535 | /* static */ wxString wxSimpleHtmlParser::EncodeSpecialChars(const wxString &value) | |
536 | { | |
537 | wxString newvalue = value; | |
538 | ||
539 | // XML translation | |
4fe30bce WS |
540 | newvalue.Replace(wxT("&"), wxT("&"), true); // Note: do this as first to prevent replace problems. |
541 | newvalue.Replace(wxT(">"), wxT(">"), true); | |
542 | newvalue.Replace(wxT("<"), wxT("<"), true); | |
543 | newvalue.Replace(wxT("\""),wxT("""), true); | |
544 | newvalue.Replace(wxT("'"), wxT("'"), true); | |
d7463f75 JS |
545 | |
546 | return newvalue; | |
547 | } | |
548 | ||
549 | // Matches this string (case insensitive) | |
550 | bool wxSimpleHtmlParser::Matches(const wxString& tok, bool eatIt) | |
551 | { | |
552 | wxString text(m_text.Mid(m_pos, tok.Length())); | |
553 | bool success = (text.CmpNoCase(tok) == 0) ; | |
554 | if (success && eatIt) | |
555 | { | |
556 | m_pos += tok.Length(); | |
557 | } | |
558 | return success; | |
559 | } | |
560 | ||
561 | // Safe way of getting a character | |
562 | int wxSimpleHtmlParser::GetChar(size_t i) const | |
563 | { | |
564 | if (i >= (size_t) m_length) | |
565 | return -1; | |
566 | return m_text[i]; | |
567 | } | |
568 | ||
569 | void wxSimpleHtmlParser::Clear() | |
570 | { | |
571 | if (m_topLevel) | |
572 | delete m_topLevel; | |
573 | m_topLevel = NULL; | |
574 | m_text = wxEmptyString; | |
575 | m_pos = 0; | |
576 | m_length = 0; | |
577 | } | |
578 | ||
579 | // Write this file | |
580 | void wxSimpleHtmlParser::Write(wxOutputStream& stream) | |
581 | { | |
582 | if (m_topLevel) | |
583 | m_topLevel->Write(stream); | |
584 | } | |
585 | ||
586 | bool wxSimpleHtmlParser::WriteFile(wxString& filename) | |
587 | { | |
588 | wxFileOutputStream fstream(filename); | |
589 | if (fstream.Ok()) | |
590 | { | |
591 | Write(fstream); | |
4fe30bce | 592 | return true; |
d7463f75 JS |
593 | } |
594 | else | |
4fe30bce | 595 | return false; |
d7463f75 JS |
596 | } |
597 | ||
598 | /* | |
599 | * wxSimpleHtmlTag | |
600 | * Representation of a tag or chunk of text | |
601 | */ | |
602 | ||
603 | wxSimpleHtmlTag::wxSimpleHtmlTag(const wxString& tagName, int tagType) | |
604 | { | |
605 | m_name = tagName; | |
606 | m_type = tagType; | |
607 | m_attributes = NULL; | |
608 | m_children = NULL; | |
609 | m_parent = NULL; | |
610 | m_next = NULL; | |
611 | } | |
612 | ||
613 | wxSimpleHtmlTag::~wxSimpleHtmlTag() | |
614 | { | |
615 | ClearAttributes(); | |
616 | ClearChildren(); | |
617 | } | |
618 | ||
619 | //// Operations | |
620 | void wxSimpleHtmlTag::ClearAttributes() | |
621 | { | |
622 | if (m_attributes) | |
623 | { | |
624 | wxSimpleHtmlAttribute* attr = m_attributes; | |
625 | while (attr) | |
626 | { | |
627 | wxSimpleHtmlAttribute* next = attr->m_next; | |
628 | ||
629 | attr->m_next = NULL; | |
630 | delete attr; | |
631 | attr = next; | |
632 | } | |
633 | m_attributes = NULL; | |
634 | } | |
635 | } | |
636 | ||
637 | wxSimpleHtmlAttribute* wxSimpleHtmlTag::FindAttribute(const wxString& name) const | |
638 | { | |
639 | wxSimpleHtmlAttribute* attr = m_attributes; | |
640 | while (attr) | |
641 | { | |
642 | if (attr->GetName().CmpNoCase(name) == 0) | |
643 | { | |
644 | return attr; | |
645 | } | |
646 | attr = attr->m_next; | |
647 | } | |
648 | return NULL; | |
649 | } | |
650 | ||
651 | void wxSimpleHtmlTag::AppendAttribute(const wxString& name, const wxString& value) | |
652 | { | |
653 | wxSimpleHtmlAttribute* attr = new wxSimpleHtmlAttribute(name, value); | |
654 | if (m_attributes) | |
655 | { | |
656 | // Find tail | |
657 | wxSimpleHtmlAttribute* last = m_attributes; | |
658 | while (last->m_next) | |
659 | last = last->m_next; | |
660 | ||
661 | last->m_next = attr; | |
662 | } | |
663 | else | |
664 | m_attributes = attr; | |
665 | } | |
666 | ||
667 | void wxSimpleHtmlTag::ClearChildren() | |
668 | { | |
669 | if (m_children) | |
670 | { | |
671 | wxSimpleHtmlTag* child = m_children; | |
672 | while (child) | |
673 | { | |
674 | wxSimpleHtmlTag* next = child->m_next; | |
675 | ||
676 | child->m_next = NULL; | |
677 | delete child; | |
678 | child = next; | |
679 | } | |
680 | m_children = NULL; | |
681 | } | |
682 | } | |
683 | ||
684 | void wxSimpleHtmlTag::RemoveChild(wxSimpleHtmlTag *remove) | |
685 | { | |
686 | if (m_children) | |
687 | { | |
688 | wxSimpleHtmlTag* child = m_children; | |
689 | wxSimpleHtmlTag* prev = NULL; | |
690 | while (child) | |
691 | { | |
692 | wxSimpleHtmlTag* next = child->m_next; | |
693 | ||
694 | if (child == remove) | |
695 | { | |
696 | child->m_next = NULL; | |
697 | delete child; | |
698 | ||
699 | if (prev != NULL) | |
700 | prev->m_next = next; | |
701 | else | |
702 | m_children = next; | |
703 | ||
704 | return; | |
705 | } | |
706 | prev = child; | |
707 | child = next; | |
708 | } | |
709 | } | |
710 | } | |
711 | ||
712 | void wxSimpleHtmlTag::AppendTag(wxSimpleHtmlTag* tag) | |
713 | { | |
714 | if (!tag) | |
715 | return; | |
716 | ||
717 | if (m_children) | |
718 | { | |
719 | // Find tail | |
720 | wxSimpleHtmlTag* last = m_children; | |
721 | while (last->m_next) | |
722 | last = last->m_next; | |
723 | ||
724 | last->m_next = tag; | |
725 | } | |
726 | else | |
727 | { | |
728 | m_children = tag; | |
729 | } | |
730 | ||
731 | tag->m_parent = this; | |
732 | } | |
733 | ||
734 | void wxSimpleHtmlTag::AppendTagAfterUs(wxSimpleHtmlTag* tag) | |
735 | { | |
736 | if (!tag) | |
737 | return; | |
738 | ||
739 | tag->m_parent = m_parent; | |
740 | tag->m_next = m_next; | |
741 | m_next = tag; | |
742 | } | |
743 | ||
744 | // Gets the text from this tag and its descendants | |
745 | wxString wxSimpleHtmlTag::GetTagText() | |
746 | { | |
747 | wxString text; | |
748 | if (m_children) | |
749 | { | |
750 | wxSimpleHtmlTag* tag = m_children; | |
751 | while (tag) | |
752 | { | |
753 | text += tag->GetTagText(); | |
754 | tag = tag->m_next; | |
755 | } | |
756 | return text; | |
757 | } | |
758 | else if (GetType() == wxSimpleHtmlTag_Text) | |
759 | return GetText(); | |
760 | else | |
761 | return wxEmptyString; | |
762 | } | |
763 | ||
764 | int wxSimpleHtmlTag::GetAttributeCount() const | |
765 | { | |
766 | int count = 0; | |
767 | wxSimpleHtmlAttribute* attr = m_attributes; | |
768 | while (attr) | |
769 | { | |
770 | count ++; | |
771 | attr = attr->m_next; | |
772 | } | |
773 | return count; | |
774 | } | |
775 | ||
776 | wxSimpleHtmlAttribute* wxSimpleHtmlTag::GetAttribute(int i) const | |
777 | { | |
778 | int count = 0; | |
779 | wxSimpleHtmlAttribute* attr = m_attributes; | |
780 | while (attr) | |
781 | { | |
782 | if (count == i) | |
783 | return attr; | |
784 | count ++; | |
785 | attr = attr->m_next; | |
786 | } | |
787 | return NULL; | |
788 | } | |
789 | ||
790 | int wxSimpleHtmlTag::GetChildCount() const | |
791 | { | |
792 | int count = 0; | |
793 | wxSimpleHtmlTag* tag = m_children; | |
794 | while (tag) | |
795 | { | |
796 | count ++; | |
797 | tag = tag->m_next; | |
798 | } | |
799 | return count; | |
800 | } | |
801 | ||
802 | bool wxSimpleHtmlTag::HasAttribute(const wxString& name, const wxString& value) const | |
803 | { | |
804 | wxSimpleHtmlAttribute* attr = FindAttribute(name); | |
805 | ||
806 | return (attr && (attr->GetValue().CmpNoCase(value) == 0)) ; | |
807 | } | |
808 | ||
809 | bool wxSimpleHtmlTag::HasAttribute(const wxString& name) const | |
810 | { | |
811 | return FindAttribute(name) != NULL ; | |
812 | } | |
813 | ||
814 | bool wxSimpleHtmlTag::GetAttributeValue(wxString& value, const wxString& attrName) | |
815 | { | |
816 | wxSimpleHtmlAttribute* attr = FindAttribute(attrName); | |
817 | if (attr) | |
818 | { | |
819 | value = attr->GetValue(); | |
4fe30bce | 820 | return true; |
d7463f75 JS |
821 | } |
822 | else | |
4fe30bce | 823 | return false; |
d7463f75 JS |
824 | } |
825 | ||
826 | // Search forward from this tag until we find a tag with this name & attribute | |
827 | wxSimpleHtmlTag* wxSimpleHtmlTag::FindTag(const wxString& tagName, const wxString& attrName) | |
828 | { | |
829 | wxSimpleHtmlTag* tag = m_next; | |
830 | while (tag) | |
831 | { | |
832 | if (tag->NameIs(tagName) && (attrName.IsEmpty() || tag->FindAttribute(attrName))) | |
833 | return tag; | |
834 | ||
835 | tag = tag->m_next; | |
836 | } | |
837 | return NULL; | |
838 | } | |
839 | ||
840 | bool wxSimpleHtmlTag::FindTextUntilTagClose(wxString& text, const wxString& tagName) | |
841 | { | |
842 | wxSimpleHtmlTag* tag = this; | |
843 | while (tag) | |
844 | { | |
845 | if (tag->GetType() == wxSimpleHtmlTag_Close && tag->NameIs(tagName)) | |
4fe30bce | 846 | return true; |
d7463f75 JS |
847 | |
848 | if (tag->GetType() == wxSimpleHtmlTag_Text) | |
849 | text += tag->GetText(); | |
850 | ||
851 | tag = tag->m_next; | |
852 | } | |
4fe30bce | 853 | return true; |
d7463f75 JS |
854 | } |
855 | ||
856 | ||
857 | wxSimpleHtmlTag* wxSimpleHtmlTag::GetChild(int i) const | |
858 | { | |
859 | int count = 0; | |
860 | wxSimpleHtmlTag* tag = m_children; | |
861 | while (tag) | |
862 | { | |
863 | if (count == i) | |
864 | return tag; | |
865 | ||
866 | count ++; | |
867 | tag = tag->m_next; | |
868 | } | |
869 | return NULL; | |
870 | } | |
871 | ||
872 | void wxSimpleHtmlTag::Write(wxOutputStream& stream) | |
873 | { | |
874 | // Some helpers to layout the open and close tags. | |
4fe30bce | 875 | static bool sbUseTab = true; |
d7463f75 JS |
876 | static size_t snTabLevel = 0; |
877 | ||
878 | #if 0 // Enable if no tabs should be used to align the tags. | |
879 | snTabLevel = 0; | |
880 | #endif | |
881 | ||
882 | // Handle the different types of tags we can write. | |
883 | switch (GetType()) | |
884 | { | |
885 | case wxSimpleHtmlTag_Text: | |
886 | { | |
887 | stream << wxSimpleHtmlParser::EncodeSpecialChars(m_text); | |
888 | break; | |
889 | } | |
890 | case wxSimpleHtmlTag_Open: | |
891 | { | |
892 | size_t tab; | |
893 | for(tab = 0; tab < snTabLevel; tab++) | |
894 | stream << wxT("\t"); | |
895 | stream << wxT("<") << wxSimpleHtmlParser::EncodeSpecialChars(m_name); | |
896 | if (GetAttributeCount() > 0) | |
897 | stream << wxT(" "); | |
898 | int i; | |
899 | for (i = 0; i < GetAttributeCount(); i++) | |
900 | { | |
901 | wxSimpleHtmlAttribute* attr = GetAttribute(i); | |
902 | attr->Write(stream); | |
903 | if (i < GetAttributeCount() - 1) | |
904 | stream << wxT(" "); | |
905 | } | |
906 | if(!m_children) | |
907 | { | |
4fe30bce | 908 | sbUseTab = false; // We're putting the open a close tag on the same line, |
d7463f75 JS |
909 | // so we don't wan't any tabs |
910 | stream << wxT(">"); | |
911 | } | |
912 | else | |
913 | { | |
4fe30bce | 914 | // sbUseTab = true; |
d7463f75 JS |
915 | stream << wxT(">\n"); |
916 | } | |
917 | snTabLevel++; | |
918 | break; | |
919 | } | |
920 | case wxSimpleHtmlTag_Directive: | |
921 | { | |
922 | stream << wxT("<!") << wxSimpleHtmlParser::EncodeSpecialChars(m_name) << wxT(" "); | |
923 | int i; | |
924 | for (i = 0; i < GetAttributeCount(); i++) | |
925 | { | |
926 | wxSimpleHtmlAttribute* attr = GetAttribute(i); | |
927 | attr->Write(stream); | |
928 | if (i < GetAttributeCount() - 1) | |
929 | stream << wxT(" "); | |
930 | } | |
931 | stream << wxT(">\n"); | |
932 | break; | |
933 | } | |
934 | case wxSimpleHtmlTag_XMLDeclaration: | |
935 | { | |
936 | stream << wxT("<?") << wxSimpleHtmlParser::EncodeSpecialChars(m_name) << wxT(" "); | |
937 | int i; | |
938 | for (i = 0; i < GetAttributeCount(); i++) | |
939 | { | |
940 | wxSimpleHtmlAttribute* attr = GetAttribute(i); | |
941 | attr->Write(stream); | |
942 | if (i < GetAttributeCount() - 1) | |
943 | stream << wxT(" "); | |
944 | } | |
945 | stream << wxT(">\n\n"); | |
946 | break; | |
947 | } | |
948 | case wxSimpleHtmlTag_Close: | |
949 | { | |
950 | if (snTabLevel) // Safety to prevent going around... | |
951 | snTabLevel--; // Reduce the tab level | |
952 | if (sbUseTab) // Do we write the open tag and close tag on a other line? | |
953 | { | |
954 | size_t tab; | |
955 | for(tab = 0; tab < snTabLevel; tab++) | |
956 | stream << wxT("\t"); | |
957 | } | |
958 | stream << wxT("</") << wxSimpleHtmlParser::EncodeSpecialChars(m_name) << wxT(">\n"); | |
4fe30bce | 959 | sbUseTab = true; |
d7463f75 JS |
960 | break; |
961 | } | |
962 | default: | |
963 | { | |
964 | break; | |
965 | } | |
966 | } | |
967 | wxSimpleHtmlTag* tag = m_children; | |
968 | while (tag) | |
969 | { | |
970 | tag->Write(stream); | |
971 | tag = tag->m_next; | |
972 | } | |
973 | ||
974 | } | |
975 | ||
976 | void wxSimpleHtmlAttribute::Write(wxOutputStream& stream) | |
977 | { | |
978 | if (m_value.IsEmpty()) | |
979 | stream << wxSimpleHtmlParser::EncodeSpecialChars(m_name); | |
980 | else | |
981 | { | |
982 | stream << wxSimpleHtmlParser::EncodeSpecialChars(m_name); | |
983 | stream << wxT("=\""); | |
984 | stream << wxSimpleHtmlParser::EncodeSpecialChars(m_value); | |
985 | stream << wxT("\""); | |
986 | } | |
987 | } |