1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/html/htmlpars.cpp
3 // Purpose: wxHtmlParser class (generic parser)
4 // Author: Vaclav Slavik
6 // Copyright: (c) 1999 Vaclav Slavik
7 // Licence: wxWindows licence
8 /////////////////////////////////////////////////////////////////////////////
10 #include "wx/wxprec.h"
16 #if wxUSE_HTML && wxUSE_STREAMS
19 #include "wx/dynarray.h"
23 #include "wx/wxcrtvararg.h"
26 #include "wx/tokenzr.h"
27 #include "wx/wfstream.h"
29 #include "wx/fontmap.h"
30 #include "wx/html/htmldefs.h"
31 #include "wx/html/htmlpars.h"
32 #include "wx/vector.h"
35 #include "wx/msw/wince/missing.h" // for bsearch()
38 // DLL options compatibility check:
39 WX_CHECK_BUILD_OPTIONS("wxHTML")
// Trace mask passed to wxLogTrace() by the wxHTML code in this file (see the
// "Unrecognized HTML entity" message in wxHtmlEntitiesParser::Parse()).
41 const wxChar
*wxTRACE_HTML_DEBUG
= _T("htmldebug");
43 //-----------------------------------------------------------------------------
44 // wxHtmlParser helpers
45 //-----------------------------------------------------------------------------
// Constructs a text piece covering the range delimited by the two iterators.
51 wxHtmlTextPiece(const wxString::const_iterator
& start
,
52 const wxString::const_iterator
& end
)
53 : m_start(start
), m_end(end
) {}
// Iterators delimiting the piece: wxHtmlParser::DoParsing() builds a wxString
// from (m_start, m_end) and then continues parsing at m_end.
54 wxString::const_iterator m_start
, m_end
;
57 // NB: this is an empty class rather than a typedef so that the name can be
// forward-declared; it adds nothing to wxVector<wxHtmlTextPiece>
58 class wxHtmlTextPieces
: public wxVector
<wxHtmlTextPiece
>
// Snapshot of the parser state: filled in by
// wxHtmlParser::SetSourceAndSaveState() and read back by RestoreState().
// Snapshots form a singly linked list headed by wxHtmlParser::m_SavedStates.
62 class wxHtmlParserState
// text pieces container that was current when the state was saved
67 wxHtmlTextPieces
*m_textPieces
;
// saved pointer to the source string (the string itself is not copied)
69 const wxString
*m_source
;
// next (older) snapshot, i.e. the value m_SavedStates had before this one
70 wxHtmlParserState
*m_nextState
;
73 //-----------------------------------------------------------------------------
75 //-----------------------------------------------------------------------------
77 IMPLEMENT_ABSTRACT_CLASS(wxHtmlParser
,wxObject
)
// Default constructor: creates a string-keyed handlers hash, starts without a
// file system or a handlers stack and allocates the entities parser (which
// the destructor deletes).
79 wxHtmlParser::wxHtmlParser()
80 : wxObject(), m_HandlersHash(wxKEY_STRING
),
81 m_FS(NULL
), m_HandlersStack(NULL
)
84 m_entitiesParser
= new wxHtmlEntitiesParser
;
// Destructor: unwinds all saved states and frees the handlers stack, the
// handlers hash/list and the entities parser.
92 wxHtmlParser::~wxHtmlParser()
// pop every state still saved by SetSourceAndSaveState()
94 while (RestoreState()) {}
// the stack entries are the wxHashTable copies pushed by PushTagHandler();
// they are stored as wxObject* and have to be deleted by hand
99 wxList
& tmp
= *m_HandlersStack
;
100 wxList::iterator it
, en
;
101 for( it
= tmp
.begin(), en
= tmp
.end(); it
!= en
; ++it
)
102 delete (wxHashTable
*)*it
;
105 delete m_HandlersStack
;
106 m_HandlersHash
.Clear();
107 WX_CLEAR_LIST(wxList
, m_HandlersList
);
108 delete m_entitiesParser
;
// Parses the markup in `source`; `result` is whatever GetProduct() yields for
// the parsed document.
112 wxObject
* wxHtmlParser::Parse(const wxString
& source
)
116 wxObject
*result
= GetProduct();
// Called with the markup about to be parsed; resets the m_stopParsing flag.
121 void wxHtmlParser::InitParser(const wxString
& source
)
124 m_stopParsing
= false;
127 void wxHtmlParser::DoneParser()
// Makes a private, heap-allocated copy of `src` the current source (m_Source).
132 void wxHtmlParser::SetSource(const wxString
& src
)
135 // NB: This is allocated on heap because wxHtmlTag uses iterators and
136 // making a copy of m_Source string in SetSourceAndSaveState() and
137 // RestoreState() would invalidate them (because wxString::m_impl's
138 // memory would change completely twice and iterators use pointers
139 // into it). So instead, we keep the string object intact and only
140 // store/restore pointer to it, for which we need it to be allocated
// on the heap.
143 m_Source
= new wxString(src
);
// Builds the tag tree and the text pieces container for the whole of
// *m_Source.
149 void wxHtmlParser::CreateDOMTree()
// tags cache for the current source; passed down to CreateDOMSubTree(), which
// hands it to every wxHtmlTag it creates
151 wxHtmlTagsCache
cache(*m_Source
);
152 m_TextPieces
= new wxHtmlTextPieces
;
// top-level call: no enclosing tag (NULL), the range is the entire source
153 CreateDOMSubTree(NULL
, m_Source
->begin(), m_Source
->end(), &cache
);
157 extern bool wxIsCDATAElement(const wxString
& tag
);
// Recursively builds the part of the tag tree found between begin_pos and
// end_pos of *m_Source.
//
// cur    tag whose content is being scanned, NULL at the top level
// cache  tags cache passed to every wxHtmlTag created here
//
// Text found between tags is appended to m_TextPieces, comments are skipped
// and each opening tag becomes a new wxHtmlTag whose own content is processed
// by a recursive call.
159 void wxHtmlParser::CreateDOMSubTree(wxHtmlTag
*cur
,
160 const wxString::const_iterator
& begin_pos
,
161 const wxString::const_iterator
& end_pos
,
162 wxHtmlTagsCache
*cache
)
// empty (or inverted) range
164 if (end_pos
<= begin_pos
)
// i is the scanning position, textBeginning the start of the text run that
// has not been stored in m_TextPieces yet
168 wxString::const_iterator i
= begin_pos
;
169 wxString::const_iterator textBeginning
= begin_pos
;
171 // If the tag contains CDATA text, we include the text between beginning
172 // and ending tag verbatim. Setting i=end_pos will skip to the very
173 // end of this function where text piece is added, bypassing any child
174 // tags parsing (CDATA element can't have child elements by definition):
175 if (cur
!= NULL
&& wxIsCDATAElement(cur
->GetName()))
186 // add text to m_TextPieces:
187 if (i
> textBeginning
)
188 m_TextPieces
->push_back(wxHtmlTextPiece(textBeginning
, i
));
190 // if it is a comment, skip it:
191 if ( SkipCommentTag(i
, m_Source
->end()) )
193 textBeginning
= i
= i
+ 1; // skip closing '>' too
196 // add another tag to the tree:
197 else if (i
< end_pos
-1 && *(i
+1) != wxT('/'))
// the new tag is created either as a child of cur ...
201 chd
= new wxHtmlTag(cur
, m_Source
,
202 i
, end_pos
, cache
, m_entitiesParser
);
// ... or without a parent
205 chd
= new wxHtmlTag(NULL
, m_Source
,
206 i
, end_pos
, cache
, m_entitiesParser
);
209 // if this is the first tag to be created make the root
210 // m_Tags point to it:
215 // if there is already a root tag add this tag as
// the last sibling of the root:
217 chd
->m_Prev
= m_Tags
->GetLastSibling();
218 chd
->m_Prev
->m_Next
= chd
;
// a tag with an ending has content of its own: build its subtree, then
// continue after the ending tag (GetEndIter2())
222 if (chd
->HasEnding())
224 CreateDOMSubTree(chd
,
225 chd
->GetBeginIter(), chd
->GetEndIter1(),
227 i
= chd
->GetEndIter2();
// no ending: continue right at GetBeginIter()
230 i
= chd
->GetBeginIter();
235 // ... or skip ending tag:
238 while (i
< end_pos
&& *i
!= wxT('>')) ++i
;
245 // add remaining text to m_TextPieces:
246 if (end_pos
> textBeginning
)
247 m_TextPieces
->push_back(wxHtmlTextPiece(textBeginning
, end_pos
));
// Walks the chain of sibling tags (t1 is the current tag, t2 the one
// following it) and finally resets both m_Tags and m_CurTag to NULL.
250 void wxHtmlParser::DestroyDOMTree()
// fetch the successor before moving on from t1
256 t2
= t1
->GetNextSibling();
260 m_Tags
= m_CurTag
= NULL
;
// Convenience overload: parses the current source from its first to its last
// character.
266 void wxHtmlParser::DoParsing()
270 DoParsing(m_Source
->begin(), m_Source
->end());
// Parses the part of the source between begin_pos_ and end_pos.
//
// The range is consumed piece by piece: text pieces are passed to AddText()
// after entity substitution, tags advance the position past themselves.
// m_CurTag and m_CurTextPiece are cursors into the tag tree and m_TextPieces
// that persist between calls.
273 void wxHtmlParser::DoParsing(const wxString::const_iterator
& begin_pos_
,
274 const wxString::const_iterator
& end_pos
)
// working copy of the start position; it is advanced as parsing proceeds
276 wxString::const_iterator
begin_pos(begin_pos_
);
// empty (or inverted) range
278 if (end_pos
<= begin_pos
)
281 wxHtmlTextPieces
& pieces
= *m_TextPieces
;
282 size_t piecesCnt
= pieces
.size();
284 while (begin_pos
< end_pos
)
// move both cursors forward to the first tag / text piece that does not
// start before the current position
286 while (m_CurTag
&& m_CurTag
->GetBeginIter() < begin_pos
)
287 m_CurTag
= m_CurTag
->GetNextTag();
288 while (m_CurTextPiece
< piecesCnt
&&
289 pieces
[m_CurTextPiece
].m_start
< begin_pos
)
// text case: a text piece is available and starts before the current tag
292 if (m_CurTextPiece
< piecesCnt
&&
294 pieces
[m_CurTextPiece
].m_start
< m_CurTag
->GetBeginIter()))
// hand over the text with HTML entities replaced, then continue after it
297 AddText(GetEntitiesParser()->Parse(
298 wxString(pieces
[m_CurTextPiece
].m_start
,
299 pieces
[m_CurTextPiece
].m_end
)));
300 begin_pos
= pieces
[m_CurTextPiece
].m_end
;
// tag case: continue after the ending tag if there is one, otherwise at
// GetBeginIter()
305 if (m_CurTag
->HasEnding())
306 begin_pos
= m_CurTag
->GetEndIter2();
308 begin_pos
= m_CurTag
->GetBeginIter();
// remember the tag being processed in t before advancing the cursor
309 wxHtmlTag
*t
= m_CurTag
;
310 m_CurTag
= m_CurTag
->GetNextTag();
// Processes a single tag by dispatching it to the handler registered for the
// tag's name.
319 void wxHtmlParser::AddTag(const wxHtmlTag
& tag
)
// look the handler up by tag name; the hash stores handlers as wxObject*
324 h
= (wxHtmlTagHandler
*) m_HandlersHash
.Get(tag
.GetName());
// inner receives the handler's return value
327 inner
= h
->HandleTag(tag
);
// parse the content enclosed by the tag
334 DoParsing(tag
.GetBeginIter(), tag
.GetEndIter1());
338 void wxHtmlParser::AddTagHandler(wxHtmlTagHandler
*handler
)
340 wxString
s(handler
->GetSupportedTags());
341 wxStringTokenizer
tokenizer(s
, wxT(", "));
343 while (tokenizer
.HasMoreTokens())
344 m_HandlersHash
.Put(tokenizer
.GetNextToken(), handler
);
346 if (m_HandlersList
.IndexOf(handler
) == wxNOT_FOUND
)
347 m_HandlersList
.Append(handler
);
349 handler
->SetParser(this);
352 void wxHtmlParser::PushTagHandler(wxHtmlTagHandler
*handler
, const wxString
& tags
)
354 wxStringTokenizer
tokenizer(tags
, wxT(", "));
357 if (m_HandlersStack
== NULL
)
359 m_HandlersStack
= new wxList
;
362 m_HandlersStack
->Insert((wxObject
*)new wxHashTable(m_HandlersHash
));
364 while (tokenizer
.HasMoreTokens())
366 key
= tokenizer
.GetNextToken();
367 m_HandlersHash
.Delete(key
);
368 m_HandlersHash
.Put(key
, handler
);
372 void wxHtmlParser::PopTagHandler()
374 wxList::compatibility_iterator first
;
376 if ( !m_HandlersStack
||
378 !(first
= m_HandlersStack
->GetFirst())
380 ((first
= m_HandlersStack
->GetFirst()) == NULL
)
381 #endif // wxUSE_STL/!wxUSE_STL
384 wxLogWarning(_("Warning: attempt to remove HTML tag handler from empty stack."));
387 m_HandlersHash
= *((wxHashTable
*) first
->GetData());
388 delete (wxHashTable
*) first
->GetData();
389 m_HandlersStack
->Erase(first
);
// Saves the current parsing state in a new wxHtmlParserState, which becomes
// the head of the saved states list, so that RestoreState() can bring it back
// after `src` has been parsed.
392 void wxHtmlParser::SetSourceAndSaveState(const wxString
& src
)
394 wxHtmlParserState
*s
= new wxHtmlParserState
;
396 s
->m_curTag
= m_CurTag
;
398 s
->m_textPieces
= m_TextPieces
;
399 s
->m_curTextPiece
= m_CurTextPiece
;
// only the pointer to the source string is saved, not the string itself
400 s
->m_source
= m_Source
;
// link the new state in front of the previously saved ones
402 s
->m_nextState
= m_SavedStates
;
// Restores the state most recently saved by SetSourceAndSaveState().
// Returns false if there is no saved state.
414 bool wxHtmlParser::RestoreState()
416 if (!m_SavedStates
) return false;
// unlink the most recent state from the list ...
421 wxHtmlParserState
*s
= m_SavedStates
;
422 m_SavedStates
= s
->m_nextState
;
// ... and copy its contents back into the parser
424 m_CurTag
= s
->m_curTag
;
426 m_TextPieces
= s
->m_textPieces
;
427 m_CurTextPiece
= s
->m_curTextPiece
;
428 m_Source
= s
->m_source
;
434 wxString
wxHtmlParser::GetInnerSource(const wxHtmlTag
& tag
)
436 return wxString(tag
.GetBeginIter(), tag
.GetEndIter1());
439 //-----------------------------------------------------------------------------
441 //-----------------------------------------------------------------------------
443 IMPLEMENT_ABSTRACT_CLASS(wxHtmlTagHandler
,wxObject
)
445 void wxHtmlTagHandler::ParseInnerSource(const wxString
& source
)
447 // It is safe to temporarily change the source being parsed,
448 // provided we restore the state back after parsing
449 m_Parser
->SetSourceAndSaveState(source
);
450 m_Parser
->DoParsing();
451 m_Parser
->RestoreState();
455 //-----------------------------------------------------------------------------
456 // wxHtmlEntitiesParser
457 //-----------------------------------------------------------------------------
459 IMPLEMENT_DYNAMIC_CLASS(wxHtmlEntitiesParser
,wxObject
)
// Default constructor. The members below exist only when wide characters are
// available in a non-Unicode build: no converter yet, system encoding.
461 wxHtmlEntitiesParser::wxHtmlEntitiesParser()
462 #if wxUSE_WCHAR_T && !wxUSE_UNICODE
463 : m_conv(NULL
), m_encoding(wxFONTENCODING_SYSTEM
)
468 wxHtmlEntitiesParser::~wxHtmlEntitiesParser()
470 #if wxUSE_WCHAR_T && !wxUSE_UNICODE
// Selects the encoding that entity characters are converted to. The code
// below is compiled only when wxUSE_WCHAR_T && !wxUSE_UNICODE.
475 void wxHtmlEntitiesParser::SetEncoding(wxFontEncoding encoding
)
477 #if wxUSE_WCHAR_T && !wxUSE_UNICODE
// requested encoding is the one already in use
478 if (encoding
== m_encoding
)
483 m_encoding
= encoding
;
// wxFONTENCODING_SYSTEM is treated specially ...
484 if (m_encoding
== wxFONTENCODING_SYSTEM
)
// ... a converter for the encoding is created from the name of the encoding
487 m_conv
= new wxCSConv(wxFontMapper::GetEncodingName(m_encoding
));
// Builds, in `output`, a copy of `input` in which the recognized HTML entity
// references are replaced by the characters they stand for.
//
// An entity name is the longest run of letters, digits, '_' and '#'; the
// terminating ';' is consumed if present but is not required. Entities that
// are not recognized are copied to the output unchanged.
493 wxString
wxHtmlEntitiesParser::Parse(const wxString
& input
) const
// c is the scanning position, last the start of the part of the input that
// has not been copied to the output yet
497 const wxString::const_iterator
end(input
.end());
498 wxString::const_iterator
c(input
.begin());
499 wxString::const_iterator
last(c
);
501 for ( ; c
< end
; ++c
)
// the output buffer is reserved lazily, the first time it is written to
505 if ( output
.empty() )
506 output
.reserve(input
.length());
// copy the plain text accumulated so far
509 output
.append(last
, c
);
// ent_s marks the first character of the entity name
514 const wxString::const_iterator ent_s
= c
;
// find the end of the entity name
517 for ( ; c
!= end
; ++c
)
520 if ( !((ch
>= wxT('a') && ch
<= wxT('z')) ||
521 (ch
>= wxT('A') && ch
<= wxT('Z')) ||
522 (ch
>= wxT('0') && ch
<= wxT('9')) ||
523 ch
== wxT('_') || ch
== wxT('#')) )
527 entity
.append(ent_s
, c
);
// without a terminating ';' step back so that the character that ended the
// name is not swallowed
528 if (c
== end
|| *c
!= wxT(';')) --c
;
530 entity_char
= GetEntityChar(entity
);
532 output
<< entity_char
;
// unknown entity: copy it verbatim, starting with the character preceding
// its name
535 output
.append(ent_s
-1, c
+1);
536 wxLogTrace(wxTRACE_HTML_DEBUG
,
537 "Unrecognized HTML entity: '%s'",
542 if ( last
== input
.begin() ) // common case: no entity
// copy whatever follows the last entity
545 output
.append(last
, end
);
// Returns the wxChar used to represent the character with the given numeric
// code.
550 wxChar
wxHtmlEntitiesParser::GetCharForCode(unsigned code
) const
// the code is placed in a wide character buffer and converted with m_conv,
// falling back to wxConvLocal when no converter has been set
555 wbuf
[0] = (wchar_t)code
;
557 wxMBConv
*conv
= m_conv
? m_conv
: &wxConvLocal
;
// WC2MB() returns (size_t)-1 if the conversion failed
558 if (conv
->WC2MB(buf
, wbuf
, 2) == (size_t)-1)
// codes below 256 are returned unchanged, anything else becomes '?'
562 return (code
< 256) ? (wxChar
)code
: '?';
// One row of the named entities table used by
// wxHtmlEntitiesParser::GetEntityChar().
567 struct wxHtmlEntityInfo
// entity name (without '&' and ';'), stored in wxString's internal
// representation so that it can be compared with wxString::wx_str() directly
569 const wxStringCharType
*name
;
573 extern "C" int LINKAGEMODE
wxHtmlEntityCompare(const void *key
, const void *item
)
575 #if wxUSE_UNICODE_UTF8
576 return strcmp((char*)key
, ((wxHtmlEntityInfo
*)item
)->name
);
578 return wxStrcmp((wxChar
*)key
, ((wxHtmlEntityInfo
*)item
)->name
);
// Returns the character denoted by `entity`, the text of an entity reference
// without the leading '&' and the trailing ';'. Both numeric references
// ("#123", "#x7B") and the named entities of the table below are handled.
582 wxChar
wxHtmlEntitiesParser::GetEntityChar(const wxString
& entity
) const
// numeric character reference
586 if (entity
[0] == wxT('#'))
588 // NB: parsed value is a number, so it's OK to use wx_str(), internal
589 // representation is the same for numbers
590 const wxStringCharType
*ent_s
= entity
.wx_str();
591 const wxStringCharType
*format
;
// "#x..." / "#X..." is hexadecimal, everything else decimal
593 if (ent_s
[1] == wxSTRING_TEXT('x') || ent_s
[1] == wxSTRING_TEXT('X'))
595 format
= wxSTRING_TEXT("%x");
599 format
= wxSTRING_TEXT("%u");
// wxSscanf() must convert exactly one value, otherwise the number is
// malformed
602 if (wxSscanf(ent_s
, format
, &code
) != 1)
607 // store the literals in wx's internal representation (either char*
608 // in UTF-8 or wchar_t*) for best performance:
609 #define ENTITY(name, code) { wxSTRING_TEXT(name), code }
611 static wxHtmlEntityInfo substitutions
[] = {
612 ENTITY("AElig", 198),
613 ENTITY("Aacute", 193),
614 ENTITY("Acirc", 194),
615 ENTITY("Agrave", 192),
616 ENTITY("Alpha", 913),
617 ENTITY("Aring", 197),
618 ENTITY("Atilde", 195),
621 ENTITY("Ccedil", 199),
623 ENTITY("Dagger", 8225),
624 ENTITY("Delta", 916),
626 ENTITY("Eacute", 201),
627 ENTITY("Ecirc", 202),
628 ENTITY("Egrave", 200),
629 ENTITY("Epsilon", 917),
632 ENTITY("Gamma", 915),
633 ENTITY("Iacute", 205),
634 ENTITY("Icirc", 206),
635 ENTITY("Igrave", 204),
638 ENTITY("Kappa", 922),
639 ENTITY("Lambda", 923),
641 ENTITY("Ntilde", 209),
643 ENTITY("OElig", 338),
644 ENTITY("Oacute", 211),
645 ENTITY("Ocirc", 212),
646 ENTITY("Ograve", 210),
647 ENTITY("Omega", 937),
648 ENTITY("Omicron", 927),
649 ENTITY("Oslash", 216),
650 ENTITY("Otilde", 213),
654 ENTITY("Prime", 8243),
657 ENTITY("Scaron", 352),
658 ENTITY("Sigma", 931),
659 ENTITY("THORN", 222),
661 ENTITY("Theta", 920),
662 ENTITY("Uacute", 218),
663 ENTITY("Ucirc", 219),
664 ENTITY("Ugrave", 217),
665 ENTITY("Upsilon", 933),
668 ENTITY("Yacute", 221),
671 ENTITY("aacute", 225),
672 ENTITY("acirc", 226),
673 ENTITY("acute", 180),
674 ENTITY("aelig", 230),
675 ENTITY("agrave", 224),
676 ENTITY("alefsym", 8501),
677 ENTITY("alpha", 945),
681 ENTITY("aring", 229),
682 ENTITY("asymp", 8776),
683 ENTITY("atilde", 227),
685 ENTITY("bdquo", 8222),
687 ENTITY("brvbar", 166),
688 ENTITY("bull", 8226),
690 ENTITY("ccedil", 231),
691 ENTITY("cedil", 184),
695 ENTITY("clubs", 9827),
696 ENTITY("cong", 8773),
698 ENTITY("crarr", 8629),
700 ENTITY("curren", 164),
701 ENTITY("dArr", 8659),
702 ENTITY("dagger", 8224),
703 ENTITY("darr", 8595),
705 ENTITY("delta", 948),
706 ENTITY("diams", 9830),
707 ENTITY("divide", 247),
708 ENTITY("eacute", 233),
709 ENTITY("ecirc", 234),
710 ENTITY("egrave", 232),
711 ENTITY("empty", 8709),
712 ENTITY("emsp", 8195),
713 ENTITY("ensp", 8194),
714 ENTITY("epsilon", 949),
715 ENTITY("equiv", 8801),
719 ENTITY("euro", 8364),
720 ENTITY("exist", 8707),
722 ENTITY("forall", 8704),
723 ENTITY("frac12", 189),
724 ENTITY("frac14", 188),
725 ENTITY("frac34", 190),
726 ENTITY("frasl", 8260),
727 ENTITY("gamma", 947),
730 ENTITY("hArr", 8660),
731 ENTITY("harr", 8596),
732 ENTITY("hearts", 9829),
733 ENTITY("hellip", 8230),
734 ENTITY("iacute", 237),
735 ENTITY("icirc", 238),
736 ENTITY("iexcl", 161),
737 ENTITY("igrave", 236),
738 ENTITY("image", 8465),
739 ENTITY("infin", 8734),
742 ENTITY("iquest", 191),
743 ENTITY("isin", 8712),
745 ENTITY("kappa", 954),
746 ENTITY("lArr", 8656),
747 ENTITY("lambda", 955),
748 ENTITY("lang", 9001),
749 ENTITY("laquo", 171),
750 ENTITY("larr", 8592),
751 ENTITY("lceil", 8968),
752 ENTITY("ldquo", 8220),
754 ENTITY("lfloor", 8970),
755 ENTITY("lowast", 8727),
758 ENTITY("lsaquo", 8249),
759 ENTITY("lsquo", 8216),
762 ENTITY("mdash", 8212),
763 ENTITY("micro", 181),
764 ENTITY("middot", 183),
765 ENTITY("minus", 8722),
767 ENTITY("nabla", 8711),
769 ENTITY("ndash", 8211),
773 ENTITY("notin", 8713),
774 ENTITY("nsub", 8836),
775 ENTITY("ntilde", 241),
777 ENTITY("oacute", 243),
778 ENTITY("ocirc", 244),
779 ENTITY("oelig", 339),
780 ENTITY("ograve", 242),
781 ENTITY("oline", 8254),
782 ENTITY("omega", 969),
783 ENTITY("omicron", 959),
784 ENTITY("oplus", 8853),
788 ENTITY("oslash", 248),
789 ENTITY("otilde", 245),
790 ENTITY("otimes", 8855),
793 ENTITY("part", 8706),
794 ENTITY("permil", 8240),
795 ENTITY("perp", 8869),
799 ENTITY("plusmn", 177),
800 ENTITY("pound", 163),
801 ENTITY("prime", 8242),
802 ENTITY("prod", 8719),
803 ENTITY("prop", 8733),
806 ENTITY("rArr", 8658),
807 ENTITY("radic", 8730),
808 ENTITY("rang", 9002),
809 ENTITY("raquo", 187),
810 ENTITY("rarr", 8594),
811 ENTITY("rceil", 8969),
812 ENTITY("rdquo", 8221),
813 ENTITY("real", 8476),
815 ENTITY("rfloor", 8971),
818 ENTITY("rsaquo", 8250),
819 ENTITY("rsquo", 8217),
820 ENTITY("sbquo", 8218),
821 ENTITY("scaron", 353),
822 ENTITY("sdot", 8901),
825 ENTITY("sigma", 963),
826 ENTITY("sigmaf", 962),
828 ENTITY("spades", 9824),
830 ENTITY("sube", 8838),
836 ENTITY("supe", 8839),
837 ENTITY("szlig", 223),
839 ENTITY("there4", 8756),
840 ENTITY("theta", 952),
841 ENTITY("thetasym", 977),
842 ENTITY("thinsp", 8201),
843 ENTITY("thorn", 254),
844 ENTITY("tilde", 732),
845 ENTITY("times", 215),
846 ENTITY("trade", 8482),
847 ENTITY("uArr", 8657),
848 ENTITY("uacute", 250),
849 ENTITY("uarr", 8593),
850 ENTITY("ucirc", 251),
851 ENTITY("ugrave", 249),
853 ENTITY("upsih", 978),
854 ENTITY("upsilon", 965),
856 ENTITY("weierp", 8472),
858 ENTITY("yacute", 253),
863 ENTITY("zwnj", 8204),
// number of entries in substitutions[]; computed on first use by counting the
// entries up to the terminating one whose code is 0
866 static size_t substitutions_cnt
= 0;
868 if (substitutions_cnt
== 0)
869 while (substitutions
[substitutions_cnt
].code
!= 0)
// table entry matching the entity name, NULL until one is found
872 wxHtmlEntityInfo
*info
= NULL
;
874 // bsearch crashes under WinCE for some reason
// so a plain linear search over the table is used there
876 for (i
= 0; i
< substitutions_cnt
; i
++)
878 if (entity
== substitutions
[i
].name
)
880 info
= & substitutions
[i
];
// binary search: relies on substitutions[] being sorted in the order defined
// by wxHtmlEntityCompare()
885 info
= (wxHtmlEntityInfo
*) bsearch(entity
.wx_str(), substitutions
,
887 sizeof(wxHtmlEntityInfo
),
888 wxHtmlEntityCompare
);
// map the numeric code to the character to return
897 return GetCharForCode(code
);
900 wxFSFile
*wxHtmlParser::OpenURL(wxHtmlURLType
WXUNUSED(type
),
901 const wxString
& url
) const
903 return m_FS
? m_FS
->OpenFile(url
) : NULL
;
908 //-----------------------------------------------------------------------------
909 // wxHtmlParser::ExtractCharsetInformation
910 //-----------------------------------------------------------------------------
// Minimal wxHtmlParser used by wxHtmlParser::ExtractCharsetInformation(): it
// builds no product and discards all text, so only the tag handlers
// registered with it do any work.
912 class wxMetaTagParser
: public wxHtmlParser
915 wxMetaTagParser() { }
// nothing is produced by this parser
917 wxObject
* GetProduct() { return NULL
; }
// text between tags is ignored
920 virtual void AddText(const wxString
& WXUNUSED(txt
)) {}
922 DECLARE_NO_COPY_CLASS(wxMetaTagParser
)
// Handler for the META and BODY tags which stores the charset found in a
// "Content-Type" META tag in the string supplied by its creator.
925 class wxMetaTagHandler
: public wxHtmlTagHandler
// retval: string that receives the charset; it is written through the stored
// pointer by HandleTag()
928 wxMetaTagHandler(wxString
*retval
) : wxHtmlTagHandler(), m_retval(retval
) {}
929 wxString
GetSupportedTags() { return wxT("META,BODY"); }
930 bool HandleTag(const wxHtmlTag
& tag
);
935 DECLARE_NO_COPY_CLASS(wxMetaTagHandler
)
// Handles a META or BODY tag: extracts the charset from
// <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=..."> into
// *m_retval and stops the parser once the charset has been found or BODY has
// been reached.
938 bool wxMetaTagHandler::HandleTag(const wxHtmlTag
& tag
)
// reaching BODY ends the search: parsing is stopped
940 if (tag
.GetName() == _T("BODY"))
942 m_Parser
->StopParsing();
// the value of HTTP-EQUIV is compared case-insensitively
946 if (tag
.HasParam(_T("HTTP-EQUIV")) &&
947 tag
.GetParam(_T("HTTP-EQUIV")).IsSameAs(_T("Content-Type"), false) &&
948 tag
.HasParam(_T("CONTENT")))
// the content is lower-cased first, hence the charset is returned in lower
// case too
950 wxString content
= tag
.GetParam(_T("CONTENT")).Lower();
// 19 is the length of "text/html; charset="
951 if (content
.Left(19) == _T("text/html; charset="))
953 *m_retval
= content
.Mid(19);
954 m_Parser
->StopParsing();
962 wxString
wxHtmlParser::ExtractCharsetInformation(const wxString
& markup
)
965 wxMetaTagParser
*parser
= new wxMetaTagParser();
968 parser
->AddTagHandler(new wxMetaTagHandler(&charset
));
969 parser
->Parse(markup
);
977 wxHtmlParser::SkipCommentTag(wxString::const_iterator
& start
,
978 wxString::const_iterator end
)
980 wxASSERT_MSG( *start
== '<', _T("should be called on the tag start") );
982 wxString::const_iterator p
= start
;
984 // comments begin with "<!--" in HTML 4.0
985 if ( p
> end
- 3 || *++p
!= '!' || *++p
!= '-' || *++p
!= '-' )
987 // not a comment at all
991 // skip the start of the comment tag in any case, if we don't find the
992 // closing tag we should ignore broken markup
995 // comments end with "--[ \t\r\n]*>", i.e. white space is allowed between
996 // comment delimiter and the closing tag character (section 3.2.4 of
997 // http://www.w3.org/TR/html401/)
1001 const wxChar c
= *p
;
1003 if ( (c
== wxT(' ') || c
== wxT('\n') ||
1004 c
== wxT('\r') || c
== wxT('\t')) && dashes
>= 2 )
1006 // ignore white space before potential tag end
1010 if ( c
== wxT('>') && dashes
>= 2 )
1012 // found end of comment
1017 if ( c
== wxT('-') )
1026 #endif // wxUSE_HTML