1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/html/htmlpars.cpp
3 // Purpose: wxHtmlParser class (generic parser)
4 // Author: Vaclav Slavik
6 // Copyright: (c) 1999 Vaclav Slavik
7 // Licence: wxWindows licence
8 /////////////////////////////////////////////////////////////////////////////
10 #include "wx/wxprec.h"
16 #if wxUSE_HTML && wxUSE_STREAMS
19 #include "wx/dynarray.h"
23 #include "wx/wxcrtvararg.h"
26 #include "wx/tokenzr.h"
27 #include "wx/wfstream.h"
29 #include "wx/fontmap.h"
30 #include "wx/html/htmldefs.h"
31 #include "wx/html/htmlpars.h"
32 #include "wx/vector.h"
35 #include "wx/msw/wince/missing.h" // for bsearch()
38 // DLL options compatibility check:
39 WX_CHECK_BUILD_OPTIONS("wxHTML")
// Trace mask ("htmldebug") that this file passes to wxLogTrace() when the
// entities parser reports an unrecognized HTML entity.
41 const wxChar
*wxTRACE_HTML_DEBUG
= _T("htmldebug");
43 //-----------------------------------------------------------------------------
44 // wxHtmlParser helpers
45 //-----------------------------------------------------------------------------
// A run of plain text in the parsed source, held as a pair of iterators.
// The constructor only stores the two iterators; CreateDOMSubTree() pushes
// these objects into m_TextPieces and DoParsing() turns each one back into
// a wxString built from (m_start, m_end).
51 wxHtmlTextPiece(const wxString::const_iterator
& start
,
52 const wxString::const_iterator
& end
)
53 : m_start(start
), m_end(end
) {}
// first and second iterator delimiting the text run
54 wxString::const_iterator m_start
, m_end
;
// Container of wxHtmlTextPiece objects, derived from wxVector.
57 // NB: this is an empty class and not typedef because of forward declaration
58 class wxHtmlTextPieces
: public wxVector
<wxHtmlTextPiece
>
// Saved parser state. SetSourceAndSaveState() fills one of these in and
// links it in front of the list of saved states through m_nextState;
// RestoreState() copies the members back into the parser.
62 class wxHtmlParserState
// text pieces container that was active when the state was saved
67 wxHtmlTextPieces
*m_textPieces
;
// source string that was being parsed when the state was saved
69 const wxString
*m_source
;
// previously saved state (next element of the linked list), if any
70 wxHtmlParserState
*m_nextState
;
73 //-----------------------------------------------------------------------------
75 //-----------------------------------------------------------------------------
// wxWidgets class-information macro: wxHtmlParser is implemented as an
// abstract class whose base class is wxObject.
77 IMPLEMENT_ABSTRACT_CLASS(wxHtmlParser
,wxObject
)
// Default constructor: the handlers hash is keyed by strings
// (wxKEY_STRING), the file system pointer and the handlers stack start out
// as NULL, and a wxHtmlEntitiesParser is allocated on the heap (it is
// deleted again in the destructor).
79 wxHtmlParser::wxHtmlParser()
80 : wxObject(), m_HandlersHash(wxKEY_STRING
),
81 m_FS(NULL
), m_HandlersStack(NULL
)
84 m_entitiesParser
= new wxHtmlEntitiesParser
;
// Destructor: unwinds all saved parser states, deletes every hash table
// kept on the handlers stack and then the stack itself, clears the handlers
// hash, clears m_HandlersList with WX_CLEAR_LIST and deletes the entities
// parser allocated by the constructor.
92 wxHtmlParser::~wxHtmlParser()
// RestoreState() returns false once no saved state is left
94 while (RestoreState()) {}
99 wxList
& tmp
= *m_HandlersStack
;
100 wxList::iterator it
, en
;
// the stack holds the wxHashTable copies created by PushTagHandler()
101 for( it
= tmp
.begin(), en
= tmp
.end(); it
!= en
; ++it
)
102 delete (wxHashTable
*)*it
;
105 delete m_HandlersStack
;
106 m_HandlersHash
.Clear();
107 WX_CLEAR_LIST(wxList
, m_HandlersList
);
108 delete m_entitiesParser
;
// Entry point taking the complete markup in `source`. The object that the
// parse produced is fetched from GetProduct() into `result`.
112 wxObject
* wxHtmlParser::Parse(const wxString
& source
)
116 wxObject
*result
= GetProduct();
// Called with the source about to be parsed; clears the m_stopParsing flag
// so that a stop request from an earlier run does not carry over.
121 void wxHtmlParser::InitParser(const wxString
& source
)
124 m_stopParsing
= false;
127 void wxHtmlParser::DoneParser()
// Makes `src` the source to parse by storing a heap-allocated copy of it in
// m_Source.
132 void wxHtmlParser::SetSource(const wxString
& src
)
135 // NB: this is allocated on heap because wxHtmlTag keeps a pointer to
136 // this string if WXWIN_COMPATIBILITY_2_8
138 m_Source
= new wxString(src
);
// Builds the tag tree for the whole of *m_Source: creates a tags cache for
// the source, allocates a fresh text pieces container and runs
// CreateDOMSubTree() over the complete string with no parent tag (NULL).
144 void wxHtmlParser::CreateDOMTree()
146 wxHtmlTagsCache
cache(*m_Source
);
147 m_TextPieces
= new wxHtmlTextPieces
;
148 CreateDOMSubTree(NULL
, m_Source
->begin(), m_Source
->end(), &cache
);
// Declared here, defined outside this part of the file. CreateDOMSubTree()
// calls it with a tag's name to decide whether the tag holds CDATA text.
152 extern bool wxIsCDATAElement(const wxString
& tag
);
// Scans the range (begin_pos, end_pos) of the source, creating wxHtmlTag
// objects for the tags found and recording the text runs between them in
// m_TextPieces. Comments are skipped, and the function recurses into the
// inner range of every tag that has an ending.
//
//  cur        tag whose content is being scanned, or NULL
//  begin_pos  start of the range to scan
//  end_pos    end of the range to scan
//  cache      tags cache passed on to the wxHtmlTag constructor
154 void wxHtmlParser::CreateDOMSubTree(wxHtmlTag
*cur
,
155 const wxString::const_iterator
& begin_pos
,
156 const wxString::const_iterator
& end_pos
,
157 wxHtmlTagsCache
*cache
)
159 if (end_pos
<= begin_pos
)
// i is the scan position, textBeginning the start of the pending text run
163 wxString::const_iterator i
= begin_pos
;
164 wxString::const_iterator textBeginning
= begin_pos
;
166 // If the tag contains CDATA text, we include the text between beginning
167 // and ending tag verbatim. Setting i=end_pos will skip to the very
168 // end of this function where text piece is added, bypassing any child
169 // tags parsing (CDATA element can't have child elements by definition):
170 if (cur
!= NULL
&& wxIsCDATAElement(cur
->GetName()))
181 // add text to m_TextPieces:
182 if (i
> textBeginning
)
183 m_TextPieces
->push_back(wxHtmlTextPiece(textBeginning
, i
));
185 // if it is a comment, skip it:
186 if ( SkipCommentTag(i
, m_Source
->end()) )
188 textBeginning
= i
= i
+ 1; // skip closing '>' too
191 // add another tag to the tree:
192 else if (i
< end_pos
-1 && *(i
+1) != wxT('/'))
// the new tag is created either with `cur` as parent ...
196 chd
= new wxHtmlTag(cur
, m_Source
,
197 i
, end_pos
, cache
, m_entitiesParser
);
// ... or without a parent
200 chd
= new wxHtmlTag(NULL
, m_Source
,
201 i
, end_pos
, cache
, m_entitiesParser
);
204 // if this is the first tag to be created make the root
205 // m_Tags point to it:
210 // if there is already a root tag add this tag as
212 chd
->m_Prev
= m_Tags
->GetLastSibling();
213 chd
->m_Prev
->m_Next
= chd
;
// a tag with an ending has an inner range of its own: parse it recursively
217 if (chd
->HasEnding())
219 CreateDOMSubTree(chd
,
220 chd
->GetBeginIter(), chd
->GetEndIter1(),
222 i
= chd
->GetEndIter2();
225 i
= chd
->GetBeginIter();
230 // ... or skip ending tag:
233 while (i
< end_pos
&& *i
!= wxT('>')) ++i
;
240 // add remaining text to m_TextPieces:
241 if (end_pos
> textBeginning
)
242 m_TextPieces
->push_back(wxHtmlTextPiece(textBeginning
, end_pos
));
// Steps through sibling tags using GetNextSibling() and, once done, resets
// both m_Tags and m_CurTag to NULL.
245 void wxHtmlParser::DestroyDOMTree()
251 t2
= t1
->GetNextSibling();
255 m_Tags
= m_CurTag
= NULL
;
// Convenience overload: parses the entire source string by forwarding
// m_Source->begin() and m_Source->end() to the range overload.
261 void wxHtmlParser::DoParsing()
265 DoParsing(m_Source
->begin(), m_Source
->end());
// Walks the given range of the source, consuming the previously collected
// text pieces and tags in source order. A text piece that starts before the
// current tag is entity-decoded and handed to AddText(), and the position
// moves to the end of that piece. Otherwise the position moves past the
// current tag (to GetEndIter2() if the tag has an ending, else to
// GetBeginIter()) and m_CurTag advances to the next tag.
//
//  begin_pos_  start of the range to parse
//  end_pos     end of the range to parse
268 void wxHtmlParser::DoParsing(const wxString::const_iterator
& begin_pos_
,
269 const wxString::const_iterator
& end_pos
)
// mutable copy of the start position; it is advanced while parsing
271 wxString::const_iterator
begin_pos(begin_pos_
);
273 if (end_pos
<= begin_pos
)
276 wxHtmlTextPieces
& pieces
= *m_TextPieces
;
277 size_t piecesCnt
= pieces
.size();
279 while (begin_pos
< end_pos
)
// skip tags and text pieces that begin before the current position
281 while (m_CurTag
&& m_CurTag
->GetBeginIter() < begin_pos
)
282 m_CurTag
= m_CurTag
->GetNextTag();
283 while (m_CurTextPiece
< piecesCnt
&&
284 pieces
[m_CurTextPiece
].m_start
< begin_pos
)
// text comes first if the current piece starts before the current tag
287 if (m_CurTextPiece
< piecesCnt
&&
289 pieces
[m_CurTextPiece
].m_start
< m_CurTag
->GetBeginIter()))
292 AddText(GetEntitiesParser()->Parse(
293 wxString(pieces
[m_CurTextPiece
].m_start
,
294 pieces
[m_CurTextPiece
].m_end
)));
295 begin_pos
= pieces
[m_CurTextPiece
].m_end
;
300 if (m_CurTag
->HasEnding())
301 begin_pos
= m_CurTag
->GetEndIter2();
303 begin_pos
= m_CurTag
->GetBeginIter();
// remember the tag in `t` before m_CurTag moves on to the next one
304 wxHtmlTag
*t
= m_CurTag
;
305 m_CurTag
= m_CurTag
->GetNextTag();
// Dispatches `tag` to its handler: the handler is looked up in
// m_HandlersHash by the tag's name and its HandleTag() result is kept in
// `inner`. The tag's inner range (GetBeginIter() .. GetEndIter1()) is
// parsed with DoParsing().
// NOTE(review): presumably the inner range is only parsed here when the
// handler did not already do so -- confirm against the full body.
314 void wxHtmlParser::AddTag(const wxHtmlTag
& tag
)
319 h
= (wxHtmlTagHandler
*) m_HandlersHash
.Get(tag
.GetName());
322 inner
= h
->HandleTag(tag
);
329 DoParsing(tag
.GetBeginIter(), tag
.GetEndIter1());
// Registers `handler` for every tag name listed by its GetSupportedTags()
// (names are split on commas and spaces), appends the handler to
// m_HandlersList unless it is already there, and makes this parser the
// handler's parser.
333 void wxHtmlParser::AddTagHandler(wxHtmlTagHandler
*handler
)
335 wxString
s(handler
->GetSupportedTags());
336 wxStringTokenizer
tokenizer(s
, wxT(", "));
338 while (tokenizer
.HasMoreTokens())
339 m_HandlersHash
.Put(tokenizer
.GetNextToken(), handler
);
// a handler may be added more than once; keep a single list entry
341 if (m_HandlersList
.IndexOf(handler
) == wxNOT_FOUND
)
342 m_HandlersList
.Append(handler
);
344 handler
->SetParser(this);
// Overrides the handler used for the given tags. A copy of the current
// m_HandlersHash is first inserted into m_HandlersStack (the stack is
// created on first use) so that PopTagHandler() can bring it back; then
// every name in `tags` (split on commas and spaces) is re-mapped to
// `handler`.
347 void wxHtmlParser::PushTagHandler(wxHtmlTagHandler
*handler
, const wxString
& tags
)
349 wxStringTokenizer
tokenizer(tags
, wxT(", "));
352 if (m_HandlersStack
== NULL
)
354 m_HandlersStack
= new wxList
;
357 m_HandlersStack
->Insert((wxObject
*)new wxHashTable(m_HandlersHash
));
359 while (tokenizer
.HasMoreTokens())
361 key
= tokenizer
.GetNextToken();
// drop the existing mapping for this name before installing the new one
362 m_HandlersHash
.Delete(key
);
363 m_HandlersHash
.Put(key
, handler
);
// Undoes a PushTagHandler(): the first hash table on m_HandlersStack is
// copied back into m_HandlersHash, then deleted, and its list node is
// erased. A warning is logged if the stack does not exist or is empty.
367 void wxHtmlParser::PopTagHandler()
369 wxList::compatibility_iterator first
;
// the two assignments to `first` below are alternative preprocessor
// branches (wxUSE_STL vs. !wxUSE_STL, see the #endif)
371 if ( !m_HandlersStack
||
373 !(first
= m_HandlersStack
->GetFirst())
375 ((first
= m_HandlersStack
->GetFirst()) == NULL
)
376 #endif // wxUSE_STL/!wxUSE_STL
379 wxLogWarning(_("Warning: attempt to remove HTML tag handler from empty stack."));
382 m_HandlersHash
= *((wxHashTable
*) first
->GetData());
383 delete (wxHashTable
*) first
->GetData();
384 m_HandlersStack
->Erase(first
);
// Saves the current parsing state (current tag, text pieces container,
// current text piece index and source) in a newly allocated
// wxHtmlParserState that is linked in front of m_SavedStates.
// NOTE(review): presumably `src` is installed as the new source once the
// state has been saved -- confirm against the full body.
387 void wxHtmlParser::SetSourceAndSaveState(const wxString
& src
)
389 wxHtmlParserState
*s
= new wxHtmlParserState
;
391 s
->m_curTag
= m_CurTag
;
393 s
->m_textPieces
= m_TextPieces
;
394 s
->m_curTextPiece
= m_CurTextPiece
;
395 s
->m_source
= m_Source
;
// chain the previously saved states behind the new one
397 s
->m_nextState
= m_SavedStates
;
// Takes the first saved state off the m_SavedStates list and restores
// m_CurTag, m_TextPieces, m_CurTextPiece and m_Source from it.
// Returns false when there is no saved state to restore.
409 bool wxHtmlParser::RestoreState()
411 if (!m_SavedStates
) return false;
// unlink the head of the list
415 wxHtmlParserState
*s
= m_SavedStates
;
416 m_SavedStates
= s
->m_nextState
;
418 m_CurTag
= s
->m_curTag
;
420 m_TextPieces
= s
->m_textPieces
;
421 m_CurTextPiece
= s
->m_curTextPiece
;
422 m_Source
= s
->m_source
;
// Returns a copy of the source text between the tag's GetBeginIter() and
// GetEndIter1() positions.
428 wxString
wxHtmlParser::GetInnerSource(const wxHtmlTag
& tag
)
430 return wxString(tag
.GetBeginIter(), tag
.GetEndIter1());
433 //-----------------------------------------------------------------------------
435 //-----------------------------------------------------------------------------
// wxWidgets class-information macro: wxHtmlTagHandler is implemented as an
// abstract class whose base class is wxObject.
437 IMPLEMENT_ABSTRACT_CLASS(wxHtmlTagHandler
,wxObject
)
// Parses `source` with the parser this handler belongs to: the parser's
// current state is saved, parsing runs over `source`, and the saved state
// is restored afterwards.
439 void wxHtmlTagHandler::ParseInnerSource(const wxString
& source
)
441 // It is safe to temporarily change the source being parsed,
442 // provided we restore the state back after parsing
443 m_Parser
->SetSourceAndSaveState(source
);
444 m_Parser
->DoParsing();
445 m_Parser
->RestoreState();
449 //-----------------------------------------------------------------------------
450 // wxHtmlEntitiesParser
451 //-----------------------------------------------------------------------------
// wxWidgets class-information macro: wxHtmlEntitiesParser is implemented as
// a dynamic class whose base class is wxObject.
453 IMPLEMENT_DYNAMIC_CLASS(wxHtmlEntitiesParser
,wxObject
)
// Constructor. In builds with wxUSE_WCHAR_T && !wxUSE_UNICODE the parser
// starts without a converter (m_conv is NULL) and with the system font
// encoding.
455 wxHtmlEntitiesParser::wxHtmlEntitiesParser()
456 #if wxUSE_WCHAR_T && !wxUSE_UNICODE
457 : m_conv(NULL
), m_encoding(wxFONTENCODING_SYSTEM
)
// Destructor; its body is conditional on wxUSE_WCHAR_T && !wxUSE_UNICODE.
462 wxHtmlEntitiesParser::~wxHtmlEntitiesParser()
464 #if wxUSE_WCHAR_T && !wxUSE_UNICODE
// Sets the encoding used when converting entity codes to characters (only
// meaningful in wxUSE_WCHAR_T && !wxUSE_UNICODE builds). The new value is
// stored in m_encoding and a wxCSConv is created from the name that
// wxFontMapper::GetEncodingName() reports for it.
// NOTE(review): the checks for "same encoding as before" and for
// wxFONTENCODING_SYSTEM presumably short-circuit the converter creation --
// confirm against the full body.
469 void wxHtmlEntitiesParser::SetEncoding(wxFontEncoding encoding
)
471 #if wxUSE_WCHAR_T && !wxUSE_UNICODE
472 if (encoding
== m_encoding
)
477 m_encoding
= encoding
;
478 if (m_encoding
== wxFONTENCODING_SYSTEM
)
481 m_conv
= new wxCSConv(wxFontMapper::GetEncodingName(m_encoding
));
// Returns `input` with HTML entities replaced by the characters they stand
// for. Text between entities is copied to the output in chunks. An entity
// name is the run of characters from [A-Za-z0-9_#]; a terminating ';' is
// accepted but not required. Entities that cannot be resolved are copied to
// the output as they appeared and reported through wxLogTrace().
487 wxString
wxHtmlEntitiesParser::Parse(const wxString
& input
) const
// `c` scans the input, `last` marks the start of text not yet copied
491 const wxString::const_iterator
end(input
.end());
492 wxString::const_iterator
c(input
.begin());
493 wxString::const_iterator
last(c
);
495 for ( ; c
< end
; ++c
)
// reserve once, the first time something is written to the output
499 if ( output
.empty() )
500 output
.reserve(input
.length());
// flush the plain text collected since the previous entity
503 output
.append(last
, c
);
// ent_s marks the first character of the entity name
508 const wxString::const_iterator ent_s
= c
;
// advance `c` to the first character that cannot be part of a name
511 for ( ; c
!= end
; ++c
)
514 if ( !((ch
>= wxT('a') && ch
<= wxT('z')) ||
515 (ch
>= wxT('A') && ch
<= wxT('Z')) ||
516 (ch
>= wxT('0') && ch
<= wxT('9')) ||
517 ch
== wxT('_') || ch
== wxT('#')) )
521 entity
.append(ent_s
, c
);
// no ';' after the name: step back so that the outer loop's ++c does not
// skip the character that ended the name
522 if (c
== end
|| *c
!= wxT(';')) --c
;
524 entity_char
= GetEntityChar(entity
);
526 output
<< entity_char
;
// unknown entity: copy it through unchanged, starting from the character
// in front of the name
529 output
.append(ent_s
-1, c
+1);
530 wxLogTrace(wxTRACE_HTML_DEBUG
,
531 "Unrecognized HTML entity: '%s'",
536 if ( last
== input
.begin() ) // common case: no entity
// copy whatever text follows the last entity
539 output
.append(last
, end
);
// Converts a numeric character code to a wxChar. Two strategies are
// visible: converting the code as a wide character through m_conv (or
// wxConvLocal when no converter is set) with WC2MB(), and returning the
// code itself when it is below 256 and '?' otherwise.
// NOTE(review): the strategies are presumably selected by preprocessor
// conditions on the Unicode/wchar_t build options -- confirm.
544 wxChar
wxHtmlEntitiesParser::GetCharForCode(unsigned code
) const
549 wbuf
[0] = (wchar_t)code
;
551 wxMBConv
*conv
= m_conv
? m_conv
: &wxConvLocal
;
// (size_t)-1 is the value checked for as the conversion result
552 if (conv
->WC2MB(buf
, wbuf
, 2) == (size_t)-1)
556 return (code
< 256) ? (wxChar
)code
: '?';
// One row of the entity table searched by
// wxHtmlEntitiesParser::GetEntityChar(). `name` is the entity name; the
// table code in this file also reads a `code` member of each row.
561 struct wxHtmlEntityInfo
563 const wxStringCharType
*name
;
// Comparison callback passed to bsearch(). `key` is the entity name being
// looked up (a raw character string) and `item` points to a
// wxHtmlEntityInfo row; the key is compared with the row's name using
// strcmp() in wxUSE_UNICODE_UTF8 builds and wxStrcmp() in the other case.
567 extern "C" int LINKAGEMODE
wxHtmlEntityCompare(const void *key
, const void *item
)
569 #if wxUSE_UNICODE_UTF8
570 return strcmp((char*)key
, ((wxHtmlEntityInfo
*)item
)->name
);
572 return wxStrcmp((wxChar
*)key
, ((wxHtmlEntityInfo
*)item
)->name
);
// Resolves an entity name to a character. Names starting with '#' are
// numeric: the number is read with wxSscanf(), using "%x" when the second
// character is 'x' or 'X' and "%u" otherwise. All other names are looked up
// in the static `substitutions` table. The resulting code is converted with
// GetCharForCode().
// NOTE(review): entity[0] is read without a visible check that `entity` is
// non-empty, although Parse() can build an empty name -- confirm that an
// empty entity is handled.
576 wxChar
wxHtmlEntitiesParser::GetEntityChar(const wxString
& entity
) const
580 if (entity
[0] == wxT('#'))
582 // NB: parsed value is a number, so it's OK to use wx_str(), internal
583 // representation is the same for numbers
584 const wxStringCharType
*ent_s
= entity
.wx_str();
585 const wxStringCharType
*format
;
587 if (ent_s
[1] == wxSTRING_TEXT('x') || ent_s
[1] == wxSTRING_TEXT('X'))
589 format
= wxSTRING_TEXT("%x");
593 format
= wxSTRING_TEXT("%u");
596 if (wxSscanf(ent_s
, format
, &code
) != 1)
601 // store the literals in wx's internal representation (either char*
602 // in UTF-8 or wchar_t*) for best performance:
603 #define ENTITY(name, code) { wxSTRING_TEXT(name), code }
// The table is searched with bsearch() further down, so its rows must stay
// sorted by name in the order used by wxHtmlEntityCompare().
605 static wxHtmlEntityInfo substitutions
[] = {
606 ENTITY("AElig", 198),
607 ENTITY("Aacute", 193),
608 ENTITY("Acirc", 194),
609 ENTITY("Agrave", 192),
610 ENTITY("Alpha", 913),
611 ENTITY("Aring", 197),
612 ENTITY("Atilde", 195),
615 ENTITY("Ccedil", 199),
617 ENTITY("Dagger", 8225),
618 ENTITY("Delta", 916),
620 ENTITY("Eacute", 201),
621 ENTITY("Ecirc", 202),
622 ENTITY("Egrave", 200),
623 ENTITY("Epsilon", 917),
626 ENTITY("Gamma", 915),
627 ENTITY("Iacute", 205),
628 ENTITY("Icirc", 206),
629 ENTITY("Igrave", 204),
632 ENTITY("Kappa", 922),
633 ENTITY("Lambda", 923),
635 ENTITY("Ntilde", 209),
637 ENTITY("OElig", 338),
638 ENTITY("Oacute", 211),
639 ENTITY("Ocirc", 212),
640 ENTITY("Ograve", 210),
641 ENTITY("Omega", 937),
642 ENTITY("Omicron", 927),
643 ENTITY("Oslash", 216),
644 ENTITY("Otilde", 213),
648 ENTITY("Prime", 8243),
651 ENTITY("Scaron", 352),
652 ENTITY("Sigma", 931),
653 ENTITY("THORN", 222),
655 ENTITY("Theta", 920),
656 ENTITY("Uacute", 218),
657 ENTITY("Ucirc", 219),
658 ENTITY("Ugrave", 217),
659 ENTITY("Upsilon", 933),
662 ENTITY("Yacute", 221),
665 ENTITY("aacute", 225),
666 ENTITY("acirc", 226),
667 ENTITY("acute", 180),
668 ENTITY("aelig", 230),
669 ENTITY("agrave", 224),
670 ENTITY("alefsym", 8501),
671 ENTITY("alpha", 945),
675 ENTITY("aring", 229),
676 ENTITY("asymp", 8776),
677 ENTITY("atilde", 227),
679 ENTITY("bdquo", 8222),
681 ENTITY("brvbar", 166),
682 ENTITY("bull", 8226),
684 ENTITY("ccedil", 231),
685 ENTITY("cedil", 184),
689 ENTITY("clubs", 9827),
690 ENTITY("cong", 8773),
692 ENTITY("crarr", 8629),
694 ENTITY("curren", 164),
695 ENTITY("dArr", 8659),
696 ENTITY("dagger", 8224),
697 ENTITY("darr", 8595),
699 ENTITY("delta", 948),
700 ENTITY("diams", 9830),
701 ENTITY("divide", 247),
702 ENTITY("eacute", 233),
703 ENTITY("ecirc", 234),
704 ENTITY("egrave", 232),
705 ENTITY("empty", 8709),
706 ENTITY("emsp", 8195),
707 ENTITY("ensp", 8194),
708 ENTITY("epsilon", 949),
709 ENTITY("equiv", 8801),
713 ENTITY("euro", 8364),
714 ENTITY("exist", 8707),
716 ENTITY("forall", 8704),
717 ENTITY("frac12", 189),
718 ENTITY("frac14", 188),
719 ENTITY("frac34", 190),
720 ENTITY("frasl", 8260),
721 ENTITY("gamma", 947),
724 ENTITY("hArr", 8660),
725 ENTITY("harr", 8596),
726 ENTITY("hearts", 9829),
727 ENTITY("hellip", 8230),
728 ENTITY("iacute", 237),
729 ENTITY("icirc", 238),
730 ENTITY("iexcl", 161),
731 ENTITY("igrave", 236),
732 ENTITY("image", 8465),
733 ENTITY("infin", 8734),
736 ENTITY("iquest", 191),
737 ENTITY("isin", 8712),
739 ENTITY("kappa", 954),
740 ENTITY("lArr", 8656),
741 ENTITY("lambda", 955),
742 ENTITY("lang", 9001),
743 ENTITY("laquo", 171),
744 ENTITY("larr", 8592),
745 ENTITY("lceil", 8968),
746 ENTITY("ldquo", 8220),
748 ENTITY("lfloor", 8970),
749 ENTITY("lowast", 8727),
752 ENTITY("lsaquo", 8249),
753 ENTITY("lsquo", 8216),
756 ENTITY("mdash", 8212),
757 ENTITY("micro", 181),
758 ENTITY("middot", 183),
759 ENTITY("minus", 8722),
761 ENTITY("nabla", 8711),
763 ENTITY("ndash", 8211),
767 ENTITY("notin", 8713),
768 ENTITY("nsub", 8836),
769 ENTITY("ntilde", 241),
771 ENTITY("oacute", 243),
772 ENTITY("ocirc", 244),
773 ENTITY("oelig", 339),
774 ENTITY("ograve", 242),
775 ENTITY("oline", 8254),
776 ENTITY("omega", 969),
777 ENTITY("omicron", 959),
778 ENTITY("oplus", 8853),
782 ENTITY("oslash", 248),
783 ENTITY("otilde", 245),
784 ENTITY("otimes", 8855),
787 ENTITY("part", 8706),
788 ENTITY("permil", 8240),
789 ENTITY("perp", 8869),
793 ENTITY("plusmn", 177),
794 ENTITY("pound", 163),
795 ENTITY("prime", 8242),
796 ENTITY("prod", 8719),
797 ENTITY("prop", 8733),
800 ENTITY("rArr", 8658),
801 ENTITY("radic", 8730),
802 ENTITY("rang", 9002),
803 ENTITY("raquo", 187),
804 ENTITY("rarr", 8594),
805 ENTITY("rceil", 8969),
806 ENTITY("rdquo", 8221),
807 ENTITY("real", 8476),
809 ENTITY("rfloor", 8971),
812 ENTITY("rsaquo", 8250),
813 ENTITY("rsquo", 8217),
814 ENTITY("sbquo", 8218),
815 ENTITY("scaron", 353),
816 ENTITY("sdot", 8901),
819 ENTITY("sigma", 963),
820 ENTITY("sigmaf", 962),
822 ENTITY("spades", 9824),
824 ENTITY("sube", 8838),
830 ENTITY("supe", 8839),
831 ENTITY("szlig", 223),
833 ENTITY("there4", 8756),
834 ENTITY("theta", 952),
835 ENTITY("thetasym", 977),
836 ENTITY("thinsp", 8201),
837 ENTITY("thorn", 254),
838 ENTITY("tilde", 732),
839 ENTITY("times", 215),
840 ENTITY("trade", 8482),
841 ENTITY("uArr", 8657),
842 ENTITY("uacute", 250),
843 ENTITY("uarr", 8593),
844 ENTITY("ucirc", 251),
845 ENTITY("ugrave", 249),
847 ENTITY("upsih", 978),
848 ENTITY("upsilon", 965),
850 ENTITY("weierp", 8472),
852 ENTITY("yacute", 253),
857 ENTITY("zwnj", 8204),
// number of rows in the table, determined on the first call by scanning for
// the row whose code is 0
860 static size_t substitutions_cnt
= 0;
862 if (substitutions_cnt
== 0)
863 while (substitutions
[substitutions_cnt
].code
!= 0)
866 wxHtmlEntityInfo
*info
= NULL
;
868 // bsearch crashes under WinCE for some reason
// linear search used instead of bsearch() in that configuration
870 for (i
= 0; i
< substitutions_cnt
; i
++)
872 if (entity
== substitutions
[i
].name
)
874 info
= & substitutions
[i
];
// binary search over the table, compared by wxHtmlEntityCompare()
879 info
= (wxHtmlEntityInfo
*) bsearch(entity
.wx_str(), substitutions
,
881 sizeof(wxHtmlEntityInfo
),
882 wxHtmlEntityCompare
);
891 return GetCharForCode(code
);
// Opens `url` through the parser's file system object (m_FS) and returns
// the resulting file, or NULL when no file system is set. The URL type
// argument is not used by this implementation.
894 wxFSFile
*wxHtmlParser::OpenURL(wxHtmlURLType
WXUNUSED(type
),
895 const wxString
& url
) const
897 return m_FS
? m_FS
->OpenFile(url
) : NULL
;
902 //-----------------------------------------------------------------------------
903 // wxHtmlParser::ExtractCharsetInformation
904 //-----------------------------------------------------------------------------
// Minimal wxHtmlParser subclass used by ExtractCharsetInformation(): it
// has no product (GetProduct() returns NULL) and discards all text, so only
// the tag handlers added to it have any effect.
906 class wxMetaTagParser
: public wxHtmlParser
909 wxMetaTagParser() { }
911 wxObject
* GetProduct() { return NULL
; }
// text content is of no interest when looking for META tags
914 virtual void AddText(const wxString
& WXUNUSED(txt
)) {}
916 DECLARE_NO_COPY_CLASS(wxMetaTagParser
)
// Tag handler for META and BODY tags. The constructor receives a pointer to
// the wxString through which HandleTag() reports the charset it finds.
919 class wxMetaTagHandler
: public wxHtmlTagHandler
922 wxMetaTagHandler(wxString
*retval
) : wxHtmlTagHandler(), m_retval(retval
) {}
923 wxString
GetSupportedTags() { return wxT("META,BODY"); }
924 bool HandleTag(const wxHtmlTag
& tag
);
929 DECLARE_NO_COPY_CLASS(wxMetaTagHandler
)
// BODY tag: asks the parser to stop parsing.
// Other (META) tags: if the tag has an HTTP-EQUIV parameter equal to
// "Content-Type" (compared with IsSameAs(..., false)) and a CONTENT
// parameter, the content is lower-cased; when it starts with
// "text/html; charset=" the remainder is stored in *m_retval and parsing is
// stopped.
932 bool wxMetaTagHandler::HandleTag(const wxHtmlTag
& tag
)
934 if (tag
.GetName() == _T("BODY"))
936 m_Parser
->StopParsing();
940 if (tag
.HasParam(_T("HTTP-EQUIV")) &&
941 tag
.GetParam(_T("HTTP-EQUIV")).IsSameAs(_T("Content-Type"), false) &&
942 tag
.HasParam(_T("CONTENT")))
944 wxString content
= tag
.GetParam(_T("CONTENT")).Lower();
// 19 is the length of the "text/html; charset=" prefix
945 if (content
.Left(19) == _T("text/html; charset="))
947 *m_retval
= content
.Mid(19);
948 m_Parser
->StopParsing();
// Looks for charset information in `markup` by running a temporary
// wxMetaTagParser over it; the wxMetaTagHandler added to that parser
// writes whatever charset it finds into `charset`.
956 wxString
wxHtmlParser::ExtractCharsetInformation(const wxString
& markup
)
959 wxMetaTagParser
*parser
= new wxMetaTagParser();
962 parser
->AddTagHandler(new wxMetaTagHandler(&charset
));
963 parser
->Parse(markup
);
// Tests whether the tag at `start` is an HTML comment ("<!--") and, if so,
// skips it.
//
//  start  position of the tag's '<' (asserted); passed by reference.
//         CreateDOMSubTree() relies on it being left on the comment's
//         closing '>' after a successful skip.
//  end    end of the text that may be examined
//
// The end of a comment is recognized as two or more dashes, optionally
// followed by white space, and then '>'.
971 wxHtmlParser::SkipCommentTag(wxString::const_iterator
& start
,
972 wxString::const_iterator end
)
974 wxASSERT_MSG( *start
== '<', _T("should be called on the tag start") );
976 wxString::const_iterator p
= start
;
978 // comments begin with "<!--" in HTML 4.0
979 if ( p
> end
- 3 || *++p
!= '!' || *++p
!= '-' || *++p
!= '-' )
981 // not a comment at all
985 // skip the start of the comment tag in any case, if we don't find the
986 // closing tag we should ignore broken markup
989 // comments end with "--[ \t\r\n]*>", i.e. white space is allowed between
990 // comment delimiter and the closing tag character (section 3.2.4 of
991 // http://www.w3.org/TR/html401/)
997 if ( (c
== wxT(' ') || c
== wxT('\n') ||
998 c
== wxT('\r') || c
== wxT('\t')) && dashes
>= 2 )
1000 // ignore white space before potential tag end
1004 if ( c
== wxT('>') && dashes
>= 2 )
1006 // found end of comment
1011 if ( c
== wxT('-') )
1020 #endif // wxUSE_HTML