1 ///////////////////////////////////////////////////////////////////////////// 
   2 // Name:        src/html/htmlpars.cpp 
   3 // Purpose:     wxHtmlParser class (generic parser) 
   4 // Author:      Vaclav Slavik 
   6 // Copyright:   (c) 1999 Vaclav Slavik 
   7 // Licence:     wxWindows licence 
   8 ///////////////////////////////////////////////////////////////////////////// 
  10 #include "wx/wxprec.h" 
  16 #if wxUSE_HTML && wxUSE_STREAMS 
  19     #include "wx/dynarray.h" 
  23     #include "wx/wxcrtvararg.h" 
  26 #include "wx/tokenzr.h" 
  27 #include "wx/wfstream.h" 
  29 #include "wx/fontmap.h" 
  30 #include "wx/html/htmldefs.h" 
  31 #include "wx/html/htmlpars.h" 
  32 #include "wx/vector.h" 
  35     #include "wx/msw/wince/missing.h"       // for bsearch() 
  38 // DLL options compatibility check: 
  39 WX_CHECK_BUILD_OPTIONS("wxHTML") 
  41 const wxChar 
*wxTRACE_HTML_DEBUG 
= wxT("htmldebug"); 
  43 //----------------------------------------------------------------------------- 
  44 // wxHtmlParser helpers 
  45 //----------------------------------------------------------------------------- 
  // Constructor: stores the two string iterators that delimit one piece of
  // text in m_start and m_end.
  51     wxHtmlTextPiece(const wxString::const_iterator
& start
, 
  52                     const wxString::const_iterator
& end
) 
  53         : m_start(start
), m_end(end
) {} 
  // Iterators marking where this piece of text starts and where it ends.
  54     wxString::const_iterator m_start
, m_end
; 
  57 // NB: this is an empty class and not typedef because of forward declaration 
  // Vector of wxHtmlTextPiece objects; the parser keeps the text found
  // between tags in a container of this type (m_TextPieces).
  58 class wxHtmlTextPieces 
: public wxVector
<wxHtmlTextPiece
> 
  // Snapshot of the parser's position within one source string. Instances are
  // chained through m_nextState; they are filled in by
  // wxHtmlParser::SetSourceAndSaveState() and consumed by
  // wxHtmlParser::RestoreState().
  62 class wxHtmlParserState
 
  // text pieces container that was current when the state was saved
  67     wxHtmlTextPieces  
*m_textPieces
; 
  // source string that was being parsed when the state was saved
  69     const wxString    
*m_source
; 
  // state that was saved before this one (next older entry of the chain)
  70     wxHtmlParserState 
*m_nextState
; 
  73 //----------------------------------------------------------------------------- 
  74 // wxHtmlParser 
  75 //----------------------------------------------------------------------------- 
  77 IMPLEMENT_ABSTRACT_CLASS(wxHtmlParser
,wxObject
) 
  // Constructs the parser. The entities parser is allocated on the heap here;
  // the matching delete is in the destructor.
  // NOTE(review): the member initializer list is not visible in this listing.
  79 wxHtmlParser::wxHtmlParser() 
  84     m_entitiesParser 
= new wxHtmlEntitiesParser
; 
  // Destroys the parser: unwinds every saved state, clears the handlers stack
  // and the handlers set, then deletes the entities parser.
  92 wxHtmlParser::~wxHtmlParser() 
  // RestoreState() returns false once no saved state is left
  94     while (RestoreState()) {} 
  // presumably these macros also delete the stored pointers (wx container
  // helpers) - TODO confirm against the wx headers
  97     WX_CLEAR_ARRAY(m_HandlersStack
); 
  98     WX_CLEAR_HASH_SET(wxHtmlTagHandlersSet
, m_HandlersSet
); 
  99     delete m_entitiesParser
; 
 // Parses 'source' and hands back the object obtained from GetProduct().
 // NOTE(review): only the GetProduct() call is visible in this listing; the
 // steps run before and after it should be checked against the full file.
 103 wxObject
* wxHtmlParser::Parse(const wxString
& source
) 
 107     wxObject 
*result 
= GetProduct(); 
 // Initializes the parser for a run over 'source'. Of the body, this listing
 // shows only the reset of the stop-parsing flag.
 112 void wxHtmlParser::InitParser(const wxString
& source
) 
 115     m_stopParsing 
= false; 
 // Presumably the counterpart of InitParser(), called when a parsing run is
 // over; the body is not visible in this listing - confirm in the full file.
 118 void wxHtmlParser::DoneParser() 
 // Makes a heap-allocated copy of 'src' the current source (m_Source).
 123 void wxHtmlParser::SetSource(const wxString
& src
) 
 126     // NB: This is allocated on heap because wxHtmlTag uses iterators and 
 127     //     making a copy of m_Source string in SetSourceAndSaveState() and 
 128     //     RestoreState() would invalidate them (because wxString::m_impl's 
 129     //     memory would change completely twice and iterators use pointers 
 130     //     into it). So instead, we keep the string object intact and only 
 131     //     store/restore pointer to it, for which we need it to be allocated 
 //     on the heap.
 134     m_Source 
= new wxString(src
); 
 // Builds the tag tree for the whole current source: creates a tags cache for
 // *m_Source, allocates a fresh text pieces container and runs
 // CreateDOMSubTree() from m_Source->begin() to m_Source->end() with no
 // parent tag.
 140 void wxHtmlParser::CreateDOMTree() 
 142     wxHtmlTagsCache 
cache(*m_Source
); 
 143     m_TextPieces 
= new wxHtmlTextPieces
; 
 144     CreateDOMSubTree(NULL
, m_Source
->begin(), m_Source
->end(), &cache
); 
 148 extern bool wxIsCDATAElement(const wxString
& tag
); 
 // Builds the part of the tag tree that lies between begin_pos and end_pos.
 //   cur       - parent for the tags created in this range, or NULL at the
 //               top level
 //   begin_pos - where scanning starts
 //   end_pos   - where scanning stops
 //   cache     - tags cache passed on to every wxHtmlTag created here
 // Text found between tags is appended to m_TextPieces. For a tag that has an
 // ending, the function recurses into the range between the tag's
 // GetBeginIter() and GetEndIter1() and then continues from GetEndIter2().
 150 void wxHtmlParser::CreateDOMSubTree(wxHtmlTag 
*cur
, 
 151                                     const wxString::const_iterator
& begin_pos
, 
 152                                     const wxString::const_iterator
& end_pos
, 
 153                                     wxHtmlTagsCache 
*cache
) 
 // nothing to do for an empty (or inverted) range
 155     if (end_pos 
<= begin_pos
) 
 159     wxString::const_iterator i 
= begin_pos
; 
 160     wxString::const_iterator textBeginning 
= begin_pos
; 
 162     // If the tag contains CDATA text, we include the text between beginning 
 163     // and ending tag verbatim. Setting i=end_pos will skip to the very 
 164     // end of this function where text piece is added, bypassing any child 
 165     // tags parsing (CDATA element can't have child elements by definition): 
 166     if (cur 
!= NULL 
&& wxIsCDATAElement(cur
->GetName())) 
 177             // add text to m_TextPieces: 
 178             if (i 
> textBeginning
) 
 179                 m_TextPieces
->push_back(wxHtmlTextPiece(textBeginning
, i
)); 
 181             // if it is a comment, skip it: 
 // NOTE(review): the comment scan is bounded by the end of the whole source,
 // not by end_pos - confirm that a comment may run past the current range.
 182             if ( SkipCommentTag(i
, m_Source
->end()) ) 
 184                 textBeginning 
= i 
= i 
+ 1; // skip closing '>' too 
 187             // add another tag to the tree: 
 // (taken only when the character after '<' is not '/', i.e. this is not an
 // ending tag)
 188             else if (i 
< end_pos
-1 && *(i
+1) != wxT('/')) 
 192                     chd 
= new wxHtmlTag(cur
, m_Source
, 
 193                                         i
, end_pos
, cache
, m_entitiesParser
); 
 196                     chd 
= new wxHtmlTag(NULL
, m_Source
, 
 197                                         i
, end_pos
, cache
, m_entitiesParser
); 
 200                         // if this is the first tag to be created make the root 
 201                         // m_Tags point to it: 
 206                         // if there is already a root tag add this tag as 
 // its last sibling: link it after m_Tags->GetLastSibling() in both
 // directions
 208                         chd
->m_Prev 
= m_Tags
->GetLastSibling(); 
 209                         chd
->m_Prev
->m_Next 
= chd
; 
 213                 if (chd
->HasEnding()) 
 215                     CreateDOMSubTree(chd
, 
 216                                      chd
->GetBeginIter(), chd
->GetEndIter1(), 
 218                     i 
= chd
->GetEndIter2(); 
 221                     i 
= chd
->GetBeginIter(); 
 226             // ... or skip ending tag: 
 229                 while (i 
< end_pos 
&& *i 
!= wxT('>')) ++i
; 
 236     // add remaining text to m_TextPieces: 
 237     if (end_pos 
> textBeginning
) 
 238         m_TextPieces
->push_back(wxHtmlTextPiece(textBeginning
, end_pos
)); 
 // Disposes of the tag tree and leaves both m_Tags and m_CurTag set to NULL.
 // NOTE(review): of the traversal, this listing shows only the step that
 // fetches the next sibling; the deletion itself is not visible here.
 241 void wxHtmlParser::DestroyDOMTree() 
 247         t2 
= t1
->GetNextSibling(); 
 251     m_Tags 
= m_CurTag 
= NULL
; 
 // Parses the whole current source by calling the range overload with
 // m_Source->begin() and m_Source->end().
 // NOTE(review): statements preceding the call are not visible in this
 // listing.
 257 void wxHtmlParser::DoParsing() 
 261     DoParsing(m_Source
->begin(), m_Source
->end()); 
 // Processes the source between begin_pos_ and end_pos, alternating between
 // text pieces and tags in the order they occur: a text piece is run through
 // the entities parser and passed to AddText(); for a tag the position jumps
 // past it and m_CurTag moves on to the next tag.
 // begin_pos_ is copied into the local begin_pos because the position is
 // advanced as parsing proceeds.
 264 void wxHtmlParser::DoParsing(const wxString::const_iterator
& begin_pos_
, 
 265                              const wxString::const_iterator
& end_pos
) 
 267     wxString::const_iterator 
begin_pos(begin_pos_
); 
 269     if (end_pos 
<= begin_pos
) 
 272     wxHtmlTextPieces
& pieces 
= *m_TextPieces
; 
 273     size_t piecesCnt 
= pieces
.size(); 
 275     while (begin_pos 
< end_pos
) 
 // skip tags and text pieces that start before the current position
 277         while (m_CurTag 
&& m_CurTag
->GetBeginIter() < begin_pos
) 
 278             m_CurTag 
= m_CurTag
->GetNextTag(); 
 279         while (m_CurTextPiece 
< piecesCnt 
&& 
 280                pieces
[m_CurTextPiece
].m_start 
< begin_pos
) 
 // text comes next when a piece is left and it starts before the current tag
 // (part of this condition is not visible in this listing)
 283         if (m_CurTextPiece 
< piecesCnt 
&& 
 285              pieces
[m_CurTextPiece
].m_start 
< m_CurTag
->GetBeginIter())) 
 288             AddText(GetEntitiesParser()->Parse( 
 289                        wxString(pieces
[m_CurTextPiece
].m_start
, 
 290                                 pieces
[m_CurTextPiece
].m_end
))); 
 291             begin_pos 
= pieces
[m_CurTextPiece
].m_end
; 
 // otherwise a tag comes next: continue after its ending if it has one
 296             if (m_CurTag
->HasEnding()) 
 297                 begin_pos 
= m_CurTag
->GetEndIter2(); 
 299                 begin_pos 
= m_CurTag
->GetBeginIter(); 
 // remember the tag before advancing m_CurTag (what is done with 't'
 // afterwards is not visible in this listing)
 300             wxHtmlTag 
*t 
= m_CurTag
; 
 301             m_CurTag 
= m_CurTag
->GetNextTag(); 
 // Looks up the handler registered for the tag's name in m_HandlersHash and,
 // when there is one, lets it handle the tag; HandleTag()'s result is kept in
 // 'inner'.
 // The tag's inner range (GetBeginIter() to GetEndIter1()) is also parsed
 // from here via DoParsing(); the condition guarding that call is not visible
 // in this listing.
 310 void wxHtmlParser::AddTag(const wxHtmlTag
& tag
) 
 314     wxHtmlTagHandlersHash::const_iterator h 
= m_HandlersHash
.find(tag
.GetName()); 
 315     if (h 
!= m_HandlersHash
.end()) 
 317         inner 
= h
->second
->HandleTag(tag
); 
 324             DoParsing(tag
.GetBeginIter(), tag
.GetEndIter1()); 
 // Registers 'handler' for every tag name listed in its GetSupportedTags()
 // string, records it in m_HandlersSet and attaches this parser to it.
 // The tag list is split on commas and spaces.
 328 void wxHtmlParser::AddTagHandler(wxHtmlTagHandler 
*handler
) 
 330     wxString 
s(handler
->GetSupportedTags()); 
 331     wxStringTokenizer 
tokenizer(s
, wxT(", ")); 
 // a later registration for the same tag name replaces the earlier one
 333     while (tokenizer
.HasMoreTokens()) 
 334         m_HandlersHash
[tokenizer
.GetNextToken()] = handler
; 
 // m_HandlersSet is cleared in the destructor
 336     m_HandlersSet
.insert(handler
); 
 338     handler
->SetParser(this); 
 // Overrides the handler for the tags listed in 'tags' (split on commas and
 // spaces) with 'handler'. A heap-allocated copy of the current
 // m_HandlersHash is pushed on m_HandlersStack first, so that
 // PopTagHandler() can bring the previous mapping back.
 341 void wxHtmlParser::PushTagHandler(wxHtmlTagHandler 
*handler
, const wxString
& tags
) 
 343     wxStringTokenizer 
tokenizer(tags
, wxT(", ")); 
 // save the mapping as it is before the override
 346     m_HandlersStack
.push_back(new wxHtmlTagHandlersHash(m_HandlersHash
)); 
 348     while (tokenizer
.HasMoreTokens()) 
 350         key 
= tokenizer
.GetNextToken(); 
 351         m_HandlersHash
[key
] = handler
; 
 // Undoes the most recent PushTagHandler(): takes the saved hash off
 // m_HandlersStack and copies it back into m_HandlersHash. The check at the
 // top rejects the call when the stack is empty.
 // NOTE(review): the popped hash was allocated with 'new' in
 // PushTagHandler() and no release of 'prev' is visible in this listing -
 // confirm that it is deleted after the copy.
 355 void wxHtmlParser::PopTagHandler() 
 357     wxCHECK_RET( !m_HandlersStack
.empty(), 
 358                  "attempt to remove HTML tag handler from empty stack" ); 
 360     wxHtmlTagHandlersHash 
*prev 
= m_HandlersStack
.back(); 
 361     m_HandlersStack
.pop_back(); 
 362     m_HandlersHash 
= *prev
; 
 // Records the current parsing position (current tag, text pieces container,
 // text piece index and source pointer) in a new wxHtmlParserState whose
 // m_nextState is the present head of m_SavedStates.
 // NOTE(review): the switch-over to 'src' and the update of m_SavedStates
 // are not visible in this listing.
 366 void wxHtmlParser::SetSourceAndSaveState(const wxString
& src
) 
 368     wxHtmlParserState 
*s 
= new wxHtmlParserState
; 
 370     s
->m_curTag 
= m_CurTag
; 
 372     s
->m_textPieces 
= m_TextPieces
; 
 373     s
->m_curTextPiece 
= m_CurTextPiece
; 
 // only the pointer is saved, the string itself is not copied
 374     s
->m_source 
= m_Source
; 
 376     s
->m_nextState 
= m_SavedStates
; 
 // Takes the most recently saved state off m_SavedStates and restores
 // m_CurTag, m_TextPieces, m_CurTextPiece and m_Source from it.
 // Returns false when there is no saved state.
 // NOTE(review): disposal of the state object and of the data being replaced
 // is not visible in this listing.
 388 bool wxHtmlParser::RestoreState() 
 390     if (!m_SavedStates
) return false; 
 // unlink the head of the chain
 395     wxHtmlParserState 
*s 
= m_SavedStates
; 
 396     m_SavedStates 
= s
->m_nextState
; 
 398     m_CurTag 
= s
->m_curTag
; 
 400     m_TextPieces 
= s
->m_textPieces
; 
 401     m_CurTextPiece 
= s
->m_curTextPiece
; 
 402     m_Source 
= s
->m_source
; 
 408 wxString 
wxHtmlParser::GetInnerSource(const wxHtmlTag
& tag
) 
 410     return wxString(tag
.GetBeginIter(), tag
.GetEndIter1()); 
 413 //----------------------------------------------------------------------------- 
 414 // wxHtmlTagHandler 
 415 //----------------------------------------------------------------------------- 
 417 IMPLEMENT_ABSTRACT_CLASS(wxHtmlTagHandler
,wxObject
) 
 419 void wxHtmlTagHandler::ParseInnerSource(const wxString
& source
) 
 421     // It is safe to temporarily change the source being parsed, 
 422     // provided we restore the state back after parsing 
 423     m_Parser
->SetSourceAndSaveState(source
); 
 424     m_Parser
->DoParsing(); 
 425     m_Parser
->RestoreState(); 
 429 //----------------------------------------------------------------------------- 
 430 // wxHtmlEntitiesParser 
 431 //----------------------------------------------------------------------------- 
 433 IMPLEMENT_DYNAMIC_CLASS(wxHtmlEntitiesParser
,wxObject
) 
 // Constructs the entities parser. In builds with wchar_t support but without
 // Unicode, it starts with no converter (m_conv is NULL) and with the
 // encoding set to wxFONTENCODING_SYSTEM.
 435 wxHtmlEntitiesParser::wxHtmlEntitiesParser() 
 436 #if wxUSE_WCHAR_T && !wxUSE_UNICODE 
 437     : m_conv(NULL
), m_encoding(wxFONTENCODING_SYSTEM
) 
 // Destructor.
 // NOTE(review): the statements guarded by the #if are not visible in this
 // listing; presumably they release m_conv - confirm in the full file.
 442 wxHtmlEntitiesParser::~wxHtmlEntitiesParser() 
 444 #if wxUSE_WCHAR_T && !wxUSE_UNICODE 
 // Selects the encoding used when entity codes are converted to characters.
 // The visible code applies to builds with wchar_t support but without
 // Unicode: the new encoding is stored and, for the non-system case shown
 // below, m_conv becomes a new wxCSConv created from the encoding's name.
 // NOTE(review): what happens to the previous m_conv, and the branch taken
 // for wxFONTENCODING_SYSTEM, are not visible in this listing.
 449 void wxHtmlEntitiesParser::SetEncoding(wxFontEncoding encoding
) 
 451 #if wxUSE_WCHAR_T && !wxUSE_UNICODE 
 // compared against the encoding already in effect
 452     if (encoding 
== m_encoding
) 
 457     m_encoding 
= encoding
; 
 458     if (m_encoding 
== wxFONTENCODING_SYSTEM
) 
 461         m_conv 
= new wxCSConv(wxFontMapper::GetEncodingName(m_encoding
)); 
 // Returns 'input' with the HTML entity references it contains replaced by
 // the characters GetEntityChar() yields for them. A reference that is not
 // recognized is copied to the output unchanged and reported through
 // wxLogTrace().
 // 'last' marks the start of the text not yet copied to the output and 'c'
 // is the scanning position.
 // NOTE(review): the test that detects the start of an entity is not visible
 // in this listing.
 467 wxString 
wxHtmlEntitiesParser::Parse(const wxString
& input
) const 
 471     const wxString::const_iterator 
end(input
.end()); 
 472     wxString::const_iterator 
c(input
.begin()); 
 473     wxString::const_iterator 
last(c
); 
 475     for ( ; c 
< end
; ++c 
) 
 // the output buffer is reserved lazily, on first use
 479             if ( output
.empty() ) 
 480                 output
.reserve(input
.length()); 
 // copy the plain text accumulated so far
 483                 output
.append(last
, c
); 
 488             const wxString::const_iterator ent_s 
= c
; 
 // the entity name extends up to the first character that is not a letter, a
 // digit, '_' or '#'
 491             for ( ; c 
!= end
; ++c 
) 
 494                 if ( !((ch 
>= wxT('a') && ch 
<= wxT('z')) || 
 495                        (ch 
>= wxT('A') && ch 
<= wxT('Z')) || 
 496                        (ch 
>= wxT('0') && ch 
<= wxT('9')) || 
 497                         ch 
== wxT('_') || ch 
== wxT('#')) ) 
 501             entity
.append(ent_s
, c
); 
 // a terminating ';' belongs to the entity; any other terminator does not, so
 // step back and let the enclosing loop's ++c land on it again
 502             if (c 
== end 
|| *c 
!= wxT(';')) --c
; 
 504             entity_char 
= GetEntityChar(entity
); 
 506                 output 
<< entity_char
; 
 // unknown entity: keep the original text, from the character preceding the
 // name through the current position
 509                 output
.append(ent_s
-1, c
+1); 
 510                 wxLogTrace(wxTRACE_HTML_DEBUG
, 
 511                            "Unrecognized HTML entity: '%s'", 
 516     if ( last 
== input
.begin() ) // common case: no entity 
 // copy the text that follows the last entity
 519         output
.append(last
, end
); 
 // Converts a numeric character code into a wxChar.
 // Two paths are visible in this listing: one converts the code, as a wide
 // character, with m_conv (or wxConvLocal when no converter is set); the
 // other returns the code itself when it is below 256 and '?' otherwise.
 // NOTE(review): the preprocessor conditions choosing between the paths, and
 // the handling of a failed conversion, are not visible here.
 524 wxChar 
wxHtmlEntitiesParser::GetCharForCode(unsigned code
) const 
 529     wbuf
[0] = (wchar_t)code
; 
 531     wxMBConv 
*conv 
= m_conv 
? m_conv 
: &wxConvLocal
; 
 // (size_t)-1 is the value tested for as the conversion failure result
 532     if (conv
->WC2MB(buf
, wbuf
, 2) == (size_t)-1) 
 536     return (code 
< 256) ? (wxChar
)code 
: '?'; 
 // One row of the named-entities table used by
 // wxHtmlEntitiesParser::GetEntityChar(): the entity name in wx's internal
 // string character type, paired there with a numeric code.
 541 struct wxHtmlEntityInfo
 
 // entity name, without the leading '&' and the trailing ';'
 543     const wxStringCharType 
*name
; 
 // Comparison callback passed to bsearch(): 'key' is the entity name being
 // looked up and 'item' points to a wxHtmlEntityInfo whose name it is
 // compared with. UTF-8 builds compare with strcmp(), the other visible
 // branch with wxStrcmp().
 547 extern "C" int LINKAGEMODE 
wxHtmlEntityCompare(const void *key
, const void *item
) 
 549 #if wxUSE_UNICODE_UTF8 
 550     return strcmp((char*)key
, ((wxHtmlEntityInfo
*)item
)->name
); 
 552     return wxStrcmp((wxChar
*)key
, ((wxHtmlEntityInfo
*)item
)->name
); 
 // Translates an entity (the text between '&' and ';') into a character.
 // A numeric entity starts with '#' and is read with wxSscanf(); an 'x' or
 // 'X' after the '#' selects a different scan format. A named entity is
 // looked up in the static table below. The resulting code is converted with
 // GetCharForCode(); 0 is returned for an invalid entity reference.
 556 wxChar 
wxHtmlEntitiesParser::GetEntityChar(const wxString
& entity
) const 
 561       return 0; // invalid entity reference 
 563     if (entity
[0] == wxT('#')) 
 565         // NB: parsed value is a number, so it's OK to use wx_str(), internal 
 566         //     representation is the same for numbers 
 567         const wxStringCharType 
*ent_s 
= entity
.wx_str(); 
 568         const wxStringCharType 
*format
; 
 570         if (ent_s
[1] == wxS('x') || ent_s
[1] == wxS('X')) 
 579         if (wxSscanf(ent_s
, format
, &code
) != 1) 
 584         // store the literals in wx's internal representation (either char* 
 585         // in UTF-8 or wchar_t*) for best performance: 
 586         #define ENTITY(name, code) { wxS(name), code } 
 // The table is searched with bsearch() further down, so its entries must
 // stay sorted by name in the order used by wxHtmlEntityCompare() (upper
 // case names come before lower case ones).
 588         static wxHtmlEntityInfo substitutions
[] = { 
 589             ENTITY("AElig", 198), 
 590             ENTITY("Aacute", 193), 
 591             ENTITY("Acirc", 194), 
 592             ENTITY("Agrave", 192), 
 593             ENTITY("Alpha", 913), 
 594             ENTITY("Aring", 197), 
 595             ENTITY("Atilde", 195), 
 598             ENTITY("Ccedil", 199), 
 600             ENTITY("Dagger", 8225), 
 601             ENTITY("Delta", 916), 
 603             ENTITY("Eacute", 201), 
 604             ENTITY("Ecirc", 202), 
 605             ENTITY("Egrave", 200), 
 606             ENTITY("Epsilon", 917), 
 609             ENTITY("Gamma", 915), 
 610             ENTITY("Iacute", 205), 
 611             ENTITY("Icirc", 206), 
 612             ENTITY("Igrave", 204), 
 615             ENTITY("Kappa", 922), 
 616             ENTITY("Lambda", 923), 
 618             ENTITY("Ntilde", 209), 
 620             ENTITY("OElig", 338), 
 621             ENTITY("Oacute", 211), 
 622             ENTITY("Ocirc", 212), 
 623             ENTITY("Ograve", 210), 
 624             ENTITY("Omega", 937), 
 625             ENTITY("Omicron", 927), 
 626             ENTITY("Oslash", 216), 
 627             ENTITY("Otilde", 213), 
 631             ENTITY("Prime", 8243), 
 634             ENTITY("Scaron", 352), 
 635             ENTITY("Sigma", 931), 
 636             ENTITY("THORN", 222), 
 638             ENTITY("Theta", 920), 
 639             ENTITY("Uacute", 218), 
 640             ENTITY("Ucirc", 219), 
 641             ENTITY("Ugrave", 217), 
 642             ENTITY("Upsilon", 933), 
 645             ENTITY("Yacute", 221), 
 648             ENTITY("aacute", 225), 
 649             ENTITY("acirc", 226), 
 650             ENTITY("acute", 180), 
 651             ENTITY("aelig", 230), 
 652             ENTITY("agrave", 224), 
 653             ENTITY("alefsym", 8501), 
 654             ENTITY("alpha", 945), 
 659             ENTITY("aring", 229), 
 660             ENTITY("asymp", 8776), 
 661             ENTITY("atilde", 227), 
 663             ENTITY("bdquo", 8222), 
 665             ENTITY("brvbar", 166), 
 666             ENTITY("bull", 8226), 
 668             ENTITY("ccedil", 231), 
 669             ENTITY("cedil", 184), 
 673             ENTITY("clubs", 9827), 
 674             ENTITY("cong", 8773), 
 676             ENTITY("crarr", 8629), 
 678             ENTITY("curren", 164), 
 679             ENTITY("dArr", 8659), 
 680             ENTITY("dagger", 8224), 
 681             ENTITY("darr", 8595), 
 683             ENTITY("delta", 948), 
 684             ENTITY("diams", 9830), 
 685             ENTITY("divide", 247), 
 686             ENTITY("eacute", 233), 
 687             ENTITY("ecirc", 234), 
 688             ENTITY("egrave", 232), 
 689             ENTITY("empty", 8709), 
 690             ENTITY("emsp", 8195), 
 691             ENTITY("ensp", 8194), 
 692             ENTITY("epsilon", 949), 
 693             ENTITY("equiv", 8801), 
 697             ENTITY("euro", 8364), 
 698             ENTITY("exist", 8707), 
 700             ENTITY("forall", 8704), 
 701             ENTITY("frac12", 189), 
 702             ENTITY("frac14", 188), 
 703             ENTITY("frac34", 190), 
 704             ENTITY("frasl", 8260), 
 705             ENTITY("gamma", 947), 
 708             ENTITY("hArr", 8660), 
 709             ENTITY("harr", 8596), 
 710             ENTITY("hearts", 9829), 
 711             ENTITY("hellip", 8230), 
 712             ENTITY("iacute", 237), 
 713             ENTITY("icirc", 238), 
 714             ENTITY("iexcl", 161), 
 715             ENTITY("igrave", 236), 
 716             ENTITY("image", 8465), 
 717             ENTITY("infin", 8734), 
 720             ENTITY("iquest", 191), 
 721             ENTITY("isin", 8712), 
 723             ENTITY("kappa", 954), 
 724             ENTITY("lArr", 8656), 
 725             ENTITY("lambda", 955), 
 726             ENTITY("lang", 9001), 
 727             ENTITY("laquo", 171), 
 728             ENTITY("larr", 8592), 
 729             ENTITY("lceil", 8968), 
 730             ENTITY("ldquo", 8220), 
 732             ENTITY("lfloor", 8970), 
 733             ENTITY("lowast", 8727), 
 736             ENTITY("lsaquo", 8249), 
 737             ENTITY("lsquo", 8216), 
 740             ENTITY("mdash", 8212), 
 741             ENTITY("micro", 181), 
 742             ENTITY("middot", 183), 
 743             ENTITY("minus", 8722), 
 745             ENTITY("nabla", 8711), 
 747             ENTITY("ndash", 8211), 
 751             ENTITY("notin", 8713), 
 752             ENTITY("nsub", 8836), 
 753             ENTITY("ntilde", 241), 
 755             ENTITY("oacute", 243), 
 756             ENTITY("ocirc", 244), 
 757             ENTITY("oelig", 339), 
 758             ENTITY("ograve", 242), 
 759             ENTITY("oline", 8254), 
 760             ENTITY("omega", 969), 
 761             ENTITY("omicron", 959), 
 762             ENTITY("oplus", 8853), 
 766             ENTITY("oslash", 248), 
 767             ENTITY("otilde", 245), 
 768             ENTITY("otimes", 8855), 
 771             ENTITY("part", 8706), 
 772             ENTITY("permil", 8240), 
 773             ENTITY("perp", 8869), 
 777             ENTITY("plusmn", 177), 
 778             ENTITY("pound", 163), 
 779             ENTITY("prime", 8242), 
 780             ENTITY("prod", 8719), 
 781             ENTITY("prop", 8733), 
 784             ENTITY("rArr", 8658), 
 785             ENTITY("radic", 8730), 
 786             ENTITY("rang", 9002), 
 787             ENTITY("raquo", 187), 
 788             ENTITY("rarr", 8594), 
 789             ENTITY("rceil", 8969), 
 790             ENTITY("rdquo", 8221), 
 791             ENTITY("real", 8476), 
 793             ENTITY("rfloor", 8971), 
 796             ENTITY("rsaquo", 8250), 
 797             ENTITY("rsquo", 8217), 
 798             ENTITY("sbquo", 8218), 
 799             ENTITY("scaron", 353), 
 800             ENTITY("sdot", 8901), 
 803             ENTITY("sigma", 963), 
 804             ENTITY("sigmaf", 962), 
 806             ENTITY("spades", 9824), 
 808             ENTITY("sube", 8838), 
 814             ENTITY("supe", 8839), 
 815             ENTITY("szlig", 223), 
 817             ENTITY("there4", 8756), 
 818             ENTITY("theta", 952), 
 819             ENTITY("thetasym", 977), 
 820             ENTITY("thinsp", 8201), 
 821             ENTITY("thorn", 254), 
 822             ENTITY("tilde", 732), 
 823             ENTITY("times", 215), 
 824             ENTITY("trade", 8482), 
 825             ENTITY("uArr", 8657), 
 826             ENTITY("uacute", 250), 
 827             ENTITY("uarr", 8593), 
 828             ENTITY("ucirc", 251), 
 829             ENTITY("ugrave", 249), 
 831             ENTITY("upsih", 978), 
 832             ENTITY("upsilon", 965), 
 834             ENTITY("weierp", 8472), 
 836             ENTITY("yacute", 253), 
 841             ENTITY("zwnj", 8204), 
 // The number of entries is counted once, on first use, by walking the table
 // up to the entry whose code is 0, and is then kept in a static variable.
 844         static size_t substitutions_cnt 
= 0; 
 846         if (substitutions_cnt 
== 0) 
 847             while (substitutions
[substitutions_cnt
].code 
!= 0) 
 850         wxHtmlEntityInfo 
*info
; 
 852         // bsearch crashes under WinCE for some reason 
 // ... hence the linear search below as the alternative to bsearch()
 855         for (i 
= 0; i 
< substitutions_cnt
; i
++) 
 857             if (entity 
== substitutions
[i
].name
) 
 859                 info 
= & substitutions
[i
]; 
 864         info 
= (wxHtmlEntityInfo
*) bsearch(entity
.wx_str(), substitutions
, 
 866                                            sizeof(wxHtmlEntityInfo
), 
 867                                            wxHtmlEntityCompare
); 
 876         return GetCharForCode(code
); 
 879 wxFSFile 
*wxHtmlParser::OpenURL(wxHtmlURLType 
WXUNUSED(type
), 
 880                                 const wxString
& url
) const 
 882     return m_FS 
? m_FS
->OpenFile(url
) : NULL
; 
 887 //----------------------------------------------------------------------------- 
 888 // wxHtmlParser::ExtractCharsetInformation 
 889 //----------------------------------------------------------------------------- 
 // Minimal parser used by wxHtmlParser::ExtractCharsetInformation(): it
 // yields no product and ignores all text, so only the registered tag
 // handlers do any work.
 891 class wxMetaTagParser 
: public wxHtmlParser
 
 894     wxMetaTagParser() { } 
 // there is no product to return
 896     wxObject
* GetProduct() { return NULL
; } 
 // text between the tags is discarded
 899     virtual void AddText(const wxString
& WXUNUSED(txt
)) {} 
 901     wxDECLARE_NO_COPY_CLASS(wxMetaTagParser
); 
 // Tag handler for META and BODY tags that writes what it finds into the
 // string passed to its constructor (see HandleTag()).
 904 class wxMetaTagHandler 
: public wxHtmlTagHandler
 
 // 'retval' is the string that receives the result; only the pointer is kept
 907     wxMetaTagHandler(wxString 
*retval
) : wxHtmlTagHandler(), m_retval(retval
) {} 
 908     wxString 
GetSupportedTags() { return wxT("META,BODY"); } 
 909     bool HandleTag(const wxHtmlTag
& tag
); 
 914     wxDECLARE_NO_COPY_CLASS(wxMetaTagHandler
); 
 // Handles BODY and META tags while looking for the charset declaration.
 // A BODY tag stops the parser. A META tag whose HTTP-EQUIV parameter equals
 // "Content-Type" (compared case-insensitively) and whose lower-cased CONTENT
 // parameter starts with "text/html; charset=" has the rest of that value
 // stored in *m_retval, after which the parser is stopped as well.
 // NOTE(review): the prefix comparison requires exactly one space after the
 // ';', so a value such as "text/html;charset=..." is not recognized -
 // confirm whether that is intended. The stored charset name is lower-cased
 // because Lower() is applied before the prefix is cut off.
 917 bool wxMetaTagHandler::HandleTag(const wxHtmlTag
& tag
) 
 919     if (tag
.GetName() == wxT("BODY")) 
 921         m_Parser
->StopParsing(); 
 925     if (tag
.HasParam(wxT("HTTP-EQUIV")) && 
 926         tag
.GetParam(wxT("HTTP-EQUIV")).IsSameAs(wxT("Content-Type"), false) && 
 927         tag
.HasParam(wxT("CONTENT"))) 
 929         wxString content 
= tag
.GetParam(wxT("CONTENT")).Lower(); 
 // 19 is the length of "text/html; charset="
 930         if (content
.Left(19) == wxT("text/html; charset=")) 
 932             *m_retval 
= content
.Mid(19); 
 933             m_Parser
->StopParsing(); 
 // Runs a wxMetaTagParser with a wxMetaTagHandler over 'markup'; the handler
 // writes into the local 'charset' string whose address it is given.
 // NOTE(review): the parser is allocated with 'new'; its release and the
 // return statement are not visible in this listing.
 941 wxString 
wxHtmlParser::ExtractCharsetInformation(const wxString
& markup
) 
 944     wxMetaTagParser 
*parser 
= new wxMetaTagParser(); 
 947         parser
->AddTagHandler(new wxMetaTagHandler(&charset
)); 
 948         parser
->Parse(markup
); 
 // Checks whether an HTML comment begins at 'start', which must point at a
 // '<' character, and scans for its end without going past 'end'.
 // 'start' is taken by non-const reference; judging by the caller in
 // CreateDOMSubTree(), which steps over "the closing '>'" afterwards, it is
 // left on the comment's closing '>' when one is found.
 // NOTE(review): the return statements and the assignments to 'start' are
 // not visible in this listing.
 956 wxHtmlParser::SkipCommentTag(wxString::const_iterator
& start
, 
 957                              wxString::const_iterator end
) 
 959     wxASSERT_MSG( *start 
== '<', wxT("should be called on the tag start") ); 
 961     wxString::const_iterator p 
= start
; 
 963     // comments begin with "<!--" in HTML 4.0 
 // NOTE(review): this guard lets p == end - 3 through, i.e. only three
 // characters left; if they are "<!-" the third pre-increment makes p equal
 // to 'end' and dereferences it. "end - 3" is also formed for inputs shorter
 // than three characters. Presumably the test should be "end - p < 4" -
 // confirm and fix in the full file.
 964     if ( p 
> end 
- 3 || *++p 
!= '!' || *++p 
!= '-' || *++p 
!= '-' ) 
 966         // not a comment at all 
 970     // skip the start of the comment tag in any case, if we don't find the 
 971     // closing tag we should ignore broken markup 
 974     // comments end with "--[ \t\r\n]*>", i.e. white space is allowed between 
 975     // comment delimiter and the closing tag character (section 3.2.4 of 
 976     // http://www.w3.org/TR/html401/) 
 // 'dashes' is compared against 2 below, the length of the "--" delimiter
 982         if ( (c 
== wxT(' ') || c 
== wxT('\n') || 
 983               c 
== wxT('\r') || c 
== wxT('\t')) && dashes 
>= 2 ) 
 985             // ignore white space before potential tag end 
 989         if ( c 
== wxT('>') && dashes 
>= 2 ) 
 991             // found end of comment 
1005 #endif // wxUSE_HTML