1 ///////////////////////////////////////////////////////////////////////////// 
   2 // Name:        src/html/htmlpars.cpp 
   3 // Purpose:     wxHtmlParser class (generic parser) 
   4 // Author:      Vaclav Slavik 
   6 // Copyright:   (c) 1999 Vaclav Slavik 
   7 // Licence:     wxWindows licence 
   8 ///////////////////////////////////////////////////////////////////////////// 
  10 #include "wx/wxprec.h" 
  16 #if wxUSE_HTML && wxUSE_STREAMS 
  19     #include "wx/dynarray.h" 
  23     #include "wx/wxcrtvararg.h" 
  26 #include "wx/tokenzr.h" 
  27 #include "wx/wfstream.h" 
  29 #include "wx/fontmap.h" 
  30 #include "wx/html/htmldefs.h" 
  31 #include "wx/html/htmlpars.h" 
  32 #include "wx/vector.h" 
  35     #include "wx/msw/wince/missing.h"       // for bsearch() 
  38 // DLL options compatibility check: 
  39 WX_CHECK_BUILD_OPTIONS("wxHTML") 
  // Trace mask name passed to wxLogTrace() by the HTML parsing code in this
  // file (see the "Unrecognized HTML entity" trace message).
  41 const wxChar 
*wxTRACE_HTML_DEBUG 
= wxT("htmldebug"); 
  43 //----------------------------------------------------------------------------- 
  44 // wxHtmlParser helpers 
  45 //----------------------------------------------------------------------------- 
  // A run of text in the parsed source, described by a pair of iterators
  // into the source string rather than by a copy of the characters.
  // m_start is the first character of the run, m_end is one past its last.
  51     wxHtmlTextPiece(const wxString::const_iterator
& start
, 
  52                     const wxString::const_iterator
& end
) 
  53         : m_start(start
), m_end(end
) {} 
  54     wxString::const_iterator m_start
, m_end
; 
  // Container of wxHtmlTextPiece objects, filled while the tag tree is built
  // and consumed in order by DoParsing().
  57 // NB: this is an empty class and not typedef because of forward declaration 
  58 class wxHtmlTextPieces 
: public wxVector
<wxHtmlTextPiece
> 
  // Snapshot of the parser's position-related members, created by
  // SetSourceAndSaveState() and consumed by RestoreState().
  // Snapshots form a singly linked stack through m_nextState.
  62 class wxHtmlParserState
 
  // text pieces collection that was active when the state was saved
  67     wxHtmlTextPieces  
*m_textPieces
; 
  // source string object that was active when the state was saved (pointer is
  // stored, the string is not copied)
  69     const wxString    
*m_source
; 
  // previously saved state, or NULL if this is the oldest one
  70     wxHtmlParserState 
*m_nextState
; 
  73 //----------------------------------------------------------------------------- 
  75 //----------------------------------------------------------------------------- 
  // wx RTTI registration: wxHtmlParser is declared as an abstract class
  // whose base is wxObject.
  77 IMPLEMENT_ABSTRACT_CLASS(wxHtmlParser
,wxObject
) 
  // Constructor. Allocates the entities parser on the heap; the destructor
  // deletes it, so the parser object owns it.
  79 wxHtmlParser::wxHtmlParser() 
  84     m_entitiesParser 
= new wxHtmlEntitiesParser
; 
  // Destructor. Releases everything the parser owns: pending saved states,
  // the stack of pushed handler tables, the registered handlers and the
  // entities parser.
  92 wxHtmlParser::~wxHtmlParser() 
  // unwind every state that is still saved (RestoreState() returns false
  // once none is left)
  94     while (RestoreState()) {} 
  // delete the handler tables saved by PushTagHandler() and never popped
  97     WX_CLEAR_ARRAY(m_HandlersStack
); 
  // delete the handlers registered through AddTagHandler()
  98     WX_CLEAR_HASH_SET(wxHtmlTagHandlersSet
, m_HandlersSet
); 
  99     delete m_entitiesParser
; 
 // Parses the given HTML source and returns the object produced by
 // GetProduct().
 // NOTE(review): the calls that drive the parsing itself are not visible
 // here; presumably InitParser()/DoParsing()/DoneParser() -- confirm.
 103 wxObject
* wxHtmlParser::Parse(const wxString
& source
) 
 107     wxObject 
*result 
= GetProduct(); 
 // Prepares the parser for a new document given by `source`.
 // Clears the "stop parsing" flag so that an earlier StopParsing() request
 // does not affect this run.
 112 void wxHtmlParser::InitParser(const wxString
& source
) 
 115     m_stopParsing 
= false; 
 // Called when parsing of a document is finished.
 // NOTE(review): presumably the counterpart of InitParser() that frees the
 // per-document data -- confirm against the function body.
 118 void wxHtmlParser::DoneParser() 
 // Makes `src` the text being parsed. The parser keeps its own heap-allocated
 // copy of the string and refers to it through the m_Source pointer.
 123 void wxHtmlParser::SetSource(const wxString
& src
) 
 126     // NB: This is allocated on heap because wxHtmlTag uses iterators and 
 127     //     making a copy of m_Source string in SetSourceAndSaveState() and 
 128     //     RestoreState() would invalidate them (because wxString::m_impl's 
 129     //     memory would change completely twice and iterators use pointers 
 130     //     into it). So instead, we keep the string object intact and only 
 131     //     store/restore pointer to it, for which we need it to be allocated 
 134     m_Source 
= new wxString(src
); 
 // Builds the tag tree and the list of text pieces for the whole current
 // source string.
 140 void wxHtmlParser::CreateDOMTree() 
 // the tags cache is local: it is only needed while the tree is being built
 142     wxHtmlTagsCache 
cache(*m_Source
); 
 // fresh collection of text pieces, filled by CreateDOMSubTree()
 143     m_TextPieces 
= new wxHtmlTextPieces
; 
 // NULL parent: the tags found at this level are top-level tags
 144     CreateDOMSubTree(NULL
, m_Source
->begin(), m_Source
->end(), &cache
); 
 // Defined in another translation unit. Used by CreateDOMSubTree() to decide
 // whether the content of a tag must be kept as plain text instead of being
 // scanned for child tags.
 148 extern bool wxIsCDATAElement(const wxString
& tag
); 
 // Scans the source range [begin_pos, end_pos), creating a wxHtmlTag for
 // every opening tag found and recording the text between tags in
 // m_TextPieces.
 //
 //   cur        parent tag of the range, or NULL when scanning the top level
 //   begin_pos  first character of the range
 //   end_pos    one past the last character of the range
 //   cache      tags cache handed to every wxHtmlTag that is created
 //
 // Tags that have an ending are processed recursively: their inner range is
 // scanned with the new tag as parent.
 150 void wxHtmlParser::CreateDOMSubTree(wxHtmlTag 
*cur
, 
 151                                     const wxString::const_iterator
& begin_pos
, 
 152                                     const wxString::const_iterator
& end_pos
, 
 153                                     wxHtmlTagsCache 
*cache
) 
 // empty (or inverted) range
 155     if (end_pos 
<= begin_pos
) 
 // i is the scanning position; textBeginning is where the text run that has
 // not been stored yet starts
 159     wxString::const_iterator i 
= begin_pos
; 
 160     wxString::const_iterator textBeginning 
= begin_pos
; 
 162     // If the tag contains CDATA text, we include the text between beginning 
 163     // and ending tag verbatim. Setting i=end_pos will skip to the very 
 164     // end of this function where text piece is added, bypassing any child 
 165     // tags parsing (CDATA element can't have child elements by definition): 
 166     if (cur 
!= NULL 
&& wxIsCDATAElement(cur
->GetName())) 
 177             // add text to m_TextPieces: 
 178             if (i 
> textBeginning
) 
 179                 m_TextPieces
->push_back(wxHtmlTextPiece(textBeginning
, i
)); 
 181             // if it is a comment, skip it: 
 // (the comment is searched for up to the end of the whole source, not only
 // up to end_pos)
 182             if ( SkipCommentTag(i
, m_Source
->end()) ) 
 184                 textBeginning 
= i 
= i 
+ 1; // skip closing '>' too 
 187             // add another tag to the tree: 
 // (a '/' right after the current character marks an ending tag, which is
 // handled by the other branch)
 188             else if (i 
< end_pos
-1 && *(i
+1) != wxT('/')) 
 // tag with a parent: the parent is passed to the wxHtmlTag constructor
 192                     chd 
= new wxHtmlTag(cur
, m_Source
, 
 193                                         i
, end_pos
, cache
, m_entitiesParser
); 
 // tag without a parent: it is linked into the top-level list below
 196                     chd 
= new wxHtmlTag(NULL
, m_Source
, 
 197                                         i
, end_pos
, cache
, m_entitiesParser
); 
 200                         // if this is the first tag to be created make the root 
 201                         // m_Tags point to it: 
 206                         // if there is already a root tag add this tag as 
 208                         chd
->m_Prev 
= m_Tags
->GetLastSibling(); 
 209                         chd
->m_Prev
->m_Next 
= chd
; 
 213                 if (chd
->HasEnding()) 
 // recurse into the tag's content, then continue after its ending tag
 215                     CreateDOMSubTree(chd
, 
 216                                      chd
->GetBeginIter(), chd
->GetEndIter1(), 
 218                     i 
= chd
->GetEndIter2(); 
 // no ending tag: continue right at the position reported by GetBeginIter()
 221                     i 
= chd
->GetBeginIter(); 
 226             // ... or skip ending tag: 
 229                 while (i 
< end_pos 
&& *i 
!= wxT('>')) ++i
; 
 // text resumes after the '>' if one was found, otherwise at the range end
 230                 textBeginning 
= i 
< end_pos 
? i
+1 : i
; 
 236     // add remaining text to m_TextPieces: 
 237     if (end_pos 
> textBeginning
) 
 238         m_TextPieces
->push_back(wxHtmlTextPiece(textBeginning
, end_pos
)); 
 // Discards the data built by CreateDOMTree(): walks the sibling chain of
 // tags, then resets the tag pointers and deletes the text pieces.
 241 void wxHtmlParser::DestroyDOMTree() 
 // remember the next sibling before the current tag is disposed of
 247         t2 
= t1
->GetNextSibling(); 
 251     m_Tags 
= m_CurTag 
= NULL
; 
 // wxDELETE deletes the object and sets the pointer to NULL
 253     wxDELETE(m_TextPieces
); 
 // Parses the whole current source string by delegating to the range
 // overload with [begin, end) of m_Source.
 256 void wxHtmlParser::DoParsing() 
 260     DoParsing(m_Source
->begin(), m_Source
->end()); 
 // Processes the part of the source in [begin_pos_, end_pos).
 //
 // The range is consumed from left to right. At each step either the next
 // text piece is passed to AddText() (with entities already decoded) or the
 // next tag is taken, whichever the visible conditions select; the position
 // is then moved past the piece or the tag.
 263 void wxHtmlParser::DoParsing(const wxString::const_iterator
& begin_pos_
, 
 264                              const wxString::const_iterator
& end_pos
) 
 // local, modifiable copy of the start position
 266     wxString::const_iterator 
begin_pos(begin_pos_
); 
 // empty (or inverted) range
 268     if (end_pos 
<= begin_pos
) 
 271     wxHtmlTextPieces
& pieces 
= *m_TextPieces
; 
 272     size_t piecesCnt 
= pieces
.size(); 
 274     while (begin_pos 
< end_pos
) 
 // skip the tags that start before the current position
 276         while (m_CurTag 
&& m_CurTag
->GetBeginIter() < begin_pos
) 
 277             m_CurTag 
= m_CurTag
->GetNextTag(); 
 // skip the text pieces that start before the current position
 278         while (m_CurTextPiece 
< piecesCnt 
&& 
 279                pieces
[m_CurTextPiece
].m_start 
< begin_pos
) 
 // text comes first when a text piece is left and it starts before the
 // current tag
 282         if (m_CurTextPiece 
< piecesCnt 
&& 
 284              pieces
[m_CurTextPiece
].m_start 
< m_CurTag
->GetBeginIter())) 
 // copy the piece out of the source, decode its entities, hand it over
 287             AddText(GetEntitiesParser()->Parse( 
 288                        wxString(pieces
[m_CurTextPiece
].m_start
, 
 289                                 pieces
[m_CurTextPiece
].m_end
))); 
 290             begin_pos 
= pieces
[m_CurTextPiece
].m_end
; 
 // tag case: continue after the ending tag if there is one, otherwise at
 // the position reported by GetBeginIter()
 295             if (m_CurTag
->HasEnding()) 
 296                 begin_pos 
= m_CurTag
->GetEndIter2(); 
 298                 begin_pos 
= m_CurTag
->GetBeginIter(); 
 // keep the tag in `t` because m_CurTag is advanced before `t` is used
 299             wxHtmlTag 
*t 
= m_CurTag
; 
 300             m_CurTag 
= m_CurTag
->GetNextTag(); 
 // Dispatches `tag` to the handler registered for its name, if any.
 // The value returned by the handler's HandleTag() is stored in `inner`;
 // the tag's inner range [GetBeginIter(), GetEndIter1()) is parsed by this
 // function under a condition that is not fully visible here.
 // NOTE(review): presumably the inner range is parsed only when `inner` is
 // false, i.e. when the handler did not do it itself -- confirm.
 309 void wxHtmlParser::AddTag(const wxHtmlTag
& tag
) 
 // handlers are looked up by tag name
 313     wxHtmlTagHandlersHash::const_iterator h 
= m_HandlersHash
.find(tag
.GetName()); 
 314     if (h 
!= m_HandlersHash
.end()) 
 316         inner 
= h
->second
->HandleTag(tag
); 
 323             DoParsing(tag
.GetBeginIter(), tag
.GetEndIter1()); 
 // Registers `handler` for every tag name listed by its GetSupportedTags().
 // The handler is also added to m_HandlersSet, whose elements the destructor
 // deletes, so the parser takes ownership of it.
 327 void wxHtmlParser::AddTagHandler(wxHtmlTagHandler 
*handler
) 
 329     wxString 
s(handler
->GetSupportedTags()); 
 // tag names are separated by commas and/or spaces
 330     wxStringTokenizer 
tokenizer(s
, wxT(", ")); 
 // a name that is already registered is re-pointed to the new handler
 332     while (tokenizer
.HasMoreTokens()) 
 333         m_HandlersHash
[tokenizer
.GetNextToken()] = handler
; 
 335     m_HandlersSet
.insert(handler
); 
 // give the handler a back pointer to this parser
 337     handler
->SetParser(this); 
 // Temporarily makes `handler` responsible for the tag names listed in
 // `tags` (separated by commas and/or spaces). The handler table as it was
 // before the call is saved on m_HandlersStack so that PopTagHandler() can
 // bring it back. Unlike AddTagHandler(), the handler is not added to
 // m_HandlersSet here.
 340 void wxHtmlParser::PushTagHandler(wxHtmlTagHandler 
*handler
, const wxString
& tags
) 
 342     wxStringTokenizer 
tokenizer(tags
, wxT(", ")); 
 // save a full copy of the current table before modifying it
 345     m_HandlersStack
.push_back(new wxHtmlTagHandlersHash(m_HandlersHash
)); 
 347     while (tokenizer
.HasMoreTokens()) 
 349         key 
= tokenizer
.GetNextToken(); 
 350         m_HandlersHash
[key
] = handler
; 
 // Undoes the most recent PushTagHandler(): the handler table saved by that
 // call replaces the current one. Calling this with nothing pushed is a
 // programming error reported through wxCHECK_RET.
 // NOTE(review): `prev` was allocated with new in PushTagHandler(); its
 // deletion is not visible here -- confirm it is freed after the copy.
 354 void wxHtmlParser::PopTagHandler() 
 356     wxCHECK_RET( !m_HandlersStack
.empty(), 
 357                  "attempt to remove HTML tag handler from empty stack" ); 
 359     wxHtmlTagHandlersHash 
*prev 
= m_HandlersStack
.back(); 
 360     m_HandlersStack
.pop_back(); 
 // copy the saved table over the current one
 361     m_HandlersHash 
= *prev
; 
 // Saves the parser's current position-related members in a new
 // wxHtmlParserState that is linked in front of the previously saved ones,
 // so that parsing can later be resumed with RestoreState().
 // NOTE(review): the part that pushes the state and switches to `src` is
 // not visible here -- confirm.
 365 void wxHtmlParser::SetSourceAndSaveState(const wxString
& src
) 
 367     wxHtmlParserState 
*s 
= new wxHtmlParserState
; 
 // only pointers and the index are stored; nothing is copied
 369     s
->m_curTag 
= m_CurTag
; 
 371     s
->m_textPieces 
= m_TextPieces
; 
 372     s
->m_curTextPiece 
= m_CurTextPiece
; 
 373     s
->m_source 
= m_Source
; 
 // link in front of the states saved earlier
 375     s
->m_nextState 
= m_SavedStates
; 
 // Restores the members saved by the most recent SetSourceAndSaveState()
 // that has not been restored yet.
 // Returns false if there is no saved state.
 // NOTE(review): the return value on success and the disposal of `s` are
 // not visible here -- confirm.
 387 bool wxHtmlParser::RestoreState() 
 389     if (!m_SavedStates
) return false; 
 // unlink the most recent state from the list
 394     wxHtmlParserState 
*s 
= m_SavedStates
; 
 395     m_SavedStates 
= s
->m_nextState
; 
 // put the saved pointers and index back
 397     m_CurTag 
= s
->m_curTag
; 
 399     m_TextPieces 
= s
->m_textPieces
; 
 400     m_CurTextPiece 
= s
->m_curTextPiece
; 
 401     m_Source 
= s
->m_source
; 
 // Returns a copy of the source text in the range
 // [tag.GetBeginIter(), tag.GetEndIter1()), the same range that
 // CreateDOMSubTree() scans as the content of a tag that has an ending.
 407 wxString 
wxHtmlParser::GetInnerSource(const wxHtmlTag
& tag
) 
 409     return wxString(tag
.GetBeginIter(), tag
.GetEndIter1()); 
 412 //----------------------------------------------------------------------------- 
 414 //----------------------------------------------------------------------------- 
 // wx RTTI registration: wxHtmlTagHandler is declared as an abstract class
 // whose base is wxObject.
 416 IMPLEMENT_ABSTRACT_CLASS(wxHtmlTagHandler
,wxObject
) 
 // Lets the parser process `source` as a nested document and then returns
 // the parser to the state it was in before the call.
 418 void wxHtmlTagHandler::ParseInnerSource(const wxString
& source
) 
 420     // It is safe to temporarily change the source being parsed, 
 421     // provided we restore the state back after parsing 
 422     m_Parser
->SetSourceAndSaveState(source
); 
 423     m_Parser
->DoParsing(); 
 424     m_Parser
->RestoreState(); 
 428 //----------------------------------------------------------------------------- 
 429 // wxHtmlEntitiesParser 
 430 //----------------------------------------------------------------------------- 
 // wx RTTI registration: wxHtmlEntitiesParser is declared as a dynamically
 // creatable class whose base is wxObject.
 432 IMPLEMENT_DYNAMIC_CLASS(wxHtmlEntitiesParser
,wxObject
) 
 // Constructor. Starts with no conversion object and with the system
 // encoding selected.
 434 wxHtmlEntitiesParser::wxHtmlEntitiesParser() 
 436     : m_conv(NULL
), m_encoding(wxFONTENCODING_SYSTEM
) 
 // Destructor.
 // NOTE(review): presumably deletes m_conv, which SetEncoding() allocates
 // with new -- confirm against the function body.
 441 wxHtmlEntitiesParser::~wxHtmlEntitiesParser() 
 // Selects the encoding used when entity codes are converted to characters.
 // A conversion object for the encoding is created when the encoding
 // changes.
 // NOTE(review): the disposal of the previous m_conv and the exact role of
 // the wxFONTENCODING_SYSTEM test are not visible here -- confirm.
 449 void wxHtmlEntitiesParser::SetEncoding(wxFontEncoding encoding
) 
 // same encoding as before
 451     if (encoding 
== m_encoding
) 
 456     m_encoding 
= encoding
; 
 457     if (m_encoding 
== wxFONTENCODING_SYSTEM
) 
 // converter is created from the encoding's name
 460         m_conv 
= new wxCSConv(wxFontMapper::GetEncodingName(m_encoding
)); 
 462 #endif // !wxUSE_UNICODE 
 // Returns `input` with HTML entities replaced by the characters they
 // stand for.
 //
 // An entity name is the longest run of ASCII letters, digits, '_' and '#'
 // found at the current position; a ';' right after it is consumed as its
 // terminator. Names that GetEntityChar() recognizes are replaced by the
 // returned character; the others are copied to the output unchanged and
 // reported with wxLogTrace().
 464 wxString 
wxHtmlEntitiesParser::Parse(const wxString
& input
) const 
 // c is the scanning position; last marks the start of the input that has
 // not been copied to the output yet
 468     const wxString::const_iterator 
end(input
.end()); 
 469     wxString::const_iterator 
c(input
.begin()); 
 470     wxString::const_iterator 
last(c
); 
 472     for ( ; c 
< end
; ++c 
) 
 // reserve the output buffer only when it is first needed
 476             if ( output
.empty() ) 
 477                 output
.reserve(input
.length()); 
 // copy the plain text that precedes the entity
 480                 output
.append(last
, c
); 
 // ent_s: first character of the entity name
 485             const wxString::const_iterator ent_s 
= c
; 
 // advance c over the characters allowed in an entity name
 488             for ( ; c 
!= end
; ++c 
) 
 491                 if ( !((ch 
>= wxT('a') && ch 
<= wxT('z')) || 
 492                        (ch 
>= wxT('A') && ch 
<= wxT('Z')) || 
 493                        (ch 
>= wxT('0') && ch 
<= wxT('9')) || 
 494                         ch 
== wxT('_') || ch 
== wxT('#')) ) 
 498             entity
.append(ent_s
, c
); 
 // no ';' after the name: step back so that the character following the
 // name is not swallowed by the ++c of the outer loop
 499             if (c 
== end 
|| *c 
!= wxT(';')) --c
; 
 501             entity_char 
= GetEntityChar(entity
); 
 503                 output 
<< entity_char
; 
 // unknown entity: copy it as it appears in the input, starting at the
 // character before the name and ending at the current position
 506                 output
.append(ent_s
-1, c
+1); 
 507                 wxLogTrace(wxTRACE_HTML_DEBUG
, 
 508                            "Unrecognized HTML entity: '%s'", 
 513     if ( last 
== input
.begin() ) // common case: no entity 
 // copy the text that follows the last entity
 516         output
.append(last
, end
); 
 // Returns the character to use for the character code `code`.
 // The visible lines convert the code, taken as a wide character, to a
 // multibyte string with m_conv, or with wxConvLocal if m_conv is not set.
 // NOTE(review): this conversion is presumably compiled only in
 // non-Unicode builds; the preprocessor lines are not visible -- confirm.
 521 wxChar 
wxHtmlEntitiesParser::GetCharForCode(unsigned code
) const 
 525     wbuf
[0] = (wchar_t)code
; 
 527     wxMBConv 
*conv 
= m_conv 
? m_conv 
: &wxConvLocal
; 
 // (size_t)-1 is the failure value of WC2MB()
 528     if (conv
->WC2MB(buf
, wbuf
, 2) == (size_t)-1) 
 // One row of the entity table used by GetEntityChar(): an entity name and
 // its character code (the table rows are written as { name, code }).
 534 struct wxHtmlEntityInfo
 
 // entity name without '&' and ';', kept in wx's internal string
 // representation
 536     const wxStringCharType 
*name
; 
 // Comparison callback for bsearch() over the entity table.
 //   key   the entity name searched for (a string, not a table row)
 //   item  a wxHtmlEntityInfo row of the table
 // Returns the result of comparing the key with the row's name: strcmp()
 // when wx strings are UTF-8 internally, wxStrcmp() otherwise.
 540 extern "C" int LINKAGEMODE 
wxHtmlEntityCompare(const void *key
, const void *item
) 
 542 #if wxUSE_UNICODE_UTF8 
 543     return strcmp((char*)key
, ((wxHtmlEntityInfo
*)item
)->name
); 
 545     return wxStrcmp((wxChar
*)key
, ((wxHtmlEntityInfo
*)item
)->name
); 
 // Returns the character for the entity named `entity` (the name is given
 // without the leading '&' and the trailing ';'), or 0 if the reference is
 // invalid.
 //
 // Names that start with '#' are numeric references: the number is read
 // with wxSscanf(), using a different format when the second character is
 // 'x' or 'X'. All other names are looked up in the table below. The code
 // that is found is converted with GetCharForCode().
 549 wxChar 
wxHtmlEntitiesParser::GetEntityChar(const wxString
& entity
) const 
 554       return 0; // invalid entity reference 
 556     if (entity
[0] == wxT('#')) 
 558         // NB: parsed value is a number, so it's OK to use wx_str(), internal 
 559         //     representation is the same for numbers 
 560         const wxStringCharType 
*ent_s 
= entity
.wx_str(); 
 561         const wxStringCharType 
*format
; 
 // 'x' or 'X' after the '#' selects the other scan format
 563         if (ent_s
[1] == wxS('x') || ent_s
[1] == wxS('X')) 
 // exactly one value must be read for the reference to be accepted
 572         if (wxSscanf(ent_s
, format
, &code
) != 1) 
 577         // store the literals in wx's internal representation (either char* 
 578         // in UTF-8 or wchar_t*) for best performance: 
 579         #define ENTITY(name, code) { wxS(name), code } 
 // The table is searched with bsearch() using wxHtmlEntityCompare(), so
 // the rows must stay sorted by name in strcmp() order (upper case letters
 // before lower case ones).
 581         static wxHtmlEntityInfo substitutions
[] = { 
 582             ENTITY("AElig", 198), 
 583             ENTITY("Aacute", 193), 
 584             ENTITY("Acirc", 194), 
 585             ENTITY("Agrave", 192), 
 586             ENTITY("Alpha", 913), 
 587             ENTITY("Aring", 197), 
 588             ENTITY("Atilde", 195), 
 591             ENTITY("Ccedil", 199), 
 593             ENTITY("Dagger", 8225), 
 594             ENTITY("Delta", 916), 
 596             ENTITY("Eacute", 201), 
 597             ENTITY("Ecirc", 202), 
 598             ENTITY("Egrave", 200), 
 599             ENTITY("Epsilon", 917), 
 602             ENTITY("Gamma", 915), 
 603             ENTITY("Iacute", 205), 
 604             ENTITY("Icirc", 206), 
 605             ENTITY("Igrave", 204), 
 608             ENTITY("Kappa", 922), 
 609             ENTITY("Lambda", 923), 
 611             ENTITY("Ntilde", 209), 
 613             ENTITY("OElig", 338), 
 614             ENTITY("Oacute", 211), 
 615             ENTITY("Ocirc", 212), 
 616             ENTITY("Ograve", 210), 
 617             ENTITY("Omega", 937), 
 618             ENTITY("Omicron", 927), 
 619             ENTITY("Oslash", 216), 
 620             ENTITY("Otilde", 213), 
 624             ENTITY("Prime", 8243), 
 627             ENTITY("Scaron", 352), 
 628             ENTITY("Sigma", 931), 
 629             ENTITY("THORN", 222), 
 631             ENTITY("Theta", 920), 
 632             ENTITY("Uacute", 218), 
 633             ENTITY("Ucirc", 219), 
 634             ENTITY("Ugrave", 217), 
 635             ENTITY("Upsilon", 933), 
 638             ENTITY("Yacute", 221), 
 641             ENTITY("aacute", 225), 
 642             ENTITY("acirc", 226), 
 643             ENTITY("acute", 180), 
 644             ENTITY("aelig", 230), 
 645             ENTITY("agrave", 224), 
 646             ENTITY("alefsym", 8501), 
 647             ENTITY("alpha", 945), 
 652             ENTITY("aring", 229), 
 653             ENTITY("asymp", 8776), 
 654             ENTITY("atilde", 227), 
 656             ENTITY("bdquo", 8222), 
 658             ENTITY("brvbar", 166), 
 659             ENTITY("bull", 8226), 
 661             ENTITY("ccedil", 231), 
 662             ENTITY("cedil", 184), 
 666             ENTITY("clubs", 9827), 
 667             ENTITY("cong", 8773), 
 669             ENTITY("crarr", 8629), 
 671             ENTITY("curren", 164), 
 672             ENTITY("dArr", 8659), 
 673             ENTITY("dagger", 8224), 
 674             ENTITY("darr", 8595), 
 676             ENTITY("delta", 948), 
 677             ENTITY("diams", 9830), 
 678             ENTITY("divide", 247), 
 679             ENTITY("eacute", 233), 
 680             ENTITY("ecirc", 234), 
 681             ENTITY("egrave", 232), 
 682             ENTITY("empty", 8709), 
 683             ENTITY("emsp", 8195), 
 684             ENTITY("ensp", 8194), 
 685             ENTITY("epsilon", 949), 
 686             ENTITY("equiv", 8801), 
 690             ENTITY("euro", 8364), 
 691             ENTITY("exist", 8707), 
 693             ENTITY("forall", 8704), 
 694             ENTITY("frac12", 189), 
 695             ENTITY("frac14", 188), 
 696             ENTITY("frac34", 190), 
 697             ENTITY("frasl", 8260), 
 698             ENTITY("gamma", 947), 
 701             ENTITY("hArr", 8660), 
 702             ENTITY("harr", 8596), 
 703             ENTITY("hearts", 9829), 
 704             ENTITY("hellip", 8230), 
 705             ENTITY("iacute", 237), 
 706             ENTITY("icirc", 238), 
 707             ENTITY("iexcl", 161), 
 708             ENTITY("igrave", 236), 
 709             ENTITY("image", 8465), 
 710             ENTITY("infin", 8734), 
 713             ENTITY("iquest", 191), 
 714             ENTITY("isin", 8712), 
 716             ENTITY("kappa", 954), 
 717             ENTITY("lArr", 8656), 
 718             ENTITY("lambda", 955), 
 719             ENTITY("lang", 9001), 
 720             ENTITY("laquo", 171), 
 721             ENTITY("larr", 8592), 
 722             ENTITY("lceil", 8968), 
 723             ENTITY("ldquo", 8220), 
 725             ENTITY("lfloor", 8970), 
 726             ENTITY("lowast", 8727), 
 729             ENTITY("lsaquo", 8249), 
 730             ENTITY("lsquo", 8216), 
 733             ENTITY("mdash", 8212), 
 734             ENTITY("micro", 181), 
 735             ENTITY("middot", 183), 
 736             ENTITY("minus", 8722), 
 738             ENTITY("nabla", 8711), 
 740             ENTITY("ndash", 8211), 
 744             ENTITY("notin", 8713), 
 745             ENTITY("nsub", 8836), 
 746             ENTITY("ntilde", 241), 
 748             ENTITY("oacute", 243), 
 749             ENTITY("ocirc", 244), 
 750             ENTITY("oelig", 339), 
 751             ENTITY("ograve", 242), 
 752             ENTITY("oline", 8254), 
 753             ENTITY("omega", 969), 
 754             ENTITY("omicron", 959), 
 755             ENTITY("oplus", 8853), 
 759             ENTITY("oslash", 248), 
 760             ENTITY("otilde", 245), 
 761             ENTITY("otimes", 8855), 
 764             ENTITY("part", 8706), 
 765             ENTITY("permil", 8240), 
 766             ENTITY("perp", 8869), 
 770             ENTITY("plusmn", 177), 
 771             ENTITY("pound", 163), 
 772             ENTITY("prime", 8242), 
 773             ENTITY("prod", 8719), 
 774             ENTITY("prop", 8733), 
 777             ENTITY("rArr", 8658), 
 778             ENTITY("radic", 8730), 
 779             ENTITY("rang", 9002), 
 780             ENTITY("raquo", 187), 
 781             ENTITY("rarr", 8594), 
 782             ENTITY("rceil", 8969), 
 783             ENTITY("rdquo", 8221), 
 784             ENTITY("real", 8476), 
 786             ENTITY("rfloor", 8971), 
 789             ENTITY("rsaquo", 8250), 
 790             ENTITY("rsquo", 8217), 
 791             ENTITY("sbquo", 8218), 
 792             ENTITY("scaron", 353), 
 793             ENTITY("sdot", 8901), 
 796             ENTITY("sigma", 963), 
 797             ENTITY("sigmaf", 962), 
 799             ENTITY("spades", 9824), 
 801             ENTITY("sube", 8838), 
 807             ENTITY("supe", 8839), 
 808             ENTITY("szlig", 223), 
 810             ENTITY("there4", 8756), 
 811             ENTITY("theta", 952), 
 812             ENTITY("thetasym", 977), 
 813             ENTITY("thinsp", 8201), 
 814             ENTITY("thorn", 254), 
 815             ENTITY("tilde", 732), 
 816             ENTITY("times", 215), 
 817             ENTITY("trade", 8482), 
 818             ENTITY("uArr", 8657), 
 819             ENTITY("uacute", 250), 
 820             ENTITY("uarr", 8593), 
 821             ENTITY("ucirc", 251), 
 822             ENTITY("ugrave", 249), 
 824             ENTITY("upsih", 978), 
 825             ENTITY("upsilon", 965), 
 827             ENTITY("weierp", 8472), 
 829             ENTITY("yacute", 253), 
 834             ENTITY("zwnj", 8204), 
 // The number of rows is computed on first use only: rows are counted up to
 // the first one whose code is 0, and the result is kept in a static.
 837         static size_t substitutions_cnt 
= 0; 
 839         if (substitutions_cnt 
== 0) 
 840             while (substitutions
[substitutions_cnt
].code 
!= 0) 
 843         wxHtmlEntityInfo 
*info
; 
 845         // bsearch crashes under WinCE for some reason 
 // alternative lookup: plain linear scan comparing the names
 848         for (i 
= 0; i 
< substitutions_cnt
; i
++) 
 850             if (entity 
== substitutions
[i
].name
) 
 852                 info 
= & substitutions
[i
]; 
 // regular lookup: binary search keyed by the entity name in wx's internal
 // representation
 857         info 
= (wxHtmlEntityInfo
*) bsearch(entity
.wx_str(), substitutions
, 
 859                                            sizeof(wxHtmlEntityInfo
), 
 860                                            wxHtmlEntityCompare
); 
 869         return GetCharForCode(code
); 
 // Opens `url` through the parser's file system object.
 //   type  kind of resource; images are opened with the seekable flag in
 //         addition to the read flag
 //   url   location passed unchanged to wxFileSystem::OpenFile()
 // Returns the result of OpenFile(), or NULL if the parser has no file
 // system object (m_FS is NULL).
 872 wxFSFile 
*wxHtmlParser::OpenURL(wxHtmlURLType type
, 
 873                                 const wxString
& url
) const 
 875     int flags 
= wxFS_READ
; 
 876     if (type 
== wxHTML_URL_IMAGE
) 
 877         flags 
|= wxFS_SEEKABLE
; 
 879     return m_FS 
? m_FS
->OpenFile(url
, flags
) : NULL
; 
 884 //----------------------------------------------------------------------------- 
 885 // wxHtmlParser::ExtractCharsetInformation 
 886 //----------------------------------------------------------------------------- 
 // Minimal parser used by ExtractCharsetInformation(): it produces no
 // object and ignores all text, so only the tag handlers registered with it
 // have any effect.
 888 class wxMetaTagParser 
: public wxHtmlParser
 
 891     wxMetaTagParser() { } 
 // there is no product
 893     wxObject
* GetProduct() { return NULL
; } 
 // text is ignored
 896     virtual void AddText(const wxString
& WXUNUSED(txt
)) {} 
 898     wxDECLARE_NO_COPY_CLASS(wxMetaTagParser
); 
 // Tag handler for META and BODY tags that extracts the charset announced
 // by a META tag. The result is written to the string passed to the
 // constructor; the handler only stores the pointer, so the string must
 // outlive the handler's use.
 901 class wxMetaTagHandler 
: public wxHtmlTagHandler
 
 // retval: where HandleTag() stores the charset that is found
 904     wxMetaTagHandler(wxString 
*retval
) : wxHtmlTagHandler(), m_retval(retval
) {} 
 905     wxString 
GetSupportedTags() { return wxT("META,BODY"); } 
 906     bool HandleTag(const wxHtmlTag
& tag
); 
 911     wxDECLARE_NO_COPY_CLASS(wxMetaTagHandler
); 
 // Handles one META or BODY tag.
 //
 // A BODY tag stops the parsing: no charset declaration is looked for past
 // the start of the document body. A META tag is examined only if it has
 // HTTP-EQUIV equal to "Content-Type" (compared without regard to case) and
 // a CONTENT parameter. If the lower-cased content starts with
 // "text/html; charset=", the rest of it is stored in *m_retval and the
 // parsing is stopped. The stored charset name is therefore in lower case.
 914 bool wxMetaTagHandler::HandleTag(const wxHtmlTag
& tag
) 
 916     if (tag
.GetName() == wxT("BODY")) 
 918         m_Parser
->StopParsing(); 
 922     if (tag
.HasParam(wxT("HTTP-EQUIV")) && 
 923         tag
.GetParam(wxT("HTTP-EQUIV")).IsSameAs(wxT("Content-Type"), false) && 
 924         tag
.HasParam(wxT("CONTENT"))) 
 926         wxString content 
= tag
.GetParam(wxT("CONTENT")).Lower(); 
 // 19 is the length of "text/html; charset="
 927         if (content
.Left(19) == wxT("text/html; charset=")) 
 929             *m_retval 
= content
.Mid(19); 
 930             m_Parser
->StopParsing(); 
 // Looks for a charset declaration in `markup` by running a
 // wxMetaTagParser with a wxMetaTagHandler over it. The handler writes the
 // charset it finds into the local `charset` string.
 // NOTE(review): the deletion of `parser` and the return statement are not
 // visible here -- confirm the parser is freed on every path.
 938 wxString 
wxHtmlParser::ExtractCharsetInformation(const wxString
& markup
) 
 941     wxMetaTagParser 
*parser 
= new wxMetaTagParser(); 
 // handlers added with AddTagHandler() are deleted by the parser's
 // destructor
 944         parser
->AddTagHandler(new wxMetaTagHandler(&charset
)); 
 945         parser
->Parse(markup
); 
 // Checks whether an HTML comment starts at `start` and, if so, skips it.
 //   start  must point to a '<' (asserted); passed by reference so that the
 //          function can move it while skipping the comment
 //   end    end of the text that may be examined
 // A comment is recognized only if at least 4 characters are available and
 // they are "<!--".
 // NOTE(review): the return statements and the assignments to `start` are
 // not visible here; CreateDOMSubTree() treats a true result as "comment
 // skipped, `start` is on the closing '>'" -- confirm.
 953 wxHtmlParser::SkipCommentTag(wxString::const_iterator
& start
, 
 954                              wxString::const_iterator end
) 
 956     wxASSERT_MSG( *start 
== '<', wxT("should be called on the tag start") ); 
 958     wxString::const_iterator p 
= start
; 
 960     // comments begin with "<!--" in HTML 4.0 
 // the length test comes first, so the characters are read only when
 // they exist
 961     if ( end 
- start 
< 4 || *++p 
!= '!' || *++p 
!= '-' || *++p 
!= '-' ) 
 963         // not a comment at all 
 967     // skip the start of the comment tag in any case, if we don't find the 
 968     // closing tag we should ignore broken markup 
 971     // comments end with "--[ \t\r\n]*>", i.e. white space is allowed between 
 972     // comment delimiter and the closing tag character (section 3.2.4 of 
 973     // http://www.w3.org/TR/html401/) 
 // `dashes` holds the number of '-' characters seen; two or more are needed
 // before white space or '>' is treated as part of the comment end
 979         if ( (c 
== wxT(' ') || c 
== wxT('\n') || 
 980               c 
== wxT('\r') || c 
== wxT('\t')) && dashes 
>= 2 ) 
 982             // ignore white space before potential tag end 
 986         if ( c 
== wxT('>') && dashes 
>= 2 ) 
 988             // found end of comment 
1002 #endif // wxUSE_HTML