1 ///////////////////////////////////////////////////////////////////////////// 
   3 // Purpose:     wxHtmlParser class (generic parser) 
   4 // Author:      Vaclav Slavik 
   6 // Copyright:   (c) 1999 Vaclav Slavik 
   7 // Licence:     wxWindows Licence 
   8 ///////////////////////////////////////////////////////////////////////////// 
  11 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA) 
  12 #pragma implementation "htmlpars.h" 
  15 #include "wx/wxprec.h" 
  18 #if wxUSE_HTML && wxUSE_STREAMS 
  29 #include "wx/tokenzr.h" 
  30 #include "wx/wfstream.h" 
  32 #include "wx/fontmap.h" 
  33 #include "wx/html/htmldefs.h" 
  34 #include "wx/html/htmlpars.h" 
  35 #include "wx/dynarray.h" 
  36 #include "wx/arrimpl.cpp" 
  39     #include "wx/msw/wince/missing.h"       // for bsearch() 
  42 // DLL options compatibility check: 
  44 WX_CHECK_BUILD_OPTIONS("wxHTML") 
  46 //----------------------------------------------------------------------------- 
  47 // wxHtmlParser helpers 
  48 //----------------------------------------------------------------------------- 
  53     wxHtmlTextPiece(int pos
, int lng
) : m_pos(pos
), m_lng(lng
) {} 
  57 WX_DECLARE_OBJARRAY(wxHtmlTextPiece
, wxHtmlTextPieces
); 
  58 WX_DEFINE_OBJARRAY(wxHtmlTextPieces
); 
  60 class wxHtmlParserState
 
  65     wxHtmlTextPieces  
*m_textPieces
; 
  68     wxHtmlParserState 
*m_nextState
; 
  71 //----------------------------------------------------------------------------- 
  73 //----------------------------------------------------------------------------- 
  75 IMPLEMENT_ABSTRACT_CLASS(wxHtmlParser
,wxObject
) 
  77 wxHtmlParser::wxHtmlParser() 
  78     : wxObject(), m_HandlersHash(wxKEY_STRING
), 
  79       m_FS(NULL
), m_HandlersStack(NULL
) 
  81     m_entitiesParser 
= new wxHtmlEntitiesParser
; 
  89 wxHtmlParser::~wxHtmlParser() 
  91     while (RestoreState()) {} 
  96         wxList
& tmp 
= *m_HandlersStack
; 
  97         wxList::iterator it
, en
; 
  98         for( it 
= tmp
.begin(), en 
= tmp
.end(); it 
!= en
; ++it 
) 
  99             delete (wxHashTable
*)*it
; 
 102     delete m_HandlersStack
; 
 103     m_HandlersHash
.Clear(); 
 104     WX_CLEAR_LIST(wxList
, m_HandlersList
); 
 105     delete m_entitiesParser
; 
 108 wxObject
* wxHtmlParser::Parse(const wxString
& source
) 
 112     wxObject 
*result 
= GetProduct(); 
 117 void wxHtmlParser::InitParser(const wxString
& source
) 
 120     m_stopParsing 
= FALSE
; 
 123 void wxHtmlParser::DoneParser() 
 128 void wxHtmlParser::SetSource(const wxString
& src
) 
 137 void wxHtmlParser::CreateDOMTree() 
 139     wxHtmlTagsCache 
cache(m_Source
); 
 140     m_TextPieces 
= new wxHtmlTextPieces
; 
 141     CreateDOMSubTree(NULL
, 0, m_Source
.Length(), &cache
); 
 145 extern bool wxIsCDATAElement(const wxChar 
*tag
); 
 147 void wxHtmlParser::CreateDOMSubTree(wxHtmlTag 
*cur
, 
 148                                     int begin_pos
, int end_pos
, 
 149                                     wxHtmlTagsCache 
*cache
) 
 151     if (end_pos 
<= begin_pos
) return; 
 155     int textBeginning 
= begin_pos
; 
 157     // If the tag contains CDATA text, we include the text between beginning 
 158     // and ending tag verbatim. Setting i=end_pos will skip to the very 
 159     // end of this function where text piece is added, bypassing any child 
 160     // tags parsing (CDATA element can't have child elements by definition): 
 161     if (cur 
!= NULL 
&& wxIsCDATAElement(cur
->GetName().c_str())) 
 168         c 
= m_Source
.GetChar(i
); 
 172             // add text to m_TextPieces: 
 173             if (i 
- textBeginning 
> 0) 
 175                     wxHtmlTextPiece(textBeginning
, i 
- textBeginning
)); 
 177             // if it is a comment, skip it: 
 178             if (i 
< end_pos
-6 && m_Source
.GetChar(i
+1) == wxT('!') && 
 179                                  m_Source
.GetChar(i
+2) == wxT('-') && 
 180                                  m_Source
.GetChar(i
+3) == wxT('-')) 
 182                 // Comments begin with "<!--" and end with "--[ \t\r\n]*>" 
 183                 // according to HTML 4.0 
 188                     c 
= m_Source
.GetChar(i
++); 
 189                     if ((c 
== wxT(' ') || c 
== wxT('\n') || 
 190                         c 
== wxT('\r') || c 
== wxT('\t')) && dashes 
>= 2) {} 
 191                     else if (c 
== wxT('>') && dashes 
>= 2) 
 196                     else if (c 
== wxT('-')) 
 203             // add another tag to the tree: 
 204             else if (i 
< end_pos
-1 && m_Source
.GetChar(i
+1) != wxT('/')) 
 208                     chd 
= new wxHtmlTag(cur
, m_Source
, 
 209                                         i
, end_pos
, cache
, m_entitiesParser
); 
 212                     chd 
= new wxHtmlTag(NULL
, m_Source
, 
 213                                         i
, end_pos
, cache
, m_entitiesParser
); 
 216                         // if this is the first tag to be created make the root 
 217                         // m_Tags point to it: 
 222                         // if there is already a root tag add this tag as 
 224                         chd
->m_Prev 
= m_Tags
->GetLastSibling(); 
 225                         chd
->m_Prev
->m_Next 
= chd
; 
 229                 if (chd
->HasEnding()) 
 231                     CreateDOMSubTree(chd
, 
 232                                      chd
->GetBeginPos(), chd
->GetEndPos1(), 
 234                     i 
= chd
->GetEndPos2(); 
 237                     i 
= chd
->GetBeginPos(); 
 242             // ... or skip ending tag: 
 245                 while (i 
< end_pos 
&& m_Source
.GetChar(i
) != wxT('>')) i
++; 
 252     // add remaining text to m_TextPieces: 
 253     if (end_pos 
- textBeginning 
> 0) 
 255             wxHtmlTextPiece(textBeginning
, end_pos 
- textBeginning
)); 
 258 void wxHtmlParser::DestroyDOMTree() 
 264         t2 
= t1
->GetNextSibling(); 
 268     m_Tags 
= m_CurTag 
= NULL
; 
 274 void wxHtmlParser::DoParsing() 
 278     DoParsing(0, m_Source
.Length()); 
 281 void wxHtmlParser::DoParsing(int begin_pos
, int end_pos
) 
 283     if (end_pos 
<= begin_pos
) return; 
 285     wxHtmlTextPieces
& pieces 
= *m_TextPieces
; 
 286     size_t piecesCnt 
= pieces
.GetCount(); 
 288     while (begin_pos 
< end_pos
) 
 290         while (m_CurTag 
&& m_CurTag
->GetBeginPos() < begin_pos
) 
 291             m_CurTag 
= m_CurTag
->GetNextTag(); 
 292         while (m_CurTextPiece 
< piecesCnt 
&& 
 293                pieces
[m_CurTextPiece
].m_pos 
< begin_pos
) 
 296         if (m_CurTextPiece 
< piecesCnt 
&& 
 298              pieces
[m_CurTextPiece
].m_pos 
< m_CurTag
->GetBeginPos())) 
 301             AddText(GetEntitiesParser()->Parse( 
 302                        m_Source
.Mid(pieces
[m_CurTextPiece
].m_pos
, 
 303                                     pieces
[m_CurTextPiece
].m_lng
))); 
 304             begin_pos 
= pieces
[m_CurTextPiece
].m_pos 
+ 
 305                         pieces
[m_CurTextPiece
].m_lng
; 
 313                 if (m_CurTag
->HasEnding()) 
 314                     begin_pos 
= m_CurTag
->GetEndPos2(); 
 316                     begin_pos 
= m_CurTag
->GetBeginPos(); 
 318             wxHtmlTag 
*t 
= m_CurTag
; 
 319             m_CurTag 
= m_CurTag
->GetNextTag(); 
 328 void wxHtmlParser::AddTag(const wxHtmlTag
& tag
) 
 333     h 
= (wxHtmlTagHandler
*) m_HandlersHash
.Get(tag
.GetName()); 
 336         inner 
= h
->HandleTag(tag
); 
 343             DoParsing(tag
.GetBeginPos(), tag
.GetEndPos1()); 
 347 void wxHtmlParser::AddTagHandler(wxHtmlTagHandler 
*handler
) 
 349     wxString 
s(handler
->GetSupportedTags()); 
 350     wxStringTokenizer 
tokenizer(s
, wxT(", ")); 
 352     while (tokenizer
.HasMoreTokens()) 
 353         m_HandlersHash
.Put(tokenizer
.GetNextToken(), handler
); 
 355     if (m_HandlersList
.IndexOf(handler
) == wxNOT_FOUND
) 
 356         m_HandlersList
.Append(handler
); 
 358     handler
->SetParser(this); 
 361 void wxHtmlParser::PushTagHandler(wxHtmlTagHandler 
*handler
, wxString tags
) 
 363     wxStringTokenizer 
tokenizer(tags
, wxT(", ")); 
 366     if (m_HandlersStack 
== NULL
) 
 368         m_HandlersStack 
= new wxList
; 
 371     m_HandlersStack
->Insert((wxObject
*)new wxHashTable(m_HandlersHash
)); 
 373     while (tokenizer
.HasMoreTokens()) 
 375         key 
= tokenizer
.GetNextToken(); 
 376         m_HandlersHash
.Delete(key
); 
 377         m_HandlersHash
.Put(key
, handler
); 
 381 void wxHtmlParser::PopTagHandler() 
 383     wxList::compatibility_iterator first
; 
 385     if ( !m_HandlersStack 
|| 
 387          !(first 
= m_HandlersStack
->GetFirst()) 
 389          ((first 
= m_HandlersStack
->GetFirst()) == NULL
) 
 390 #endif // wxUSE_STL/!wxUSE_STL 
 393         wxLogWarning(_("Warning: attempt to remove HTML tag handler from empty stack.")); 
 396     m_HandlersHash 
= *((wxHashTable
*) first
->GetData()); 
 397     delete (wxHashTable
*) first
->GetData(); 
 398     m_HandlersStack
->Erase(first
); 
 401 void wxHtmlParser::SetSourceAndSaveState(const wxString
& src
) 
 403     wxHtmlParserState 
*s 
= new wxHtmlParserState
; 
 405     s
->m_curTag 
= m_CurTag
; 
 407     s
->m_textPieces 
= m_TextPieces
; 
 408     s
->m_curTextPiece 
= m_CurTextPiece
; 
 409     s
->m_source 
= m_Source
; 
 411     s
->m_nextState 
= m_SavedStates
; 
 418     m_Source 
= wxEmptyString
; 
 423 bool wxHtmlParser::RestoreState() 
 425     if (!m_SavedStates
) return FALSE
; 
 429     wxHtmlParserState 
*s 
= m_SavedStates
; 
 430     m_SavedStates 
= s
->m_nextState
; 
 432     m_CurTag 
= s
->m_curTag
; 
 434     m_TextPieces 
= s
->m_textPieces
; 
 435     m_CurTextPiece 
= s
->m_curTextPiece
; 
 436     m_Source 
= s
->m_source
; 
 442 //----------------------------------------------------------------------------- 
 444 //----------------------------------------------------------------------------- 
 446 IMPLEMENT_ABSTRACT_CLASS(wxHtmlTagHandler
,wxObject
) 
 449 //----------------------------------------------------------------------------- 
 450 // wxHtmlEntitiesParser 
 451 //----------------------------------------------------------------------------- 
 453 IMPLEMENT_DYNAMIC_CLASS(wxHtmlEntitiesParser
,wxObject
) 
 455 wxHtmlEntitiesParser::wxHtmlEntitiesParser() 
 456 #if wxUSE_WCHAR_T && !wxUSE_UNICODE 
 457     : m_conv(NULL
), m_encoding(wxFONTENCODING_SYSTEM
) 
 462 wxHtmlEntitiesParser::~wxHtmlEntitiesParser() 
 464 #if wxUSE_WCHAR_T && !wxUSE_UNICODE 
 469 void wxHtmlEntitiesParser::SetEncoding(wxFontEncoding encoding
) 
 471 #if wxUSE_WCHAR_T && !wxUSE_UNICODE 
 472     if (encoding 
== m_encoding
) 
 477     m_encoding 
= encoding
; 
 478     if (m_encoding 
== wxFONTENCODING_SYSTEM
) 
 481         m_conv 
= new wxCSConv(wxFontMapper::GetEncodingName(m_encoding
)); 
 487 wxString 
wxHtmlEntitiesParser::Parse(const wxString
& input
) 
 489     const wxChar 
*c
, *last
; 
 490     const wxChar 
*in_str 
= input
.c_str(); 
 493     output
.reserve(input
.length()); 
 495     for (c 
= in_str
, last 
= in_str
; *c 
!= wxT('\0'); c
++) 
 500                 output
.append(last
, c 
- last
); 
 501             if (++c 
== wxT('\0')) break; 
 504             const wxChar 
*ent_s 
= c
; 
 507             for (; (*c 
>= wxT('a') && *c 
<= wxT('z')) || 
 508                    (*c 
>= wxT('A') && *c 
<= wxT('Z')) || 
 509                    (*c 
>= wxT('0') && *c 
<= wxT('9')) || 
 510                    *c 
== wxT('_') || *c 
== wxT('#'); c
++) {} 
 511             entity
.append(ent_s
, c 
- ent_s
); 
 512             if (*c 
!= wxT(';')) c
--; 
 514             entity_char 
= GetEntityChar(entity
); 
 516                 output 
<< entity_char
; 
 519                 output
.append(ent_s
-1, c
-ent_s
+2); 
 520                 wxLogDebug(wxT("Unrecognized HTML entity: '%s'"), entity
.c_str()); 
 524     if (*last 
!= wxT('\0')) 
 529 struct wxHtmlEntityInfo
 
 535 extern "C" int LINKAGEMODE 
wxHtmlEntityCompare(const void *key
, const void *item
) 
 537     return wxStrcmp((wxChar
*)key
, ((wxHtmlEntityInfo
*)item
)->name
); 
 541 wxChar 
wxHtmlEntitiesParser::GetCharForCode(unsigned code
) 
 546     wbuf
[0] = (wchar_t)code
; 
 548     wxMBConv 
*conv 
= m_conv 
? m_conv 
: &wxConvLocal
; 
 549     if (conv
->WC2MB(buf
, wbuf
, 2) == (size_t)-1) 
 553     return (code 
< 256) ? (wxChar
)code 
: '?'; 
 558 wxChar 
wxHtmlEntitiesParser::GetEntityChar(const wxString
& entity
) 
 562     if (entity
[0] == wxT('#')) 
 564         const wxChar 
*ent_s 
= entity
.c_str(); 
 565         const wxChar 
*format
; 
 567         if (ent_s
[1] == wxT('x') || ent_s
[1] == wxT('X')) 
 576         if (wxSscanf(ent_s
, format
, &code
) != 1) 
 581         static wxHtmlEntityInfo substitutions
[] = { 
 582             { wxT("AElig"),198 }, 
 583             { wxT("Aacute"),193 }, 
 584             { wxT("Acirc"),194 }, 
 585             { wxT("Agrave"),192 }, 
 586             { wxT("Alpha"),913 }, 
 587             { wxT("Aring"),197 }, 
 588             { wxT("Atilde"),195 }, 
 591             { wxT("Ccedil"),199 }, 
 593             { wxT("Dagger"),8225 }, 
 594             { wxT("Delta"),916 }, 
 596             { wxT("Eacute"),201 }, 
 597             { wxT("Ecirc"),202 }, 
 598             { wxT("Egrave"),200 }, 
 599             { wxT("Epsilon"),917 }, 
 602             { wxT("Gamma"),915 }, 
 603             { wxT("Iacute"),205 }, 
 604             { wxT("Icirc"),206 }, 
 605             { wxT("Igrave"),204 }, 
 608             { wxT("Kappa"),922 }, 
 609             { wxT("Lambda"),923 }, 
 611             { wxT("Ntilde"),209 }, 
 613             { wxT("OElig"),338 }, 
 614             { wxT("Oacute"),211 }, 
 615             { wxT("Ocirc"),212 }, 
 616             { wxT("Ograve"),210 }, 
 617             { wxT("Omega"),937 }, 
 618             { wxT("Omicron"),927 }, 
 619             { wxT("Oslash"),216 }, 
 620             { wxT("Otilde"),213 }, 
 624             { wxT("Prime"),8243 }, 
 627             { wxT("Scaron"),352 }, 
 628             { wxT("Sigma"),931 }, 
 629             { wxT("THORN"),222 }, 
 631             { wxT("Theta"),920 }, 
 632             { wxT("Uacute"),218 }, 
 633             { wxT("Ucirc"),219 }, 
 634             { wxT("Ugrave"),217 }, 
 635             { wxT("Upsilon"),933 }, 
 638             { wxT("Yacute"),221 }, 
 641             { wxT("aacute"),225 }, 
 642             { wxT("acirc"),226 }, 
 643             { wxT("acute"),180 }, 
 644             { wxT("aelig"),230 }, 
 645             { wxT("agrave"),224 }, 
 646             { wxT("alefsym"),8501 }, 
 647             { wxT("alpha"),945 }, 
 651             { wxT("aring"),229 }, 
 652             { wxT("asymp"),8776 }, 
 653             { wxT("atilde"),227 }, 
 655             { wxT("bdquo"),8222 }, 
 657             { wxT("brvbar"),166 }, 
 658             { wxT("bull"),8226 }, 
 660             { wxT("ccedil"),231 }, 
 661             { wxT("cedil"),184 }, 
 665             { wxT("clubs"),9827 }, 
 666             { wxT("cong"),8773 }, 
 668             { wxT("crarr"),8629 }, 
 670             { wxT("curren"),164 }, 
 671             { wxT("dArr"),8659 }, 
 672             { wxT("dagger"),8224 }, 
 673             { wxT("darr"),8595 }, 
 675             { wxT("delta"),948 }, 
 676             { wxT("diams"),9830 }, 
 677             { wxT("divide"),247 }, 
 678             { wxT("eacute"),233 }, 
 679             { wxT("ecirc"),234 }, 
 680             { wxT("egrave"),232 }, 
 681             { wxT("empty"),8709 }, 
 682             { wxT("emsp"),8195 }, 
 683             { wxT("ensp"),8194 }, 
 684             { wxT("epsilon"),949 }, 
 685             { wxT("equiv"),8801 }, 
 689             { wxT("euro"),8364 }, 
 690             { wxT("exist"),8707 }, 
 692             { wxT("forall"),8704 }, 
 693             { wxT("frac12"),189 }, 
 694             { wxT("frac14"),188 }, 
 695             { wxT("frac34"),190 }, 
 696             { wxT("frasl"),8260 }, 
 697             { wxT("gamma"),947 }, 
 700             { wxT("hArr"),8660 }, 
 701             { wxT("harr"),8596 }, 
 702             { wxT("hearts"),9829 }, 
 703             { wxT("hellip"),8230 }, 
 704             { wxT("iacute"),237 }, 
 705             { wxT("icirc"),238 }, 
 706             { wxT("iexcl"),161 }, 
 707             { wxT("igrave"),236 }, 
 708             { wxT("image"),8465 }, 
 709             { wxT("infin"),8734 }, 
 712             { wxT("iquest"),191 }, 
 713             { wxT("isin"),8712 }, 
 715             { wxT("kappa"),954 }, 
 716             { wxT("lArr"),8656 }, 
 717             { wxT("lambda"),955 }, 
 718             { wxT("lang"),9001 }, 
 719             { wxT("laquo"),171 }, 
 720             { wxT("larr"),8592 }, 
 721             { wxT("lceil"),8968 }, 
 722             { wxT("ldquo"),8220 }, 
 724             { wxT("lfloor"),8970 }, 
 725             { wxT("lowast"),8727 }, 
 728             { wxT("lsaquo"),8249 }, 
 729             { wxT("lsquo"),8216 }, 
 732             { wxT("mdash"),8212 }, 
 733             { wxT("micro"),181 }, 
 734             { wxT("middot"),183 }, 
 735             { wxT("minus"),8722 }, 
 737             { wxT("nabla"),8711 }, 
 739             { wxT("ndash"),8211 }, 
 743             { wxT("notin"),8713 }, 
 744             { wxT("nsub"),8836 }, 
 745             { wxT("ntilde"),241 }, 
 747             { wxT("oacute"),243 }, 
 748             { wxT("ocirc"),244 }, 
 749             { wxT("oelig"),339 }, 
 750             { wxT("ograve"),242 }, 
 751             { wxT("oline"),8254 }, 
 752             { wxT("omega"),969 }, 
 753             { wxT("omicron"),959 }, 
 754             { wxT("oplus"),8853 }, 
 758             { wxT("oslash"),248 }, 
 759             { wxT("otilde"),245 }, 
 760             { wxT("otimes"),8855 }, 
 763             { wxT("part"),8706 }, 
 764             { wxT("permil"),8240 }, 
 765             { wxT("perp"),8869 }, 
 769             { wxT("plusmn"),177 }, 
 770             { wxT("pound"),163 }, 
 771             { wxT("prime"),8242 }, 
 772             { wxT("prod"),8719 }, 
 773             { wxT("prop"),8733 }, 
 776             { wxT("rArr"),8658 }, 
 777             { wxT("radic"),8730 }, 
 778             { wxT("rang"),9002 }, 
 779             { wxT("raquo"),187 }, 
 780             { wxT("rarr"),8594 }, 
 781             { wxT("rceil"),8969 }, 
 782             { wxT("rdquo"),8221 }, 
 783             { wxT("real"),8476 }, 
 785             { wxT("rfloor"),8971 }, 
 788             { wxT("rsaquo"),8250 }, 
 789             { wxT("rsquo"),8217 }, 
 790             { wxT("sbquo"),8218 }, 
 791             { wxT("scaron"),353 }, 
 792             { wxT("sdot"),8901 }, 
 795             { wxT("sigma"),963 }, 
 796             { wxT("sigmaf"),962 }, 
 798             { wxT("spades"),9824 }, 
 800             { wxT("sube"),8838 }, 
 806             { wxT("supe"),8839 }, 
 807             { wxT("szlig"),223 }, 
 809             { wxT("there4"),8756 }, 
 810             { wxT("theta"),952 }, 
 811             { wxT("thetasym"),977 }, 
 812             { wxT("thinsp"),8201 }, 
 813             { wxT("thorn"),254 }, 
 814             { wxT("tilde"),732 }, 
 815             { wxT("times"),215 }, 
 816             { wxT("trade"),8482 }, 
 817             { wxT("uArr"),8657 }, 
 818             { wxT("uacute"),250 }, 
 819             { wxT("uarr"),8593 }, 
 820             { wxT("ucirc"),251 }, 
 821             { wxT("ugrave"),249 }, 
 823             { wxT("upsih"),978 }, 
 824             { wxT("upsilon"),965 }, 
 826             { wxT("weierp"),8472 }, 
 828             { wxT("yacute"),253 }, 
 833             { wxT("zwnj"),8204 }, 
 835         static size_t substitutions_cnt 
= 0; 
 837         if (substitutions_cnt 
== 0) 
 838             while (substitutions
[substitutions_cnt
].code 
!= 0) 
 841         wxHtmlEntityInfo 
*info
; 
 842         info 
= (wxHtmlEntityInfo
*) bsearch(entity
.c_str(), substitutions
, 
 844                                            sizeof(wxHtmlEntityInfo
), 
 845                                            wxHtmlEntityCompare
); 
 853         return GetCharForCode(code
); 
 856 wxFSFile 
*wxHtmlParser::OpenURL(wxHtmlURLType 
WXUNUSED(type
),  
 857                                 const wxString
& url
) const 
 859     return m_FS 
? m_FS
->OpenFile(url
) : NULL
; 
 864 //----------------------------------------------------------------------------- 
 865 // wxHtmlParser::ExtractCharsetInformation 
 866 //----------------------------------------------------------------------------- 
 868 class wxMetaTagParser 
: public wxHtmlParser
 
 871     wxMetaTagParser() { } 
 873     wxObject
* GetProduct() { return NULL
; } 
 876     virtual void AddText(const wxChar
* WXUNUSED(txt
)) {} 
 878     DECLARE_NO_COPY_CLASS(wxMetaTagParser
) 
 881 class wxMetaTagHandler 
: public wxHtmlTagHandler
 
 884     wxMetaTagHandler(wxString 
*retval
) : wxHtmlTagHandler(), m_retval(retval
) {} 
 885     wxString 
GetSupportedTags() { return wxT("META,BODY"); } 
 886     bool HandleTag(const wxHtmlTag
& tag
); 
 891     DECLARE_NO_COPY_CLASS(wxMetaTagHandler
) 
 894 bool wxMetaTagHandler::HandleTag(const wxHtmlTag
& tag
) 
 896     if (tag
.GetName() == _T("BODY")) 
 898         m_Parser
->StopParsing(); 
 902     if (tag
.HasParam(_T("HTTP-EQUIV")) && 
 903         tag
.GetParam(_T("HTTP-EQUIV")).IsSameAs(_T("Content-Type"), false) && 
 904         tag
.HasParam(_T("CONTENT"))) 
 906         wxString content 
= tag
.GetParam(_T("CONTENT")).Lower(); 
 907         if (content
.Left(19) == _T("text/html; charset=")) 
 909             *m_retval 
= content
.Mid(19); 
 910             m_Parser
->StopParsing(); 
 918 wxString 
wxHtmlParser::ExtractCharsetInformation(const wxString
& markup
) 
 921     wxMetaTagParser parser
; 
 922     parser
.AddTagHandler(new wxMetaTagHandler(&charset
)); 
 923     parser
.Parse(markup
);