1 ///////////////////////////////////////////////////////////////////////////// 
   2 // Name:        src/html/htmlpars.cpp 
   3 // Purpose:     wxHtmlParser class (generic parser) 
   4 // Author:      Vaclav Slavik 
   6 // Copyright:   (c) 1999 Vaclav Slavik 
   7 // Licence:     wxWindows licence 
   8 ///////////////////////////////////////////////////////////////////////////// 
  10 #include "wx/wxprec.h" 
  16 #if wxUSE_HTML && wxUSE_STREAMS 
  19     #include "wx/dynarray.h" 
  25 #include "wx/tokenzr.h" 
  26 #include "wx/wfstream.h" 
  28 #include "wx/fontmap.h" 
  29 #include "wx/html/htmldefs.h" 
  30 #include "wx/html/htmlpars.h" 
  31 #include "wx/arrimpl.cpp" 
  34     #include "wx/msw/wince/missing.h"       // for bsearch() 
  37 // DLL options compatibility check: 
  38 WX_CHECK_BUILD_OPTIONS("wxHTML") 
  40 const wxChar 
*wxTRACE_HTML_DEBUG 
= _T("htmldebug"); 
  42 //----------------------------------------------------------------------------- 
  43 // wxHtmlParser helpers 
  44 //----------------------------------------------------------------------------- 
  49     wxHtmlTextPiece(int pos
, int lng
) : m_pos(pos
), m_lng(lng
) {} 
  53 WX_DECLARE_OBJARRAY(wxHtmlTextPiece
, wxHtmlTextPieces
); 
  54 WX_DEFINE_OBJARRAY(wxHtmlTextPieces
) 
  56 class wxHtmlParserState
 
  61     wxHtmlTextPieces  
*m_textPieces
; 
  64     wxHtmlParserState 
*m_nextState
; 
  67 //----------------------------------------------------------------------------- 
  69 //----------------------------------------------------------------------------- 
  71 IMPLEMENT_ABSTRACT_CLASS(wxHtmlParser
,wxObject
) 
  73 wxHtmlParser::wxHtmlParser() 
  74     : wxObject(), m_HandlersHash(wxKEY_STRING
), 
  75       m_FS(NULL
), m_HandlersStack(NULL
) 
  77     m_entitiesParser 
= new wxHtmlEntitiesParser
; 
  85 wxHtmlParser::~wxHtmlParser() 
  87     while (RestoreState()) {} 
  92         wxList
& tmp 
= *m_HandlersStack
; 
  93         wxList::iterator it
, en
; 
  94         for( it 
= tmp
.begin(), en 
= tmp
.end(); it 
!= en
; ++it 
) 
  95             delete (wxHashTable
*)*it
; 
  98     delete m_HandlersStack
; 
  99     m_HandlersHash
.Clear(); 
 100     WX_CLEAR_LIST(wxList
, m_HandlersList
); 
 101     delete m_entitiesParser
; 
 104 wxObject
* wxHtmlParser::Parse(const wxString
& source
) 
 108     wxObject 
*result 
= GetProduct(); 
 113 void wxHtmlParser::InitParser(const wxString
& source
) 
 116     m_stopParsing 
= false; 
 119 void wxHtmlParser::DoneParser() 
 124 void wxHtmlParser::SetSource(const wxString
& src
) 
 133 void wxHtmlParser::CreateDOMTree() 
 135     wxHtmlTagsCache 
cache(m_Source
); 
 136     m_TextPieces 
= new wxHtmlTextPieces
; 
 137     CreateDOMSubTree(NULL
, 0, m_Source
.length(), &cache
); 
 141 extern bool wxIsCDATAElement(const wxChar 
*tag
); 
 143 void wxHtmlParser::CreateDOMSubTree(wxHtmlTag 
*cur
, 
 144                                     int begin_pos
, int end_pos
, 
 145                                     wxHtmlTagsCache 
*cache
) 
 147     if (end_pos 
<= begin_pos
) return; 
 151     int textBeginning 
= begin_pos
; 
 153     // If the tag contains CDATA text, we include the text between beginning 
 154     // and ending tag verbatim. Setting i=end_pos will skip to the very 
 155     // end of this function where text piece is added, bypassing any child 
 156     // tags parsing (CDATA element can't have child elements by definition): 
 157     if (cur 
!= NULL 
&& wxIsCDATAElement(cur
->GetName().c_str())) 
 164         c 
= m_Source
.GetChar(i
); 
 168             // add text to m_TextPieces: 
 169             if (i 
- textBeginning 
> 0) 
 171                     wxHtmlTextPiece(textBeginning
, i 
- textBeginning
)); 
 173             // if it is a comment, skip it: 
 174             if (i 
< end_pos
-6 && m_Source
.GetChar(i
+1) == wxT('!') && 
 175                                  m_Source
.GetChar(i
+2) == wxT('-') && 
 176                                  m_Source
.GetChar(i
+3) == wxT('-')) 
 178                 // Comments begin with "<!--" and end with "--[ \t\r\n]*>" 
 179                 // according to HTML 4.0 
 184                     c 
= m_Source
.GetChar(i
++); 
 185                     if ((c 
== wxT(' ') || c 
== wxT('\n') || 
 186                         c 
== wxT('\r') || c 
== wxT('\t')) && dashes 
>= 2) {} 
 187                     else if (c 
== wxT('>') && dashes 
>= 2) 
 192                     else if (c 
== wxT('-')) 
 199             // add another tag to the tree: 
 200             else if (i 
< end_pos
-1 && m_Source
.GetChar(i
+1) != wxT('/')) 
 204                     chd 
= new wxHtmlTag(cur
, m_Source
, 
 205                                         i
, end_pos
, cache
, m_entitiesParser
); 
 208                     chd 
= new wxHtmlTag(NULL
, m_Source
, 
 209                                         i
, end_pos
, cache
, m_entitiesParser
); 
 212                         // if this is the first tag to be created make the root 
 213                         // m_Tags point to it: 
 218                         // if there is already a root tag add this tag as 
 220                         chd
->m_Prev 
= m_Tags
->GetLastSibling(); 
 221                         chd
->m_Prev
->m_Next 
= chd
; 
 225                 if (chd
->HasEnding()) 
 227                     CreateDOMSubTree(chd
, 
 228                                      chd
->GetBeginPos(), chd
->GetEndPos1(), 
 230                     i 
= chd
->GetEndPos2(); 
 233                     i 
= chd
->GetBeginPos(); 
 238             // ... or skip ending tag: 
 241                 while (i 
< end_pos 
&& m_Source
.GetChar(i
) != wxT('>')) i
++; 
 248     // add remaining text to m_TextPieces: 
 249     if (end_pos 
- textBeginning 
> 0) 
 251             wxHtmlTextPiece(textBeginning
, end_pos 
- textBeginning
)); 
 254 void wxHtmlParser::DestroyDOMTree() 
 260         t2 
= t1
->GetNextSibling(); 
 264     m_Tags 
= m_CurTag 
= NULL
; 
 270 void wxHtmlParser::DoParsing() 
 274     DoParsing(0, m_Source
.length()); 
 277 void wxHtmlParser::DoParsing(int begin_pos
, int end_pos
) 
 279     if (end_pos 
<= begin_pos
) return; 
 281     wxHtmlTextPieces
& pieces 
= *m_TextPieces
; 
 282     size_t piecesCnt 
= pieces
.GetCount(); 
 284     while (begin_pos 
< end_pos
) 
 286         while (m_CurTag 
&& m_CurTag
->GetBeginPos() < begin_pos
) 
 287             m_CurTag 
= m_CurTag
->GetNextTag(); 
 288         while (m_CurTextPiece 
< piecesCnt 
&& 
 289                pieces
[m_CurTextPiece
].m_pos 
< begin_pos
) 
 292         if (m_CurTextPiece 
< piecesCnt 
&& 
 294              pieces
[m_CurTextPiece
].m_pos 
< m_CurTag
->GetBeginPos())) 
 297             AddText(GetEntitiesParser()->Parse( 
 298                        m_Source
.Mid(pieces
[m_CurTextPiece
].m_pos
, 
 299                                     pieces
[m_CurTextPiece
].m_lng
))); 
 300             begin_pos 
= pieces
[m_CurTextPiece
].m_pos 
+ 
 301                         pieces
[m_CurTextPiece
].m_lng
; 
 306             if (m_CurTag
->HasEnding()) 
 307                 begin_pos 
= m_CurTag
->GetEndPos2(); 
 309                 begin_pos 
= m_CurTag
->GetBeginPos(); 
 310             wxHtmlTag 
*t 
= m_CurTag
; 
 311             m_CurTag 
= m_CurTag
->GetNextTag(); 
 320 void wxHtmlParser::AddTag(const wxHtmlTag
& tag
) 
 325     h 
= (wxHtmlTagHandler
*) m_HandlersHash
.Get(tag
.GetName()); 
 328         inner 
= h
->HandleTag(tag
); 
 335             DoParsing(tag
.GetBeginPos(), tag
.GetEndPos1()); 
 339 void wxHtmlParser::AddTagHandler(wxHtmlTagHandler 
*handler
) 
 341     wxString 
s(handler
->GetSupportedTags()); 
 342     wxStringTokenizer 
tokenizer(s
, wxT(", ")); 
 344     while (tokenizer
.HasMoreTokens()) 
 345         m_HandlersHash
.Put(tokenizer
.GetNextToken(), handler
); 
 347     if (m_HandlersList
.IndexOf(handler
) == wxNOT_FOUND
) 
 348         m_HandlersList
.Append(handler
); 
 350     handler
->SetParser(this); 
 353 void wxHtmlParser::PushTagHandler(wxHtmlTagHandler 
*handler
, const wxString
& tags
) 
 355     wxStringTokenizer 
tokenizer(tags
, wxT(", ")); 
 358     if (m_HandlersStack 
== NULL
) 
 360         m_HandlersStack 
= new wxList
; 
 363     m_HandlersStack
->Insert((wxObject
*)new wxHashTable(m_HandlersHash
)); 
 365     while (tokenizer
.HasMoreTokens()) 
 367         key 
= tokenizer
.GetNextToken(); 
 368         m_HandlersHash
.Delete(key
); 
 369         m_HandlersHash
.Put(key
, handler
); 
 373 void wxHtmlParser::PopTagHandler() 
 375     wxList::compatibility_iterator first
; 
 377     if ( !m_HandlersStack 
|| 
 379          !(first 
= m_HandlersStack
->GetFirst()) 
 381          ((first 
= m_HandlersStack
->GetFirst()) == NULL
) 
 382 #endif // wxUSE_STL/!wxUSE_STL 
 385         wxLogWarning(_("Warning: attempt to remove HTML tag handler from empty stack.")); 
 388     m_HandlersHash 
= *((wxHashTable
*) first
->GetData()); 
 389     delete (wxHashTable
*) first
->GetData(); 
 390     m_HandlersStack
->Erase(first
); 
 393 void wxHtmlParser::SetSourceAndSaveState(const wxString
& src
) 
 395     wxHtmlParserState 
*s 
= new wxHtmlParserState
; 
 397     s
->m_curTag 
= m_CurTag
; 
 399     s
->m_textPieces 
= m_TextPieces
; 
 400     s
->m_curTextPiece 
= m_CurTextPiece
; 
 401     s
->m_source 
= m_Source
; 
 403     s
->m_nextState 
= m_SavedStates
; 
 410     m_Source 
= wxEmptyString
; 
 415 bool wxHtmlParser::RestoreState() 
 417     if (!m_SavedStates
) return false; 
 421     wxHtmlParserState 
*s 
= m_SavedStates
; 
 422     m_SavedStates 
= s
->m_nextState
; 
 424     m_CurTag 
= s
->m_curTag
; 
 426     m_TextPieces 
= s
->m_textPieces
; 
 427     m_CurTextPiece 
= s
->m_curTextPiece
; 
 428     m_Source 
= s
->m_source
; 
 434 wxString 
wxHtmlParser::GetInnerSource(const wxHtmlTag
& tag
) 
 436     return GetSource()->Mid(tag
.GetBeginPos(), 
 437                             tag
.GetEndPos1() - tag
.GetBeginPos()); 
 440 //----------------------------------------------------------------------------- 
 442 //----------------------------------------------------------------------------- 
 444 IMPLEMENT_ABSTRACT_CLASS(wxHtmlTagHandler
,wxObject
) 
 446 void wxHtmlTagHandler::ParseInnerSource(const wxString
& source
) 
 448     // It is safe to temporarily change the source being parsed, 
 449     // provided we restore the state back after parsing 
 450     m_Parser
->SetSourceAndSaveState(source
); 
 451     m_Parser
->DoParsing(); 
 452     m_Parser
->RestoreState(); 
 456 //----------------------------------------------------------------------------- 
 457 // wxHtmlEntitiesParser 
 458 //----------------------------------------------------------------------------- 
 460 IMPLEMENT_DYNAMIC_CLASS(wxHtmlEntitiesParser
,wxObject
) 
 462 wxHtmlEntitiesParser::wxHtmlEntitiesParser() 
 463 #if wxUSE_WCHAR_T && !wxUSE_UNICODE 
 464     : m_conv(NULL
), m_encoding(wxFONTENCODING_SYSTEM
) 
 469 wxHtmlEntitiesParser::~wxHtmlEntitiesParser() 
 471 #if wxUSE_WCHAR_T && !wxUSE_UNICODE 
 476 void wxHtmlEntitiesParser::SetEncoding(wxFontEncoding encoding
) 
 478 #if wxUSE_WCHAR_T && !wxUSE_UNICODE 
 479     if (encoding 
== m_encoding
) 
 484     m_encoding 
= encoding
; 
 485     if (m_encoding 
== wxFONTENCODING_SYSTEM
) 
 488         m_conv 
= new wxCSConv(wxFontMapper::GetEncodingName(m_encoding
)); 
 494 wxString 
wxHtmlEntitiesParser::Parse(const wxString
& input
) 
 496     const wxChar 
*c
, *last
; 
 497     const wxChar 
*in_str 
= input
.c_str(); 
 500     output
.reserve(input
.length()); 
 502     for (c 
= in_str
, last 
= in_str
; *c 
!= wxT('\0'); c
++) 
 507                 output
.append(last
, c 
- last
); 
 508             if ( *++c 
== wxT('\0') ) 
 512             const wxChar 
*ent_s 
= c
; 
 515             for (; (*c 
>= wxT('a') && *c 
<= wxT('z')) || 
 516                    (*c 
>= wxT('A') && *c 
<= wxT('Z')) || 
 517                    (*c 
>= wxT('0') && *c 
<= wxT('9')) || 
 518                    *c 
== wxT('_') || *c 
== wxT('#'); c
++) {} 
 519             entity
.append(ent_s
, c 
- ent_s
); 
 520             if (*c 
!= wxT(';')) c
--; 
 522             entity_char 
= GetEntityChar(entity
); 
 524                 output 
<< entity_char
; 
 527                 output
.append(ent_s
-1, c
-ent_s
+2); 
 528                 wxLogTrace(wxTRACE_HTML_DEBUG
, 
 529                            wxT("Unrecognized HTML entity: '%s'"), 
 534     if (*last 
!= wxT('\0')) 
 539 struct wxHtmlEntityInfo
 
 545 extern "C" int LINKAGEMODE 
wxHtmlEntityCompare(const void *key
, const void *item
) 
 547     return wxStrcmp((wxChar
*)key
, ((wxHtmlEntityInfo
*)item
)->name
); 
 551 wxChar 
wxHtmlEntitiesParser::GetCharForCode(unsigned code
) 
 556     wbuf
[0] = (wchar_t)code
; 
 558     wxMBConv 
*conv 
= m_conv 
? m_conv 
: &wxConvLocal
; 
 559     if (conv
->WC2MB(buf
, wbuf
, 2) == (size_t)-1) 
 563     return (code 
< 256) ? (wxChar
)code 
: '?'; 
 568 wxChar 
wxHtmlEntitiesParser::GetEntityChar(const wxString
& entity
) 
 572     if (entity
[0] == wxT('#')) 
 574         const wxChar 
*ent_s 
= entity
.c_str(); 
 575         const wxChar 
*format
; 
 577         if (ent_s
[1] == wxT('x') || ent_s
[1] == wxT('X')) 
 586         if (wxSscanf(ent_s
, format
, &code
) != 1) 
 591         static wxHtmlEntityInfo substitutions
[] = { 
 592             { wxT("AElig"),198 }, 
 593             { wxT("Aacute"),193 }, 
 594             { wxT("Acirc"),194 }, 
 595             { wxT("Agrave"),192 }, 
 596             { wxT("Alpha"),913 }, 
 597             { wxT("Aring"),197 }, 
 598             { wxT("Atilde"),195 }, 
 601             { wxT("Ccedil"),199 }, 
 603             { wxT("Dagger"),8225 }, 
 604             { wxT("Delta"),916 }, 
 606             { wxT("Eacute"),201 }, 
 607             { wxT("Ecirc"),202 }, 
 608             { wxT("Egrave"),200 }, 
 609             { wxT("Epsilon"),917 }, 
 612             { wxT("Gamma"),915 }, 
 613             { wxT("Iacute"),205 }, 
 614             { wxT("Icirc"),206 }, 
 615             { wxT("Igrave"),204 }, 
 618             { wxT("Kappa"),922 }, 
 619             { wxT("Lambda"),923 }, 
 621             { wxT("Ntilde"),209 }, 
 623             { wxT("OElig"),338 }, 
 624             { wxT("Oacute"),211 }, 
 625             { wxT("Ocirc"),212 }, 
 626             { wxT("Ograve"),210 }, 
 627             { wxT("Omega"),937 }, 
 628             { wxT("Omicron"),927 }, 
 629             { wxT("Oslash"),216 }, 
 630             { wxT("Otilde"),213 }, 
 634             { wxT("Prime"),8243 }, 
 637             { wxT("Scaron"),352 }, 
 638             { wxT("Sigma"),931 }, 
 639             { wxT("THORN"),222 }, 
 641             { wxT("Theta"),920 }, 
 642             { wxT("Uacute"),218 }, 
 643             { wxT("Ucirc"),219 }, 
 644             { wxT("Ugrave"),217 }, 
 645             { wxT("Upsilon"),933 }, 
 648             { wxT("Yacute"),221 }, 
 651             { wxT("aacute"),225 }, 
 652             { wxT("acirc"),226 }, 
 653             { wxT("acute"),180 }, 
 654             { wxT("aelig"),230 }, 
 655             { wxT("agrave"),224 }, 
 656             { wxT("alefsym"),8501 }, 
 657             { wxT("alpha"),945 }, 
 661             { wxT("aring"),229 }, 
 662             { wxT("asymp"),8776 }, 
 663             { wxT("atilde"),227 }, 
 665             { wxT("bdquo"),8222 }, 
 667             { wxT("brvbar"),166 }, 
 668             { wxT("bull"),8226 }, 
 670             { wxT("ccedil"),231 }, 
 671             { wxT("cedil"),184 }, 
 675             { wxT("clubs"),9827 }, 
 676             { wxT("cong"),8773 }, 
 678             { wxT("crarr"),8629 }, 
 680             { wxT("curren"),164 }, 
 681             { wxT("dArr"),8659 }, 
 682             { wxT("dagger"),8224 }, 
 683             { wxT("darr"),8595 }, 
 685             { wxT("delta"),948 }, 
 686             { wxT("diams"),9830 }, 
 687             { wxT("divide"),247 }, 
 688             { wxT("eacute"),233 }, 
 689             { wxT("ecirc"),234 }, 
 690             { wxT("egrave"),232 }, 
 691             { wxT("empty"),8709 }, 
 692             { wxT("emsp"),8195 }, 
 693             { wxT("ensp"),8194 }, 
 694             { wxT("epsilon"),949 }, 
 695             { wxT("equiv"),8801 }, 
 699             { wxT("euro"),8364 }, 
 700             { wxT("exist"),8707 }, 
 702             { wxT("forall"),8704 }, 
 703             { wxT("frac12"),189 }, 
 704             { wxT("frac14"),188 }, 
 705             { wxT("frac34"),190 }, 
 706             { wxT("frasl"),8260 }, 
 707             { wxT("gamma"),947 }, 
 710             { wxT("hArr"),8660 }, 
 711             { wxT("harr"),8596 }, 
 712             { wxT("hearts"),9829 }, 
 713             { wxT("hellip"),8230 }, 
 714             { wxT("iacute"),237 }, 
 715             { wxT("icirc"),238 }, 
 716             { wxT("iexcl"),161 }, 
 717             { wxT("igrave"),236 }, 
 718             { wxT("image"),8465 }, 
 719             { wxT("infin"),8734 }, 
 722             { wxT("iquest"),191 }, 
 723             { wxT("isin"),8712 }, 
 725             { wxT("kappa"),954 }, 
 726             { wxT("lArr"),8656 }, 
 727             { wxT("lambda"),955 }, 
 728             { wxT("lang"),9001 }, 
 729             { wxT("laquo"),171 }, 
 730             { wxT("larr"),8592 }, 
 731             { wxT("lceil"),8968 }, 
 732             { wxT("ldquo"),8220 }, 
 734             { wxT("lfloor"),8970 }, 
 735             { wxT("lowast"),8727 }, 
 738             { wxT("lsaquo"),8249 }, 
 739             { wxT("lsquo"),8216 }, 
 742             { wxT("mdash"),8212 }, 
 743             { wxT("micro"),181 }, 
 744             { wxT("middot"),183 }, 
 745             { wxT("minus"),8722 }, 
 747             { wxT("nabla"),8711 }, 
 749             { wxT("ndash"),8211 }, 
 753             { wxT("notin"),8713 }, 
 754             { wxT("nsub"),8836 }, 
 755             { wxT("ntilde"),241 }, 
 757             { wxT("oacute"),243 }, 
 758             { wxT("ocirc"),244 }, 
 759             { wxT("oelig"),339 }, 
 760             { wxT("ograve"),242 }, 
 761             { wxT("oline"),8254 }, 
 762             { wxT("omega"),969 }, 
 763             { wxT("omicron"),959 }, 
 764             { wxT("oplus"),8853 }, 
 768             { wxT("oslash"),248 }, 
 769             { wxT("otilde"),245 }, 
 770             { wxT("otimes"),8855 }, 
 773             { wxT("part"),8706 }, 
 774             { wxT("permil"),8240 }, 
 775             { wxT("perp"),8869 }, 
 779             { wxT("plusmn"),177 }, 
 780             { wxT("pound"),163 }, 
 781             { wxT("prime"),8242 }, 
 782             { wxT("prod"),8719 }, 
 783             { wxT("prop"),8733 }, 
 786             { wxT("rArr"),8658 }, 
 787             { wxT("radic"),8730 }, 
 788             { wxT("rang"),9002 }, 
 789             { wxT("raquo"),187 }, 
 790             { wxT("rarr"),8594 }, 
 791             { wxT("rceil"),8969 }, 
 792             { wxT("rdquo"),8221 }, 
 793             { wxT("real"),8476 }, 
 795             { wxT("rfloor"),8971 }, 
 798             { wxT("rsaquo"),8250 }, 
 799             { wxT("rsquo"),8217 }, 
 800             { wxT("sbquo"),8218 }, 
 801             { wxT("scaron"),353 }, 
 802             { wxT("sdot"),8901 }, 
 805             { wxT("sigma"),963 }, 
 806             { wxT("sigmaf"),962 }, 
 808             { wxT("spades"),9824 }, 
 810             { wxT("sube"),8838 }, 
 816             { wxT("supe"),8839 }, 
 817             { wxT("szlig"),223 }, 
 819             { wxT("there4"),8756 }, 
 820             { wxT("theta"),952 }, 
 821             { wxT("thetasym"),977 }, 
 822             { wxT("thinsp"),8201 }, 
 823             { wxT("thorn"),254 }, 
 824             { wxT("tilde"),732 }, 
 825             { wxT("times"),215 }, 
 826             { wxT("trade"),8482 }, 
 827             { wxT("uArr"),8657 }, 
 828             { wxT("uacute"),250 }, 
 829             { wxT("uarr"),8593 }, 
 830             { wxT("ucirc"),251 }, 
 831             { wxT("ugrave"),249 }, 
 833             { wxT("upsih"),978 }, 
 834             { wxT("upsilon"),965 }, 
 836             { wxT("weierp"),8472 }, 
 838             { wxT("yacute"),253 }, 
 843             { wxT("zwnj"),8204 }, 
 845         static size_t substitutions_cnt 
= 0; 
 847         if (substitutions_cnt 
== 0) 
 848             while (substitutions
[substitutions_cnt
].code 
!= 0) 
 851         wxHtmlEntityInfo 
*info 
= NULL
; 
 853         // bsearch crashes under WinCE for some reason 
 855         for (i 
= 0; i 
< substitutions_cnt
; i
++) 
 857             if (entity 
== substitutions
[i
].name
) 
 859                 info 
= & substitutions
[i
]; 
 864         info 
= (wxHtmlEntityInfo
*) bsearch(entity
.c_str(), substitutions
, 
 866                                            sizeof(wxHtmlEntityInfo
), 
 867                                            wxHtmlEntityCompare
); 
 876         return GetCharForCode(code
); 
 879 wxFSFile 
*wxHtmlParser::OpenURL(wxHtmlURLType 
WXUNUSED(type
), 
 880                                 const wxString
& url
) const 
 882     return m_FS 
? m_FS
->OpenFile(url
) : NULL
; 
 887 //----------------------------------------------------------------------------- 
 888 // wxHtmlParser::ExtractCharsetInformation 
 889 //----------------------------------------------------------------------------- 
 891 class wxMetaTagParser 
: public wxHtmlParser
 
 894     wxMetaTagParser() { } 
 896     wxObject
* GetProduct() { return NULL
; } 
 899     virtual void AddText(const wxChar
* WXUNUSED(txt
)) {} 
 901     DECLARE_NO_COPY_CLASS(wxMetaTagParser
) 
 904 class wxMetaTagHandler 
: public wxHtmlTagHandler
 
 907     wxMetaTagHandler(wxString 
*retval
) : wxHtmlTagHandler(), m_retval(retval
) {} 
 908     wxString 
GetSupportedTags() { return wxT("META,BODY"); } 
 909     bool HandleTag(const wxHtmlTag
& tag
); 
 914     DECLARE_NO_COPY_CLASS(wxMetaTagHandler
) 
 917 bool wxMetaTagHandler::HandleTag(const wxHtmlTag
& tag
) 
 919     if (tag
.GetName() == _T("BODY")) 
 921         m_Parser
->StopParsing(); 
 925     if (tag
.HasParam(_T("HTTP-EQUIV")) && 
 926         tag
.GetParam(_T("HTTP-EQUIV")).IsSameAs(_T("Content-Type"), false) && 
 927         tag
.HasParam(_T("CONTENT"))) 
 929         wxString content 
= tag
.GetParam(_T("CONTENT")).Lower(); 
 930         if (content
.Left(19) == _T("text/html; charset=")) 
 932             *m_retval 
= content
.Mid(19); 
 933             m_Parser
->StopParsing(); 
 941 wxString 
wxHtmlParser::ExtractCharsetInformation(const wxString
& markup
) 
 944     wxMetaTagParser 
*parser 
= new wxMetaTagParser(); 
 947         parser
->AddTagHandler(new wxMetaTagHandler(&charset
)); 
 948         parser
->Parse(markup
);