1 ///////////////////////////////////////////////////////////////////////////// 
   2 // Name:        src/html/htmlpars.cpp 
   3 // Purpose:     wx28HtmlParser class (generic parser) 
   4 // Author:      Vaclav Slavik 
   6 // Copyright:   (c) 1999 Vaclav Slavik 
   7 // Licence:     wxWindows licence 
   8 ///////////////////////////////////////////////////////////////////////////// 
  10 #include "wx/wxprec.h" 
  19     #include "wx/dynarray.h" 
  25 #include "wx/tokenzr.h" 
  26 #include "wx/wfstream.h" 
  28 #include "wx/fontmap.h" 
  29 #include "wx/html/htmldefs.h" 
  30 #include "wx/arrimpl.cpp" 
  33     #include "wx/msw/wince/missing.h"       // for bsearch() 
  36 // DLL options compatibility check: 
  37 WX_CHECK_BUILD_OPTIONS("wxHTML") 
  39 const wxChar 
*wxTRACE_HTML_DEBUG 
= wxT("htmldebug"); 
  41 //----------------------------------------------------------------------------- 
  42 // wx28HtmlParser helpers 
  43 //----------------------------------------------------------------------------- 
  45 class wx28HtmlTextPiece
 
  48     wx28HtmlTextPiece(int pos
, int lng
) : m_pos(pos
), m_lng(lng
) {} 
  52 WX_DECLARE_OBJARRAY(wx28HtmlTextPiece
, wx28HtmlTextPieces
); 
  53 WX_DEFINE_OBJARRAY(wx28HtmlTextPieces
) 
  55 class wx28HtmlParserState
 
  58     wx28HtmlTag         
*m_curTag
; 
  60     wx28HtmlTextPieces  
*m_textPieces
; 
  63     wx28HtmlParserState 
*m_nextState
; 
  66 //----------------------------------------------------------------------------- 
  68 //----------------------------------------------------------------------------- 
  70 IMPLEMENT_ABSTRACT_CLASS(wx28HtmlParser
,wxObject
) 
  72 wx28HtmlParser::wx28HtmlParser() 
  73     : wxObject(), m_HandlersHash(wxKEY_STRING
), 
  74       m_FS(NULL
), m_HandlersStack(NULL
) 
  76     m_entitiesParser 
= new wx28HtmlEntitiesParser
; 
  84 wx28HtmlParser::~wx28HtmlParser() 
  86     while (RestoreState()) {} 
  91         wxList
& tmp 
= *m_HandlersStack
; 
  92         wxList::iterator it
, en
; 
  93         for( it 
= tmp
.begin(), en 
= tmp
.end(); it 
!= en
; ++it 
) 
  94             delete (wxHashTable
*)*it
; 
  97     delete m_HandlersStack
; 
  98     m_HandlersHash
.Clear(); 
  99     WX_CLEAR_LIST(wxList
, m_HandlersList
); 
 100     delete m_entitiesParser
; 
 103 wxObject
* wx28HtmlParser::Parse(const wxString
& source
) 
 107     wxObject 
*result 
= GetProduct(); 
 112 void wx28HtmlParser::InitParser(const wxString
& source
) 
 115     m_stopParsing 
= false; 
 118 void wx28HtmlParser::DoneParser() 
 123 void wx28HtmlParser::SetSource(const wxString
& src
) 
 132 void wx28HtmlParser::CreateDOMTree() 
 134     wx28HtmlTagsCache 
cache(m_Source
); 
 135     m_TextPieces 
= new wx28HtmlTextPieces
; 
 136     CreateDOMSubTree(NULL
, 0, m_Source
.length(), &cache
); 
 140 extern bool wxIsCDATAElement(const wxChar 
*tag
); 
 142 void wx28HtmlParser::CreateDOMSubTree(wx28HtmlTag 
*cur
, 
 143                                     int begin_pos
, int end_pos
, 
 144                                     wx28HtmlTagsCache 
*cache
) 
 146     if (end_pos 
<= begin_pos
) return; 
 150     int textBeginning 
= begin_pos
; 
 152     // If the tag contains CDATA text, we include the text between the beginning 
 153     // and ending tags verbatim. Setting i=end_pos will skip to the very 
 154     // end of this function where the text piece is added, bypassing any child 
 155     // tag parsing (a CDATA element can't have child elements by definition): 
 156     if (cur 
!= NULL 
&& wxIsCDATAElement(cur
->GetName().c_str())) 
 163         c 
= m_Source
.GetChar(i
); 
 167             // add text to m_TextPieces: 
 168             if (i 
- textBeginning 
> 0) 
 170                     wx28HtmlTextPiece(textBeginning
, i 
- textBeginning
)); 
 172             // if it is a comment, skip it: 
 173             if (i 
< end_pos
-6 && m_Source
.GetChar(i
+1) == wxT('!') && 
 174                                  m_Source
.GetChar(i
+2) == wxT('-') && 
 175                                  m_Source
.GetChar(i
+3) == wxT('-')) 
 177                 // Comments begin with "<!--" and end with "--[ \t\r\n]*>" 
 178                 // according to HTML 4.0 
 183                     c 
= m_Source
.GetChar(i
++); 
 184                     if ((c 
== wxT(' ') || c 
== wxT('\n') || 
 185                         c 
== wxT('\r') || c 
== wxT('\t')) && dashes 
>= 2) {} 
 186                     else if (c 
== wxT('>') && dashes 
>= 2) 
 191                     else if (c 
== wxT('-')) 
 198             // add another tag to the tree: 
 199             else if (i 
< end_pos
-1 && m_Source
.GetChar(i
+1) != wxT('/')) 
 203                     chd 
= new wx28HtmlTag(cur
, m_Source
, 
 204                                         i
, end_pos
, cache
, m_entitiesParser
); 
 207                     chd 
= new wx28HtmlTag(NULL
, m_Source
, 
 208                                         i
, end_pos
, cache
, m_entitiesParser
); 
 211                         // if this is the first tag to be created make the root 
 212                         // m_Tags point to it: 
 217                         // if there is already a root tag add this tag as 
 219                         chd
->m_Prev 
= m_Tags
->GetLastSibling(); 
 220                         chd
->m_Prev
->m_Next 
= chd
; 
 224                 if (chd
->HasEnding()) 
 226                     CreateDOMSubTree(chd
, 
 227                                      chd
->GetBeginPos(), chd
->GetEndPos1(), 
 229                     i 
= chd
->GetEndPos2(); 
 232                     i 
= chd
->GetBeginPos(); 
 237             // ... or skip ending tag: 
 240                 while (i 
< end_pos 
&& m_Source
.GetChar(i
) != wxT('>')) i
++; 
 247     // add remaining text to m_TextPieces: 
 248     if (end_pos 
- textBeginning 
> 0) 
 250             wx28HtmlTextPiece(textBeginning
, end_pos 
- textBeginning
)); 
 253 void wx28HtmlParser::DestroyDOMTree() 
 255     wx28HtmlTag 
*t1
, *t2
; 
 259         t2 
= t1
->GetNextSibling(); 
 263     m_Tags 
= m_CurTag 
= NULL
; 
 269 void wx28HtmlParser::DoParsing() 
 273     DoParsing(0, m_Source
.length()); 
 276 void wx28HtmlParser::DoParsing(int begin_pos
, int end_pos
) 
 278     if (end_pos 
<= begin_pos
) return; 
 280     wx28HtmlTextPieces
& pieces 
= *m_TextPieces
; 
 281     size_t piecesCnt 
= pieces
.GetCount(); 
 283     while (begin_pos 
< end_pos
) 
 285         while (m_CurTag 
&& m_CurTag
->GetBeginPos() < begin_pos
) 
 286             m_CurTag 
= m_CurTag
->GetNextTag(); 
 287         while (m_CurTextPiece 
< piecesCnt 
&& 
 288                pieces
[m_CurTextPiece
].m_pos 
< begin_pos
) 
 291         if (m_CurTextPiece 
< piecesCnt 
&& 
 293              pieces
[m_CurTextPiece
].m_pos 
< m_CurTag
->GetBeginPos())) 
 296             AddText(GetEntitiesParser()->Parse( 
 297                        m_Source
.Mid(pieces
[m_CurTextPiece
].m_pos
, 
 298                                     pieces
[m_CurTextPiece
].m_lng
))); 
 299             begin_pos 
= pieces
[m_CurTextPiece
].m_pos 
+ 
 300                         pieces
[m_CurTextPiece
].m_lng
; 
 305             if (m_CurTag
->HasEnding()) 
 306                 begin_pos 
= m_CurTag
->GetEndPos2(); 
 308                 begin_pos 
= m_CurTag
->GetBeginPos(); 
 309             wx28HtmlTag 
*t 
= m_CurTag
; 
 310             m_CurTag 
= m_CurTag
->GetNextTag(); 
 319 void wx28HtmlParser::AddTag(const wx28HtmlTag
& tag
) 
 321     wx28HtmlTagHandler 
*h
; 
 324     h 
= (wx28HtmlTagHandler
*) m_HandlersHash
.Get(tag
.GetName()); 
 327         inner 
= h
->HandleTag(tag
); 
 334             DoParsing(tag
.GetBeginPos(), tag
.GetEndPos1()); 
 338 void wx28HtmlParser::AddTagHandler(wx28HtmlTagHandler 
*handler
) 
 340     wxString 
s(handler
->GetSupportedTags()); 
 341     wxStringTokenizer 
tokenizer(s
, wxT(", ")); 
 343     while (tokenizer
.HasMoreTokens()) 
 344         m_HandlersHash
.Put(tokenizer
.GetNextToken(), handler
); 
 346     if (m_HandlersList
.IndexOf(handler
) == wxNOT_FOUND
) 
 347         m_HandlersList
.Append(handler
); 
 349     handler
->SetParser(this); 
 352 void wx28HtmlParser::PushTagHandler(wx28HtmlTagHandler 
*handler
, const wxString
& tags
) 
 354     wxStringTokenizer 
tokenizer(tags
, wxT(", ")); 
 357     if (m_HandlersStack 
== NULL
) 
 359         m_HandlersStack 
= new wxList
; 
 362     m_HandlersStack
->Insert((wxObject
*)new wxHashTable(m_HandlersHash
)); 
 364     while (tokenizer
.HasMoreTokens()) 
 366         key 
= tokenizer
.GetNextToken(); 
 367         m_HandlersHash
.Delete(key
); 
 368         m_HandlersHash
.Put(key
, handler
); 
 372 void wx28HtmlParser::PopTagHandler() 
 374     wxList::compatibility_iterator first
; 
 376     if ( !m_HandlersStack 
|| 
 378          !(first 
= m_HandlersStack
->GetFirst()) 
 380          ((first 
= m_HandlersStack
->GetFirst()) == NULL
) 
 381 #endif // wxUSE_STL/!wxUSE_STL 
 384         wxLogWarning(_("Warning: attempt to remove HTML tag handler from empty stack.")); 
 387     m_HandlersHash 
= *((wxHashTable
*) first
->GetData()); 
 388     delete (wxHashTable
*) first
->GetData(); 
 389     m_HandlersStack
->Erase(first
); 
 392 void wx28HtmlParser::SetSourceAndSaveState(const wxString
& src
) 
 394     wx28HtmlParserState 
*s 
= new wx28HtmlParserState
; 
 396     s
->m_curTag 
= m_CurTag
; 
 398     s
->m_textPieces 
= m_TextPieces
; 
 399     s
->m_curTextPiece 
= m_CurTextPiece
; 
 400     s
->m_source 
= m_Source
; 
 402     s
->m_nextState 
= m_SavedStates
; 
 409     m_Source 
= wxEmptyString
; 
 414 bool wx28HtmlParser::RestoreState() 
 416     if (!m_SavedStates
) return false; 
 420     wx28HtmlParserState 
*s 
= m_SavedStates
; 
 421     m_SavedStates 
= s
->m_nextState
; 
 423     m_CurTag 
= s
->m_curTag
; 
 425     m_TextPieces 
= s
->m_textPieces
; 
 426     m_CurTextPiece 
= s
->m_curTextPiece
; 
 427     m_Source 
= s
->m_source
; 
 433 wxString 
wx28HtmlParser::GetInnerSource(const wx28HtmlTag
& tag
) 
 435     return GetSource()->Mid(tag
.GetBeginPos(), 
 436                             tag
.GetEndPos1() - tag
.GetBeginPos()); 
 439 //----------------------------------------------------------------------------- 
 440 // wx28HtmlTagHandler 
 441 //----------------------------------------------------------------------------- 
 443 IMPLEMENT_ABSTRACT_CLASS(wx28HtmlTagHandler
,wxObject
) 
 445 void wx28HtmlTagHandler::ParseInnerSource(const wxString
& source
) 
 447     // It is safe to temporarily change the source being parsed, 
 448     // provided we restore the state back after parsing 
 449     m_Parser
->SetSourceAndSaveState(source
); 
 450     m_Parser
->DoParsing(); 
 451     m_Parser
->RestoreState(); 
 455 //----------------------------------------------------------------------------- 
 456 // wx28HtmlEntitiesParser 
 457 //----------------------------------------------------------------------------- 
 459 IMPLEMENT_DYNAMIC_CLASS(wx28HtmlEntitiesParser
,wxObject
) 
 461 wx28HtmlEntitiesParser::wx28HtmlEntitiesParser() 
 462 #if wxUSE_WCHAR_T && !wxUSE_UNICODE 
 463     : m_conv(NULL
), m_encoding(wxFONTENCODING_SYSTEM
) 
 468 wx28HtmlEntitiesParser::~wx28HtmlEntitiesParser() 
 470 #if wxUSE_WCHAR_T && !wxUSE_UNICODE 
 475 void wx28HtmlEntitiesParser::SetEncoding(wxFontEncoding encoding
) 
 477 #if wxUSE_WCHAR_T && !wxUSE_UNICODE 
 478     if (encoding 
== m_encoding
) 
 483     m_encoding 
= encoding
; 
 484     if (m_encoding 
== wxFONTENCODING_SYSTEM
) 
 487         m_conv 
= new wxCSConv(wxFontMapper::GetEncodingName(m_encoding
)); 
 493 wxString 
wx28HtmlEntitiesParser::Parse(const wxString
& input
) 
 495     const wxChar 
*c
, *last
; 
 496     const wxChar 
*in_str 
= input
.c_str(); 
 499     for (c 
= in_str
, last 
= in_str
; *c 
!= wxT('\0'); c
++) 
 503             if ( output
.empty() ) 
 504                 output
.reserve(input
.length()); 
 507                 output
.append(last
, c 
- last
); 
 508             if ( *++c 
== wxT('\0') ) 
 512             const wxChar 
*ent_s 
= c
; 
 515             for (; (*c 
>= wxT('a') && *c 
<= wxT('z')) || 
 516                    (*c 
>= wxT('A') && *c 
<= wxT('Z')) || 
 517                    (*c 
>= wxT('0') && *c 
<= wxT('9')) || 
 518                    *c 
== wxT('_') || *c 
== wxT('#'); c
++) {} 
 519             entity
.append(ent_s
, c 
- ent_s
); 
 520             if (*c 
!= wxT(';')) c
--; 
 522             entity_char 
= GetEntityChar(entity
); 
 524                 output 
<< entity_char
; 
 527                 output
.append(ent_s
-1, c
-ent_s
+2); 
 528                 wxLogTrace(wxTRACE_HTML_DEBUG
, 
 529                            wxT("Unrecognized HTML entity: '%s'"), 
 534     if (last 
== in_str
) // common case: no entity 
 536     if (*last 
!= wxT('\0')) 
 541 struct wx28HtmlEntityInfo
 
 547 extern "C" int LINKAGEMODE 
wx28HtmlEntityCompare(const void *key
, const void *item
) 
 549     return wxStrcmp((wxChar
*)key
, ((wx28HtmlEntityInfo
*)item
)->name
); 
 553 wxChar 
wx28HtmlEntitiesParser::GetCharForCode(unsigned code
) 
 558     wbuf
[0] = (wchar_t)code
; 
 560     wxMBConv 
*conv 
= m_conv 
? m_conv 
: &wxConvLocal
; 
 561     if (conv
->WC2MB(buf
, wbuf
, 2) == (size_t)-1) 
 565     return (code 
< 256) ? (wxChar
)code 
: '?'; 
 570 wxChar 
wx28HtmlEntitiesParser::GetEntityChar(const wxString
& entity
) 
 574     if (entity
[0] == wxT('#')) 
 576         const wxChar 
*ent_s 
= entity
.c_str(); 
 577         const wxChar 
*format
; 
 579         if (ent_s
[1] == wxT('x') || ent_s
[1] == wxT('X')) 
 588         if (wxSscanf(ent_s
, format
, &code
) != 1) 
 593         static wx28HtmlEntityInfo substitutions
[] = { 
 594             { wxT("AElig"),198 }, 
 595             { wxT("Aacute"),193 }, 
 596             { wxT("Acirc"),194 }, 
 597             { wxT("Agrave"),192 }, 
 598             { wxT("Alpha"),913 }, 
 599             { wxT("Aring"),197 }, 
 600             { wxT("Atilde"),195 }, 
 603             { wxT("Ccedil"),199 }, 
 605             { wxT("Dagger"),8225 }, 
 606             { wxT("Delta"),916 }, 
 608             { wxT("Eacute"),201 }, 
 609             { wxT("Ecirc"),202 }, 
 610             { wxT("Egrave"),200 }, 
 611             { wxT("Epsilon"),917 }, 
 614             { wxT("Gamma"),915 }, 
 615             { wxT("Iacute"),205 }, 
 616             { wxT("Icirc"),206 }, 
 617             { wxT("Igrave"),204 }, 
 620             { wxT("Kappa"),922 }, 
 621             { wxT("Lambda"),923 }, 
 623             { wxT("Ntilde"),209 }, 
 625             { wxT("OElig"),338 }, 
 626             { wxT("Oacute"),211 }, 
 627             { wxT("Ocirc"),212 }, 
 628             { wxT("Ograve"),210 }, 
 629             { wxT("Omega"),937 }, 
 630             { wxT("Omicron"),927 }, 
 631             { wxT("Oslash"),216 }, 
 632             { wxT("Otilde"),213 }, 
 636             { wxT("Prime"),8243 }, 
 639             { wxT("Scaron"),352 }, 
 640             { wxT("Sigma"),931 }, 
 641             { wxT("THORN"),222 }, 
 643             { wxT("Theta"),920 }, 
 644             { wxT("Uacute"),218 }, 
 645             { wxT("Ucirc"),219 }, 
 646             { wxT("Ugrave"),217 }, 
 647             { wxT("Upsilon"),933 }, 
 650             { wxT("Yacute"),221 }, 
 653             { wxT("aacute"),225 }, 
 654             { wxT("acirc"),226 }, 
 655             { wxT("acute"),180 }, 
 656             { wxT("aelig"),230 }, 
 657             { wxT("agrave"),224 }, 
 658             { wxT("alefsym"),8501 }, 
 659             { wxT("alpha"),945 }, 
 663             { wxT("aring"),229 }, 
 664             { wxT("asymp"),8776 }, 
 665             { wxT("atilde"),227 }, 
 667             { wxT("bdquo"),8222 }, 
 669             { wxT("brvbar"),166 }, 
 670             { wxT("bull"),8226 }, 
 672             { wxT("ccedil"),231 }, 
 673             { wxT("cedil"),184 }, 
 677             { wxT("clubs"),9827 }, 
 678             { wxT("cong"),8773 }, 
 680             { wxT("crarr"),8629 }, 
 682             { wxT("curren"),164 }, 
 683             { wxT("dArr"),8659 }, 
 684             { wxT("dagger"),8224 }, 
 685             { wxT("darr"),8595 }, 
 687             { wxT("delta"),948 }, 
 688             { wxT("diams"),9830 }, 
 689             { wxT("divide"),247 }, 
 690             { wxT("eacute"),233 }, 
 691             { wxT("ecirc"),234 }, 
 692             { wxT("egrave"),232 }, 
 693             { wxT("empty"),8709 }, 
 694             { wxT("emsp"),8195 }, 
 695             { wxT("ensp"),8194 }, 
 696             { wxT("epsilon"),949 }, 
 697             { wxT("equiv"),8801 }, 
 701             { wxT("euro"),8364 }, 
 702             { wxT("exist"),8707 }, 
 704             { wxT("forall"),8704 }, 
 705             { wxT("frac12"),189 }, 
 706             { wxT("frac14"),188 }, 
 707             { wxT("frac34"),190 }, 
 708             { wxT("frasl"),8260 }, 
 709             { wxT("gamma"),947 }, 
 712             { wxT("hArr"),8660 }, 
 713             { wxT("harr"),8596 }, 
 714             { wxT("hearts"),9829 }, 
 715             { wxT("hellip"),8230 }, 
 716             { wxT("iacute"),237 }, 
 717             { wxT("icirc"),238 }, 
 718             { wxT("iexcl"),161 }, 
 719             { wxT("igrave"),236 }, 
 720             { wxT("image"),8465 }, 
 721             { wxT("infin"),8734 }, 
 724             { wxT("iquest"),191 }, 
 725             { wxT("isin"),8712 }, 
 727             { wxT("kappa"),954 }, 
 728             { wxT("lArr"),8656 }, 
 729             { wxT("lambda"),955 }, 
 730             { wxT("lang"),9001 }, 
 731             { wxT("laquo"),171 }, 
 732             { wxT("larr"),8592 }, 
 733             { wxT("lceil"),8968 }, 
 734             { wxT("ldquo"),8220 }, 
 736             { wxT("lfloor"),8970 }, 
 737             { wxT("lowast"),8727 }, 
 740             { wxT("lsaquo"),8249 }, 
 741             { wxT("lsquo"),8216 }, 
 744             { wxT("mdash"),8212 }, 
 745             { wxT("micro"),181 }, 
 746             { wxT("middot"),183 }, 
 747             { wxT("minus"),8722 }, 
 749             { wxT("nabla"),8711 }, 
 751             { wxT("ndash"),8211 }, 
 755             { wxT("notin"),8713 }, 
 756             { wxT("nsub"),8836 }, 
 757             { wxT("ntilde"),241 }, 
 759             { wxT("oacute"),243 }, 
 760             { wxT("ocirc"),244 }, 
 761             { wxT("oelig"),339 }, 
 762             { wxT("ograve"),242 }, 
 763             { wxT("oline"),8254 }, 
 764             { wxT("omega"),969 }, 
 765             { wxT("omicron"),959 }, 
 766             { wxT("oplus"),8853 }, 
 770             { wxT("oslash"),248 }, 
 771             { wxT("otilde"),245 }, 
 772             { wxT("otimes"),8855 }, 
 775             { wxT("part"),8706 }, 
 776             { wxT("permil"),8240 }, 
 777             { wxT("perp"),8869 }, 
 781             { wxT("plusmn"),177 }, 
 782             { wxT("pound"),163 }, 
 783             { wxT("prime"),8242 }, 
 784             { wxT("prod"),8719 }, 
 785             { wxT("prop"),8733 }, 
 788             { wxT("rArr"),8658 }, 
 789             { wxT("radic"),8730 }, 
 790             { wxT("rang"),9002 }, 
 791             { wxT("raquo"),187 }, 
 792             { wxT("rarr"),8594 }, 
 793             { wxT("rceil"),8969 }, 
 794             { wxT("rdquo"),8221 }, 
 795             { wxT("real"),8476 }, 
 797             { wxT("rfloor"),8971 }, 
 800             { wxT("rsaquo"),8250 }, 
 801             { wxT("rsquo"),8217 }, 
 802             { wxT("sbquo"),8218 }, 
 803             { wxT("scaron"),353 }, 
 804             { wxT("sdot"),8901 }, 
 807             { wxT("sigma"),963 }, 
 808             { wxT("sigmaf"),962 }, 
 810             { wxT("spades"),9824 }, 
 812             { wxT("sube"),8838 }, 
 818             { wxT("supe"),8839 }, 
 819             { wxT("szlig"),223 }, 
 821             { wxT("there4"),8756 }, 
 822             { wxT("theta"),952 }, 
 823             { wxT("thetasym"),977 }, 
 824             { wxT("thinsp"),8201 }, 
 825             { wxT("thorn"),254 }, 
 826             { wxT("tilde"),732 }, 
 827             { wxT("times"),215 }, 
 828             { wxT("trade"),8482 }, 
 829             { wxT("uArr"),8657 }, 
 830             { wxT("uacute"),250 }, 
 831             { wxT("uarr"),8593 }, 
 832             { wxT("ucirc"),251 }, 
 833             { wxT("ugrave"),249 }, 
 835             { wxT("upsih"),978 }, 
 836             { wxT("upsilon"),965 }, 
 838             { wxT("weierp"),8472 }, 
 840             { wxT("yacute"),253 }, 
 845             { wxT("zwnj"),8204 }, 
 847         static size_t substitutions_cnt 
= 0; 
 849         if (substitutions_cnt 
== 0) 
 850             while (substitutions
[substitutions_cnt
].code 
!= 0) 
 853         wx28HtmlEntityInfo 
*info 
= NULL
; 
 855         // bsearch crashes under WinCE for some reason 
 857         for (i 
= 0; i 
< substitutions_cnt
; i
++) 
 859             if (entity 
== substitutions
[i
].name
) 
 861                 info 
= & substitutions
[i
]; 
 866         info 
= (wx28HtmlEntityInfo
*) bsearch(entity
.c_str(), substitutions
, 
 868                                            sizeof(wx28HtmlEntityInfo
), 
 869                                            wx28HtmlEntityCompare
); 
 878         return GetCharForCode(code
); 
 881 wxFSFile 
*wx28HtmlParser::OpenURL(wx28HtmlURLType 
WXUNUSED(type
), 
 882                                 const wxString
& url
) const 
 884     return m_FS 
? m_FS
->OpenFile(url
) : NULL
; 
 889 //----------------------------------------------------------------------------- 
 890 // wx28HtmlParser::ExtractCharsetInformation 
 891 //----------------------------------------------------------------------------- 
 893 class wxMetaTagParser 
: public wx28HtmlParser
 
 896     wxMetaTagParser() { } 
 898     wxObject
* GetProduct() { return NULL
; } 
 901     virtual void AddText(const wxChar
* WXUNUSED(txt
)) {} 
 903     DECLARE_NO_COPY_CLASS(wxMetaTagParser
) 
 906 class wxMetaTagHandler 
: public wx28HtmlTagHandler
 
 909     wxMetaTagHandler(wxString 
*retval
) : wx28HtmlTagHandler(), m_retval(retval
) {} 
 910     wxString 
GetSupportedTags() { return wxT("META,BODY"); } 
 911     bool HandleTag(const wx28HtmlTag
& tag
); 
 916     DECLARE_NO_COPY_CLASS(wxMetaTagHandler
) 
 919 bool wxMetaTagHandler::HandleTag(const wx28HtmlTag
& tag
) 
 921     if (tag
.GetName() == wxT("BODY")) 
 923         m_Parser
->StopParsing(); 
 927     if (tag
.HasParam(wxT("HTTP-EQUIV")) && 
 928         tag
.GetParam(wxT("HTTP-EQUIV")).IsSameAs(wxT("Content-Type"), false) && 
 929         tag
.HasParam(wxT("CONTENT"))) 
 931         wxString content 
= tag
.GetParam(wxT("CONTENT")).Lower(); 
 932         if (content
.Left(19) == wxT("text/html; charset=")) 
 934             *m_retval 
= content
.Mid(19); 
 935             m_Parser
->StopParsing(); 
 943 wxString 
wx28HtmlParser::ExtractCharsetInformation(const wxString
& markup
) 
 946     wxMetaTagParser 
*parser 
= new wxMetaTagParser(); 
 949         parser
->AddTagHandler(new wxMetaTagHandler(&charset
)); 
 950         parser
->Parse(markup
);