new HTML tags parser and entities substitution code

author Václav Slavík <vslavik@fastmail.fm>

Sun, 1 Jul 2001 15:09:35 +0000 (15:09 +0000)

committer Václav Slavík <vslavik@fastmail.fm>

Sun, 1 Jul 2001 15:09:35 +0000 (15:09 +0000)
author Václav Slavík <vslavik@fastmail.fm>
Sun, 1 Jul 2001 15:09:35 +0000 (15:09 +0000)
committer Václav Slavík <vslavik@fastmail.fm>
Sun, 1 Jul 2001 15:09:35 +0000 (15:09 +0000)
diff --git a/include/wx/html/htmlpars.h b/include/wx/html/htmlpars.h

index 6a93d07ddbebbaff1c6c37c5490ba0d0a959f100..238a86c7044f40b07ac0f9690bdcdb0aca7cd2a7 100644 (file)
--- a/include/wx/html/htmlpars.h
+++ b/include/wx/html/htmlpars.h
@@ -21,24 +21,21 @@
  #include "wx/html/htmltag.h"
  #include "wx/filesys.h"
  
-class wxHtmlParser;
-class wxHtmlTagHandler;
-
-//--------------------------------------------------------------------------------
-// wxHtmlParser
-//                  This class handles generic parsing of HTML document : it scans
-//                  the document and divide it into blocks of tags (where one block
-//                  consists of starting and ending tag and of text between these
-//                  2 tags.
-//--------------------------------------------------------------------------------
-
+class WXDLLEXPORT wxMBConv;
+class WXDLLEXPORT wxHtmlParser;
+class WXDLLEXPORT wxHtmlTagHandler;
+class WXDLLEXPORT wxHtmlEntitiesParser;
+
+// This class handles generic parsing of an HTML document: it scans
+// the document and divides it into blocks of tags (where one block
+// consists of a starting tag, an ending tag and the text between
+// these two tags).
  class WXDLLEXPORT wxHtmlParser : public wxObject
  {
      DECLARE_ABSTRACT_CLASS(wxHtmlParser)
  
  public:
-    wxHtmlParser() : wxObject(), m_HandlersHash(wxKEY_STRING) 
-        { m_FS = NULL; m_Cache = NULL; m_HandlersStack = NULL; }
+    wxHtmlParser();
      virtual ~wxHtmlParser();
  
      // Sets the class which will be used for opening files
@@ -106,6 +103,9 @@ protected:
      // ignored if no hander is found.
      // Derived class is *responsible* for filling in m_Handlers table.
      virtual void AddTag(const wxHtmlTag& tag);
+    
+    // Returns entity parser object, used to substitute HTML &entities;
+    wxHtmlEntitiesParser *GetEntitiesParser() const { return m_entitiesParser; }
  
  protected:
      // source being parsed
@@ -130,24 +130,20 @@ protected:
      wxFileSystem *m_FS;
      // handlers stack used by PushTagHandler and PopTagHandler
      wxList *m_HandlersStack;
+    
+    // entities parser
+    wxHtmlEntitiesParser *m_entitiesParser;
  };
  
  
  
-
-
-
-//--------------------------------------------------------------------------------
-// wxHtmlTagHandler
-//                  This class (and derived classes) cooperates with wxHtmlParser.
-//                  Each recognized tag is passed to handler which is capable
-//                  of handling it. Each tag is handled in 3 steps:
-//                  1. Handler will modifies state of parser
-//                    (using it's public methods)
-//                  2. Parser parses source between starting and ending tag
-//                  3. Handler restores original state of the parser
-//--------------------------------------------------------------------------------
-
+// This class (and derived classes) cooperates with wxHtmlParser.
+// Each recognized tag is passed to a handler which is capable
+// of handling it. Each tag is handled in 3 steps:
+// 1. Handler modifies the state of the parser
+//    (using its public methods)
+// 2. Parser parses the source between the starting and ending tag
+// 3. Handler restores the original state of the parser
  class WXDLLEXPORT wxHtmlTagHandler : public wxObject
  {
      DECLARE_ABSTRACT_CLASS(wxHtmlTagHandler)
@@ -184,6 +180,33 @@ protected:
  };
  
  
+// This class replaces HTML entities in strings with the characters they name.
+// It handles named entities (&name;) and numeric ones (&#NNNN; or &#xHHHH;).
+class WXDLLEXPORT wxHtmlEntitiesParser : public wxObject
+{
+    DECLARE_DYNAMIC_CLASS(wxHtmlEntitiesParser)
+
+public:
+    wxHtmlEntitiesParser();
+    virtual ~wxHtmlEntitiesParser();
+    
+    // Sets encoding of the output string (the one Parse() returns).
+    // Has no effect if wxUSE_WCHAR_T==0 or wxUSE_UNICODE==1
+    void SetEncoding(wxFontEncoding encoding);
+    
+    // Parses entities in input and returns a copy in which they are replaced
+    // with the respective characters (with respect to output encoding)
+    wxString Parse(const wxString& input);
+    
+protected:
+    wxChar GetEntityChar(const wxString& entity); // entity = text between '&' and ';'; yields '?' if it cannot be resolved
+    wxChar GetCharForCode(unsigned code); // character for a numeric code, converted to the output encoding where applicable
+
+#if wxUSE_WCHAR_T && !wxUSE_UNICODE
+    wxMBConv *m_conv; // converter for m_encoding, owned by this object; NULL means wxConvLocal is used
+    wxFontEncoding m_encoding; // last value given to SetEncoding(); wxFONTENCODING_SYSTEM initially
+#endif
+};
  
  
  #endif
diff --git a/src/html/helpdata.cpp b/src/html/helpdata.cpp

index 76e7264ae8d9c53214a936150c2d2f68e216fa5f..3b7b4c4ccd3c5793ce3fbb0edeb4e819dbedd69b 100644 (file)
--- a/src/html/helpdata.cpp
+++ b/src/html/helpdata.cpp
@@ -157,160 +157,12 @@ bool HP_TagHandler::HandleTag(const wxHtmlTag& tag)
      }
      else 
      { // "PARAM"
-        if (m_Name == wxEmptyString && tag.GetParam(wxT("NAME")) == wxT("Name")) 
-        {
+        if (m_Name == wxEmptyString && tag.GetParam(wxT("NAME")) == wxT("Name"))
              m_Name = tag.GetParam(wxT("VALUE"));
-            if (m_Name.Find(wxT('&')) != -1) 
-            {
-#define ESCSEQ(escape, subst)  \
-                  { _T("&") _T(escape) _T(";"), _T("&") _T(escape) _T(" "), _T("&") _T(escape), _T(subst) }
-        static wxChar* substitutions[][4] =
-                        {
-                ESCSEQ("quot", "\""),
-                ESCSEQ("#34", "\""),
-                ESCSEQ("#8220", "\""),
-                ESCSEQ("#8221", "\""),
-                ESCSEQ("lt", "<"),
-                ESCSEQ("#60", "<"),
-                ESCSEQ("gt", ">"),
-                ESCSEQ("#62", ">"),
-
-                ESCSEQ("#94", "^"), /* ^ */
-    
-                ESCSEQ("nbsp", " "),
-                ESCSEQ("#32", " "),
-                ESCSEQ("iexcl", "!"),
-                ESCSEQ("#33", "!"),
-                ESCSEQ("cent", "¢"/* ¢ */),
-                ESCSEQ("#162", "¢"/* ¢ */),
-    
-                ESCSEQ("trade", "(TM)"),
-                ESCSEQ("#153", "(TM)"),
-                ESCSEQ("#8482", "(TM)"),
-
-                ESCSEQ("yen", "¥"),
-                ESCSEQ("#165", "¥"),
-                ESCSEQ("brkbar", "¦"),
-                ESCSEQ("#166", "¦"),
-                ESCSEQ("sect", "§"),
-                ESCSEQ("#167", "§"),
-                ESCSEQ("uml", "¨"),
-                ESCSEQ("#168", "¨"),
-    
-                ESCSEQ("copy", "©"), /* © */
-                ESCSEQ("#169", "©"),
-                ESCSEQ("ordf", "ª"),
-                ESCSEQ("#170", "ª"),
-                ESCSEQ("laquo", "«"), /* « */
-                ESCSEQ("#171", "«"),
-                ESCSEQ("not", "¬"),
-                ESCSEQ("#172", "¬"),
-    
-                ESCSEQ("reg", "®"), /* ® */
-                ESCSEQ("#174", "®"),
-    
-                ESCSEQ("deg", "°"), /* ° */
-                ESCSEQ("#176", "°"),
-                ESCSEQ("plusm", "±"), /* ± */
-                ESCSEQ("#177", "±"),
-    
-                ESCSEQ("acute", "´"),
-                ESCSEQ("#180", "´"),
-                ESCSEQ("macron", "¯"),
-                ESCSEQ("#175", "¯"),
-                ESCSEQ("micro", "µ"), /* µ */
-                ESCSEQ("#181", "µ"),
-                ESCSEQ("para", "¶"), /* ¶ */
-                ESCSEQ("#182", "¶"),
-    
-                ESCSEQ("ordm", "º"), /* º */
-                ESCSEQ("#186", "º"),
-                ESCSEQ("raquo", "»"), /* » */
-                ESCSEQ("#187", "»"),
-    
-                ESCSEQ("iquest", "¿"), /* ¿ */
-                ESCSEQ("#191", "¿"),
-                ESCSEQ("Agrave", "\300"/* À */),
-                ESCSEQ("#193", "\300"/* À */),
-    
-                ESCSEQ("Acirc", "\302"/* Â */),
-                ESCSEQ("Atilde", "\303"/* Ã */),
-                ESCSEQ("Auml", "\304"/* Ä */),
-                ESCSEQ("Aring", " "),
-                ESCSEQ("AElig", " "),
-                ESCSEQ("Ccedil", "\347"/* ç */),
-                ESCSEQ("Egrave", "\310"/* È */),
-                ESCSEQ("Eacute", "\311"/* É */),
-                ESCSEQ("Ecirc", "\312"/* Ê */),
-                ESCSEQ("Euml", "\313"/* Ë */),
-                ESCSEQ("Igrave", "\314"/* Ì */),
-
-                ESCSEQ("Icirc", "\316"/* Î */),
-                ESCSEQ("Iuml", "\317"/* Ï */),
-    
-                ESCSEQ("Ntilde", "\321"/* Ñ */),
-                ESCSEQ("Ograve", "\322"/* Ò */),
-    
-                ESCSEQ("Ocirc", "\324"/* Ô */),
-                ESCSEQ("Otilde", "\325"/* Õ */),
-                ESCSEQ("Ouml", "\326"/* Ö */),
-    
-                ESCSEQ("Oslash", " "),
-                ESCSEQ("Ugrave", "\331"/* Ù */),
-    
-                ESCSEQ("Ucirc", " "),
-                ESCSEQ("Uuml", "\334"/* Ü */),
-    
-                ESCSEQ("szlig", "\247"/* § */),
-                ESCSEQ("agrave","\340"/* à */),
-                ESCSEQ("aacute", "\341"/* á */),
-                ESCSEQ("acirc", "\342"/* â */),
-                ESCSEQ("atilde", "\343"/* ã */),
-                ESCSEQ("auml", "\344"/* ä */),
-                ESCSEQ("aring", "a"),
-                ESCSEQ("aelig", "ae"),
-                ESCSEQ("ccedil", "\347"/* ç */),
-                ESCSEQ("egrave", "\350"/* è */),
-                ESCSEQ("eacute", "\351"/* é */),
-                ESCSEQ("ecirc", "\352"/* ê */),
-                ESCSEQ("euml", "\353"/* ë */),
-                ESCSEQ("igrave", "\354"/* ì */),
-                ESCSEQ("iacute", "\355"/* í */),
-                ESCSEQ("icirc", " "),
-                ESCSEQ("iuml", "\357"/* ï */),
-                ESCSEQ("eth", " "),
-                ESCSEQ("ntilde", "\361"/* ñ */),
-                ESCSEQ("ograve", "\362"/* ò */),
-                ESCSEQ("oacute", "\363"/* ó */),
-                ESCSEQ("ocirc", "\364"/* ô */),
-                ESCSEQ("otilde", "\365"/* õ */),
-                ESCSEQ("ouml", "\366"/* ö */),
-                ESCSEQ("divide", " "),
-                ESCSEQ("oslash", " "),
-                ESCSEQ("ugrave", "\371"/* ù */),
-                ESCSEQ("uacute", "\372"/* ú */),
-                ESCSEQ("ucirc", "\373"/* û */),
-                ESCSEQ("uuml", "\374"/* ü */),
-    
-                ESCSEQ("yuml", ""),
-
-                /* this one should ALWAYS stay the last one!!! */
-                ESCSEQ("amp", "&"),
-                ESCSEQ("#38", "&"),
-
-                { NULL, NULL, NULL }
-                };
-
-                for (int i = 0; substitutions[i][0] != NULL; i++)
-                {
-                    m_Name.Replace(substitutions[i][0], substitutions[i][3], TRUE);
-                    m_Name.Replace(substitutions[i][1], substitutions[i][3], TRUE);
-                    m_Name.Replace(substitutions[i][2], substitutions[i][3], TRUE);
-                }
-            }
-        }
-        if (tag.GetParam(wxT("NAME")) == wxT("Local")) m_Page = tag.GetParam(wxT("VALUE"));
-        if (tag.GetParam(wxT("NAME")) == wxT("ID")) tag.ScanParam(wxT("VALUE"), wxT("%i"), &m_ID);
+        if (tag.GetParam(wxT("NAME")) == wxT("Local")) 
+            m_Page = tag.GetParam(wxT("VALUE"));
+        if (tag.GetParam(wxT("NAME")) == wxT("ID")) 
+            tag.ScanParam(wxT("VALUE"), wxT("%i"), &m_ID);
          return FALSE;
      }
  }
diff --git a/src/html/htmlcell.cpp b/src/html/htmlcell.cpp

index c907af3fe77a725e7b6dfc215176da631e50c70d..cd927aa55cc1b855db6e00bddf01bae66064588a 100644 (file)
--- a/src/html/htmlcell.cpp
+++ b/src/html/htmlcell.cpp
@@ -131,156 +131,6 @@ const wxHtmlCell* wxHtmlCell::Find(int condition, const void* param) const
  wxHtmlWordCell::wxHtmlWordCell(const wxString& word, wxDC& dc) : wxHtmlCell()
  {
      m_Word = word;
-    
-    if (m_Word.Find(wxT('&')) != -1) 
-    {
-#define ESCSEQ(escape, subst)  \
-                  { _T("&") _T(escape) _T(";"), _T("&") _T(escape) _T(" "), _T("&") _T(escape), _T(subst) }
-        static wxChar* substitutions[][4] =
-                {
-                ESCSEQ("quot", "\""),
-                ESCSEQ("#34", "\""),
-                ESCSEQ("#8220", "\""),
-                ESCSEQ("#8221", "\""),
-                ESCSEQ("lt", "<"),
-                ESCSEQ("#60", "<"),
-                ESCSEQ("gt", ">"),
-                ESCSEQ("#62", ">"),
-
-                ESCSEQ("#94", "^"), /* ^ */
-    
-                ESCSEQ("nbsp", " "),
-                ESCSEQ("#32", " "),
-                ESCSEQ("iexcl", "!"),
-                ESCSEQ("#33", "!"),
-                ESCSEQ("cent", "¢"/* ¢ */),
-                ESCSEQ("#162", "¢"/* ¢ */),
-    
-                ESCSEQ("trade", "(TM)"),
-                ESCSEQ("#153", "(TM)"),
-                ESCSEQ("#8482", "(TM)"),
-
-                ESCSEQ("yen", "¥"),
-                ESCSEQ("#165", "¥"),
-                ESCSEQ("brkbar", "¦"),
-                ESCSEQ("#166", "¦"),
-                ESCSEQ("sect", "§"),
-                ESCSEQ("#167", "§"),
-                ESCSEQ("uml", "¨"),
-                ESCSEQ("#168", "¨"),
-    
-                ESCSEQ("copy", "©"), /* © */
-                ESCSEQ("#169", "©"),
-                ESCSEQ("ordf", "ª"),
-                ESCSEQ("#170", "ª"),
-                ESCSEQ("laquo", "«"), /* « */
-                ESCSEQ("#171", "«"),
-                ESCSEQ("not", "¬"),
-                ESCSEQ("#172", "¬"),
-    
-                ESCSEQ("reg", "®"), /* ® */
-                ESCSEQ("#174", "®"),
-    
-                ESCSEQ("deg", "°"), /* ° */
-                ESCSEQ("#176", "°"),
-                ESCSEQ("plusm", "±"), /* ± */
-                ESCSEQ("#177", "±"),
-    
-                ESCSEQ("acute", "´"),
-                ESCSEQ("#180", "´"),
-                ESCSEQ("macron", "¯"),
-                ESCSEQ("#175", "¯"),
-                ESCSEQ("micro", "µ"), /* µ */
-                ESCSEQ("#181", "µ"),
-                ESCSEQ("para", "¶"), /* ¶ */
-                ESCSEQ("#182", "¶"),
-    
-                ESCSEQ("ordm", "º"), /* º */
-                ESCSEQ("#186", "º"),
-                ESCSEQ("raquo", "»"), /* » */
-                ESCSEQ("#187", "»"),
-    
-                ESCSEQ("iquest", "¿"), /* ¿ */
-                ESCSEQ("#191", "¿"),
-                ESCSEQ("Agrave", "\300"/* À */),
-                ESCSEQ("#193", "\300"/* À */),
-    
-                ESCSEQ("Acirc", "\302"/* Â */),
-                ESCSEQ("Atilde", "\303"/* Ã */),
-                ESCSEQ("Auml", "\304"/* Ä */),
-                ESCSEQ("Aring", " "),
-                ESCSEQ("AElig", " "),
-                ESCSEQ("Ccedil", "\347"/* ç */),
-                ESCSEQ("Egrave", "\310"/* È */),
-                ESCSEQ("Eacute", "\311"/* É */),
-                ESCSEQ("Ecirc", "\312"/* Ê */),
-                ESCSEQ("Euml", "\313"/* Ë */),
-                ESCSEQ("Igrave", "\314"/* Ì */),
-
-                ESCSEQ("Icirc", "\316"/* Î */),
-                ESCSEQ("Iuml", "\317"/* Ï */),
-    
-                ESCSEQ("Ntilde", "\321"/* Ñ */),
-                ESCSEQ("Ograve", "\322"/* Ò */),
-    
-                ESCSEQ("Ocirc", "\324"/* Ô */),
-                ESCSEQ("Otilde", "\325"/* Õ */),
-                ESCSEQ("Ouml", "\326"/* Ö */),
-    
-                ESCSEQ("Oslash", " "),
-                ESCSEQ("Ugrave", "\331"/* Ù */),
-    
-                ESCSEQ("Ucirc", " "),
-                ESCSEQ("Uuml", "\334"/* Ü */),
-    
-                ESCSEQ("szlig", "\247"/* § */),
-                ESCSEQ("agrave","\340"/* à */),
-                ESCSEQ("aacute", "\341"/* á */),
-                ESCSEQ("acirc", "\342"/* â */),
-                ESCSEQ("atilde", "\343"/* ã */),
-                ESCSEQ("auml", "\344"/* ä */),
-                ESCSEQ("aring", "a"),
-                ESCSEQ("aelig", "ae"),
-                ESCSEQ("ccedil", "\347"/* ç */),
-                ESCSEQ("egrave", "\350"/* è */),
-                ESCSEQ("eacute", "\351"/* é */),
-                ESCSEQ("ecirc", "\352"/* ê */),
-                ESCSEQ("euml", "\353"/* ë */),
-                ESCSEQ("igrave", "\354"/* ì */),
-                ESCSEQ("iacute", "\355"/* í */),
-                ESCSEQ("icirc", " "),
-                ESCSEQ("iuml", "\357"/* ï */),
-                ESCSEQ("eth", " "),
-                ESCSEQ("ntilde", "\361"/* ñ */),
-                ESCSEQ("ograve", "\362"/* ò */),
-                ESCSEQ("oacute", "\363"/* ó */),
-                ESCSEQ("ocirc", "\364"/* ô */),
-                ESCSEQ("otilde", "\365"/* õ */),
-                ESCSEQ("ouml", "\366"/* ö */),
-                ESCSEQ("divide", " "),
-                ESCSEQ("oslash", " "),
-                ESCSEQ("ugrave", "\371"/* ù */),
-                ESCSEQ("uacute", "\372"/* ú */),
-                ESCSEQ("ucirc", "\373"/* û */),
-                ESCSEQ("uuml", "\374"/* ü */),
-    
-                ESCSEQ("yuml", ""),
-
-                /* this one should ALWAYS stay the last one!!! */
-                ESCSEQ("amp", "&"),
-                ESCSEQ("#38", "&"),
-
-                { NULL, NULL, NULL }
-                };
-
-        for (int i = 0; substitutions[i][0] != NULL; i++) 
-        {
-            m_Word.Replace(substitutions[i][0], substitutions[i][3], TRUE);
-            m_Word.Replace(substitutions[i][1], substitutions[i][3], TRUE);
-            m_Word.Replace(substitutions[i][2], substitutions[i][3], TRUE);
-        }
-    }
-
      dc.GetTextExtent(m_Word, &m_Width, &m_Height, &m_Descent);
      SetCanLiveOnPagebreak(FALSE);
  }
diff --git a/src/html/htmlpars.cpp b/src/html/htmlpars.cpp

index ab88ab7d46ee5acafb478b17056fe7d6b736dbc5..dc572b0a89830970ce9627ab41e792074eea4d5b 100644 (file)
--- a/src/html/htmlpars.cpp
+++ b/src/html/htmlpars.cpp
@@ -28,6 +28,7 @@
  #include "wx/tokenzr.h"
  #include "wx/wfstream.h"
  #include "wx/url.h"
+#include "wx/fontmap.h"
  #include "wx/html/htmldefs.h"
  #include "wx/html/htmlpars.h"
  
@@ -39,6 +40,21 @@
  
  IMPLEMENT_ABSTRACT_CLASS(wxHtmlParser,wxObject)
  
+// Every pointer member starts out NULL except the entities parser, which is
+// allocated here and released again in the destructor.
+wxHtmlParser::wxHtmlParser()
+    : wxObject(),
+      m_Cache(NULL),
+      m_HandlersHash(wxKEY_STRING),
+      m_FS(NULL),
+      m_HandlersStack(NULL),
+      m_entitiesParser(new wxHtmlEntitiesParser)
+{
+}
+
+wxHtmlParser::~wxHtmlParser() // releases the objects this parser owns
+{
+    delete m_HandlersStack; // NULL unless something assigned it after construction; deleting NULL is harmless
+    m_HandlersHash.Clear();
+    m_HandlersList.DeleteContents(TRUE); // presumably makes the following Clear() destroy the stored handlers as well - confirm against wxList
+    m_HandlersList.Clear();
+    delete m_entitiesParser; // allocated in the constructor
+}
  
  wxObject* wxHtmlParser::Parse(const wxString& source)
  {
@@ -180,18 +196,398 @@ void wxHtmlParser::PopTagHandler()
      m_HandlersStack->DeleteNode(first);
  }
  
-wxHtmlParser::~wxHtmlParser()
-{
-    if (m_HandlersStack) delete m_HandlersStack;
-    m_HandlersHash.Clear();
-    m_HandlersList.DeleteContents(TRUE);
-    m_HandlersList.Clear();
-}
-
  //-----------------------------------------------------------------------------
  // wxHtmlTagHandler
  //-----------------------------------------------------------------------------
  
  IMPLEMENT_ABSTRACT_CLASS(wxHtmlTagHandler,wxObject)
+
+
+//-----------------------------------------------------------------------------
+// wxHtmlEntitiesParser
+//-----------------------------------------------------------------------------
+
+IMPLEMENT_DYNAMIC_CLASS(wxHtmlEntitiesParser,wxObject)
+
+wxHtmlEntitiesParser::wxHtmlEntitiesParser() // nothing to set up unless the build has the conversion members
+#if wxUSE_WCHAR_T && !wxUSE_UNICODE
+    : m_conv(NULL), m_encoding(wxFONTENCODING_SYSTEM) // no converter until SetEncoding() picks a non-system encoding
 #endif
+{
+}
+
+// Frees the converter created by SetEncoding(), if any.
+// m_conv is declared only when wxUSE_WCHAR_T && !wxUSE_UNICODE, so the
+// delete has to be guarded by the same condition; without the guard the
+// destructor does not compile in Unicode or no-wchar_t builds.
+wxHtmlEntitiesParser::~wxHtmlEntitiesParser()
+{
+#if wxUSE_WCHAR_T && !wxUSE_UNICODE
+    delete m_conv;
+#endif
+}
  
+// Chooses the encoding GetCharForCode() converts characters to. The body is
+// compiled only for wxUSE_WCHAR_T && !wxUSE_UNICODE builds; elsewhere the
+// call does nothing.
+void wxHtmlEntitiesParser::SetEncoding(wxFontEncoding encoding)
+{
+#if wxUSE_WCHAR_T && !wxUSE_UNICODE
+    if (encoding != m_encoding)
+    {
+        // Drop the converter of the previous encoding before switching.
+        delete m_conv;
+        m_conv = NULL;
+        m_encoding = encoding;
+
+        // The system encoding needs no converter of its own: with m_conv
+        // left NULL, GetCharForCode() falls back to wxConvLocal.
+        if (encoding != wxFONTENCODING_SYSTEM)
+            m_conv = new wxCSConv(wxFontMapper::GetEncodingName(encoding));
+    }
+#endif
+}
+
+// Returns a copy of input in which every entity reference is replaced by the
+// character GetEntityChar() yields for it. A reference is '&' followed by a
+// run of letters, digits, '_' or '#', optionally terminated by ';'. All other
+// text is copied unchanged.
+wxString wxHtmlEntitiesParser::Parse(const wxString& input)
+{
+    const wxChar *c;     // current scan position
+    const wxChar *last;  // start of the plain text not yet copied to output
+    wxString output;
+
+    for (c = last = input.c_str(); *c != wxT('\0'); c++)
+    {
+        if (*c == wxT('&'))
+        {
+            // flush the plain text preceding the '&'
+            if (c - last > 0)
+                output.append(last, c - last);
+
+            // Step over the '&'. If it was the last character, c now sits on
+            // the terminating NUL: the name is empty and the loop ends
+            // normally after this iteration.
+            const wxChar *ent_s = ++c;
+            for (; (*c >= wxT('a') && *c <= wxT('z')) ||
+                   (*c >= wxT('A') && *c <= wxT('Z')) ||
+                   (*c >= wxT('0') && *c <= wxT('9')) ||
+                   *c == wxT('_') || *c == wxT('#'); c++) {}
+            wxString entity;
+            entity.append(ent_s, c - ent_s);
+
+            // Leave c on the last character that belongs to the entity (the
+            // ';' if present), because the loop header increments it once
+            // more. Leaving it one past the entity would skip the next
+            // character - possibly the '&' of an adjacent entity - or step
+            // beyond the terminating NUL when the entity ends the string.
+            if (*c != wxT(';')) c--;
+            last = c + 1;
+
+            output << GetEntityChar(entity);
+        }
+    }
+
+    // copy whatever plain text follows the last entity
+    if (*last != wxT('\0'))
+        output.append(last);
+    return output;
+}
+
+struct wxHtmlEntityInfo // one row of the entity table searched by GetEntityChar()
+{
+    const wxChar *name; // entity name without '&' and ';'; NULL in the table's end marker
+    unsigned code; // numeric code handed to GetCharForCode(); 0 in the table's end marker
+};
+
+// bsearch() comparison callback: key is the entity name being looked up,
+// item points to a wxHtmlEntityInfo row. Names are ordered by wxStrcmp.
+static int compar_entity(const void *key, const void *item)
+{
+    const wxChar *wanted = (const wxChar*)key;
+    const wxHtmlEntityInfo *row = (const wxHtmlEntityInfo*)item;
+    return wxStrcmp(wanted, row->name);
+}
+
+// Turns a numeric character code into a single wxChar of the output string.
+//  - Unicode build: the code is cast to wxChar directly.
+//  - ANSI build with wchar_t: the code is converted with m_conv (wxConvLocal
+//    if no converter is set); '?' is returned if the conversion fails.
+//  - Otherwise: codes below 256 are cast directly, anything else gives '?'.
+wxChar wxHtmlEntitiesParser::GetCharForCode(unsigned code)
+{
+#if wxUSE_UNICODE
+    return (wxChar)code;
+#elif wxUSE_WCHAR_T
+    wxMBConv *converter = m_conv;
+    if (converter == NULL)
+        converter = &wxConvLocal;
+
+    // one-character, NUL-terminated wide string holding the code
+    wchar_t wide[2];
+    wide[0] = (wchar_t)code;
+    wide[1] = 0;
+
+    char narrow[2];
+    if (converter->WC2MB(narrow, wide, 1) == (size_t)-1)
+        return '?';
+    return narrow[0];
+#else
+    if (code >= 256)
+        return '?';
+    return (wxChar)code;
+#endif
+}
+
+wxChar wxHtmlEntitiesParser::GetEntityChar(const wxString& entity) // entity: text between '&' and ';'; returns its character, or '?' if it cannot be resolved
+{
+    unsigned code = 0; // 0 means "not resolved"
+    
+    if (entity[0] == wxT('#')) // numeric reference: "#NNN" or "#xHHH"
+    {
+        const wxChar *ent_s = entity.c_str();
+        const wxChar *format;
+        
+        if (ent_s[1] == wxT('x') || ent_s[1] == wxT('X')) // hexadecimal form
+        {
+            format = wxT("%x");
+            ent_s++; // skip the 'x'
+        }
+        else
+            format = wxT("%u");
+        ent_s++; // skip the '#'
+
+        if (wxSscanf(ent_s, format, &code) != 1)
+            code = 0;
+    }
+    else
+    {
+        static wxHtmlEntityInfo substitutions[] = { // keep sorted in wxStrcmp order: the table is searched with bsearch()
+            { wxT("AElig"),198 },
+            { wxT("Aacute"),193 },
+            { wxT("Acirc"),194 },
+            { wxT("Agrave"),192 },
+            { wxT("Alpha"),913 },
+            { wxT("Aring"),197 },
+            { wxT("Atilde"),195 },
+            { wxT("Auml"),196 },
+            { wxT("Beta"),914 },
+            { wxT("Ccedil"),199 },
+            { wxT("Chi"),935 },
+            { wxT("Dagger"),8225 },
+            { wxT("Delta"),916 },
+            { wxT("ETH"),208 },
+            { wxT("Eacute"),201 },
+            { wxT("Ecirc"),202 },
+            { wxT("Egrave"),200 },
+            { wxT("Epsilon"),917 },
+            { wxT("Eta"),919 },
+            { wxT("Euml"),203 },
+            { wxT("Gamma"),915 },
+            { wxT("Iacute"),205 },
+            { wxT("Icirc"),206 },
+            { wxT("Igrave"),204 },
+            { wxT("Iota"),921 },
+            { wxT("Iuml"),207 },
+            { wxT("Kappa"),922 },
+            { wxT("Lambda"),923 },
+            { wxT("Mu"),924 },
+            { wxT("Ntilde"),209 },
+            { wxT("Nu"),925 },
+            { wxT("OElig"),338 },
+            { wxT("Oacute"),211 },
+            { wxT("Ocirc"),212 },
+            { wxT("Ograve"),210 },
+            { wxT("Omega"),937 },
+            { wxT("Omicron"),927 },
+            { wxT("Oslash"),216 },
+            { wxT("Otilde"),213 },
+            { wxT("Ouml"),214 },
+            { wxT("Phi"),934 },
+            { wxT("Pi"),928 },
+            { wxT("Prime"),8243 },
+            { wxT("Psi"),936 },
+            { wxT("Rho"),929 },
+            { wxT("Scaron"),352 },
+            { wxT("Sigma"),931 },
+            { wxT("THORN"),222 },
+            { wxT("Tau"),932 },
+            { wxT("Theta"),920 },
+            { wxT("Uacute"),218 },
+            { wxT("Ucirc"),219 },
+            { wxT("Ugrave"),217 },
+            { wxT("Upsilon"),933 },
+            { wxT("Uuml"),220 },
+            { wxT("Xi"),926 },
+            { wxT("Yacute"),221 },
+            { wxT("Yuml"),376 },
+            { wxT("Zeta"),918 },
+            { wxT("aacute"),225 },
+            { wxT("acirc"),226 },
+            { wxT("acute"),180 },
+            { wxT("aelig"),230 },
+            { wxT("agrave"),224 },
+            { wxT("alefsym"),8501 },
+            { wxT("alpha"),945 },
+            { wxT("amp"),38 },
+            { wxT("and"),8743 },
+            { wxT("ang"),8736 },
+            { wxT("aring"),229 },
+            { wxT("asymp"),8776 },
+            { wxT("atilde"),227 },
+            { wxT("auml"),228 },
+            { wxT("bdquo"),8222 },
+            { wxT("beta"),946 },
+            { wxT("brvbar"),166 },
+            { wxT("bull"),8226 },
+            { wxT("cap"),8745 },
+            { wxT("ccedil"),231 },
+            { wxT("cedil"),184 },
+            { wxT("cent"),162 },
+            { wxT("chi"),967 },
+            { wxT("circ"),710 },
+            { wxT("clubs"),9827 },
+            { wxT("cong"),8773 },
+            { wxT("copy"),169 },
+            { wxT("crarr"),8629 },
+            { wxT("cup"),8746 },
+            { wxT("curren"),164 },
+            { wxT("dArr"),8659 },
+            { wxT("dagger"),8224 },
+            { wxT("darr"),8595 },
+            { wxT("deg"),176 },
+            { wxT("delta"),948 },
+            { wxT("diams"),9830 },
+            { wxT("divide"),247 },
+            { wxT("eacute"),233 },
+            { wxT("ecirc"),234 },
+            { wxT("egrave"),232 },
+            { wxT("empty"),8709 },
+            { wxT("emsp"),8195 },
+            { wxT("ensp"),8194 },
+            { wxT("epsilon"),949 },
+            { wxT("equiv"),8801 },
+            { wxT("eta"),951 },
+            { wxT("eth"),240 },
+            { wxT("euml"),235 },
+            { wxT("euro"),8364 },
+            { wxT("exist"),8707 },
+            { wxT("fnof"),402 },
+            { wxT("forall"),8704 },
+            { wxT("frac12"),189 },
+            { wxT("frac14"),188 },
+            { wxT("frac34"),190 },
+            { wxT("frasl"),8260 },
+            { wxT("gamma"),947 },
+            { wxT("ge"),8805 },
+            { wxT("gt"),62 },
+            { wxT("hArr"),8660 },
+            { wxT("harr"),8596 },
+            { wxT("hearts"),9829 },
+            { wxT("hellip"),8230 },
+            { wxT("iacute"),237 },
+            { wxT("icirc"),238 },
+            { wxT("iexcl"),161 },
+            { wxT("igrave"),236 },
+            { wxT("image"),8465 },
+            { wxT("infin"),8734 },
+            { wxT("int"),8747 },
+            { wxT("iota"),953 },
+            { wxT("iquest"),191 },
+            { wxT("isin"),8712 },
+            { wxT("iuml"),239 },
+            { wxT("kappa"),954 },
+            { wxT("lArr"),8656 },
+            { wxT("lambda"),955 },
+            { wxT("lang"),9001 },
+            { wxT("laquo"),171 },
+            { wxT("larr"),8592 },
+            { wxT("lceil"),8968 },
+            { wxT("ldquo"),8220 },
+            { wxT("le"),8804 },
+            { wxT("lfloor"),8970 },
+            { wxT("lowast"),8727 },
+            { wxT("loz"),9674 },
+            { wxT("lrm"),8206 },
+            { wxT("lsaquo"),8249 },
+            { wxT("lsquo"),8216 },
+            { wxT("lt"),60 },
+            { wxT("macr"),175 },
+            { wxT("mdash"),8212 },
+            { wxT("micro"),181 },
+            { wxT("middot"),183 },
+            { wxT("minus"),8722 },
+            { wxT("mu"),956 },
+            { wxT("nabla"),8711 },
+            { wxT("nbsp"),160 },
+            { wxT("ndash"),8211 },
+            { wxT("ne"),8800 },
+            { wxT("ni"),8715 },
+            { wxT("not"),172 },
+            { wxT("notin"),8713 },
+            { wxT("nsub"),8836 },
+            { wxT("ntilde"),241 },
+            { wxT("nu"),957 },
+            { wxT("oacute"),243 },
+            { wxT("ocirc"),244 },
+            { wxT("oelig"),339 },
+            { wxT("ograve"),242 },
+            { wxT("oline"),8254 },
+            { wxT("omega"),969 },
+            { wxT("omicron"),959 },
+            { wxT("oplus"),8853 },
+            { wxT("or"),8744 },
+            { wxT("ordf"),170 },
+            { wxT("ordm"),186 },
+            { wxT("oslash"),248 },
+            { wxT("otilde"),245 },
+            { wxT("otimes"),8855 },
+            { wxT("ouml"),246 },
+            { wxT("para"),182 },
+            { wxT("part"),8706 },
+            { wxT("permil"),8240 },
+            { wxT("perp"),8869 },
+            { wxT("phi"),966 },
+            { wxT("pi"),960 },
+            { wxT("piv"),982 },
+            { wxT("plusmn"),177 },
+            { wxT("pound"),163 },
+            { wxT("prime"),8242 },
+            { wxT("prod"),8719 },
+            { wxT("prop"),8733 },
+            { wxT("psi"),968 },
+            { wxT("quot"),34 },
+            { wxT("rArr"),8658 },
+            { wxT("radic"),8730 },
+            { wxT("rang"),9002 },
+            { wxT("raquo"),187 },
+            { wxT("rarr"),8594 },
+            { wxT("rceil"),8969 },
+            { wxT("rdquo"),8221 },
+            { wxT("real"),8476 },
+            { wxT("reg"),174 },
+            { wxT("rfloor"),8971 },
+            { wxT("rho"),961 },
+            { wxT("rlm"),8207 },
+            { wxT("rsaquo"),8250 },
+            { wxT("rsquo"),8217 },
+            { wxT("sbquo"),8218 },
+            { wxT("scaron"),353 },
+            { wxT("sdot"),8901 },
+            { wxT("sect"),167 },
+            { wxT("shy"),173 },
+            { wxT("sigma"),963 },
+            { wxT("sigmaf"),962 },
+            { wxT("sim"),8764 },
+            { wxT("spades"),9824 },
+            { wxT("sub"),8834 },
+            { wxT("sube"),8838 },
+            { wxT("sum"),8721 },
+            { wxT("sup"),8835 },
+            { wxT("sup1"),185 },
+            { wxT("sup2"),178 },
+            { wxT("sup3"),179 },
+            { wxT("supe"),8839 },
+            { wxT("szlig"),223 },
+            { wxT("tau"),964 },
+            { wxT("there4"),8756 },
+            { wxT("theta"),952 },
+            { wxT("thetasym"),977 },
+            { wxT("thinsp"),8201 },
+            { wxT("thorn"),254 },
+            { wxT("tilde"),732 },
+            { wxT("times"),215 },
+            { wxT("trade"),8482 },
+            { wxT("uArr"),8657 },
+            { wxT("uacute"),250 },
+            { wxT("uarr"),8593 },
+            { wxT("ucirc"),251 },
+            { wxT("ugrave"),249 },
+            { wxT("uml"),168 },
+            { wxT("upsih"),978 },
+            { wxT("upsilon"),965 },
+            { wxT("uuml"),252 },
+            { wxT("weierp"),8472 },
+            { wxT("xi"),958 },
+            { wxT("yacute"),253 },
+            { wxT("yen"),165 },
+            { wxT("yuml"),255 },
+            { wxT("zeta"),950 },
+            { wxT("zwj"),8205 },
+            { wxT("zwnj"),8204 },
+            {NULL, 0}}; // end marker: code 0 stops the counting loop below
+        static size_t substitutions_cnt = 0; // number of real rows, counted once on first use
+        
+        if (substitutions_cnt == 0)
+            while (substitutions[substitutions_cnt].code != 0)
+                substitutions_cnt++;
+
+        wxHtmlEntityInfo *info;
+        info = (wxHtmlEntityInfo*) bsearch(entity.c_str(), substitutions, 
+                                           substitutions_cnt,
+                                           sizeof(wxHtmlEntityInfo),
+                                           compar_entity);
+        if (info)
+            code = info->code;
+    }
+    
+    if (code == 0) // unknown name, unparsable number, or an explicit code of 0
+        return wxT('?');
+    else
+        return GetCharForCode(code);
+}
+
+#endif
diff --git a/src/html/htmltag.cpp b/src/html/htmltag.cpp

index 02f045ce3c8aa8c2062932a490fd11ee89567342..9b8049fdba42829d05a317c1f33c2b38881a7b46 100644 (file)
--- a/src/html/htmltag.cpp
+++ b/src/html/htmltag.cpp
@@ -26,6 +26,7 @@
  #endif
  
  #include "wx/html/htmltag.h"
+#include "wx/html/htmlpars.h"
  #include <stdio.h> // for vsscanf
  #include <stdarg.h>
  
@@ -121,15 +122,17 @@ wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
      }
  }
  
-
-
  void wxHtmlTagsCache::QueryTag(int at, int* end1, int* end2)
  {
      if (m_Cache == NULL) return;
      if (m_Cache[m_CachePos].Key != at) 
      {
          int delta = (at < m_Cache[m_CachePos].Key) ? -1 : 1;
-        do {m_CachePos += delta;} while (m_Cache[m_CachePos].Key != at);
+        do 
+        { 
+            m_CachePos += delta; 
+        }
+        while (m_Cache[m_CachePos].Key != at);
      }
      *end1 = m_Cache[m_CachePos].End1;
      *end2 = m_Cache[m_CachePos].End2;
@@ -144,64 +147,129 @@ void wxHtmlTagsCache::QueryTag(int at, int* end1, int* end2)
  
  IMPLEMENT_CLASS(wxHtmlTag,wxObject)
  
-wxHtmlTag::wxHtmlTag(const wxString& source, int pos, int end_pos, wxHtmlTagsCache* cache) : wxObject()
+wxHtmlTag::wxHtmlTag(const wxString& source, int pos, int end_pos, 
+                     wxHtmlTagsCache *cache,
+                     wxHtmlEntitiesParser *entParser) : wxObject()
  {
      int i;
-    char c;
+    wxChar c;
  
      // fill-in name, params and begin pos:
-    m_Name = m_Params = wxEmptyString;
      i = pos+1;
-    if (source[i] == wxT('/')) { m_Ending = TRUE; i++; }
-    else m_Ending = FALSE;
+    if (source[i] == wxT('/')) 
+        { m_Ending = TRUE; i++; }
+    else 
+        m_Ending = FALSE;
  
      // find tag's name and convert it to uppercase:
      while ((i < end_pos) && 
-               ((c = source[i++]) != wxT(' ') && c != wxT('\r') && 
-                 c != wxT('\n') && c != wxT('\t') &&
-                 c != wxT('>'))) 
+           ((c = source[i++]) != wxT(' ') && c != wxT('\r') && 
+             c != wxT('\n') && c != wxT('\t') &&
+             c != wxT('>'))) 
      {
-        if ((c >= wxT('a')) && (c <= wxT('z'))) c -= (wxT('a') - wxT('A'));
-        m_Name += c;
+        if ((c >= wxT('a')) && (c <= wxT('z'))) 
+            c -= (wxT('a') - wxT('A'));
+        m_Name << c;
      }
  
      // if the tag has parameters, read them and "normalize" them,
      // i.e. convert to uppercase, replace whitespaces by spaces and 
      // remove whitespaces around '=':
      if (source[i-1] != wxT('>'))
-        while ((i < end_pos) && ((c = source[i++]) != wxT('>'))) 
+    {
+        #define IS_WHITE(c) (c == wxT(' ') || c == wxT('\r') || \
+                             c == wxT('\n') || c == wxT('\t'))
+        wxString pname, pvalue;
+        wxChar quote;
+        enum 
          {
-            if ((c >= wxT('a')) && (c <= wxT('z'))) 
-                c -= (wxT('a') - wxT('A'));
-            if (c == wxT('\r') || c == wxT('\n') || c == wxT('\t')) 
-                c = wxT(' '); // make future parsing a bit simpler
-            m_Params += c;
-            if (c == wxT('"')) 
+            ST_BEFORE_NAME = 1, 
+            ST_NAME,
+            ST_BEFORE_EQ,
+            ST_BEFORE_VALUE,
+            ST_VALUE
+        } state;
+    
+        quote = 0;
+        state = ST_BEFORE_NAME;
+        while (i < end_pos)
+        {
+            c = source[i++];
+
+            if (c == wxT('>') && !(state == ST_VALUE && quote != 0)) 
              {
-                // remove spaces around the '=' character:
-                if (m_Params.Length() > 1 && 
-                    m_Params[m_Params.Length()-2] == wxT(' '))
+                if (state == ST_BEFORE_EQ || state == ST_NAME)
                  {
-                    m_Params.RemoveLast();
-                    while (m_Params.Length() > 0 && m_Params.Last() == wxT(' ')) 
-                        m_Params.RemoveLast();
-                    m_Params += wxT('"');
+                    m_ParamNames.Add(pname);
+                    m_ParamValues.Add(wxEmptyString);
                  }
-                while ((i < end_pos) && (source[i++] == wxT(' '))) {}
-                if (i < end_pos) i--;
-            
-                // ...and copy the value to m_Params:
-                while ((i < end_pos) && ((c = source[i++]) != wxT('"'))) 
-                    m_Params += c;
-                m_Params += c;
+                else if (state == ST_VALUE && quote == 0)
+                {
+                    m_ParamNames.Add(pname);
+                    m_ParamValues.Add(entParser->Parse(pvalue));
+                }
+                break;
              }
-            else if (c == wxT('\'')) 
+            switch (state)
              {
-                while ((i < end_pos) && ((c = source[i++]) != wxT('\''))) 
-                    m_Params += c;
-                m_Params += c;
+                case ST_BEFORE_NAME:
+                    if (!IS_WHITE(c))
+                    {
+                        pname = c;
+                        state = ST_NAME;
+                    }
+                    break;
+                case ST_NAME:
+                    if (IS_WHITE(c))
+                        state = ST_BEFORE_EQ;
+                    else if (c == wxT('='))
+                        state = ST_BEFORE_VALUE;
+                    else
+                        pname << c;
+                    break;
+                case ST_BEFORE_EQ:
+                    if (c == wxT('='))
+                        state = ST_BEFORE_VALUE;
+                    else if (!IS_WHITE(c))
+                    {
+                        m_ParamNames.Add(pname);
+                        m_ParamValues.Add(wxEmptyString);
+                        pname = c;
+                        state = ST_NAME;
+                    }
+                    break;
+                case ST_BEFORE_VALUE:
+                    if (!IS_WHITE(c))
+                    {
+                        if (c == wxT('"') || c == wxT('\''))
+                            quote = c, pvalue = wxEmptyString;
+                        else
+                            quote = 0, pvalue = c;
+                        state = ST_VALUE;
+                    }
+                    break;
+                case ST_VALUE:
+                    if ((quote != 0 && c == quote) ||
+                        (quote == 0 && IS_WHITE(c)))
+                    {
+                        m_ParamNames.Add(pname);
+                        if (quote == 0)
+                        {
+                            // VS: backward compatibility, no real reason,
+                            //     but wxHTML code relies on this... :(
+                            pvalue.MakeUpper();
+                        }
+                        m_ParamValues.Add(entParser->Parse(pvalue));
+                        state = ST_BEFORE_NAME;
+                    }
+                    else
+                        pvalue << c;
+                    break;
              }
          }
+        
+        #undef IS_WHITE
+   }
     m_Begin = i;
  
     cache->QueryTag(pos, &m_End1, &m_End2);
@@ -209,113 +277,49 @@ wxHtmlTag::wxHtmlTag(const wxString& source, int pos, int end_pos, wxHtmlTagsCac
     if (m_End2 > end_pos) m_End2 = end_pos;
  }
  
-    
-
 bool wxHtmlTag::HasParam(const wxString& par) const
 {
-    const wxChar *st = m_Params, *p = par;
-    const wxChar *st2, *p2;
-    const wxChar invalid = wxT('\1');
-
-    if (*st == 0) return FALSE;
-    if (*p == 0) return FALSE;
-    for (st2 = st, p2 = p; ; st2++) 
-    {
-        if (*p2 == 0 && *st2 == wxT('=')) return TRUE;
-        if (*st2 == 0) return FALSE;
-        if (*p2 != *st2) p2 = &invalid;
-        if (*p2 == *st2) p2++;
-        if (*st2 == wxT(' ')) p2 = p;
-        else if (*st2 == wxT('=')) 
-        {
-            p2 = p;
-            while (*st2 != wxT(' ')) 
-            {
-                if (*st2 == wxT('"')) 
-                {
-                    st2++;
-                    while (*st2 != wxT('"')) st2++;
-                }
-                st2++;
-                if (*st2 == 0) return FALSE;
-            }
-        }
-    }
+    return (m_ParamNames.Index(par, FALSE) != wxNOT_FOUND); // true iff par is in m_ParamNames (FALSE presumably = ignore case; confirm)
 }
  
-
-
 wxString wxHtmlTag::GetParam(const wxString& par, bool with_commas) const
 {
-    const wxChar *st = m_Params, *p = par;
-    const wxChar *st2, *p2;
-    const wxChar invalid = wxT('\1');
-    bool comma;
-    wxChar comma_char;
-
-    if (*st == 0) return wxEmptyString;
-    if (*p == 0) return wxEmptyString;
-    for (st2 = st, p2 = p; ; st2++) 
+    int index = m_ParamNames.Index(par, FALSE);   // slot of par among the parsed names
+    if (index == wxNOT_FOUND)
+        return wxEmptyString;                     // parameter not present in this tag
+    if (with_commas)
     {
-        if (*p2 == 0 && *st2 == wxT('='))  // found
-        {
-            wxString fnd = wxEmptyString;
-            st2++; // '=' character
-            comma = FALSE;
-            comma_char = wxT('\0');
-            if (!with_commas && (*(st2) == wxT('"'))) 
-            {
-                st2++;
-                comma = TRUE; 
-                comma_char = wxT('"');
-            }
-            else if (!with_commas && (*(st2) == wxT('\''))) 
-            {
-                st2++; 
-                comma = TRUE;
-                comma_char = wxT('\'');
-            }
-        
-            while (*st2 != 0) 
-            {
-                if (comma && *st2 == comma_char) comma = FALSE;
-                else if ((*st2 == wxT(' ')) && (!comma)) break;
-                fnd += (*(st2++));
-            }
-            if (!with_commas && (*(st2-1) == comma_char)) fnd.RemoveLast();
-            return fnd;
-        }
-        if (*st2 == 0) return wxEmptyString;
-        if (*p2 != *st2) p2 = &invalid;
-        if (*p2 == *st2) p2++;
-        if (*st2 == wxT(' ')) p2 = p;
-        else if (*st2 == wxT('=')) 
-        {
-            p2 = p;
-            while (*st2 != wxT(' ')) 
-            {
-                if (*st2 == wxT('"')) 
-                {
-                    st2++;
-                    while (*st2 != wxT('"')) st2++;
-                }
-                else if (*st2 == wxT('\''))
-                {
-                    st2++;
-                    while (*st2 != wxT('\'')) st2++;
-                }
-                st2++;
-            }
-        }
+        // VS: backward compatibility only; quote with ' if the value has a "
+        const wxString& val = m_ParamValues[index];
+        wxChar q = (val.Find(wxT('"')) == wxNOT_FOUND) ? wxT('"') : wxT('\'');
+        return wxString(q) + val + q;
     }
+    else
+        return m_ParamValues[index];              // stored value, returned without quotes
 }
  
-
-
  int wxHtmlTag::ScanParam(const wxString& par, wxChar *format, void *param) const
  {
      wxString parval = GetParam(par);
      return wxSscanf(parval, format, param);
  }
  
+wxString wxHtmlTag::GetAllParams() const
+{
+    // VS: this function is for backward compatibility only, never used by
+    //     wxHTML; it rebuilds a space-separated NAME="value" list
+    wxString s;
+    size_t cnt = m_ParamNames.GetCount();
+    for (size_t i = 0; i < cnt; i++)
+    {
+        if (i > 0) s << wxT(' ');   // keep consecutive parameters apart
+        s << m_ParamNames[i] << wxT('=');
+        if (m_ParamValues[i].Find(wxT('"')) != wxNOT_FOUND)
+            s << wxT('\'') << m_ParamValues[i] << wxT('\'');   // value has a ", so use '
+        else
+            s << wxT('"') << m_ParamValues[i] << wxT('"');
+    }
+    return s;
+}
+
  #endif
diff --git a/src/html/winpars.cpp b/src/html/winpars.cpp

index 121b76e3aa2903fdb80b15c3c5fa7375c2d8d608..e90c35f7234c6be4685b1cbb98ba8623a3bfab6e 100644 (file)
--- a/src/html/winpars.cpp
+++ b/src/html/winpars.cpp
@@ -197,10 +197,12 @@ void wxHtmlWinParser::AddText(const char* txt)
      char temp[wxHTML_BUFLEN];
      register char d;
      int templen = 0;
-
+    
      if (m_tmpLastWasSpace) 
      {
-        while ((i < lng) && ((txt[i] == '\n') || (txt[i] == '\r') || (txt[i] == ' ') || (txt[i] == '\t'))) i++;
+        while ((i < lng) && 
+               ((txt[i] == '\n') || (txt[i] == '\r') || (txt[i] == ' ') || 
+                (txt[i] == '\t'))) i++;
      }
  
      while (i < lng) 
@@ -210,7 +212,8 @@ void wxHtmlWinParser::AddText(const char* txt)
          if ((d == '\n') || (d == '\r') || (d == ' ') || (d == '\t')) 
             {
              i++, x++;
-            while ((i < lng) && ((txt[i] == '\n') || (txt[i] == '\r') || (txt[i] == ' ') || (txt[i] == '\t'))) i++, x++;
+            while ((i < lng) && ((txt[i] == '\n') || (txt[i] == '\r') || 
+                                 (txt[i] == ' ') || (txt[i] == '\t'))) i++, x++;
          }
          else i++;
  
@@ -219,9 +222,11 @@ void wxHtmlWinParser::AddText(const char* txt)
              temp[templen-1] = ' ';
              temp[templen] = 0;
              templen = 0;
-            if (m_EncConv) m_EncConv->Convert(temp);
-            c = new wxHtmlWordCell(temp, *(GetDC()));
-            if (m_UseLink) c->SetLink(m_Link);
+            if (m_EncConv) 
+                m_EncConv->Convert(temp);
+            c = new wxHtmlWordCell(GetEntitiesParser()->Parse(temp), *(GetDC()));
+            if (m_UseLink) 
+                c->SetLink(m_Link);
              m_Container->InsertCell(c);
              m_tmpLastWasSpace = TRUE;
          }
@@ -229,9 +234,11 @@ void wxHtmlWinParser::AddText(const char* txt)
      if (templen) 
      {
          temp[templen] = 0;
-        if (m_EncConv) m_EncConv->Convert(temp);
-        c = new wxHtmlWordCell(temp, *(GetDC()));
-        if (m_UseLink) c->SetLink(m_Link);
+        if (m_EncConv) 
+            m_EncConv->Convert(temp);
+        c = new wxHtmlWordCell(GetEntitiesParser()->Parse(temp), *(GetDC()));
+        if (m_UseLink) 
+            c->SetLink(m_Link);
          m_Container->InsertCell(c);
          m_tmpLastWasSpace = FALSE;
      }
@@ -333,7 +340,11 @@ void wxHtmlWinParser::SetFontFace(const wxString& face)
  void wxHtmlWinParser::SetInputEncoding(wxFontEncoding enc)
  {
      m_InputEnc = m_OutputEnc = wxFONTENCODING_DEFAULT;
-    if (m_EncConv) {delete m_EncConv; m_EncConv = NULL;}
+    if (m_EncConv) 
+    {
+        delete m_EncConv; 
+        m_EncConv = NULL;
+    }
  
      if (enc == wxFONTENCODING_DEFAULT) return;
  
@@ -363,6 +374,10 @@ void wxHtmlWinParser::SetInputEncoding(wxFontEncoding enc)
          m_OutputEnc = wxFONTENCODING_DEFAULT;
          
      m_InputEnc = enc;
+    if (m_OutputEnc == wxFONTENCODING_DEFAULT)
+        GetEntitiesParser()->SetEncoding(wxFONTENCODING_SYSTEM);
+    else
+        GetEntitiesParser()->SetEncoding(m_OutputEnc);
      
      if (m_InputEnc == m_OutputEnc) return;
author	Václav Slavík <vslavik@fastmail.fm>
	Sun, 1 Jul 2001 15:09:35 +0000 (15:09 +0000)
committer	Václav Slavík <vslavik@fastmail.fm>
	Sun, 1 Jul 2001 15:09:35 +0000 (15:09 +0000)
include/wx/html/htmlpars.h		patch \| blob \| blame \| history
src/html/helpdata.cpp		patch \| blob \| blame \| history
src/html/htmlcell.cpp		patch \| blob \| blame \| history
src/html/htmlpars.cpp		patch \| blob \| blame \| history
src/html/htmltag.cpp		patch \| blob \| blame \| history
src/html/winpars.cpp		patch \| blob \| blame \| history