#include "wx/html/htmltag.h"
#include "wx/filesys.h"
-class wxHtmlParser;
-class wxHtmlTagHandler;
-
-//--------------------------------------------------------------------------------
-// wxHtmlParser
-// This class handles generic parsing of HTML document : it scans
-// the document and divide it into blocks of tags (where one block
-// consists of starting and ending tag and of text between these
-// 2 tags.
-//--------------------------------------------------------------------------------
-
+class WXDLLEXPORT wxMBConv;
+class WXDLLEXPORT wxHtmlParser;
+class WXDLLEXPORT wxHtmlTagHandler;
+class WXDLLEXPORT wxHtmlEntitiesParser;
+
+// This class handles generic parsing of HTML document : it scans
+// the document and divide it into blocks of tags (where one block
+// consists of starting and ending tag and of text between these
+// 2 tags.
class WXDLLEXPORT wxHtmlParser : public wxObject
{
DECLARE_ABSTRACT_CLASS(wxHtmlParser)
public:
- wxHtmlParser() : wxObject(), m_HandlersHash(wxKEY_STRING)
- { m_FS = NULL; m_Cache = NULL; m_HandlersStack = NULL; }
+ // Defined out of line; besides resetting the members it also creates
+ // the entity parser returned by GetEntitiesParser().
+ wxHtmlParser();
virtual ~wxHtmlParser();
// Sets the class which will be used for opening files
// ignored if no hander is found.
// Derived class is *responsible* for filling in m_Handlers table.
virtual void AddTag(const wxHtmlTag& tag);
+
+ // Returns the entity parser object that is used to substitute HTML
+ // &entities; with the characters they stand for. The returned object
+ // stays owned by this parser.
+ wxHtmlEntitiesParser *GetEntitiesParser() const { return m_entitiesParser; }
protected:
// source being parsed
wxFileSystem *m_FS;
// handlers stack used by PushTagHandler and PopTagHandler
wxList *m_HandlersStack;
+
+ // entity parser, created in the constructor and deleted in the destructor
+ wxHtmlEntitiesParser *m_entitiesParser;
};
-
-
-
-//--------------------------------------------------------------------------------
-// wxHtmlTagHandler
-// This class (and derived classes) cooperates with wxHtmlParser.
-// Each recognized tag is passed to handler which is capable
-// of handling it. Each tag is handled in 3 steps:
-// 1. Handler will modifies state of parser
-// (using it's public methods)
-// 2. Parser parses source between starting and ending tag
-// 3. Handler restores original state of the parser
-//--------------------------------------------------------------------------------
-
+// This class (and derived classes) cooperates with wxHtmlParser.
+// Each recognized tag is passed to handler which is capable
+// of handling it. Each tag is handled in 3 steps:
+// 1. Handler will modifies state of parser
+// (using it's public methods)
+// 2. Parser parses source between starting and ending tag
+// 3. Handler restores original state of the parser
class WXDLLEXPORT wxHtmlTagHandler : public wxObject
{
DECLARE_ABSTRACT_CLASS(wxHtmlTagHandler)
};
+// This class is used to parse HTML entities in strings. It can handle
+// both named entities (e.g. &amp;) and numeric &#xxxx; / &#xhhhh; entities
+// where xxxx (decimal) or hhhh (hexadecimal) is the Unicode code of the
+// character.
+class WXDLLEXPORT wxHtmlEntitiesParser : public wxObject
+{
+ DECLARE_DYNAMIC_CLASS(wxHtmlEntitiesParser)
+
+public:
+ wxHtmlEntitiesParser();
+ virtual ~wxHtmlEntitiesParser();
+
+ // Sets encoding of output string.
+ // Has no effect if wxUSE_WCHAR_T==0 or wxUSE_UNICODE==1
+ void SetEncoding(wxFontEncoding encoding);
+
+ // Parses entities in input and replaces them with respective characters
+ // (with respect to output encoding). Returns the resulting string, the
+ // input itself is not modified.
+ wxString Parse(const wxString& input);
+
+protected:
+ // Returns the character for an entity given without the leading '&'
+ // and trailing ';' (e.g. "amp" or "#38"); '?' if it is not recognized.
+ wxChar GetEntityChar(const wxString& entity);
+ // Returns the character that represents the given Unicode code in the
+ // output encoding.
+ wxChar GetCharForCode(unsigned code);
+
+#if wxUSE_WCHAR_T && !wxUSE_UNICODE
+ // converter for the encoding chosen with SetEncoding(); NULL while the
+ // encoding is wxFONTENCODING_SYSTEM. Owned by this object.
+ wxMBConv *m_conv;
+ // encoding last passed to SetEncoding()
+ wxFontEncoding m_encoding;
+#endif
+};
#endif
}
else
{ // "PARAM"
- if (m_Name == wxEmptyString && tag.GetParam(wxT("NAME")) == wxT("Name"))
- {
+ if (m_Name == wxEmptyString && tag.GetParam(wxT("NAME")) == wxT("Name"))
m_Name = tag.GetParam(wxT("VALUE"));
- if (m_Name.Find(wxT('&')) != -1)
- {
-#define ESCSEQ(escape, subst) \
- { _T("&") _T(escape) _T(";"), _T("&") _T(escape) _T(" "), _T("&") _T(escape), _T(subst) }
- static wxChar* substitutions[][4] =
- {
- ESCSEQ("quot", "\""),
- ESCSEQ("#34", "\""),
- ESCSEQ("#8220", "\""),
- ESCSEQ("#8221", "\""),
- ESCSEQ("lt", "<"),
- ESCSEQ("#60", "<"),
- ESCSEQ("gt", ">"),
- ESCSEQ("#62", ">"),
-
- ESCSEQ("#94", "^"), /* ^ */
-
- ESCSEQ("nbsp", " "),
- ESCSEQ("#32", " "),
- ESCSEQ("iexcl", "!"),
- ESCSEQ("#33", "!"),
- ESCSEQ("cent", "¢"/* ¢ */),
- ESCSEQ("#162", "¢"/* ¢ */),
-
- ESCSEQ("trade", "(TM)"),
- ESCSEQ("#153", "(TM)"),
- ESCSEQ("#8482", "(TM)"),
-
- ESCSEQ("yen", "¥"),
- ESCSEQ("#165", "¥"),
- ESCSEQ("brkbar", "¦"),
- ESCSEQ("#166", "¦"),
- ESCSEQ("sect", "§"),
- ESCSEQ("#167", "§"),
- ESCSEQ("uml", "¨"),
- ESCSEQ("#168", "¨"),
-
- ESCSEQ("copy", "©"), /* © */
- ESCSEQ("#169", "©"),
- ESCSEQ("ordf", "ª"),
- ESCSEQ("#170", "ª"),
- ESCSEQ("laquo", "«"), /* « */
- ESCSEQ("#171", "«"),
- ESCSEQ("not", "¬"),
- ESCSEQ("#172", "¬"),
-
- ESCSEQ("reg", "®"), /* ® */
- ESCSEQ("#174", "®"),
-
- ESCSEQ("deg", "°"), /* ° */
- ESCSEQ("#176", "°"),
- ESCSEQ("plusm", "±"), /* ± */
- ESCSEQ("#177", "±"),
-
- ESCSEQ("acute", "´"),
- ESCSEQ("#180", "´"),
- ESCSEQ("macron", "¯"),
- ESCSEQ("#175", "¯"),
- ESCSEQ("micro", "µ"), /* µ */
- ESCSEQ("#181", "µ"),
- ESCSEQ("para", "¶"), /* ¶ */
- ESCSEQ("#182", "¶"),
-
- ESCSEQ("ordm", "º"), /* º */
- ESCSEQ("#186", "º"),
- ESCSEQ("raquo", "»"), /* » */
- ESCSEQ("#187", "»"),
-
- ESCSEQ("iquest", "¿"), /* ¿ */
- ESCSEQ("#191", "¿"),
- ESCSEQ("Agrave", "\300"/* À */),
- ESCSEQ("#193", "\300"/* À */),
-
- ESCSEQ("Acirc", "\302"/* Â */),
- ESCSEQ("Atilde", "\303"/* Ã */),
- ESCSEQ("Auml", "\304"/* Ä */),
- ESCSEQ("Aring", " "),
- ESCSEQ("AElig", " "),
- ESCSEQ("Ccedil", "\347"/* ç */),
- ESCSEQ("Egrave", "\310"/* È */),
- ESCSEQ("Eacute", "\311"/* É */),
- ESCSEQ("Ecirc", "\312"/* Ê */),
- ESCSEQ("Euml", "\313"/* Ë */),
- ESCSEQ("Igrave", "\314"/* Ì */),
-
- ESCSEQ("Icirc", "\316"/* Î */),
- ESCSEQ("Iuml", "\317"/* Ï */),
-
- ESCSEQ("Ntilde", "\321"/* Ñ */),
- ESCSEQ("Ograve", "\322"/* Ò */),
-
- ESCSEQ("Ocirc", "\324"/* Ô */),
- ESCSEQ("Otilde", "\325"/* Õ */),
- ESCSEQ("Ouml", "\326"/* Ö */),
-
- ESCSEQ("Oslash", " "),
- ESCSEQ("Ugrave", "\331"/* Ù */),
-
- ESCSEQ("Ucirc", " "),
- ESCSEQ("Uuml", "\334"/* Ü */),
-
- ESCSEQ("szlig", "\247"/* § */),
- ESCSEQ("agrave","\340"/* à */),
- ESCSEQ("aacute", "\341"/* á */),
- ESCSEQ("acirc", "\342"/* â */),
- ESCSEQ("atilde", "\343"/* ã */),
- ESCSEQ("auml", "\344"/* ä */),
- ESCSEQ("aring", "a"),
- ESCSEQ("aelig", "ae"),
- ESCSEQ("ccedil", "\347"/* ç */),
- ESCSEQ("egrave", "\350"/* è */),
- ESCSEQ("eacute", "\351"/* é */),
- ESCSEQ("ecirc", "\352"/* ê */),
- ESCSEQ("euml", "\353"/* ë */),
- ESCSEQ("igrave", "\354"/* ì */),
- ESCSEQ("iacute", "\355"/* í */),
- ESCSEQ("icirc", " "),
- ESCSEQ("iuml", "\357"/* ï */),
- ESCSEQ("eth", " "),
- ESCSEQ("ntilde", "\361"/* ñ */),
- ESCSEQ("ograve", "\362"/* ò */),
- ESCSEQ("oacute", "\363"/* ó */),
- ESCSEQ("ocirc", "\364"/* ô */),
- ESCSEQ("otilde", "\365"/* õ */),
- ESCSEQ("ouml", "\366"/* ö */),
- ESCSEQ("divide", " "),
- ESCSEQ("oslash", " "),
- ESCSEQ("ugrave", "\371"/* ù */),
- ESCSEQ("uacute", "\372"/* ú */),
- ESCSEQ("ucirc", "\373"/* û */),
- ESCSEQ("uuml", "\374"/* ü */),
-
- ESCSEQ("yuml", ""),
-
- /* this one should ALWAYS stay the last one!!! */
- ESCSEQ("amp", "&"),
- ESCSEQ("#38", "&"),
-
- { NULL, NULL, NULL }
- };
-
- for (int i = 0; substitutions[i][0] != NULL; i++)
- {
- m_Name.Replace(substitutions[i][0], substitutions[i][3], TRUE);
- m_Name.Replace(substitutions[i][1], substitutions[i][3], TRUE);
- m_Name.Replace(substitutions[i][2], substitutions[i][3], TRUE);
- }
- }
- }
- if (tag.GetParam(wxT("NAME")) == wxT("Local")) m_Page = tag.GetParam(wxT("VALUE"));
- if (tag.GetParam(wxT("NAME")) == wxT("ID")) tag.ScanParam(wxT("VALUE"), wxT("%i"), &m_ID);
+ if (tag.GetParam(wxT("NAME")) == wxT("Local"))
+ m_Page = tag.GetParam(wxT("VALUE"));
+ if (tag.GetParam(wxT("NAME")) == wxT("ID"))
+ tag.ScanParam(wxT("VALUE"), wxT("%i"), &m_ID);
return FALSE;
}
}
wxHtmlWordCell::wxHtmlWordCell(const wxString& word, wxDC& dc) : wxHtmlCell()
{
m_Word = word;
-
- if (m_Word.Find(wxT('&')) != -1)
- {
-#define ESCSEQ(escape, subst) \
- { _T("&") _T(escape) _T(";"), _T("&") _T(escape) _T(" "), _T("&") _T(escape), _T(subst) }
- static wxChar* substitutions[][4] =
- {
- ESCSEQ("quot", "\""),
- ESCSEQ("#34", "\""),
- ESCSEQ("#8220", "\""),
- ESCSEQ("#8221", "\""),
- ESCSEQ("lt", "<"),
- ESCSEQ("#60", "<"),
- ESCSEQ("gt", ">"),
- ESCSEQ("#62", ">"),
-
- ESCSEQ("#94", "^"), /* ^ */
-
- ESCSEQ("nbsp", " "),
- ESCSEQ("#32", " "),
- ESCSEQ("iexcl", "!"),
- ESCSEQ("#33", "!"),
- ESCSEQ("cent", "¢"/* ¢ */),
- ESCSEQ("#162", "¢"/* ¢ */),
-
- ESCSEQ("trade", "(TM)"),
- ESCSEQ("#153", "(TM)"),
- ESCSEQ("#8482", "(TM)"),
-
- ESCSEQ("yen", "¥"),
- ESCSEQ("#165", "¥"),
- ESCSEQ("brkbar", "¦"),
- ESCSEQ("#166", "¦"),
- ESCSEQ("sect", "§"),
- ESCSEQ("#167", "§"),
- ESCSEQ("uml", "¨"),
- ESCSEQ("#168", "¨"),
-
- ESCSEQ("copy", "©"), /* © */
- ESCSEQ("#169", "©"),
- ESCSEQ("ordf", "ª"),
- ESCSEQ("#170", "ª"),
- ESCSEQ("laquo", "«"), /* « */
- ESCSEQ("#171", "«"),
- ESCSEQ("not", "¬"),
- ESCSEQ("#172", "¬"),
-
- ESCSEQ("reg", "®"), /* ® */
- ESCSEQ("#174", "®"),
-
- ESCSEQ("deg", "°"), /* ° */
- ESCSEQ("#176", "°"),
- ESCSEQ("plusm", "±"), /* ± */
- ESCSEQ("#177", "±"),
-
- ESCSEQ("acute", "´"),
- ESCSEQ("#180", "´"),
- ESCSEQ("macron", "¯"),
- ESCSEQ("#175", "¯"),
- ESCSEQ("micro", "µ"), /* µ */
- ESCSEQ("#181", "µ"),
- ESCSEQ("para", "¶"), /* ¶ */
- ESCSEQ("#182", "¶"),
-
- ESCSEQ("ordm", "º"), /* º */
- ESCSEQ("#186", "º"),
- ESCSEQ("raquo", "»"), /* » */
- ESCSEQ("#187", "»"),
-
- ESCSEQ("iquest", "¿"), /* ¿ */
- ESCSEQ("#191", "¿"),
- ESCSEQ("Agrave", "\300"/* À */),
- ESCSEQ("#193", "\300"/* À */),
-
- ESCSEQ("Acirc", "\302"/* Â */),
- ESCSEQ("Atilde", "\303"/* Ã */),
- ESCSEQ("Auml", "\304"/* Ä */),
- ESCSEQ("Aring", " "),
- ESCSEQ("AElig", " "),
- ESCSEQ("Ccedil", "\347"/* ç */),
- ESCSEQ("Egrave", "\310"/* È */),
- ESCSEQ("Eacute", "\311"/* É */),
- ESCSEQ("Ecirc", "\312"/* Ê */),
- ESCSEQ("Euml", "\313"/* Ë */),
- ESCSEQ("Igrave", "\314"/* Ì */),
-
- ESCSEQ("Icirc", "\316"/* Î */),
- ESCSEQ("Iuml", "\317"/* Ï */),
-
- ESCSEQ("Ntilde", "\321"/* Ñ */),
- ESCSEQ("Ograve", "\322"/* Ò */),
-
- ESCSEQ("Ocirc", "\324"/* Ô */),
- ESCSEQ("Otilde", "\325"/* Õ */),
- ESCSEQ("Ouml", "\326"/* Ö */),
-
- ESCSEQ("Oslash", " "),
- ESCSEQ("Ugrave", "\331"/* Ù */),
-
- ESCSEQ("Ucirc", " "),
- ESCSEQ("Uuml", "\334"/* Ü */),
-
- ESCSEQ("szlig", "\247"/* § */),
- ESCSEQ("agrave","\340"/* à */),
- ESCSEQ("aacute", "\341"/* á */),
- ESCSEQ("acirc", "\342"/* â */),
- ESCSEQ("atilde", "\343"/* ã */),
- ESCSEQ("auml", "\344"/* ä */),
- ESCSEQ("aring", "a"),
- ESCSEQ("aelig", "ae"),
- ESCSEQ("ccedil", "\347"/* ç */),
- ESCSEQ("egrave", "\350"/* è */),
- ESCSEQ("eacute", "\351"/* é */),
- ESCSEQ("ecirc", "\352"/* ê */),
- ESCSEQ("euml", "\353"/* ë */),
- ESCSEQ("igrave", "\354"/* ì */),
- ESCSEQ("iacute", "\355"/* í */),
- ESCSEQ("icirc", " "),
- ESCSEQ("iuml", "\357"/* ï */),
- ESCSEQ("eth", " "),
- ESCSEQ("ntilde", "\361"/* ñ */),
- ESCSEQ("ograve", "\362"/* ò */),
- ESCSEQ("oacute", "\363"/* ó */),
- ESCSEQ("ocirc", "\364"/* ô */),
- ESCSEQ("otilde", "\365"/* õ */),
- ESCSEQ("ouml", "\366"/* ö */),
- ESCSEQ("divide", " "),
- ESCSEQ("oslash", " "),
- ESCSEQ("ugrave", "\371"/* ù */),
- ESCSEQ("uacute", "\372"/* ú */),
- ESCSEQ("ucirc", "\373"/* û */),
- ESCSEQ("uuml", "\374"/* ü */),
-
- ESCSEQ("yuml", ""),
-
- /* this one should ALWAYS stay the last one!!! */
- ESCSEQ("amp", "&"),
- ESCSEQ("#38", "&"),
-
- { NULL, NULL, NULL }
- };
-
- for (int i = 0; substitutions[i][0] != NULL; i++)
- {
- m_Word.Replace(substitutions[i][0], substitutions[i][3], TRUE);
- m_Word.Replace(substitutions[i][1], substitutions[i][3], TRUE);
- m_Word.Replace(substitutions[i][2], substitutions[i][3], TRUE);
- }
- }
-
dc.GetTextExtent(m_Word, &m_Width, &m_Height, &m_Descent);
SetCanLiveOnPagebreak(FALSE);
}
#include "wx/tokenzr.h"
#include "wx/wfstream.h"
#include "wx/url.h"
+#include "wx/fontmap.h"
#include "wx/html/htmldefs.h"
#include "wx/html/htmlpars.h"
IMPLEMENT_ABSTRACT_CLASS(wxHtmlParser,wxObject)
+// Creates a parser with no file system, tag cache or handlers stack yet.
+// The entity parser is the only helper object allocated up front.
+wxHtmlParser::wxHtmlParser()
+    : wxObject(),
+      m_Cache(NULL),
+      m_HandlersHash(wxKEY_STRING),
+      m_FS(NULL),
+      m_HandlersStack(NULL),
+      m_entitiesParser(new wxHtmlEntitiesParser)
+{
+}
+
+// Releases everything the parser owns: the entity parser, the handlers
+// stack and the registered tag handlers.
+wxHtmlParser::~wxHtmlParser()
+{
+    delete m_entitiesParser;
+    // still NULL if no handler was ever pushed; deleting NULL is harmless
+    delete m_HandlersStack;
+
+    m_HandlersHash.Clear();
+
+    // ask the list to destroy its elements when it is emptied
+    m_HandlersList.DeleteContents(TRUE);
+    m_HandlersList.Clear();
+}
wxObject* wxHtmlParser::Parse(const wxString& source)
{
m_HandlersStack->DeleteNode(first);
}
-wxHtmlParser::~wxHtmlParser()
-{
- if (m_HandlersStack) delete m_HandlersStack;
- m_HandlersHash.Clear();
- m_HandlersList.DeleteContents(TRUE);
- m_HandlersList.Clear();
-}
-
//-----------------------------------------------------------------------------
// wxHtmlTagHandler
//-----------------------------------------------------------------------------
IMPLEMENT_ABSTRACT_CLASS(wxHtmlTagHandler,wxObject)
+
+
+//-----------------------------------------------------------------------------
+// wxHtmlEntitiesParser
+//-----------------------------------------------------------------------------
+
+IMPLEMENT_DYNAMIC_CLASS(wxHtmlEntitiesParser,wxObject)
+
+// Default constructor. When the conversion members are compiled in
+// (wxUSE_WCHAR_T && !wxUSE_UNICODE) it starts without a converter and with
+// the system encoding selected; in other configurations there is nothing
+// to initialize.
+wxHtmlEntitiesParser::wxHtmlEntitiesParser()
+#if wxUSE_WCHAR_T && !wxUSE_UNICODE
+ : m_conv(NULL), m_encoding(wxFONTENCODING_SYSTEM)
#endif
+{
+}
+
+// Destroys the converter allocated by SetEncoding(), if there is one.
+wxHtmlEntitiesParser::~wxHtmlEntitiesParser()
+{
+#if wxUSE_WCHAR_T && !wxUSE_UNICODE
+    // m_conv is declared only in this configuration, so the delete has to
+    // be guarded the same way or other builds fail to compile.
+    delete m_conv;
+#endif
+}
+// Selects the encoding that parsed entities are converted to. Does nothing
+// unless wxUSE_WCHAR_T is on and wxUSE_UNICODE is off.
+void wxHtmlEntitiesParser::SetEncoding(wxFontEncoding encoding)
+{
+#if wxUSE_WCHAR_T && !wxUSE_UNICODE
+    if (m_encoding != encoding)
+    {
+        // the old converter belongs to the previous encoding
+        delete m_conv;
+        m_conv = NULL;
+
+        m_encoding = encoding;
+
+        // the system encoding needs no converter of its own
+        if (encoding != wxFONTENCODING_SYSTEM)
+        {
+            m_conv = new wxCSConv(wxFontMapper::GetEncodingName(encoding));
+        }
+    }
+#endif
+}
+
+// Returns a copy of 'input' in which every HTML entity ("&name;" or
+// "&#code;", the trailing ';' being optional) is replaced by the character
+// returned by GetEntityChar(). Text outside entities is copied verbatim and
+// a lone '&' at the very end of the input is kept as it is.
+wxString wxHtmlEntitiesParser::Parse(const wxString& input)
+{
+    const wxChar *c = input.c_str();
+    const wxChar *last = c;     // start of the literal text not copied yet
+    wxString output;
+
+    while (*c != wxT('\0'))
+    {
+        if (*c != wxT('&'))
+        {
+            c++;
+            continue;
+        }
+
+        // copy the literal run that precedes the '&'
+        if (c - last > 0)
+            output.append(last, c - last);
+
+        if (c[1] == wxT('\0'))
+        {
+            // '&' is the last character: there is no entity to read, let
+            // the code after the loop emit it as plain text (the text
+            // before it has already been flushed above)
+            last = c;
+            break;
+        }
+
+        // collect the entity name / numeric code following the '&'
+        const wxChar *ent_s = ++c;
+        while ((*c >= wxT('a') && *c <= wxT('z')) ||
+               (*c >= wxT('A') && *c <= wxT('Z')) ||
+               (*c >= wxT('0') && *c <= wxT('9')) ||
+               *c == wxT('_') || *c == wxT('#'))
+            c++;
+
+        wxString entity;
+        entity.append(ent_s, c - ent_s);
+        if (*c == wxT(';'))
+            c++;                // the terminating ';' is part of the entity
+        output << GetEntityChar(entity);
+
+        // c already points at the first character after the entity, so it
+        // must not be advanced again here: doing so would skip a character
+        // (or run past the terminating NUL when the entity ends the input)
+        last = c;
+    }
+
+    if (*last != wxT('\0'))
+        output.append(last);
+    return output;
+}
+
+// One entry of the named-entity lookup table.
+struct wxHtmlEntityInfo
+{
+ // entity name without the leading '&' and trailing ';'
+ const wxChar *name;
+ // character code of the entity (passed to GetCharForCode())
+ unsigned code;
+};
+
+// Comparison callback for bsearch(): 'key' is the NUL-terminated entity name
+// being searched for, 'item' points to a wxHtmlEntityInfo element.
+static int compar_entity(const void *key, const void *item)
+{
+    wxChar *wanted = (wxChar*)key;
+    wxHtmlEntityInfo *entry = (wxHtmlEntityInfo*)item;
+
+    return wxStrcmp(wanted, entry->name);
+}
+
+// Maps a character code to a wxChar. In Unicode builds the code is returned
+// as is; otherwise it is converted to the output encoding and '?' is
+// returned when that is not possible.
+wxChar wxHtmlEntitiesParser::GetCharForCode(unsigned code)
+{
+#if wxUSE_UNICODE
+    return (wxChar)code;
+#elif wxUSE_WCHAR_T
+    // single wide character followed by the terminator
+    wchar_t wide[2] = { (wchar_t)code, 0 };
+    char narrow[2];
+
+    // use the converter installed by SetEncoding() if there is one
+    wxMBConv *conv = &wxConvLocal;
+    if (m_conv)
+        conv = m_conv;
+
+    if (conv->WC2MB(narrow, wide, 1) != (size_t)-1)
+        return narrow[0];
+    return '?';
+#else
+    // without wchar_t support only codes below 256 can be returned
+    if (code < 256)
+        return (wxChar)code;
+    return '?';
+#endif
+}
+
+// Translates one entity, given without the leading '&' and the trailing
+// ';', into a character:
+// - "#NNN" is read as a decimal and "#xHHH" / "#XHHH" as a hexadecimal
+// character code;
+// - anything else is looked up (case-sensitively) in the table of named
+// entities below.
+// The resulting code is passed through GetCharForCode(). If the entity is
+// not recognized or yields code 0, '?' is returned.
+wxChar wxHtmlEntitiesParser::GetEntityChar(const wxString& entity)
+{
+ unsigned code = 0;
+
+ if (entity[0] == wxT('#'))
+ {
+ const wxChar *ent_s = entity.c_str();
+ const wxChar *format;
+
+ if (ent_s[1] == wxT('x') || ent_s[1] == wxT('X'))
+ {
+ format = wxT("%x");
+ // skip the 'x'/'X' marker
+ ent_s++;
+ }
+ else
+ format = wxT("%u");
+ // skip the '#'
+ ent_s++;
+
+ if (wxSscanf(ent_s, format, &code) != 1)
+ code = 0;
+ }
+ else
+ {
+ // NOTE: the table is searched with bsearch() using wxStrcmp(), so
+ // the names must stay sorted in wxStrcmp() order (upper case
+ // letters sort before lower case ones here). The {NULL, 0} entry
+ // terminates the table.
+ static wxHtmlEntityInfo substitutions[] = {
+ { wxT("AElig"),198 },
+ { wxT("Aacute"),193 },
+ { wxT("Acirc"),194 },
+ { wxT("Agrave"),192 },
+ { wxT("Alpha"),913 },
+ { wxT("Aring"),197 },
+ { wxT("Atilde"),195 },
+ { wxT("Auml"),196 },
+ { wxT("Beta"),914 },
+ { wxT("Ccedil"),199 },
+ { wxT("Chi"),935 },
+ { wxT("Dagger"),8225 },
+ { wxT("Delta"),916 },
+ { wxT("ETH"),208 },
+ { wxT("Eacute"),201 },
+ { wxT("Ecirc"),202 },
+ { wxT("Egrave"),200 },
+ { wxT("Epsilon"),917 },
+ { wxT("Eta"),919 },
+ { wxT("Euml"),203 },
+ { wxT("Gamma"),915 },
+ { wxT("Iacute"),205 },
+ { wxT("Icirc"),206 },
+ { wxT("Igrave"),204 },
+ { wxT("Iota"),921 },
+ { wxT("Iuml"),207 },
+ { wxT("Kappa"),922 },
+ { wxT("Lambda"),923 },
+ { wxT("Mu"),924 },
+ { wxT("Ntilde"),209 },
+ { wxT("Nu"),925 },
+ { wxT("OElig"),338 },
+ { wxT("Oacute"),211 },
+ { wxT("Ocirc"),212 },
+ { wxT("Ograve"),210 },
+ { wxT("Omega"),937 },
+ { wxT("Omicron"),927 },
+ { wxT("Oslash"),216 },
+ { wxT("Otilde"),213 },
+ { wxT("Ouml"),214 },
+ { wxT("Phi"),934 },
+ { wxT("Pi"),928 },
+ { wxT("Prime"),8243 },
+ { wxT("Psi"),936 },
+ { wxT("Rho"),929 },
+ { wxT("Scaron"),352 },
+ { wxT("Sigma"),931 },
+ { wxT("THORN"),222 },
+ { wxT("Tau"),932 },
+ { wxT("Theta"),920 },
+ { wxT("Uacute"),218 },
+ { wxT("Ucirc"),219 },
+ { wxT("Ugrave"),217 },
+ { wxT("Upsilon"),933 },
+ { wxT("Uuml"),220 },
+ { wxT("Xi"),926 },
+ { wxT("Yacute"),221 },
+ { wxT("Yuml"),376 },
+ { wxT("Zeta"),918 },
+ { wxT("aacute"),225 },
+ { wxT("acirc"),226 },
+ { wxT("acute"),180 },
+ { wxT("aelig"),230 },
+ { wxT("agrave"),224 },
+ { wxT("alefsym"),8501 },
+ { wxT("alpha"),945 },
+ { wxT("amp"),38 },
+ { wxT("and"),8743 },
+ { wxT("ang"),8736 },
+ { wxT("aring"),229 },
+ { wxT("asymp"),8776 },
+ { wxT("atilde"),227 },
+ { wxT("auml"),228 },
+ { wxT("bdquo"),8222 },
+ { wxT("beta"),946 },
+ { wxT("brvbar"),166 },
+ { wxT("bull"),8226 },
+ { wxT("cap"),8745 },
+ { wxT("ccedil"),231 },
+ { wxT("cedil"),184 },
+ { wxT("cent"),162 },
+ { wxT("chi"),967 },
+ { wxT("circ"),710 },
+ { wxT("clubs"),9827 },
+ { wxT("cong"),8773 },
+ { wxT("copy"),169 },
+ { wxT("crarr"),8629 },
+ { wxT("cup"),8746 },
+ { wxT("curren"),164 },
+ { wxT("dArr"),8659 },
+ { wxT("dagger"),8224 },
+ { wxT("darr"),8595 },
+ { wxT("deg"),176 },
+ { wxT("delta"),948 },
+ { wxT("diams"),9830 },
+ { wxT("divide"),247 },
+ { wxT("eacute"),233 },
+ { wxT("ecirc"),234 },
+ { wxT("egrave"),232 },
+ { wxT("empty"),8709 },
+ { wxT("emsp"),8195 },
+ { wxT("ensp"),8194 },
+ { wxT("epsilon"),949 },
+ { wxT("equiv"),8801 },
+ { wxT("eta"),951 },
+ { wxT("eth"),240 },
+ { wxT("euml"),235 },
+ { wxT("euro"),8364 },
+ { wxT("exist"),8707 },
+ { wxT("fnof"),402 },
+ { wxT("forall"),8704 },
+ { wxT("frac12"),189 },
+ { wxT("frac14"),188 },
+ { wxT("frac34"),190 },
+ { wxT("frasl"),8260 },
+ { wxT("gamma"),947 },
+ { wxT("ge"),8805 },
+ { wxT("gt"),62 },
+ { wxT("hArr"),8660 },
+ { wxT("harr"),8596 },
+ { wxT("hearts"),9829 },
+ { wxT("hellip"),8230 },
+ { wxT("iacute"),237 },
+ { wxT("icirc"),238 },
+ { wxT("iexcl"),161 },
+ { wxT("igrave"),236 },
+ { wxT("image"),8465 },
+ { wxT("infin"),8734 },
+ { wxT("int"),8747 },
+ { wxT("iota"),953 },
+ { wxT("iquest"),191 },
+ { wxT("isin"),8712 },
+ { wxT("iuml"),239 },
+ { wxT("kappa"),954 },
+ { wxT("lArr"),8656 },
+ { wxT("lambda"),955 },
+ { wxT("lang"),9001 },
+ { wxT("laquo"),171 },
+ { wxT("larr"),8592 },
+ { wxT("lceil"),8968 },
+ { wxT("ldquo"),8220 },
+ { wxT("le"),8804 },
+ { wxT("lfloor"),8970 },
+ { wxT("lowast"),8727 },
+ { wxT("loz"),9674 },
+ { wxT("lrm"),8206 },
+ { wxT("lsaquo"),8249 },
+ { wxT("lsquo"),8216 },
+ { wxT("lt"),60 },
+ { wxT("macr"),175 },
+ { wxT("mdash"),8212 },
+ { wxT("micro"),181 },
+ { wxT("middot"),183 },
+ { wxT("minus"),8722 },
+ { wxT("mu"),956 },
+ { wxT("nabla"),8711 },
+ { wxT("nbsp"),160 },
+ { wxT("ndash"),8211 },
+ { wxT("ne"),8800 },
+ { wxT("ni"),8715 },
+ { wxT("not"),172 },
+ { wxT("notin"),8713 },
+ { wxT("nsub"),8836 },
+ { wxT("ntilde"),241 },
+ { wxT("nu"),957 },
+ { wxT("oacute"),243 },
+ { wxT("ocirc"),244 },
+ { wxT("oelig"),339 },
+ { wxT("ograve"),242 },
+ { wxT("oline"),8254 },
+ { wxT("omega"),969 },
+ { wxT("omicron"),959 },
+ { wxT("oplus"),8853 },
+ { wxT("or"),8744 },
+ { wxT("ordf"),170 },
+ { wxT("ordm"),186 },
+ { wxT("oslash"),248 },
+ { wxT("otilde"),245 },
+ { wxT("otimes"),8855 },
+ { wxT("ouml"),246 },
+ { wxT("para"),182 },
+ { wxT("part"),8706 },
+ { wxT("permil"),8240 },
+ { wxT("perp"),8869 },
+ { wxT("phi"),966 },
+ { wxT("pi"),960 },
+ { wxT("piv"),982 },
+ { wxT("plusmn"),177 },
+ { wxT("pound"),163 },
+ { wxT("prime"),8242 },
+ { wxT("prod"),8719 },
+ { wxT("prop"),8733 },
+ { wxT("psi"),968 },
+ { wxT("quot"),34 },
+ { wxT("rArr"),8658 },
+ { wxT("radic"),8730 },
+ { wxT("rang"),9002 },
+ { wxT("raquo"),187 },
+ { wxT("rarr"),8594 },
+ { wxT("rceil"),8969 },
+ { wxT("rdquo"),8221 },
+ { wxT("real"),8476 },
+ { wxT("reg"),174 },
+ { wxT("rfloor"),8971 },
+ { wxT("rho"),961 },
+ { wxT("rlm"),8207 },
+ { wxT("rsaquo"),8250 },
+ { wxT("rsquo"),8217 },
+ { wxT("sbquo"),8218 },
+ { wxT("scaron"),353 },
+ { wxT("sdot"),8901 },
+ { wxT("sect"),167 },
+ { wxT("shy"),173 },
+ { wxT("sigma"),963 },
+ { wxT("sigmaf"),962 },
+ { wxT("sim"),8764 },
+ { wxT("spades"),9824 },
+ { wxT("sub"),8834 },
+ { wxT("sube"),8838 },
+ { wxT("sum"),8721 },
+ { wxT("sup"),8835 },
+ { wxT("sup1"),185 },
+ { wxT("sup2"),178 },
+ { wxT("sup3"),179 },
+ { wxT("supe"),8839 },
+ { wxT("szlig"),223 },
+ { wxT("tau"),964 },
+ { wxT("there4"),8756 },
+ { wxT("theta"),952 },
+ { wxT("thetasym"),977 },
+ { wxT("thinsp"),8201 },
+ { wxT("thorn"),254 },
+ { wxT("tilde"),732 },
+ { wxT("times"),215 },
+ { wxT("trade"),8482 },
+ { wxT("uArr"),8657 },
+ { wxT("uacute"),250 },
+ { wxT("uarr"),8593 },
+ { wxT("ucirc"),251 },
+ { wxT("ugrave"),249 },
+ { wxT("uml"),168 },
+ { wxT("upsih"),978 },
+ { wxT("upsilon"),965 },
+ { wxT("uuml"),252 },
+ { wxT("weierp"),8472 },
+ { wxT("xi"),958 },
+ { wxT("yacute"),253 },
+ { wxT("yen"),165 },
+ { wxT("yuml"),255 },
+ { wxT("zeta"),950 },
+ { wxT("zwj"),8205 },
+ { wxT("zwnj"),8204 },
+ {NULL, 0}};
+ // number of real entries, counted once on the first call by walking
+ // the table up to the terminating entry (the one with code 0)
+ static size_t substitutions_cnt = 0;
+
+ if (substitutions_cnt == 0)
+ while (substitutions[substitutions_cnt].code != 0)
+ substitutions_cnt++;
+
+ wxHtmlEntityInfo *info;
+ info = (wxHtmlEntityInfo*) bsearch(entity.c_str(), substitutions,
+ substitutions_cnt,
+ sizeof(wxHtmlEntityInfo),
+ compar_entity);
+ if (info)
+ code = info->code;
+ }
+
+ // code is still 0 if nothing could be parsed or found
+ if (code == 0)
+ return wxT('?');
+ else
+ return GetCharForCode(code);
+}
+
+#endif
#endif
#include "wx/html/htmltag.h"
+#include "wx/html/htmlpars.h"
#include <stdio.h> // for vsscanf
#include <stdarg.h>
}
}
-
-
void wxHtmlTagsCache::QueryTag(int at, int* end1, int* end2)
{
if (m_Cache == NULL) return;
if (m_Cache[m_CachePos].Key != at)
{
int delta = (at < m_Cache[m_CachePos].Key) ? -1 : 1;
- do {m_CachePos += delta;} while (m_Cache[m_CachePos].Key != at);
+ do
+ {
+ m_CachePos += delta;
+ }
+ while (m_Cache[m_CachePos].Key != at);
}
*end1 = m_Cache[m_CachePos].End1;
*end2 = m_Cache[m_CachePos].End2;
IMPLEMENT_CLASS(wxHtmlTag,wxObject)
-wxHtmlTag::wxHtmlTag(const wxString& source, int pos, int end_pos, wxHtmlTagsCache* cache) : wxObject()
+// Parses the tag whose '<' is at source[pos]; nothing at or beyond end_pos
+// is read. Fills in:
+// - m_Ending and m_Name (the name is converted to upper case);
+// - m_ParamNames / m_ParamValues, one pair per parameter. Entities in the
+// values are substituted using entParser, which is dereferenced
+// unconditionally whenever a parameter has a value. Unquoted values are
+// additionally converted to upper case, quoted ones are kept as written;
+// - m_Begin (position following the tag's own text) and m_End1 / m_End2,
+// which are queried from cache, m_End2 being clipped to end_pos.
+wxHtmlTag::wxHtmlTag(const wxString& source, int pos, int end_pos,
+ wxHtmlTagsCache *cache,
+ wxHtmlEntitiesParser *entParser) : wxObject()
{
int i;
- char c;
+ wxChar c;
// fill-in name, params and begin pos:
- m_Name = m_Params = wxEmptyString;
i = pos+1;
- if (source[i] == wxT('/')) { m_Ending = TRUE; i++; }
- else m_Ending = FALSE;
+ if (source[i] == wxT('/'))
+ { m_Ending = TRUE; i++; }
+ else
+ m_Ending = FALSE;
// find tag's name and convert it to uppercase:
while ((i < end_pos) &&
- ((c = source[i++]) != wxT(' ') && c != wxT('\r') &&
- c != wxT('\n') && c != wxT('\t') &&
- c != wxT('>')))
+ ((c = source[i++]) != wxT(' ') && c != wxT('\r') &&
+ c != wxT('\n') && c != wxT('\t') &&
+ c != wxT('>')))
{
- if ((c >= wxT('a')) && (c <= wxT('z'))) c -= (wxT('a') - wxT('A'));
- m_Name += c;
+ if ((c >= wxT('a')) && (c <= wxT('z')))
+ c -= (wxT('a') - wxT('A'));
+ m_Name << c;
}
// if the tag has parameters, read them and "normalize" them,
// i.e. convert to uppercase, replace whitespaces by spaces and
// remove whitespaces around '=':
if (source[i-1] != wxT('>'))
- while ((i < end_pos) && ((c = source[i++]) != wxT('>')))
+ {
+ #define IS_WHITE(c) (c == wxT(' ') || c == wxT('\r') || \
+ c == wxT('\n') || c == wxT('\t'))
+ wxString pname, pvalue;
+ wxChar quote;
+ enum
{
- if ((c >= wxT('a')) && (c <= wxT('z')))
- c -= (wxT('a') - wxT('A'));
- if (c == wxT('\r') || c == wxT('\n') || c == wxT('\t'))
- c = wxT(' '); // make future parsing a bit simpler
- m_Params += c;
- if (c == wxT('"'))
+ ST_BEFORE_NAME = 1,
+ ST_NAME,
+ ST_BEFORE_EQ,
+ ST_BEFORE_VALUE,
+ ST_VALUE
+ } state;
+
+ quote = 0;
+ state = ST_BEFORE_NAME;
+ while (i < end_pos)
+ {
+ c = source[i++];
+
+ // '>' ends the tag unless it appears inside a quoted value
+ if (c == wxT('>') && !(state == ST_VALUE && quote != 0))
{
- // remove spaces around the '=' character:
- if (m_Params.Length() > 1 &&
- m_Params[m_Params.Length()-2] == wxT(' '))
+ if (state == ST_BEFORE_EQ || state == ST_NAME)
{
- m_Params.RemoveLast();
- while (m_Params.Length() > 0 && m_Params.Last() == wxT(' '))
- m_Params.RemoveLast();
- m_Params += wxT('"');
+ m_ParamNames.Add(pname);
+ m_ParamValues.Add(wxEmptyString);
}
- while ((i < end_pos) && (source[i++] == wxT(' '))) {}
- if (i < end_pos) i--;
-
- // ...and copy the value to m_Params:
- while ((i < end_pos) && ((c = source[i++]) != wxT('"')))
- m_Params += c;
- m_Params += c;
+ else if (state == ST_VALUE && quote == 0)
+ {
+ m_ParamNames.Add(pname);
+ // an unquoted value ended by '>' must be treated exactly
+ // like one ended by whitespace (see ST_VALUE below):
+ // upper case, for backward compatibility
+ pvalue.MakeUpper();
+ m_ParamValues.Add(entParser->Parse(pvalue));
+ }
+ break;
}
- else if (c == wxT('\''))
+ switch (state)
{
- while ((i < end_pos) && ((c = source[i++]) != wxT('\'')))
- m_Params += c;
- m_Params += c;
+ case ST_BEFORE_NAME:
+ if (!IS_WHITE(c))
+ {
+ pname = c;
+ state = ST_NAME;
+ }
+ break;
+ case ST_NAME:
+ if (IS_WHITE(c))
+ state = ST_BEFORE_EQ;
+ else if (c == wxT('='))
+ state = ST_BEFORE_VALUE;
+ else
+ pname << c;
+ break;
+ case ST_BEFORE_EQ:
+ if (c == wxT('='))
+ state = ST_BEFORE_VALUE;
+ else if (!IS_WHITE(c))
+ {
+ // previous name had no value, a new name starts here
+ m_ParamNames.Add(pname);
+ m_ParamValues.Add(wxEmptyString);
+ pname = c;
+ state = ST_NAME;
+ }
+ break;
+ case ST_BEFORE_VALUE:
+ if (!IS_WHITE(c))
+ {
+ if (c == wxT('"') || c == wxT('\''))
+ quote = c, pvalue = wxEmptyString;
+ else
+ quote = 0, pvalue = c;
+ state = ST_VALUE;
+ }
+ break;
+ case ST_VALUE:
+ if ((quote != 0 && c == quote) ||
+ (quote == 0 && IS_WHITE(c)))
+ {
+ m_ParamNames.Add(pname);
+ if (quote == 0)
+ {
+ // VS: backward compatibility, no real reason,
+ // but wxHTML code relies on this... :(
+ pvalue.MakeUpper();
+ }
+ m_ParamValues.Add(entParser->Parse(pvalue));
+ state = ST_BEFORE_NAME;
+ }
+ else
+ pvalue << c;
+ break;
}
}
+
+ #undef IS_WHITE
+ }
m_Begin = i;
cache->QueryTag(pos, &m_End1, &m_End2);
if (m_End2 > end_pos) m_End2 = end_pos;
}
-
-
bool wxHtmlTag::HasParam(const wxString& par) const
{
- const wxChar *st = m_Params, *p = par;
- const wxChar *st2, *p2;
- const wxChar invalid = wxT('\1');
-
- if (*st == 0) return FALSE;
- if (*p == 0) return FALSE;
- for (st2 = st, p2 = p; ; st2++)
- {
- if (*p2 == 0 && *st2 == wxT('=')) return TRUE;
- if (*st2 == 0) return FALSE;
- if (*p2 != *st2) p2 = &invalid;
- if (*p2 == *st2) p2++;
- if (*st2 == wxT(' ')) p2 = p;
- else if (*st2 == wxT('='))
- {
- p2 = p;
- while (*st2 != wxT(' '))
- {
- if (*st2 == wxT('"'))
- {
- st2++;
- while (*st2 != wxT('"')) st2++;
- }
- st2++;
- if (*st2 == 0) return FALSE;
- }
- }
- }
+ // The parameter names were collected by the constructor; the tag has
+ // the parameter iff its name is found among them.
+ // NOTE(review): FALSE is Index()'s second argument, presumably the
+ // case-sensitivity flag (i.e. a case-insensitive search) - confirm
+ // against the wxArrayString documentation.
+ return (m_ParamNames.Index(par, FALSE) != wxNOT_FOUND);
}
-
-
wxString wxHtmlTag::GetParam(const wxString& par, bool with_commas) const
{
- const wxChar *st = m_Params, *p = par;
- const wxChar *st2, *p2;
- const wxChar invalid = wxT('\1');
- bool comma;
- wxChar comma_char;
-
- if (*st == 0) return wxEmptyString;
- if (*p == 0) return wxEmptyString;
- for (st2 = st, p2 = p; ; st2++)
+ // locate the parameter among the names stored by the constructor;
+ // a parameter that is not present yields an empty string
+ const int found = m_ParamNames.Index(par, FALSE);
+ if (found == wxNOT_FOUND)
+ return wxEmptyString;
+
+ if (with_commas)
{
- if (*p2 == 0 && *st2 == wxT('=')) // found
- {
- wxString fnd = wxEmptyString;
- st2++; // '=' character
- comma = FALSE;
- comma_char = wxT('\0');
- if (!with_commas && (*(st2) == wxT('"')))
- {
- st2++;
- comma = TRUE;
- comma_char = wxT('"');
- }
- else if (!with_commas && (*(st2) == wxT('\'')))
- {
- st2++;
- comma = TRUE;
- comma_char = wxT('\'');
- }
-
- while (*st2 != 0)
- {
- if (comma && *st2 == comma_char) comma = FALSE;
- else if ((*st2 == wxT(' ')) && (!comma)) break;
- fnd += (*(st2++));
- }
- if (!with_commas && (*(st2-1) == comma_char)) fnd.RemoveLast();
- return fnd;
- }
- if (*st2 == 0) return wxEmptyString;
- if (*p2 != *st2) p2 = &invalid;
- if (*p2 == *st2) p2++;
- if (*st2 == wxT(' ')) p2 = p;
- else if (*st2 == wxT('='))
- {
- p2 = p;
- while (*st2 != wxT(' '))
- {
- if (*st2 == wxT('"'))
- {
- st2++;
- while (*st2 != wxT('"')) st2++;
- }
- else if (*st2 == wxT('\''))
- {
- st2++;
- while (*st2 != wxT('\'')) st2++;
- }
- st2++;
- }
- }
+ // VS: backward compatibility, seems to be never used by wxHTML...
+ // (the value is handed back wrapped in double quotes)
+ wxString quoted;
+ quoted << wxT('"');
+ quoted << m_ParamValues[found];
+ quoted << wxT('"');
+ return quoted;
}
+
+ // the usual case: the bare value
+ return m_ParamValues[found];
}
-
-
int wxHtmlTag::ScanParam(const wxString& par, wxChar *format, void *param) const
{
wxString parval = GetParam(par);
return wxSscanf(parval, format, param);
}
+// Rebuilds a textual parameter list, NAME="value" NAME2="value2" ..., from
+// the stored names and values. A value that itself contains a double quote
+// is wrapped in single quotes instead. Consecutive parameters are separated
+// by a single space so that the result can be split into parameters again.
+wxString wxHtmlTag::GetAllParams() const
+{
+    // VS: this function is for backward compatiblity only,
+    //     never used by wxHTML
+    wxString s;
+    size_t cnt = m_ParamNames.GetCount();
+    for (size_t i = 0; i < cnt; i++)
+    {
+        // without a separator the parameters would run into each other
+        if (i > 0)
+            s << wxT(' ');
+
+        s << m_ParamNames[i];
+        s << wxT('=');
+        if (m_ParamValues[i].Find(wxT('"')) != wxNOT_FOUND)
+            s << wxT('\'') << m_ParamValues[i] << wxT('\'');
+        else
+            s << wxT('"') << m_ParamValues[i] << wxT('"');
+    }
+    return s;
+}
+
#endif
char temp[wxHTML_BUFLEN];
register char d;
int templen = 0;
-
+
if (m_tmpLastWasSpace)
{
- while ((i < lng) && ((txt[i] == '\n') || (txt[i] == '\r') || (txt[i] == ' ') || (txt[i] == '\t'))) i++;
+ while ((i < lng) &&
+ ((txt[i] == '\n') || (txt[i] == '\r') || (txt[i] == ' ') ||
+ (txt[i] == '\t'))) i++;
}
while (i < lng)
if ((d == '\n') || (d == '\r') || (d == ' ') || (d == '\t'))
{
i++, x++;
- while ((i < lng) && ((txt[i] == '\n') || (txt[i] == '\r') || (txt[i] == ' ') || (txt[i] == '\t'))) i++, x++;
+ while ((i < lng) && ((txt[i] == '\n') || (txt[i] == '\r') ||
+ (txt[i] == ' ') || (txt[i] == '\t'))) i++, x++;
}
else i++;
temp[templen-1] = ' ';
temp[templen] = 0;
templen = 0;
- if (m_EncConv) m_EncConv->Convert(temp);
- c = new wxHtmlWordCell(temp, *(GetDC()));
- if (m_UseLink) c->SetLink(m_Link);
+ if (m_EncConv)
+ m_EncConv->Convert(temp);
+ c = new wxHtmlWordCell(GetEntitiesParser()->Parse(temp), *(GetDC()));
+ if (m_UseLink)
+ c->SetLink(m_Link);
m_Container->InsertCell(c);
m_tmpLastWasSpace = TRUE;
}
if (templen)
{
temp[templen] = 0;
- if (m_EncConv) m_EncConv->Convert(temp);
- c = new wxHtmlWordCell(temp, *(GetDC()));
- if (m_UseLink) c->SetLink(m_Link);
+ if (m_EncConv)
+ m_EncConv->Convert(temp);
+ c = new wxHtmlWordCell(GetEntitiesParser()->Parse(temp), *(GetDC()));
+ if (m_UseLink)
+ c->SetLink(m_Link);
m_Container->InsertCell(c);
m_tmpLastWasSpace = FALSE;
}
void wxHtmlWinParser::SetInputEncoding(wxFontEncoding enc)
{
m_InputEnc = m_OutputEnc = wxFONTENCODING_DEFAULT;
- if (m_EncConv) {delete m_EncConv; m_EncConv = NULL;}
+ if (m_EncConv)
+ {
+ delete m_EncConv;
+ m_EncConv = NULL;
+ }
if (enc == wxFONTENCODING_DEFAULT) return;
m_OutputEnc = wxFONTENCODING_DEFAULT;
m_InputEnc = enc;
+ if (m_OutputEnc == wxFONTENCODING_DEFAULT)
+ GetEntitiesParser()->SetEncoding(wxFONTENCODING_SYSTEM);
+ else
+ GetEntitiesParser()->SetEncoding(m_OutputEnc);
if (m_InputEnc == m_OutputEnc) return;