src/html/htmlpars.cpp

/////////////////////////////////////////////////////////////////////////////
// Name:        htmlpars.cpp
// Purpose:     wxHtmlParser class (generic parser)
// Author:      Vaclav Slavik
// RCS-ID:      $Id$
// Copyright:   (c) 1999 Vaclav Slavik
// Licence:     wxWindows Licence
/////////////////////////////////////////////////////////////////////////////


#ifdef __GNUG__
#pragma implementation
#endif

#include "wx/wxprec.h"

#include "wx/defs.h"
#if wxUSE_HTML && wxUSE_STREAMS

// Borland C++ only: mark the end of the headers that take part in
// precompilation. The guard macro is __BORLANDC__; the previous spelling
// never matched, so the pragma was silently skipped.
#ifdef __BORLANDC__
#pragma hdrstop
#endif

#ifndef WXPRECOMP
    #include "wx/log.h"
    #include "wx/intl.h"
#endif

#include "wx/tokenzr.h"
#include "wx/wfstream.h"
#include "wx/url.h"
#include "wx/fontmap.h"
#include "wx/html/htmldefs.h"
#include "wx/html/htmlpars.h"
#include "wx/dynarray.h"
#include "wx/arrimpl.cpp"

//-----------------------------------------------------------------------------
// wxHtmlParser helpers
//-----------------------------------------------------------------------------

// One run of plain text in the parsed source, described by where it starts
// and how long it is. Instances are stored by value in wxHtmlTextPieces.
class wxHtmlTextPiece
{
public:
    wxHtmlTextPiece(int pos, int lng)
    {
        m_pos = pos;
        m_lng = lng;
    }

    int m_pos;  // offset of the first character of the run
    int m_lng;  // number of characters in the run
};

// Declare and define wxHtmlTextPieces, the array-of-wxHtmlTextPiece type that
// the parser uses for m_TextPieces.
WX_DECLARE_OBJARRAY(wxHtmlTextPiece, wxHtmlTextPieces);
WX_DEFINE_OBJARRAY(wxHtmlTextPieces);

// Snapshot of the parser's per-source state. SetSourceAndSaveState() fills
// one of these in and pushes it on the m_SavedStates list; RestoreState()
// pops it and copies the fields back into the parser.
class wxHtmlParserState
{
public:
    wxHtmlTag         *m_curTag;        // saved wxHtmlParser::m_CurTag
    wxHtmlTag         *m_tags;          // saved wxHtmlParser::m_Tags
    wxHtmlTextPieces  *m_textPieces;    // saved wxHtmlParser::m_TextPieces
    int                m_curTextPiece;  // saved wxHtmlParser::m_CurTextPiece
    wxString           m_source;        // saved wxHtmlParser::m_Source
    wxHtmlParserState *m_nextState;     // state saved before this one, or NULL
};

//-----------------------------------------------------------------------------
// wxHtmlParser
//-----------------------------------------------------------------------------

IMPLEMENT_ABSTRACT_CLASS(wxHtmlParser,wxObject)

// Creates a parser that has no source, no DOM tree, no saved states and no
// pushed handlers; only the entities parser is allocated up front.
wxHtmlParser::wxHtmlParser()
    : wxObject(), m_HandlersHash(wxKEY_STRING)
{
    m_FS = NULL;
    m_HandlersStack = NULL;

    // nothing parsed yet:
    m_Tags = NULL;
    m_CurTag = NULL;
    m_TextPieces = NULL;
    m_CurTextPiece = 0;
    m_SavedStates = NULL;

    m_entitiesParser = new wxHtmlEntitiesParser;
}

// Releases everything the parser owns: all saved states, the current DOM
// tree, the handlers stack, the registered handlers and the entities parser.
wxHtmlParser::~wxHtmlParser()
{
    // unwind every saved state; each RestoreState() call destroys the tree
    // that was current at that moment
    bool restored;
    do
    {
        restored = RestoreState();
    } while (restored);

    // ... and finally the tree of the outermost source:
    DestroyDOMTree();

    delete m_HandlersStack;

    m_HandlersHash.Clear();

    // the list is told to delete its contents before it is cleared:
    m_HandlersList.DeleteContents(TRUE);
    m_HandlersList.Clear();

    delete m_entitiesParser;
}

// Runs a complete parsing pass over 'source': initialises the parser, parses,
// fetches the product and then lets the parser clean up.
// Returns whatever GetProduct() returned.
wxObject* wxHtmlParser::Parse(const wxString& source)
{
    InitParser(source);
    DoParsing();

    // the product has to be fetched before DoneParser() is called
    wxObject * const product = GetProduct();

    DoneParser();
    return product;
}

// Prepares the parser for parsing 'source'. This implementation only installs
// the source (which also builds the DOM tree, see SetSource()).
void wxHtmlParser::InitParser(const wxString& source)
{
    SetSource(source);
}

// Called by Parse() once the product has been fetched. This implementation
// only frees the DOM tree and the text pieces of the current source.
void wxHtmlParser::DoneParser()
{
    DestroyDOMTree();
}

// Replaces the source being parsed: the old DOM tree is thrown away, a new
// one is built from 'src' and the parsing cursors are reset.
void wxHtmlParser::SetSource(const wxString& src)
{
    // get rid of whatever was built for the previous source
    DestroyDOMTree();

    m_Source = src;
    CreateDOMTree();

    // start from the beginning of the new source
    m_CurTextPiece = 0;
    m_CurTag = NULL;
}

void wxHtmlParser::CreateDOMTree()
{
    wxHtmlTagsCache cache(m_Source);
    m_TextPieces = new wxHtmlTextPieces;
    CreateDOMSubTree(NULL, 0, m_Source.Length(), &cache);
    m_CurTextPiece = 0;
}

// Scans m_Source[begin_pos, end_pos), creating a wxHtmlTag for every opening
// tag found and recording every run of plain text in m_TextPieces.
//
//   cur        parent for the tags created here, or NULL when scanning the
//              top level (such tags are chained as siblings from m_Tags)
//   begin_pos  offset of the first character to scan
//   end_pos    offset one past the last character to scan
//   cache      tags cache passed on to every wxHtmlTag constructed
//
// Comments and closing tags are skipped. The range between GetBeginPos() and
// GetEndPos1() of every tag that has an ending is scanned recursively.
void wxHtmlParser::CreateDOMSubTree(wxHtmlTag *cur,
                                    int begin_pos, int end_pos,
                                    wxHtmlTagsCache *cache)
{
    if (end_pos <= begin_pos) return;

    wxChar c;
    int i = begin_pos;
    int textBeginning = begin_pos;

    while (i < end_pos)
    {
        c = m_Source.GetChar(i);

        if (c == wxT('<'))
        {
            // add text to m_TextPieces:
            if (i - textBeginning > 0)
                m_TextPieces->Add(
                    wxHtmlTextPiece(textBeginning, i - textBeginning));

            // if it is a comment, skip it:
            if (i < end_pos-6 && m_Source.GetChar(i+1) == wxT('!') &&
                                 m_Source.GetChar(i+2) == wxT('-') &&
                                 m_Source.GetChar(i+3) == wxT('-'))
            {
                // Comments begin with "<!--" and end with "--[ \t\r\n]*>"
                // according to HTML 4.0
                int dashes = 0;
                i += 4;
                while (i < end_pos)
                {
                    c = m_Source.GetChar(i++);
                    if ((c == wxT(' ') || c == wxT('\n') ||
                        c == wxT('\r') || c == wxT('\t')) && dashes >= 2) {}
                    else if (c == wxT('>') && dashes >= 2)
                        break;
                    else if (c == wxT('-'))
                        dashes++;
                    else
                        dashes = 0;
                }

                // i is now just past the closing '>', or equal to end_pos if
                // the comment was never closed. Restart the text run here in
                // both cases: otherwise an unterminated comment would leave
                // textBeginning behind and the text already recorded above,
                // together with the comment itself, would be recorded again
                // as "remaining text".
                textBeginning = i;
            }

            // add another tag to the tree:
            else if (i < end_pos-1 && m_Source.GetChar(i+1) != wxT('/'))
            {
                wxHtmlTag *chd;
                if (cur)
                    chd = new wxHtmlTag(cur, m_Source,
                                        i, end_pos, cache, m_entitiesParser);
                else
                {
                    chd = new wxHtmlTag(NULL, m_Source,
                                        i, end_pos, cache, m_entitiesParser);
                    if (!m_Tags)
                    {
                        // if this is the first tag to be created make the root
                        // m_Tags point to it:
                        m_Tags = chd;
                    }
                    else
                    {
                        // if there is already a root tag add this tag as
                        // the last sibling:
                        chd->m_Prev = m_Tags->GetLastSibling();
                        chd->m_Prev->m_Next = chd;
                    }
                }

                if (chd->HasEnding())
                {
                    // build the subtree for the tag's content, then go on
                    // scanning at the position reported by GetEndPos2():
                    CreateDOMSubTree(chd,
                                     chd->GetBeginPos(), chd->GetEndPos1(),
                                     cache);
                    i = chd->GetEndPos2();
                }
                else
                    i = chd->GetBeginPos();
                textBeginning = i;
            }

            // ... or skip ending tag:
            else
            {
                // i is left on the '>' (or at end_pos); the text run resumes
                // with the character that follows it
                while (i < end_pos && m_Source.GetChar(i) != wxT('>')) i++;
                textBeginning = i+1;
            }
        }
        else i++;
    }

    // add remaining text to m_TextPieces:
    if (end_pos - textBeginning > 0)
        m_TextPieces->Add(
            wxHtmlTextPiece(textBeginning, end_pos - textBeginning));
}

void wxHtmlParser::DestroyDOMTree()
{
    wxHtmlTag *t1, *t2;
    t1 = m_Tags;
    while (t1)
    {
        t2 = t1->GetNextSibling();
        delete t1;
        t1 = t2;
    }
    m_Tags = m_CurTag = NULL;

    delete m_TextPieces;
    m_TextPieces = NULL;
}

// Parses the whole source: rewinds both cursors to the start and processes
// the range [0, length of m_Source).
void wxHtmlParser::DoParsing()
{
    m_CurTextPiece = 0;
    m_CurTag = m_Tags;

    DoParsing(0, m_Source.Length());
}

// Processes the part of the source between begin_pos (inclusive) and end_pos
// (exclusive): text pieces are handed to AddText() (after entity
// substitution) and tags to AddTag(), whichever comes first in the source.
// m_CurTag and m_CurTextPiece are advanced as items are consumed.
void wxHtmlParser::DoParsing(int begin_pos, int end_pos)
{
    if (begin_pos >= end_pos) return;

    wxHtmlTextPieces& pieces = *m_TextPieces;
    size_t piecesCnt = pieces.GetCount();

    while (begin_pos < end_pos)
    {
        // skip everything that starts before the current position:
        while (m_CurTag && m_CurTag->GetBeginPos() < begin_pos)
            m_CurTag = m_CurTag->GetNextTag();
        while (m_CurTextPiece < piecesCnt &&
               pieces[m_CurTextPiece].m_pos < begin_pos)
            m_CurTextPiece++;

        // a text piece is next if there is one and it precedes the next tag
        // (or there is no tag left at all):
        const bool textIsNext =
            m_CurTextPiece < piecesCnt &&
            (!m_CurTag ||
             pieces[m_CurTextPiece].m_pos < m_CurTag->GetBeginPos());

        if (!textIsNext && !m_CurTag)
            break;  // neither text nor tags left

        if (textIsNext)
        {
            // Add text:
            AddText(GetEntitiesParser()->Parse(
                       m_Source.Mid(pieces[m_CurTextPiece].m_pos,
                                    pieces[m_CurTextPiece].m_lng)));
            begin_pos = pieces[m_CurTextPiece].m_pos +
                        pieces[m_CurTextPiece].m_lng;
            m_CurTextPiece++;
            continue;
        }

        // Add tag. The cursor is moved past it before AddTag() is called:
        wxHtmlTag *t = m_CurTag;
        begin_pos = t->HasEnding() ? t->GetEndPos2() : t->GetBeginPos();
        m_CurTag = t->GetNextTag();
        AddTag(*t);
    }
}

void wxHtmlParser::AddTag(const wxHtmlTag& tag)
{
    wxHtmlTagHandler *h;
    bool inner = FALSE;

    h = (wxHtmlTagHandler*) m_HandlersHash.Get(tag.GetName());
    if (h)
        inner = h->HandleTag(tag);
    if (!inner)
    {
        if (tag.HasEnding())
            DoParsing(tag.GetBeginPos(), tag.GetEndPos1());
    }
}

// Registers 'handler' for every tag named in its GetSupportedTags() string
// (names separated by commas and/or spaces), remembers it in m_HandlersList
// if it is not there yet and tells it which parser it belongs to.
void wxHtmlParser::AddTagHandler(wxHtmlTagHandler *handler)
{
    wxString supported(handler->GetSupportedTags());

    for (wxStringTokenizer names(supported, wxT(", "));
         names.HasMoreTokens(); )
    {
        m_HandlersHash.Put(names.NextToken(), handler);
    }

    const bool alreadyListed =
        m_HandlersList.IndexOf(handler) != wxNOT_FOUND;
    if (!alreadyListed)
        m_HandlersList.Append(handler);

    handler->SetParser(this);
}

// Temporarily makes 'handler' responsible for the tags listed in 'tags'
// (names separated by commas and/or spaces). A copy of the current handlers
// table is put at the front of m_HandlersStack first, so that
// PopTagHandler() can bring it back.
void wxHtmlParser::PushTagHandler(wxHtmlTagHandler *handler, wxString tags)
{
    wxStringTokenizer names(tags, wxT(", "));

    // the stack is only created when it is needed for the first time
    if (!m_HandlersStack)
    {
        m_HandlersStack = new wxList;
        m_HandlersStack->DeleteContents(TRUE);
    }

    // snapshot of the table as it is before the override:
    m_HandlersStack->Insert(new wxHashTable(m_HandlersHash));

    // replace whatever was registered under each of the names:
    wxString name;
    while (names.HasMoreTokens())
    {
        name = names.NextToken();
        m_HandlersHash.Delete(name);
        m_HandlersHash.Put(name, handler);
    }
}

void wxHtmlParser::PopTagHandler()
{
    wxNode *first;

    if (m_HandlersStack == NULL ||
        (first = m_HandlersStack->GetFirst()) == NULL)
    {
        wxLogWarning(_("Warning: attempt to remove HTML tag handler from empty stack."));
        return;
    }
    m_HandlersHash = *((wxHashTable*) first->GetData());
    m_HandlersStack->DeleteNode(first);
}

void wxHtmlParser::SetSourceAndSaveState(const wxString& src)
{
    wxHtmlParserState *s = new wxHtmlParserState;

    s->m_curTag = m_CurTag;
    s->m_tags = m_Tags;
    s->m_textPieces = m_TextPieces;
    s->m_curTextPiece = m_CurTextPiece;
    s->m_source = m_Source;

    s->m_nextState = m_SavedStates;
    m_SavedStates = s;

    m_CurTag = NULL;
    m_Tags = NULL;
    m_TextPieces = NULL;
    m_CurTextPiece = 0;
    m_Source = wxEmptyString;

    SetSource(src);
}

bool wxHtmlParser::RestoreState()
{
    if (!m_SavedStates) return FALSE;

    DestroyDOMTree();

    wxHtmlParserState *s = m_SavedStates;
    m_SavedStates = s->m_nextState;

    m_CurTag = s->m_curTag;
    m_Tags = s->m_tags;
    m_TextPieces = s->m_textPieces;
    m_CurTextPiece = s->m_curTextPiece;
    m_Source = s->m_source;

    delete s;
    return TRUE;
}

//-----------------------------------------------------------------------------
// wxHtmlTagHandler
//-----------------------------------------------------------------------------

IMPLEMENT_ABSTRACT_CLASS(wxHtmlTagHandler,wxObject)


//-----------------------------------------------------------------------------
// wxHtmlEntitiesParser
//-----------------------------------------------------------------------------

IMPLEMENT_DYNAMIC_CLASS(wxHtmlEntitiesParser,wxObject)

// In builds that have wchar_t support but are not Unicode the parser starts
// out with no converter and the system encoding; other builds keep no state.
wxHtmlEntitiesParser::wxHtmlEntitiesParser()
{
#if wxUSE_WCHAR_T && !wxUSE_UNICODE
    m_conv = NULL;
    m_encoding = wxFONTENCODING_SYSTEM;
#endif
}

// Frees the converter created by SetEncoding(), if any. The member exists
// only in non-Unicode builds with wchar_t support.
wxHtmlEntitiesParser::~wxHtmlEntitiesParser()
{
#if wxUSE_WCHAR_T && !wxUSE_UNICODE
    delete m_conv;
#endif
}

// Selects the encoding that GetCharForCode() converts entity codes to.
// Only meaningful in non-Unicode builds with wchar_t support: there the old
// converter is replaced by one for 'encoding', or by none at all for
// wxFONTENCODING_SYSTEM. In all other builds the argument is ignored.
void wxHtmlEntitiesParser::SetEncoding(wxFontEncoding encoding)
{
#if wxUSE_WCHAR_T && !wxUSE_UNICODE
    if (m_encoding != encoding)
    {
        delete m_conv;
        m_conv = NULL;

        m_encoding = encoding;
        if (encoding != wxFONTENCODING_SYSTEM)
            m_conv = new wxCSConv(wxFontMapper::GetEncodingName(encoding));
    }
#else
    (void) encoding;
#endif
}

// Returns a copy of 'input' in which every entity reference ("&name;",
// "&#123;", "&#x7B;"; the trailing semicolon is optional) is replaced by the
// character GetEntityChar() yields for it. All other text is copied
// unchanged. An '&' that is the very last character of the input cannot
// start an entity and is copied unchanged as well.
wxString wxHtmlEntitiesParser::Parse(const wxString& input)
{
    const wxChar *c, *last;
    const wxChar *in_str = input.c_str();
    wxString output;

    // 'last' always points at the first character not yet copied to output
    for (c = in_str, last = in_str; *c != wxT('\0'); c++)
    {
        if (*c == wxT('&'))
        {
            // flush the plain text preceding the '&':
            if (c - last > 0)
                output.append(last, c - last);

            // That text is now in the output, so move 'last' onto the '&'.
            // If the input ends right here, the tail copy after the loop then
            // emits just the '&' rather than repeating the text before it.
            last = c;

            // Test the character, not the pointer. The old pointer comparison
            // could never be true, so the end of input was not detected.
            if (*(++c) == wxT('\0')) break;

            // collect the entity name (letters, digits, '_' and '#'):
            wxString entity;
            const wxChar *ent_s = c;
            for (; (*c >= wxT('a') && *c <= wxT('z')) ||
                   (*c >= wxT('A') && *c <= wxT('Z')) ||
                   (*c >= wxT('0') && *c <= wxT('9')) ||
                   *c == wxT('_') || *c == wxT('#'); c++) {}
            entity.append(ent_s, c - ent_s);

            // a terminating ';' belongs to the entity, anything else does
            // not and has to be looked at again by the outer loop:
            if (*c != wxT(';')) c--;
            last = c+1;
            output << GetEntityChar(entity);
        }
    }

    // copy whatever follows the last entity:
    if (*last != wxT('\0'))
        output.append(last);
    return output;
}

// One row of the named-entity table used by GetEntityChar().
struct wxHtmlEntityInfo
{
    const wxChar *name;   // entity name without the leading '&' and trailing ';'
    unsigned code;        // character code the entity stands for; 0 marks the end of the table
};

// bsearch() comparison callback: 'key' is the entity name being looked up
// (a wxChar string), 'item' is a wxHtmlEntityInfo row of the table.
// Names are ordered with wxStrcmp().
static int LINKAGEMODE compar_entity(const void *key, const void *item)
{
    const wxChar *wanted = (const wxChar*)key;
    const wxHtmlEntityInfo *row = (const wxHtmlEntityInfo*)item;

    return wxStrcmp(wanted, row->name);
}

// Turns a numeric character code into a wxChar:
//  - Unicode builds: the code is simply cast to wxChar;
//  - non-Unicode builds with wchar_t: the code is converted with the
//    converter chosen by SetEncoding() (wxConvLocal if there is none) and
//    the first byte of the result is returned, or '?' if conversion fails;
//  - otherwise: codes below 256 are returned as they are, others as '?'.
wxChar wxHtmlEntitiesParser::GetCharForCode(unsigned code)
{
#if wxUSE_UNICODE
    return (wxChar)code;
#elif wxUSE_WCHAR_T
    // one-character, NUL-terminated wide string holding the code:
    wchar_t wide[2];
    wide[0] = (wchar_t)code;
    wide[1] = 0;

    wxMBConv *conv = m_conv;
    if (!conv)
        conv = &wxConvLocal;

    char narrow[2];
    const size_t converted = conv->WC2MB(narrow, wide, 2);
    if (converted == (size_t)-1)
        return '?';
    return narrow[0];
#else
    if (code < 256)
        return (wxChar)code;
    return '?';
#endif
}

// Returns the character that 'entity' (the text between '&' and ';') stands
// for. Names starting with '#' are numeric references, decimal or, with an
// 'x'/'X' after the '#', hexadecimal; all other names are looked up in the
// table below. wxT('?') is returned when the entity cannot be resolved.
wxChar wxHtmlEntitiesParser::GetEntityChar(const wxString& entity)
{
    unsigned code = 0;

    if (entity[0] != wxT('#'))
    {
        // Named entity. The rows must stay sorted by name in wxStrcmp()
        // order because the table is searched with bsearch().
        static wxHtmlEntityInfo substitutions[] = {
            { wxT("AElig"),198 },
            { wxT("Aacute"),193 },
            { wxT("Acirc"),194 },
            { wxT("Agrave"),192 },
            { wxT("Alpha"),913 },
            { wxT("Aring"),197 },
            { wxT("Atilde"),195 },
            { wxT("Auml"),196 },
            { wxT("Beta"),914 },
            { wxT("Ccedil"),199 },
            { wxT("Chi"),935 },
            { wxT("Dagger"),8225 },
            { wxT("Delta"),916 },
            { wxT("ETH"),208 },
            { wxT("Eacute"),201 },
            { wxT("Ecirc"),202 },
            { wxT("Egrave"),200 },
            { wxT("Epsilon"),917 },
            { wxT("Eta"),919 },
            { wxT("Euml"),203 },
            { wxT("Gamma"),915 },
            { wxT("Iacute"),205 },
            { wxT("Icirc"),206 },
            { wxT("Igrave"),204 },
            { wxT("Iota"),921 },
            { wxT("Iuml"),207 },
            { wxT("Kappa"),922 },
            { wxT("Lambda"),923 },
            { wxT("Mu"),924 },
            { wxT("Ntilde"),209 },
            { wxT("Nu"),925 },
            { wxT("OElig"),338 },
            { wxT("Oacute"),211 },
            { wxT("Ocirc"),212 },
            { wxT("Ograve"),210 },
            { wxT("Omega"),937 },
            { wxT("Omicron"),927 },
            { wxT("Oslash"),216 },
            { wxT("Otilde"),213 },
            { wxT("Ouml"),214 },
            { wxT("Phi"),934 },
            { wxT("Pi"),928 },
            { wxT("Prime"),8243 },
            { wxT("Psi"),936 },
            { wxT("Rho"),929 },
            { wxT("Scaron"),352 },
            { wxT("Sigma"),931 },
            { wxT("THORN"),222 },
            { wxT("Tau"),932 },
            { wxT("Theta"),920 },
            { wxT("Uacute"),218 },
            { wxT("Ucirc"),219 },
            { wxT("Ugrave"),217 },
            { wxT("Upsilon"),933 },
            { wxT("Uuml"),220 },
            { wxT("Xi"),926 },
            { wxT("Yacute"),221 },
            { wxT("Yuml"),376 },
            { wxT("Zeta"),918 },
            { wxT("aacute"),225 },
            { wxT("acirc"),226 },
            { wxT("acute"),180 },
            { wxT("aelig"),230 },
            { wxT("agrave"),224 },
            { wxT("alefsym"),8501 },
            { wxT("alpha"),945 },
            { wxT("amp"),38 },
            { wxT("and"),8743 },
            { wxT("ang"),8736 },
            { wxT("aring"),229 },
            { wxT("asymp"),8776 },
            { wxT("atilde"),227 },
            { wxT("auml"),228 },
            { wxT("bdquo"),8222 },
            { wxT("beta"),946 },
            { wxT("brvbar"),166 },
            { wxT("bull"),8226 },
            { wxT("cap"),8745 },
            { wxT("ccedil"),231 },
            { wxT("cedil"),184 },
            { wxT("cent"),162 },
            { wxT("chi"),967 },
            { wxT("circ"),710 },
            { wxT("clubs"),9827 },
            { wxT("cong"),8773 },
            { wxT("copy"),169 },
            { wxT("crarr"),8629 },
            { wxT("cup"),8746 },
            { wxT("curren"),164 },
            { wxT("dArr"),8659 },
            { wxT("dagger"),8224 },
            { wxT("darr"),8595 },
            { wxT("deg"),176 },
            { wxT("delta"),948 },
            { wxT("diams"),9830 },
            { wxT("divide"),247 },
            { wxT("eacute"),233 },
            { wxT("ecirc"),234 },
            { wxT("egrave"),232 },
            { wxT("empty"),8709 },
            { wxT("emsp"),8195 },
            { wxT("ensp"),8194 },
            { wxT("epsilon"),949 },
            { wxT("equiv"),8801 },
            { wxT("eta"),951 },
            { wxT("eth"),240 },
            { wxT("euml"),235 },
            { wxT("euro"),8364 },
            { wxT("exist"),8707 },
            { wxT("fnof"),402 },
            { wxT("forall"),8704 },
            { wxT("frac12"),189 },
            { wxT("frac14"),188 },
            { wxT("frac34"),190 },
            { wxT("frasl"),8260 },
            { wxT("gamma"),947 },
            { wxT("ge"),8805 },
            { wxT("gt"),62 },
            { wxT("hArr"),8660 },
            { wxT("harr"),8596 },
            { wxT("hearts"),9829 },
            { wxT("hellip"),8230 },
            { wxT("iacute"),237 },
            { wxT("icirc"),238 },
            { wxT("iexcl"),161 },
            { wxT("igrave"),236 },
            { wxT("image"),8465 },
            { wxT("infin"),8734 },
            { wxT("int"),8747 },
            { wxT("iota"),953 },
            { wxT("iquest"),191 },
            { wxT("isin"),8712 },
            { wxT("iuml"),239 },
            { wxT("kappa"),954 },
            { wxT("lArr"),8656 },
            { wxT("lambda"),955 },
            { wxT("lang"),9001 },
            { wxT("laquo"),171 },
            { wxT("larr"),8592 },
            { wxT("lceil"),8968 },
            { wxT("ldquo"),8220 },
            { wxT("le"),8804 },
            { wxT("lfloor"),8970 },
            { wxT("lowast"),8727 },
            { wxT("loz"),9674 },
            { wxT("lrm"),8206 },
            { wxT("lsaquo"),8249 },
            { wxT("lsquo"),8216 },
            { wxT("lt"),60 },
            { wxT("macr"),175 },
            { wxT("mdash"),8212 },
            { wxT("micro"),181 },
            { wxT("middot"),183 },
            { wxT("minus"),8722 },
            { wxT("mu"),956 },
            { wxT("nabla"),8711 },
            { wxT("nbsp"),160 },
            { wxT("ndash"),8211 },
            { wxT("ne"),8800 },
            { wxT("ni"),8715 },
            { wxT("not"),172 },
            { wxT("notin"),8713 },
            { wxT("nsub"),8836 },
            { wxT("ntilde"),241 },
            { wxT("nu"),957 },
            { wxT("oacute"),243 },
            { wxT("ocirc"),244 },
            { wxT("oelig"),339 },
            { wxT("ograve"),242 },
            { wxT("oline"),8254 },
            { wxT("omega"),969 },
            { wxT("omicron"),959 },
            { wxT("oplus"),8853 },
            { wxT("or"),8744 },
            { wxT("ordf"),170 },
            { wxT("ordm"),186 },
            { wxT("oslash"),248 },
            { wxT("otilde"),245 },
            { wxT("otimes"),8855 },
            { wxT("ouml"),246 },
            { wxT("para"),182 },
            { wxT("part"),8706 },
            { wxT("permil"),8240 },
            { wxT("perp"),8869 },
            { wxT("phi"),966 },
            { wxT("pi"),960 },
            { wxT("piv"),982 },
            { wxT("plusmn"),177 },
            { wxT("pound"),163 },
            { wxT("prime"),8242 },
            { wxT("prod"),8719 },
            { wxT("prop"),8733 },
            { wxT("psi"),968 },
            { wxT("quot"),34 },
            { wxT("rArr"),8658 },
            { wxT("radic"),8730 },
            { wxT("rang"),9002 },
            { wxT("raquo"),187 },
            { wxT("rarr"),8594 },
            { wxT("rceil"),8969 },
            { wxT("rdquo"),8221 },
            { wxT("real"),8476 },
            { wxT("reg"),174 },
            { wxT("rfloor"),8971 },
            { wxT("rho"),961 },
            { wxT("rlm"),8207 },
            { wxT("rsaquo"),8250 },
            { wxT("rsquo"),8217 },
            { wxT("sbquo"),8218 },
            { wxT("scaron"),353 },
            { wxT("sdot"),8901 },
            { wxT("sect"),167 },
            { wxT("shy"),173 },
            { wxT("sigma"),963 },
            { wxT("sigmaf"),962 },
            { wxT("sim"),8764 },
            { wxT("spades"),9824 },
            { wxT("sub"),8834 },
            { wxT("sube"),8838 },
            { wxT("sum"),8721 },
            { wxT("sup"),8835 },
            { wxT("sup1"),185 },
            { wxT("sup2"),178 },
            { wxT("sup3"),179 },
            { wxT("supe"),8839 },
            { wxT("szlig"),223 },
            { wxT("tau"),964 },
            { wxT("there4"),8756 },
            { wxT("theta"),952 },
            { wxT("thetasym"),977 },
            { wxT("thinsp"),8201 },
            { wxT("thorn"),254 },
            { wxT("tilde"),732 },
            { wxT("times"),215 },
            { wxT("trade"),8482 },
            { wxT("uArr"),8657 },
            { wxT("uacute"),250 },
            { wxT("uarr"),8593 },
            { wxT("ucirc"),251 },
            { wxT("ugrave"),249 },
            { wxT("uml"),168 },
            { wxT("upsih"),978 },
            { wxT("upsilon"),965 },
            { wxT("uuml"),252 },
            { wxT("weierp"),8472 },
            { wxT("xi"),958 },
            { wxT("yacute"),253 },
            { wxT("yen"),165 },
            { wxT("yuml"),255 },
            { wxT("zeta"),950 },
            { wxT("zwj"),8205 },
            { wxT("zwnj"),8204 },
            {NULL, 0}};

        // number of real rows; counted once, on the first lookup, by walking
        // up to the terminating row whose code is 0
        static size_t substitutions_cnt = 0;
        if (substitutions_cnt == 0)
        {
            for (; substitutions[substitutions_cnt].code != 0;
                   substitutions_cnt++) {}
        }

        wxHtmlEntityInfo *found =
            (wxHtmlEntityInfo*) bsearch(entity.c_str(), substitutions,
                                        substitutions_cnt,
                                        sizeof(wxHtmlEntityInfo),
                                        compar_entity);
        if (found)
            code = found->code;
    }
    else
    {
        // Numeric reference: "#<decimal>" or "#x<hex>" / "#X<hex>".
        const wxChar *digits = entity.c_str();
        digits++;                               // skip the '#'

        const wxChar *format = wxT("%u");
        if (*digits == wxT('x') || *digits == wxT('X'))
        {
            format = wxT("%x");
            digits++;                           // skip the 'x'
        }

        if (wxSscanf(digits, format, &code) != 1)
            code = 0;
    }

    // code 0 means "unknown name" or "unparsable number"
    return (code == 0) ? wxT('?') : GetCharForCode(code);
}

#endif
Commit	Line	Data
	1	/////////////////////////////////////////////////////////////////////////////
	2	// Name: htmlpars.cpp
	3	// Purpose: wxHtmlParser class (generic parser)
	4	// Author: Vaclav Slavik
	5	// RCS-ID: $Id$
	6	// Copyright: (c) 1999 Vaclav Slavik
	7	// Licence: wxWindows Licence
	8	/////////////////////////////////////////////////////////////////////////////
	9
	10
	11	#ifdef __GNUG__
	12	#pragma implementation
	13	#endif
	14
	15	#include "wx/wxprec.h"
	16
	17	#include "wx/defs.h"
	18	#if wxUSE_HTML && wxUSE_STREAMS
	19
	20	#ifdef __BORDLANDC__
	21	#pragma hdrstop
	22	#endif
	23
	24	#ifndef WXPRECOMP
	25	#include "wx/log.h"
	26	#include "wx/intl.h"
	27	#endif
	28
	29	#include "wx/tokenzr.h"
	30	#include "wx/wfstream.h"
	31	#include "wx/url.h"
	32	#include "wx/fontmap.h"
	33	#include "wx/html/htmldefs.h"
	34	#include "wx/html/htmlpars.h"
	35	#include "wx/dynarray.h"
	36	#include "wx/arrimpl.cpp"
	37
	38	//-----------------------------------------------------------------------------
	39	// wxHtmlParser helpers
	40	//-----------------------------------------------------------------------------
	41
	42	class wxHtmlTextPiece
	43	{
	44	public:
	45	wxHtmlTextPiece(int pos, int lng) : m_pos(pos), m_lng(lng) {}
	46	int m_pos, m_lng;
	47	};
	48
	49	WX_DECLARE_OBJARRAY(wxHtmlTextPiece, wxHtmlTextPieces);
	50	WX_DEFINE_OBJARRAY(wxHtmlTextPieces);
	51
	52	class wxHtmlParserState
	53	{
	54	public:
	55	wxHtmlTag *m_curTag;
	56	wxHtmlTag *m_tags;
	57	wxHtmlTextPieces *m_textPieces;
	58	int m_curTextPiece;
	59	wxString m_source;
	60	wxHtmlParserState *m_nextState;
	61	};
	62
	63	//-----------------------------------------------------------------------------
	64	// wxHtmlParser
	65	//-----------------------------------------------------------------------------
	66
	67	IMPLEMENT_ABSTRACT_CLASS(wxHtmlParser,wxObject)
	68
	69	wxHtmlParser::wxHtmlParser()
	70	: wxObject(), m_HandlersHash(wxKEY_STRING),
	71	m_FS(NULL), m_HandlersStack(NULL)
	72	{
	73	m_entitiesParser = new wxHtmlEntitiesParser;
	74	m_Tags = NULL;
	75	m_CurTag = NULL;
	76	m_TextPieces = NULL;
	77	m_CurTextPiece = 0;
	78	m_SavedStates = NULL;
	79	}
	80
	81	wxHtmlParser::~wxHtmlParser()
	82	{
	83	while (RestoreState()) {}
	84	DestroyDOMTree();
	85
	86	delete m_HandlersStack;
	87	m_HandlersHash.Clear();
	88	m_HandlersList.DeleteContents(TRUE);
	89	m_HandlersList.Clear();
	90	delete m_entitiesParser;
	91	}
	92
	93	wxObject* wxHtmlParser::Parse(const wxString& source)
	94	{
	95	wxObject *result;
	96
	97	InitParser(source);
	98	DoParsing();
	99	result = GetProduct();
	100	DoneParser();
	101	return result;
	102	}
	103
	104	void wxHtmlParser::InitParser(const wxString& source)
	105	{
	106	SetSource(source);
	107	}
	108
	109	void wxHtmlParser::DoneParser()
	110	{
	111	DestroyDOMTree();
	112	}
	113
	114	void wxHtmlParser::SetSource(const wxString& src)
	115	{
	116	DestroyDOMTree();
	117	m_Source = src;
	118	CreateDOMTree();
	119	m_CurTag = NULL;
	120	m_CurTextPiece = 0;
	121	}
	122
	123	void wxHtmlParser::CreateDOMTree()
	124	{
	125	wxHtmlTagsCache cache(m_Source);
	126	m_TextPieces = new wxHtmlTextPieces;
	127	CreateDOMSubTree(NULL, 0, m_Source.Length(), &cache);
	128	m_CurTextPiece = 0;
	129	}
	130
	131	void wxHtmlParser::CreateDOMSubTree(wxHtmlTag *cur,
	132	int begin_pos, int end_pos,
	133	wxHtmlTagsCache *cache)
	134	{
	135	if (end_pos <= begin_pos) return;
	136
	137	wxChar c;
	138	int i = begin_pos;
	139	int textBeginning = begin_pos;
	140
	141	while (i < end_pos)
	142	{
	143	c = m_Source.GetChar(i);
	144
	145	if (c == wxT('<'))
	146	{
	147	// add text to m_TextPieces:
	148	if (i - textBeginning > 0)
	149	m_TextPieces->Add(
	150	wxHtmlTextPiece(textBeginning, i - textBeginning));
	151
	152	// if it is a comment, skip it:
	153	if (i < end_pos-6 && m_Source.GetChar(i+1) == wxT('!') &&
	154	m_Source.GetChar(i+2) == wxT('-') &&
	155	m_Source.GetChar(i+3) == wxT('-'))
	156	{
	157	// Comments begin with "<!--" and end with "--[ \t\r\n]*>"
	158	// according to HTML 4.0
	159	int dashes = 0;
	160	i += 4;
	161	while (i < end_pos)
	162	{
	163	c = m_Source.GetChar(i++);
	164	if ((c == wxT(' ') \|\| c == wxT('\n') \|\|
	165	c == wxT('\r') \|\| c == wxT('\t')) && dashes >= 2) {}
	166	else if (c == wxT('>') && dashes >= 2)
	167	{
	168	textBeginning = i;
	169	break;
	170	}
	171	else if (c == wxT('-'))
	172	dashes++;
	173	else
	174	dashes = 0;
	175	}
	176	}
	177
	178	// add another tag to the tree:
	179	else if (i < end_pos-1 && m_Source.GetChar(i+1) != wxT('/'))
	180	{
	181	wxHtmlTag *chd;
	182	if (cur)
	183	chd = new wxHtmlTag(cur, m_Source,
	184	i, end_pos, cache, m_entitiesParser);
	185	else
	186	{
	187	chd = new wxHtmlTag(NULL, m_Source,
	188	i, end_pos, cache, m_entitiesParser);
	189	if (!m_Tags)
	190	{
	191	// if this is the first tag to be created make the root
	192	// m_Tags point to it:
	193	m_Tags = chd;
	194	}
	195	else
	196	{
	197	// if there is already a root tag add this tag as
	198	// the last sibling:
	199	chd->m_Prev = m_Tags->GetLastSibling();
	200	chd->m_Prev->m_Next = chd;
	201	}
	202	}
	203
	204	if (chd->HasEnding())
	205	{
	206	CreateDOMSubTree(chd,
	207	chd->GetBeginPos(), chd->GetEndPos1(),
	208	cache);
	209	i = chd->GetEndPos2();
	210	}
	211	else
	212	i = chd->GetBeginPos();
	213	textBeginning = i;
	214	}
	215
	216	// ... or skip ending tag:
	217	else
	218	{
	219	while (i < end_pos && m_Source.GetChar(i) != wxT('>')) i++;
	220	textBeginning = i+1;
	221	}
	222	}
	223	else i++;
	224	}
	225
	226	// add remaining text to m_TextPieces:
	227	if (end_pos - textBeginning > 0)
	228	m_TextPieces->Add(
	229	wxHtmlTextPiece(textBeginning, end_pos - textBeginning));
	230	}
	231
	232	void wxHtmlParser::DestroyDOMTree()
	233	{
	234	wxHtmlTag t1, t2;
	235	t1 = m_Tags;
	236	while (t1)
	237	{
	238	t2 = t1->GetNextSibling();
	239	delete t1;
	240	t1 = t2;
	241	}
	242	m_Tags = m_CurTag = NULL;
	243
	244	delete m_TextPieces;
	245	m_TextPieces = NULL;
	246	}
	247
	248	void wxHtmlParser::DoParsing()
	249	{
	250	m_CurTag = m_Tags;
	251	m_CurTextPiece = 0;
	252	DoParsing(0, m_Source.Length());
	253	}
	254
	255	void wxHtmlParser::DoParsing(int begin_pos, int end_pos)
	256	{
	257	if (end_pos <= begin_pos) return;
	258
	259	wxHtmlTextPieces& pieces = *m_TextPieces;
	260	size_t piecesCnt = pieces.GetCount();
	261
	262	while (begin_pos < end_pos)
	263	{
	264	while (m_CurTag && m_CurTag->GetBeginPos() < begin_pos)
	265	m_CurTag = m_CurTag->GetNextTag();
	266	while (m_CurTextPiece < piecesCnt &&
	267	pieces[m_CurTextPiece].m_pos < begin_pos)
	268	m_CurTextPiece++;
	269
	270	if (m_CurTextPiece < piecesCnt &&
	271	(!m_CurTag \|\|
	272	pieces[m_CurTextPiece].m_pos < m_CurTag->GetBeginPos()))
	273	{
	274	// Add text:
	275	AddText(GetEntitiesParser()->Parse(
	276	m_Source.Mid(pieces[m_CurTextPiece].m_pos,
	277	pieces[m_CurTextPiece].m_lng)));
	278	begin_pos = pieces[m_CurTextPiece].m_pos +
	279	pieces[m_CurTextPiece].m_lng;
	280	m_CurTextPiece++;
	281	}
	282	else if (m_CurTag)
	283	{
	284	// Add tag:
	285	if (m_CurTag)
	286	{
	287	if (m_CurTag->HasEnding())
	288	begin_pos = m_CurTag->GetEndPos2();
	289	else
	290	begin_pos = m_CurTag->GetBeginPos();
	291	}
	292	wxHtmlTag *t = m_CurTag;
	293	m_CurTag = m_CurTag->GetNextTag();
	294	AddTag(*t);
	295	}
	296	else break;
	297	}
	298	}
	299
	300	void wxHtmlParser::AddTag(const wxHtmlTag& tag)
	301	{
	302	wxHtmlTagHandler *h;
	303	bool inner = FALSE;
	304
	305	h = (wxHtmlTagHandler*) m_HandlersHash.Get(tag.GetName());
	306	if (h)
	307	inner = h->HandleTag(tag);
	308	if (!inner)
	309	{
	310	if (tag.HasEnding())
	311	DoParsing(tag.GetBeginPos(), tag.GetEndPos1());
	312	}
	313	}
	314
	315	void wxHtmlParser::AddTagHandler(wxHtmlTagHandler *handler)
	316	{
	317	wxString s(handler->GetSupportedTags());
	318	wxStringTokenizer tokenizer(s, wxT(", "));
	319
	320	while (tokenizer.HasMoreTokens())
	321	m_HandlersHash.Put(tokenizer.NextToken(), handler);
	322
	323	if (m_HandlersList.IndexOf(handler) == wxNOT_FOUND)
	324	m_HandlersList.Append(handler);
	325
	326	handler->SetParser(this);
	327	}
	328
	329	void wxHtmlParser::PushTagHandler(wxHtmlTagHandler *handler, wxString tags)
	330	{
	331	wxStringTokenizer tokenizer(tags, wxT(", "));
	332	wxString key;
	333
	334	if (m_HandlersStack == NULL)
	335	{
	336	m_HandlersStack = new wxList;
	337	m_HandlersStack->DeleteContents(TRUE);
	338	}
	339
	340	m_HandlersStack->Insert(new wxHashTable(m_HandlersHash));
	341
	342	while (tokenizer.HasMoreTokens())
	343	{
	344	key = tokenizer.NextToken();
	345	m_HandlersHash.Delete(key);
	346	m_HandlersHash.Put(key, handler);
	347	}
	348	}
	349
	350	void wxHtmlParser::PopTagHandler()
	351	{
	352	wxNode *first;
	353
	354	if (m_HandlersStack == NULL \|\|
	355	(first = m_HandlersStack->GetFirst()) == NULL)
	356	{
	357	wxLogWarning(_("Warning: attempt to remove HTML tag handler from empty stack."));
	358	return;
	359	}
	360	m_HandlersHash = ((wxHashTable) first->GetData());
	361	m_HandlersStack->DeleteNode(first);
	362	}
	363
	364	void wxHtmlParser::SetSourceAndSaveState(const wxString& src)
	365	{
	366	wxHtmlParserState *s = new wxHtmlParserState;
	367
	368	s->m_curTag = m_CurTag;
	369	s->m_tags = m_Tags;
	370	s->m_textPieces = m_TextPieces;
	371	s->m_curTextPiece = m_CurTextPiece;
	372	s->m_source = m_Source;
	373
	374	s->m_nextState = m_SavedStates;
	375	m_SavedStates = s;
	376
	377	m_CurTag = NULL;
	378	m_Tags = NULL;
	379	m_TextPieces = NULL;
	380	m_CurTextPiece = 0;
	381	m_Source = wxEmptyString;
	382
	383	SetSource(src);
	384	}
	385
	386	bool wxHtmlParser::RestoreState()
	387	{
	388	if (!m_SavedStates) return FALSE;
	389
	390	DestroyDOMTree();
	391
	392	wxHtmlParserState *s = m_SavedStates;
	393	m_SavedStates = s->m_nextState;
	394
	395	m_CurTag = s->m_curTag;
	396	m_Tags = s->m_tags;
	397	m_TextPieces = s->m_textPieces;
	398	m_CurTextPiece = s->m_curTextPiece;
	399	m_Source = s->m_source;
	400
	401	delete s;
	402	return TRUE;
	403	}
	404
	405	//-----------------------------------------------------------------------------
	406	// wxHtmlTagHandler
	407	//-----------------------------------------------------------------------------
	408
	409	IMPLEMENT_ABSTRACT_CLASS(wxHtmlTagHandler,wxObject)
	410
	411
	412	//-----------------------------------------------------------------------------
	413	// wxHtmlEntitiesParser
	414	//-----------------------------------------------------------------------------
	415
	416	IMPLEMENT_DYNAMIC_CLASS(wxHtmlEntitiesParser,wxObject)
	417
	418	wxHtmlEntitiesParser::wxHtmlEntitiesParser()
	419	#if wxUSE_WCHAR_T && !wxUSE_UNICODE
	420	: m_conv(NULL), m_encoding(wxFONTENCODING_SYSTEM)
	421	#endif
	422	{
	423	}
	424
	425	wxHtmlEntitiesParser::~wxHtmlEntitiesParser()
	426	{
	427	#if wxUSE_WCHAR_T && !wxUSE_UNICODE
	428	delete m_conv;
	429	#endif
	430	}
	431
	432	void wxHtmlEntitiesParser::SetEncoding(wxFontEncoding encoding)
	433	{
	434	#if wxUSE_WCHAR_T && !wxUSE_UNICODE
	435	if (encoding == m_encoding) return;
	436	delete m_conv;
	437	m_conv = NULL;
	438	m_encoding = encoding;
	439	if (m_encoding != wxFONTENCODING_SYSTEM)
	440	m_conv = new wxCSConv(wxFontMapper::GetEncodingName(m_encoding));
	441	#else
	442	(void) encoding;
	443	#endif
	444	}
	445
	446	wxString wxHtmlEntitiesParser::Parse(const wxString& input)
	447	{
	448	const wxChar c, last;
	449	const wxChar *in_str = input.c_str();
	450	wxString output;
	451
	452	for (c = in_str, last = in_str; *c != wxT('\0'); c++)
	453	{
	454	if (*c == wxT('&'))
	455	{
	456	if (c - last > 0)
	457	output.append(last, c - last);
	458	if (++c == wxT('\0')) break;
	459	wxString entity;
	460	const wxChar *ent_s = c;
	461	for (; (c >= wxT('a') && c <= wxT('z')) \|\|
	462	(c >= wxT('A') && c <= wxT('Z')) \|\|
	463	(c >= wxT('0') && c <= wxT('9')) \|\|
	464	c == wxT('_') \|\| c == wxT('#'); c++) {}
	465	entity.append(ent_s, c - ent_s);
	466	if (*c != wxT(';')) c--;
	467	last = c+1;
	468	output << GetEntityChar(entity);
	469	}
	470	}
	471	if (*last != wxT('\0'))
	472	output.append(last);
	473	return output;
	474	}
	475
	476	struct wxHtmlEntityInfo
	477	{
	478	const wxChar *name;
	479	unsigned code;
	480	};
	481
	482	static int LINKAGEMODE compar_entity(const void key, const void item)
	483	{
	484	return wxStrcmp((wxChar)key, ((wxHtmlEntityInfo)item)->name);
	485	}
	486
	487	wxChar wxHtmlEntitiesParser::GetCharForCode(unsigned code)
	488	{
	489	#if wxUSE_UNICODE
	490	return (wxChar)code;
	491	#elif wxUSE_WCHAR_T
	492	char buf[2];
	493	wchar_t wbuf[2];
	494	wbuf[0] = (wchar_t)code;
	495	wbuf[1] = 0;
	496	wxMBConv *conv = m_conv ? m_conv : &wxConvLocal;
	497	if (conv->WC2MB(buf, wbuf, 2) == (size_t)-1)
	498	return '?';
	499	return buf[0];
	500	#else
	501	return (code < 256) ? (wxChar)code : '?';
	502	#endif
	503	}
	504
	505	wxChar wxHtmlEntitiesParser::GetEntityChar(const wxString& entity)
	506	{
	507	unsigned code = 0;
	508
	509	if (entity[0] == wxT('#'))
	510	{
	511	const wxChar *ent_s = entity.c_str();
	512	const wxChar *format;
	513
	514	if (ent_s[1] == wxT('x') \|\| ent_s[1] == wxT('X'))
	515	{
	516	format = wxT("%x");
	517	ent_s++;
	518	}
	519	else
	520	format = wxT("%u");
	521	ent_s++;
	522
	523	if (wxSscanf(ent_s, format, &code) != 1)
	524	code = 0;
	525	}
	526	else
	527	{
	528	static wxHtmlEntityInfo substitutions[] = {
	529	{ wxT("AElig"),198 },
	530	{ wxT("Aacute"),193 },
	531	{ wxT("Acirc"),194 },
	532	{ wxT("Agrave"),192 },
	533	{ wxT("Alpha"),913 },
	534	{ wxT("Aring"),197 },
	535	{ wxT("Atilde"),195 },
	536	{ wxT("Auml"),196 },
	537	{ wxT("Beta"),914 },
	538	{ wxT("Ccedil"),199 },
	539	{ wxT("Chi"),935 },
	540	{ wxT("Dagger"),8225 },
	541	{ wxT("Delta"),916 },
	542	{ wxT("ETH"),208 },
	543	{ wxT("Eacute"),201 },
	544	{ wxT("Ecirc"),202 },
	545	{ wxT("Egrave"),200 },
	546	{ wxT("Epsilon"),917 },
	547	{ wxT("Eta"),919 },
	548	{ wxT("Euml"),203 },
	549	{ wxT("Gamma"),915 },
	550	{ wxT("Iacute"),205 },
	551	{ wxT("Icirc"),206 },
	552	{ wxT("Igrave"),204 },
	553	{ wxT("Iota"),921 },
	554	{ wxT("Iuml"),207 },
	555	{ wxT("Kappa"),922 },
	556	{ wxT("Lambda"),923 },
	557	{ wxT("Mu"),924 },
	558	{ wxT("Ntilde"),209 },
	559	{ wxT("Nu"),925 },
	560	{ wxT("OElig"),338 },
	561	{ wxT("Oacute"),211 },
	562	{ wxT("Ocirc"),212 },
	563	{ wxT("Ograve"),210 },
	564	{ wxT("Omega"),937 },
	565	{ wxT("Omicron"),927 },
	566	{ wxT("Oslash"),216 },
	567	{ wxT("Otilde"),213 },
	568	{ wxT("Ouml"),214 },
	569	{ wxT("Phi"),934 },
	570	{ wxT("Pi"),928 },
	571	{ wxT("Prime"),8243 },
	572	{ wxT("Psi"),936 },
	573	{ wxT("Rho"),929 },
	574	{ wxT("Scaron"),352 },
	575	{ wxT("Sigma"),931 },
	576	{ wxT("THORN"),222 },
	577	{ wxT("Tau"),932 },
	578	{ wxT("Theta"),920 },
	579	{ wxT("Uacute"),218 },
	580	{ wxT("Ucirc"),219 },
	581	{ wxT("Ugrave"),217 },
	582	{ wxT("Upsilon"),933 },
	583	{ wxT("Uuml"),220 },
	584	{ wxT("Xi"),926 },
	585	{ wxT("Yacute"),221 },
	586	{ wxT("Yuml"),376 },
	587	{ wxT("Zeta"),918 },
	588	{ wxT("aacute"),225 },
	589	{ wxT("acirc"),226 },
	590	{ wxT("acute"),180 },
	591	{ wxT("aelig"),230 },
	592	{ wxT("agrave"),224 },
	593	{ wxT("alefsym"),8501 },
	594	{ wxT("alpha"),945 },
	595	{ wxT("amp"),38 },
	596	{ wxT("and"),8743 },
	597	{ wxT("ang"),8736 },
	598	{ wxT("aring"),229 },
	599	{ wxT("asymp"),8776 },
	600	{ wxT("atilde"),227 },
	601	{ wxT("auml"),228 },
	602	{ wxT("bdquo"),8222 },
	603	{ wxT("beta"),946 },
	604	{ wxT("brvbar"),166 },
	605	{ wxT("bull"),8226 },
	606	{ wxT("cap"),8745 },
	607	{ wxT("ccedil"),231 },
	608	{ wxT("cedil"),184 },
	609	{ wxT("cent"),162 },
	610	{ wxT("chi"),967 },
	611	{ wxT("circ"),710 },
	612	{ wxT("clubs"),9827 },
	613	{ wxT("cong"),8773 },
	614	{ wxT("copy"),169 },
	615	{ wxT("crarr"),8629 },
	616	{ wxT("cup"),8746 },
	617	{ wxT("curren"),164 },
	618	{ wxT("dArr"),8659 },
	619	{ wxT("dagger"),8224 },
	620	{ wxT("darr"),8595 },
	621	{ wxT("deg"),176 },
	622	{ wxT("delta"),948 },
	623	{ wxT("diams"),9830 },
	624	{ wxT("divide"),247 },
	625	{ wxT("eacute"),233 },
	626	{ wxT("ecirc"),234 },
	627	{ wxT("egrave"),232 },
	628	{ wxT("empty"),8709 },
	629	{ wxT("emsp"),8195 },
	630	{ wxT("ensp"),8194 },
	631	{ wxT("epsilon"),949 },
	632	{ wxT("equiv"),8801 },
	633	{ wxT("eta"),951 },
	634	{ wxT("eth"),240 },
	635	{ wxT("euml"),235 },
	636	{ wxT("euro"),8364 },
	637	{ wxT("exist"),8707 },
	638	{ wxT("fnof"),402 },
	639	{ wxT("forall"),8704 },
	640	{ wxT("frac12"),189 },
	641	{ wxT("frac14"),188 },
	642	{ wxT("frac34"),190 },
	643	{ wxT("frasl"),8260 },
	644	{ wxT("gamma"),947 },
	645	{ wxT("ge"),8805 },
	646	{ wxT("gt"),62 },
	647	{ wxT("hArr"),8660 },
	648	{ wxT("harr"),8596 },
	649	{ wxT("hearts"),9829 },
	650	{ wxT("hellip"),8230 },
	651	{ wxT("iacute"),237 },
	652	{ wxT("icirc"),238 },
	653	{ wxT("iexcl"),161 },
	654	{ wxT("igrave"),236 },
	655	{ wxT("image"),8465 },
	656	{ wxT("infin"),8734 },
	657	{ wxT("int"),8747 },
	658	{ wxT("iota"),953 },
	659	{ wxT("iquest"),191 },
	660	{ wxT("isin"),8712 },
	661	{ wxT("iuml"),239 },
	662	{ wxT("kappa"),954 },
	663	{ wxT("lArr"),8656 },
	664	{ wxT("lambda"),955 },
	665	{ wxT("lang"),9001 },
	666	{ wxT("laquo"),171 },
	667	{ wxT("larr"),8592 },
	668	{ wxT("lceil"),8968 },
	669	{ wxT("ldquo"),8220 },
	670	{ wxT("le"),8804 },
	671	{ wxT("lfloor"),8970 },
	672	{ wxT("lowast"),8727 },
	673	{ wxT("loz"),9674 },
	674	{ wxT("lrm"),8206 },
	675	{ wxT("lsaquo"),8249 },
	676	{ wxT("lsquo"),8216 },
	677	{ wxT("lt"),60 },
	678	{ wxT("macr"),175 },
	679	{ wxT("mdash"),8212 },
	680	{ wxT("micro"),181 },
	681	{ wxT("middot"),183 },
	682	{ wxT("minus"),8722 },
	683	{ wxT("mu"),956 },
	684	{ wxT("nabla"),8711 },
	685	{ wxT("nbsp"),160 },
	686	{ wxT("ndash"),8211 },
	687	{ wxT("ne"),8800 },
	688	{ wxT("ni"),8715 },
	689	{ wxT("not"),172 },
	690	{ wxT("notin"),8713 },
	691	{ wxT("nsub"),8836 },
	692	{ wxT("ntilde"),241 },
	693	{ wxT("nu"),957 },
	694	{ wxT("oacute"),243 },
	695	{ wxT("ocirc"),244 },
	696	{ wxT("oelig"),339 },
	697	{ wxT("ograve"),242 },
	698	{ wxT("oline"),8254 },
	699	{ wxT("omega"),969 },
	700	{ wxT("omicron"),959 },
	701	{ wxT("oplus"),8853 },
	702	{ wxT("or"),8744 },
	703	{ wxT("ordf"),170 },
	704	{ wxT("ordm"),186 },
	705	{ wxT("oslash"),248 },
	706	{ wxT("otilde"),245 },
	707	{ wxT("otimes"),8855 },
	708	{ wxT("ouml"),246 },
	709	{ wxT("para"),182 },
	710	{ wxT("part"),8706 },
	711	{ wxT("permil"),8240 },
	712	{ wxT("perp"),8869 },
	713	{ wxT("phi"),966 },
	714	{ wxT("pi"),960 },
	715	{ wxT("piv"),982 },
	716	{ wxT("plusmn"),177 },
	717	{ wxT("pound"),163 },
	718	{ wxT("prime"),8242 },
	719	{ wxT("prod"),8719 },
	720	{ wxT("prop"),8733 },
	721	{ wxT("psi"),968 },
	722	{ wxT("quot"),34 },
	723	{ wxT("rArr"),8658 },
	724	{ wxT("radic"),8730 },
	725	{ wxT("rang"),9002 },
	726	{ wxT("raquo"),187 },
	727	{ wxT("rarr"),8594 },
	728	{ wxT("rceil"),8969 },
	729	{ wxT("rdquo"),8221 },
	730	{ wxT("real"),8476 },
	731	{ wxT("reg"),174 },
	732	{ wxT("rfloor"),8971 },
	733	{ wxT("rho"),961 },
	734	{ wxT("rlm"),8207 },
	735	{ wxT("rsaquo"),8250 },
	736	{ wxT("rsquo"),8217 },
	737	{ wxT("sbquo"),8218 },
	738	{ wxT("scaron"),353 },
	739	{ wxT("sdot"),8901 },
	740	{ wxT("sect"),167 },
	741	{ wxT("shy"),173 },
	742	{ wxT("sigma"),963 },
	743	{ wxT("sigmaf"),962 },
	744	{ wxT("sim"),8764 },
	745	{ wxT("spades"),9824 },
	746	{ wxT("sub"),8834 },
	747	{ wxT("sube"),8838 },
	748	{ wxT("sum"),8721 },
	749	{ wxT("sup"),8835 },
	750	{ wxT("sup1"),185 },
	751	{ wxT("sup2"),178 },
	752	{ wxT("sup3"),179 },
	753	{ wxT("supe"),8839 },
	754	{ wxT("szlig"),223 },
	755	{ wxT("tau"),964 },
	756	{ wxT("there4"),8756 },
	757	{ wxT("theta"),952 },
	758	{ wxT("thetasym"),977 },
	759	{ wxT("thinsp"),8201 },
	760	{ wxT("thorn"),254 },
	761	{ wxT("tilde"),732 },
	762	{ wxT("times"),215 },
	763	{ wxT("trade"),8482 },
	764	{ wxT("uArr"),8657 },
	765	{ wxT("uacute"),250 },
	766	{ wxT("uarr"),8593 },
	767	{ wxT("ucirc"),251 },
	768	{ wxT("ugrave"),249 },
	769	{ wxT("uml"),168 },
	770	{ wxT("upsih"),978 },
	771	{ wxT("upsilon"),965 },
	772	{ wxT("uuml"),252 },
	773	{ wxT("weierp"),8472 },
	774	{ wxT("xi"),958 },
	775	{ wxT("yacute"),253 },
	776	{ wxT("yen"),165 },
	777	{ wxT("yuml"),255 },
	778	{ wxT("zeta"),950 },
	779	{ wxT("zwj"),8205 },
	780	{ wxT("zwnj"),8204 },
	781	{NULL, 0}};
	782	static size_t substitutions_cnt = 0;
	783
	784	if (substitutions_cnt == 0)
	785	while (substitutions[substitutions_cnt].code != 0)
	786	substitutions_cnt++;
	787
	788	wxHtmlEntityInfo *info;
	789	info = (wxHtmlEntityInfo*) bsearch(entity.c_str(), substitutions,
	790	substitutions_cnt,
	791	sizeof(wxHtmlEntityInfo),
	792	compar_entity);
	793	if (info)
	794	code = info->code;
	795	}
	796
	797	if (code == 0)
	798	return wxT('?');
	799	else
	800	return GetCharForCode(code);
	801	}
	802
	803	#endif