X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/6cc4e6b81a9f2a0275d7fa0c6171658b24a2af8a..327940530ced978fa9cdef134a3a52ec8c1ee476:/src/html/htmlpars.cpp diff --git a/src/html/htmlpars.cpp b/src/html/htmlpars.cpp index 69be9d281e..ae752eafba 100644 --- a/src/html/htmlpars.cpp +++ b/src/html/htmlpars.cpp @@ -8,8 +8,8 @@ ///////////////////////////////////////////////////////////////////////////// -#ifdef __GNUG__ -#pragma implementation +#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA) +#pragma implementation "htmlpars.h" #endif #include "wx/wxprec.h" @@ -17,7 +17,7 @@ #include "wx/defs.h" #if wxUSE_HTML && wxUSE_STREAMS -#ifdef __BORDLANDC__ +#ifdef __BORLANDC__ #pragma hdrstop #endif @@ -35,6 +35,11 @@ #include "wx/dynarray.h" #include "wx/arrimpl.cpp" + +// DLL options compatibility check: +#include "wx/app.h" +WX_CHECK_BUILD_OPTIONS("wxHTML") + //----------------------------------------------------------------------------- // wxHtmlParser helpers //----------------------------------------------------------------------------- @@ -82,21 +87,26 @@ wxHtmlParser::~wxHtmlParser() { while (RestoreState()) {} DestroyDOMTree(); - + + if (m_HandlersStack) + { + wxList& tmp = *m_HandlersStack; + wxList::iterator it, en; + for( it = tmp.begin(), en = tmp.end(); it != en; ++it ) + delete (wxHashTable*)*it; + tmp.clear(); + } delete m_HandlersStack; m_HandlersHash.Clear(); - m_HandlersList.DeleteContents(TRUE); - m_HandlersList.Clear(); + WX_CLEAR_LIST(wxList, m_HandlersList); delete m_entitiesParser; } wxObject* wxHtmlParser::Parse(const wxString& source) { - wxObject *result; - InitParser(source); DoParsing(); - result = GetProduct(); + wxObject *result = GetProduct(); DoneParser(); return result; } @@ -104,6 +114,7 @@ wxObject* wxHtmlParser::Parse(const wxString& source) void wxHtmlParser::InitParser(const wxString& source) { SetSource(source); + m_stopParsing = FALSE; } void wxHtmlParser::DoneParser() @@ -128,6 +139,8 @@ void wxHtmlParser::CreateDOMTree() m_CurTextPiece = 0; } +extern bool wxIsCDATAElement(const wxChar *tag); + void wxHtmlParser::CreateDOMSubTree(wxHtmlTag *cur, int begin_pos, int end_pos, wxHtmlTagsCache *cache) @@ -138,6 +151,15 @@ void wxHtmlParser::CreateDOMSubTree(wxHtmlTag *cur, int i = begin_pos; int textBeginning = begin_pos; + // If the tag contains CDATA text, we include the text between beginning + // and ending tag verbosely. Setting i=end_pos will skip to the very + // end of this function where text piece is added, bypassing any child + // tags parsing (CDATA element can't have child elements by definition): + if (cur != NULL && wxIsCDATAElement(cur->GetName().c_str())) + { + i = end_pos; + } + while (i < end_pos) { c = m_Source.GetChar(i); @@ -210,6 +232,7 @@ void wxHtmlParser::CreateDOMSubTree(wxHtmlTag *cur, } else i = chd->GetBeginPos(); + textBeginning = i; } @@ -292,6 +315,8 @@ void wxHtmlParser::DoParsing(int begin_pos, int end_pos) wxHtmlTag *t = m_CurTag; m_CurTag = m_CurTag->GetNextTag(); AddTag(*t); + if (m_stopParsing) + return; } else break; } @@ -304,7 +329,11 @@ void wxHtmlParser::AddTag(const wxHtmlTag& tag) h = (wxHtmlTagHandler*) m_HandlersHash.Get(tag.GetName()); if (h) + { inner = h->HandleTag(tag); + if (m_stopParsing) + return; + } if (!inner) { if (tag.HasEnding()) @@ -334,10 +363,9 @@ void wxHtmlParser::PushTagHandler(wxHtmlTagHandler *handler, wxString tags) if (m_HandlersStack == NULL) { m_HandlersStack = new wxList; - m_HandlersStack->DeleteContents(TRUE); } - m_HandlersStack->Insert(new wxHashTable(m_HandlersHash)); + m_HandlersStack->Insert((wxObject*)new wxHashTable(m_HandlersHash)); while (tokenizer.HasMoreTokens()) { @@ -349,16 +377,22 @@ void wxHtmlParser::PushTagHandler(wxHtmlTagHandler *handler, wxString tags) void wxHtmlParser::PopTagHandler() { - wxNode *first; - - if (m_HandlersStack == NULL || - (first = m_HandlersStack->GetFirst()) == NULL) + wxList::compatibility_iterator first; + + if ( !m_HandlersStack || +#if wxUSE_STL + !(first = m_HandlersStack->GetFirst()) +#else // !wxUSE_STL + ((first = m_HandlersStack->GetFirst()) == NULL) +#endif // wxUSE_STL/!wxUSE_STL + ) { wxLogWarning(_("Warning: attempt to remove HTML tag handler from empty stack.")); return; } m_HandlersHash = *((wxHashTable*) first->GetData()); - m_HandlersStack->DeleteNode(first); + delete (wxHashTable*) first->GetData(); + m_HandlersStack->Erase(first); } void wxHtmlParser::SetSourceAndSaveState(const wxString& src) @@ -432,11 +466,15 @@ wxHtmlEntitiesParser::~wxHtmlEntitiesParser() void wxHtmlEntitiesParser::SetEncoding(wxFontEncoding encoding) { #if wxUSE_WCHAR_T && !wxUSE_UNICODE - if (encoding == m_encoding) return; + if (encoding == m_encoding) + return; + delete m_conv; - m_conv = NULL; + m_encoding = encoding; - if (m_encoding != wxFONTENCODING_SYSTEM) + if (m_encoding == wxFONTENCODING_SYSTEM) + m_conv = NULL; + else m_conv = new wxCSConv(wxFontMapper::GetEncodingName(m_encoding)); #else (void) encoding; @@ -496,11 +534,10 @@ extern "C" int LINKAGEMODE wxHtmlEntityCompare(const void *key, const void *item return wxStrcmp((wxChar*)key, ((wxHtmlEntityInfo*)item)->name); } +#if !wxUSE_UNICODE wxChar wxHtmlEntitiesParser::GetCharForCode(unsigned code) { -#if wxUSE_UNICODE - return (wxChar)code; -#elif wxUSE_WCHAR_T +#if wxUSE_WCHAR_T char buf[2]; wchar_t wbuf[2]; wbuf[0] = (wchar_t)code; @@ -513,6 +550,7 @@ wxChar wxHtmlEntitiesParser::GetCharForCode(unsigned code) return (code < 256) ? (wxChar)code : '?'; #endif } +#endif wxChar wxHtmlEntitiesParser::GetEntityChar(const wxString& entity) { @@ -818,4 +856,68 @@ wxFSFile *wxHtmlParser::OpenURL(wxHtmlURLType WXUNUSED(type), return GetFS()->OpenFile(url); } + +//----------------------------------------------------------------------------- +// wxHtmlParser::ExtractCharsetInformation +//----------------------------------------------------------------------------- + +class wxMetaTagParser : public wxHtmlParser +{ +public: + wxMetaTagParser() { } + + wxObject* GetProduct() { return NULL; } + +protected: + virtual void AddText(const wxChar* WXUNUSED(txt)) {} + + DECLARE_NO_COPY_CLASS(wxMetaTagParser) +}; + +class wxMetaTagHandler : public wxHtmlTagHandler +{ +public: + wxMetaTagHandler(wxString *retval) : wxHtmlTagHandler(), m_retval(retval) {} + wxString GetSupportedTags() { return wxT("META,BODY"); } + bool HandleTag(const wxHtmlTag& tag); + +private: + wxString *m_retval; + + DECLARE_NO_COPY_CLASS(wxMetaTagHandler) +}; + +bool wxMetaTagHandler::HandleTag(const wxHtmlTag& tag) +{ + if (tag.GetName() == _T("BODY")) + { + m_Parser->StopParsing(); + return FALSE; + } + + if (tag.HasParam(_T("HTTP-EQUIV")) && + tag.GetParam(_T("HTTP-EQUIV")).IsSameAs(_T("Content-Type"), false) && + tag.HasParam(_T("CONTENT"))) + { + wxString content = tag.GetParam(_T("CONTENT")).Lower(); + if (content.Left(19) == _T("text/html; charset=")) + { + *m_retval = content.Mid(19); + m_Parser->StopParsing(); + } + } + return FALSE; +} + + +/*static*/ +wxString wxHtmlParser::ExtractCharsetInformation(const wxString& markup) +{ + wxString charset; + wxMetaTagParser parser; + parser.AddTagHandler(new wxMetaTagHandler(&charset)); + parser.Parse(markup); + return charset; +} + #endif