X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/537760597bd4abea4e34ec4134d7c91365fb4328..4064f7deac3752bb5b20505daedc4ed750d19dd9:/src/html/htmlpars.cpp diff --git a/src/html/htmlpars.cpp b/src/html/htmlpars.cpp index 3153ef819d..b1da8c92df 100644 --- a/src/html/htmlpars.cpp +++ b/src/html/htmlpars.cpp @@ -4,20 +4,15 @@ // Author: Vaclav Slavik // RCS-ID: $Id$ // Copyright: (c) 1999 Vaclav Slavik -// Licence: wxWindows Licence +// Licence: wxWindows licence ///////////////////////////////////////////////////////////////////////////// - -#ifdef __GNUG__ -#pragma implementation -#endif - #include "wx/wxprec.h" #include "wx/defs.h" #if wxUSE_HTML && wxUSE_STREAMS -#ifdef __BORDLANDC__ +#ifdef __BORLANDC__ #pragma hdrstop #endif @@ -32,8 +27,43 @@ #include "wx/fontmap.h" #include "wx/html/htmldefs.h" #include "wx/html/htmlpars.h" +#include "wx/dynarray.h" +#include "wx/arrimpl.cpp" + +#ifdef __WXWINCE__ + #include "wx/msw/wince/missing.h" // for bsearch() +#endif + +// DLL options compatibility check: +#include "wx/app.h" +WX_CHECK_BUILD_OPTIONS("wxHTML") +const wxChar *wxTRACE_HTML_DEBUG = _T("htmldebug"); +//----------------------------------------------------------------------------- +// wxHtmlParser helpers +//----------------------------------------------------------------------------- + +class wxHtmlTextPiece +{ +public: + wxHtmlTextPiece(int pos, int lng) : m_pos(pos), m_lng(lng) {} + int m_pos, m_lng; +}; + +WX_DECLARE_OBJARRAY(wxHtmlTextPiece, wxHtmlTextPieces); +WX_DEFINE_OBJARRAY(wxHtmlTextPieces) + +class wxHtmlParserState +{ +public: + wxHtmlTag *m_curTag; + wxHtmlTag *m_tags; + wxHtmlTextPieces *m_textPieces; + int m_curTextPiece; + wxString m_source; + wxHtmlParserState *m_nextState; +}; //----------------------------------------------------------------------------- // wxHtmlParser @@ -42,28 +72,41 @@ IMPLEMENT_ABSTRACT_CLASS(wxHtmlParser,wxObject) wxHtmlParser::wxHtmlParser() - : wxObject(), m_Cache(NULL), m_HandlersHash(wxKEY_STRING), + : wxObject(), m_HandlersHash(wxKEY_STRING), m_FS(NULL), m_HandlersStack(NULL) { m_entitiesParser = new wxHtmlEntitiesParser; + m_Tags = NULL; + m_CurTag = NULL; + m_TextPieces = NULL; + m_CurTextPiece = 0; + m_SavedStates = NULL; } wxHtmlParser::~wxHtmlParser() { + while (RestoreState()) {} + DestroyDOMTree(); + + if (m_HandlersStack) + { + wxList& tmp = *m_HandlersStack; + wxList::iterator it, en; + for( it = tmp.begin(), en = tmp.end(); it != en; ++it ) + delete (wxHashTable*)*it; + tmp.clear(); + } delete m_HandlersStack; m_HandlersHash.Clear(); - m_HandlersList.DeleteContents(TRUE); - m_HandlersList.Clear(); + WX_CLEAR_LIST(wxList, m_HandlersList); delete m_entitiesParser; } wxObject* wxHtmlParser::Parse(const wxString& source) { - wxObject *result; - InitParser(source); DoParsing(); - result = GetProduct(); + wxObject *result = GetProduct(); DoneParser(); return result; } @@ -71,76 +114,222 @@ wxObject* wxHtmlParser::Parse(const wxString& source) void wxHtmlParser::InitParser(const wxString& source) { SetSource(source); + m_stopParsing = false; } void wxHtmlParser::DoneParser() { - delete m_Cache; - m_Cache = NULL; + DestroyDOMTree(); } void wxHtmlParser::SetSource(const wxString& src) { + DestroyDOMTree(); m_Source = src; - delete m_Cache; - m_Cache = new wxHtmlTagsCache(m_Source); + CreateDOMTree(); + m_CurTag = NULL; + m_CurTextPiece = 0; } -void wxHtmlParser::DoParsing(int begin_pos, int end_pos) +void wxHtmlParser::CreateDOMTree() +{ + wxHtmlTagsCache cache(m_Source); + m_TextPieces = new wxHtmlTextPieces; + CreateDOMSubTree(NULL, 0, m_Source.Length(), &cache); + m_CurTextPiece = 0; +} + +extern bool wxIsCDATAElement(const wxChar *tag); + +void wxHtmlParser::CreateDOMSubTree(wxHtmlTag *cur, + int begin_pos, int end_pos, + wxHtmlTagsCache *cache) { if (end_pos <= begin_pos) return; - char c; - char *temp = new char[end_pos - begin_pos + 1]; - int i; - int templen; + wxChar c; + int i = begin_pos; + int textBeginning = begin_pos; - templen = 0; - i = begin_pos; + // If the tag contains CDATA text, we include the text between beginning + // and ending tag verbosely. Setting i=end_pos will skip to the very + // end of this function where text piece is added, bypassing any child + // tags parsing (CDATA element can't have child elements by definition): + if (cur != NULL && wxIsCDATAElement(cur->GetName().c_str())) + { + i = end_pos; + } while (i < end_pos) { - c = m_Source[(unsigned int) i]; + c = m_Source.GetChar(i); - // continue building word: - if (c != '<') - { - temp[templen++] = c; - i++; - } + if (c == wxT('<')) + { + // add text to m_TextPieces: + if (i - textBeginning > 0) + m_TextPieces->Add( + wxHtmlTextPiece(textBeginning, i - textBeginning)); + + // if it is a comment, skip it: + if (i < end_pos-6 && m_Source.GetChar(i+1) == wxT('!') && + m_Source.GetChar(i+2) == wxT('-') && + m_Source.GetChar(i+3) == wxT('-')) + { + // Comments begin with "