X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/7127d12912341510250bfcf4921ab744faf775e0..9d5507f7a2701395e1d5c121bd877bb9066ee6ea:/src/html/htmlpars.cpp diff --git a/src/html/htmlpars.cpp b/src/html/htmlpars.cpp index 42101a4c0e..f89b6f579f 100644 --- a/src/html/htmlpars.cpp +++ b/src/html/htmlpars.cpp @@ -1,29 +1,26 @@ ///////////////////////////////////////////////////////////////////////////// -// Name: htmlpars.cpp +// Name: src/html/htmlpars.cpp // Purpose: wxHtmlParser class (generic parser) // Author: Vaclav Slavik // RCS-ID: $Id$ // Copyright: (c) 1999 Vaclav Slavik -// Licence: wxWindows Licence +// Licence: wxWindows licence ///////////////////////////////////////////////////////////////////////////// - -#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA) -#pragma implementation "htmlpars.h" -#endif - #include "wx/wxprec.h" -#include "wx/defs.h" -#if wxUSE_HTML && wxUSE_STREAMS - #ifdef __BORLANDC__ -#pragma hdrstop + #pragma hdrstop #endif -#ifndef WXPRECOMP +#if wxUSE_HTML && wxUSE_STREAMS + +#ifndef WX_PRECOMP + #include "wx/dynarray.h" #include "wx/log.h" #include "wx/intl.h" + #include "wx/app.h" + #include "wx/wxcrtvararg.h" #endif #include "wx/tokenzr.h" @@ -32,17 +29,17 @@ #include "wx/fontmap.h" #include "wx/html/htmldefs.h" #include "wx/html/htmlpars.h" -#include "wx/dynarray.h" -#include "wx/arrimpl.cpp" +#include "wx/vector.h" #ifdef __WXWINCE__ #include "wx/msw/wince/missing.h" // for bsearch() #endif // DLL options compatibility check: -#include "wx/app.h" WX_CHECK_BUILD_OPTIONS("wxHTML") +const wxChar *wxTRACE_HTML_DEBUG = _T("htmldebug"); + //----------------------------------------------------------------------------- // wxHtmlParser helpers //----------------------------------------------------------------------------- @@ -50,12 +47,17 @@ WX_CHECK_BUILD_OPTIONS("wxHTML") class wxHtmlTextPiece { public: - wxHtmlTextPiece(int pos, int lng) : m_pos(pos), m_lng(lng) {} - int m_pos, m_lng; + wxHtmlTextPiece() {} + wxHtmlTextPiece(const wxString::const_iterator& start, + const wxString::const_iterator& end) + : m_start(start), m_end(end) {} + wxString::const_iterator m_start, m_end; }; -WX_DECLARE_OBJARRAY(wxHtmlTextPiece, wxHtmlTextPieces); -WX_DEFINE_OBJARRAY(wxHtmlTextPieces); +// NB: this is an empty class and not typedef because of forward declaration +class wxHtmlTextPieces : public wxVector +{ +}; class wxHtmlParserState { @@ -64,7 +66,7 @@ public: wxHtmlTag *m_tags; wxHtmlTextPieces *m_textPieces; int m_curTextPiece; - wxString m_source; + const wxString *m_source; wxHtmlParserState *m_nextState; }; @@ -75,9 +77,10 @@ public: IMPLEMENT_ABSTRACT_CLASS(wxHtmlParser,wxObject) wxHtmlParser::wxHtmlParser() - : wxObject(), m_HandlersHash(wxKEY_STRING), - m_FS(NULL), m_HandlersStack(NULL) + : wxObject(), + m_FS(NULL) { + m_Source = NULL; m_entitiesParser = new wxHtmlEntitiesParser; m_Tags = NULL; m_CurTag = NULL; @@ -91,18 +94,10 @@ wxHtmlParser::~wxHtmlParser() while (RestoreState()) {} DestroyDOMTree(); - if (m_HandlersStack) - { - wxList& tmp = *m_HandlersStack; - wxList::iterator it, en; - for( it = tmp.begin(), en = tmp.end(); it != en; ++it ) - delete (wxHashTable*)*it; - tmp.clear(); - } - delete m_HandlersStack; - m_HandlersHash.Clear(); - WX_CLEAR_LIST(wxList, m_HandlersList); + WX_CLEAR_ARRAY(m_HandlersStack); + WX_CLEAR_HASH_SET(wxHtmlTagHandlersSet, m_HandlersSet); delete m_entitiesParser; + delete m_Source; } wxObject* wxHtmlParser::Parse(const wxString& source) @@ -117,7 +112,7 @@ wxObject* wxHtmlParser::Parse(const wxString& source) void wxHtmlParser::InitParser(const wxString& source) { SetSource(source); - m_stopParsing = FALSE; + m_stopParsing = false; } void wxHtmlParser::DoneParser() @@ -128,7 +123,15 @@ void wxHtmlParser::DoneParser() void wxHtmlParser::SetSource(const wxString& src) { DestroyDOMTree(); - m_Source = src; + // NB: This is allocated on heap because wxHtmlTag uses iterators and + // making a copy of m_Source string in SetSourceAndSaveState() and + // RestoreState() would invalidate them (because wxString::m_impl's + // memory would change completely twice and iterators use pointers + // into it). So instead, we keep the string object intact and only + // store/restore pointer to it, for which we need it to be allocated + // on the heap. + delete m_Source; + m_Source = new wxString(src); CreateDOMTree(); m_CurTag = NULL; m_CurTextPiece = 0; @@ -136,73 +139,54 @@ void wxHtmlParser::SetSource(const wxString& src) void wxHtmlParser::CreateDOMTree() { - wxHtmlTagsCache cache(m_Source); + wxHtmlTagsCache cache(*m_Source); m_TextPieces = new wxHtmlTextPieces; - CreateDOMSubTree(NULL, 0, m_Source.Length(), &cache); + CreateDOMSubTree(NULL, m_Source->begin(), m_Source->end(), &cache); m_CurTextPiece = 0; } -extern bool wxIsCDATAElement(const wxChar *tag); +extern bool wxIsCDATAElement(const wxString& tag); void wxHtmlParser::CreateDOMSubTree(wxHtmlTag *cur, - int begin_pos, int end_pos, + const wxString::const_iterator& begin_pos, + const wxString::const_iterator& end_pos, wxHtmlTagsCache *cache) { - if (end_pos <= begin_pos) return; + if (end_pos <= begin_pos) + return; wxChar c; - int i = begin_pos; - int textBeginning = begin_pos; + wxString::const_iterator i = begin_pos; + wxString::const_iterator textBeginning = begin_pos; // If the tag contains CDATA text, we include the text between beginning // and ending tag verbosely. Setting i=end_pos will skip to the very // end of this function where text piece is added, bypassing any child // tags parsing (CDATA element can't have child elements by definition): - if (cur != NULL && wxIsCDATAElement(cur->GetName().c_str())) + if (cur != NULL && wxIsCDATAElement(cur->GetName())) { i = end_pos; } while (i < end_pos) { - c = m_Source.GetChar(i); + c = *i; if (c == wxT('<')) { // add text to m_TextPieces: - if (i - textBeginning > 0) - m_TextPieces->Add( - wxHtmlTextPiece(textBeginning, i - textBeginning)); + if (i > textBeginning) + m_TextPieces->push_back(wxHtmlTextPiece(textBeginning, i)); // if it is a comment, skip it: - if (i < end_pos-6 && m_Source.GetChar(i+1) == wxT('!') && - m_Source.GetChar(i+2) == wxT('-') && - m_Source.GetChar(i+3) == wxT('-')) + if ( SkipCommentTag(i, m_Source->end()) ) { - // Comments begin with "