X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/223d09f6b523aac674ef9b72a883dfa8d37c5d4e..6a0fab3a3ae4d9b7372a25e81faadeb842b9ba6f:/src/html/htmlpars.cpp diff --git a/src/html/htmlpars.cpp b/src/html/htmlpars.cpp index d3f772440f..69be9d281e 100644 --- a/src/html/htmlpars.cpp +++ b/src/html/htmlpars.cpp @@ -12,26 +12,53 @@ #pragma implementation #endif -#include +#include "wx/wxprec.h" #include "wx/defs.h" -#if wxUSE_HTML +#if wxUSE_HTML && wxUSE_STREAMS #ifdef __BORDLANDC__ #pragma hdrstop #endif #ifndef WXPRECOMP -#include + #include "wx/log.h" + #include "wx/intl.h" #endif #include "wx/tokenzr.h" #include "wx/wfstream.h" #include "wx/url.h" +#include "wx/fontmap.h" #include "wx/html/htmldefs.h" #include "wx/html/htmlpars.h" +#include "wx/dynarray.h" +#include "wx/arrimpl.cpp" +//----------------------------------------------------------------------------- +// wxHtmlParser helpers +//----------------------------------------------------------------------------- + +class wxHtmlTextPiece +{ +public: + wxHtmlTextPiece(int pos, int lng) : m_pos(pos), m_lng(lng) {} + int m_pos, m_lng; +}; +WX_DECLARE_OBJARRAY(wxHtmlTextPiece, wxHtmlTextPieces); +WX_DEFINE_OBJARRAY(wxHtmlTextPieces); + +class wxHtmlParserState +{ +public: + wxHtmlTag *m_curTag; + wxHtmlTag *m_tags; + wxHtmlTextPieces *m_textPieces; + int m_curTextPiece; + wxString m_source; + wxHtmlParserState *m_nextState; +}; //----------------------------------------------------------------------------- // wxHtmlParser @@ -39,6 +66,29 @@ IMPLEMENT_ABSTRACT_CLASS(wxHtmlParser,wxObject) +wxHtmlParser::wxHtmlParser() + : wxObject(), m_HandlersHash(wxKEY_STRING), + m_FS(NULL), m_HandlersStack(NULL) +{ + m_entitiesParser = new wxHtmlEntitiesParser; + m_Tags = NULL; + m_CurTag = NULL; + m_TextPieces = NULL; + m_CurTextPiece = 0; + m_SavedStates = NULL; +} + +wxHtmlParser::~wxHtmlParser() +{ + while (RestoreState()) {} + DestroyDOMTree(); + + delete m_HandlersStack; + m_HandlersHash.Clear(); + m_HandlersList.DeleteContents(TRUE); + m_HandlersList.Clear(); + delete m_entitiesParser; +} wxObject* wxHtmlParser::Parse(const wxString& source) { @@ -51,70 +101,201 @@ wxObject* wxHtmlParser::Parse(const wxString& source) return result; } - - void wxHtmlParser::InitParser(const wxString& source) { - m_Source = source; - m_Cache = new wxHtmlTagsCache(m_Source); + SetSource(source); } - - - + void wxHtmlParser::DoneParser() { - delete m_Cache; - m_Cache = NULL; + DestroyDOMTree(); } - - -#define HTML_MAX_BUFLEN 1024 - -void wxHtmlParser::DoParsing(int begin_pos, int end_pos) +void wxHtmlParser::SetSource(const wxString& src) { - char temp[HTML_BUFLEN], c; - int i; - int templen; - - templen = 0; - i = begin_pos; + DestroyDOMTree(); + m_Source = src; + CreateDOMTree(); + m_CurTag = NULL; + m_CurTextPiece = 0; +} - while (i < end_pos) { - c = m_Source[i]; +void wxHtmlParser::CreateDOMTree() +{ + wxHtmlTagsCache cache(m_Source); + m_TextPieces = new wxHtmlTextPieces; + CreateDOMSubTree(NULL, 0, m_Source.Length(), &cache); + m_CurTextPiece = 0; +} - // continue building word: - if (c != '<') { - temp[templen++] = c; - if (templen == HTML_BUFLEN-1) { - temp[templen] = 0; - AddText(temp); - templen = 0; +void wxHtmlParser::CreateDOMSubTree(wxHtmlTag *cur, + int begin_pos, int end_pos, + wxHtmlTagsCache *cache) +{ + if (end_pos <= begin_pos) return; + + wxChar c; + int i = begin_pos; + int textBeginning = begin_pos; + + while (i < end_pos) + { + c = m_Source.GetChar(i); + + if (c == wxT('<')) + { + // add text to m_TextPieces: + if (i - textBeginning > 0) + m_TextPieces->Add( + wxHtmlTextPiece(textBeginning, i - textBeginning)); + + // if it is a comment, skip it: + if (i < end_pos-6 && m_Source.GetChar(i+1) == wxT('!') && + m_Source.GetChar(i+2) == wxT('-') && + m_Source.GetChar(i+3) == wxT('-')) + { + // Comments begin with "