From 6c62a62bf732e57dfb39f303f61d92bcc6b404f0 Mon Sep 17 00:00:00 2001 From: =?utf8?q?V=C3=A1clav=20Slav=C3=ADk?= Date: Thu, 19 Jul 2001 22:30:22 +0000 Subject: [PATCH] new wxHtmlParser core git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@11111 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775 --- include/wx/html/htmlpars.h | 42 ++++++++++++++++++++++++++++++-------- include/wx/html/htmltag.h | 33 +++++++++++++++++++++++------- include/wx/html/winpars.h | 6 ++++-- 3 files changed, 64 insertions(+), 17 deletions(-) diff --git a/include/wx/html/htmlpars.h b/include/wx/html/htmlpars.h index 238a86c704..e28657879f 100644 --- a/include/wx/html/htmlpars.h +++ b/include/wx/html/htmlpars.h @@ -26,6 +26,9 @@ class WXDLLEXPORT wxHtmlParser; class WXDLLEXPORT wxHtmlTagHandler; class WXDLLEXPORT wxHtmlEntitiesParser; +class wxHtmlTextPieces; +class wxHtmlParserState; + // This class handles generic parsing of HTML document : it scans // the document and divide it into blocks of tags (where one block // consists of starting and ending tag and of text between these @@ -59,7 +62,10 @@ public: // Parses the m_Source from begin_pos to end_pos-1. // (in noparams version it parses whole m_Source) void DoParsing(int begin_pos, int end_pos); - inline void DoParsing() {DoParsing(0, m_Source.Length());}; + void DoParsing(); + + // Returns pointer to the tag at parser's current position + wxHtmlTag *GetCurrentTag() const { return m_CurTag; } // Returns product of parsing // Returned value is result of parsing of the part. The type of this result @@ -88,14 +94,30 @@ public: wxString* GetSource() {return &m_Source;} void SetSource(const wxString& src); + + // Sets HTML source and remebers current parser's state so that it can + // later be restored. This is useful for on-line modifications of + // HTML source (for example,
 <pre> handler replaces spaces with &nbsp;
+    // and newlines with <br>
) + virtual void SetSourceAndSaveState(const wxString& src); + // Restores parser's state from stack or returns FALSE if the stack is + // empty + virtual bool RestoreState(); protected: + // DOM structure + void CreateDOMTree(); + void DestroyDOMTree(); + void CreateDOMSubTree(wxHtmlTag *cur, + int begin_pos, int end_pos, + wxHtmlTagsCache *cache); + // Adds text to the output. // This is called from Parse() and must be overriden in derived classes. // txt is not guaranteed to be only one word. It is largest continuous part of text // (= not broken by tags) // NOTE : using char* because of speed improvements - virtual void AddText(const char* txt) = 0; + virtual void AddText(const wxChar* txt) = 0; // Adds tag and proceeds it. Parse() may (and usually is) called from this method. // This is called from Parse() and may be overriden. @@ -108,13 +130,16 @@ protected: wxHtmlEntitiesParser *GetEntitiesParser() const { return m_entitiesParser; } protected: - // source being parsed - wxString m_Source; - - // tags cache, used during parsing. - wxHtmlTagsCache *m_Cache; - wxHashTable m_HandlersHash; + // DOM tree: + wxHtmlTag *m_CurTag; + wxHtmlTag *m_Tags; + wxHtmlTextPieces *m_TextPieces; + size_t m_CurTextPiece; + wxString m_Source; + + wxHtmlParserState *m_SavedStates; + // handlers that handle particular tags. The table is accessed by // key = tag's name. // This attribute MUST be filled by derived class otherwise it would @@ -125,6 +150,7 @@ protected: // m_HandlersList is list of all handlers and it is guaranteed to contain // only one reference to each handler instance. 
wxList m_HandlersList; + wxHashTable m_HandlersHash; // class for opening files (file system) wxFileSystem *m_FS; diff --git a/include/wx/html/htmltag.h b/include/wx/html/htmltag.h index 3d549e09cd..981f2374e5 100644 --- a/include/wx/html/htmltag.h +++ b/include/wx/html/htmltag.h @@ -60,13 +60,26 @@ class WXDLLEXPORT wxHtmlTag : public wxObject { DECLARE_CLASS(wxHtmlTag) -public: +protected: // constructs wxHtmlTag object based on HTML tag. // The tag begins (with '<' character) at position pos in source // end_pos is position where parsing ends (usually end of document) - wxHtmlTag(const wxString& source, int pos, int end_pos, + wxHtmlTag(wxHtmlTag *parent, + const wxString& source, int pos, int end_pos, wxHtmlTagsCache *cache, - wxHtmlEntitiesParser *entParser = NULL); + wxHtmlEntitiesParser *entParser); + friend class wxHtmlParser; +public: + ~wxHtmlTag(); + + wxHtmlTag *GetParent() const {return m_Parent;} + wxHtmlTag *GetFirstSibling() const; + wxHtmlTag *GetLastSibling() const; + wxHtmlTag *GetChildren() const { return m_FirstChild; } + wxHtmlTag *GetPreviousSibling() const { return m_Prev; } + wxHtmlTag *GetNextSibling() const {return m_Next; } + // Return next tag, as if tree had been flattened + wxHtmlTag *GetNextTag() const; // Returns tag's name in uppercase. inline wxString GetName() const {return m_Name;} @@ -99,12 +112,13 @@ public: // Returns string containing all params. 
wxString GetAllParams() const; +#if WXWIN_COMPATIBILITY_2_2 // return TRUE if this is ending tag (</something>) or FALSE // if it isn't (<something>) - inline bool IsEnding() const {return m_Ending;} + inline bool IsEnding() const {return FALSE;} +#endif - // return TRUE if this is ending tag (</something>) or FALSE - // if it isn't (<something>) + // return TRUE if this there is matching ending tag inline bool HasEnding() const {return m_End1 >= 0;} // returns beginning position of _internal_ block of text @@ -121,8 +135,13 @@ public: private: wxString m_Name; int m_Begin, m_End1, m_End2; - bool m_Ending; wxArrayString m_ParamNames, m_ParamValues; + + // DOM tree relations: + wxHtmlTag *m_Next; + wxHtmlTag *m_Prev; + wxHtmlTag *m_FirstChild, *m_LastChild; + wxHtmlTag *m_Parent; }; diff --git a/include/wx/html/winpars.h b/include/wx/html/winpars.h index fee91731c2..1b6b02a6d6 100644 --- a/include/wx/html/winpars.h +++ b/include/wx/html/winpars.h @@ -123,11 +123,13 @@ public: virtual wxFont* CreateCurrentFont(); protected: - virtual void AddText(const char *txt); + virtual void AddText(const wxChar* txt); private: bool m_tmpLastWasSpace; - // temporary variable used by AddText + wxChar *m_tmpStrBuf; + size_t m_tmpStrBufSize; + // temporary variables used by AddText wxWindow *m_Window; // window we're parsing for double m_PixelScale; -- 2.45.2