From: Václav Slavík Date: Mon, 11 Sep 2006 11:08:49 +0000 (+0000) Subject: wxXML load/save improvements: added ability to not ignore whitespace and specify... X-Git-Url: https://git.saurik.com/wxWidgets.git/commitdiff_plain/538f3830191e25eaf0edb0a379e08d4f85ee49bc?ds=inline wxXML load/save improvements: added ability to not ignore whitespace and specify indentation level (patch #1541888) git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@41157 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775 --- diff --git a/docs/latex/wx/xmldocument.tex b/docs/latex/wx/xmldocument.tex index c99bdae67c..0a5bcb5b12 100644 --- a/docs/latex/wx/xmldocument.tex +++ b/docs/latex/wx/xmldocument.tex @@ -18,7 +18,7 @@ A simple example of using XML classes is: \begin{verbatim} wxXmlDocument doc; -if (!doc.Load(wxT("myfile.xml")) +if (!doc.Load(wxT("myfile.xml"))) return false; // start processing the XML file @@ -51,6 +51,23 @@ while (child) { } \end{verbatim} +{\bf Note:} if you want to preserve the original formatting of the loaded file including whitespaces +and indentation, you need to turn off whitespace-only text node removal and automatic indentation: + +\begin{verbatim} +wxXmlDocument doc; +doc.Load(wxT("myfile.xml"), wxT("UTF-8"), wxXMLDOC_KEEP_WHITESPACE_NODES); +doc.Save(wxT("myfile2.xml"), wxXML_NO_INDENTATION); // myfile2.xml will be identical to myfile.xml +\end{verbatim} + +Using default parameters, you will get a reformatted document which in general is different from +the original loaded content: + +\begin{verbatim} +wxXmlDocument doc; +doc.Load(wxT("myfile.xml")); +doc.Save(wxT("myfile2.xml")); // myfile2.xml != myfile.xml +\end{verbatim} \wxheading{Derived from} @@ -74,11 +91,11 @@ while (child) { \func{}{wxXmlDocument}{\void} -\func{}{wxXmlDocument}{\param{const wxString\& }{filename}, \param{const wxString\& }{encoding = wxT("UTF-8")}} +\func{}{wxXmlDocument}{\param{const wxString\& }{filename}, \param{const wxString\& }{encoding = wxT("UTF-8")}, \param{int }{flags = 
wxXMLDOC\_NONE}} Loads the given {\it filename} using the given encoding. See \helpref{Load()}{wxxmldocumentload}. -\func{}{wxXmlDocument}{\param{wxInputStream\& }{stream}, \param{const wxString\& }{encoding = wxT("UTF-8")}} +\func{}{wxXmlDocument}{\param{wxInputStream\& }{stream}, \param{const wxString\& }{encoding = wxT("UTF-8")}, \param{int }{flags = wxXMLDOC\_NONE}} Loads the XML document from given stream using the given encoding. See \helpref{Load()}{wxxmldocumentload}. @@ -149,23 +166,36 @@ Returns \true if the document has been loaded successfully. \membersection{wxXmlDocument::Load}\label{wxxmldocumentload} -\func{bool}{Load}{\param{const wxString\& }{filename}, \param{const wxString\& }{encoding = wxT("UTF-8")}} +\func{bool}{Load}{\param{const wxString\& }{filename}, \param{const wxString\& }{encoding = wxT("UTF-8")}, \param{int }{flags = wxXMLDOC\_NONE}} -Parses {\it filename} as an xml document and loads data. Returns \true on success, \false otherwise. +Parses {\it filename} as an xml document and loads its data. -\func{bool}{Load}{\param{wxInputStream\& }{stream}, \param{const wxString\& }{encoding = wxT("UTF-8")}} +If {\tt flags} does not contain {\tt wxXMLDOC\_KEEP\_WHITESPACE\_NODES}, then, while loading, all nodes of +type {\tt wxXML\_TEXT\_NODE} (see \helpref{wxXmlNode}{wxxmlnode}) are automatically skipped if they +contain whitespaces only. +The removal of these nodes makes the load process slightly faster and requires less memory; however, it +makes it impossible to recreate exactly the loaded text with a \helpref{Save}{wxxmldocumentsave} call later. +Read the initial description of this class for more info. + +Returns \true on success, \false otherwise. + +\func{bool}{Load}{\param{wxInputStream\& }{stream}, \param{const wxString\& }{encoding = wxT("UTF-8")}, \param{int }{flags = wxXMLDOC\_NONE}} Like above but takes the data from given input stream. 
\membersection{wxXmlDocument::Save}\label{wxxmldocumentsave} -\constfunc{bool}{Save}{\param{const wxString\& }{filename}} +\constfunc{bool}{Save}{\param{const wxString\& }{filename}, \param{int }{indentstep = 1}} Saves XML tree creating a file named with given string. -\constfunc{bool}{Save}{\param{wxOutputStream\& }{stream}} +If {\tt indentstep} is greater than or equal to zero, then, while saving, automatic indentation +is added in steps of {\tt indentstep} spaces. +If {\tt indentstep} is {\tt wxXML\_NO\_INDENTATION}, then automatic indentation is turned off. + +\constfunc{bool}{Save}{\param{wxOutputStream\& }{stream}, \param{int }{indentstep = 1}} -Saves XML tree in the given output stream. +Saves XML tree in the given output stream. See the other overload for a description of {\tt indentstep}. \membersection{wxXmlDocument::SetEncoding}\label{wxxmldocumentsetencoding} diff --git a/docs/latex/wx/xmlnode.tex b/docs/latex/wx/xmlnode.tex index 97d60ffa55..93e89ad363 100644 --- a/docs/latex/wx/xmlnode.tex +++ b/docs/latex/wx/xmlnode.tex @@ -124,7 +124,7 @@ Removes the first properties which has the given {\it name} from the list of pro \membersection{wxXmlNode::GetChildren}\label{wxxmlnodegetchildren} -\constfunc{wxXmlNode*}{GetChildren}{\param{void}{}} +\constfunc{wxXmlNode*}{GetChildren}{\void} Returns the first child of this node. To get a pointer to the second child of this node (if it does exist), use the @@ -139,6 +139,17 @@ Be aware that for nodes of type \texttt{wxXML\_ELEMENT\_NODE} (the most used nod content is an empty string. See \helpref{GetNodeContent()}{wxxmlnodegetnodecontent} for more details. +\membersection{wxXmlNode::GetDepth}\label{wxxmlnodegetdepth} + +\constfunc{int}{GetDepth}{\param{wxXmlNode* }{grandparent = NULL}} + +Returns the number of nodes which separate this node from {\tt grandparent}. 
+ +This function searches only the parents of this node until it finds {\tt grandparent} +or the \NULL node (which is the parent of non-linked nodes or the parent of a +\helpref{wxXmlDocument}{wxxmldocument}'s root node). + + \membersection{wxXmlNode::GetNodeContent}\label{wxxmlnodegetnodecontent} \constfunc{wxString}{GetNodeContent}{\void} @@ -220,6 +231,14 @@ If {\it before\_node} is \NULL, then {\it child} is prepended to the list of chi becomes the first child of this node. Returns \true if {\it before\_node} has been found and the {\it child} node has been inserted. +\membersection{wxXmlNode::IsWhitespaceOnly}\label{wxxmlnodecontainsiswhitespaceonly} + +\constfunc{bool}{IsWhitespaceOnly}{\void} + +Returns \true if the content of this node is a string containing only whitespaces (spaces, +tabs, new lines, etc). Note that this function is locale-independent since the parsing of XML +documents must always produce the exact same tree regardless of the locale it runs under. + \membersection{wxXmlNode::RemoveChild}\label{wxxmlnoderemovechild} \func{bool}{RemoveChild}{\param{wxXmlNode* }{child}} diff --git a/include/wx/xml/xml.h b/include/wx/xml/xml.h index 10ebf5ce3c..7c36f13b9e 100644 --- a/include/wx/xml/xml.h +++ b/include/wx/xml/xml.h @@ -125,6 +125,9 @@ public: wxString GetName() const { return m_name; } wxString GetContent() const { return m_content; } + bool IsWhitespaceOnly() const; + int GetDepth(wxXmlNode *grandparent = NULL) const; + // Gets node content from wxXML_ENTITY_NODE // The problem is, content is represented as // wxXML_ENTITY_NODE name="tag", content="" @@ -165,8 +168,15 @@ private: +// special indentation value for wxXmlDocument::Save +#define wxXML_NO_INDENTATION (-1) - +// flags for wxXmlDocument::Load +enum wxXmlDocumentLoadFlag +{ + wxXMLDOC_NONE = 0, + wxXMLDOC_KEEP_WHITESPACE_NODES = 1 +}; // This class holds XML data/document as parsed by XML parser. @@ -187,13 +197,13 @@ public: // Parses .xml file and loads data. 
Returns TRUE on success, FALSE // otherwise. virtual bool Load(const wxString& filename, - const wxString& encoding = wxT("UTF-8")); + const wxString& encoding = wxT("UTF-8"), int flags = wxXMLDOC_NONE); virtual bool Load(wxInputStream& stream, - const wxString& encoding = wxT("UTF-8")); + const wxString& encoding = wxT("UTF-8"), int flags = wxXMLDOC_NONE); // Saves document as .xml file. - virtual bool Save(const wxString& filename) const; - virtual bool Save(wxOutputStream& stream) const; + virtual bool Save(const wxString& filename, int indentstep = 1) const; + virtual bool Save(wxOutputStream& stream, int indentstep = 1) const; bool IsOk() const { return m_root != NULL; } diff --git a/src/xml/xml.cpp b/src/xml/xml.cpp index 067cd02319..4a13a5e879 100644 --- a/src/xml/xml.cpp +++ b/src/xml/xml.cpp @@ -39,6 +39,9 @@ WX_CHECK_BUILD_OPTIONS("wxXML") IMPLEMENT_CLASS(wxXmlDocument, wxObject) +// a private utility used by wxXML +static bool wxIsWhiteOnly(const wxChar *buf); + //----------------------------------------------------------------------------- // wxXmlNode @@ -309,6 +312,28 @@ wxString wxXmlNode::GetNodeContent() const return wxEmptyString; } +int wxXmlNode::GetDepth(wxXmlNode *grandparent) const +{ + const wxXmlNode *n = this; + int ret = -1; + + do + { + ret++; + n = n->GetParent(); + if (n == grandparent) + return ret; + + } while (n); + + return wxNOT_FOUND; +} + +bool wxXmlNode::IsWhitespaceOnly() const +{ + return wxIsWhiteOnly(m_content); +} + //----------------------------------------------------------------------------- @@ -364,20 +389,20 @@ void wxXmlDocument::DoCopy(const wxXmlDocument& doc) m_root = new wxXmlNode(*doc.m_root); } -bool wxXmlDocument::Load(const wxString& filename, const wxString& encoding) +bool wxXmlDocument::Load(const wxString& filename, const wxString& encoding, int flags) { wxFileInputStream stream(filename); if (!stream.Ok()) return false; - return Load(stream, encoding); + return Load(stream, encoding, flags); } -bool 
wxXmlDocument::Save(const wxString& filename) const +bool wxXmlDocument::Save(const wxString& filename, int indentstep) const { wxFileOutputStream stream(filename); if (!stream.Ok()) return false; - return Save(stream); + return Save(stream, indentstep); } @@ -386,11 +411,6 @@ bool wxXmlDocument::Save(const wxString& filename) const // wxXmlDocument loading routines //----------------------------------------------------------------------------- -/* - FIXME: - - process all elements, including CDATA - */ - // converts Expat-produced string in UTF-8 into wxString using the specified // conv or keep in UTF-8 if conv is NULL static wxString CharToString(wxMBConv *conv, @@ -417,6 +437,16 @@ static wxString CharToString(wxMBConv *conv, #endif // wxUSE_UNICODE/!wxUSE_UNICODE } +// returns true if the given string contains only whitespaces +bool wxIsWhiteOnly(const wxChar *buf) +{ + for (const wxChar *c = buf; *c != wxT('\0'); c++) + if (*c != wxT(' ') && *c != wxT('\t') && *c != wxT('\n') && *c != wxT('\r')) + return false; + return true; +} + + struct wxXmlParsingContext { wxMBConv *conv; @@ -426,6 +456,7 @@ struct wxXmlParsingContext wxString encoding; wxString version; bool bLastCdata; + bool removeWhiteOnlyNodes; }; extern "C" { @@ -462,42 +493,32 @@ extern "C" { static void TextHnd(void *userData, const char *s, int len) { wxXmlParsingContext *ctx = (wxXmlParsingContext*)userData; - char *buf = new char[len + 1]; - - buf[len] = '\0'; - memcpy(buf, s, (size_t)len); + wxString str = CharToString(ctx->conv, s, len); if (ctx->lastAsText) { if ( ctx->bLastCdata ) { ctx->lastAsText->SetContent(ctx->lastAsText->GetContent() + - CharToString(NULL, buf)); + CharToString(NULL, s, len)); } else { - ctx->lastAsText->SetContent(ctx->lastAsText->GetContent() + - CharToString(ctx->conv, buf)); + ctx->lastAsText->SetContent(ctx->lastAsText->GetContent() + str); } } else { - bool whiteOnly = true; - for (char *c = buf; *c != '\0'; c++) - if (*c != ' ' && *c != '\t' && *c != '\n' && 
*c != '\r') - { - whiteOnly = false; - break; - } + bool whiteOnly = false; + if (ctx->removeWhiteOnlyNodes) + whiteOnly = wxIsWhiteOnly(str); + if (!whiteOnly) { - ctx->lastAsText = new wxXmlNode(wxXML_TEXT_NODE, wxT("text"), - CharToString(ctx->conv, buf)); + ctx->lastAsText = new wxXmlNode(wxXML_TEXT_NODE, wxT("text"), str); ctx->node->AddChild(ctx->lastAsText); } } - - delete[] buf; } } @@ -593,7 +614,7 @@ static int UnknownEncodingHnd(void * WXUNUSED(encodingHandlerData), } } -bool wxXmlDocument::Load(wxInputStream& stream, const wxString& encoding) +bool wxXmlDocument::Load(wxInputStream& stream, const wxString& encoding, int flags) { #if wxUSE_UNICODE (void)encoding; @@ -614,6 +635,7 @@ bool wxXmlDocument::Load(wxInputStream& stream, const wxString& encoding) if ( encoding != wxT("UTF-8") && encoding != wxT("utf-8") ) ctx.conv = new wxCSConv(encoding); #endif + ctx.removeWhiteOnlyNodes = (flags & wxXMLDOC_KEEP_WHITESPACE_NODES) == 0; ctx.bLastCdata = false; XML_SetUserData(parser, (void*)&ctx); @@ -755,7 +777,7 @@ inline static void OutputIndentation(wxOutputStream& stream, int indent) } static void OutputNode(wxOutputStream& stream, wxXmlNode *node, int indent, - wxMBConv *convMem, wxMBConv *convFile) + wxMBConv *convMem, wxMBConv *convFile, int indentstep) { wxXmlNode *n, *prev; wxXmlProperty *prop; @@ -793,13 +815,13 @@ static void OutputNode(wxOutputStream& stream, wxXmlNode *node, int indent, n = node->GetChildren(); while (n) { - if (n && n->GetType() != wxXML_TEXT_NODE) - OutputIndentation(stream, indent + 1); - OutputNode(stream, n, indent + 1, convMem, convFile); + if (indentstep >= 0 && n && n->GetType() != wxXML_TEXT_NODE) + OutputIndentation(stream, indent + indentstep); + OutputNode(stream, n, indent + indentstep, convMem, convFile, indentstep); prev = n; n = n->GetNext(); } - if (prev && prev->GetType() != wxXML_TEXT_NODE) + if (indentstep >= 0 && prev && prev->GetType() != wxXML_TEXT_NODE) OutputIndentation(stream, indent); 
OutputString(stream, wxT("</")); OutputString(stream, node->GetName()); @@ -820,7 +842,7 @@ static void OutputNode(wxOutputStream& stream, wxXmlNode *node, int indent, } } -bool wxXmlDocument::Save(wxOutputStream& stream) const +bool wxXmlDocument::Save(wxOutputStream& stream, int indentstep) const { if ( !IsOk() ) return false; @@ -844,7 +866,7 @@ bool wxXmlDocument::Save(wxOutputStream& stream) const GetVersion().c_str(), GetFileEncoding().c_str()); OutputString(stream, s); - OutputNode(stream, GetRoot(), 0, convMem, convFile); + OutputNode(stream, GetRoot(), 0, convMem, convFile, indentstep); OutputString(stream, wxT("\n")); if ( convFile )