1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/xml/xml.cpp
3 // Purpose: wxXmlDocument - XML parser & data holder class
4 // Author: Vaclav Slavik
7 // Copyright: (c) 2000 Vaclav Slavik
8 // Licence: wxWindows licence
9 /////////////////////////////////////////////////////////////////////////////
11 // For compilers that support precompilation, includes "wx.h".
12 #include "wx/wxprec.h"
20 #include "wx/xml/xml.h"
28 #include "wx/wfstream.h"
29 #include "wx/datstrm.h"
30 #include "wx/zstream.h"
31 #include "wx/strconv.h"
32 #include "wx/scopedptr.h"
34 #include "expat.h" // from Expat
36 // DLL options compatibility check:
37 WX_CHECK_BUILD_OPTIONS("wxXML")
40 IMPLEMENT_CLASS(wxXmlDocument
, wxObject
)
43 // a private utility used by wxXML
44 static bool wxIsWhiteOnly(const wxString
& buf
);
47 //-----------------------------------------------------------------------------
49 //-----------------------------------------------------------------------------
51 wxXmlNode::wxXmlNode(wxXmlNode
*parent
,wxXmlNodeType type
,
52 const wxString
& name
, const wxString
& content
,
53 wxXmlAttribute
*attrs
, wxXmlNode
*next
, int lineNo
)
54 : m_type(type
), m_name(name
), m_content(content
),
55 m_attrs(attrs
), m_parent(parent
),
56 m_children(NULL
), m_next(next
),
62 if (m_parent
->m_children
)
64 m_next
= m_parent
->m_children
;
65 m_parent
->m_children
= this;
68 m_parent
->m_children
= this;
72 wxXmlNode::wxXmlNode(wxXmlNodeType type
, const wxString
& name
,
73 const wxString
& content
,
75 : m_type(type
), m_name(name
), m_content(content
),
76 m_attrs(NULL
), m_parent(NULL
),
77 m_children(NULL
), m_next(NULL
),
78 m_lineNo(lineNo
), m_noConversion(false)
81 wxXmlNode::wxXmlNode(const wxXmlNode
& node
)
88 wxXmlNode::~wxXmlNode()
91 for (c
= m_children
; c
; c
= c2
)
97 wxXmlAttribute
*p
, *p2
;
98 for (p
= m_attrs
; p
; p
= p2
)
105 wxXmlNode
& wxXmlNode::operator=(const wxXmlNode
& node
)
108 wxDELETE(m_children
);
113 void wxXmlNode::DoCopy(const wxXmlNode
& node
)
115 m_type
= node
.m_type
;
116 m_name
= node
.m_name
;
117 m_content
= node
.m_content
;
118 m_lineNo
= node
.m_lineNo
;
119 m_noConversion
= node
.m_noConversion
;
122 wxXmlNode
*n
= node
.m_children
;
125 AddChild(new wxXmlNode(*n
));
130 wxXmlAttribute
*p
= node
.m_attrs
;
133 AddAttribute(p
->GetName(), p
->GetValue());
138 bool wxXmlNode::HasAttribute(const wxString
& attrName
) const
140 wxXmlAttribute
*attr
= GetAttributes();
144 if (attr
->GetName() == attrName
) return true;
145 attr
= attr
->GetNext();
151 bool wxXmlNode::GetAttribute(const wxString
& attrName
, wxString
*value
) const
153 wxCHECK_MSG( value
, false, "value argument must not be NULL" );
155 wxXmlAttribute
*attr
= GetAttributes();
159 if (attr
->GetName() == attrName
)
161 *value
= attr
->GetValue();
164 attr
= attr
->GetNext();
170 wxString
wxXmlNode::GetAttribute(const wxString
& attrName
, const wxString
& defaultVal
) const
173 if (GetAttribute(attrName
, &tmp
))
179 void wxXmlNode::AddChild(wxXmlNode
*child
)
181 if (m_children
== NULL
)
185 wxXmlNode
*ch
= m_children
;
186 while (ch
->m_next
) ch
= ch
->m_next
;
189 child
->m_next
= NULL
;
190 child
->m_parent
= this;
193 // inserts a new node in front of 'followingNode'
194 bool wxXmlNode::InsertChild(wxXmlNode
*child
, wxXmlNode
*followingNode
)
196 wxCHECK_MSG( child
, false, "cannot insert a NULL node!" );
197 wxCHECK_MSG( child
->m_parent
== NULL
, false, "node already has a parent" );
198 wxCHECK_MSG( child
->m_next
== NULL
, false, "node already has m_next" );
199 wxCHECK_MSG( followingNode
== NULL
|| followingNode
->GetParent() == this,
201 "wxXmlNode::InsertChild - followingNode has incorrect parent" );
203 // this is for backward compatibility, NULL was allowed here thanks to
204 // the confusion about followingNode's meaning
205 if ( followingNode
== NULL
)
206 followingNode
= m_children
;
208 if ( m_children
== followingNode
)
210 child
->m_next
= m_children
;
215 wxXmlNode
*ch
= m_children
;
216 while ( ch
&& ch
->m_next
!= followingNode
)
220 wxFAIL_MSG( "followingNode has this node as parent, but couldn't be found among children" );
224 child
->m_next
= followingNode
;
228 child
->m_parent
= this;
232 // inserts a new node right after 'precedingNode'
233 bool wxXmlNode::InsertChildAfter(wxXmlNode
*child
, wxXmlNode
*precedingNode
)
235 wxCHECK_MSG( child
, false, "cannot insert a NULL node!" );
236 wxCHECK_MSG( child
->m_parent
== NULL
, false, "node already has a parent" );
237 wxCHECK_MSG( child
->m_next
== NULL
, false, "node already has m_next" );
238 wxCHECK_MSG( precedingNode
== NULL
|| precedingNode
->m_parent
== this, false,
239 "precedingNode has wrong parent" );
243 child
->m_next
= precedingNode
->m_next
;
244 precedingNode
->m_next
= child
;
246 else // precedingNode == NULL
248 wxCHECK_MSG( m_children
== NULL
, false,
249 "NULL precedingNode only makes sense when there are no children" );
251 child
->m_next
= m_children
;
255 child
->m_parent
= this;
259 bool wxXmlNode::RemoveChild(wxXmlNode
*child
)
261 if (m_children
== NULL
)
263 else if (m_children
== child
)
265 m_children
= child
->m_next
;
266 child
->m_parent
= NULL
;
267 child
->m_next
= NULL
;
272 wxXmlNode
*ch
= m_children
;
275 if (ch
->m_next
== child
)
277 ch
->m_next
= child
->m_next
;
278 child
->m_parent
= NULL
;
279 child
->m_next
= NULL
;
288 void wxXmlNode::AddAttribute(const wxString
& name
, const wxString
& value
)
290 AddProperty(name
, value
);
293 void wxXmlNode::AddAttribute(wxXmlAttribute
*attr
)
298 bool wxXmlNode::DeleteAttribute(const wxString
& name
)
300 return DeleteProperty(name
);
303 void wxXmlNode::AddProperty(const wxString
& name
, const wxString
& value
)
305 AddProperty(new wxXmlAttribute(name
, value
, NULL
));
308 void wxXmlNode::AddProperty(wxXmlAttribute
*attr
)
314 wxXmlAttribute
*p
= m_attrs
;
315 while (p
->GetNext()) p
= p
->GetNext();
320 bool wxXmlNode::DeleteProperty(const wxString
& name
)
322 wxXmlAttribute
*attr
;
327 else if (m_attrs
->GetName() == name
)
330 m_attrs
= attr
->GetNext();
338 wxXmlAttribute
*p
= m_attrs
;
341 if (p
->GetNext()->GetName() == name
)
344 p
->SetNext(attr
->GetNext());
355 wxString
wxXmlNode::GetNodeContent() const
357 wxXmlNode
*n
= GetChildren();
361 if (n
->GetType() == wxXML_TEXT_NODE
||
362 n
->GetType() == wxXML_CDATA_SECTION_NODE
)
363 return n
->GetContent();
366 return wxEmptyString
;
369 int wxXmlNode::GetDepth(wxXmlNode
*grandparent
) const
371 const wxXmlNode
*n
= this;
378 if (n
== grandparent
)
386 bool wxXmlNode::IsWhitespaceOnly() const
388 return wxIsWhiteOnly(m_content
);
393 //-----------------------------------------------------------------------------
395 //-----------------------------------------------------------------------------
397 wxXmlDocument::wxXmlDocument()
398 : m_version(wxS("1.0")), m_fileEncoding(wxS("utf-8")), m_root(NULL
)
401 m_encoding
= wxS("UTF-8");
405 wxXmlDocument::wxXmlDocument(const wxString
& filename
, const wxString
& encoding
)
406 :wxObject(), m_root(NULL
)
408 if ( !Load(filename
, encoding
) )
414 wxXmlDocument::wxXmlDocument(wxInputStream
& stream
, const wxString
& encoding
)
415 :wxObject(), m_root(NULL
)
417 if ( !Load(stream
, encoding
) )
423 wxXmlDocument::wxXmlDocument(const wxXmlDocument
& doc
)
429 wxXmlDocument
& wxXmlDocument::operator=(const wxXmlDocument
& doc
)
436 void wxXmlDocument::DoCopy(const wxXmlDocument
& doc
)
438 m_version
= doc
.m_version
;
440 m_encoding
= doc
.m_encoding
;
442 m_fileEncoding
= doc
.m_fileEncoding
;
445 m_root
= new wxXmlNode(*doc
.m_root
);
450 bool wxXmlDocument::Load(const wxString
& filename
, const wxString
& encoding
, int flags
)
452 wxFileInputStream
stream(filename
);
455 return Load(stream
, encoding
, flags
);
458 bool wxXmlDocument::Save(const wxString
& filename
, int indentstep
) const
460 wxFileOutputStream
stream(filename
);
463 return Save(stream
, indentstep
);
468 //-----------------------------------------------------------------------------
469 // wxXmlDocument loading routines
470 //-----------------------------------------------------------------------------
472 // converts Expat-produced string in UTF-8 into wxString using the specified
473 // conv, or keeps it in UTF-8 if conv is NULL
474 static wxString
CharToString(wxMBConv
*conv
,
475 const char *s
, size_t len
= wxString::npos
)
480 // there can be no embedded NULs in this string so we don't need the
481 // output length, it will be NUL-terminated
482 const wxWCharBuffer
wbuf(
483 wxConvUTF8
.cMB2WC(s
, len
== wxString::npos
? wxNO_LEN
: len
, NULL
));
485 return wxString(wbuf
, *conv
);
487 // else: the string is wanted in UTF-8
488 #endif // !wxUSE_UNICODE
491 return wxString::FromUTF8Unchecked(s
, len
);
494 // returns true if the given string contains only whitespace
495 bool wxIsWhiteOnly(const wxString
& buf
)
497 for ( wxString::const_iterator i
= buf
.begin(); i
!= buf
.end(); ++i
)
500 if ( c
!= wxS(' ') && c
!= wxS('\t') && c
!= wxS('\n') && c
!= wxS('\r'))
507 struct wxXmlParsingContext
509 wxXmlParsingContext()
515 removeWhiteOnlyNodes(false)
521 wxXmlNode
*node
; // the node being parsed
522 wxXmlNode
*lastChild
; // the last child of "node"
523 wxXmlNode
*lastAsText
; // the last _text_ child of "node"
526 bool removeWhiteOnlyNodes
;
529 // checks that ctx->lastChild is in consistent state
530 #define ASSERT_LAST_CHILD_OK(ctx) \
531 wxASSERT( ctx->lastChild == NULL || \
532 ctx->lastChild->GetNext() == NULL ); \
533 wxASSERT( ctx->lastChild == NULL || \
534 ctx->lastChild->GetParent() == ctx->node )
537 static void StartElementHnd(void *userData
, const char *name
, const char **atts
)
539 wxXmlParsingContext
*ctx
= (wxXmlParsingContext
*)userData
;
540 wxXmlNode
*node
= new wxXmlNode(wxXML_ELEMENT_NODE
,
541 CharToString(ctx
->conv
, name
),
543 XML_GetCurrentLineNumber(ctx
->parser
));
544 const char **a
= atts
;
546 // add node attributes
549 node
->AddAttribute(CharToString(ctx
->conv
, a
[0]), CharToString(ctx
->conv
, a
[1]));
553 if (ctx
->root
== NULL
)
559 ASSERT_LAST_CHILD_OK(ctx
);
560 ctx
->node
->InsertChildAfter(node
, ctx
->lastChild
);
563 ctx
->lastAsText
= NULL
;
564 ctx
->lastChild
= NULL
; // our new node "node" has no children yet
569 static void EndElementHnd(void *userData
, const char* WXUNUSED(name
))
571 wxXmlParsingContext
*ctx
= (wxXmlParsingContext
*)userData
;
573 // we're exiting the last child of ctx->node->GetParent() and going
574 // back one level up, so current value of ctx->node points to the last
575 // child of ctx->node->GetParent()
576 ctx
->lastChild
= ctx
->node
;
578 ctx
->node
= ctx
->node
->GetParent();
579 ctx
->lastAsText
= NULL
;
582 static void TextHnd(void *userData
, const char *s
, int len
)
584 wxXmlParsingContext
*ctx
= (wxXmlParsingContext
*)userData
;
585 wxString str
= CharToString(ctx
->conv
, s
, len
);
589 ctx
->lastAsText
->SetContent(ctx
->lastAsText
->GetContent() + str
);
593 bool whiteOnly
= false;
594 if (ctx
->removeWhiteOnlyNodes
)
595 whiteOnly
= wxIsWhiteOnly(str
);
599 wxXmlNode
*textnode
=
600 new wxXmlNode(wxXML_TEXT_NODE
, wxS("text"), str
,
601 XML_GetCurrentLineNumber(ctx
->parser
));
603 ASSERT_LAST_CHILD_OK(ctx
);
604 ctx
->node
->InsertChildAfter(textnode
, ctx
->lastChild
);
605 ctx
->lastChild
= ctx
->lastAsText
= textnode
;
610 static void StartCdataHnd(void *userData
)
612 wxXmlParsingContext
*ctx
= (wxXmlParsingContext
*)userData
;
614 wxXmlNode
*textnode
=
615 new wxXmlNode(wxXML_CDATA_SECTION_NODE
, wxS("cdata"), wxS(""),
616 XML_GetCurrentLineNumber(ctx
->parser
));
618 ASSERT_LAST_CHILD_OK(ctx
);
619 ctx
->node
->InsertChildAfter(textnode
, ctx
->lastChild
);
620 ctx
->lastChild
= ctx
->lastAsText
= textnode
;
623 static void EndCdataHnd(void *userData
)
625 wxXmlParsingContext
*ctx
= (wxXmlParsingContext
*)userData
;
627 // we need to reset this pointer so that subsequent text nodes don't append
628 // their contents to this one but create new wxXML_TEXT_NODE objects (or
629 // not create anything at all if only white space follows the CDATA section
630 // and wxXMLDOC_KEEP_WHITESPACE_NODES is not used as is commonly the case)
631 ctx
->lastAsText
= NULL
;
634 static void CommentHnd(void *userData
, const char *data
)
636 wxXmlParsingContext
*ctx
= (wxXmlParsingContext
*)userData
;
640 wxXmlNode
*commentnode
=
641 new wxXmlNode(wxXML_COMMENT_NODE
,
642 wxS("comment"), CharToString(ctx
->conv
, data
),
643 XML_GetCurrentLineNumber(ctx
->parser
));
645 ASSERT_LAST_CHILD_OK(ctx
);
646 ctx
->node
->InsertChildAfter(commentnode
, ctx
->lastChild
);
647 ctx
->lastChild
= commentnode
;
649 //else: ctx->node == NULL happens if there is a comment before
650 // the root element. We currently don't have a way to represent
651 // these in wxXmlDocument (FIXME).
653 ctx
->lastAsText
= NULL
;
656 static void DefaultHnd(void *userData
, const char *s
, int len
)
659 if (len
> 6 && memcmp(s
, "<?xml ", 6) == 0)
661 wxXmlParsingContext
*ctx
= (wxXmlParsingContext
*)userData
;
663 wxString buf
= CharToString(ctx
->conv
, s
, (size_t)len
);
665 pos
= buf
.Find(wxS("encoding="));
666 if (pos
!= wxNOT_FOUND
)
667 ctx
->encoding
= buf
.Mid(pos
+ 10).BeforeFirst(buf
[(size_t)pos
+9]);
668 pos
= buf
.Find(wxS("version="));
669 if (pos
!= wxNOT_FOUND
)
670 ctx
->version
= buf
.Mid(pos
+ 9).BeforeFirst(buf
[(size_t)pos
+8]);
674 static int UnknownEncodingHnd(void * WXUNUSED(encodingHandlerData
),
675 const XML_Char
*name
, XML_Encoding
*info
)
677 // We must build conversion table for expat. The easiest way to do so
678 // is to let wxCSConv convert a string containing all characters to
679 // wide character representation:
687 for (i
= 0; i
< 255; i
++)
689 mbBuf
[0] = (char)(i
+1);
690 if (conv
.MB2WC(wcBuf
, mbBuf
, 2) == (size_t)-1)
692 // invalid/undefined byte in the encoding:
695 info
->map
[i
+1] = (int)wcBuf
[0];
699 info
->convert
= NULL
;
700 info
->release
= NULL
;
707 bool wxXmlDocument::Load(wxInputStream
& stream
, const wxString
& encoding
, int flags
)
712 m_encoding
= encoding
;
715 const size_t BUFSIZE
= 1024;
717 wxXmlParsingContext ctx
;
719 XML_Parser parser
= XML_ParserCreate(NULL
);
721 ctx
.encoding
= wxS("UTF-8"); // default in absence of encoding=""
724 if ( encoding
.CmpNoCase(wxS("UTF-8")) != 0 )
725 ctx
.conv
= new wxCSConv(encoding
);
727 ctx
.removeWhiteOnlyNodes
= (flags
& wxXMLDOC_KEEP_WHITESPACE_NODES
) == 0;
730 XML_SetUserData(parser
, (void*)&ctx
);
731 XML_SetElementHandler(parser
, StartElementHnd
, EndElementHnd
);
732 XML_SetCharacterDataHandler(parser
, TextHnd
);
733 XML_SetCdataSectionHandler(parser
, StartCdataHnd
, EndCdataHnd
);;
734 XML_SetCommentHandler(parser
, CommentHnd
);
735 XML_SetDefaultHandler(parser
, DefaultHnd
);
736 XML_SetUnknownEncodingHandler(parser
, UnknownEncodingHnd
, NULL
);
741 size_t len
= stream
.Read(buf
, BUFSIZE
).LastRead();
742 done
= (len
< BUFSIZE
);
743 if (!XML_Parse(parser
, buf
, len
, done
))
745 wxString
error(XML_ErrorString(XML_GetErrorCode(parser
)),
747 wxLogError(_("XML parsing error: '%s' at line %d"),
749 (int)XML_GetCurrentLineNumber(parser
));
757 if (!ctx
.version
.empty())
758 SetVersion(ctx
.version
);
759 if (!ctx
.encoding
.empty())
760 SetFileEncoding(ctx
.encoding
);
768 XML_ParserFree(parser
);
780 //-----------------------------------------------------------------------------
781 // wxXmlDocument saving routines
782 //-----------------------------------------------------------------------------
784 // helpers for XML generation
788 // write string to output:
789 bool OutputString(wxOutputStream
& stream
,
798 wxUnusedVar(convMem
);
800 convFile
= &wxConvUTF8
;
802 const wxScopedCharBuffer
buf(str
.mb_str(*convFile
));
805 // conversion failed, can't write this string in an XML file in this
806 // (presumably non-UTF-8) encoding
810 stream
.Write(buf
, buf
.length());
811 #else // !wxUSE_UNICODE
812 if ( convFile
&& convMem
)
814 wxString
str2(str
.wc_str(*convMem
), *convFile
);
815 stream
.Write(str2
.mb_str(), str2
.length());
817 else // no conversions to do
819 stream
.Write(str
.mb_str(), str
.length());
821 #endif // wxUSE_UNICODE/!wxUSE_UNICODE
823 return stream
.IsOk();
832 // Same as above, but create entities first.
833 // Translates '<' to "&lt;", '>' to "&gt;" and so on, according to the spec:
834 // http://www.w3.org/TR/2000/WD-xml-c14n-20000119.html#charescaping
835 bool OutputEscapedString(wxOutputStream
& stream
,
842 escaped
.reserve(str
.length());
844 for ( wxString::const_iterator i
= str
.begin(); i
!= str
.end(); ++i
)
851 escaped
.append(wxS("<"));
854 escaped
.append(wxS(">"));
857 escaped
.append(wxS("&"));
860 escaped
.append(wxS("
"));
863 if ( mode
== Escape_Attribute
)
868 escaped
.append(wxS("""));
871 escaped
.append(wxS("	"));
874 escaped
.append(wxS("
"));
888 return OutputString(stream
, escaped
, convMem
, convFile
);
891 bool OutputIndentation(wxOutputStream
& stream
,
896 wxString
str(wxS("\n"));
897 str
+= wxString(indent
, wxS(' '));
898 return OutputString(stream
, str
, convMem
, convFile
);
901 bool OutputNode(wxOutputStream
& stream
,
909 switch (node
->GetType())
911 case wxXML_CDATA_SECTION_NODE
:
912 rc
= OutputString(stream
, wxS("<![CDATA["), convMem
, convFile
) &&
913 OutputString(stream
, node
->GetContent(), convMem
, convFile
) &&
914 OutputString(stream
, wxS("]]>"), convMem
, convFile
);
917 case wxXML_TEXT_NODE
:
918 if (node
->GetNoConversion())
920 stream
.Write(node
->GetContent().c_str(), node
->GetContent().Length());
924 rc
= OutputEscapedString(stream
, node
->GetContent(),
929 case wxXML_ELEMENT_NODE
:
930 rc
= OutputString(stream
, wxS("<"), convMem
, convFile
) &&
931 OutputString(stream
, node
->GetName(), convMem
, convFile
);
935 for ( wxXmlAttribute
*attr
= node
->GetAttributes();
937 attr
= attr
->GetNext() )
939 rc
= OutputString(stream
,
940 wxS(" ") + attr
->GetName() + wxS("=\""),
941 convMem
, convFile
) &&
942 OutputEscapedString(stream
, attr
->GetValue(),
945 OutputString(stream
, wxS("\""), convMem
, convFile
);
949 if ( node
->GetChildren() )
951 rc
= OutputString(stream
, wxS(">"), convMem
, convFile
);
953 wxXmlNode
*prev
= NULL
;
954 for ( wxXmlNode
*n
= node
->GetChildren();
958 if ( indentstep
>= 0 && n
->GetType() != wxXML_TEXT_NODE
)
960 rc
= OutputIndentation(stream
, indent
+ indentstep
,
965 rc
= OutputNode(stream
, n
, indent
+ indentstep
,
966 convMem
, convFile
, indentstep
);
971 if ( rc
&& indentstep
>= 0 &&
972 prev
&& prev
->GetType() != wxXML_TEXT_NODE
)
974 rc
= OutputIndentation(stream
, indent
, convMem
, convFile
);
979 rc
= OutputString(stream
, wxS("</"), convMem
, convFile
) &&
980 OutputString(stream
, node
->GetName(),
981 convMem
, convFile
) &&
982 OutputString(stream
, wxS(">"), convMem
, convFile
);
985 else // no children, output "<foo/>"
987 rc
= OutputString(stream
, wxS("/>"), convMem
, convFile
);
991 case wxXML_COMMENT_NODE
:
992 rc
= OutputString(stream
, wxS("<!--"), convMem
, convFile
) &&
993 OutputString(stream
, node
->GetContent(), convMem
, convFile
) &&
994 OutputString(stream
, wxS("-->"), convMem
, convFile
);
998 wxFAIL_MSG("unsupported node type");
1005 } // anonymous namespace
1007 bool wxXmlDocument::Save(wxOutputStream
& stream
, int indentstep
) const
1012 wxScopedPtr
<wxMBConv
> convMem
, convFile
;
1015 convFile
.reset(new wxCSConv(GetFileEncoding()));
1017 if ( GetFileEncoding().CmpNoCase(GetEncoding()) != 0 )
1019 convFile
.reset(new wxCSConv(GetFileEncoding()));
1020 convMem
.reset(new wxCSConv(GetEncoding()));
1022 //else: file and in-memory encodings are the same, no conversion needed
1025 return OutputString(stream
,
1028 wxS("<?xml version=\"%s\" encoding=\"%s\"?>\n"),
1029 GetVersion(), GetFileEncoding()
1033 OutputNode(stream
, GetRoot(), 0,
1034 convMem
.get(), convFile
.get(), indentstep
) &&
1035 OutputString(stream
, wxS("\n"), convMem
.get(), convFile
.get());