1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/xml/xml.cpp
3 // Purpose: wxXmlDocument - XML parser & data holder class
4 // Author: Vaclav Slavik
7 // Copyright: (c) 2000 Vaclav Slavik
8 // Licence: wxWindows licence
9 /////////////////////////////////////////////////////////////////////////////
11 // For compilers that support precompilation, includes "wx.h".
12 #include "wx/wxprec.h"
20 #include "wx/xml/xml.h"
28 #include "wx/wfstream.h"
29 #include "wx/datstrm.h"
30 #include "wx/zstream.h"
31 #include "wx/strconv.h"
33 #include "expat.h" // from Expat
35 // DLL options compatibility check:
36 WX_CHECK_BUILD_OPTIONS("wxXML")
39 IMPLEMENT_CLASS(wxXmlDocument
, wxObject
)
42 // a private utility used by wxXML
43 static bool wxIsWhiteOnly(const wxString
& buf
);
46 //-----------------------------------------------------------------------------
48 //-----------------------------------------------------------------------------
50 wxXmlNode::wxXmlNode(wxXmlNode
*parent
,wxXmlNodeType type
,
51 const wxString
& name
, const wxString
& content
,
52 wxXmlAttribute
*attrs
, wxXmlNode
*next
, int lineNo
)
53 : m_type(type
), m_name(name
), m_content(content
),
54 m_attrs(attrs
), m_parent(parent
),
55 m_children(NULL
), m_next(next
),
60 if (m_parent
->m_children
)
62 m_next
= m_parent
->m_children
;
63 m_parent
->m_children
= this;
66 m_parent
->m_children
= this;
70 wxXmlNode::wxXmlNode(wxXmlNodeType type
, const wxString
& name
,
71 const wxString
& content
,
73 : m_type(type
), m_name(name
), m_content(content
),
74 m_attrs(NULL
), m_parent(NULL
),
75 m_children(NULL
), m_next(NULL
),
79 wxXmlNode::wxXmlNode(const wxXmlNode
& node
)
86 wxXmlNode::~wxXmlNode()
89 for (c
= m_children
; c
; c
= c2
)
95 wxXmlAttribute
*p
, *p2
;
96 for (p
= m_attrs
; p
; p
= p2
)
103 wxXmlNode
& wxXmlNode::operator=(const wxXmlNode
& node
)
106 wxDELETE(m_children
);
111 void wxXmlNode::DoCopy(const wxXmlNode
& node
)
113 m_type
= node
.m_type
;
114 m_name
= node
.m_name
;
115 m_content
= node
.m_content
;
116 m_lineNo
= node
.m_lineNo
;
119 wxXmlNode
*n
= node
.m_children
;
122 AddChild(new wxXmlNode(*n
));
127 wxXmlAttribute
*p
= node
.m_attrs
;
130 AddAttribute(p
->GetName(), p
->GetValue());
135 bool wxXmlNode::HasAttribute(const wxString
& attrName
) const
137 wxXmlAttribute
*attr
= GetAttributes();
141 if (attr
->GetName() == attrName
) return true;
142 attr
= attr
->GetNext();
148 bool wxXmlNode::GetAttribute(const wxString
& attrName
, wxString
*value
) const
150 wxCHECK_MSG( value
, false, "value argument must not be NULL" );
152 wxXmlAttribute
*attr
= GetAttributes();
156 if (attr
->GetName() == attrName
)
158 *value
= attr
->GetValue();
161 attr
= attr
->GetNext();
167 wxString
wxXmlNode::GetAttribute(const wxString
& attrName
, const wxString
& defaultVal
) const
170 if (GetAttribute(attrName
, &tmp
))
176 void wxXmlNode::AddChild(wxXmlNode
*child
)
178 if (m_children
== NULL
)
182 wxXmlNode
*ch
= m_children
;
183 while (ch
->m_next
) ch
= ch
->m_next
;
186 child
->m_next
= NULL
;
187 child
->m_parent
= this;
190 // inserts a new node in front of 'followingNode'
191 bool wxXmlNode::InsertChild(wxXmlNode
*child
, wxXmlNode
*followingNode
)
193 wxCHECK_MSG( child
, false, "cannot insert a NULL node!" );
194 wxCHECK_MSG( child
->m_parent
== NULL
, false, "node already has a parent" );
195 wxCHECK_MSG( child
->m_next
== NULL
, false, "node already has m_next" );
196 wxCHECK_MSG( followingNode
== NULL
|| followingNode
->GetParent() == this,
198 "wxXmlNode::InsertChild - followingNode has incorrect parent" );
200 // this is for backward compatibility, NULL was allowed here thanks to
201 // the confusion about followingNode's meaning
202 if ( followingNode
== NULL
)
203 followingNode
= m_children
;
205 if ( m_children
== followingNode
)
207 child
->m_next
= m_children
;
212 wxXmlNode
*ch
= m_children
;
213 while ( ch
&& ch
->m_next
!= followingNode
)
217 wxFAIL_MSG( "followingNode has this node as parent, but couldn't be found among children" );
221 child
->m_next
= followingNode
;
225 child
->m_parent
= this;
229 // inserts a new node right after 'precedingNode'
230 bool wxXmlNode::InsertChildAfter(wxXmlNode
*child
, wxXmlNode
*precedingNode
)
232 wxCHECK_MSG( child
, false, "cannot insert a NULL node!" );
233 wxCHECK_MSG( child
->m_parent
== NULL
, false, "node already has a parent" );
234 wxCHECK_MSG( child
->m_next
== NULL
, false, "node already has m_next" );
235 wxCHECK_MSG( precedingNode
== NULL
|| precedingNode
->m_parent
== this, false,
236 "precedingNode has wrong parent" );
240 child
->m_next
= precedingNode
->m_next
;
241 precedingNode
->m_next
= child
;
243 else // precedingNode == NULL
245 wxCHECK_MSG( m_children
== NULL
, false,
246 "NULL precedingNode only makes sense when there are no children" );
248 child
->m_next
= m_children
;
252 child
->m_parent
= this;
256 bool wxXmlNode::RemoveChild(wxXmlNode
*child
)
258 if (m_children
== NULL
)
260 else if (m_children
== child
)
262 m_children
= child
->m_next
;
263 child
->m_parent
= NULL
;
264 child
->m_next
= NULL
;
269 wxXmlNode
*ch
= m_children
;
272 if (ch
->m_next
== child
)
274 ch
->m_next
= child
->m_next
;
275 child
->m_parent
= NULL
;
276 child
->m_next
= NULL
;
285 void wxXmlNode::AddAttribute(const wxString
& name
, const wxString
& value
)
287 AddProperty(name
, value
);
290 void wxXmlNode::AddAttribute(wxXmlAttribute
*attr
)
295 bool wxXmlNode::DeleteAttribute(const wxString
& name
)
297 return DeleteProperty(name
);
300 void wxXmlNode::AddProperty(const wxString
& name
, const wxString
& value
)
302 AddProperty(new wxXmlAttribute(name
, value
, NULL
));
305 void wxXmlNode::AddProperty(wxXmlAttribute
*attr
)
311 wxXmlAttribute
*p
= m_attrs
;
312 while (p
->GetNext()) p
= p
->GetNext();
317 bool wxXmlNode::DeleteProperty(const wxString
& name
)
319 wxXmlAttribute
*attr
;
324 else if (m_attrs
->GetName() == name
)
327 m_attrs
= attr
->GetNext();
335 wxXmlAttribute
*p
= m_attrs
;
338 if (p
->GetNext()->GetName() == name
)
341 p
->SetNext(attr
->GetNext());
352 wxString
wxXmlNode::GetNodeContent() const
354 wxXmlNode
*n
= GetChildren();
358 if (n
->GetType() == wxXML_TEXT_NODE
||
359 n
->GetType() == wxXML_CDATA_SECTION_NODE
)
360 return n
->GetContent();
363 return wxEmptyString
;
366 int wxXmlNode::GetDepth(wxXmlNode
*grandparent
) const
368 const wxXmlNode
*n
= this;
375 if (n
== grandparent
)
383 bool wxXmlNode::IsWhitespaceOnly() const
385 return wxIsWhiteOnly(m_content
);
390 //-----------------------------------------------------------------------------
392 //-----------------------------------------------------------------------------
394 wxXmlDocument::wxXmlDocument()
395 : m_version(wxT("1.0")), m_fileEncoding(wxT("utf-8")), m_root(NULL
)
398 m_encoding
= wxT("UTF-8");
402 wxXmlDocument::wxXmlDocument(const wxString
& filename
, const wxString
& encoding
)
403 :wxObject(), m_root(NULL
)
405 if ( !Load(filename
, encoding
) )
411 wxXmlDocument::wxXmlDocument(wxInputStream
& stream
, const wxString
& encoding
)
412 :wxObject(), m_root(NULL
)
414 if ( !Load(stream
, encoding
) )
420 wxXmlDocument::wxXmlDocument(const wxXmlDocument
& doc
)
426 wxXmlDocument
& wxXmlDocument::operator=(const wxXmlDocument
& doc
)
433 void wxXmlDocument::DoCopy(const wxXmlDocument
& doc
)
435 m_version
= doc
.m_version
;
437 m_encoding
= doc
.m_encoding
;
439 m_fileEncoding
= doc
.m_fileEncoding
;
442 m_root
= new wxXmlNode(*doc
.m_root
);
447 bool wxXmlDocument::Load(const wxString
& filename
, const wxString
& encoding
, int flags
)
449 wxFileInputStream
stream(filename
);
452 return Load(stream
, encoding
, flags
);
455 bool wxXmlDocument::Save(const wxString
& filename
, int indentstep
) const
457 wxFileOutputStream
stream(filename
);
460 return Save(stream
, indentstep
);
465 //-----------------------------------------------------------------------------
466 // wxXmlDocument loading routines
467 //-----------------------------------------------------------------------------
469 // converts Expat-produced string in UTF-8 into wxString using the specified
470 // conv or keep in UTF-8 if conv is NULL
471 static wxString
CharToString(wxMBConv
*conv
,
472 const char *s
, size_t len
= wxString::npos
)
477 // there can be no embedded NULs in this string so we don't need the
478 // output length, it will be NUL-terminated
479 const wxWCharBuffer
wbuf(
480 wxConvUTF8
.cMB2WC(s
, len
== wxString::npos
? wxNO_LEN
: len
, NULL
));
482 return wxString(wbuf
, *conv
);
484 // else: the string is wanted in UTF-8
485 #endif // !wxUSE_UNICODE
488 return wxString::FromUTF8Unchecked(s
, len
);
491 // returns true if the given string contains only whitespaces
492 bool wxIsWhiteOnly(const wxString
& buf
)
494 for ( wxString::const_iterator i
= buf
.begin(); i
!= buf
.end(); ++i
)
497 if ( c
!= wxT(' ') && c
!= wxT('\t') && c
!= wxT('\n') && c
!= wxT('\r'))
504 struct wxXmlParsingContext
506 wxXmlParsingContext()
512 removeWhiteOnlyNodes(false)
518 wxXmlNode
*node
; // the node being parsed
519 wxXmlNode
*lastChild
; // the last child of "node"
520 wxXmlNode
*lastAsText
; // the last _text_ child of "node"
523 bool removeWhiteOnlyNodes
;
// checks that ctx->lastChild is in consistent state: it must either be NULL
// or have no next sibling and have ctx->node as its parent
//
// the argument is parenthesized at every use so that any pointer expression
// (not only a plain identifier) can be passed
#define ASSERT_LAST_CHILD_OK(ctx)                                   \
    wxASSERT( (ctx)->lastChild == NULL ||                           \
              (ctx)->lastChild->GetNext() == NULL );                \
    wxASSERT( (ctx)->lastChild == NULL ||                           \
              (ctx)->lastChild->GetParent() == (ctx)->node )
534 static void StartElementHnd(void *userData
, const char *name
, const char **atts
)
536 wxXmlParsingContext
*ctx
= (wxXmlParsingContext
*)userData
;
537 wxXmlNode
*node
= new wxXmlNode(wxXML_ELEMENT_NODE
,
538 CharToString(ctx
->conv
, name
),
540 XML_GetCurrentLineNumber(ctx
->parser
));
541 const char **a
= atts
;
543 // add node attributes
546 node
->AddAttribute(CharToString(ctx
->conv
, a
[0]), CharToString(ctx
->conv
, a
[1]));
550 if (ctx
->root
== NULL
)
556 ASSERT_LAST_CHILD_OK(ctx
);
557 ctx
->node
->InsertChildAfter(node
, ctx
->lastChild
);
560 ctx
->lastAsText
= NULL
;
561 ctx
->lastChild
= NULL
; // our new node "node" has no children yet
566 static void EndElementHnd(void *userData
, const char* WXUNUSED(name
))
568 wxXmlParsingContext
*ctx
= (wxXmlParsingContext
*)userData
;
570 // we're exiting the last children of ctx->node->GetParent() and going
571 // back one level up, so current value of ctx->node points to the last
572 // child of ctx->node->GetParent()
573 ctx
->lastChild
= ctx
->node
;
575 ctx
->node
= ctx
->node
->GetParent();
576 ctx
->lastAsText
= NULL
;
579 static void TextHnd(void *userData
, const char *s
, int len
)
581 wxXmlParsingContext
*ctx
= (wxXmlParsingContext
*)userData
;
582 wxString str
= CharToString(ctx
->conv
, s
, len
);
586 ctx
->lastAsText
->SetContent(ctx
->lastAsText
->GetContent() + str
);
590 bool whiteOnly
= false;
591 if (ctx
->removeWhiteOnlyNodes
)
592 whiteOnly
= wxIsWhiteOnly(str
);
596 wxXmlNode
*textnode
=
597 new wxXmlNode(wxXML_TEXT_NODE
, wxT("text"), str
,
598 XML_GetCurrentLineNumber(ctx
->parser
));
600 ASSERT_LAST_CHILD_OK(ctx
);
601 ctx
->node
->InsertChildAfter(textnode
, ctx
->lastChild
);
602 ctx
->lastChild
= ctx
->lastAsText
= textnode
;
607 static void StartCdataHnd(void *userData
)
609 wxXmlParsingContext
*ctx
= (wxXmlParsingContext
*)userData
;
611 wxXmlNode
*textnode
=
612 new wxXmlNode(wxXML_CDATA_SECTION_NODE
, wxT("cdata"), wxT(""),
613 XML_GetCurrentLineNumber(ctx
->parser
));
615 ASSERT_LAST_CHILD_OK(ctx
);
616 ctx
->node
->InsertChildAfter(textnode
, ctx
->lastChild
);
617 ctx
->lastChild
= ctx
->lastAsText
= textnode
;
620 static void CommentHnd(void *userData
, const char *data
)
622 wxXmlParsingContext
*ctx
= (wxXmlParsingContext
*)userData
;
626 wxXmlNode
*commentnode
=
627 new wxXmlNode(wxXML_COMMENT_NODE
,
628 wxT("comment"), CharToString(ctx
->conv
, data
),
629 XML_GetCurrentLineNumber(ctx
->parser
));
631 ASSERT_LAST_CHILD_OK(ctx
);
632 ctx
->node
->InsertChildAfter(commentnode
, ctx
->lastChild
);
633 ctx
->lastChild
= commentnode
;
635 //else: ctx->node == NULL happens if there is a comment before
636 // the root element. We current don't have a way to represent
637 // these in wxXmlDocument (FIXME).
639 ctx
->lastAsText
= NULL
;
642 static void DefaultHnd(void *userData
, const char *s
, int len
)
645 if (len
> 6 && memcmp(s
, "<?xml ", 6) == 0)
647 wxXmlParsingContext
*ctx
= (wxXmlParsingContext
*)userData
;
649 wxString buf
= CharToString(ctx
->conv
, s
, (size_t)len
);
651 pos
= buf
.Find(wxT("encoding="));
652 if (pos
!= wxNOT_FOUND
)
653 ctx
->encoding
= buf
.Mid(pos
+ 10).BeforeFirst(buf
[(size_t)pos
+9]);
654 pos
= buf
.Find(wxT("version="));
655 if (pos
!= wxNOT_FOUND
)
656 ctx
->version
= buf
.Mid(pos
+ 9).BeforeFirst(buf
[(size_t)pos
+8]);
660 static int UnknownEncodingHnd(void * WXUNUSED(encodingHandlerData
),
661 const XML_Char
*name
, XML_Encoding
*info
)
663 // We must build conversion table for expat. The easiest way to do so
664 // is to let wxCSConv convert a string containing all characters to
665 // wide character representation:
673 for (i
= 0; i
< 255; i
++)
675 mbBuf
[0] = (char)(i
+1);
676 if (conv
.MB2WC(wcBuf
, mbBuf
, 2) == (size_t)-1)
678 // invalid/undefined byte in the encoding:
681 info
->map
[i
+1] = (int)wcBuf
[0];
685 info
->convert
= NULL
;
686 info
->release
= NULL
;
693 bool wxXmlDocument::Load(wxInputStream
& stream
, const wxString
& encoding
, int flags
)
698 m_encoding
= encoding
;
701 const size_t BUFSIZE
= 1024;
703 wxXmlParsingContext ctx
;
705 XML_Parser parser
= XML_ParserCreate(NULL
);
707 ctx
.encoding
= wxT("UTF-8"); // default in absence of encoding=""
710 if ( encoding
.CmpNoCase(wxT("UTF-8")) != 0 )
711 ctx
.conv
= new wxCSConv(encoding
);
713 ctx
.removeWhiteOnlyNodes
= (flags
& wxXMLDOC_KEEP_WHITESPACE_NODES
) == 0;
716 XML_SetUserData(parser
, (void*)&ctx
);
717 XML_SetElementHandler(parser
, StartElementHnd
, EndElementHnd
);
718 XML_SetCharacterDataHandler(parser
, TextHnd
);
719 XML_SetStartCdataSectionHandler(parser
, StartCdataHnd
);
720 XML_SetCommentHandler(parser
, CommentHnd
);
721 XML_SetDefaultHandler(parser
, DefaultHnd
);
722 XML_SetUnknownEncodingHandler(parser
, UnknownEncodingHnd
, NULL
);
727 size_t len
= stream
.Read(buf
, BUFSIZE
).LastRead();
728 done
= (len
< BUFSIZE
);
729 if (!XML_Parse(parser
, buf
, len
, done
))
731 wxString
error(XML_ErrorString(XML_GetErrorCode(parser
)),
733 wxLogError(_("XML parsing error: '%s' at line %d"),
735 XML_GetCurrentLineNumber(parser
));
743 if (!ctx
.version
.empty())
744 SetVersion(ctx
.version
);
745 if (!ctx
.encoding
.empty())
746 SetFileEncoding(ctx
.encoding
);
754 XML_ParserFree(parser
);
766 //-----------------------------------------------------------------------------
767 // wxXmlDocument saving routines
768 //-----------------------------------------------------------------------------
770 // write string to output:
771 inline static void OutputString(wxOutputStream
& stream
, const wxString
& str
,
772 wxMBConv
*convMem
= NULL
,
773 wxMBConv
*convFile
= NULL
)
779 wxUnusedVar(convMem
);
781 const wxWX2MBbuf
buf(str
.mb_str(*(convFile
? convFile
: &wxConvUTF8
)));
782 stream
.Write((const char*)buf
, strlen((const char*)buf
));
783 #else // !wxUSE_UNICODE
784 if ( convFile
&& convMem
)
786 wxString
str2(str
.wc_str(*convMem
), *convFile
);
787 stream
.Write(str2
.mb_str(), str2
.Len());
789 else // no conversions to do
791 stream
.Write(str
.mb_str(), str
.Len());
793 #endif // wxUSE_UNICODE/!wxUSE_UNICODE
796 // flags for OutputStringEnt()
799 XML_ESCAPE_QUOTES
= 1
802 // Same as above, but create entities first.
803 // Translates '<' to "&lt;", '>' to "&gt;" and '&' to "&amp;"
804 static void OutputStringEnt(wxOutputStream
& stream
, const wxString
& str
,
805 wxMBConv
*convMem
= NULL
,
806 wxMBConv
*convFile
= NULL
,
815 for (i
= 0; i
< len
; i
++)
818 if (c
== wxT('<') || c
== wxT('>') ||
819 (c
== wxT('&') && str
.Mid(i
+1, 4) != wxT("amp;")) ||
820 ((flags
& XML_ESCAPE_QUOTES
) && c
== wxT('"')))
822 OutputString(stream
, str
.Mid(last
, i
- last
), convMem
, convFile
);
826 OutputString(stream
, wxT("<"));
829 OutputString(stream
, wxT(">"));
832 OutputString(stream
, wxT("&"));
835 OutputString(stream
, wxT("""));
843 OutputString(stream
, str
.Mid(last
, i
- last
), convMem
, convFile
);
846 inline static void OutputIndentation(wxOutputStream
& stream
, int indent
)
848 wxString str
= wxT("\n");
849 for (int i
= 0; i
< indent
; i
++)
850 str
<< wxT(' ') << wxT(' ');
851 OutputString(stream
, str
);
854 static void OutputNode(wxOutputStream
& stream
, wxXmlNode
*node
, int indent
,
855 wxMBConv
*convMem
, wxMBConv
*convFile
, int indentstep
)
858 wxXmlAttribute
*attr
;
860 switch (node
->GetType())
862 case wxXML_CDATA_SECTION_NODE
:
863 OutputString( stream
, wxT("<![CDATA["));
864 OutputString( stream
, node
->GetContent() );
865 OutputString( stream
, wxT("]]>") );
868 case wxXML_TEXT_NODE
:
869 OutputStringEnt(stream
, node
->GetContent(), convMem
, convFile
);
872 case wxXML_ELEMENT_NODE
:
873 OutputString(stream
, wxT("<"));
874 OutputString(stream
, node
->GetName());
876 attr
= node
->GetAttributes();
879 OutputString(stream
, wxT(" ") + attr
->GetName() + wxT("=\""));
880 OutputStringEnt(stream
, attr
->GetValue(), convMem
, convFile
,
882 OutputString(stream
, wxT("\""));
883 attr
= attr
->GetNext();
886 if (node
->GetChildren())
888 OutputString(stream
, wxT(">"));
890 n
= node
->GetChildren();
893 if (indentstep
>= 0 && n
&& n
->GetType() != wxXML_TEXT_NODE
)
894 OutputIndentation(stream
, indent
+ indentstep
);
895 OutputNode(stream
, n
, indent
+ indentstep
, convMem
, convFile
, indentstep
);
899 if (indentstep
>= 0 && prev
&& prev
->GetType() != wxXML_TEXT_NODE
)
900 OutputIndentation(stream
, indent
);
901 OutputString(stream
, wxT("</"));
902 OutputString(stream
, node
->GetName());
903 OutputString(stream
, wxT(">"));
906 OutputString(stream
, wxT("/>"));
909 case wxXML_COMMENT_NODE
:
910 OutputString(stream
, wxT("<!--"));
911 OutputString(stream
, node
->GetContent(), convMem
, convFile
);
912 OutputString(stream
, wxT("-->"));
916 wxFAIL_MSG(wxT("unsupported node type"));
920 bool wxXmlDocument::Save(wxOutputStream
& stream
, int indentstep
) const
927 wxMBConv
*convMem
= NULL
,
931 convFile
= new wxCSConv(GetFileEncoding());
934 if ( GetFileEncoding().CmpNoCase(GetEncoding()) != 0 )
936 convFile
= new wxCSConv(GetFileEncoding());
937 convMem
= new wxCSConv(GetEncoding());
939 else // file and in-memory encodings are the same, no conversion needed
946 s
.Printf(wxT("<?xml version=\"%s\" encoding=\"%s\"?>\n"),
947 GetVersion().c_str(), GetFileEncoding().c_str());
948 OutputString(stream
, s
);
950 OutputNode(stream
, GetRoot(), 0, convMem
, convFile
, indentstep
);
951 OutputString(stream
, wxT("\n"));