1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/xml/xml.cpp
3 // Purpose: wxXmlDocument - XML parser & data holder class
4 // Author: Vaclav Slavik
7 // Copyright: (c) 2000 Vaclav Slavik
8 // Licence: wxWindows licence
9 /////////////////////////////////////////////////////////////////////////////
11 // For compilers that support precompilation, includes "wx.h".
12 #include "wx/wxprec.h"
20 #include "wx/xml/xml.h"
28 #include "wx/wfstream.h"
29 #include "wx/datstrm.h"
30 #include "wx/zstream.h"
31 #include "wx/strconv.h"
33 #include "expat.h" // from Expat
35 // DLL options compatibility check:
36 WX_CHECK_BUILD_OPTIONS("wxXML")
// wxWidgets class-registration macro: names wxXmlDocument with wxObject as
// its base class.
39 IMPLEMENT_CLASS(wxXmlDocument
, wxObject
)
42 // a private utility used by wxXML
// (forward declaration only; the definition appears further down in this file)
43 static bool wxIsWhiteOnly(const wxString
& buf
);
46 //-----------------------------------------------------------------------------
48 //-----------------------------------------------------------------------------
// Constructs a node of the given type, name and content, storing the supplied
// attribute list, parent and next-sibling pointers; m_children starts as NULL.
// The body then links the new node into the parent's child list as its first
// child; any children the parent already had follow it through m_next.
// NOTE(review): a guard that skips the linking when parent is NULL is not
// visible in this excerpt -- confirm it exists.
50 wxXmlNode::wxXmlNode(wxXmlNode
*parent
,wxXmlNodeType type
,
51 const wxString
& name
, const wxString
& content
,
52 wxXmlAttribute
*attrs
, wxXmlNode
*next
)
53 : m_type(type
), m_name(name
), m_content(content
),
54 m_attrs(attrs
), m_parent(parent
),
55 m_children(NULL
), m_next(next
)
// parent already has children: the existing list becomes this node's tail
59 if (m_parent
->m_children
)
61 m_next
= m_parent
->m_children
;
62 m_parent
->m_children
= this;
// parent had no children: this node becomes the only child
65 m_parent
->m_children
= this;
// Constructs a detached node of the given type, name and content: the
// attribute list, parent, children and next-sibling pointers are all
// initialised to NULL.
69 wxXmlNode::wxXmlNode(wxXmlNodeType type
, const wxString
& name
,
70 const wxString
& content
)
71 : m_type(type
), m_name(name
), m_content(content
),
72 m_attrs(NULL
), m_parent(NULL
),
73 m_children(NULL
), m_next(NULL
)
// Copy constructor.
// NOTE(review): the body is not visible in this excerpt; presumably it
// delegates to DoCopy() -- confirm.
76 wxXmlNode::wxXmlNode(const wxXmlNode
& node
)
// Destructor: walks the child list (c, advancing to c2) and then the
// attribute list (p, advancing to p2).
// NOTE(review): the loop bodies are not visible here; presumably each one
// saves the successor into c2/p2 and deletes the current element -- confirm.
83 wxXmlNode::~wxXmlNode()
86 for (c
= m_children
; c
; c
= c2
)
92 wxXmlAttribute
*p
, *p2
;
93 for (p
= m_attrs
; p
; p
= p2
)
// Assignment operator: releases the current child list with wxDELETE before
// (presumably) copying from node.
// NOTE(review): no self-assignment guard is visible in this excerpt; confirm
// that `n = n` does not copy from already-deleted children.
100 wxXmlNode
& wxXmlNode::operator=(const wxXmlNode
& node
)
103 wxDELETE(m_children
);
// Copies node into this object: type, name and content are assigned
// directly; children are duplicated with the copy constructor and attached
// through AddChild(); attributes are re-created through AddAttribute() from
// their name/value pairs.
// NOTE(review): the loops that advance n and p are not visible in this
// excerpt.
108 void wxXmlNode::DoCopy(const wxXmlNode
& node
)
110 m_type
= node
.m_type
;
111 m_name
= node
.m_name
;
112 m_content
= node
.m_content
;
// start of the source node's child list
115 wxXmlNode
*n
= node
.m_children
;
118 AddChild(new wxXmlNode(*n
));
// start of the source node's attribute list
123 wxXmlAttribute
*p
= node
.m_attrs
;
126 AddAttribute(p
->GetName(), p
->GetValue());
// Returns true as soon as an attribute whose name equals attrName is found
// while walking the list returned by GetAttributes().
// NOTE(review): the loop header and the fall-through return are not visible;
// presumably false is returned when no attribute matches.
131 bool wxXmlNode::HasAttribute(const wxString
& attrName
) const
133 wxXmlAttribute
*attr
= GetAttributes();
137 if (attr
->GetName() == attrName
) return true;
138 attr
= attr
->GetNext();
// Looks up the attribute called attrName and, when found, stores its value
// in *value.
// value must not be NULL: wxCHECK_MSG is given false as the result to return
// when that check fails.
// NOTE(review): the return statements are not visible; presumably true on a
// match and false otherwise.
144 bool wxXmlNode::GetAttribute(const wxString
& attrName
, wxString
*value
) const
146 wxCHECK_MSG( value
, false, "value argument must not be NULL" );
148 wxXmlAttribute
*attr
= GetAttributes();
152 if (attr
->GetName() == attrName
)
154 *value
= attr
->GetValue();
157 attr
= attr
->GetNext();
// Convenience overload returning the attribute value as a wxString. It calls
// the bool overload with a local temporary (tmp).
// NOTE(review): the return statements are not visible; presumably tmp is
// returned on success and defaultVal otherwise.
163 wxString
wxXmlNode::GetAttribute(const wxString
& attrName
, const wxString
& defaultVal
) const
166 if (GetAttribute(attrName
, &tmp
))
// Adds child to this node. When children already exist the sibling chain is
// walked to its last element (ch); the child's m_next is then cleared and its
// m_parent set to this node.
// NOTE(review): the statements that actually store child (into m_children or
// ch->m_next) are not visible in this excerpt.
172 void wxXmlNode::AddChild(wxXmlNode
*child
)
174 if (m_children
== NULL
)
178 wxXmlNode
*ch
= m_children
;
// advance to the last existing child
179 while (ch
->m_next
) ch
= ch
->m_next
;
182 child
->m_next
= NULL
;
183 child
->m_parent
= this;
// Inserts child into this node's child list in front of before_node.
// Preconditions enforced with wxCHECK_MSG (false is the failure result):
// before_node is NULL or one of this node's children, and child is non-NULL.
// Cases visible below:
//  - before_node is the current first child;
//  - no children exist: fails if before_node was given;
//  - before_node is NULL: child is linked in front of the existing list;
//  - otherwise the predecessor of before_node is searched for, failing when
//    it cannot be found.
// NOTE(review): the assignments to m_children / ch->m_next and the `return
// true` statements are not visible in this excerpt.
186 bool wxXmlNode::InsertChild(wxXmlNode
*child
, wxXmlNode
*before_node
)
188 wxCHECK_MSG(before_node
== NULL
|| before_node
->GetParent() == this, false,
189 wxT("wxXmlNode::InsertChild - the node has incorrect parent"));
190 wxCHECK_MSG(child
, false, wxT("Cannot insert a NULL pointer!"));
192 if (m_children
== before_node
)
194 else if (m_children
== NULL
)
196 if (before_node
!= NULL
)
197 return false; // we have no children so we don't need to search
200 else if (before_node
== NULL
)
203 child
->m_parent
= this;
204 child
->m_next
= m_children
;
210 wxXmlNode
*ch
= m_children
;
// stop at the node whose successor is before_node (or at the end of the list)
211 while (ch
&& ch
->m_next
!= before_node
) ch
= ch
->m_next
;
213 return false; // before_node not found
217 child
->m_parent
= this;
218 child
->m_next
= before_node
;
// Unlinks child from this node's child list. If child is the first child,
// m_children is advanced past it; otherwise the list is scanned for the node
// whose m_next is child and that link is bypassed. In both cases the child's
// m_parent and m_next are reset to NULL; no deletion of child is visible.
// NOTE(review): the loop header and the return statements are not visible in
// this excerpt.
222 bool wxXmlNode::RemoveChild(wxXmlNode
*child
)
224 if (m_children
== NULL
)
226 else if (m_children
== child
)
228 m_children
= child
->m_next
;
229 child
->m_parent
= NULL
;
230 child
->m_next
= NULL
;
235 wxXmlNode
*ch
= m_children
;
238 if (ch
->m_next
== child
)
240 ch
->m_next
= child
->m_next
;
241 child
->m_parent
= NULL
;
242 child
->m_next
= NULL
;
// Allocates a new wxXmlAttribute from name and value (its third constructor
// argument is NULL) and passes it to the AddProperty(wxXmlAttribute*)
// overload.
251 void wxXmlNode::AddProperty(const wxString
& name
, const wxString
& value
)
253 AddProperty(new wxXmlAttribute(name
, value
, NULL
));
// Adds an already-constructed attribute to this node. The visible code walks
// m_attrs to its last element through GetNext().
// NOTE(review): the empty-list branch and the statement that links attr
// after p are not visible in this excerpt.
256 void wxXmlNode::AddProperty(wxXmlAttribute
*attr
)
262 wxXmlAttribute
*p
= m_attrs
;
263 while (p
->GetNext()) p
= p
->GetNext();
// Removes the attribute called name from this node's attribute list.
// Two cases are visible: the match is the list head (m_attrs moves on to the
// next attribute), or the match is the successor of p (p is re-linked to the
// attribute after the match).
// NOTE(review): the assignments to attr, the deletion of the removed
// attribute and the return statements are not visible in this excerpt.
268 bool wxXmlNode::DeleteProperty(const wxString
& name
)
270 wxXmlAttribute
*attr
;
275 else if (m_attrs
->GetName() == name
)
278 m_attrs
= attr
->GetNext();
286 wxXmlAttribute
*p
= m_attrs
;
289 if (p
->GetNext()->GetName() == name
)
292 p
->SetNext(attr
->GetNext());
// Returns the content of a child that is a text node or a CDATA section,
// scanning from GetChildren(); returns wxEmptyString when no such child is
// found.
// NOTE(review): the loop that advances n is not visible in this excerpt.
303 wxString
wxXmlNode::GetNodeContent() const
305 wxXmlNode
*n
= GetChildren();
309 if (n
->GetType() == wxXML_TEXT_NODE
||
310 n
->GetType() == wxXML_CDATA_SECTION_NODE
)
311 return n
->GetContent();
314 return wxEmptyString
;
// Computes a depth for this node relative to grandparent. The visible code
// starts at this node (n) and compares n against grandparent.
// NOTE(review): the loop, the counter and the return values are not visible
// in this excerpt; presumably n climbs through the parents and the count of
// steps is returned -- confirm.
317 int wxXmlNode::GetDepth(wxXmlNode
*grandparent
) const
319 const wxXmlNode
*n
= this;
326 if (n
== grandparent
)
// Returns the result of wxIsWhiteOnly() applied to this node's own content
// (m_content); children are not inspected.
334 bool wxXmlNode::IsWhitespaceOnly() const
336 return wxIsWhiteOnly(m_content
);
341 //-----------------------------------------------------------------------------
343 //-----------------------------------------------------------------------------
// Default constructor: version "1.0", file encoding "utf-8", no root node;
// the body sets the in-memory encoding (m_encoding) to "UTF-8".
// NOTE(review): any preprocessor guard around the m_encoding assignment is
// not visible in this excerpt.
345 wxXmlDocument::wxXmlDocument()
346 : m_version(wxT("1.0")), m_fileEncoding(wxT("utf-8")), m_root(NULL
)
349 m_encoding
= wxT("UTF-8");
// Constructs a document and immediately tries to Load() it from filename
// using the given encoding; m_root starts as NULL.
// NOTE(review): what happens when Load() fails is not visible in this
// excerpt.
353 wxXmlDocument::wxXmlDocument(const wxString
& filename
, const wxString
& encoding
)
354 :wxObject(), m_root(NULL
)
356 if ( !Load(filename
, encoding
) )
// Constructs a document and immediately tries to Load() it from stream using
// the given encoding; m_root starts as NULL.
// NOTE(review): what happens when Load() fails is not visible in this
// excerpt.
362 wxXmlDocument::wxXmlDocument(wxInputStream
& stream
, const wxString
& encoding
)
363 :wxObject(), m_root(NULL
)
365 if ( !Load(stream
, encoding
) )
// Copy constructor.
// NOTE(review): the body is not visible in this excerpt; presumably it
// delegates to DoCopy() -- confirm.
371 wxXmlDocument::wxXmlDocument(const wxXmlDocument
& doc
)
// Assignment operator.
// NOTE(review): the body is not visible in this excerpt; presumably it
// releases the current root and calls DoCopy() -- confirm, including the
// self-assignment case.
377 wxXmlDocument
& wxXmlDocument::operator=(const wxXmlDocument
& doc
)
// Copies doc into this document: version, in-memory encoding and file
// encoding are assigned, and the root is duplicated with wxXmlNode's copy
// constructor.
// NOTE(review): `*doc.m_root` dereferences the source root; a NULL check for
// an empty source document is not visible in this excerpt -- confirm.
384 void wxXmlDocument::DoCopy(const wxXmlDocument
& doc
)
386 m_version
= doc
.m_version
;
388 m_encoding
= doc
.m_encoding
;
390 m_fileEncoding
= doc
.m_fileEncoding
;
393 m_root
= new wxXmlNode(*doc
.m_root
);
// Loads the document from the file called filename: opens a
// wxFileInputStream on it and returns the result of the stream-based Load()
// overload, passing encoding and flags through unchanged.
// NOTE(review): a check that the stream opened successfully is not visible
// in this excerpt.
398 bool wxXmlDocument::Load(const wxString
& filename
, const wxString
& encoding
, int flags
)
400 wxFileInputStream
stream(filename
);
403 return Load(stream
, encoding
, flags
);
// Saves the document to the file called filename: opens a
// wxFileOutputStream on it and returns the result of the stream-based Save()
// overload, passing indentstep through unchanged.
// NOTE(review): a check that the stream opened successfully is not visible
// in this excerpt.
406 bool wxXmlDocument::Save(const wxString
& filename
, int indentstep
) const
408 wxFileOutputStream
stream(filename
);
411 return Save(stream
, indentstep
);
416 //-----------------------------------------------------------------------------
417 // wxXmlDocument loading routines
418 //-----------------------------------------------------------------------------
420 // Converts an Expat-produced UTF-8 string into a wxString using the
421 // specified conv, or keeps it in UTF-8 if conv is NULL.
// s is the UTF-8 input; len is its length in bytes, or wxString::npos when
// s is NUL-terminated (mapped to wxNO_LEN below).
// NOTE(review): the opening #if and the `if (conv)` test guarding the first
// return are not visible in this excerpt; only the matching #endif is.
422 static wxString
CharToString(wxMBConv
*conv
,
423 const char *s
, size_t len
= wxString::npos
)
428 // there can be no embedded NULs in this string so we don't need the
429 // output length, it will be NUL-terminated
430 const wxWCharBuffer
wbuf(
431 wxConvUTF8
.cMB2WC(s
, len
== wxString::npos
? wxNO_LEN
: len
, NULL
));
433 return wxString(wbuf
, *conv
);
435 // else: the string is wanted in UTF-8
436 #endif // !wxUSE_UNICODE
439 return wxString::FromUTF8(s
, len
);
442 // returns true if the given string contains only whitespaces
// "Whitespace" here means exactly space, tab, line feed and carriage return;
// the test below matches any other character.
// NOTE(review): the declaration of c and the return statements are not
// visible in this excerpt.
443 bool wxIsWhiteOnly(const wxString
& buf
)
445 for ( wxString::const_iterator i
= buf
.begin(); i
!= buf
.end(); ++i
)
448 if ( c
!= wxT(' ') && c
!= wxT('\t') && c
!= wxT('\n') && c
!= wxT('\r'))
// State handed to the Expat callbacks in this file through their userData
// pointer. Only two members are visible in this excerpt; the handlers also
// use conv, root, node, encoding and version members.
455 struct wxXmlParsingContext
// text/CDATA node that TextHnd keeps appending to; reset to NULL by the
// element and comment handlers
460 wxXmlNode
*lastAsText
;
// when true, TextHnd tests new text with wxIsWhiteOnly()
463 bool removeWhiteOnlyNodes
;
// Expat start-tag callback. Creates an element node named after the tag
// (converted with CharToString), adds one attribute per name/value pair read
// from atts as a[0]/a[1], attaches the node to the current node unless no
// root exists yet, and clears lastAsText.
// NOTE(review): the loop over atts and the assignments to ctx->root and
// ctx->node are not visible in this excerpt.
467 static void StartElementHnd(void *userData
, const char *name
, const char **atts
)
469 wxXmlParsingContext
*ctx
= (wxXmlParsingContext
*)userData
;
470 wxXmlNode
*node
= new wxXmlNode(wxXML_ELEMENT_NODE
, CharToString(ctx
->conv
, name
));
471 const char **a
= atts
;
474 node
->AddAttribute(CharToString(ctx
->conv
, a
[0]), CharToString(ctx
->conv
, a
[1]));
477 if (ctx
->root
== NULL
)
480 ctx
->node
->AddChild(node
);
482 ctx
->lastAsText
= NULL
;
// Expat end-tag callback: makes the parent of the current node the new
// current node and clears lastAsText. The tag name is unused.
485 static void EndElementHnd(void *userData
, const char* WXUNUSED(name
))
487 wxXmlParsingContext
*ctx
= (wxXmlParsingContext
*)userData
;
489 ctx
->node
= ctx
->node
->GetParent();
490 ctx
->lastAsText
= NULL
;
// Expat character-data callback. Converts the len bytes at s to a wxString,
// then either appends it to the content of ctx->lastAsText or creates a new
// text node (named "text") under the current node and remembers it in
// lastAsText.
// whiteOnly is only computed when removeWhiteOnlyNodes is set.
// NOTE(review): the conditions selecting between the append and create paths
// (presumably `lastAsText != NULL` and `!whiteOnly`) are not visible here.
493 static void TextHnd(void *userData
, const char *s
, int len
)
495 wxXmlParsingContext
*ctx
= (wxXmlParsingContext
*)userData
;
496 wxString str
= CharToString(ctx
->conv
, s
, len
);
500 ctx
->lastAsText
->SetContent(ctx
->lastAsText
->GetContent() + str
);
504 bool whiteOnly
= false;
505 if (ctx
->removeWhiteOnlyNodes
)
506 whiteOnly
= wxIsWhiteOnly(str
);
510 ctx
->lastAsText
= new wxXmlNode(wxXML_TEXT_NODE
, wxT("text"), str
);
511 ctx
->node
->AddChild(ctx
->lastAsText
);
// Expat CDATA-start callback: creates an empty CDATA-section node named
// "cdata", appends it to the current node and stores it in lastAsText.
516 static void StartCdataHnd(void *userData
)
518 wxXmlParsingContext
*ctx
= (wxXmlParsingContext
*)userData
;
520 ctx
->lastAsText
= new wxXmlNode(wxXML_CDATA_SECTION_NODE
, wxT("cdata"),wxT(""));
521 ctx
->node
->AddChild(ctx
->lastAsText
);
// Expat comment callback: appends a comment node (named "comment", content
// converted with CharToString) to the current node and clears lastAsText.
// NOTE(review): the `if (ctx->node)` test that the comment below refers to is
// not visible in this excerpt.
524 static void CommentHnd(void *userData
, const char *data
)
526 wxXmlParsingContext
*ctx
= (wxXmlParsingContext
*)userData
;
530 // VS: ctx->node == NULL happens if there is a comment before
531 // the root element (e.g. wxDesigner's output). We ignore such
532 // comments, no big deal...
533 ctx
->node
->AddChild(new wxXmlNode(wxXML_COMMENT_NODE
,
534 wxT("comment"), CharToString(ctx
->conv
, data
)));
536 ctx
->lastAsText
= NULL
;
// Expat default callback, used here to read the XML declaration. Only data
// longer than 6 bytes that starts with "<?xml " is examined. The values of
// encoding= and version= are extracted and stored in ctx->encoding and
// ctx->version.
// The character right after '=' (index pos+9 / pos+8) is taken as the quote
// character, and the value is the text after it up to its next occurrence.
// NOTE(review): the declaration of pos is not visible in this excerpt.
539 static void DefaultHnd(void *userData
, const char *s
, int len
)
542 if (len
> 6 && memcmp(s
, "<?xml ", 6) == 0)
544 wxXmlParsingContext
*ctx
= (wxXmlParsingContext
*)userData
;
546 wxString buf
= CharToString(ctx
->conv
, s
, (size_t)len
);
548 pos
= buf
.Find(wxT("encoding="));
549 if (pos
!= wxNOT_FOUND
)
550 ctx
->encoding
= buf
.Mid(pos
+ 10).BeforeFirst(buf
[(size_t)pos
+9]);
551 pos
= buf
.Find(wxT("version="));
552 if (pos
!= wxNOT_FOUND
)
553 ctx
->version
= buf
.Mid(pos
+ 9).BeforeFirst(buf
[(size_t)pos
+8]);
// Expat unknown-encoding callback: fills info->map for the encoding called
// name. Byte values 1..255 are each converted on their own with conv.MB2WC()
// and the resulting wide character is stored in info->map[byte]. The
// convert and release callbacks are left NULL.
// NOTE(review): the construction of conv, the handling of map[0] and of
// invalid bytes, and the return value are not visible in this excerpt.
557 static int UnknownEncodingHnd(void * WXUNUSED(encodingHandlerData
),
558 const XML_Char
*name
, XML_Encoding
*info
)
560 // We must build conversion table for expat. The easiest way to do so
561 // is to let wxCSConv convert a string containing all characters to
562 // wide character representation:
570 for (i
= 0; i
< 255; i
++)
572 mbBuf
[0] = (char)(i
+1);
573 if (conv
.MB2WC(wcBuf
, mbBuf
, 2) == (size_t)-1)
575 // invalid/undefined byte in the encoding:
578 info
->map
[i
+1] = (int)wcBuf
[0];
582 info
->convert
= NULL
;
583 info
->release
= NULL
;
// Parses the XML read from stream with Expat.
//  - encoding is stored as the in-memory encoding; a wxCSConv is allocated
//    for it when it is not UTF-8 (case-insensitive comparison);
//  - flags: unless wxXMLDOC_KEEP_WHITESPACE_NODES is set, the context flag
//    removeWhiteOnlyNodes is enabled.
// The input is read in BUFSIZE (1024) byte chunks and fed to XML_Parse(); a
// read shorter than BUFSIZE is treated as the final chunk. Parse errors are
// logged with the Expat error string and line number. Afterwards the version
// and encoding picked up from the XML declaration, when non-empty, are
// applied with SetVersion()/SetFileEncoding(), and the parser is freed.
// NOTE(review): not visible in this excerpt -- the declarations of buf and
// done, the read loop, the handling of ctx.root, the release of ctx.conv,
// and the return value. No check of the XML_ParserCreate() result is visible
// either.
590 bool wxXmlDocument::Load(wxInputStream
& stream
, const wxString
& encoding
, int flags
)
595 m_encoding
= encoding
;
598 const size_t BUFSIZE
= 1024;
600 wxXmlParsingContext ctx
;
602 XML_Parser parser
= XML_ParserCreate(NULL
);
604 ctx
.root
= ctx
.node
= NULL
;
605 ctx
.encoding
= wxT("UTF-8"); // default in absence of encoding=""
608 if ( encoding
.CmpNoCase(wxT("UTF-8")) != 0 )
609 ctx
.conv
= new wxCSConv(encoding
);
611 ctx
.removeWhiteOnlyNodes
= (flags
& wxXMLDOC_KEEP_WHITESPACE_NODES
) == 0;
// register the context and the callbacks defined earlier in this file
613 XML_SetUserData(parser
, (void*)&ctx
);
614 XML_SetElementHandler(parser
, StartElementHnd
, EndElementHnd
);
615 XML_SetCharacterDataHandler(parser
, TextHnd
);
616 XML_SetStartCdataSectionHandler(parser
, StartCdataHnd
);
617 XML_SetCommentHandler(parser
, CommentHnd
);
618 XML_SetDefaultHandler(parser
, DefaultHnd
);
619 XML_SetUnknownEncodingHandler(parser
, UnknownEncodingHnd
, NULL
);
624 size_t len
= stream
.Read(buf
, BUFSIZE
).LastRead();
// a short read marks the last chunk
625 done
= (len
< BUFSIZE
);
626 if (!XML_Parse(parser
, buf
, len
, done
))
628 wxString
error(XML_ErrorString(XML_GetErrorCode(parser
)),
630 wxLogError(_("XML parsing error: '%s' at line %d"),
632 XML_GetCurrentLineNumber(parser
));
640 if (!ctx
.version
.empty())
641 SetVersion(ctx
.version
);
642 if (!ctx
.encoding
.empty())
643 SetFileEncoding(ctx
.encoding
);
651 XML_ParserFree(parser
);
663 //-----------------------------------------------------------------------------
664 // wxXmlDocument saving routines
665 //-----------------------------------------------------------------------------
667 // write string to output:
// Two preprocessor branches are visible:
//  - first branch: convMem is unused; str is converted with convFile (or
//    wxConvUTF8 when convFile is NULL) and strlen() bytes of the result are
//    written;
//  - "!wxUSE_UNICODE" branch: when both conversions are given, str is
//    re-encoded from convMem to convFile before writing; otherwise it is
//    written unchanged.
// NOTE(review): the opening #if and any early return for an empty string are
// not visible in this excerpt.
668 inline static void OutputString(wxOutputStream
& stream
, const wxString
& str
,
669 wxMBConv
*convMem
= NULL
,
670 wxMBConv
*convFile
= NULL
)
676 wxUnusedVar(convMem
);
678 const wxWX2MBbuf
buf(str
.mb_str(*(convFile
? convFile
: &wxConvUTF8
)));
679 stream
.Write((const char*)buf
, strlen((const char*)buf
));
680 #else // !wxUSE_UNICODE
681 if ( convFile
&& convMem
)
683 wxString
str2(str
.wc_str(*convMem
), *convFile
);
684 stream
.Write(str2
.mb_str(), str2
.Len());
686 else // no conversions to do
688 stream
.Write(str
.mb_str(), str
.Len());
690 #endif // wxUSE_UNICODE/!wxUSE_UNICODE
693 // flags for OutputStringEnt()
// XML_ESCAPE_QUOTES: also escape '"' characters (tested in OutputStringEnt's
// escape condition).
// NOTE(review): the enclosing enum declaration is not visible in this excerpt.
696 XML_ESCAPE_QUOTES
= 1
699 // Same as above, but create entities first.
700 // Translates '<' to "<", '>' to ">" and '&' to "&"
701 static void OutputStringEnt(wxOutputStream
& stream
, const wxString
& str
,
702 wxMBConv
*convMem
= NULL
,
703 wxMBConv
*convFile
= NULL
,
712 for (i
= 0; i
< len
; i
++)
715 if (c
== wxT('<') || c
== wxT('>') ||
716 (c
== wxT('&') && str
.Mid(i
+1, 4) != wxT("amp;")) ||
717 ((flags
& XML_ESCAPE_QUOTES
) && c
== wxT('"')))
719 OutputString(stream
, str
.Mid(last
, i
- last
), convMem
, convFile
);
723 OutputString(stream
, wxT("<"));
726 OutputString(stream
, wxT(">"));
729 OutputString(stream
, wxT("&"));
732 OutputString(stream
, wxT("""));
740 OutputString(stream
, str
.Mid(last
, i
- last
), convMem
, convFile
);
// Writes a line break followed by two spaces per indent level to stream,
// using OutputString() with its default conversions.
743 inline static void OutputIndentation(wxOutputStream
& stream
, int indent
)
745 wxString str
= wxT("\n");
746 for (int i
= 0; i
< indent
; i
++)
747 str
<< wxT(' ') << wxT(' ');
748 OutputString(stream
, str
);
// Serialises node (and, for elements, its subtree) to stream, dispatching on
// the node type:
//  - CDATA section: "<![CDATA[" + content + "]]>";
//  - text: content written through OutputStringEnt();
//  - element: "<name", then each attribute as ` name="value"` with the value
//    written through OutputStringEnt(); with children, ">" followed by each
//    child (recursively, at indent + indentstep) and "</name>"; without
//    children, "/>";
//  - comment: "<!--" + content + "-->";
//  - anything else triggers wxFAIL_MSG.
// Indentation is only written when indentstep >= 0 and the child (or, before
// the closing tag, the last child) is not a text node.
// NOTE(review): CDATA content is written without convMem/convFile, unlike
// text and comment content -- confirm this is intended.
// NOTE(review): the loop headers, the break statements, the declarations of
// n/prev and the last argument of the attribute OutputStringEnt() call are
// not visible in this excerpt.
751 static void OutputNode(wxOutputStream
& stream
, wxXmlNode
*node
, int indent
,
752 wxMBConv
*convMem
, wxMBConv
*convFile
, int indentstep
)
755 wxXmlAttribute
*attr
;
757 switch (node
->GetType())
759 case wxXML_CDATA_SECTION_NODE
:
760 OutputString( stream
, wxT("<![CDATA["));
761 OutputString( stream
, node
->GetContent() );
762 OutputString( stream
, wxT("]]>") );
765 case wxXML_TEXT_NODE
:
766 OutputStringEnt(stream
, node
->GetContent(), convMem
, convFile
);
769 case wxXML_ELEMENT_NODE
:
770 OutputString(stream
, wxT("<"));
771 OutputString(stream
, node
->GetName());
773 attr
= node
->GetAttributes();
776 OutputString(stream
, wxT(" ") + attr
->GetName() + wxT("=\""));
777 OutputStringEnt(stream
, attr
->GetValue(), convMem
, convFile
,
779 OutputString(stream
, wxT("\""));
780 attr
= attr
->GetNext();
783 if (node
->GetChildren())
785 OutputString(stream
, wxT(">"));
787 n
= node
->GetChildren();
790 if (indentstep
>= 0 && n
&& n
->GetType() != wxXML_TEXT_NODE
)
791 OutputIndentation(stream
, indent
+ indentstep
);
792 OutputNode(stream
, n
, indent
+ indentstep
, convMem
, convFile
, indentstep
);
796 if (indentstep
>= 0 && prev
&& prev
->GetType() != wxXML_TEXT_NODE
)
797 OutputIndentation(stream
, indent
);
798 OutputString(stream
, wxT("</"));
799 OutputString(stream
, node
->GetName());
800 OutputString(stream
, wxT(">"));
803 OutputString(stream
, wxT("/>"));
806 case wxXML_COMMENT_NODE
:
807 OutputString(stream
, wxT("<!--"));
808 OutputString(stream
, node
->GetContent(), convMem
, convFile
);
809 OutputString(stream
, wxT("-->"));
813 wxFAIL_MSG(wxT("unsupported node type"));
// Writes the document to stream: an XML declaration built from GetVersion()
// and GetFileEncoding(), then the tree starting at GetRoot() (through
// OutputNode() at indent 0), then a final newline.
// Conversions: one visible branch allocates convFile for the file encoding;
// another allocates both convFile and convMem when the file encoding differs
// (case-insensitively) from the in-memory encoding.
// NOTE(review): the preprocessor conditions selecting those branches, the
// validity check on the document, the deletion of the allocated converters
// and the return value are not visible in this excerpt -- confirm that
// convFile and convMem are released.
817 bool wxXmlDocument::Save(wxOutputStream
& stream
, int indentstep
) const
824 wxMBConv
*convMem
= NULL
,
828 convFile
= new wxCSConv(GetFileEncoding());
831 if ( GetFileEncoding().CmpNoCase(GetEncoding()) != 0 )
833 convFile
= new wxCSConv(GetFileEncoding());
834 convMem
= new wxCSConv(GetEncoding());
836 else // file and in-memory encodings are the same, no conversion needed
843 s
.Printf(wxT("<?xml version=\"%s\" encoding=\"%s\"?>\n"),
844 GetVersion().c_str(), GetFileEncoding().c_str());
845 OutputString(stream
, s
);
847 OutputNode(stream
, GetRoot(), 0, convMem
, convFile
, indentstep
);
848 OutputString(stream
, wxT("\n"));