1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/xml/xml.cpp
3 // Purpose: wxXmlDocument - XML parser & data holder class
4 // Author: Vaclav Slavik
7 // Copyright: (c) 2000 Vaclav Slavik
8 // Licence: wxWindows licence
9 /////////////////////////////////////////////////////////////////////////////
11 // For compilers that support precompilation, includes "wx.h".
12 #include "wx/wxprec.h"
20 #include "wx/xml/xml.h"
28 #include "wx/wfstream.h"
29 #include "wx/datstrm.h"
30 #include "wx/zstream.h"
31 #include "wx/strconv.h"
33 #include "expat.h" // from Expat
35 // DLL options compatibility check:
36 WX_CHECK_BUILD_OPTIONS("wxXML")
39 IMPLEMENT_CLASS(wxXmlDocument
, wxObject
)
42 // a private utility used by wxXML
43 static bool wxIsWhiteOnly(const wxChar
*buf
);
46 //-----------------------------------------------------------------------------
48 //-----------------------------------------------------------------------------
50 wxXmlNode::wxXmlNode(wxXmlNode
*parent
,wxXmlNodeType type
,
51 const wxString
& name
, const wxString
& content
,
52 wxXmlProperty
*props
, wxXmlNode
*next
)
53 : m_type(type
), m_name(name
), m_content(content
),
54 m_properties(props
), m_parent(parent
),
55 m_children(NULL
), m_next(next
)
59 if (m_parent
->m_children
)
61 m_next
= m_parent
->m_children
;
62 m_parent
->m_children
= this;
65 m_parent
->m_children
= this;
69 wxXmlNode::wxXmlNode(wxXmlNodeType type
, const wxString
& name
,
70 const wxString
& content
)
71 : m_type(type
), m_name(name
), m_content(content
),
72 m_properties(NULL
), m_parent(NULL
),
73 m_children(NULL
), m_next(NULL
)
76 wxXmlNode::wxXmlNode(const wxXmlNode
& node
)
83 wxXmlNode::~wxXmlNode()
86 for (c
= m_children
; c
; c
= c2
)
92 wxXmlProperty
*p
, *p2
;
93 for (p
= m_properties
; p
; p
= p2
)
100 wxXmlNode
& wxXmlNode::operator=(const wxXmlNode
& node
)
102 wxDELETE(m_properties
);
103 wxDELETE(m_children
);
108 void wxXmlNode::DoCopy(const wxXmlNode
& node
)
110 m_type
= node
.m_type
;
111 m_name
= node
.m_name
;
112 m_content
= node
.m_content
;
115 wxXmlNode
*n
= node
.m_children
;
118 AddChild(new wxXmlNode(*n
));
123 wxXmlProperty
*p
= node
.m_properties
;
126 AddProperty(p
->GetName(), p
->GetValue());
131 bool wxXmlNode::HasProp(const wxString
& propName
) const
133 wxXmlProperty
*prop
= GetProperties();
137 if (prop
->GetName() == propName
) return true;
138 prop
= prop
->GetNext();
144 bool wxXmlNode::GetPropVal(const wxString
& propName
, wxString
*value
) const
146 wxXmlProperty
*prop
= GetProperties();
150 if (prop
->GetName() == propName
)
152 *value
= prop
->GetValue();
155 prop
= prop
->GetNext();
161 wxString
wxXmlNode::GetPropVal(const wxString
& propName
, const wxString
& defaultVal
) const
164 if (GetPropVal(propName
, &tmp
))
170 void wxXmlNode::AddChild(wxXmlNode
*child
)
172 if (m_children
== NULL
)
176 wxXmlNode
*ch
= m_children
;
177 while (ch
->m_next
) ch
= ch
->m_next
;
180 child
->m_next
= NULL
;
181 child
->m_parent
= this;
184 bool wxXmlNode::InsertChild(wxXmlNode
*child
, wxXmlNode
*before_node
)
186 wxCHECK_MSG(before_node
== NULL
|| before_node
->GetParent() == this, false,
187 wxT("wxXmlNode::InsertChild - the node has incorrect parent"));
188 wxCHECK_MSG(child
, false, wxT("Cannot insert a NULL pointer!"));
190 if (m_children
== before_node
)
192 else if (m_children
== NULL
)
194 if (before_node
!= NULL
)
195 return false; // we have no children so we don't need to search
198 else if (before_node
== NULL
)
201 child
->m_parent
= this;
202 child
->m_next
= m_children
;
208 wxXmlNode
*ch
= m_children
;
209 while (ch
&& ch
->m_next
!= before_node
) ch
= ch
->m_next
;
211 return false; // before_node not found
215 child
->m_parent
= this;
216 child
->m_next
= before_node
;
220 bool wxXmlNode::RemoveChild(wxXmlNode
*child
)
222 if (m_children
== NULL
)
224 else if (m_children
== child
)
226 m_children
= child
->m_next
;
227 child
->m_parent
= NULL
;
228 child
->m_next
= NULL
;
233 wxXmlNode
*ch
= m_children
;
236 if (ch
->m_next
== child
)
238 ch
->m_next
= child
->m_next
;
239 child
->m_parent
= NULL
;
240 child
->m_next
= NULL
;
249 void wxXmlNode::AddProperty(const wxString
& name
, const wxString
& value
)
251 AddProperty(new wxXmlProperty(name
, value
, NULL
));
254 void wxXmlNode::AddProperty(wxXmlProperty
*prop
)
256 if (m_properties
== NULL
)
260 wxXmlProperty
*p
= m_properties
;
261 while (p
->GetNext()) p
= p
->GetNext();
266 bool wxXmlNode::DeleteProperty(const wxString
& name
)
270 if (m_properties
== NULL
)
273 else if (m_properties
->GetName() == name
)
276 m_properties
= prop
->GetNext();
284 wxXmlProperty
*p
= m_properties
;
287 if (p
->GetNext()->GetName() == name
)
290 p
->SetNext(prop
->GetNext());
301 wxString
wxXmlNode::GetNodeContent() const
303 wxXmlNode
*n
= GetChildren();
307 if (n
->GetType() == wxXML_TEXT_NODE
||
308 n
->GetType() == wxXML_CDATA_SECTION_NODE
)
309 return n
->GetContent();
312 return wxEmptyString
;
315 int wxXmlNode::GetDepth(wxXmlNode
*grandparent
) const
317 const wxXmlNode
*n
= this;
324 if (n
== grandparent
)
332 bool wxXmlNode::IsWhitespaceOnly() const
334 return wxIsWhiteOnly(m_content
);
339 //-----------------------------------------------------------------------------
341 //-----------------------------------------------------------------------------
343 wxXmlDocument::wxXmlDocument()
344 : m_version(wxT("1.0")), m_fileEncoding(wxT("utf-8")), m_root(NULL
)
347 m_encoding
= wxT("UTF-8");
351 wxXmlDocument::wxXmlDocument(const wxString
& filename
, const wxString
& encoding
)
352 :wxObject(), m_root(NULL
)
354 if ( !Load(filename
, encoding
) )
360 wxXmlDocument::wxXmlDocument(wxInputStream
& stream
, const wxString
& encoding
)
361 :wxObject(), m_root(NULL
)
363 if ( !Load(stream
, encoding
) )
369 wxXmlDocument::wxXmlDocument(const wxXmlDocument
& doc
)
375 wxXmlDocument
& wxXmlDocument::operator=(const wxXmlDocument
& doc
)
382 void wxXmlDocument::DoCopy(const wxXmlDocument
& doc
)
384 m_version
= doc
.m_version
;
386 m_encoding
= doc
.m_encoding
;
388 m_fileEncoding
= doc
.m_fileEncoding
;
391 m_root
= new wxXmlNode(*doc
.m_root
);
396 bool wxXmlDocument::Load(const wxString
& filename
, const wxString
& encoding
, int flags
)
398 wxFileInputStream
stream(filename
);
401 return Load(stream
, encoding
, flags
);
404 bool wxXmlDocument::Save(const wxString
& filename
, int indentstep
) const
406 wxFileOutputStream
stream(filename
);
409 return Save(stream
, indentstep
);
414 //-----------------------------------------------------------------------------
415 // wxXmlDocument loading routines
416 //-----------------------------------------------------------------------------
418 // converts Expat-produced string in UTF-8 into wxString using the specified
419 // conv or keep in UTF-8 if conv is NULL
420 static wxString
CharToString(wxMBConv
*conv
,
421 const char *s
, size_t len
= wxString::npos
)
426 return wxString(s
, wxConvUTF8
, len
);
427 #else // !wxUSE_UNICODE
430 // there can be no embedded NULs in this string so we don't need the
431 // output length, it will be NUL-terminated
432 const wxWCharBuffer
wbuf(
433 wxConvUTF8
.cMB2WC(s
, len
== wxString::npos
? wxNO_LEN
: len
, NULL
));
435 return wxString(wbuf
, *conv
);
437 else // already in UTF-8, no conversion needed
439 return wxString(s
, len
!= wxString::npos
? len
: strlen(s
));
441 #endif // wxUSE_UNICODE/!wxUSE_UNICODE
444 // returns true if the given string contains only whitespaces
445 bool wxIsWhiteOnly(const wxChar
*buf
)
447 for (const wxChar
*c
= buf
; *c
!= wxT('\0'); c
++)
448 if (*c
!= wxT(' ') && *c
!= wxT('\t') && *c
!= wxT('\n') && *c
!= wxT('\r'))
454 struct wxXmlParsingContext
459 wxXmlNode
*lastAsText
;
463 bool removeWhiteOnlyNodes
;
467 static void StartElementHnd(void *userData
, const char *name
, const char **atts
)
469 wxXmlParsingContext
*ctx
= (wxXmlParsingContext
*)userData
;
470 wxXmlNode
*node
= new wxXmlNode(wxXML_ELEMENT_NODE
, CharToString(ctx
->conv
, name
));
471 const char **a
= atts
;
474 node
->AddProperty(CharToString(ctx
->conv
, a
[0]), CharToString(ctx
->conv
, a
[1]));
477 if (ctx
->root
== NULL
)
480 ctx
->node
->AddChild(node
);
482 ctx
->lastAsText
= NULL
;
487 static void EndElementHnd(void *userData
, const char* WXUNUSED(name
))
489 wxXmlParsingContext
*ctx
= (wxXmlParsingContext
*)userData
;
491 ctx
->node
= ctx
->node
->GetParent();
492 ctx
->lastAsText
= NULL
;
497 static void TextHnd(void *userData
, const char *s
, int len
)
499 wxXmlParsingContext
*ctx
= (wxXmlParsingContext
*)userData
;
500 wxString str
= CharToString(ctx
->conv
, s
, len
);
504 if ( ctx
->bLastCdata
)
506 ctx
->lastAsText
->SetContent(ctx
->lastAsText
->GetContent() +
507 CharToString(NULL
, s
, len
));
511 ctx
->lastAsText
->SetContent(ctx
->lastAsText
->GetContent() + str
);
516 bool whiteOnly
= false;
517 if (ctx
->removeWhiteOnlyNodes
)
518 whiteOnly
= wxIsWhiteOnly(str
);
522 ctx
->lastAsText
= new wxXmlNode(wxXML_TEXT_NODE
, wxT("text"), str
);
523 ctx
->node
->AddChild(ctx
->lastAsText
);
530 static void StartCdataHnd(void *userData
)
532 wxXmlParsingContext
*ctx
= (wxXmlParsingContext
*)userData
;
534 ctx
->bLastCdata
= true;
536 ctx
->lastAsText
= new wxXmlNode(wxXML_CDATA_SECTION_NODE
, wxT("cdata"),wxT(""));
537 ctx
->node
->AddChild(ctx
->lastAsText
);
542 static void EndCdataHnd(void *userData
)
544 wxXmlParsingContext
*ctx
= (wxXmlParsingContext
*)userData
;
546 ctx
->bLastCdata
= false;
551 static void CommentHnd(void *userData
, const char *data
)
553 wxXmlParsingContext
*ctx
= (wxXmlParsingContext
*)userData
;
557 // VS: ctx->node == NULL happens if there is a comment before
558 // the root element (e.g. wxDesigner's output). We ignore such
559 // comments, no big deal...
560 ctx
->node
->AddChild(new wxXmlNode(wxXML_COMMENT_NODE
,
561 wxT("comment"), CharToString(ctx
->conv
, data
)));
563 ctx
->lastAsText
= NULL
;
568 static void DefaultHnd(void *userData
, const char *s
, int len
)
571 if (len
> 6 && memcmp(s
, "<?xml ", 6) == 0)
573 wxXmlParsingContext
*ctx
= (wxXmlParsingContext
*)userData
;
575 wxString buf
= CharToString(ctx
->conv
, s
, (size_t)len
);
577 pos
= buf
.Find(wxT("encoding="));
578 if (pos
!= wxNOT_FOUND
)
579 ctx
->encoding
= buf
.Mid(pos
+ 10).BeforeFirst(buf
[(size_t)pos
+9]);
580 pos
= buf
.Find(wxT("version="));
581 if (pos
!= wxNOT_FOUND
)
582 ctx
->version
= buf
.Mid(pos
+ 9).BeforeFirst(buf
[(size_t)pos
+8]);
588 static int UnknownEncodingHnd(void * WXUNUSED(encodingHandlerData
),
589 const XML_Char
*name
, XML_Encoding
*info
)
591 // We must build conversion table for expat. The easiest way to do so
592 // is to let wxCSConv convert as string containing all characters to
593 // wide character representation:
594 wxString
str(name
, wxConvLibc
);
602 for (i
= 0; i
< 255; i
++)
604 mbBuf
[0] = (char)(i
+1);
605 if (conv
.MB2WC(wcBuf
, mbBuf
, 2) == (size_t)-1)
607 // invalid/undefined byte in the encoding:
610 info
->map
[i
+1] = (int)wcBuf
[0];
614 info
->convert
= NULL
;
615 info
->release
= NULL
;
621 bool wxXmlDocument::Load(wxInputStream
& stream
, const wxString
& encoding
, int flags
)
626 m_encoding
= encoding
;
629 const size_t BUFSIZE
= 1024;
631 wxXmlParsingContext ctx
;
633 XML_Parser parser
= XML_ParserCreate(NULL
);
635 ctx
.root
= ctx
.node
= NULL
;
636 ctx
.encoding
= wxT("UTF-8"); // default in absence of encoding=""
639 if ( encoding
.CmpNoCase(wxT("UTF-8")) != 0 )
640 ctx
.conv
= new wxCSConv(encoding
);
642 ctx
.removeWhiteOnlyNodes
= (flags
& wxXMLDOC_KEEP_WHITESPACE_NODES
) == 0;
643 ctx
.bLastCdata
= false;
645 XML_SetUserData(parser
, (void*)&ctx
);
646 XML_SetElementHandler(parser
, StartElementHnd
, EndElementHnd
);
647 XML_SetCharacterDataHandler(parser
, TextHnd
);
648 XML_SetCdataSectionHandler(parser
, StartCdataHnd
, EndCdataHnd
);
649 XML_SetCommentHandler(parser
, CommentHnd
);
650 XML_SetDefaultHandler(parser
, DefaultHnd
);
651 XML_SetUnknownEncodingHandler(parser
, UnknownEncodingHnd
, NULL
);
656 size_t len
= stream
.Read(buf
, BUFSIZE
).LastRead();
657 done
= (len
< BUFSIZE
);
658 if (!XML_Parse(parser
, buf
, len
, done
))
660 wxString
error(XML_ErrorString(XML_GetErrorCode(parser
)),
662 wxLogError(_("XML parsing error: '%s' at line %d"),
664 XML_GetCurrentLineNumber(parser
));
672 if (!ctx
.version
.empty())
673 SetVersion(ctx
.version
);
674 if (!ctx
.encoding
.empty())
675 SetFileEncoding(ctx
.encoding
);
683 XML_ParserFree(parser
);
695 //-----------------------------------------------------------------------------
696 // wxXmlDocument saving routines
697 //-----------------------------------------------------------------------------
699 // write string to output:
700 inline static void OutputString(wxOutputStream
& stream
, const wxString
& str
,
701 wxMBConv
*convMem
= NULL
,
702 wxMBConv
*convFile
= NULL
)
708 wxUnusedVar(convMem
);
710 const wxWX2MBbuf
buf(str
.mb_str(*(convFile
? convFile
: &wxConvUTF8
)));
711 stream
.Write((const char*)buf
, strlen((const char*)buf
));
712 #else // !wxUSE_UNICODE
713 if ( convFile
&& convMem
)
715 wxString
str2(str
.wc_str(*convMem
), *convFile
);
716 stream
.Write(str2
.mb_str(), str2
.Len());
718 else // no conversions to do
720 stream
.Write(str
.mb_str(), str
.Len());
722 #endif // wxUSE_UNICODE/!wxUSE_UNICODE
725 // flags for OutputStringEnt()
728 XML_ESCAPE_QUOTES
= 1
731 // Same as above, but create entities first.
732 // Translates '<' to "<", '>' to ">" and '&' to "&"
733 static void OutputStringEnt(wxOutputStream
& stream
, const wxString
& str
,
734 wxMBConv
*convMem
= NULL
,
735 wxMBConv
*convFile
= NULL
,
744 for (i
= 0; i
< len
; i
++)
747 if (c
== wxT('<') || c
== wxT('>') ||
748 (c
== wxT('&') && str
.Mid(i
+1, 4) != wxT("amp;")) ||
749 ((flags
& XML_ESCAPE_QUOTES
) && c
== wxT('"')))
751 OutputString(stream
, str
.Mid(last
, i
- last
), convMem
, convFile
);
755 OutputString(stream
, wxT("<"));
758 OutputString(stream
, wxT(">"));
761 OutputString(stream
, wxT("&"));
764 OutputString(stream
, wxT("""));
772 OutputString(stream
, str
.Mid(last
, i
- last
), convMem
, convFile
);
775 inline static void OutputIndentation(wxOutputStream
& stream
, int indent
)
777 wxString str
= wxT("\n");
778 for (int i
= 0; i
< indent
; i
++)
779 str
<< wxT(' ') << wxT(' ');
780 OutputString(stream
, str
);
783 static void OutputNode(wxOutputStream
& stream
, wxXmlNode
*node
, int indent
,
784 wxMBConv
*convMem
, wxMBConv
*convFile
, int indentstep
)
789 switch (node
->GetType())
791 case wxXML_CDATA_SECTION_NODE
:
792 OutputString( stream
, wxT("<![CDATA["));
793 OutputString( stream
, node
->GetContent() );
794 OutputString( stream
, wxT("]]>") );
797 case wxXML_TEXT_NODE
:
798 OutputStringEnt(stream
, node
->GetContent(), convMem
, convFile
);
801 case wxXML_ELEMENT_NODE
:
802 OutputString(stream
, wxT("<"));
803 OutputString(stream
, node
->GetName());
805 prop
= node
->GetProperties();
808 OutputString(stream
, wxT(" ") + prop
->GetName() + wxT("=\""));
809 OutputStringEnt(stream
, prop
->GetValue(), convMem
, convFile
,
811 OutputString(stream
, wxT("\""));
812 prop
= prop
->GetNext();
815 if (node
->GetChildren())
817 OutputString(stream
, wxT(">"));
819 n
= node
->GetChildren();
822 if (indentstep
>= 0 && n
&& n
->GetType() != wxXML_TEXT_NODE
)
823 OutputIndentation(stream
, indent
+ indentstep
);
824 OutputNode(stream
, n
, indent
+ indentstep
, convMem
, convFile
, indentstep
);
828 if (indentstep
>= 0 && prev
&& prev
->GetType() != wxXML_TEXT_NODE
)
829 OutputIndentation(stream
, indent
);
830 OutputString(stream
, wxT("</"));
831 OutputString(stream
, node
->GetName());
832 OutputString(stream
, wxT(">"));
835 OutputString(stream
, wxT("/>"));
838 case wxXML_COMMENT_NODE
:
839 OutputString(stream
, wxT("<!--"));
840 OutputString(stream
, node
->GetContent(), convMem
, convFile
);
841 OutputString(stream
, wxT("-->"));
845 wxFAIL_MSG(wxT("unsupported node type"));
849 bool wxXmlDocument::Save(wxOutputStream
& stream
, int indentstep
) const
856 wxMBConv
*convMem
= NULL
,
860 convFile
= new wxCSConv(GetFileEncoding());
863 if ( GetFileEncoding().CmpNoCase(GetEncoding()) != 0 )
865 convFile
= new wxCSConv(GetFileEncoding());
866 convMem
= new wxCSConv(GetEncoding());
868 else // file and in-memory encodings are the same, no conversion needed
875 s
.Printf(wxT("<?xml version=\"%s\" encoding=\"%s\"?>\n"),
876 GetVersion().c_str(), GetFileEncoding().c_str());
877 OutputString(stream
, s
);
879 OutputNode(stream
, GetRoot(), 0, convMem
, convFile
, indentstep
);
880 OutputString(stream
, wxT("\n"));