1 ///////////////////////////////////////////////////////////////////////////// 
   2 // Name:        src/xml/xml.cpp 
   3 // Purpose:     wxXmlDocument - XML parser & data holder class 
   4 // Author:      Vaclav Slavik 
   7 // Copyright:   (c) 2000 Vaclav Slavik 
   8 // Licence:     wxWindows licence 
   9 ///////////////////////////////////////////////////////////////////////////// 
  11 // For compilers that support precompilation, includes "wx.h". 
  12 #include "wx/wxprec.h" 
  20 #include "wx/xml/xml.h" 
  28 #include "wx/wfstream.h" 
  29 #include "wx/datstrm.h" 
  30 #include "wx/zstream.h" 
  31 #include "wx/strconv.h" 
  33 #include "expat.h" // from Expat 
  35 // DLL options compatibility check: 
  36 WX_CHECK_BUILD_OPTIONS("wxXML") 
  39 IMPLEMENT_CLASS(wxXmlDocument
, wxObject
) 
  42 // a private utility used by wxXML 
  43 static bool wxIsWhiteOnly(const wxChar 
*buf
); 
  46 //----------------------------------------------------------------------------- 
  48 //----------------------------------------------------------------------------- 
  50 wxXmlNode::wxXmlNode(wxXmlNode 
*parent
,wxXmlNodeType type
, 
  51                      const wxString
& name
, const wxString
& content
, 
  52                      wxXmlProperty 
*props
, wxXmlNode 
*next
) 
  53     : m_type(type
), m_name(name
), m_content(content
), 
  54       m_properties(props
), m_parent(parent
), 
  55       m_children(NULL
), m_next(next
) 
  59         if (m_parent
->m_children
) 
  61             m_next 
= m_parent
->m_children
; 
  62             m_parent
->m_children 
= this; 
  65             m_parent
->m_children 
= this; 
  69 wxXmlNode::wxXmlNode(wxXmlNodeType type
, const wxString
& name
, 
  70                      const wxString
& content
) 
  71     : m_type(type
), m_name(name
), m_content(content
), 
  72       m_properties(NULL
), m_parent(NULL
), 
  73       m_children(NULL
), m_next(NULL
) 
  76 wxXmlNode::wxXmlNode(const wxXmlNode
& node
) 
  83 wxXmlNode::~wxXmlNode() 
  86     for (c 
= m_children
; c
; c 
= c2
) 
  92     wxXmlProperty 
*p
, *p2
; 
  93     for (p 
= m_properties
; p
; p 
= p2
) 
 100 wxXmlNode
& wxXmlNode::operator=(const wxXmlNode
& node
) 
 102     wxDELETE(m_properties
); 
 103     wxDELETE(m_children
); 
 108 void wxXmlNode::DoCopy(const wxXmlNode
& node
) 
 110     m_type 
= node
.m_type
; 
 111     m_name 
= node
.m_name
; 
 112     m_content 
= node
.m_content
; 
 115     wxXmlNode 
*n 
= node
.m_children
; 
 118         AddChild(new wxXmlNode(*n
)); 
 123     wxXmlProperty 
*p 
= node
.m_properties
; 
 126        AddProperty(p
->GetName(), p
->GetValue()); 
 131 bool wxXmlNode::HasProp(const wxString
& propName
) const 
 133     wxXmlProperty 
*prop 
= GetProperties(); 
 137         if (prop
->GetName() == propName
) return true; 
 138         prop 
= prop
->GetNext(); 
 144 bool wxXmlNode::GetPropVal(const wxString
& propName
, wxString 
*value
) const 
 146     wxXmlProperty 
*prop 
= GetProperties(); 
 150         if (prop
->GetName() == propName
) 
 152             *value 
= prop
->GetValue(); 
 155         prop 
= prop
->GetNext(); 
 161 wxString 
wxXmlNode::GetPropVal(const wxString
& propName
, const wxString
& defaultVal
) const 
 164     if (GetPropVal(propName
, &tmp
)) 
 170 void wxXmlNode::AddChild(wxXmlNode 
*child
) 
 172     if (m_children 
== NULL
) 
 176         wxXmlNode 
*ch 
= m_children
; 
 177         while (ch
->m_next
) ch 
= ch
->m_next
; 
 180     child
->m_next 
= NULL
; 
 181     child
->m_parent 
= this; 
 184 bool wxXmlNode::InsertChild(wxXmlNode 
*child
, wxXmlNode 
*before_node
) 
 186     wxCHECK_MSG(before_node 
== NULL 
|| before_node
->GetParent() == this, false, 
 187                  wxT("wxXmlNode::InsertChild - the node has incorrect parent")); 
 188     wxCHECK_MSG(child
, false, wxT("Cannot insert a NULL pointer!")); 
 190     if (m_children 
== before_node
) 
 192     else if (m_children 
== NULL
) 
 194         if (before_node 
!= NULL
) 
 195             return false;       // we have no children so we don't need to search 
 198     else if (before_node 
== NULL
) 
 201         child
->m_parent 
= this; 
 202         child
->m_next 
= m_children
; 
 208         wxXmlNode 
*ch 
= m_children
; 
 209         while (ch 
&& ch
->m_next 
!= before_node
) ch 
= ch
->m_next
; 
 211             return false;       // before_node not found 
 215     child
->m_parent 
= this; 
 216     child
->m_next 
= before_node
; 
 220 bool wxXmlNode::RemoveChild(wxXmlNode 
*child
) 
 222     if (m_children 
== NULL
) 
 224     else if (m_children 
== child
) 
 226         m_children 
= child
->m_next
; 
 227         child
->m_parent 
= NULL
; 
 228         child
->m_next 
= NULL
; 
 233         wxXmlNode 
*ch 
= m_children
; 
 236             if (ch
->m_next 
== child
) 
 238                 ch
->m_next 
= child
->m_next
; 
 239                 child
->m_parent 
= NULL
; 
 240                 child
->m_next 
= NULL
; 
 249 void wxXmlNode::AddProperty(const wxString
& name
, const wxString
& value
) 
 251     AddProperty(new wxXmlProperty(name
, value
, NULL
)); 
 254 void wxXmlNode::AddProperty(wxXmlProperty 
*prop
) 
 256     if (m_properties 
== NULL
) 
 260         wxXmlProperty 
*p 
= m_properties
; 
 261         while (p
->GetNext()) p 
= p
->GetNext(); 
 266 bool wxXmlNode::DeleteProperty(const wxString
& name
) 
 270     if (m_properties 
== NULL
) 
 273     else if (m_properties
->GetName() == name
) 
 276         m_properties 
= prop
->GetNext(); 
 284         wxXmlProperty 
*p 
= m_properties
; 
 287             if (p
->GetNext()->GetName() == name
) 
 290                 p
->SetNext(prop
->GetNext()); 
 301 wxString 
wxXmlNode::GetNodeContent() const 
 303     wxXmlNode 
*n 
= GetChildren(); 
 307         if (n
->GetType() == wxXML_TEXT_NODE 
|| 
 308             n
->GetType() == wxXML_CDATA_SECTION_NODE
) 
 309             return n
->GetContent(); 
 312     return wxEmptyString
; 
 315 int wxXmlNode::GetDepth(wxXmlNode 
*grandparent
) const 
 317     const wxXmlNode 
*n 
= this; 
 324         if (n 
== grandparent
) 
 332 bool wxXmlNode::IsWhitespaceOnly() const 
 334     return wxIsWhiteOnly(m_content
); 
 339 //----------------------------------------------------------------------------- 
 341 //----------------------------------------------------------------------------- 
 343 wxXmlDocument::wxXmlDocument() 
 344     : m_version(wxT("1.0")), m_fileEncoding(wxT("utf-8")), m_root(NULL
) 
 347     m_encoding 
= wxT("UTF-8"); 
 351 wxXmlDocument::wxXmlDocument(const wxString
& filename
, const wxString
& encoding
) 
 352               :wxObject(), m_root(NULL
) 
 354     if ( !Load(filename
, encoding
) ) 
 360 wxXmlDocument::wxXmlDocument(wxInputStream
& stream
, const wxString
& encoding
) 
 361               :wxObject(), m_root(NULL
) 
 363     if ( !Load(stream
, encoding
) ) 
 369 wxXmlDocument::wxXmlDocument(const wxXmlDocument
& doc
) 
 375 wxXmlDocument
& wxXmlDocument::operator=(const wxXmlDocument
& doc
) 
 382 void wxXmlDocument::DoCopy(const wxXmlDocument
& doc
) 
 384     m_version 
= doc
.m_version
; 
 386     m_encoding 
= doc
.m_encoding
; 
 388     m_fileEncoding 
= doc
.m_fileEncoding
; 
 389     m_root 
= new wxXmlNode(*doc
.m_root
); 
 392 bool wxXmlDocument::Load(const wxString
& filename
, const wxString
& encoding
, int flags
) 
 394     wxFileInputStream 
stream(filename
); 
 397     return Load(stream
, encoding
, flags
); 
 400 bool wxXmlDocument::Save(const wxString
& filename
, int indentstep
) const 
 402     wxFileOutputStream 
stream(filename
); 
 405     return Save(stream
, indentstep
); 
 410 //----------------------------------------------------------------------------- 
 411 //  wxXmlDocument loading routines 
 412 //----------------------------------------------------------------------------- 
 414 // converts Expat-produced string in UTF-8 into wxString using the specified 
 415 // conv or keep in UTF-8 if conv is NULL 
 416 static wxString 
CharToString(wxMBConv 
*conv
, 
 417                                     const char *s
, size_t len 
= wxString::npos
) 
 422     return wxString(s
, wxConvUTF8
, len
); 
 423 #else // !wxUSE_UNICODE 
 426         // there can be no embedded NULs in this string so we don't need the 
 427         // output length, it will be NUL-terminated 
 428         const wxWCharBuffer 
wbuf( 
 429             wxConvUTF8
.cMB2WC(s
, len 
== wxString::npos 
? wxNO_LEN 
: len
, NULL
)); 
 431         return wxString(wbuf
, *conv
); 
 433     else // already in UTF-8, no conversion needed 
 435         return wxString(s
, len 
!= wxString::npos 
? len 
: strlen(s
)); 
 437 #endif // wxUSE_UNICODE/!wxUSE_UNICODE 
 440 // returns true if the given string contains only whitespaces 
 441 bool wxIsWhiteOnly(const wxChar 
*buf
) 
 443     for (const wxChar 
*c 
= buf
; *c 
!= wxT('\0'); c
++) 
 444         if (*c 
!= wxT(' ') && *c 
!= wxT('\t') && *c 
!= wxT('\n') && *c 
!= wxT('\r')) 
 450 struct wxXmlParsingContext
 
 455     wxXmlNode 
*lastAsText
; 
 459     bool       removeWhiteOnlyNodes
; 
 463 static void StartElementHnd(void *userData
, const char *name
, const char **atts
) 
 465     wxXmlParsingContext 
*ctx 
= (wxXmlParsingContext
*)userData
; 
 466     wxXmlNode 
*node 
= new wxXmlNode(wxXML_ELEMENT_NODE
, CharToString(ctx
->conv
, name
)); 
 467     const char **a 
= atts
; 
 470         node
->AddProperty(CharToString(ctx
->conv
, a
[0]), CharToString(ctx
->conv
, a
[1])); 
 473     if (ctx
->root 
== NULL
) 
 476         ctx
->node
->AddChild(node
); 
 478     ctx
->lastAsText 
= NULL
; 
 483 static void EndElementHnd(void *userData
, const char* WXUNUSED(name
)) 
 485     wxXmlParsingContext 
*ctx 
= (wxXmlParsingContext
*)userData
; 
 487     ctx
->node 
= ctx
->node
->GetParent(); 
 488     ctx
->lastAsText 
= NULL
; 
 493 static void TextHnd(void *userData
, const char *s
, int len
) 
 495     wxXmlParsingContext 
*ctx 
= (wxXmlParsingContext
*)userData
; 
 496     wxString str 
= CharToString(ctx
->conv
, s
, len
); 
 500         if ( ctx
->bLastCdata 
) 
 502             ctx
->lastAsText
->SetContent(ctx
->lastAsText
->GetContent() + 
 503                                         CharToString(NULL
, s
, len
)); 
 507             ctx
->lastAsText
->SetContent(ctx
->lastAsText
->GetContent() + str
); 
 512         bool whiteOnly 
= false; 
 513         if (ctx
->removeWhiteOnlyNodes
) 
 514             whiteOnly 
= wxIsWhiteOnly(str
); 
 518             ctx
->lastAsText 
= new wxXmlNode(wxXML_TEXT_NODE
, wxT("text"), str
); 
 519             ctx
->node
->AddChild(ctx
->lastAsText
); 
 526 static void StartCdataHnd(void *userData
) 
 528     wxXmlParsingContext 
*ctx 
= (wxXmlParsingContext
*)userData
; 
 530     ctx
->bLastCdata 
= true; 
 532     ctx
->lastAsText 
= new wxXmlNode(wxXML_CDATA_SECTION_NODE
, wxT("cdata"),wxT("")); 
 533     ctx
->node
->AddChild(ctx
->lastAsText
); 
 538 static void EndCdataHnd(void *userData
) 
 540     wxXmlParsingContext 
*ctx 
= (wxXmlParsingContext
*)userData
; 
 542     ctx
->bLastCdata 
= false; 
 547 static void CommentHnd(void *userData
, const char *data
) 
 549     wxXmlParsingContext 
*ctx 
= (wxXmlParsingContext
*)userData
; 
 553         // VS: ctx->node == NULL happens if there is a comment before 
 554         //     the root element (e.g. wxDesigner's output). We ignore such 
 555         //     comments, no big deal... 
 556         ctx
->node
->AddChild(new wxXmlNode(wxXML_COMMENT_NODE
, 
 557                             wxT("comment"), CharToString(ctx
->conv
, data
))); 
 559     ctx
->lastAsText 
= NULL
; 
 564 static void DefaultHnd(void *userData
, const char *s
, int len
) 
 567     if (len 
> 6 && memcmp(s
, "<?xml ", 6) == 0) 
 569         wxXmlParsingContext 
*ctx 
= (wxXmlParsingContext
*)userData
; 
 571         wxString buf 
= CharToString(ctx
->conv
, s
, (size_t)len
); 
 573         pos 
= buf
.Find(wxT("encoding=")); 
 574         if (pos 
!= wxNOT_FOUND
) 
 575             ctx
->encoding 
= buf
.Mid(pos 
+ 10).BeforeFirst(buf
[(size_t)pos
+9]); 
 576         pos 
= buf
.Find(wxT("version=")); 
 577         if (pos 
!= wxNOT_FOUND
) 
 578             ctx
->version 
= buf
.Mid(pos 
+ 9).BeforeFirst(buf
[(size_t)pos
+8]); 
 584 static int UnknownEncodingHnd(void * WXUNUSED(encodingHandlerData
), 
 585                               const XML_Char 
*name
, XML_Encoding 
*info
) 
 587     // We must build conversion table for expat. The easiest way to do so 
 588     // is to let wxCSConv convert as string containing all characters to 
 589     // wide character representation: 
 590     wxString 
str(name
, wxConvLibc
); 
 598     for (i 
= 0; i 
< 255; i
++) 
 600         mbBuf
[0] = (char)(i
+1); 
 601         if (conv
.MB2WC(wcBuf
, mbBuf
, 2) == (size_t)-1) 
 603             // invalid/undefined byte in the encoding: 
 606         info
->map
[i
+1] = (int)wcBuf
[0]; 
 610     info
->convert 
= NULL
; 
 611     info
->release 
= NULL
; 
 617 bool wxXmlDocument::Load(wxInputStream
& stream
, const wxString
& encoding
, int flags
) 
 622     m_encoding 
= encoding
; 
 625     const size_t BUFSIZE 
= 1024; 
 627     wxXmlParsingContext ctx
; 
 629     XML_Parser parser 
= XML_ParserCreate(NULL
); 
 631     ctx
.root 
= ctx
.node 
= NULL
; 
 632     ctx
.encoding 
= wxT("UTF-8"); // default in absence of encoding="" 
 635     if ( encoding 
!= wxT("UTF-8") && encoding 
!= wxT("utf-8") ) 
 636         ctx
.conv 
= new wxCSConv(encoding
); 
 638     ctx
.removeWhiteOnlyNodes 
= (flags 
& wxXMLDOC_KEEP_WHITESPACE_NODES
) == 0; 
 639     ctx
.bLastCdata 
= false; 
 641     XML_SetUserData(parser
, (void*)&ctx
); 
 642     XML_SetElementHandler(parser
, StartElementHnd
, EndElementHnd
); 
 643     XML_SetCharacterDataHandler(parser
, TextHnd
); 
 644     XML_SetCdataSectionHandler(parser
, StartCdataHnd
, EndCdataHnd 
); 
 645     XML_SetCommentHandler(parser
, CommentHnd
); 
 646     XML_SetDefaultHandler(parser
, DefaultHnd
); 
 647     XML_SetUnknownEncodingHandler(parser
, UnknownEncodingHnd
, NULL
); 
 652         size_t len 
= stream
.Read(buf
, BUFSIZE
).LastRead(); 
 653         done 
= (len 
< BUFSIZE
); 
 654         if (!XML_Parse(parser
, buf
, len
, done
)) 
 656             wxString 
error(XML_ErrorString(XML_GetErrorCode(parser
)), 
 658             wxLogError(_("XML parsing error: '%s' at line %d"), 
 660                        XML_GetCurrentLineNumber(parser
)); 
 668         if (!ctx
.version
.empty()) 
 669             SetVersion(ctx
.version
); 
 670         if (!ctx
.encoding
.empty()) 
 671             SetFileEncoding(ctx
.encoding
); 
 679     XML_ParserFree(parser
); 
 691 //----------------------------------------------------------------------------- 
 692 //  wxXmlDocument saving routines 
 693 //----------------------------------------------------------------------------- 
 695 // write string to output: 
 696 inline static void OutputString(wxOutputStream
& stream
, const wxString
& str
, 
 697                                 wxMBConv 
*convMem 
= NULL
, 
 698                                 wxMBConv 
*convFile 
= NULL
) 
 704     wxUnusedVar(convMem
); 
 706     const wxWX2MBbuf 
buf(str
.mb_str(*(convFile 
? convFile 
: &wxConvUTF8
))); 
 707     stream
.Write((const char*)buf
, strlen((const char*)buf
)); 
 708 #else // !wxUSE_UNICODE 
 709     if ( convFile 
&& convMem 
) 
 711         wxString 
str2(str
.wc_str(*convMem
), *convFile
); 
 712         stream
.Write(str2
.mb_str(), str2
.Len()); 
 714     else // no conversions to do 
 716         stream
.Write(str
.mb_str(), str
.Len()); 
 718 #endif // wxUSE_UNICODE/!wxUSE_UNICODE 
 721 // flags for OutputStringEnt() 
 724     XML_ESCAPE_QUOTES 
= 1 
 727 // Same as above, but create entities first. 
 728 // Translates '<' to "<", '>' to ">" and '&' to "&" 
 729 static void OutputStringEnt(wxOutputStream
& stream
, const wxString
& str
, 
 730                             wxMBConv 
*convMem 
= NULL
, 
 731                             wxMBConv 
*convFile 
= NULL
, 
 740     for (i 
= 0; i 
< len
; i
++) 
 743         if (c 
== wxT('<') || c 
== wxT('>') || 
 744             (c 
== wxT('&') && str
.Mid(i
+1, 4) != wxT("amp;")) || 
 745             ((flags 
& XML_ESCAPE_QUOTES
) && c 
== wxT('"'))) 
 747             OutputString(stream
, str
.Mid(last
, i 
- last
), convMem
, convFile
); 
 751                     OutputString(stream
, wxT("<")); 
 754                     OutputString(stream
, wxT(">")); 
 757                     OutputString(stream
, wxT("&")); 
 760                     OutputString(stream
, wxT(""")); 
 768     OutputString(stream
, str
.Mid(last
, i 
- last
), convMem
, convFile
); 
 771 inline static void OutputIndentation(wxOutputStream
& stream
, int indent
) 
 773     wxString str 
= wxT("\n"); 
 774     for (int i 
= 0; i 
< indent
; i
++) 
 775         str 
<< wxT(' ') << wxT(' '); 
 776     OutputString(stream
, str
); 
 779 static void OutputNode(wxOutputStream
& stream
, wxXmlNode 
*node
, int indent
, 
 780                        wxMBConv 
*convMem
, wxMBConv 
*convFile
, int indentstep
) 
 785     switch (node
->GetType()) 
 787         case wxXML_CDATA_SECTION_NODE
: 
 788             OutputString( stream
, wxT("<![CDATA[")); 
 789             OutputString( stream
, node
->GetContent() ); 
 790             OutputString( stream
, wxT("]]>") ); 
 793         case wxXML_TEXT_NODE
: 
 794             OutputStringEnt(stream
, node
->GetContent(), convMem
, convFile
); 
 797         case wxXML_ELEMENT_NODE
: 
 798             OutputString(stream
, wxT("<")); 
 799             OutputString(stream
, node
->GetName()); 
 801             prop 
= node
->GetProperties(); 
 804                 OutputString(stream
, wxT(" ") + prop
->GetName() +  wxT("=\"")); 
 805                 OutputStringEnt(stream
, prop
->GetValue(), convMem
, convFile
, 
 807                 OutputString(stream
, wxT("\"")); 
 808                 prop 
= prop
->GetNext(); 
 811             if (node
->GetChildren()) 
 813                 OutputString(stream
, wxT(">")); 
 815                 n 
= node
->GetChildren(); 
 818                     if (indentstep 
>= 0 && n 
&& n
->GetType() != wxXML_TEXT_NODE
) 
 819                         OutputIndentation(stream
, indent 
+ indentstep
); 
 820                     OutputNode(stream
, n
, indent 
+ indentstep
, convMem
, convFile
, indentstep
); 
 824                 if (indentstep 
>= 0 && prev 
&& prev
->GetType() != wxXML_TEXT_NODE
) 
 825                     OutputIndentation(stream
, indent
); 
 826                 OutputString(stream
, wxT("</")); 
 827                 OutputString(stream
, node
->GetName()); 
 828                 OutputString(stream
, wxT(">")); 
 831                 OutputString(stream
, wxT("/>")); 
 834         case wxXML_COMMENT_NODE
: 
 835             OutputString(stream
, wxT("<!--")); 
 836             OutputString(stream
, node
->GetContent(), convMem
, convFile
); 
 837             OutputString(stream
, wxT("-->")); 
 841             wxFAIL_MSG(wxT("unsupported node type")); 
 845 bool wxXmlDocument::Save(wxOutputStream
& stream
, int indentstep
) const 
 852     wxMBConv 
*convMem 
= NULL
; 
 855     wxMBConv 
*convFile 
= new wxCSConv(GetFileEncoding()); 
 857     wxMBConv 
*convFile 
= NULL
; 
 858     if ( GetFileEncoding() != GetEncoding() ) 
 860         convFile 
= new wxCSConv(GetFileEncoding()); 
 861         convMem 
= new wxCSConv(GetEncoding()); 
 865     s
.Printf(wxT("<?xml version=\"%s\" encoding=\"%s\"?>\n"), 
 866              GetVersion().c_str(), GetFileEncoding().c_str()); 
 867     OutputString(stream
, s
); 
 869     OutputNode(stream
, GetRoot(), 0, convMem
, convFile
, indentstep
); 
 870     OutputString(stream
, wxT("\n"));