Add support for elements preceding the document node in wxXML.
[wxWidgets.git] / src / xml / xml.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/xml/xml.cpp
3 // Purpose: wxXmlDocument - XML parser & data holder class
4 // Author: Vaclav Slavik
5 // Created: 2000/03/05
6 // RCS-ID: $Id$
7 // Copyright: (c) 2000 Vaclav Slavik
8 // Licence: wxWindows licence
9 /////////////////////////////////////////////////////////////////////////////
10
11 // For compilers that support precompilation, includes "wx.h".
12 #include "wx/wxprec.h"
13
14 #ifdef __BORLANDC__
15 #pragma hdrstop
16 #endif
17
18 #if wxUSE_XML
19
20 #include "wx/xml/xml.h"
21
22 #ifndef WX_PRECOMP
23 #include "wx/intl.h"
24 #include "wx/log.h"
25 #include "wx/app.h"
26 #endif
27
28 #include "wx/wfstream.h"
29 #include "wx/datstrm.h"
30 #include "wx/zstream.h"
31 #include "wx/strconv.h"
32 #include "wx/scopedptr.h"
33 #include "wx/versioninfo.h"
34
35 #include "expat.h" // from Expat
36
37 // DLL options compatibility check:
38 WX_CHECK_BUILD_OPTIONS("wxXML")
39
40
41 IMPLEMENT_CLASS(wxXmlDocument, wxObject)
42
43
44 // a private utility used by wxXML
45 static bool wxIsWhiteOnly(const wxString& buf);
46
47
48 //-----------------------------------------------------------------------------
49 // wxXmlNode
50 //-----------------------------------------------------------------------------
51
52 wxXmlNode::wxXmlNode(wxXmlNode *parent,wxXmlNodeType type,
53 const wxString& name, const wxString& content,
54 wxXmlAttribute *attrs, wxXmlNode *next, int lineNo)
55 : m_type(type), m_name(name), m_content(content),
56 m_attrs(attrs), m_parent(parent),
57 m_children(NULL), m_next(next),
58 m_lineNo(lineNo),
59 m_noConversion(false)
60 {
61 if (m_parent)
62 {
63 if (m_parent->m_children)
64 {
65 m_next = m_parent->m_children;
66 m_parent->m_children = this;
67 }
68 else
69 m_parent->m_children = this;
70 }
71 }
72
73 wxXmlNode::wxXmlNode(wxXmlNodeType type, const wxString& name,
74 const wxString& content,
75 int lineNo)
76 : m_type(type), m_name(name), m_content(content),
77 m_attrs(NULL), m_parent(NULL),
78 m_children(NULL), m_next(NULL),
79 m_lineNo(lineNo), m_noConversion(false)
80 {}
81
82 wxXmlNode::wxXmlNode(const wxXmlNode& node)
83 {
84 m_next = NULL;
85 m_parent = NULL;
86 DoCopy(node);
87 }
88
89 wxXmlNode::~wxXmlNode()
90 {
91 wxXmlNode *c, *c2;
92 for (c = m_children; c; c = c2)
93 {
94 c2 = c->m_next;
95 delete c;
96 }
97
98 wxXmlAttribute *p, *p2;
99 for (p = m_attrs; p; p = p2)
100 {
101 p2 = p->GetNext();
102 delete p;
103 }
104 }
105
106 wxXmlNode& wxXmlNode::operator=(const wxXmlNode& node)
107 {
108 wxDELETE(m_attrs);
109 wxDELETE(m_children);
110 DoCopy(node);
111 return *this;
112 }
113
114 void wxXmlNode::DoCopy(const wxXmlNode& node)
115 {
116 m_type = node.m_type;
117 m_name = node.m_name;
118 m_content = node.m_content;
119 m_lineNo = node.m_lineNo;
120 m_noConversion = node.m_noConversion;
121 m_children = NULL;
122
123 wxXmlNode *n = node.m_children;
124 while (n)
125 {
126 AddChild(new wxXmlNode(*n));
127 n = n->GetNext();
128 }
129
130 m_attrs = NULL;
131 wxXmlAttribute *p = node.m_attrs;
132 while (p)
133 {
134 AddAttribute(p->GetName(), p->GetValue());
135 p = p->GetNext();
136 }
137 }
138
139 bool wxXmlNode::HasAttribute(const wxString& attrName) const
140 {
141 wxXmlAttribute *attr = GetAttributes();
142
143 while (attr)
144 {
145 if (attr->GetName() == attrName) return true;
146 attr = attr->GetNext();
147 }
148
149 return false;
150 }
151
152 bool wxXmlNode::GetAttribute(const wxString& attrName, wxString *value) const
153 {
154 wxCHECK_MSG( value, false, "value argument must not be NULL" );
155
156 wxXmlAttribute *attr = GetAttributes();
157
158 while (attr)
159 {
160 if (attr->GetName() == attrName)
161 {
162 *value = attr->GetValue();
163 return true;
164 }
165 attr = attr->GetNext();
166 }
167
168 return false;
169 }
170
171 wxString wxXmlNode::GetAttribute(const wxString& attrName, const wxString& defaultVal) const
172 {
173 wxString tmp;
174 if (GetAttribute(attrName, &tmp))
175 return tmp;
176
177 return defaultVal;
178 }
179
180 void wxXmlNode::AddChild(wxXmlNode *child)
181 {
182 if (m_children == NULL)
183 m_children = child;
184 else
185 {
186 wxXmlNode *ch = m_children;
187 while (ch->m_next) ch = ch->m_next;
188 ch->m_next = child;
189 }
190 child->m_next = NULL;
191 child->m_parent = this;
192 }
193
194 // inserts a new node in front of 'followingNode'
195 bool wxXmlNode::InsertChild(wxXmlNode *child, wxXmlNode *followingNode)
196 {
197 wxCHECK_MSG( child, false, "cannot insert a NULL node!" );
198 wxCHECK_MSG( child->m_parent == NULL, false, "node already has a parent" );
199 wxCHECK_MSG( child->m_next == NULL, false, "node already has m_next" );
200 wxCHECK_MSG( followingNode == NULL || followingNode->GetParent() == this,
201 false,
202 "wxXmlNode::InsertChild - followingNode has incorrect parent" );
203
204 // this is for backward compatibility, NULL was allowed here thanks to
205 // the confusion about followingNode's meaning
206 if ( followingNode == NULL )
207 followingNode = m_children;
208
209 if ( m_children == followingNode )
210 {
211 child->m_next = m_children;
212 m_children = child;
213 }
214 else
215 {
216 wxXmlNode *ch = m_children;
217 while ( ch && ch->m_next != followingNode )
218 ch = ch->m_next;
219 if ( !ch )
220 {
221 wxFAIL_MSG( "followingNode has this node as parent, but couldn't be found among children" );
222 return false;
223 }
224
225 child->m_next = followingNode;
226 ch->m_next = child;
227 }
228
229 child->m_parent = this;
230 return true;
231 }
232
233 // inserts a new node right after 'precedingNode'
234 bool wxXmlNode::InsertChildAfter(wxXmlNode *child, wxXmlNode *precedingNode)
235 {
236 wxCHECK_MSG( child, false, "cannot insert a NULL node!" );
237 wxCHECK_MSG( child->m_parent == NULL, false, "node already has a parent" );
238 wxCHECK_MSG( child->m_next == NULL, false, "node already has m_next" );
239 wxCHECK_MSG( precedingNode == NULL || precedingNode->m_parent == this, false,
240 "precedingNode has wrong parent" );
241
242 if ( precedingNode )
243 {
244 child->m_next = precedingNode->m_next;
245 precedingNode->m_next = child;
246 }
247 else // precedingNode == NULL
248 {
249 wxCHECK_MSG( m_children == NULL, false,
250 "NULL precedingNode only makes sense when there are no children" );
251
252 child->m_next = m_children;
253 m_children = child;
254 }
255
256 child->m_parent = this;
257 return true;
258 }
259
260 bool wxXmlNode::RemoveChild(wxXmlNode *child)
261 {
262 if (m_children == NULL)
263 return false;
264 else if (m_children == child)
265 {
266 m_children = child->m_next;
267 child->m_parent = NULL;
268 child->m_next = NULL;
269 return true;
270 }
271 else
272 {
273 wxXmlNode *ch = m_children;
274 while (ch->m_next)
275 {
276 if (ch->m_next == child)
277 {
278 ch->m_next = child->m_next;
279 child->m_parent = NULL;
280 child->m_next = NULL;
281 return true;
282 }
283 ch = ch->m_next;
284 }
285 return false;
286 }
287 }
288
289 void wxXmlNode::AddAttribute(const wxString& name, const wxString& value)
290 {
291 AddProperty(name, value);
292 }
293
294 void wxXmlNode::AddAttribute(wxXmlAttribute *attr)
295 {
296 AddProperty(attr);
297 }
298
299 bool wxXmlNode::DeleteAttribute(const wxString& name)
300 {
301 return DeleteProperty(name);
302 }
303
304 void wxXmlNode::AddProperty(const wxString& name, const wxString& value)
305 {
306 AddProperty(new wxXmlAttribute(name, value, NULL));
307 }
308
309 void wxXmlNode::AddProperty(wxXmlAttribute *attr)
310 {
311 if (m_attrs == NULL)
312 m_attrs = attr;
313 else
314 {
315 wxXmlAttribute *p = m_attrs;
316 while (p->GetNext()) p = p->GetNext();
317 p->SetNext(attr);
318 }
319 }
320
321 bool wxXmlNode::DeleteProperty(const wxString& name)
322 {
323 wxXmlAttribute *attr;
324
325 if (m_attrs == NULL)
326 return false;
327
328 else if (m_attrs->GetName() == name)
329 {
330 attr = m_attrs;
331 m_attrs = attr->GetNext();
332 attr->SetNext(NULL);
333 delete attr;
334 return true;
335 }
336
337 else
338 {
339 wxXmlAttribute *p = m_attrs;
340 while (p->GetNext())
341 {
342 if (p->GetNext()->GetName() == name)
343 {
344 attr = p->GetNext();
345 p->SetNext(attr->GetNext());
346 attr->SetNext(NULL);
347 delete attr;
348 return true;
349 }
350 p = p->GetNext();
351 }
352 return false;
353 }
354 }
355
356 wxString wxXmlNode::GetNodeContent() const
357 {
358 wxXmlNode *n = GetChildren();
359
360 while (n)
361 {
362 if (n->GetType() == wxXML_TEXT_NODE ||
363 n->GetType() == wxXML_CDATA_SECTION_NODE)
364 return n->GetContent();
365 n = n->GetNext();
366 }
367 return wxEmptyString;
368 }
369
370 int wxXmlNode::GetDepth(wxXmlNode *grandparent) const
371 {
372 const wxXmlNode *n = this;
373 int ret = -1;
374
375 do
376 {
377 ret++;
378 n = n->GetParent();
379 if (n == grandparent)
380 return ret;
381
382 } while (n);
383
384 return wxNOT_FOUND;
385 }
386
387 bool wxXmlNode::IsWhitespaceOnly() const
388 {
389 return wxIsWhiteOnly(m_content);
390 }
391
392
393
394 //-----------------------------------------------------------------------------
395 // wxXmlDocument
396 //-----------------------------------------------------------------------------
397
398 wxXmlDocument::wxXmlDocument()
399 : m_version(wxS("1.0")), m_fileEncoding(wxS("UTF-8")), m_docNode(NULL)
400 {
401 #if !wxUSE_UNICODE
402 m_encoding = wxS("UTF-8");
403 #endif
404 }
405
406 wxXmlDocument::wxXmlDocument(const wxString& filename, const wxString& encoding)
407 :wxObject(), m_docNode(NULL)
408 {
409 if ( !Load(filename, encoding) )
410 {
411 wxDELETE(m_docNode);
412 }
413 }
414
415 wxXmlDocument::wxXmlDocument(wxInputStream& stream, const wxString& encoding)
416 :wxObject(), m_docNode(NULL)
417 {
418 if ( !Load(stream, encoding) )
419 {
420 wxDELETE(m_docNode);
421 }
422 }
423
424 wxXmlDocument::wxXmlDocument(const wxXmlDocument& doc)
425 :wxObject()
426 {
427 DoCopy(doc);
428 }
429
430 wxXmlDocument& wxXmlDocument::operator=(const wxXmlDocument& doc)
431 {
432 wxDELETE(m_docNode);
433 DoCopy(doc);
434 return *this;
435 }
436
437 void wxXmlDocument::DoCopy(const wxXmlDocument& doc)
438 {
439 m_version = doc.m_version;
440 #if !wxUSE_UNICODE
441 m_encoding = doc.m_encoding;
442 #endif
443 m_fileEncoding = doc.m_fileEncoding;
444
445 if (doc.m_docNode)
446 m_docNode = new wxXmlNode(*doc.m_docNode);
447 else
448 m_docNode = NULL;
449 }
450
451 bool wxXmlDocument::Load(const wxString& filename, const wxString& encoding, int flags)
452 {
453 wxFileInputStream stream(filename);
454 if (!stream.Ok())
455 return false;
456 return Load(stream, encoding, flags);
457 }
458
459 bool wxXmlDocument::Save(const wxString& filename, int indentstep) const
460 {
461 wxFileOutputStream stream(filename);
462 if (!stream.Ok())
463 return false;
464 return Save(stream, indentstep);
465 }
466
467 wxXmlNode *wxXmlDocument::GetRoot() const
468 {
469 wxXmlNode *node = m_docNode;
470 if (node)
471 {
472 node = m_docNode->GetChildren();
473 while (node != NULL && node->GetType() != wxXML_ELEMENT_NODE)
474 node = node->GetNext();
475 }
476 return node;
477 }
478
479 wxXmlNode *wxXmlDocument::DetachRoot()
480 {
481 wxXmlNode *node = m_docNode;
482 if (node)
483 {
484 node = m_docNode->GetChildren();
485 wxXmlNode *prev = NULL;
486 while (node != NULL && node->GetType() != wxXML_ELEMENT_NODE)
487 {
488 prev = node;
489 node = node->GetNext();
490 }
491 if (node)
492 {
493 if (node == m_docNode->GetChildren())
494 m_docNode->SetChildren(node->GetNext());
495
496 if (prev)
497 prev->SetNext(node->GetNext());
498
499 node->SetParent(NULL);
500 node->SetNext(NULL);
501 }
502 }
503 return node;
504 }
505
506 void wxXmlDocument::SetRoot(wxXmlNode *root)
507 {
508 wxXmlNode *node = m_docNode;
509 if (node)
510 {
511 node = m_docNode->GetChildren();
512 wxXmlNode *prev = NULL;
513 while (node != NULL && node->GetType() != wxXML_ELEMENT_NODE)
514 {
515 prev = node;
516 node = node->GetNext();
517 }
518 if (node)
519 {
520 root->SetNext( node->GetNext() );
521 wxDELETE(node);
522 }
523 if (prev)
524 prev->SetNext(root);
525 else
526 m_docNode->SetChildren(root);
527 }
528 else
529 {
530 m_docNode = new wxXmlNode(wxXML_DOCUMENT_NODE, wxEmptyString);
531 }
532 root->SetParent(m_docNode);
533 }
534
535 void wxXmlDocument::AppendToProlog(wxXmlNode *node)
536 {
537 if (!m_docNode)
538 m_docNode = new wxXmlNode(wxXML_DOCUMENT_NODE, wxEmptyString);
539 if (IsOk())
540 m_docNode->InsertChild( node, GetRoot() );
541 else
542 m_docNode->AddChild( node );
543 }
544
545 //-----------------------------------------------------------------------------
546 // wxXmlDocument loading routines
547 //-----------------------------------------------------------------------------
548
549 // converts Expat-produced string in UTF-8 into wxString using the specified
550 // conv or keep in UTF-8 if conv is NULL
551 static wxString CharToString(wxMBConv *conv,
552 const char *s, size_t len = wxString::npos)
553 {
554 #if !wxUSE_UNICODE
555 if ( conv )
556 {
557 // there can be no embedded NULs in this string so we don't need the
558 // output length, it will be NUL-terminated
559 const wxWCharBuffer wbuf(
560 wxConvUTF8.cMB2WC(s, len == wxString::npos ? wxNO_LEN : len, NULL));
561
562 return wxString(wbuf, *conv);
563 }
564 // else: the string is wanted in UTF-8
565 #endif // !wxUSE_UNICODE
566
567 wxUnusedVar(conv);
568 return wxString::FromUTF8Unchecked(s, len);
569 }
570
571 // returns true if the given string contains only whitespaces
572 bool wxIsWhiteOnly(const wxString& buf)
573 {
574 for ( wxString::const_iterator i = buf.begin(); i != buf.end(); ++i )
575 {
576 wxChar c = *i;
577 if ( c != wxS(' ') && c != wxS('\t') && c != wxS('\n') && c != wxS('\r'))
578 return false;
579 }
580 return true;
581 }
582
583
584 struct wxXmlParsingContext
585 {
586 wxXmlParsingContext()
587 : conv(NULL),
588 node(NULL),
589 lastChild(NULL),
590 lastAsText(NULL),
591 removeWhiteOnlyNodes(false)
592 {}
593
594 XML_Parser parser;
595 wxMBConv *conv;
596 wxXmlNode *node; // the node being parsed
597 wxXmlNode *lastChild; // the last child of "node"
598 wxXmlNode *lastAsText; // the last _text_ child of "node"
599 wxString encoding;
600 wxString version;
601 bool removeWhiteOnlyNodes;
602 };
603
// checks that ctx->lastChild is in consistent state: it must either be NULL
// or be a child of ctx->node with no sibling following it
//
// NB: the macro argument is parenthesized so that an arbitrary expression
//     can be passed; the macro expands to two statements and so must only be
//     used where a sequence of statements is allowed
#define ASSERT_LAST_CHILD_OK(ctx)                                   \
    wxASSERT( (ctx)->lastChild == NULL ||                           \
              (ctx)->lastChild->GetNext() == NULL );                \
    wxASSERT( (ctx)->lastChild == NULL ||                           \
              (ctx)->lastChild->GetParent() == (ctx)->node )
610
611 extern "C" {
612 static void StartElementHnd(void *userData, const char *name, const char **atts)
613 {
614 wxXmlParsingContext *ctx = (wxXmlParsingContext*)userData;
615 wxXmlNode *node = new wxXmlNode(wxXML_ELEMENT_NODE,
616 CharToString(ctx->conv, name),
617 wxEmptyString,
618 XML_GetCurrentLineNumber(ctx->parser));
619 const char **a = atts;
620
621 // add node attributes
622 while (*a)
623 {
624 node->AddAttribute(CharToString(ctx->conv, a[0]), CharToString(ctx->conv, a[1]));
625 a += 2;
626 }
627
628 ASSERT_LAST_CHILD_OK(ctx);
629 ctx->node->InsertChildAfter(node, ctx->lastChild);
630 ctx->lastAsText = NULL;
631 ctx->lastChild = NULL; // our new node "node" has no children yet
632
633 ctx->node = node;
634 }
635
636 static void EndElementHnd(void *userData, const char* WXUNUSED(name))
637 {
638 wxXmlParsingContext *ctx = (wxXmlParsingContext*)userData;
639
640 // we're exiting the last children of ctx->node->GetParent() and going
641 // back one level up, so current value of ctx->node points to the last
642 // child of ctx->node->GetParent()
643 ctx->lastChild = ctx->node;
644
645 ctx->node = ctx->node->GetParent();
646 ctx->lastAsText = NULL;
647 }
648
649 static void TextHnd(void *userData, const char *s, int len)
650 {
651 wxXmlParsingContext *ctx = (wxXmlParsingContext*)userData;
652 wxString str = CharToString(ctx->conv, s, len);
653
654 if (ctx->lastAsText)
655 {
656 ctx->lastAsText->SetContent(ctx->lastAsText->GetContent() + str);
657 }
658 else
659 {
660 bool whiteOnly = false;
661 if (ctx->removeWhiteOnlyNodes)
662 whiteOnly = wxIsWhiteOnly(str);
663
664 if (!whiteOnly)
665 {
666 wxXmlNode *textnode =
667 new wxXmlNode(wxXML_TEXT_NODE, wxS("text"), str,
668 XML_GetCurrentLineNumber(ctx->parser));
669
670 ASSERT_LAST_CHILD_OK(ctx);
671 ctx->node->InsertChildAfter(textnode, ctx->lastChild);
672 ctx->lastChild= ctx->lastAsText = textnode;
673 }
674 }
675 }
676
677 static void StartCdataHnd(void *userData)
678 {
679 wxXmlParsingContext *ctx = (wxXmlParsingContext*)userData;
680
681 wxXmlNode *textnode =
682 new wxXmlNode(wxXML_CDATA_SECTION_NODE, wxS("cdata"), wxS(""),
683 XML_GetCurrentLineNumber(ctx->parser));
684
685 ASSERT_LAST_CHILD_OK(ctx);
686 ctx->node->InsertChildAfter(textnode, ctx->lastChild);
687 ctx->lastChild= ctx->lastAsText = textnode;
688 }
689
690 static void EndCdataHnd(void *userData)
691 {
692 wxXmlParsingContext *ctx = (wxXmlParsingContext*)userData;
693
694 // we need to reset this pointer so that subsequent text nodes don't append
695 // their contents to this one but create new wxXML_TEXT_NODE objects (or
696 // not create anything at all if only white space follows the CDATA section
697 // and wxXMLDOC_KEEP_WHITESPACE_NODES is not used as is commonly the case)
698 ctx->lastAsText = NULL;
699 }
700
701 static void CommentHnd(void *userData, const char *data)
702 {
703 wxXmlParsingContext *ctx = (wxXmlParsingContext*)userData;
704
705 wxXmlNode *commentnode =
706 new wxXmlNode(wxXML_COMMENT_NODE,
707 wxS("comment"), CharToString(ctx->conv, data),
708 XML_GetCurrentLineNumber(ctx->parser));
709
710 ASSERT_LAST_CHILD_OK(ctx);
711 ctx->node->InsertChildAfter(commentnode, ctx->lastChild);
712 ctx->lastChild = commentnode;
713 ctx->lastAsText = NULL;
714 }
715
716 static void PIHnd(void *userData, const char *target, const char *data)
717 {
718 wxXmlParsingContext *ctx = (wxXmlParsingContext*)userData;
719
720 wxXmlNode *pinode =
721 new wxXmlNode(wxXML_PI_NODE, CharToString(ctx->conv, target),
722 CharToString(ctx->conv, data),
723 XML_GetCurrentLineNumber(ctx->parser));
724
725 ASSERT_LAST_CHILD_OK(ctx);
726 ctx->node->InsertChildAfter(pinode, ctx->lastChild);
727 ctx->lastChild = pinode;
728 ctx->lastAsText = NULL;
729 }
730
731 static void DefaultHnd(void *userData, const char *s, int len)
732 {
733 // XML header:
734 if (len > 6 && memcmp(s, "<?xml ", 6) == 0)
735 {
736 wxXmlParsingContext *ctx = (wxXmlParsingContext*)userData;
737
738 wxString buf = CharToString(ctx->conv, s, (size_t)len);
739 int pos;
740 pos = buf.Find(wxS("encoding="));
741 if (pos != wxNOT_FOUND)
742 ctx->encoding = buf.Mid(pos + 10).BeforeFirst(buf[(size_t)pos+9]);
743 pos = buf.Find(wxS("version="));
744 if (pos != wxNOT_FOUND)
745 ctx->version = buf.Mid(pos + 9).BeforeFirst(buf[(size_t)pos+8]);
746 }
747 }
748
749 static int UnknownEncodingHnd(void * WXUNUSED(encodingHandlerData),
750 const XML_Char *name, XML_Encoding *info)
751 {
752 // We must build conversion table for expat. The easiest way to do so
753 // is to let wxCSConv convert as string containing all characters to
754 // wide character representation:
755 wxCSConv conv(name);
756 char mbBuf[2];
757 wchar_t wcBuf[10];
758 size_t i;
759
760 mbBuf[1] = 0;
761 info->map[0] = 0;
762 for (i = 0; i < 255; i++)
763 {
764 mbBuf[0] = (char)(i+1);
765 if (conv.MB2WC(wcBuf, mbBuf, 2) == (size_t)-1)
766 {
767 // invalid/undefined byte in the encoding:
768 info->map[i+1] = -1;
769 }
770 info->map[i+1] = (int)wcBuf[0];
771 }
772
773 info->data = NULL;
774 info->convert = NULL;
775 info->release = NULL;
776
777 return 1;
778 }
779
780 } // extern "C"
781
782 bool wxXmlDocument::Load(wxInputStream& stream, const wxString& encoding, int flags)
783 {
784 #if wxUSE_UNICODE
785 (void)encoding;
786 #else
787 m_encoding = encoding;
788 #endif
789
790 const size_t BUFSIZE = 1024;
791 char buf[BUFSIZE];
792 wxXmlParsingContext ctx;
793 bool done;
794 XML_Parser parser = XML_ParserCreate(NULL);
795 wxXmlNode *root = new wxXmlNode(wxXML_DOCUMENT_NODE, wxEmptyString);
796
797 ctx.encoding = wxS("UTF-8"); // default in absence of encoding=""
798 ctx.conv = NULL;
799 #if !wxUSE_UNICODE
800 if ( encoding.CmpNoCase(wxS("UTF-8")) != 0 )
801 ctx.conv = new wxCSConv(encoding);
802 #endif
803 ctx.removeWhiteOnlyNodes = (flags & wxXMLDOC_KEEP_WHITESPACE_NODES) == 0;
804 ctx.parser = parser;
805 ctx.node = root;
806
807 XML_SetUserData(parser, (void*)&ctx);
808 XML_SetElementHandler(parser, StartElementHnd, EndElementHnd);
809 XML_SetCharacterDataHandler(parser, TextHnd);
810 XML_SetCdataSectionHandler(parser, StartCdataHnd, EndCdataHnd);;
811 XML_SetCommentHandler(parser, CommentHnd);
812 XML_SetProcessingInstructionHandler(parser, PIHnd);
813 XML_SetDefaultHandler(parser, DefaultHnd);
814 XML_SetUnknownEncodingHandler(parser, UnknownEncodingHnd, NULL);
815
816 bool ok = true;
817 do
818 {
819 size_t len = stream.Read(buf, BUFSIZE).LastRead();
820 done = (len < BUFSIZE);
821 if (!XML_Parse(parser, buf, len, done))
822 {
823 wxString error(XML_ErrorString(XML_GetErrorCode(parser)),
824 *wxConvCurrent);
825 wxLogError(_("XML parsing error: '%s' at line %d"),
826 error.c_str(),
827 (int)XML_GetCurrentLineNumber(parser));
828 ok = false;
829 break;
830 }
831 } while (!done);
832
833 if (ok)
834 {
835 if (!ctx.version.empty())
836 SetVersion(ctx.version);
837 if (!ctx.encoding.empty())
838 SetFileEncoding(ctx.encoding);
839 SetDocumentNode(root);
840 }
841 else
842 {
843 delete root;
844 }
845
846 XML_ParserFree(parser);
847 #if !wxUSE_UNICODE
848 if ( ctx.conv )
849 delete ctx.conv;
850 #endif
851
852 return ok;
853
854 }
855
856
857
858 //-----------------------------------------------------------------------------
859 // wxXmlDocument saving routines
860 //-----------------------------------------------------------------------------
861
862 // helpers for XML generation
863 namespace
864 {
865
866 // write string to output:
867 bool OutputString(wxOutputStream& stream,
868 const wxString& str,
869 wxMBConv *convMem,
870 wxMBConv *convFile)
871 {
872 if (str.empty())
873 return true;
874
875 #if wxUSE_UNICODE
876 wxUnusedVar(convMem);
877 if ( !convFile )
878 convFile = &wxConvUTF8;
879
880 const wxScopedCharBuffer buf(str.mb_str(*convFile));
881 if ( !buf.length() )
882 {
883 // conversion failed, can't write this string in an XML file in this
884 // (presumably non-UTF-8) encoding
885 return false;
886 }
887
888 stream.Write(buf, buf.length());
889 #else // !wxUSE_UNICODE
890 if ( convFile && convMem )
891 {
892 wxString str2(str.wc_str(*convMem), *convFile);
893 stream.Write(str2.mb_str(), str2.length());
894 }
895 else // no conversions to do
896 {
897 stream.Write(str.mb_str(), str.length());
898 }
899 #endif // wxUSE_UNICODE/!wxUSE_UNICODE
900
901 return stream.IsOk();
902 }
903
// Selects the set of characters replaced by OutputEscapedString().
enum EscapingMode
{
    // text content: only '<', '>', '&' and '\r' are escaped
    Escape_Text,

    // attribute value: '"', '\t' and '\n' are escaped as well
    Escape_Attribute
};
909
910 // Same as above, but create entities first.
911 // Translates '<' to "&lt;", '>' to "&gt;" and so on, according to the spec:
912 // http://www.w3.org/TR/2000/WD-xml-c14n-20000119.html#charescaping
913 bool OutputEscapedString(wxOutputStream& stream,
914 const wxString& str,
915 wxMBConv *convMem,
916 wxMBConv *convFile,
917 EscapingMode mode)
918 {
919 wxString escaped;
920 escaped.reserve(str.length());
921
922 for ( wxString::const_iterator i = str.begin(); i != str.end(); ++i )
923 {
924 const wxChar c = *i;
925
926 switch ( c )
927 {
928 case wxS('<'):
929 escaped.append(wxS("&lt;"));
930 break;
931 case wxS('>'):
932 escaped.append(wxS("&gt;"));
933 break;
934 case wxS('&'):
935 escaped.append(wxS("&amp;"));
936 break;
937 case wxS('\r'):
938 escaped.append(wxS("&#xD;"));
939 break;
940 default:
941 if ( mode == Escape_Attribute )
942 {
943 switch ( c )
944 {
945 case wxS('"'):
946 escaped.append(wxS("&quot;"));
947 break;
948 case wxS('\t'):
949 escaped.append(wxS("&#x9;"));
950 break;
951 case wxS('\n'):
952 escaped.append(wxS("&#xA;"));
953 break;
954 default:
955 escaped.append(c);
956 }
957
958 }
959 else
960 {
961 escaped.append(c);
962 }
963 }
964 }
965
966 return OutputString(stream, escaped, convMem, convFile);
967 }
968
969 bool OutputIndentation(wxOutputStream& stream,
970 int indent,
971 wxMBConv *convMem,
972 wxMBConv *convFile)
973 {
974 wxString str(wxS("\n"));
975 str += wxString(indent, wxS(' '));
976 return OutputString(stream, str, convMem, convFile);
977 }
978
979 bool OutputNode(wxOutputStream& stream,
980 wxXmlNode *node,
981 int indent,
982 wxMBConv *convMem,
983 wxMBConv *convFile,
984 int indentstep)
985 {
986 bool rc;
987 switch (node->GetType())
988 {
989 case wxXML_CDATA_SECTION_NODE:
990 rc = OutputString(stream, wxS("<![CDATA["), convMem, convFile) &&
991 OutputString(stream, node->GetContent(), convMem, convFile) &&
992 OutputString(stream, wxS("]]>"), convMem, convFile);
993 break;
994
995 case wxXML_TEXT_NODE:
996 if (node->GetNoConversion())
997 {
998 stream.Write(node->GetContent().c_str(), node->GetContent().Length());
999 rc = true;
1000 }
1001 else
1002 rc = OutputEscapedString(stream, node->GetContent(),
1003 convMem, convFile,
1004 Escape_Text);
1005 break;
1006
1007 case wxXML_ELEMENT_NODE:
1008 rc = OutputString(stream, wxS("<"), convMem, convFile) &&
1009 OutputString(stream, node->GetName(), convMem, convFile);
1010
1011 if ( rc )
1012 {
1013 for ( wxXmlAttribute *attr = node->GetAttributes();
1014 attr && rc;
1015 attr = attr->GetNext() )
1016 {
1017 rc = OutputString(stream,
1018 wxS(" ") + attr->GetName() + wxS("=\""),
1019 convMem, convFile) &&
1020 OutputEscapedString(stream, attr->GetValue(),
1021 convMem, convFile,
1022 Escape_Attribute) &&
1023 OutputString(stream, wxS("\""), convMem, convFile);
1024 }
1025 }
1026
1027 if ( node->GetChildren() )
1028 {
1029 rc = OutputString(stream, wxS(">"), convMem, convFile);
1030
1031 wxXmlNode *prev = NULL;
1032 for ( wxXmlNode *n = node->GetChildren();
1033 n && rc;
1034 n = n->GetNext() )
1035 {
1036 if ( indentstep >= 0 && n->GetType() != wxXML_TEXT_NODE )
1037 {
1038 rc = OutputIndentation(stream, indent + indentstep,
1039 convMem, convFile);
1040 }
1041
1042 if ( rc )
1043 rc = OutputNode(stream, n, indent + indentstep,
1044 convMem, convFile, indentstep);
1045
1046 prev = n;
1047 }
1048
1049 if ( rc && indentstep >= 0 &&
1050 prev && prev->GetType() != wxXML_TEXT_NODE )
1051 {
1052 rc = OutputIndentation(stream, indent, convMem, convFile);
1053 }
1054
1055 if ( rc )
1056 {
1057 rc = OutputString(stream, wxS("</"), convMem, convFile) &&
1058 OutputString(stream, node->GetName(),
1059 convMem, convFile) &&
1060 OutputString(stream, wxS(">"), convMem, convFile);
1061 }
1062 }
1063 else // no children, output "<foo/>"
1064 {
1065 rc = OutputString(stream, wxS("/>"), convMem, convFile);
1066 }
1067 break;
1068
1069 case wxXML_COMMENT_NODE:
1070 rc = OutputString(stream, wxS("<!--"), convMem, convFile) &&
1071 OutputString(stream, node->GetContent(), convMem, convFile) &&
1072 OutputString(stream, wxS("-->"), convMem, convFile);
1073 break;
1074
1075 case wxXML_PI_NODE:
1076 rc = OutputString(stream, wxT("<?"), convMem, convFile) &&
1077 OutputString(stream, node->GetName(), convMem, convFile) &&
1078 OutputString(stream, wxT(" "), convMem, convFile) &&
1079 OutputString(stream, node->GetContent(), convMem, convFile) &&
1080 OutputString(stream, wxT("?>"), convMem, convFile);
1081 break;
1082
1083 default:
1084 wxFAIL_MSG("unsupported node type");
1085 rc = false;
1086 }
1087
1088 return rc;
1089 }
1090
1091 } // anonymous namespace
1092
1093 bool wxXmlDocument::Save(wxOutputStream& stream, int indentstep) const
1094 {
1095 if ( !IsOk() )
1096 return false;
1097
1098 wxScopedPtr<wxMBConv> convMem, convFile;
1099
1100 #if wxUSE_UNICODE
1101 convFile.reset(new wxCSConv(GetFileEncoding()));
1102 #else
1103 if ( GetFileEncoding().CmpNoCase(GetEncoding()) != 0 )
1104 {
1105 convFile.reset(new wxCSConv(GetFileEncoding()));
1106 convMem.reset(new wxCSConv(GetEncoding()));
1107 }
1108 //else: file and in-memory encodings are the same, no conversion needed
1109 #endif
1110
1111 wxString dec = wxString::Format(
1112 wxS("<?xml version=\"%s\" encoding=\"%s\"?>\n"),
1113 GetVersion(), GetFileEncoding()
1114 );
1115 bool rc = OutputString(stream, dec, convMem.get(), convFile.get());
1116
1117 wxXmlNode *node = GetDocumentNode();
1118 if ( node )
1119 node = node->GetChildren();
1120
1121 while( rc && node )
1122 {
1123 rc = OutputNode(stream, node, 0, convMem.get(),
1124 convFile.get(), indentstep) &&
1125 OutputString(stream, wxS("\n"), convMem.get(), convFile.get());
1126 node = node->GetNext();
1127 }
1128 return rc;
1129 }
1130
1131 /*static*/ wxVersionInfo wxXmlDocument::GetLibraryVersionInfo()
1132 {
1133 return wxVersionInfo("expat",
1134 XML_MAJOR_VERSION,
1135 XML_MINOR_VERSION,
1136 XML_MICRO_VERSION);
1137 }
1138
1139 #endif // wxUSE_XML