Remove obsolete VisualAge-related files.
[wxWidgets.git] / src / xml / xml.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/xml/xml.cpp
3 // Purpose: wxXmlDocument - XML parser & data holder class
4 // Author: Vaclav Slavik
5 // Created: 2000/03/05
6 // Copyright: (c) 2000 Vaclav Slavik
7 // Licence: wxWindows licence
8 /////////////////////////////////////////////////////////////////////////////
9
10 // For compilers that support precompilation, includes "wx.h".
11 #include "wx/wxprec.h"
12
13 #ifdef __BORLANDC__
14 #pragma hdrstop
15 #endif
16
17 #if wxUSE_XML
18
19 #include "wx/xml/xml.h"
20
21 #ifndef WX_PRECOMP
22 #include "wx/intl.h"
23 #include "wx/log.h"
24 #include "wx/app.h"
25 #endif
26
27 #include "wx/wfstream.h"
28 #include "wx/datstrm.h"
29 #include "wx/zstream.h"
30 #include "wx/strconv.h"
31 #include "wx/scopedptr.h"
32 #include "wx/versioninfo.h"
33
34 #include "expat.h" // from Expat
35
36 // DLL options compatibility check:
37 WX_CHECK_BUILD_OPTIONS("wxXML")
38
39
40 IMPLEMENT_CLASS(wxXmlDocument, wxObject)
41
42
43 // a private utility used by wxXML
44 static bool wxIsWhiteOnly(const wxString& buf);
45
46
47 //-----------------------------------------------------------------------------
48 // wxXmlNode
49 //-----------------------------------------------------------------------------
50
51 wxXmlNode::wxXmlNode(wxXmlNode *parent,wxXmlNodeType type,
52 const wxString& name, const wxString& content,
53 wxXmlAttribute *attrs, wxXmlNode *next, int lineNo)
54 : m_type(type), m_name(name), m_content(content),
55 m_attrs(attrs), m_parent(parent),
56 m_children(NULL), m_next(next),
57 m_lineNo(lineNo),
58 m_noConversion(false)
59 {
60 wxASSERT_MSG ( type != wxXML_ELEMENT_NODE || content.empty(), "element nodes can't have content" );
61
62 if (m_parent)
63 {
64 if (m_parent->m_children)
65 {
66 m_next = m_parent->m_children;
67 m_parent->m_children = this;
68 }
69 else
70 m_parent->m_children = this;
71 }
72 }
73
74 wxXmlNode::wxXmlNode(wxXmlNodeType type, const wxString& name,
75 const wxString& content,
76 int lineNo)
77 : m_type(type), m_name(name), m_content(content),
78 m_attrs(NULL), m_parent(NULL),
79 m_children(NULL), m_next(NULL),
80 m_lineNo(lineNo), m_noConversion(false)
81 {
82 wxASSERT_MSG ( type != wxXML_ELEMENT_NODE || content.empty(), "element nodes can't have content" );
83 }
84
85 wxXmlNode::wxXmlNode(const wxXmlNode& node)
86 {
87 m_next = NULL;
88 m_parent = NULL;
89 DoCopy(node);
90 }
91
92 wxXmlNode::~wxXmlNode()
93 {
94 DoFree();
95 }
96
97 wxXmlNode& wxXmlNode::operator=(const wxXmlNode& node)
98 {
99 if ( &node != this )
100 {
101 DoFree();
102 DoCopy(node);
103 }
104
105 return *this;
106 }
107
108 void wxXmlNode::DoFree()
109 {
110 wxXmlNode *c, *c2;
111 for (c = m_children; c; c = c2)
112 {
113 c2 = c->m_next;
114 delete c;
115 }
116
117 wxXmlAttribute *p, *p2;
118 for (p = m_attrs; p; p = p2)
119 {
120 p2 = p->GetNext();
121 delete p;
122 }
123 }
124
125 void wxXmlNode::DoCopy(const wxXmlNode& node)
126 {
127 m_type = node.m_type;
128 m_name = node.m_name;
129 m_content = node.m_content;
130 m_lineNo = node.m_lineNo;
131 m_noConversion = node.m_noConversion;
132 m_children = NULL;
133
134 wxXmlNode *n = node.m_children;
135 while (n)
136 {
137 AddChild(new wxXmlNode(*n));
138 n = n->GetNext();
139 }
140
141 m_attrs = NULL;
142 wxXmlAttribute *p = node.m_attrs;
143 while (p)
144 {
145 AddAttribute(p->GetName(), p->GetValue());
146 p = p->GetNext();
147 }
148 }
149
150 bool wxXmlNode::HasAttribute(const wxString& attrName) const
151 {
152 wxXmlAttribute *attr = GetAttributes();
153
154 while (attr)
155 {
156 if (attr->GetName() == attrName) return true;
157 attr = attr->GetNext();
158 }
159
160 return false;
161 }
162
163 bool wxXmlNode::GetAttribute(const wxString& attrName, wxString *value) const
164 {
165 wxCHECK_MSG( value, false, "value argument must not be NULL" );
166
167 wxXmlAttribute *attr = GetAttributes();
168
169 while (attr)
170 {
171 if (attr->GetName() == attrName)
172 {
173 *value = attr->GetValue();
174 return true;
175 }
176 attr = attr->GetNext();
177 }
178
179 return false;
180 }
181
182 wxString wxXmlNode::GetAttribute(const wxString& attrName, const wxString& defaultVal) const
183 {
184 wxString tmp;
185 if (GetAttribute(attrName, &tmp))
186 return tmp;
187
188 return defaultVal;
189 }
190
191 void wxXmlNode::AddChild(wxXmlNode *child)
192 {
193 if (m_children == NULL)
194 m_children = child;
195 else
196 {
197 wxXmlNode *ch = m_children;
198 while (ch->m_next) ch = ch->m_next;
199 ch->m_next = child;
200 }
201 child->m_next = NULL;
202 child->m_parent = this;
203 }
204
205 // inserts a new node in front of 'followingNode'
206 bool wxXmlNode::InsertChild(wxXmlNode *child, wxXmlNode *followingNode)
207 {
208 wxCHECK_MSG( child, false, "cannot insert a NULL node!" );
209 wxCHECK_MSG( child->m_parent == NULL, false, "node already has a parent" );
210 wxCHECK_MSG( child->m_next == NULL, false, "node already has m_next" );
211 wxCHECK_MSG( followingNode == NULL || followingNode->GetParent() == this,
212 false,
213 "wxXmlNode::InsertChild - followingNode has incorrect parent" );
214
215 // this is for backward compatibility, NULL was allowed here thanks to
216 // the confusion about followingNode's meaning
217 if ( followingNode == NULL )
218 followingNode = m_children;
219
220 if ( m_children == followingNode )
221 {
222 child->m_next = m_children;
223 m_children = child;
224 }
225 else
226 {
227 wxXmlNode *ch = m_children;
228 while ( ch && ch->m_next != followingNode )
229 ch = ch->m_next;
230 if ( !ch )
231 {
232 wxFAIL_MSG( "followingNode has this node as parent, but couldn't be found among children" );
233 return false;
234 }
235
236 child->m_next = followingNode;
237 ch->m_next = child;
238 }
239
240 child->m_parent = this;
241 return true;
242 }
243
244 // inserts a new node right after 'precedingNode'
245 bool wxXmlNode::InsertChildAfter(wxXmlNode *child, wxXmlNode *precedingNode)
246 {
247 wxCHECK_MSG( child, false, "cannot insert a NULL node!" );
248 wxCHECK_MSG( child->m_parent == NULL, false, "node already has a parent" );
249 wxCHECK_MSG( child->m_next == NULL, false, "node already has m_next" );
250 wxCHECK_MSG( precedingNode == NULL || precedingNode->m_parent == this, false,
251 "precedingNode has wrong parent" );
252
253 if ( precedingNode )
254 {
255 child->m_next = precedingNode->m_next;
256 precedingNode->m_next = child;
257 }
258 else // precedingNode == NULL
259 {
260 wxCHECK_MSG( m_children == NULL, false,
261 "NULL precedingNode only makes sense when there are no children" );
262
263 child->m_next = m_children;
264 m_children = child;
265 }
266
267 child->m_parent = this;
268 return true;
269 }
270
271 bool wxXmlNode::RemoveChild(wxXmlNode *child)
272 {
273 if (m_children == NULL)
274 return false;
275 else if (m_children == child)
276 {
277 m_children = child->m_next;
278 child->m_parent = NULL;
279 child->m_next = NULL;
280 return true;
281 }
282 else
283 {
284 wxXmlNode *ch = m_children;
285 while (ch->m_next)
286 {
287 if (ch->m_next == child)
288 {
289 ch->m_next = child->m_next;
290 child->m_parent = NULL;
291 child->m_next = NULL;
292 return true;
293 }
294 ch = ch->m_next;
295 }
296 return false;
297 }
298 }
299
300 void wxXmlNode::AddAttribute(const wxString& name, const wxString& value)
301 {
302 AddProperty(name, value);
303 }
304
305 void wxXmlNode::AddAttribute(wxXmlAttribute *attr)
306 {
307 AddProperty(attr);
308 }
309
310 bool wxXmlNode::DeleteAttribute(const wxString& name)
311 {
312 return DeleteProperty(name);
313 }
314
315 void wxXmlNode::AddProperty(const wxString& name, const wxString& value)
316 {
317 AddProperty(new wxXmlAttribute(name, value, NULL));
318 }
319
320 void wxXmlNode::AddProperty(wxXmlAttribute *attr)
321 {
322 if (m_attrs == NULL)
323 m_attrs = attr;
324 else
325 {
326 wxXmlAttribute *p = m_attrs;
327 while (p->GetNext()) p = p->GetNext();
328 p->SetNext(attr);
329 }
330 }
331
332 bool wxXmlNode::DeleteProperty(const wxString& name)
333 {
334 wxXmlAttribute *attr;
335
336 if (m_attrs == NULL)
337 return false;
338
339 else if (m_attrs->GetName() == name)
340 {
341 attr = m_attrs;
342 m_attrs = attr->GetNext();
343 attr->SetNext(NULL);
344 delete attr;
345 return true;
346 }
347
348 else
349 {
350 wxXmlAttribute *p = m_attrs;
351 while (p->GetNext())
352 {
353 if (p->GetNext()->GetName() == name)
354 {
355 attr = p->GetNext();
356 p->SetNext(attr->GetNext());
357 attr->SetNext(NULL);
358 delete attr;
359 return true;
360 }
361 p = p->GetNext();
362 }
363 return false;
364 }
365 }
366
367 wxString wxXmlNode::GetNodeContent() const
368 {
369 wxXmlNode *n = GetChildren();
370
371 while (n)
372 {
373 if (n->GetType() == wxXML_TEXT_NODE ||
374 n->GetType() == wxXML_CDATA_SECTION_NODE)
375 return n->GetContent();
376 n = n->GetNext();
377 }
378 return wxEmptyString;
379 }
380
381 int wxXmlNode::GetDepth(wxXmlNode *grandparent) const
382 {
383 const wxXmlNode *n = this;
384 int ret = -1;
385
386 do
387 {
388 ret++;
389 n = n->GetParent();
390 if (n == grandparent)
391 return ret;
392
393 } while (n);
394
395 return wxNOT_FOUND;
396 }
397
398 bool wxXmlNode::IsWhitespaceOnly() const
399 {
400 return wxIsWhiteOnly(m_content);
401 }
402
403
404
405 //-----------------------------------------------------------------------------
406 // wxXmlDocument
407 //-----------------------------------------------------------------------------
408
409 wxXmlDocument::wxXmlDocument()
410 : m_version(wxS("1.0")), m_fileEncoding(wxS("UTF-8")), m_docNode(NULL)
411 {
412 #if !wxUSE_UNICODE
413 m_encoding = wxS("UTF-8");
414 #endif
415 }
416
417 wxXmlDocument::wxXmlDocument(const wxString& filename, const wxString& encoding)
418 :wxObject(), m_docNode(NULL)
419 {
420 if ( !Load(filename, encoding) )
421 {
422 wxDELETE(m_docNode);
423 }
424 }
425
426 wxXmlDocument::wxXmlDocument(wxInputStream& stream, const wxString& encoding)
427 :wxObject(), m_docNode(NULL)
428 {
429 if ( !Load(stream, encoding) )
430 {
431 wxDELETE(m_docNode);
432 }
433 }
434
435 wxXmlDocument::wxXmlDocument(const wxXmlDocument& doc)
436 :wxObject()
437 {
438 DoCopy(doc);
439 }
440
441 wxXmlDocument& wxXmlDocument::operator=(const wxXmlDocument& doc)
442 {
443 wxDELETE(m_docNode);
444 DoCopy(doc);
445 return *this;
446 }
447
448 void wxXmlDocument::DoCopy(const wxXmlDocument& doc)
449 {
450 m_version = doc.m_version;
451 #if !wxUSE_UNICODE
452 m_encoding = doc.m_encoding;
453 #endif
454 m_fileEncoding = doc.m_fileEncoding;
455
456 if (doc.m_docNode)
457 m_docNode = new wxXmlNode(*doc.m_docNode);
458 else
459 m_docNode = NULL;
460 }
461
462 bool wxXmlDocument::Load(const wxString& filename, const wxString& encoding, int flags)
463 {
464 wxFileInputStream stream(filename);
465 if (!stream.IsOk())
466 return false;
467 return Load(stream, encoding, flags);
468 }
469
470 bool wxXmlDocument::Save(const wxString& filename, int indentstep) const
471 {
472 wxFileOutputStream stream(filename);
473 if (!stream.IsOk())
474 return false;
475 return Save(stream, indentstep);
476 }
477
478 wxXmlNode *wxXmlDocument::GetRoot() const
479 {
480 wxXmlNode *node = m_docNode;
481 if (node)
482 {
483 node = m_docNode->GetChildren();
484 while (node != NULL && node->GetType() != wxXML_ELEMENT_NODE)
485 node = node->GetNext();
486 }
487 return node;
488 }
489
490 wxXmlNode *wxXmlDocument::DetachRoot()
491 {
492 wxXmlNode *node = m_docNode;
493 if (node)
494 {
495 node = m_docNode->GetChildren();
496 wxXmlNode *prev = NULL;
497 while (node != NULL && node->GetType() != wxXML_ELEMENT_NODE)
498 {
499 prev = node;
500 node = node->GetNext();
501 }
502 if (node)
503 {
504 if (node == m_docNode->GetChildren())
505 m_docNode->SetChildren(node->GetNext());
506
507 if (prev)
508 prev->SetNext(node->GetNext());
509
510 node->SetParent(NULL);
511 node->SetNext(NULL);
512 }
513 }
514 return node;
515 }
516
517 void wxXmlDocument::SetRoot(wxXmlNode *root)
518 {
519 if (root)
520 {
521 wxASSERT_MSG( root->GetType() == wxXML_ELEMENT_NODE,
522 "Can only set an element type node as root" );
523 }
524
525 wxXmlNode *node = m_docNode;
526 if (node)
527 {
528 node = m_docNode->GetChildren();
529 wxXmlNode *prev = NULL;
530 while (node != NULL && node->GetType() != wxXML_ELEMENT_NODE)
531 {
532 prev = node;
533 node = node->GetNext();
534 }
535 if (node && root)
536 {
537 root->SetNext( node->GetNext() );
538 wxDELETE(node);
539 }
540 if (prev)
541 prev->SetNext(root);
542 else
543 m_docNode->SetChildren(root);
544 }
545 else
546 {
547 m_docNode = new wxXmlNode(wxXML_DOCUMENT_NODE, wxEmptyString);
548 m_docNode->SetChildren(root);
549 }
550 if (root)
551 root->SetParent(m_docNode);
552 }
553
554 void wxXmlDocument::AppendToProlog(wxXmlNode *node)
555 {
556 if (!m_docNode)
557 m_docNode = new wxXmlNode(wxXML_DOCUMENT_NODE, wxEmptyString);
558 if (IsOk())
559 m_docNode->InsertChild( node, GetRoot() );
560 else
561 m_docNode->AddChild( node );
562 }
563
564 //-----------------------------------------------------------------------------
565 // wxXmlDocument loading routines
566 //-----------------------------------------------------------------------------
567
568 // converts Expat-produced string in UTF-8 into wxString using the specified
569 // conv or keep in UTF-8 if conv is NULL
570 static wxString CharToString(wxMBConv *conv,
571 const char *s, size_t len = wxString::npos)
572 {
573 #if !wxUSE_UNICODE
574 if ( conv )
575 {
576 // there can be no embedded NULs in this string so we don't need the
577 // output length, it will be NUL-terminated
578 const wxWCharBuffer wbuf(
579 wxConvUTF8.cMB2WC(s, len == wxString::npos ? wxNO_LEN : len, NULL));
580
581 return wxString(wbuf, *conv);
582 }
583 // else: the string is wanted in UTF-8
584 #endif // !wxUSE_UNICODE
585
586 wxUnusedVar(conv);
587 return wxString::FromUTF8Unchecked(s, len);
588 }
589
590 // returns true if the given string contains only whitespaces
591 bool wxIsWhiteOnly(const wxString& buf)
592 {
593 for ( wxString::const_iterator i = buf.begin(); i != buf.end(); ++i )
594 {
595 wxChar c = *i;
596 if ( c != wxS(' ') && c != wxS('\t') && c != wxS('\n') && c != wxS('\r'))
597 return false;
598 }
599 return true;
600 }
601
602
603 struct wxXmlParsingContext
604 {
605 wxXmlParsingContext()
606 : conv(NULL),
607 node(NULL),
608 lastChild(NULL),
609 lastAsText(NULL),
610 removeWhiteOnlyNodes(false)
611 {}
612
613 XML_Parser parser;
614 wxMBConv *conv;
615 wxXmlNode *node; // the node being parsed
616 wxXmlNode *lastChild; // the last child of "node"
617 wxXmlNode *lastAsText; // the last _text_ child of "node"
618 wxString encoding;
619 wxString version;
620 bool removeWhiteOnlyNodes;
621 };
622
// checks that ctx->lastChild is in consistent state: if it is set, it must
// be a child of ctx->node and must not have any next sibling
//
// the macro argument is parenthesized and the two checks are wrapped in a
// single statement so that the macro can be safely used anywhere a statement
// is allowed, e.g. as the body of an "if" without braces
#define ASSERT_LAST_CHILD_OK(ctx)                                     \
    wxSTATEMENT_MACRO_BEGIN                                           \
        wxASSERT( (ctx)->lastChild == NULL ||                         \
                  (ctx)->lastChild->GetNext() == NULL );              \
        wxASSERT( (ctx)->lastChild == NULL ||                         \
                  (ctx)->lastChild->GetParent() == (ctx)->node );     \
    wxSTATEMENT_MACRO_END
629
630 extern "C" {
631 static void StartElementHnd(void *userData, const char *name, const char **atts)
632 {
633 wxXmlParsingContext *ctx = (wxXmlParsingContext*)userData;
634 wxXmlNode *node = new wxXmlNode(wxXML_ELEMENT_NODE,
635 CharToString(ctx->conv, name),
636 wxEmptyString,
637 XML_GetCurrentLineNumber(ctx->parser));
638 const char **a = atts;
639
640 // add node attributes
641 while (*a)
642 {
643 node->AddAttribute(CharToString(ctx->conv, a[0]), CharToString(ctx->conv, a[1]));
644 a += 2;
645 }
646
647 ASSERT_LAST_CHILD_OK(ctx);
648 ctx->node->InsertChildAfter(node, ctx->lastChild);
649 ctx->lastAsText = NULL;
650 ctx->lastChild = NULL; // our new node "node" has no children yet
651
652 ctx->node = node;
653 }
654
655 static void EndElementHnd(void *userData, const char* WXUNUSED(name))
656 {
657 wxXmlParsingContext *ctx = (wxXmlParsingContext*)userData;
658
659 // we're exiting the last children of ctx->node->GetParent() and going
660 // back one level up, so current value of ctx->node points to the last
661 // child of ctx->node->GetParent()
662 ctx->lastChild = ctx->node;
663
664 ctx->node = ctx->node->GetParent();
665 ctx->lastAsText = NULL;
666 }
667
668 static void TextHnd(void *userData, const char *s, int len)
669 {
670 wxXmlParsingContext *ctx = (wxXmlParsingContext*)userData;
671 wxString str = CharToString(ctx->conv, s, len);
672
673 if (ctx->lastAsText)
674 {
675 ctx->lastAsText->SetContent(ctx->lastAsText->GetContent() + str);
676 }
677 else
678 {
679 bool whiteOnly = false;
680 if (ctx->removeWhiteOnlyNodes)
681 whiteOnly = wxIsWhiteOnly(str);
682
683 if (!whiteOnly)
684 {
685 wxXmlNode *textnode =
686 new wxXmlNode(wxXML_TEXT_NODE, wxS("text"), str,
687 XML_GetCurrentLineNumber(ctx->parser));
688
689 ASSERT_LAST_CHILD_OK(ctx);
690 ctx->node->InsertChildAfter(textnode, ctx->lastChild);
691 ctx->lastChild= ctx->lastAsText = textnode;
692 }
693 }
694 }
695
696 static void StartCdataHnd(void *userData)
697 {
698 wxXmlParsingContext *ctx = (wxXmlParsingContext*)userData;
699
700 wxXmlNode *textnode =
701 new wxXmlNode(wxXML_CDATA_SECTION_NODE, wxS("cdata"), wxS(""),
702 XML_GetCurrentLineNumber(ctx->parser));
703
704 ASSERT_LAST_CHILD_OK(ctx);
705 ctx->node->InsertChildAfter(textnode, ctx->lastChild);
706 ctx->lastChild= ctx->lastAsText = textnode;
707 }
708
709 static void EndCdataHnd(void *userData)
710 {
711 wxXmlParsingContext *ctx = (wxXmlParsingContext*)userData;
712
713 // we need to reset this pointer so that subsequent text nodes don't append
714 // their contents to this one but create new wxXML_TEXT_NODE objects (or
715 // not create anything at all if only white space follows the CDATA section
716 // and wxXMLDOC_KEEP_WHITESPACE_NODES is not used as is commonly the case)
717 ctx->lastAsText = NULL;
718 }
719
720 static void CommentHnd(void *userData, const char *data)
721 {
722 wxXmlParsingContext *ctx = (wxXmlParsingContext*)userData;
723
724 wxXmlNode *commentnode =
725 new wxXmlNode(wxXML_COMMENT_NODE,
726 wxS("comment"), CharToString(ctx->conv, data),
727 XML_GetCurrentLineNumber(ctx->parser));
728
729 ASSERT_LAST_CHILD_OK(ctx);
730 ctx->node->InsertChildAfter(commentnode, ctx->lastChild);
731 ctx->lastChild = commentnode;
732 ctx->lastAsText = NULL;
733 }
734
735 static void PIHnd(void *userData, const char *target, const char *data)
736 {
737 wxXmlParsingContext *ctx = (wxXmlParsingContext*)userData;
738
739 wxXmlNode *pinode =
740 new wxXmlNode(wxXML_PI_NODE, CharToString(ctx->conv, target),
741 CharToString(ctx->conv, data),
742 XML_GetCurrentLineNumber(ctx->parser));
743
744 ASSERT_LAST_CHILD_OK(ctx);
745 ctx->node->InsertChildAfter(pinode, ctx->lastChild);
746 ctx->lastChild = pinode;
747 ctx->lastAsText = NULL;
748 }
749
750 static void DefaultHnd(void *userData, const char *s, int len)
751 {
752 // XML header:
753 if (len > 6 && memcmp(s, "<?xml ", 6) == 0)
754 {
755 wxXmlParsingContext *ctx = (wxXmlParsingContext*)userData;
756
757 wxString buf = CharToString(ctx->conv, s, (size_t)len);
758 int pos;
759 pos = buf.Find(wxS("encoding="));
760 if (pos != wxNOT_FOUND)
761 ctx->encoding = buf.Mid(pos + 10).BeforeFirst(buf[(size_t)pos+9]);
762 pos = buf.Find(wxS("version="));
763 if (pos != wxNOT_FOUND)
764 ctx->version = buf.Mid(pos + 9).BeforeFirst(buf[(size_t)pos+8]);
765 }
766 }
767
768 static int UnknownEncodingHnd(void * WXUNUSED(encodingHandlerData),
769 const XML_Char *name, XML_Encoding *info)
770 {
771 // We must build conversion table for expat. The easiest way to do so
772 // is to let wxCSConv convert as string containing all characters to
773 // wide character representation:
774 wxCSConv conv(name);
775 char mbBuf[2];
776 wchar_t wcBuf[10];
777 size_t i;
778
779 mbBuf[1] = 0;
780 info->map[0] = 0;
781 for (i = 0; i < 255; i++)
782 {
783 mbBuf[0] = (char)(i+1);
784 if (conv.MB2WC(wcBuf, mbBuf, 2) == (size_t)-1)
785 {
786 // invalid/undefined byte in the encoding:
787 info->map[i+1] = -1;
788 }
789 info->map[i+1] = (int)wcBuf[0];
790 }
791
792 info->data = NULL;
793 info->convert = NULL;
794 info->release = NULL;
795
796 return 1;
797 }
798
799 } // extern "C"
800
801 bool wxXmlDocument::Load(wxInputStream& stream, const wxString& encoding, int flags)
802 {
803 #if wxUSE_UNICODE
804 (void)encoding;
805 #else
806 m_encoding = encoding;
807 #endif
808
809 const size_t BUFSIZE = 1024;
810 char buf[BUFSIZE];
811 wxXmlParsingContext ctx;
812 bool done;
813 XML_Parser parser = XML_ParserCreate(NULL);
814 wxXmlNode *root = new wxXmlNode(wxXML_DOCUMENT_NODE, wxEmptyString);
815
816 ctx.encoding = wxS("UTF-8"); // default in absence of encoding=""
817 ctx.conv = NULL;
818 #if !wxUSE_UNICODE
819 if ( encoding.CmpNoCase(wxS("UTF-8")) != 0 )
820 ctx.conv = new wxCSConv(encoding);
821 #endif
822 ctx.removeWhiteOnlyNodes = (flags & wxXMLDOC_KEEP_WHITESPACE_NODES) == 0;
823 ctx.parser = parser;
824 ctx.node = root;
825
826 XML_SetUserData(parser, (void*)&ctx);
827 XML_SetElementHandler(parser, StartElementHnd, EndElementHnd);
828 XML_SetCharacterDataHandler(parser, TextHnd);
829 XML_SetCdataSectionHandler(parser, StartCdataHnd, EndCdataHnd);;
830 XML_SetCommentHandler(parser, CommentHnd);
831 XML_SetProcessingInstructionHandler(parser, PIHnd);
832 XML_SetDefaultHandler(parser, DefaultHnd);
833 XML_SetUnknownEncodingHandler(parser, UnknownEncodingHnd, NULL);
834
835 bool ok = true;
836 do
837 {
838 size_t len = stream.Read(buf, BUFSIZE).LastRead();
839 done = (len < BUFSIZE);
840 if (!XML_Parse(parser, buf, len, done))
841 {
842 wxString error(XML_ErrorString(XML_GetErrorCode(parser)),
843 *wxConvCurrent);
844 wxLogError(_("XML parsing error: '%s' at line %d"),
845 error.c_str(),
846 (int)XML_GetCurrentLineNumber(parser));
847 ok = false;
848 break;
849 }
850 } while (!done);
851
852 if (ok)
853 {
854 if (!ctx.version.empty())
855 SetVersion(ctx.version);
856 if (!ctx.encoding.empty())
857 SetFileEncoding(ctx.encoding);
858 SetDocumentNode(root);
859 }
860 else
861 {
862 delete root;
863 }
864
865 XML_ParserFree(parser);
866 #if !wxUSE_UNICODE
867 if ( ctx.conv )
868 delete ctx.conv;
869 #endif
870
871 return ok;
872
873 }
874
875
876
877 //-----------------------------------------------------------------------------
878 // wxXmlDocument saving routines
879 //-----------------------------------------------------------------------------
880
881 // helpers for XML generation
882 namespace
883 {
884
885 // write string to output:
886 bool OutputString(wxOutputStream& stream,
887 const wxString& str,
888 wxMBConv *convMem,
889 wxMBConv *convFile)
890 {
891 if (str.empty())
892 return true;
893
894 #if wxUSE_UNICODE
895 wxUnusedVar(convMem);
896 if ( !convFile )
897 convFile = &wxConvUTF8;
898
899 const wxScopedCharBuffer buf(str.mb_str(*convFile));
900 if ( !buf.length() )
901 {
902 // conversion failed, can't write this string in an XML file in this
903 // (presumably non-UTF-8) encoding
904 return false;
905 }
906
907 stream.Write(buf, buf.length());
908 #else // !wxUSE_UNICODE
909 if ( convFile && convMem )
910 {
911 wxString str2(str.wc_str(*convMem), *convFile);
912 stream.Write(str2.mb_str(), str2.length());
913 }
914 else // no conversions to do
915 {
916 stream.Write(str.mb_str(), str.length());
917 }
918 #endif // wxUSE_UNICODE/!wxUSE_UNICODE
919
920 return stream.IsOk();
921 }
922
// Selects the set of characters replaced by OutputEscapedString().
enum EscapingMode
{
    // text content: '<', '>', '&' and CR are escaped
    Escape_Text,

    // attribute value: '"', TAB and LF are escaped as well
    Escape_Attribute
};
928
929 // Same as above, but create entities first.
930 // Translates '<' to "&lt;", '>' to "&gt;" and so on, according to the spec:
931 // http://www.w3.org/TR/2000/WD-xml-c14n-20000119.html#charescaping
932 bool OutputEscapedString(wxOutputStream& stream,
933 const wxString& str,
934 wxMBConv *convMem,
935 wxMBConv *convFile,
936 EscapingMode mode)
937 {
938 wxString escaped;
939 escaped.reserve(str.length());
940
941 for ( wxString::const_iterator i = str.begin(); i != str.end(); ++i )
942 {
943 const wxChar c = *i;
944
945 switch ( c )
946 {
947 case wxS('<'):
948 escaped.append(wxS("&lt;"));
949 break;
950 case wxS('>'):
951 escaped.append(wxS("&gt;"));
952 break;
953 case wxS('&'):
954 escaped.append(wxS("&amp;"));
955 break;
956 case wxS('\r'):
957 escaped.append(wxS("&#xD;"));
958 break;
959 default:
960 if ( mode == Escape_Attribute )
961 {
962 switch ( c )
963 {
964 case wxS('"'):
965 escaped.append(wxS("&quot;"));
966 break;
967 case wxS('\t'):
968 escaped.append(wxS("&#x9;"));
969 break;
970 case wxS('\n'):
971 escaped.append(wxS("&#xA;"));
972 break;
973 default:
974 escaped.append(c);
975 }
976
977 }
978 else
979 {
980 escaped.append(c);
981 }
982 }
983 }
984
985 return OutputString(stream, escaped, convMem, convFile);
986 }
987
988 bool OutputIndentation(wxOutputStream& stream,
989 int indent,
990 wxMBConv *convMem,
991 wxMBConv *convFile)
992 {
993 wxString str(wxS("\n"));
994 str += wxString(indent, wxS(' '));
995 return OutputString(stream, str, convMem, convFile);
996 }
997
998 bool OutputNode(wxOutputStream& stream,
999 wxXmlNode *node,
1000 int indent,
1001 wxMBConv *convMem,
1002 wxMBConv *convFile,
1003 int indentstep)
1004 {
1005 bool rc;
1006 switch (node->GetType())
1007 {
1008 case wxXML_CDATA_SECTION_NODE:
1009 rc = OutputString(stream, wxS("<![CDATA["), convMem, convFile) &&
1010 OutputString(stream, node->GetContent(), convMem, convFile) &&
1011 OutputString(stream, wxS("]]>"), convMem, convFile);
1012 break;
1013
1014 case wxXML_TEXT_NODE:
1015 if (node->GetNoConversion())
1016 {
1017 stream.Write(node->GetContent().c_str(), node->GetContent().Length());
1018 rc = true;
1019 }
1020 else
1021 rc = OutputEscapedString(stream, node->GetContent(),
1022 convMem, convFile,
1023 Escape_Text);
1024 break;
1025
1026 case wxXML_ELEMENT_NODE:
1027 rc = OutputString(stream, wxS("<"), convMem, convFile) &&
1028 OutputString(stream, node->GetName(), convMem, convFile);
1029
1030 if ( rc )
1031 {
1032 for ( wxXmlAttribute *attr = node->GetAttributes();
1033 attr && rc;
1034 attr = attr->GetNext() )
1035 {
1036 rc = OutputString(stream,
1037 wxS(" ") + attr->GetName() + wxS("=\""),
1038 convMem, convFile) &&
1039 OutputEscapedString(stream, attr->GetValue(),
1040 convMem, convFile,
1041 Escape_Attribute) &&
1042 OutputString(stream, wxS("\""), convMem, convFile);
1043 }
1044 }
1045
1046 if ( node->GetChildren() )
1047 {
1048 rc = OutputString(stream, wxS(">"), convMem, convFile);
1049
1050 wxXmlNode *prev = NULL;
1051 for ( wxXmlNode *n = node->GetChildren();
1052 n && rc;
1053 n = n->GetNext() )
1054 {
1055 if ( indentstep >= 0 && n->GetType() != wxXML_TEXT_NODE )
1056 {
1057 rc = OutputIndentation(stream, indent + indentstep,
1058 convMem, convFile);
1059 }
1060
1061 if ( rc )
1062 rc = OutputNode(stream, n, indent + indentstep,
1063 convMem, convFile, indentstep);
1064
1065 prev = n;
1066 }
1067
1068 if ( rc && indentstep >= 0 &&
1069 prev && prev->GetType() != wxXML_TEXT_NODE )
1070 {
1071 rc = OutputIndentation(stream, indent, convMem, convFile);
1072 }
1073
1074 if ( rc )
1075 {
1076 rc = OutputString(stream, wxS("</"), convMem, convFile) &&
1077 OutputString(stream, node->GetName(),
1078 convMem, convFile) &&
1079 OutputString(stream, wxS(">"), convMem, convFile);
1080 }
1081 }
1082 else // no children, output "<foo/>"
1083 {
1084 rc = OutputString(stream, wxS("/>"), convMem, convFile);
1085 }
1086 break;
1087
1088 case wxXML_COMMENT_NODE:
1089 rc = OutputString(stream, wxS("<!--"), convMem, convFile) &&
1090 OutputString(stream, node->GetContent(), convMem, convFile) &&
1091 OutputString(stream, wxS("-->"), convMem, convFile);
1092 break;
1093
1094 case wxXML_PI_NODE:
1095 rc = OutputString(stream, wxT("<?"), convMem, convFile) &&
1096 OutputString(stream, node->GetName(), convMem, convFile) &&
1097 OutputString(stream, wxT(" "), convMem, convFile) &&
1098 OutputString(stream, node->GetContent(), convMem, convFile) &&
1099 OutputString(stream, wxT("?>"), convMem, convFile);
1100 break;
1101
1102 default:
1103 wxFAIL_MSG("unsupported node type");
1104 rc = false;
1105 }
1106
1107 return rc;
1108 }
1109
1110 } // anonymous namespace
1111
1112 bool wxXmlDocument::Save(wxOutputStream& stream, int indentstep) const
1113 {
1114 if ( !IsOk() )
1115 return false;
1116
1117 wxScopedPtr<wxMBConv> convMem, convFile;
1118
1119 #if wxUSE_UNICODE
1120 convFile.reset(new wxCSConv(GetFileEncoding()));
1121 #else
1122 if ( GetFileEncoding().CmpNoCase(GetEncoding()) != 0 )
1123 {
1124 convFile.reset(new wxCSConv(GetFileEncoding()));
1125 convMem.reset(new wxCSConv(GetEncoding()));
1126 }
1127 //else: file and in-memory encodings are the same, no conversion needed
1128 #endif
1129
1130 wxString dec = wxString::Format(
1131 wxS("<?xml version=\"%s\" encoding=\"%s\"?>\n"),
1132 GetVersion(), GetFileEncoding()
1133 );
1134 bool rc = OutputString(stream, dec, convMem.get(), convFile.get());
1135
1136 wxXmlNode *node = GetDocumentNode();
1137 if ( node )
1138 node = node->GetChildren();
1139
1140 while( rc && node )
1141 {
1142 rc = OutputNode(stream, node, 0, convMem.get(),
1143 convFile.get(), indentstep) &&
1144 OutputString(stream, wxS("\n"), convMem.get(), convFile.get());
1145 node = node->GetNext();
1146 }
1147 return rc;
1148 }
1149
1150 /*static*/ wxVersionInfo wxXmlDocument::GetLibraryVersionInfo()
1151 {
1152 return wxVersionInfo("expat",
1153 XML_MAJOR_VERSION,
1154 XML_MINOR_VERSION,
1155 XML_MICRO_VERSION);
1156 }
1157
1158 #endif // wxUSE_XML