code cleanup: make OutputString[Ent]() simpler to call by providing defaults for...
[wxWidgets.git] / src / xml / xml.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: xml.cpp
3 // Purpose: wxXmlDocument - XML parser & data holder class
4 // Author: Vaclav Slavik
5 // Created: 2000/03/05
6 // RCS-ID: $Id$
7 // Copyright: (c) 2000 Vaclav Slavik
8 // Licence: wxWindows licence
9 /////////////////////////////////////////////////////////////////////////////
10
11 // For compilers that support precompilation, includes "wx.h".
12 #include "wx/wxprec.h"
13
14 #ifdef __BORLANDC__
15 #pragma hdrstop
16 #endif
17
18 #include "wx/xml/xml.h"
19
20 #if wxUSE_XML
21
22 #include "wx/wfstream.h"
23 #include "wx/datstrm.h"
24 #include "wx/zstream.h"
25 #include "wx/log.h"
26 #include "wx/intl.h"
27 #include "wx/strconv.h"
28
29 #include "expat.h" // from Expat
30
31 // DLL options compatibility check:
32 #include "wx/app.h"
33 WX_CHECK_BUILD_OPTIONS("wxXML")
34
35 //-----------------------------------------------------------------------------
36 // wxXmlNode
37 //-----------------------------------------------------------------------------
38
39 wxXmlNode::wxXmlNode(wxXmlNode *parent,wxXmlNodeType type,
40 const wxString& name, const wxString& content,
41 wxXmlProperty *props, wxXmlNode *next)
42 : m_type(type), m_name(name), m_content(content),
43 m_properties(props), m_parent(parent),
44 m_children(NULL), m_next(next)
45 {
46 if (m_parent)
47 {
48 if (m_parent->m_children)
49 {
50 m_next = m_parent->m_children;
51 m_parent->m_children = this;
52 }
53 else
54 m_parent->m_children = this;
55 }
56 }
57
58 wxXmlNode::wxXmlNode(wxXmlNodeType type, const wxString& name,
59 const wxString& content)
60 : m_type(type), m_name(name), m_content(content),
61 m_properties(NULL), m_parent(NULL),
62 m_children(NULL), m_next(NULL)
63 {}
64
65 wxXmlNode::wxXmlNode(const wxXmlNode& node)
66 {
67 m_next = NULL;
68 m_parent = NULL;
69 DoCopy(node);
70 }
71
72 wxXmlNode::~wxXmlNode()
73 {
74 wxXmlNode *c, *c2;
75 for (c = m_children; c; c = c2)
76 {
77 c2 = c->m_next;
78 delete c;
79 }
80
81 wxXmlProperty *p, *p2;
82 for (p = m_properties; p; p = p2)
83 {
84 p2 = p->GetNext();
85 delete p;
86 }
87 }
88
89 wxXmlNode& wxXmlNode::operator=(const wxXmlNode& node)
90 {
91 wxDELETE(m_properties);
92 wxDELETE(m_children);
93 DoCopy(node);
94 return *this;
95 }
96
97 void wxXmlNode::DoCopy(const wxXmlNode& node)
98 {
99 m_type = node.m_type;
100 m_name = node.m_name;
101 m_content = node.m_content;
102 m_children = NULL;
103
104 wxXmlNode *n = node.m_children;
105 while (n)
106 {
107 AddChild(new wxXmlNode(*n));
108 n = n->GetNext();
109 }
110
111 m_properties = NULL;
112 wxXmlProperty *p = node.m_properties;
113 while (p)
114 {
115 AddProperty(p->GetName(), p->GetValue());
116 p = p->GetNext();
117 }
118 }
119
120 bool wxXmlNode::HasProp(const wxString& propName) const
121 {
122 wxXmlProperty *prop = GetProperties();
123
124 while (prop)
125 {
126 if (prop->GetName() == propName) return true;
127 prop = prop->GetNext();
128 }
129
130 return false;
131 }
132
133 bool wxXmlNode::GetPropVal(const wxString& propName, wxString *value) const
134 {
135 wxXmlProperty *prop = GetProperties();
136
137 while (prop)
138 {
139 if (prop->GetName() == propName)
140 {
141 *value = prop->GetValue();
142 return true;
143 }
144 prop = prop->GetNext();
145 }
146
147 return false;
148 }
149
150 wxString wxXmlNode::GetPropVal(const wxString& propName, const wxString& defaultVal) const
151 {
152 wxString tmp;
153 if (GetPropVal(propName, &tmp))
154 return tmp;
155
156 return defaultVal;
157 }
158
159 void wxXmlNode::AddChild(wxXmlNode *child)
160 {
161 if (m_children == NULL)
162 m_children = child;
163 else
164 {
165 wxXmlNode *ch = m_children;
166 while (ch->m_next) ch = ch->m_next;
167 ch->m_next = child;
168 }
169 child->m_next = NULL;
170 child->m_parent = this;
171 }
172
173 void wxXmlNode::InsertChild(wxXmlNode *child, wxXmlNode *before_node)
174 {
175 wxASSERT_MSG(before_node->GetParent() == this, wxT("wxXmlNode::InsertChild - the node has incorrect parent"));
176
177 if (m_children == before_node)
178 m_children = child;
179 else
180 {
181 wxXmlNode *ch = m_children;
182 while (ch->m_next != before_node) ch = ch->m_next;
183 ch->m_next = child;
184 }
185
186 child->m_parent = this;
187 child->m_next = before_node;
188 }
189
190 bool wxXmlNode::RemoveChild(wxXmlNode *child)
191 {
192 if (m_children == NULL)
193 return false;
194 else if (m_children == child)
195 {
196 m_children = child->m_next;
197 child->m_parent = NULL;
198 child->m_next = NULL;
199 return true;
200 }
201 else
202 {
203 wxXmlNode *ch = m_children;
204 while (ch->m_next)
205 {
206 if (ch->m_next == child)
207 {
208 ch->m_next = child->m_next;
209 child->m_parent = NULL;
210 child->m_next = NULL;
211 return true;
212 }
213 ch = ch->m_next;
214 }
215 return false;
216 }
217 }
218
219 void wxXmlNode::AddProperty(const wxString& name, const wxString& value)
220 {
221 AddProperty(new wxXmlProperty(name, value, NULL));
222 }
223
224 void wxXmlNode::AddProperty(wxXmlProperty *prop)
225 {
226 if (m_properties == NULL)
227 m_properties = prop;
228 else
229 {
230 wxXmlProperty *p = m_properties;
231 while (p->GetNext()) p = p->GetNext();
232 p->SetNext(prop);
233 }
234 }
235
236 bool wxXmlNode::DeleteProperty(const wxString& name)
237 {
238 wxXmlProperty *prop;
239
240 if (m_properties == NULL)
241 return false;
242
243 else if (m_properties->GetName() == name)
244 {
245 prop = m_properties;
246 m_properties = prop->GetNext();
247 prop->SetNext(NULL);
248 delete prop;
249 return true;
250 }
251
252 else
253 {
254 wxXmlProperty *p = m_properties;
255 while (p->GetNext())
256 {
257 if (p->GetNext()->GetName() == name)
258 {
259 prop = p->GetNext();
260 p->SetNext(prop->GetNext());
261 prop->SetNext(NULL);
262 delete prop;
263 return true;
264 }
265 p = p->GetNext();
266 }
267 return false;
268 }
269 }
270
271
272
273 //-----------------------------------------------------------------------------
274 // wxXmlDocument
275 //-----------------------------------------------------------------------------
276
277 wxXmlDocument::wxXmlDocument()
278 : m_version(wxT("1.0")), m_fileEncoding(wxT("utf-8")), m_root(NULL)
279 {
280 #if !wxUSE_UNICODE
281 m_encoding = wxT("UTF-8");
282 #endif
283 }
284
285 wxXmlDocument::wxXmlDocument(const wxString& filename, const wxString& encoding)
286 :wxObject(), m_root(NULL)
287 {
288 if ( !Load(filename, encoding) )
289 {
290 wxDELETE(m_root);
291 }
292 }
293
294 wxXmlDocument::wxXmlDocument(wxInputStream& stream, const wxString& encoding)
295 :wxObject(), m_root(NULL)
296 {
297 if ( !Load(stream, encoding) )
298 {
299 wxDELETE(m_root);
300 }
301 }
302
303 wxXmlDocument::wxXmlDocument(const wxXmlDocument& doc)
304 :wxObject()
305 {
306 DoCopy(doc);
307 }
308
309 wxXmlDocument& wxXmlDocument::operator=(const wxXmlDocument& doc)
310 {
311 wxDELETE(m_root);
312 DoCopy(doc);
313 return *this;
314 }
315
316 void wxXmlDocument::DoCopy(const wxXmlDocument& doc)
317 {
318 m_version = doc.m_version;
319 #if !wxUSE_UNICODE
320 m_encoding = doc.m_encoding;
321 #endif
322 m_fileEncoding = doc.m_fileEncoding;
323 m_root = new wxXmlNode(*doc.m_root);
324 }
325
326 bool wxXmlDocument::Load(const wxString& filename, const wxString& encoding)
327 {
328 wxFileInputStream stream(filename);
329 return Load(stream, encoding);
330 }
331
332 bool wxXmlDocument::Save(const wxString& filename) const
333 {
334 wxFileOutputStream stream(filename);
335 return Save(stream);
336 }
337
338
339
340 //-----------------------------------------------------------------------------
341 // wxXmlDocument loading routines
342 //-----------------------------------------------------------------------------
343
344 /*
345 FIXME:
346 - process all elements, including CDATA
347 */
348
349 // converts Expat-produced string in UTF-8 into wxString.
350 inline static wxString CharToString(wxMBConv *conv,
351 const char *s, size_t len = wxSTRING_MAXLEN)
352 {
353 #if wxUSE_UNICODE
354 (void)conv;
355 return wxString(s, wxConvUTF8, len);
356 #else
357 if ( conv )
358 {
359 size_t nLen = (len != wxSTRING_MAXLEN) ? len :
360 wxConvUTF8.MB2WC((wchar_t*) NULL, s, 0);
361
362 wchar_t *buf = new wchar_t[nLen+1];
363 wxConvUTF8.MB2WC(buf, s, nLen);
364 buf[nLen] = 0;
365 wxString str(buf, *conv, len);
366 delete[] buf;
367 return str;
368 }
369 else
370 return wxString(s, len != wxSTRING_MAXLEN ? len : strlen(s));
371 #endif
372 }
373
374 struct wxXmlParsingContext
375 {
376 wxMBConv *conv;
377 wxXmlNode *root;
378 wxXmlNode *node;
379 wxXmlNode *lastAsText;
380 wxString encoding;
381 wxString version;
382 };
383
384 extern "C" {
385 static void StartElementHnd(void *userData, const char *name, const char **atts)
386 {
387 wxXmlParsingContext *ctx = (wxXmlParsingContext*)userData;
388 wxXmlNode *node = new wxXmlNode(wxXML_ELEMENT_NODE, CharToString(ctx->conv, name));
389 const char **a = atts;
390 while (*a)
391 {
392 node->AddProperty(CharToString(ctx->conv, a[0]), CharToString(ctx->conv, a[1]));
393 a += 2;
394 }
395 if (ctx->root == NULL)
396 ctx->root = node;
397 else
398 ctx->node->AddChild(node);
399 ctx->node = node;
400 ctx->lastAsText = NULL;
401 }
402 }
403
404 extern "C" {
405 static void EndElementHnd(void *userData, const char* WXUNUSED(name))
406 {
407 wxXmlParsingContext *ctx = (wxXmlParsingContext*)userData;
408
409 ctx->node = ctx->node->GetParent();
410 ctx->lastAsText = NULL;
411 }
412 }
413
414 extern "C" {
415 static void TextHnd(void *userData, const char *s, int len)
416 {
417 wxXmlParsingContext *ctx = (wxXmlParsingContext*)userData;
418 char *buf = new char[len + 1];
419
420 buf[len] = '\0';
421 memcpy(buf, s, (size_t)len);
422
423 if (ctx->lastAsText)
424 {
425 ctx->lastAsText->SetContent(ctx->lastAsText->GetContent() +
426 CharToString(ctx->conv, buf));
427 }
428 else
429 {
430 bool whiteOnly = true;
431 for (char *c = buf; *c != '\0'; c++)
432 if (*c != ' ' && *c != '\t' && *c != '\n' && *c != '\r')
433 {
434 whiteOnly = false;
435 break;
436 }
437 if (!whiteOnly)
438 {
439 ctx->lastAsText = new wxXmlNode(wxXML_TEXT_NODE, wxT("text"),
440 CharToString(ctx->conv, buf));
441 ctx->node->AddChild(ctx->lastAsText);
442 }
443 }
444
445 delete[] buf;
446 }
447 }
448
449 extern "C" {
450 static void CommentHnd(void *userData, const char *data)
451 {
452 wxXmlParsingContext *ctx = (wxXmlParsingContext*)userData;
453
454 if (ctx->node)
455 {
456 // VS: ctx->node == NULL happens if there is a comment before
457 // the root element (e.g. wxDesigner's output). We ignore such
458 // comments, no big deal...
459 ctx->node->AddChild(new wxXmlNode(wxXML_COMMENT_NODE,
460 wxT("comment"), CharToString(ctx->conv, data)));
461 }
462 ctx->lastAsText = NULL;
463 }
464 }
465
466 extern "C" {
467 static void DefaultHnd(void *userData, const char *s, int len)
468 {
469 // XML header:
470 if (len > 6 && memcmp(s, "<?xml ", 6) == 0)
471 {
472 wxXmlParsingContext *ctx = (wxXmlParsingContext*)userData;
473
474 wxString buf = CharToString(ctx->conv, s, (size_t)len);
475 int pos;
476 pos = buf.Find(wxT("encoding="));
477 if (pos != wxNOT_FOUND)
478 ctx->encoding = buf.Mid(pos + 10).BeforeFirst(buf[(size_t)pos+9]);
479 pos = buf.Find(wxT("version="));
480 if (pos != wxNOT_FOUND)
481 ctx->version = buf.Mid(pos + 9).BeforeFirst(buf[(size_t)pos+8]);
482 }
483 }
484 }
485
486 extern "C" {
487 static int UnknownEncodingHnd(void * WXUNUSED(encodingHandlerData),
488 const XML_Char *name, XML_Encoding *info)
489 {
490 // We must build conversion table for expat. The easiest way to do so
491 // is to let wxCSConv convert as string containing all characters to
492 // wide character representation:
493 wxString str(name, wxConvLibc);
494 wxCSConv conv(str);
495 char mbBuf[2];
496 wchar_t wcBuf[10];
497 size_t i;
498
499 mbBuf[1] = 0;
500 info->map[0] = 0;
501 for (i = 0; i < 255; i++)
502 {
503 mbBuf[0] = (char)(i+1);
504 if (conv.MB2WC(wcBuf, mbBuf, 2) == (size_t)-1)
505 {
506 // invalid/undefined byte in the encoding:
507 info->map[i+1] = -1;
508 }
509 info->map[i+1] = (int)wcBuf[0];
510 }
511
512 info->data = NULL;
513 info->convert = NULL;
514 info->release = NULL;
515
516 return 1;
517 }
518 }
519
520 bool wxXmlDocument::Load(wxInputStream& stream, const wxString& encoding)
521 {
522 #if wxUSE_UNICODE
523 (void)encoding;
524 #else
525 m_encoding = encoding;
526 #endif
527
528 const size_t BUFSIZE = 1024;
529 char buf[BUFSIZE];
530 wxXmlParsingContext ctx;
531 bool done;
532 XML_Parser parser = XML_ParserCreate(NULL);
533
534 ctx.root = ctx.node = NULL;
535 ctx.encoding = wxT("UTF-8"); // default in absence of encoding=""
536 ctx.conv = NULL;
537 #if !wxUSE_UNICODE
538 if ( encoding != wxT("UTF-8") && encoding != wxT("utf-8") )
539 ctx.conv = new wxCSConv(encoding);
540 #endif
541
542 XML_SetUserData(parser, (void*)&ctx);
543 XML_SetElementHandler(parser, StartElementHnd, EndElementHnd);
544 XML_SetCharacterDataHandler(parser, TextHnd);
545 XML_SetCommentHandler(parser, CommentHnd);
546 XML_SetDefaultHandler(parser, DefaultHnd);
547 XML_SetUnknownEncodingHandler(parser, UnknownEncodingHnd, NULL);
548
549 bool ok = true;
550 do
551 {
552 size_t len = stream.Read(buf, BUFSIZE).LastRead();
553 done = (len < BUFSIZE);
554 if (!XML_Parse(parser, buf, len, done))
555 {
556 wxString error(XML_ErrorString(XML_GetErrorCode(parser)),
557 *wxConvCurrent);
558 wxLogError(_("XML parsing error: '%s' at line %d"),
559 error.c_str(),
560 XML_GetCurrentLineNumber(parser));
561 ok = false;
562 break;
563 }
564 } while (!done);
565
566 if (ok)
567 {
568 if (!ctx.version.empty())
569 SetVersion(ctx.version);
570 if (!ctx.encoding.empty())
571 SetFileEncoding(ctx.encoding);
572 SetRoot(ctx.root);
573 }
574 else
575 {
576 delete ctx.root;
577 }
578
579 XML_ParserFree(parser);
580 #if !wxUSE_UNICODE
581 if ( ctx.conv )
582 delete ctx.conv;
583 #endif
584
585 return ok;
586
587 }
588
589
590
591 //-----------------------------------------------------------------------------
592 // wxXmlDocument saving routines
593 //-----------------------------------------------------------------------------
594
595 // write string to output:
596 inline static void OutputString(wxOutputStream& stream, const wxString& str,
597 wxMBConv *convMem = NULL,
598 wxMBConv *convFile = NULL)
599 {
600 if (str.empty())
601 return;
602
603 #if wxUSE_UNICODE
604 wxUnusedVar(convMem);
605
606 const wxWX2MBbuf buf(str.mb_str(*(convFile ? convFile : &wxConvUTF8)));
607 stream.Write((const char*)buf, strlen((const char*)buf));
608 #else // !wxUSE_UNICODE
609 if ( convFile && convMem )
610 {
611 wxString str2(str.wc_str(*convMem), *convFile);
612 stream.Write(str2.mb_str(), str2.Len());
613 }
614 else // no conversions to do
615 {
616 stream.Write(str.mb_str(), str.Len());
617 }
618 #endif // wxUSE_UNICODE/!wxUSE_UNICODE
619 }
620
// Bit flags accepted by the 'flags' argument of OutputStringEnt().
enum
{
    // additionally write double quotes as &quot;
    XML_ESCAPE_QUOTES = 0x01
};
626
627 // Same as above, but create entities first.
628 // Translates '<' to "&lt;", '>' to "&gt;" and '&' to "&amp;"
629 static void OutputStringEnt(wxOutputStream& stream, const wxString& str,
630 wxMBConv *convMem = NULL,
631 wxMBConv *convFile = NULL,
632 int flags = 0)
633 {
634 wxString buf;
635 size_t i, last, len;
636 wxChar c;
637
638 len = str.Len();
639 last = 0;
640 for (i = 0; i < len; i++)
641 {
642 c = str.GetChar(i);
643 if (c == wxT('<') || c == wxT('>') ||
644 (c == wxT('&') && str.Mid(i+1, 4) != wxT("amp;")) ||
645 ((flags & XML_ESCAPE_QUOTES) && c == wxT('"')))
646 {
647 OutputString(stream, str.Mid(last, i - last), convMem, convFile);
648 switch (c)
649 {
650 case wxT('<'):
651 OutputString(stream, wxT("&lt;"));
652 break;
653 case wxT('>'):
654 OutputString(stream, wxT("&gt;"));
655 break;
656 case wxT('&'):
657 OutputString(stream, wxT("&amp;"));
658 break;
659 case wxT('"'):
660 OutputString(stream, wxT("&quot;"));
661 break;
662 default:
663 break;
664 }
665 last = i + 1;
666 }
667 }
668 OutputString(stream, str.Mid(last, i - last), convMem, convFile);
669 }
670
671 inline static void OutputIndentation(wxOutputStream& stream, int indent)
672 {
673 wxString str = wxT("\n");
674 for (int i = 0; i < indent; i++)
675 str << wxT(' ') << wxT(' ');
676 OutputString(stream, str);
677 }
678
679 static void OutputNode(wxOutputStream& stream, wxXmlNode *node, int indent,
680 wxMBConv *convMem, wxMBConv *convFile)
681 {
682 wxXmlNode *n, *prev;
683 wxXmlProperty *prop;
684
685 switch (node->GetType())
686 {
687 case wxXML_TEXT_NODE:
688 OutputStringEnt(stream, node->GetContent(), convMem, convFile);
689 break;
690
691 case wxXML_ELEMENT_NODE:
692 OutputString(stream, wxT("<"));
693 OutputString(stream, node->GetName());
694
695 prop = node->GetProperties();
696 while (prop)
697 {
698 OutputString(stream, wxT(" ") + prop->GetName() + wxT("=\""));
699 OutputStringEnt(stream, prop->GetValue(), convMem, convFile,
700 XML_ESCAPE_QUOTES);
701 OutputString(stream, wxT("\""));
702 prop = prop->GetNext();
703 }
704
705 if (node->GetChildren())
706 {
707 OutputString(stream, wxT(">"));
708 prev = NULL;
709 n = node->GetChildren();
710 while (n)
711 {
712 if (n && n->GetType() != wxXML_TEXT_NODE)
713 OutputIndentation(stream, indent + 1);
714 OutputNode(stream, n, indent + 1, convMem, convFile);
715 prev = n;
716 n = n->GetNext();
717 }
718 if (prev && prev->GetType() != wxXML_TEXT_NODE)
719 OutputIndentation(stream, indent);
720 OutputString(stream, wxT("</"));
721 OutputString(stream, node->GetName());
722 OutputString(stream, wxT(">"));
723 }
724 else
725 OutputString(stream, wxT("/>"));
726 break;
727
728 case wxXML_COMMENT_NODE:
729 OutputString(stream, wxT("<!--"));
730 OutputString(stream, node->GetContent(), convMem, convFile);
731 OutputString(stream, wxT("-->"));
732 break;
733
734 default:
735 wxFAIL_MSG(wxT("unsupported node type"));
736 }
737 }
738
739 bool wxXmlDocument::Save(wxOutputStream& stream) const
740 {
741 if ( !IsOk() )
742 return false;
743
744 wxString s;
745
746 wxMBConv *convMem = NULL;
747
748 #if wxUSE_UNICODE
749 wxMBConv *convFile = new wxCSConv(GetFileEncoding());
750 #else
751 wxMBConv *convFile = NULL;
752 if ( GetFileEncoding() != GetEncoding() )
753 {
754 convFile = new wxCSConv(GetFileEncoding());
755 convMem = new wxCSConv(GetEncoding());
756 }
757 #endif
758
759 s.Printf(wxT("<?xml version=\"%s\" encoding=\"%s\"?>\n"),
760 GetVersion().c_str(), GetFileEncoding().c_str());
761 OutputString(stream, s);
762
763 OutputNode(stream, GetRoot(), 0, convMem, convFile);
764 OutputString(stream, wxT("\n"));
765
766 if ( convFile )
767 delete convFile;
768 if ( convMem )
769 delete convMem;
770
771 return true;
772 }
773
774 #endif // wxUSE_XML