Fix off-by-one error in CharToString(); also simplify the conversion code a bit
[wxWidgets.git] / src / xml / xml.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: xml.cpp
3 // Purpose: wxXmlDocument - XML parser & data holder class
4 // Author: Vaclav Slavik
5 // Created: 2000/03/05
6 // RCS-ID: $Id$
7 // Copyright: (c) 2000 Vaclav Slavik
8 // Licence: wxWindows licence
9 /////////////////////////////////////////////////////////////////////////////
10
11 // For compilers that support precompilation, includes "wx.h".
12 #include "wx/wxprec.h"
13
14 #ifdef __BORLANDC__
15 #pragma hdrstop
16 #endif
17
18 #include "wx/xml/xml.h"
19
20 #if wxUSE_XML
21
22 #include "wx/wfstream.h"
23 #include "wx/datstrm.h"
24 #include "wx/zstream.h"
25 #include "wx/log.h"
26 #include "wx/intl.h"
27 #include "wx/strconv.h"
28
29 #include "expat.h" // from Expat
30
31 // DLL options compatibility check:
32 #include "wx/app.h"
33 WX_CHECK_BUILD_OPTIONS("wxXML")
34
35
36 IMPLEMENT_CLASS(wxXmlDocument, wxObject)
37
38
39
40 //-----------------------------------------------------------------------------
41 // wxXmlNode
42 //-----------------------------------------------------------------------------
43
44 wxXmlNode::wxXmlNode(wxXmlNode *parent,wxXmlNodeType type,
45 const wxString& name, const wxString& content,
46 wxXmlProperty *props, wxXmlNode *next)
47 : m_type(type), m_name(name), m_content(content),
48 m_properties(props), m_parent(parent),
49 m_children(NULL), m_next(next)
50 {
51 if (m_parent)
52 {
53 if (m_parent->m_children)
54 {
55 m_next = m_parent->m_children;
56 m_parent->m_children = this;
57 }
58 else
59 m_parent->m_children = this;
60 }
61 }
62
63 wxXmlNode::wxXmlNode(wxXmlNodeType type, const wxString& name,
64 const wxString& content)
65 : m_type(type), m_name(name), m_content(content),
66 m_properties(NULL), m_parent(NULL),
67 m_children(NULL), m_next(NULL)
68 {}
69
70 wxXmlNode::wxXmlNode(const wxXmlNode& node)
71 {
72 m_next = NULL;
73 m_parent = NULL;
74 DoCopy(node);
75 }
76
77 wxXmlNode::~wxXmlNode()
78 {
79 wxXmlNode *c, *c2;
80 for (c = m_children; c; c = c2)
81 {
82 c2 = c->m_next;
83 delete c;
84 }
85
86 wxXmlProperty *p, *p2;
87 for (p = m_properties; p; p = p2)
88 {
89 p2 = p->GetNext();
90 delete p;
91 }
92 }
93
94 wxXmlNode& wxXmlNode::operator=(const wxXmlNode& node)
95 {
96 wxDELETE(m_properties);
97 wxDELETE(m_children);
98 DoCopy(node);
99 return *this;
100 }
101
102 void wxXmlNode::DoCopy(const wxXmlNode& node)
103 {
104 m_type = node.m_type;
105 m_name = node.m_name;
106 m_content = node.m_content;
107 m_children = NULL;
108
109 wxXmlNode *n = node.m_children;
110 while (n)
111 {
112 AddChild(new wxXmlNode(*n));
113 n = n->GetNext();
114 }
115
116 m_properties = NULL;
117 wxXmlProperty *p = node.m_properties;
118 while (p)
119 {
120 AddProperty(p->GetName(), p->GetValue());
121 p = p->GetNext();
122 }
123 }
124
125 bool wxXmlNode::HasProp(const wxString& propName) const
126 {
127 wxXmlProperty *prop = GetProperties();
128
129 while (prop)
130 {
131 if (prop->GetName() == propName) return true;
132 prop = prop->GetNext();
133 }
134
135 return false;
136 }
137
138 bool wxXmlNode::GetPropVal(const wxString& propName, wxString *value) const
139 {
140 wxXmlProperty *prop = GetProperties();
141
142 while (prop)
143 {
144 if (prop->GetName() == propName)
145 {
146 *value = prop->GetValue();
147 return true;
148 }
149 prop = prop->GetNext();
150 }
151
152 return false;
153 }
154
155 wxString wxXmlNode::GetPropVal(const wxString& propName, const wxString& defaultVal) const
156 {
157 wxString tmp;
158 if (GetPropVal(propName, &tmp))
159 return tmp;
160
161 return defaultVal;
162 }
163
164 void wxXmlNode::AddChild(wxXmlNode *child)
165 {
166 if (m_children == NULL)
167 m_children = child;
168 else
169 {
170 wxXmlNode *ch = m_children;
171 while (ch->m_next) ch = ch->m_next;
172 ch->m_next = child;
173 }
174 child->m_next = NULL;
175 child->m_parent = this;
176 }
177
178 void wxXmlNode::InsertChild(wxXmlNode *child, wxXmlNode *before_node)
179 {
180 wxASSERT_MSG(before_node->GetParent() == this, wxT("wxXmlNode::InsertChild - the node has incorrect parent"));
181
182 if (m_children == before_node)
183 m_children = child;
184 else
185 {
186 wxXmlNode *ch = m_children;
187 while (ch->m_next != before_node) ch = ch->m_next;
188 ch->m_next = child;
189 }
190
191 child->m_parent = this;
192 child->m_next = before_node;
193 }
194
195 bool wxXmlNode::RemoveChild(wxXmlNode *child)
196 {
197 if (m_children == NULL)
198 return false;
199 else if (m_children == child)
200 {
201 m_children = child->m_next;
202 child->m_parent = NULL;
203 child->m_next = NULL;
204 return true;
205 }
206 else
207 {
208 wxXmlNode *ch = m_children;
209 while (ch->m_next)
210 {
211 if (ch->m_next == child)
212 {
213 ch->m_next = child->m_next;
214 child->m_parent = NULL;
215 child->m_next = NULL;
216 return true;
217 }
218 ch = ch->m_next;
219 }
220 return false;
221 }
222 }
223
224 void wxXmlNode::AddProperty(const wxString& name, const wxString& value)
225 {
226 AddProperty(new wxXmlProperty(name, value, NULL));
227 }
228
229 void wxXmlNode::AddProperty(wxXmlProperty *prop)
230 {
231 if (m_properties == NULL)
232 m_properties = prop;
233 else
234 {
235 wxXmlProperty *p = m_properties;
236 while (p->GetNext()) p = p->GetNext();
237 p->SetNext(prop);
238 }
239 }
240
241 bool wxXmlNode::DeleteProperty(const wxString& name)
242 {
243 wxXmlProperty *prop;
244
245 if (m_properties == NULL)
246 return false;
247
248 else if (m_properties->GetName() == name)
249 {
250 prop = m_properties;
251 m_properties = prop->GetNext();
252 prop->SetNext(NULL);
253 delete prop;
254 return true;
255 }
256
257 else
258 {
259 wxXmlProperty *p = m_properties;
260 while (p->GetNext())
261 {
262 if (p->GetNext()->GetName() == name)
263 {
264 prop = p->GetNext();
265 p->SetNext(prop->GetNext());
266 prop->SetNext(NULL);
267 delete prop;
268 return true;
269 }
270 p = p->GetNext();
271 }
272 return false;
273 }
274 }
275
276 wxString wxXmlNode::GetNodeContent() const
277 {
278 wxXmlNode *n = GetChildren();
279
280 while (n)
281 {
282 if (n->GetType() == wxXML_TEXT_NODE ||
283 n->GetType() == wxXML_CDATA_SECTION_NODE)
284 return n->GetContent();
285 n = n->GetNext();
286 }
287 return wxEmptyString;
288 }
289
290
291
292 //-----------------------------------------------------------------------------
293 // wxXmlDocument
294 //-----------------------------------------------------------------------------
295
296 wxXmlDocument::wxXmlDocument()
297 : m_version(wxT("1.0")), m_fileEncoding(wxT("utf-8")), m_root(NULL)
298 {
299 #if !wxUSE_UNICODE
300 m_encoding = wxT("UTF-8");
301 #endif
302 }
303
304 wxXmlDocument::wxXmlDocument(const wxString& filename, const wxString& encoding)
305 :wxObject(), m_root(NULL)
306 {
307 if ( !Load(filename, encoding) )
308 {
309 wxDELETE(m_root);
310 }
311 }
312
313 wxXmlDocument::wxXmlDocument(wxInputStream& stream, const wxString& encoding)
314 :wxObject(), m_root(NULL)
315 {
316 if ( !Load(stream, encoding) )
317 {
318 wxDELETE(m_root);
319 }
320 }
321
322 wxXmlDocument::wxXmlDocument(const wxXmlDocument& doc)
323 :wxObject()
324 {
325 DoCopy(doc);
326 }
327
328 wxXmlDocument& wxXmlDocument::operator=(const wxXmlDocument& doc)
329 {
330 wxDELETE(m_root);
331 DoCopy(doc);
332 return *this;
333 }
334
335 void wxXmlDocument::DoCopy(const wxXmlDocument& doc)
336 {
337 m_version = doc.m_version;
338 #if !wxUSE_UNICODE
339 m_encoding = doc.m_encoding;
340 #endif
341 m_fileEncoding = doc.m_fileEncoding;
342 m_root = new wxXmlNode(*doc.m_root);
343 }
344
345 bool wxXmlDocument::Load(const wxString& filename, const wxString& encoding)
346 {
347 wxFileInputStream stream(filename);
348 return Load(stream, encoding);
349 }
350
351 bool wxXmlDocument::Save(const wxString& filename) const
352 {
353 wxFileOutputStream stream(filename);
354 return Save(stream);
355 }
356
357
358
359 //-----------------------------------------------------------------------------
360 // wxXmlDocument loading routines
361 //-----------------------------------------------------------------------------
362
363 /*
364 FIXME:
365 - process all elements, including CDATA
366 */
367
368 // converts Expat-produced string in UTF-8 into wxString using the specified
369 // conv or keep in UTF-8 if conv is NULL
370 static wxString CharToString(wxMBConv *conv,
371 const char *s, size_t len = wxSTRING_MAXLEN)
372 {
373 #if wxUSE_UNICODE
374 wxUnusedVar(conv);
375
376 return wxString(s, wxConvUTF8, len);
377 #else // !wxUSE_UNICODE
378 if ( conv )
379 {
380 // there can be no embedded NULs in this string so we don't need the
381 // output length, it will be NUL-terminated
382 const wxWCharBuffer wbuf(
383 wxConvUTF8.cMB2WC(s, len == wxSTRING_MAXLEN ? wxNO_LEN : len, NULL));
384
385 return wxString(wbuf, conv);
386 }
387 else // already in UTF-8, no conversion needed
388 {
389 return wxString(s, len != wxSTRING_MAXLEN ? len : strlen(s));
390 }
391 #endif // wxUSE_UNICODE/!wxUSE_UNICODE
392 }
393
394 struct wxXmlParsingContext
395 {
396 wxMBConv *conv;
397 wxXmlNode *root;
398 wxXmlNode *node;
399 wxXmlNode *lastAsText;
400 wxString encoding;
401 wxString version;
402 };
403
404 extern "C" {
405 static void StartElementHnd(void *userData, const char *name, const char **atts)
406 {
407 wxXmlParsingContext *ctx = (wxXmlParsingContext*)userData;
408 wxXmlNode *node = new wxXmlNode(wxXML_ELEMENT_NODE, CharToString(ctx->conv, name));
409 const char **a = atts;
410 while (*a)
411 {
412 node->AddProperty(CharToString(ctx->conv, a[0]), CharToString(ctx->conv, a[1]));
413 a += 2;
414 }
415 if (ctx->root == NULL)
416 ctx->root = node;
417 else
418 ctx->node->AddChild(node);
419 ctx->node = node;
420 ctx->lastAsText = NULL;
421 }
422 }
423
424 extern "C" {
425 static void EndElementHnd(void *userData, const char* WXUNUSED(name))
426 {
427 wxXmlParsingContext *ctx = (wxXmlParsingContext*)userData;
428
429 ctx->node = ctx->node->GetParent();
430 ctx->lastAsText = NULL;
431 }
432 }
433
434 extern "C" {
435 static void TextHnd(void *userData, const char *s, int len)
436 {
437 wxXmlParsingContext *ctx = (wxXmlParsingContext*)userData;
438 char *buf = new char[len + 1];
439
440 buf[len] = '\0';
441 memcpy(buf, s, (size_t)len);
442
443 if (ctx->lastAsText)
444 {
445 ctx->lastAsText->SetContent(ctx->lastAsText->GetContent() +
446 CharToString(ctx->conv, buf));
447 }
448 else
449 {
450 bool whiteOnly = true;
451 for (char *c = buf; *c != '\0'; c++)
452 if (*c != ' ' && *c != '\t' && *c != '\n' && *c != '\r')
453 {
454 whiteOnly = false;
455 break;
456 }
457 if (!whiteOnly)
458 {
459 ctx->lastAsText = new wxXmlNode(wxXML_TEXT_NODE, wxT("text"),
460 CharToString(ctx->conv, buf));
461 ctx->node->AddChild(ctx->lastAsText);
462 }
463 }
464
465 delete[] buf;
466 }
467 }
468
469 extern "C" {
470 static void CommentHnd(void *userData, const char *data)
471 {
472 wxXmlParsingContext *ctx = (wxXmlParsingContext*)userData;
473
474 if (ctx->node)
475 {
476 // VS: ctx->node == NULL happens if there is a comment before
477 // the root element (e.g. wxDesigner's output). We ignore such
478 // comments, no big deal...
479 ctx->node->AddChild(new wxXmlNode(wxXML_COMMENT_NODE,
480 wxT("comment"), CharToString(ctx->conv, data)));
481 }
482 ctx->lastAsText = NULL;
483 }
484 }
485
486 extern "C" {
487 static void DefaultHnd(void *userData, const char *s, int len)
488 {
489 // XML header:
490 if (len > 6 && memcmp(s, "<?xml ", 6) == 0)
491 {
492 wxXmlParsingContext *ctx = (wxXmlParsingContext*)userData;
493
494 wxString buf = CharToString(ctx->conv, s, (size_t)len);
495 int pos;
496 pos = buf.Find(wxT("encoding="));
497 if (pos != wxNOT_FOUND)
498 ctx->encoding = buf.Mid(pos + 10).BeforeFirst(buf[(size_t)pos+9]);
499 pos = buf.Find(wxT("version="));
500 if (pos != wxNOT_FOUND)
501 ctx->version = buf.Mid(pos + 9).BeforeFirst(buf[(size_t)pos+8]);
502 }
503 }
504 }
505
506 extern "C" {
507 static int UnknownEncodingHnd(void * WXUNUSED(encodingHandlerData),
508 const XML_Char *name, XML_Encoding *info)
509 {
510 // We must build conversion table for expat. The easiest way to do so
511 // is to let wxCSConv convert as string containing all characters to
512 // wide character representation:
513 wxString str(name, wxConvLibc);
514 wxCSConv conv(str);
515 char mbBuf[2];
516 wchar_t wcBuf[10];
517 size_t i;
518
519 mbBuf[1] = 0;
520 info->map[0] = 0;
521 for (i = 0; i < 255; i++)
522 {
523 mbBuf[0] = (char)(i+1);
524 if (conv.MB2WC(wcBuf, mbBuf, 2) == (size_t)-1)
525 {
526 // invalid/undefined byte in the encoding:
527 info->map[i+1] = -1;
528 }
529 info->map[i+1] = (int)wcBuf[0];
530 }
531
532 info->data = NULL;
533 info->convert = NULL;
534 info->release = NULL;
535
536 return 1;
537 }
538 }
539
540 bool wxXmlDocument::Load(wxInputStream& stream, const wxString& encoding)
541 {
542 #if wxUSE_UNICODE
543 (void)encoding;
544 #else
545 m_encoding = encoding;
546 #endif
547
548 const size_t BUFSIZE = 1024;
549 char buf[BUFSIZE];
550 wxXmlParsingContext ctx;
551 bool done;
552 XML_Parser parser = XML_ParserCreate(NULL);
553
554 ctx.root = ctx.node = NULL;
555 ctx.encoding = wxT("UTF-8"); // default in absence of encoding=""
556 ctx.conv = NULL;
557 #if !wxUSE_UNICODE
558 if ( encoding != wxT("UTF-8") && encoding != wxT("utf-8") )
559 ctx.conv = new wxCSConv(encoding);
560 #endif
561
562 XML_SetUserData(parser, (void*)&ctx);
563 XML_SetElementHandler(parser, StartElementHnd, EndElementHnd);
564 XML_SetCharacterDataHandler(parser, TextHnd);
565 XML_SetCommentHandler(parser, CommentHnd);
566 XML_SetDefaultHandler(parser, DefaultHnd);
567 XML_SetUnknownEncodingHandler(parser, UnknownEncodingHnd, NULL);
568
569 bool ok = true;
570 do
571 {
572 size_t len = stream.Read(buf, BUFSIZE).LastRead();
573 done = (len < BUFSIZE);
574 if (!XML_Parse(parser, buf, len, done))
575 {
576 wxString error(XML_ErrorString(XML_GetErrorCode(parser)),
577 *wxConvCurrent);
578 wxLogError(_("XML parsing error: '%s' at line %d"),
579 error.c_str(),
580 XML_GetCurrentLineNumber(parser));
581 ok = false;
582 break;
583 }
584 } while (!done);
585
586 if (ok)
587 {
588 if (!ctx.version.empty())
589 SetVersion(ctx.version);
590 if (!ctx.encoding.empty())
591 SetFileEncoding(ctx.encoding);
592 SetRoot(ctx.root);
593 }
594 else
595 {
596 delete ctx.root;
597 }
598
599 XML_ParserFree(parser);
600 #if !wxUSE_UNICODE
601 if ( ctx.conv )
602 delete ctx.conv;
603 #endif
604
605 return ok;
606
607 }
608
609
610
611 //-----------------------------------------------------------------------------
612 // wxXmlDocument saving routines
613 //-----------------------------------------------------------------------------
614
615 // write string to output:
616 inline static void OutputString(wxOutputStream& stream, const wxString& str,
617 wxMBConv *convMem = NULL,
618 wxMBConv *convFile = NULL)
619 {
620 if (str.empty())
621 return;
622
623 #if wxUSE_UNICODE
624 wxUnusedVar(convMem);
625
626 const wxWX2MBbuf buf(str.mb_str(*(convFile ? convFile : &wxConvUTF8)));
627 stream.Write((const char*)buf, strlen((const char*)buf));
628 #else // !wxUSE_UNICODE
629 if ( convFile && convMem )
630 {
631 wxString str2(str.wc_str(*convMem), *convFile);
632 stream.Write(str2.mb_str(), str2.Len());
633 }
634 else // no conversions to do
635 {
636 stream.Write(str.mb_str(), str.Len());
637 }
638 #endif // wxUSE_UNICODE/!wxUSE_UNICODE
639 }
640
// Bit flags accepted by the 'flags' parameter of OutputStringEnt().
enum
{
    // also replace double quotes with "&quot;"
    XML_ESCAPE_QUOTES = 0x01
};
646
647 // Same as above, but create entities first.
648 // Translates '<' to "&lt;", '>' to "&gt;" and '&' to "&amp;"
649 static void OutputStringEnt(wxOutputStream& stream, const wxString& str,
650 wxMBConv *convMem = NULL,
651 wxMBConv *convFile = NULL,
652 int flags = 0)
653 {
654 wxString buf;
655 size_t i, last, len;
656 wxChar c;
657
658 len = str.Len();
659 last = 0;
660 for (i = 0; i < len; i++)
661 {
662 c = str.GetChar(i);
663 if (c == wxT('<') || c == wxT('>') ||
664 (c == wxT('&') && str.Mid(i+1, 4) != wxT("amp;")) ||
665 ((flags & XML_ESCAPE_QUOTES) && c == wxT('"')))
666 {
667 OutputString(stream, str.Mid(last, i - last), convMem, convFile);
668 switch (c)
669 {
670 case wxT('<'):
671 OutputString(stream, wxT("&lt;"));
672 break;
673 case wxT('>'):
674 OutputString(stream, wxT("&gt;"));
675 break;
676 case wxT('&'):
677 OutputString(stream, wxT("&amp;"));
678 break;
679 case wxT('"'):
680 OutputString(stream, wxT("&quot;"));
681 break;
682 default:
683 break;
684 }
685 last = i + 1;
686 }
687 }
688 OutputString(stream, str.Mid(last, i - last), convMem, convFile);
689 }
690
691 inline static void OutputIndentation(wxOutputStream& stream, int indent)
692 {
693 wxString str = wxT("\n");
694 for (int i = 0; i < indent; i++)
695 str << wxT(' ') << wxT(' ');
696 OutputString(stream, str);
697 }
698
699 static void OutputNode(wxOutputStream& stream, wxXmlNode *node, int indent,
700 wxMBConv *convMem, wxMBConv *convFile)
701 {
702 wxXmlNode *n, *prev;
703 wxXmlProperty *prop;
704
705 switch (node->GetType())
706 {
707 case wxXML_TEXT_NODE:
708 OutputStringEnt(stream, node->GetContent(), convMem, convFile);
709 break;
710
711 case wxXML_ELEMENT_NODE:
712 OutputString(stream, wxT("<"));
713 OutputString(stream, node->GetName());
714
715 prop = node->GetProperties();
716 while (prop)
717 {
718 OutputString(stream, wxT(" ") + prop->GetName() + wxT("=\""));
719 OutputStringEnt(stream, prop->GetValue(), convMem, convFile,
720 XML_ESCAPE_QUOTES);
721 OutputString(stream, wxT("\""));
722 prop = prop->GetNext();
723 }
724
725 if (node->GetChildren())
726 {
727 OutputString(stream, wxT(">"));
728 prev = NULL;
729 n = node->GetChildren();
730 while (n)
731 {
732 if (n && n->GetType() != wxXML_TEXT_NODE)
733 OutputIndentation(stream, indent + 1);
734 OutputNode(stream, n, indent + 1, convMem, convFile);
735 prev = n;
736 n = n->GetNext();
737 }
738 if (prev && prev->GetType() != wxXML_TEXT_NODE)
739 OutputIndentation(stream, indent);
740 OutputString(stream, wxT("</"));
741 OutputString(stream, node->GetName());
742 OutputString(stream, wxT(">"));
743 }
744 else
745 OutputString(stream, wxT("/>"));
746 break;
747
748 case wxXML_COMMENT_NODE:
749 OutputString(stream, wxT("<!--"));
750 OutputString(stream, node->GetContent(), convMem, convFile);
751 OutputString(stream, wxT("-->"));
752 break;
753
754 default:
755 wxFAIL_MSG(wxT("unsupported node type"));
756 }
757 }
758
759 bool wxXmlDocument::Save(wxOutputStream& stream) const
760 {
761 if ( !IsOk() )
762 return false;
763
764 wxString s;
765
766 wxMBConv *convMem = NULL;
767
768 #if wxUSE_UNICODE
769 wxMBConv *convFile = new wxCSConv(GetFileEncoding());
770 #else
771 wxMBConv *convFile = NULL;
772 if ( GetFileEncoding() != GetEncoding() )
773 {
774 convFile = new wxCSConv(GetFileEncoding());
775 convMem = new wxCSConv(GetEncoding());
776 }
777 #endif
778
779 s.Printf(wxT("<?xml version=\"%s\" encoding=\"%s\"?>\n"),
780 GetVersion().c_str(), GetFileEncoding().c_str());
781 OutputString(stream, s);
782
783 OutputNode(stream, GetRoot(), 0, convMem, convFile);
784 OutputString(stream, wxT("\n"));
785
786 if ( convFile )
787 delete convFile;
788 if ( convMem )
789 delete convMem;
790
791 return true;
792 }
793
794 #endif // wxUSE_XML