]> git.saurik.com Git - wxWidgets.git/blob - src/xml/xml.cpp
gtk_text_iter_get_line and get_line_offset work fine on the end iter. Don't bail...
[wxWidgets.git] / src / xml / xml.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/xml/xml.cpp
3 // Purpose: wxXmlDocument - XML parser & data holder class
4 // Author: Vaclav Slavik
5 // Created: 2000/03/05
6 // RCS-ID: $Id$
7 // Copyright: (c) 2000 Vaclav Slavik
8 // Licence: wxWindows licence
9 /////////////////////////////////////////////////////////////////////////////
10
11 // For compilers that support precompilation, includes "wx.h".
12 #include "wx/wxprec.h"
13
14 #ifdef __BORLANDC__
15 #pragma hdrstop
16 #endif
17
18 #if wxUSE_XML
19
20 #include "wx/xml/xml.h"
21
22 #ifndef WX_PRECOMP
23 #include "wx/intl.h"
24 #include "wx/log.h"
25 #include "wx/app.h"
26 #endif
27
28 #include "wx/wfstream.h"
29 #include "wx/datstrm.h"
30 #include "wx/zstream.h"
31 #include "wx/strconv.h"
32
33 #include "expat.h" // from Expat
34
35 // DLL options compatibility check:
36 WX_CHECK_BUILD_OPTIONS("wxXML")
37
38
39 IMPLEMENT_CLASS(wxXmlDocument, wxObject)
40
41
42
43 //-----------------------------------------------------------------------------
44 // wxXmlNode
45 //-----------------------------------------------------------------------------
46
47 wxXmlNode::wxXmlNode(wxXmlNode *parent,wxXmlNodeType type,
48 const wxString& name, const wxString& content,
49 wxXmlProperty *props, wxXmlNode *next)
50 : m_type(type), m_name(name), m_content(content),
51 m_properties(props), m_parent(parent),
52 m_children(NULL), m_next(next)
53 {
54 if (m_parent)
55 {
56 if (m_parent->m_children)
57 {
58 m_next = m_parent->m_children;
59 m_parent->m_children = this;
60 }
61 else
62 m_parent->m_children = this;
63 }
64 }
65
66 wxXmlNode::wxXmlNode(wxXmlNodeType type, const wxString& name,
67 const wxString& content)
68 : m_type(type), m_name(name), m_content(content),
69 m_properties(NULL), m_parent(NULL),
70 m_children(NULL), m_next(NULL)
71 {}
72
73 wxXmlNode::wxXmlNode(const wxXmlNode& node)
74 {
75 m_next = NULL;
76 m_parent = NULL;
77 DoCopy(node);
78 }
79
80 wxXmlNode::~wxXmlNode()
81 {
82 wxXmlNode *c, *c2;
83 for (c = m_children; c; c = c2)
84 {
85 c2 = c->m_next;
86 delete c;
87 }
88
89 wxXmlProperty *p, *p2;
90 for (p = m_properties; p; p = p2)
91 {
92 p2 = p->GetNext();
93 delete p;
94 }
95 }
96
97 wxXmlNode& wxXmlNode::operator=(const wxXmlNode& node)
98 {
99 wxDELETE(m_properties);
100 wxDELETE(m_children);
101 DoCopy(node);
102 return *this;
103 }
104
105 void wxXmlNode::DoCopy(const wxXmlNode& node)
106 {
107 m_type = node.m_type;
108 m_name = node.m_name;
109 m_content = node.m_content;
110 m_children = NULL;
111
112 wxXmlNode *n = node.m_children;
113 while (n)
114 {
115 AddChild(new wxXmlNode(*n));
116 n = n->GetNext();
117 }
118
119 m_properties = NULL;
120 wxXmlProperty *p = node.m_properties;
121 while (p)
122 {
123 AddProperty(p->GetName(), p->GetValue());
124 p = p->GetNext();
125 }
126 }
127
128 bool wxXmlNode::HasProp(const wxString& propName) const
129 {
130 wxXmlProperty *prop = GetProperties();
131
132 while (prop)
133 {
134 if (prop->GetName() == propName) return true;
135 prop = prop->GetNext();
136 }
137
138 return false;
139 }
140
141 bool wxXmlNode::GetPropVal(const wxString& propName, wxString *value) const
142 {
143 wxXmlProperty *prop = GetProperties();
144
145 while (prop)
146 {
147 if (prop->GetName() == propName)
148 {
149 *value = prop->GetValue();
150 return true;
151 }
152 prop = prop->GetNext();
153 }
154
155 return false;
156 }
157
158 wxString wxXmlNode::GetPropVal(const wxString& propName, const wxString& defaultVal) const
159 {
160 wxString tmp;
161 if (GetPropVal(propName, &tmp))
162 return tmp;
163
164 return defaultVal;
165 }
166
167 void wxXmlNode::AddChild(wxXmlNode *child)
168 {
169 if (m_children == NULL)
170 m_children = child;
171 else
172 {
173 wxXmlNode *ch = m_children;
174 while (ch->m_next) ch = ch->m_next;
175 ch->m_next = child;
176 }
177 child->m_next = NULL;
178 child->m_parent = this;
179 }
180
181 void wxXmlNode::InsertChild(wxXmlNode *child, wxXmlNode *before_node)
182 {
183 wxASSERT_MSG(before_node->GetParent() == this, wxT("wxXmlNode::InsertChild - the node has incorrect parent"));
184
185 if (m_children == before_node)
186 m_children = child;
187 else
188 {
189 wxXmlNode *ch = m_children;
190 while (ch->m_next != before_node) ch = ch->m_next;
191 ch->m_next = child;
192 }
193
194 child->m_parent = this;
195 child->m_next = before_node;
196 }
197
198 bool wxXmlNode::RemoveChild(wxXmlNode *child)
199 {
200 if (m_children == NULL)
201 return false;
202 else if (m_children == child)
203 {
204 m_children = child->m_next;
205 child->m_parent = NULL;
206 child->m_next = NULL;
207 return true;
208 }
209 else
210 {
211 wxXmlNode *ch = m_children;
212 while (ch->m_next)
213 {
214 if (ch->m_next == child)
215 {
216 ch->m_next = child->m_next;
217 child->m_parent = NULL;
218 child->m_next = NULL;
219 return true;
220 }
221 ch = ch->m_next;
222 }
223 return false;
224 }
225 }
226
227 void wxXmlNode::AddProperty(const wxString& name, const wxString& value)
228 {
229 AddProperty(new wxXmlProperty(name, value, NULL));
230 }
231
232 void wxXmlNode::AddProperty(wxXmlProperty *prop)
233 {
234 if (m_properties == NULL)
235 m_properties = prop;
236 else
237 {
238 wxXmlProperty *p = m_properties;
239 while (p->GetNext()) p = p->GetNext();
240 p->SetNext(prop);
241 }
242 }
243
244 bool wxXmlNode::DeleteProperty(const wxString& name)
245 {
246 wxXmlProperty *prop;
247
248 if (m_properties == NULL)
249 return false;
250
251 else if (m_properties->GetName() == name)
252 {
253 prop = m_properties;
254 m_properties = prop->GetNext();
255 prop->SetNext(NULL);
256 delete prop;
257 return true;
258 }
259
260 else
261 {
262 wxXmlProperty *p = m_properties;
263 while (p->GetNext())
264 {
265 if (p->GetNext()->GetName() == name)
266 {
267 prop = p->GetNext();
268 p->SetNext(prop->GetNext());
269 prop->SetNext(NULL);
270 delete prop;
271 return true;
272 }
273 p = p->GetNext();
274 }
275 return false;
276 }
277 }
278
279 wxString wxXmlNode::GetNodeContent() const
280 {
281 wxXmlNode *n = GetChildren();
282
283 while (n)
284 {
285 if (n->GetType() == wxXML_TEXT_NODE ||
286 n->GetType() == wxXML_CDATA_SECTION_NODE)
287 return n->GetContent();
288 n = n->GetNext();
289 }
290 return wxEmptyString;
291 }
292
293
294
295 //-----------------------------------------------------------------------------
296 // wxXmlDocument
297 //-----------------------------------------------------------------------------
298
299 wxXmlDocument::wxXmlDocument()
300 : m_version(wxT("1.0")), m_fileEncoding(wxT("utf-8")), m_root(NULL)
301 {
302 #if !wxUSE_UNICODE
303 m_encoding = wxT("UTF-8");
304 #endif
305 }
306
307 wxXmlDocument::wxXmlDocument(const wxString& filename, const wxString& encoding)
308 :wxObject(), m_root(NULL)
309 {
310 if ( !Load(filename, encoding) )
311 {
312 wxDELETE(m_root);
313 }
314 }
315
316 wxXmlDocument::wxXmlDocument(wxInputStream& stream, const wxString& encoding)
317 :wxObject(), m_root(NULL)
318 {
319 if ( !Load(stream, encoding) )
320 {
321 wxDELETE(m_root);
322 }
323 }
324
325 wxXmlDocument::wxXmlDocument(const wxXmlDocument& doc)
326 :wxObject()
327 {
328 DoCopy(doc);
329 }
330
331 wxXmlDocument& wxXmlDocument::operator=(const wxXmlDocument& doc)
332 {
333 wxDELETE(m_root);
334 DoCopy(doc);
335 return *this;
336 }
337
338 void wxXmlDocument::DoCopy(const wxXmlDocument& doc)
339 {
340 m_version = doc.m_version;
341 #if !wxUSE_UNICODE
342 m_encoding = doc.m_encoding;
343 #endif
344 m_fileEncoding = doc.m_fileEncoding;
345 m_root = new wxXmlNode(*doc.m_root);
346 }
347
348 bool wxXmlDocument::Load(const wxString& filename, const wxString& encoding)
349 {
350 wxFileInputStream stream(filename);
351 return Load(stream, encoding);
352 }
353
354 bool wxXmlDocument::Save(const wxString& filename) const
355 {
356 wxFileOutputStream stream(filename);
357 return Save(stream);
358 }
359
360
361
362 //-----------------------------------------------------------------------------
363 // wxXmlDocument loading routines
364 //-----------------------------------------------------------------------------
365
366 /*
367 FIXME:
368 - process all elements, including CDATA
369 */
370
371 // converts Expat-produced string in UTF-8 into wxString using the specified
372 // conv or keep in UTF-8 if conv is NULL
373 static wxString CharToString(wxMBConv *conv,
374 const char *s, size_t len = wxSTRING_MAXLEN)
375 {
376 #if wxUSE_UNICODE
377 wxUnusedVar(conv);
378
379 return wxString(s, wxConvUTF8, len);
380 #else // !wxUSE_UNICODE
381 if ( conv )
382 {
383 // there can be no embedded NULs in this string so we don't need the
384 // output length, it will be NUL-terminated
385 const wxWCharBuffer wbuf(
386 wxConvUTF8.cMB2WC(s, len == wxSTRING_MAXLEN ? wxNO_LEN : len, NULL));
387
388 return wxString(wbuf, *conv);
389 }
390 else // already in UTF-8, no conversion needed
391 {
392 return wxString(s, len != wxSTRING_MAXLEN ? len : strlen(s));
393 }
394 #endif // wxUSE_UNICODE/!wxUSE_UNICODE
395 }
396
397 struct wxXmlParsingContext
398 {
399 wxMBConv *conv;
400 wxXmlNode *root;
401 wxXmlNode *node;
402 wxXmlNode *lastAsText;
403 wxString encoding;
404 wxString version;
405 };
406
407 extern "C" {
408 static void StartElementHnd(void *userData, const char *name, const char **atts)
409 {
410 wxXmlParsingContext *ctx = (wxXmlParsingContext*)userData;
411 wxXmlNode *node = new wxXmlNode(wxXML_ELEMENT_NODE, CharToString(ctx->conv, name));
412 const char **a = atts;
413 while (*a)
414 {
415 node->AddProperty(CharToString(ctx->conv, a[0]), CharToString(ctx->conv, a[1]));
416 a += 2;
417 }
418 if (ctx->root == NULL)
419 ctx->root = node;
420 else
421 ctx->node->AddChild(node);
422 ctx->node = node;
423 ctx->lastAsText = NULL;
424 }
425 }
426
427 extern "C" {
428 static void EndElementHnd(void *userData, const char* WXUNUSED(name))
429 {
430 wxXmlParsingContext *ctx = (wxXmlParsingContext*)userData;
431
432 ctx->node = ctx->node->GetParent();
433 ctx->lastAsText = NULL;
434 }
435 }
436
437 extern "C" {
438 static void TextHnd(void *userData, const char *s, int len)
439 {
440 wxXmlParsingContext *ctx = (wxXmlParsingContext*)userData;
441 char *buf = new char[len + 1];
442
443 buf[len] = '\0';
444 memcpy(buf, s, (size_t)len);
445
446 if (ctx->lastAsText)
447 {
448 ctx->lastAsText->SetContent(ctx->lastAsText->GetContent() +
449 CharToString(ctx->conv, buf));
450 }
451 else
452 {
453 bool whiteOnly = true;
454 for (char *c = buf; *c != '\0'; c++)
455 if (*c != ' ' && *c != '\t' && *c != '\n' && *c != '\r')
456 {
457 whiteOnly = false;
458 break;
459 }
460 if (!whiteOnly)
461 {
462 ctx->lastAsText = new wxXmlNode(wxXML_TEXT_NODE, wxT("text"),
463 CharToString(ctx->conv, buf));
464 ctx->node->AddChild(ctx->lastAsText);
465 }
466 }
467
468 delete[] buf;
469 }
470 }
471
472 extern "C" {
473 static void CommentHnd(void *userData, const char *data)
474 {
475 wxXmlParsingContext *ctx = (wxXmlParsingContext*)userData;
476
477 if (ctx->node)
478 {
479 // VS: ctx->node == NULL happens if there is a comment before
480 // the root element (e.g. wxDesigner's output). We ignore such
481 // comments, no big deal...
482 ctx->node->AddChild(new wxXmlNode(wxXML_COMMENT_NODE,
483 wxT("comment"), CharToString(ctx->conv, data)));
484 }
485 ctx->lastAsText = NULL;
486 }
487 }
488
489 extern "C" {
490 static void DefaultHnd(void *userData, const char *s, int len)
491 {
492 // XML header:
493 if (len > 6 && memcmp(s, "<?xml ", 6) == 0)
494 {
495 wxXmlParsingContext *ctx = (wxXmlParsingContext*)userData;
496
497 wxString buf = CharToString(ctx->conv, s, (size_t)len);
498 int pos;
499 pos = buf.Find(wxT("encoding="));
500 if (pos != wxNOT_FOUND)
501 ctx->encoding = buf.Mid(pos + 10).BeforeFirst(buf[(size_t)pos+9]);
502 pos = buf.Find(wxT("version="));
503 if (pos != wxNOT_FOUND)
504 ctx->version = buf.Mid(pos + 9).BeforeFirst(buf[(size_t)pos+8]);
505 }
506 }
507 }
508
509 extern "C" {
510 static int UnknownEncodingHnd(void * WXUNUSED(encodingHandlerData),
511 const XML_Char *name, XML_Encoding *info)
512 {
513 // We must build conversion table for expat. The easiest way to do so
514 // is to let wxCSConv convert as string containing all characters to
515 // wide character representation:
516 wxString str(name, wxConvLibc);
517 wxCSConv conv(str);
518 char mbBuf[2];
519 wchar_t wcBuf[10];
520 size_t i;
521
522 mbBuf[1] = 0;
523 info->map[0] = 0;
524 for (i = 0; i < 255; i++)
525 {
526 mbBuf[0] = (char)(i+1);
527 if (conv.MB2WC(wcBuf, mbBuf, 2) == (size_t)-1)
528 {
529 // invalid/undefined byte in the encoding:
530 info->map[i+1] = -1;
531 }
532 info->map[i+1] = (int)wcBuf[0];
533 }
534
535 info->data = NULL;
536 info->convert = NULL;
537 info->release = NULL;
538
539 return 1;
540 }
541 }
542
543 bool wxXmlDocument::Load(wxInputStream& stream, const wxString& encoding)
544 {
545 #if wxUSE_UNICODE
546 (void)encoding;
547 #else
548 m_encoding = encoding;
549 #endif
550
551 const size_t BUFSIZE = 1024;
552 char buf[BUFSIZE];
553 wxXmlParsingContext ctx;
554 bool done;
555 XML_Parser parser = XML_ParserCreate(NULL);
556
557 ctx.root = ctx.node = NULL;
558 ctx.encoding = wxT("UTF-8"); // default in absence of encoding=""
559 ctx.conv = NULL;
560 #if !wxUSE_UNICODE
561 if ( encoding != wxT("UTF-8") && encoding != wxT("utf-8") )
562 ctx.conv = new wxCSConv(encoding);
563 #endif
564
565 XML_SetUserData(parser, (void*)&ctx);
566 XML_SetElementHandler(parser, StartElementHnd, EndElementHnd);
567 XML_SetCharacterDataHandler(parser, TextHnd);
568 XML_SetCommentHandler(parser, CommentHnd);
569 XML_SetDefaultHandler(parser, DefaultHnd);
570 XML_SetUnknownEncodingHandler(parser, UnknownEncodingHnd, NULL);
571
572 bool ok = true;
573 do
574 {
575 size_t len = stream.Read(buf, BUFSIZE).LastRead();
576 done = (len < BUFSIZE);
577 if (!XML_Parse(parser, buf, len, done))
578 {
579 wxString error(XML_ErrorString(XML_GetErrorCode(parser)),
580 *wxConvCurrent);
581 wxLogError(_("XML parsing error: '%s' at line %d"),
582 error.c_str(),
583 XML_GetCurrentLineNumber(parser));
584 ok = false;
585 break;
586 }
587 } while (!done);
588
589 if (ok)
590 {
591 if (!ctx.version.empty())
592 SetVersion(ctx.version);
593 if (!ctx.encoding.empty())
594 SetFileEncoding(ctx.encoding);
595 SetRoot(ctx.root);
596 }
597 else
598 {
599 delete ctx.root;
600 }
601
602 XML_ParserFree(parser);
603 #if !wxUSE_UNICODE
604 if ( ctx.conv )
605 delete ctx.conv;
606 #endif
607
608 return ok;
609
610 }
611
612
613
614 //-----------------------------------------------------------------------------
615 // wxXmlDocument saving routines
616 //-----------------------------------------------------------------------------
617
618 // write string to output:
619 inline static void OutputString(wxOutputStream& stream, const wxString& str,
620 wxMBConv *convMem = NULL,
621 wxMBConv *convFile = NULL)
622 {
623 if (str.empty())
624 return;
625
626 #if wxUSE_UNICODE
627 wxUnusedVar(convMem);
628
629 const wxWX2MBbuf buf(str.mb_str(*(convFile ? convFile : &wxConvUTF8)));
630 stream.Write((const char*)buf, strlen((const char*)buf));
631 #else // !wxUSE_UNICODE
632 if ( convFile && convMem )
633 {
634 wxString str2(str.wc_str(*convMem), *convFile);
635 stream.Write(str2.mb_str(), str2.Len());
636 }
637 else // no conversions to do
638 {
639 stream.Write(str.mb_str(), str.Len());
640 }
641 #endif // wxUSE_UNICODE/!wxUSE_UNICODE
642 }
643
// Bit flags accepted by the 'flags' argument of OutputStringEnt()
enum
{
    // also replace '"' with "&quot;"
    XML_ESCAPE_QUOTES = 0x01
};
649
650 // Same as above, but create entities first.
651 // Translates '<' to "&lt;", '>' to "&gt;" and '&' to "&amp;"
652 static void OutputStringEnt(wxOutputStream& stream, const wxString& str,
653 wxMBConv *convMem = NULL,
654 wxMBConv *convFile = NULL,
655 int flags = 0)
656 {
657 wxString buf;
658 size_t i, last, len;
659 wxChar c;
660
661 len = str.Len();
662 last = 0;
663 for (i = 0; i < len; i++)
664 {
665 c = str.GetChar(i);
666 if (c == wxT('<') || c == wxT('>') ||
667 (c == wxT('&') && str.Mid(i+1, 4) != wxT("amp;")) ||
668 ((flags & XML_ESCAPE_QUOTES) && c == wxT('"')))
669 {
670 OutputString(stream, str.Mid(last, i - last), convMem, convFile);
671 switch (c)
672 {
673 case wxT('<'):
674 OutputString(stream, wxT("&lt;"));
675 break;
676 case wxT('>'):
677 OutputString(stream, wxT("&gt;"));
678 break;
679 case wxT('&'):
680 OutputString(stream, wxT("&amp;"));
681 break;
682 case wxT('"'):
683 OutputString(stream, wxT("&quot;"));
684 break;
685 default:
686 break;
687 }
688 last = i + 1;
689 }
690 }
691 OutputString(stream, str.Mid(last, i - last), convMem, convFile);
692 }
693
694 inline static void OutputIndentation(wxOutputStream& stream, int indent)
695 {
696 wxString str = wxT("\n");
697 for (int i = 0; i < indent; i++)
698 str << wxT(' ') << wxT(' ');
699 OutputString(stream, str);
700 }
701
702 static void OutputNode(wxOutputStream& stream, wxXmlNode *node, int indent,
703 wxMBConv *convMem, wxMBConv *convFile)
704 {
705 wxXmlNode *n, *prev;
706 wxXmlProperty *prop;
707
708 switch (node->GetType())
709 {
710 case wxXML_TEXT_NODE:
711 OutputStringEnt(stream, node->GetContent(), convMem, convFile);
712 break;
713
714 case wxXML_ELEMENT_NODE:
715 OutputString(stream, wxT("<"));
716 OutputString(stream, node->GetName());
717
718 prop = node->GetProperties();
719 while (prop)
720 {
721 OutputString(stream, wxT(" ") + prop->GetName() + wxT("=\""));
722 OutputStringEnt(stream, prop->GetValue(), convMem, convFile,
723 XML_ESCAPE_QUOTES);
724 OutputString(stream, wxT("\""));
725 prop = prop->GetNext();
726 }
727
728 if (node->GetChildren())
729 {
730 OutputString(stream, wxT(">"));
731 prev = NULL;
732 n = node->GetChildren();
733 while (n)
734 {
735 if (n && n->GetType() != wxXML_TEXT_NODE)
736 OutputIndentation(stream, indent + 1);
737 OutputNode(stream, n, indent + 1, convMem, convFile);
738 prev = n;
739 n = n->GetNext();
740 }
741 if (prev && prev->GetType() != wxXML_TEXT_NODE)
742 OutputIndentation(stream, indent);
743 OutputString(stream, wxT("</"));
744 OutputString(stream, node->GetName());
745 OutputString(stream, wxT(">"));
746 }
747 else
748 OutputString(stream, wxT("/>"));
749 break;
750
751 case wxXML_COMMENT_NODE:
752 OutputString(stream, wxT("<!--"));
753 OutputString(stream, node->GetContent(), convMem, convFile);
754 OutputString(stream, wxT("-->"));
755 break;
756
757 default:
758 wxFAIL_MSG(wxT("unsupported node type"));
759 }
760 }
761
762 bool wxXmlDocument::Save(wxOutputStream& stream) const
763 {
764 if ( !IsOk() )
765 return false;
766
767 wxString s;
768
769 wxMBConv *convMem = NULL;
770
771 #if wxUSE_UNICODE
772 wxMBConv *convFile = new wxCSConv(GetFileEncoding());
773 #else
774 wxMBConv *convFile = NULL;
775 if ( GetFileEncoding() != GetEncoding() )
776 {
777 convFile = new wxCSConv(GetFileEncoding());
778 convMem = new wxCSConv(GetEncoding());
779 }
780 #endif
781
782 s.Printf(wxT("<?xml version=\"%s\" encoding=\"%s\"?>\n"),
783 GetVersion().c_str(), GetFileEncoding().c_str());
784 OutputString(stream, s);
785
786 OutputNode(stream, GetRoot(), 0, convMem, convFile);
787 OutputString(stream, wxT("\n"));
788
789 if ( convFile )
790 delete convFile;
791 if ( convMem )
792 delete convMem;
793
794 return true;
795 }
796
797 #endif // wxUSE_XML