Add support for elements preceding the document node in wxXML.
[wxWidgets.git] / interface / wx / xml / xml.h
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: xml/xml.h
3 // Purpose: interface of wxXmlNode, wxXmlAttribute, wxXmlDocument
4 // Author: wxWidgets team
5 // RCS-ID: $Id$
6 // Licence: wxWindows licence
7 /////////////////////////////////////////////////////////////////////////////
8
9
10 /// Represents the type of an XML node (see wxXmlNode::GetType()).
11 enum wxXmlNodeType
12 {
13 // note: values are synchronized with xmlElementType from libxml
14 wxXML_ELEMENT_NODE = 1,
15 wxXML_ATTRIBUTE_NODE = 2,
16 wxXML_TEXT_NODE = 3,
17 wxXML_CDATA_SECTION_NODE = 4,
18 wxXML_ENTITY_REF_NODE = 5,
19 wxXML_ENTITY_NODE = 6,
20 wxXML_PI_NODE = 7,
21 wxXML_COMMENT_NODE = 8,
22 wxXML_DOCUMENT_NODE = 9,
23 wxXML_DOCUMENT_TYPE_NODE = 10,
24 wxXML_DOCUMENT_FRAG_NODE = 11,
25 wxXML_NOTATION_NODE = 12,
26 wxXML_HTML_DOCUMENT_NODE = 13
27 };
28
29 /**
30 @class wxXmlNode
31
32 Represents a node in an XML document. See wxXmlDocument.
33
34 A node has a name and may have content and attributes.
35
36 The most common node types are @c wxXML_TEXT_NODE (name and attributes are irrelevant)
37 and @c wxXML_ELEMENT_NODE.
38
39 Example: in <tt>\<title\>hi\</title\></tt> there is an element with the name
40 @c title and irrelevant content and one child of type @c wxXML_TEXT_NODE
41 with @c hi as content.
42
43 The @c wxXML_PI_NODE type sets the name to the PI target and the contents to
44 the instructions. Note that whilst the PI instructions are often in the form
45 of pseudo-attributes these do not use the node's attribute system. It is the user's
46 responsibility to code and decode the instruction text.
47
48 If @c wxUSE_UNICODE is 0, all strings are encoded in the encoding given to
49 wxXmlDocument::Load (default is UTF-8).
50
51 @library{wxxml}
52 @category{xml}
53
54 @see wxXmlDocument, wxXmlAttribute
55 */
56 class wxXmlNode
57 {
58 public:
59 /**
60 Creates this XML node and optionally inserts it into an existing XML tree.
61
62 @param parent
63 The parent node to which this node instance is appended.
64 If this argument is @NULL this new node will be floating and it can
65 be appended later to another one using the AddChild() or InsertChild()
66 functions. Otherwise the child is already added to the XML tree by
67 this constructor and it shouldn't be done again.
68 @param type
69 One of the ::wxXmlNodeType enumeration values.
70 @param name
71 The name of the node. This is the string which appears between angular brackets.
72 @param content
73 The content of the node.
74 Only meaningful when type is @c wxXML_TEXT_NODE or @c wxXML_CDATA_SECTION_NODE.
75 @param attrs
76 If not @NULL, this wxXmlAttribute object and any siblings it has are attached to the node.
77 @param next
78 If not @NULL, this node and any siblings it has are attached to the node.
79 @param lineNo
80 Line number at which this node was present in the input file, or -1.
81 */
82 wxXmlNode(wxXmlNode* parent, wxXmlNodeType type,
83 const wxString& name,
84 const wxString& content = wxEmptyString,
85 wxXmlAttribute* attrs = NULL,
86 wxXmlNode* next = NULL, int lineNo = -1);
87
88 /**
89 A simplified version of the first constructor form, assuming a @NULL parent.
90
91 @param type
92 One of the ::wxXmlNodeType enumeration values.
93 @param name
94 The name of the node. This is the string which appears between angular brackets.
95 @param content
96 The content of the node.
97 Only meaningful when type is @c wxXML_TEXT_NODE or @c wxXML_CDATA_SECTION_NODE.
98 @param lineNo
99 Line number at which this node was present in the input file, or -1.
100 */
101 wxXmlNode(wxXmlNodeType type, const wxString& name,
102 const wxString& content = wxEmptyString,
103 int lineNo = -1);
104
105 /**
106 Copy constructor.
107
108 Note that this does NOT copy siblings and parent pointer, i.e. GetParent()
109 and GetNext() will return @NULL after using the copy ctor and are never modified by operator=().
110 On the other hand, it DOES copy children and attributes.
111 */
112 wxXmlNode(const wxXmlNode& node);
113
114 /**
115 The virtual destructor. Deletes attached children and attributes.
116 */
117 virtual ~wxXmlNode();
118
119 /**
120 Appends an attribute with given @a name and @a value to the list of
121 attributes for this node.
122 */
123 virtual void AddAttribute(const wxString& name, const wxString& value);
124
125 /**
126 Appends given attribute to the list of attributes for this node.
127 */
128 virtual void AddAttribute(wxXmlAttribute* attr);
129
130 /**
131 Adds node @a child as the last child of this node.
132
133 @note
134 Note that this function works in O(n) time where @e n is the number
135 of existing children. Consequently, adding large number of child
136 nodes using this method can be expensive, because it has O(n^2) time
137 complexity in number of nodes to be added. Use InsertChildAfter() to
138 populate XML tree in linear time.
139
140 @see InsertChild(), InsertChildAfter()
141 */
142 virtual void AddChild(wxXmlNode* child);
143
144 /**
145 Removes the first attribute which has the given @a name from the list of
146 attributes for this node.
147 */
148 virtual bool DeleteAttribute(const wxString& name);
149
150 /**
151 Returns @true if an attribute named @a attrName could be found.
152 The value of that attribute is saved in @a value (which must not be @NULL).
153 */
154 bool GetAttribute(const wxString& attrName, wxString* value) const;
155
156 /**
157 Returns the value of the attribute named @a attrName if it does exist.
158 If it does not exist, the @a defaultVal is returned.
159 */
160 wxString GetAttribute(const wxString& attrName,
161 const wxString& defaultVal = wxEmptyString) const;
162
163 /**
164 Return a pointer to the first attribute of this node.
165 */
166 wxXmlAttribute* GetAttributes() const;
167
168 /**
169 Returns the first child of this node.
170 To get a pointer to the second child of this node (if it does exist), use the
171 GetNext() function on the returned value.
172 */
173 wxXmlNode* GetChildren() const;
174
175 /**
176 Returns the content of this node. Can be an empty string.
177 Be aware that for nodes of type @c wxXML_ELEMENT_NODE (the most used node type)
178 the content is an empty string. See GetNodeContent() for more details.
179 */
180 const wxString& GetContent() const;
181
182 /**
183 Returns the number of nodes which separate this node from @c grandparent.
184
185 This function searches only the parents of this node until it finds
186 @a grandparent or the @NULL node (which is the parent of non-linked
187 nodes or the parent of a wxXmlDocument's root element node).
188 */
189 int GetDepth(wxXmlNode* grandparent = NULL) const;
190
191 /**
192 Returns a flag indicating whether encoding conversion is necessary when saving. The default is @false.
193
194 You can improve saving efficiency considerably by setting this value.
195 */
196 bool GetNoConversion() const;
197
198 /**
199 Returns line number of the node in the input XML file or @c -1 if it is unknown.
200 */
201 int GetLineNumber() const;
202
203 /**
204 Returns the name of this node.
205 Can be an empty string (e.g. for nodes of type @c wxXML_TEXT_NODE or
206 @c wxXML_CDATA_SECTION_NODE).
207 */
208 const wxString& GetName() const;
209
210 /**
211 Returns a pointer to the sibling of this node or @NULL if there are no
212 siblings.
213 */
214 wxXmlNode* GetNext() const;
215
216 /**
217 Returns the content of the first child node of type @c wxXML_TEXT_NODE
218 or @c wxXML_CDATA_SECTION_NODE.
219 This function is very useful since the XML snippet <tt>\<tagname\>tagcontent\</tagname\></tt>
220 is represented by expat with the following tag tree:
221
222 @code
223 wxXML_ELEMENT_NODE name="tagname", content=""
224 |-- wxXML_TEXT_NODE name="", content="tagcontent"
225 @endcode
226
227 or possibly:
228
229 @code
230 wxXML_ELEMENT_NODE name="tagname", content=""
231 |-- wxXML_CDATA_SECTION_NODE name="", content="tagcontent"
232 @endcode
233
234 An empty string is returned if the node has no children of type
235 @c wxXML_TEXT_NODE or @c wxXML_CDATA_SECTION_NODE, or if the content
236 of the first child of such types is empty.
237 */
238 wxString GetNodeContent() const;
239
240 /**
241 Returns a pointer to the parent of this node or @NULL if this node has no
242 parent.
243 */
244 wxXmlNode* GetParent() const;
245
246 /**
247 Returns the type of this node.
248 */
249 wxXmlNodeType GetType() const;
250
251 /**
252 Returns @true if this node has an attribute named @a attrName.
253 */
254 bool HasAttribute(const wxString& attrName) const;
255
256 /**
257 Inserts the @a child node immediately before @a followingNode in the
258 children list.
259
260 @return @true if @a followingNode has been found and the @a child
261 node has been inserted.
262
263 @note
264 For historical reasons, @a followingNode may be @NULL. In that case,
265 @a child is prepended to the list of children and becomes the
266 first child of this node, i.e. it behaves identically to using the
267 first child (as returned by GetChildren()) for @a followingNode.
268
269 @see AddChild(), InsertChildAfter()
270 */
271 virtual bool InsertChild(wxXmlNode* child, wxXmlNode* followingNode);
272
273 /**
274 Inserts the @a child node immediately after @a precedingNode in the
275 children list.
276
277 @return @true if @a precedingNode has been found and the @a child
278 node has been inserted.
279
280 @param child
281 The child to insert.
282 @param precedingNode
283 The node to insert @a child after. As a special case, this can be
284 @NULL if this node has no children yet -- in that case, @a child
285 will become this node's only child node.
286
287 @since 2.8.8
288
289 @see InsertChild(), AddChild()
290 */
291 virtual bool InsertChildAfter(wxXmlNode* child, wxXmlNode* precedingNode);
292
293 /**
294 Returns @true if the content of this node is a string containing only
295 whitespace (spaces, tabs, new lines, etc).
296
297 Note that this function is locale-independent since the parsing of XML
298 documents must always produce the exact same tree regardless of the
299 locale it runs under.
300 */
301 bool IsWhitespaceOnly() const;
302
303 /**
304 Removes the given node from the children list.
305
306 Returns @true if the node was found and removed or @false if the node
307 could not be found.
308 Note that the caller is responsible for deleting the removed node in order
309 to avoid memory leaks.
310 */
311 virtual bool RemoveChild(wxXmlNode* child);
312
313 /**
314 Sets as first attribute the given wxXmlAttribute object.
315
316 The caller is responsible for deleting any previously present attributes
317 attached to this node.
318 */
319 void SetAttributes(wxXmlAttribute* attr);
320
321 /**
322 Sets as first child the given node.
323
324 The caller is responsible for deleting any previously present child nodes.
325 */
326 void SetChildren(wxXmlNode* child);
327
328 /**
329 Sets the content of this node.
330 */
331 void SetContent(const wxString& con);
332
333 /**
334 Sets the name of this node.
335 */
336 void SetName(const wxString& name);
337
338 /**
339 Sets as sibling the given node.
340
341 The caller is responsible for deleting any previously present sibling node.
342 */
343 void SetNext(wxXmlNode* next);
344
345 /**
346 Sets a flag to indicate whether encoding conversion is necessary when saving. The default is @false.
347
348 You can improve saving efficiency considerably by setting this value.
349 */
350 void SetNoConversion(bool noconversion);
351
352 /**
353 Sets as parent the given node.
354
355 The caller is responsible for deleting any previously present parent node.
356 */
357 void SetParent(wxXmlNode* parent);
358
359 /**
360 Sets the type of this node.
361 */
362 void SetType(wxXmlNodeType type);
363
364 /**
365 See the copy constructor for more info.
366 */
367 wxXmlNode& operator=(const wxXmlNode& node);
368 };
369
370
371
372 /**
373 @class wxXmlAttribute
374
375 Represents a node attribute (see wxXmlNode::GetAttributes()).
376
377 Example: in <tt>\<img src="hello.gif" id="3"/\></tt>, @c src is an attribute
378 with value @c hello.gif and @c id is an attribute with value @c 3.
379
380 @library{wxxml}
381 @category{xml}
382
383 @see wxXmlDocument, wxXmlNode
384 */
385 class wxXmlAttribute
386 {
387 public:
388 /**
389 Default constructor.
390 */
391 wxXmlAttribute();
392
393 /**
394 Creates the attribute with given @a name and @a value.
395 If @a next is not @NULL, it is set as the sibling of this attribute.
396 */
397 wxXmlAttribute(const wxString& name, const wxString& value,
398 wxXmlAttribute* next = NULL);
399
400 /**
401 The virtual destructor.
402 */
403 virtual ~wxXmlAttribute();
404
405 /**
406 Returns the name of this attribute.
407 */
408 wxString GetName() const;
409
410 /**
411 Returns the sibling of this attribute or @NULL if there are no siblings.
412 */
413 wxXmlAttribute* GetNext() const;
414
415 /**
416 Returns the value of this attribute.
417 */
418 wxString GetValue() const;
419
420 /**
421 Sets the name of this attribute.
422 */
423 void SetName(const wxString& name);
424
425 /**
426 Sets the sibling of this attribute.
427 */
428 void SetNext(wxXmlAttribute* next);
429
430 /**
431 Sets the value of this attribute.
432 */
433 void SetValue(const wxString& value);
434 };
435
436
437
438 /**
439 @class wxXmlDocument
440
441 This class holds XML data/document as parsed by XML parser in the root node.
442
443 wxXmlDocument internally uses the expat library which comes with wxWidgets to
444 parse the given stream.
445
446 A simple example of using XML classes is:
447
448 @code
449 wxXmlDocument doc;
450 if (!doc.Load("myfile.xml"))
451 return false;
452
453 // start processing the XML file
454 if (doc.GetRoot()->GetName() != "myroot-node")
455 return false;
456
457 // examine prologue
458 wxXmlNode *prolog = doc.GetDocumentNode()->GetChildren();
459 while (prolog) {
460
461 if (prolog->GetType() == wxXML_PI_NODE && prolog->GetName() == "target") {
462
463 // process Processing Instruction contents
464 wxString pi = prolog->GetContent();
465
466 ...
467 }
468 prolog = prolog->GetNext();
469 }
470
471 wxXmlNode *child = doc.GetRoot()->GetChildren();
472 while (child) {
473
474 if (child->GetName() == "tag1") {
475
476 // process text enclosed by <tag1></tag1>
477 wxString content = child->GetNodeContent();
478
479 ...
480
481 // process attributes of tag1
482 wxString attrvalue1 =
483 child->GetAttribute("attr1", "default-value");
484 wxString attrvalue2 =
485 child->GetAttribute("attr2", "default-value");
486
487 ...
488
489 } else if (child->GetName() == "tag2") {
490
491 // process tag2 ...
492 }
493
494 child = child->GetNext();
495 }
496 @endcode
497
498 Note that if you want to preserve the original formatting of the loaded file
499 including whitespaces and indentation, you need to turn off whitespace-only
500 textnode removal and automatic indentation:
501
502 @code
503 wxXmlDocument doc;
504 doc.Load("myfile.xml", "UTF-8", wxXMLDOC_KEEP_WHITESPACE_NODES);
505
506 // myfile2.xml will be identical to myfile.xml saving it this way:
507 doc.Save("myfile2.xml", wxXML_NO_INDENTATION);
508 @endcode
509
510 Using default parameters, you will get a reformatted document which in general
511 is different from the original loaded content:
512
513 @code
514 wxXmlDocument doc;
515 doc.Load("myfile.xml");
516 doc.Save("myfile2.xml"); // myfile2.xml != myfile.xml
517 @endcode
518
519 @library{wxxml}
520 @category{xml}
521
522 @see wxXmlNode, wxXmlAttribute
523 */
524 class wxXmlDocument : public wxObject
525 {
526 public:
527 /**
528 Default constructor.
529 */
530 wxXmlDocument();
531
532 /**
533 Copy constructor. Deep copies all the XML tree of the given document.
534 */
535 wxXmlDocument(const wxXmlDocument& doc);
536
537 /**
538 Loads the given filename using the given encoding. See Load().
539 */
540 wxXmlDocument(const wxString& filename,
541 const wxString& encoding = "UTF-8");
542
543 /**
544 Loads the XML document from given stream using the given encoding. See Load().
545 */
546 wxXmlDocument(wxInputStream& stream,
547 const wxString& encoding = "UTF-8");
548
549 /**
550 Virtual destructor. Frees the document root node.
551 */
552 virtual ~wxXmlDocument();
553
554 /**
555 Appends a Processing Instruction or Comment node to the document prologue.
556
557 Calling this function will create a prologue or attach the node to the
558 end of an existing prologue.
559
560 @since 2.9.2
561 */
562 void AppendToProlog(wxXmlNode* node);
563
564 /**
565 Detaches the document node and returns it.
566
567 The document node will be set to @NULL and thus IsOk() will
568 return @false after calling this function.
569
570 Note that the caller is responsible for deleting the returned node in order
571 to avoid memory leaks.
572
573 @since 2.9.2
574 */
575 wxXmlNode* DetachDocumentNode();
576
577 /**
578 Detaches the root entity node and returns it.
579
580 After calling this function, the document node will remain together with
581 any prologue nodes, but IsOk() will return @false since the root entity
582 will be missing.
583
584 Note that the caller is responsible for deleting the returned node in order
585 to avoid memory leaks.
586 */
587 wxXmlNode* DetachRoot();
588
589 /**
590 Returns encoding of in-memory representation of the document
591 (same as passed to Load() or constructor, defaults to UTF-8).
592
593 @note this is meaningless in Unicode build where data are stored as @c wchar_t*.
594 */
595 wxString GetEncoding() const;
596
597 /**
598 Returns encoding of document (may be empty).
599
600 @note This is the encoding original file was saved in, @b not the
601 encoding of in-memory representation!
602 */
603 const wxString& GetFileEncoding() const;
604
605 /**
606 Returns the document node of the document.
607
608 @since 2.9.2
609 */
610 wxXmlNode* GetDocumentNode() const;
611
612 /**
613 Returns the root element node of the document.
614 */
615 wxXmlNode* GetRoot() const;
616
617 /**
618 Returns the version of document.
619
620 This is the value in the @c \<?xml version="1.0"?\> header of the XML document.
621 If the version attribute was not explicitly given in the header, this function
622 returns an empty string.
623 */
624 const wxString& GetVersion() const;
625
626 /**
627 Returns @true if the document has been loaded successfully.
628 */
629 bool IsOk() const;
630
631
632 /**
633 Parses @a filename as an xml document and loads its data.
634
635 If @a flags does not contain wxXMLDOC_KEEP_WHITESPACE_NODES, then, while loading,
636 all nodes of type @c wxXML_TEXT_NODE (see wxXmlNode) are automatically skipped
637 if they contain whitespaces only.
638
639 The removal of these nodes makes the load process slightly faster and requires
640 less memory, but makes it impossible to recreate exactly the loaded text with a
641 Save() call later. Read the initial description of this class for more info.
642
643 @return @true on success, @false otherwise.
644 */
645 virtual bool Load(const wxString& filename,
646 const wxString& encoding = "UTF-8", int flags = wxXMLDOC_NONE);
647
648 /**
649 Like Load(const wxString&, const wxString&, int) but takes the data from
650 given input stream.
651 */
652 virtual bool Load(wxInputStream& stream,
653 const wxString& encoding = "UTF-8", int flags = wxXMLDOC_NONE);
654
655 /**
656 Saves XML tree creating a file named with given string.
657
658 If @a indentstep is greater than or equal to zero, then, while saving,
659 an automatic indentation is added with steps composed by indentstep spaces.
660
661 If @a indentstep is @c wxXML_NO_INDENTATION, then, automatic indentation
662 is turned off.
663 */
664 virtual bool Save(const wxString& filename, int indentstep = 2) const;
665
666 /**
667 Saves XML tree in the given output stream.
668 See Save(const wxString&, int) for a description of @a indentstep.
669 */
670 virtual bool Save(wxOutputStream& stream, int indentstep = 2) const;
671
672 /**
673 Sets the document node of this document.
674
675 Deletes any previous document node.
676 Use DetachDocumentNode() and then SetDocumentNode() if you want to
677 replace the document node without deleting the old document tree.
678
679 @since 2.9.2
680 */
681 void SetDocumentNode(wxXmlNode* node);
682
683 /**
684 Sets the encoding of the document.
685 */
686 void SetEncoding(const wxString& enc);
687
688 /**
689 Sets the encoding of the file which will be used to save the document.
690 */
691 void SetFileEncoding(const wxString& encoding);
692
693 /**
694 Sets the root element node of this document.
695
696 Will create the document node if necessary. Any previous
697 root element node is deleted.
698 */
699 void SetRoot(wxXmlNode* node);
700
701 /**
702 Sets the version of the XML file which will be used to save the document.
703 */
704 void SetVersion(const wxString& version);
705
706 /**
707 Deep copies the given document.
708 */
709 wxXmlDocument& operator=(const wxXmlDocument& doc);
710
711 /**
712 Get expat library version information.
713
714 @since 2.9.2
715 @see wxVersionInfo
716 */
717 static wxVersionInfo GetLibraryVersionInfo();
718 };
719