Added a flag suppressing node content conversion when saving to XML.
[wxWidgets.git] / interface / wx / xml / xml.h
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: xml/xml.h
3 // Purpose: interface of wxXmlNode, wxXmlAttribute, wxXmlDocument
4 // Author: wxWidgets team
5 // RCS-ID: $Id$
6 // Licence: wxWindows licence
7 /////////////////////////////////////////////////////////////////////////////
8
9
/// Represents XML node type.
enum wxXmlNodeType
{
    // note: values are synchronized with xmlElementType from libxml
    wxXML_ELEMENT_NODE       =  1,
    wxXML_ATTRIBUTE_NODE     =  2,
    wxXML_TEXT_NODE          =  3,
    wxXML_CDATA_SECTION_NODE =  4,
    wxXML_ENTITY_REF_NODE    =  5,
    wxXML_ENTITY_NODE        =  6,
    wxXML_PI_NODE            =  7,
    wxXML_COMMENT_NODE       =  8,
    wxXML_DOCUMENT_NODE      =  9,
    wxXML_DOCUMENT_TYPE_NODE = 10,
    wxXML_DOCUMENT_FRAG_NODE = 11,
    wxXML_NOTATION_NODE      = 12,
    wxXML_HTML_DOCUMENT_NODE = 13
};
28
29 /**
30 @class wxXmlNode
31
32 Represents a node in an XML document. See wxXmlDocument.
33
34 Node has a name and may have content and attributes.
35
36 Most common node types are @c wxXML_TEXT_NODE (name and attributes are irrelevant)
37 and @c wxXML_ELEMENT_NODE.
38
39 Example: in <tt>\<title\>hi\</title\></tt> there is an element with the name
40 @c title and irrelevant content and one child of type @c wxXML_TEXT_NODE
41 with @c hi as content.
42
43 If @c wxUSE_UNICODE is 0, all strings are encoded in the encoding given to
44 wxXmlDocument::Load (default is UTF-8).
45
46 @library{wxxml}
47 @category{xml}
48
49 @see wxXmlDocument, wxXmlAttribute
50 */
51 class wxXmlNode
52 {
53 public:
54 /**
55 Creates this XML node and eventually insert it into an existing XML tree.
56
57 @param parent
58 The parent node to which append this node instance.
59 If this argument is @NULL this new node will be floating and it can
60 be appended later to another one using the AddChild() or InsertChild()
61 functions. Otherwise the child is already added to the XML tree by
62 this constructor and it shouldn't be done again.
63 @param type
64 One of the ::wxXmlNodeType enumeration value.
65 @param name
66 The name of the node. This is the string which appears between angular brackets.
67 @param content
68 The content of the node.
69 Only meaningful when type is @c wxXML_TEXT_NODE or @c wxXML_CDATA_SECTION_NODE.
70 @param attrs
71 If not @NULL, this wxXmlAttribute object and its eventual siblings are attached to the node.
72 @param next
73 If not @NULL, this node and its eventual siblings are attached to the node.
74 @param lineNo
75 Number of line this node was present at in input file or -1.
76 */
77 wxXmlNode(wxXmlNode* parent, wxXmlNodeType type,
78 const wxString& name,
79 const wxString& content = wxEmptyString,
80 wxXmlAttribute* attrs = NULL,
81 wxXmlNode* next = NULL, int lineNo = -1);
82
83 /**
84 A simplified version of the first constructor form, assuming a @NULL parent.
85
86 @param type
87 One of the ::wxXmlNodeType enumeration value.
88 @param name
89 The name of the node. This is the string which appears between angular brackets.
90 @param content
91 The content of the node.
92 Only meaningful when type is @c wxXML_TEXT_NODE or @c wxXML_CDATA_SECTION_NODE.
93 @param lineNo
94 Number of line this node was present at in input file or -1.
95 */
96 wxXmlNode(wxXmlNodeType type, const wxString& name,
97 const wxString& content = wxEmptyString,
98 int lineNo = -1);
99
100 /**
101 Copy constructor.
102
103 Note that this does NOT copy syblings and parent pointer, i.e. GetParent()
104 and GetNext() will return @NULL after using copy ctor and are never unmodified by operator=().
105 On the other hand, it DOES copy children and attributes.
106 */
107 wxXmlNode(const wxXmlNode& node);
108
109 /**
110 The virtual destructor. Deletes attached children and attributes.
111 */
112 virtual ~wxXmlNode();
113
114 /**
115 Appends a attribute with given @a name and @a value to the list of
116 attributes for this node.
117 */
118 virtual void AddAttribute(const wxString& name, const wxString& value);
119
120 /**
121 Appends given attribute to the list of attributes for this node.
122 */
123 virtual void AddAttribute(wxXmlAttribute* attr);
124
125 /**
126 Adds node @a child as the last child of this node.
127
128 @note
129 Note that this function works in O(n) time where @e n is the number
130 of existing children. Consequently, adding large number of child
131 nodes using this method can be expensive, because it has O(n^2) time
132 complexity in number of nodes to be added. Use InsertChildAfter() to
133 populate XML tree in linear time.
134
135 @see InsertChild(), InsertChildAfter()
136 */
137 virtual void AddChild(wxXmlNode* child);
138
139 /**
140 Removes the first attributes which has the given @a name from the list of
141 attributes for this node.
142 */
143 virtual bool DeleteAttribute(const wxString& name);
144
145 /**
146 Returns true if a attribute named attrName could be found.
147 The value of that attribute is saved in value (which must not be @NULL).
148 */
149 bool GetAttribute(const wxString& attrName, wxString* value) const;
150
151 /**
152 Returns the value of the attribute named @a attrName if it does exist.
153 If it does not exist, the @a defaultVal is returned.
154 */
155 wxString GetAttribute(const wxString& attrName,
156 const wxString& defaultVal = wxEmptyString) const;
157
158 /**
159 Return a pointer to the first attribute of this node.
160 */
161 wxXmlAttribute* GetAttributes() const;
162
163 /**
164 Returns the first child of this node.
165 To get a pointer to the second child of this node (if it does exist), use the
166 GetNext() function on the returned value.
167 */
168 wxXmlNode* GetChildren() const;
169
170 /**
171 Returns the content of this node. Can be an empty string.
172 Be aware that for nodes of type @c wxXML_ELEMENT_NODE (the most used node type)
173 the content is an empty string. See GetNodeContent() for more details.
174 */
175 const wxString& GetContent() const;
176
177 /**
178 Returns the number of nodes which separe this node from @c grandparent.
179
180 This function searches only the parents of this node until it finds
181 @a grandparent or the @NULL node (which is the parent of non-linked
182 nodes or the parent of a wxXmlDocument's root node).
183 */
184 int GetDepth(wxXmlNode* grandparent = NULL) const;
185
186 /**
187 Returns a flag indicating whether encoding conversion is necessary when saving. The default is @false.
188
189 You can improve saving efficiency considerably by setting this value.
190 */
191 bool GetNoConversion() const;
192
193 /**
194 Returns line number of the node in the input XML file or @c -1 if it is unknown.
195 */
196 int GetLineNumber() const;
197
198 /**
199 Returns the name of this node.
200 Can be an empty string (e.g. for nodes of type @c wxXML_TEXT_NODE or
201 @c wxXML_CDATA_SECTION_NODE).
202 */
203 const wxString& GetName() const;
204
205 /**
206 Returns a pointer to the sibling of this node or @NULL if there are no
207 siblings.
208 */
209 wxXmlNode* GetNext() const;
210
211 /**
212 Returns the content of the first child node of type @c wxXML_TEXT_NODE
213 or @c wxXML_CDATA_SECTION_NODE.
214 This function is very useful since the XML snippet @c "tagnametagcontent/tagname"
215 is represented by expat with the following tag tree:
216
217 @code
218 wxXML_ENTITY_NODE name="tagname", content=""
219 |-- wxXML_TEXT_NODE name="", content="tagcontent"
220 @endcode
221
222 or eventually:
223
224 @code
225 wxXML_ENTITY_NODE name="tagname", content=""
226 |-- wxXML_CDATA_SECTION_NODE name="", content="tagcontent"
227 @endcode
228
229 An empty string is returned if the node has no children of type
230 @c wxXML_TEXT_NODE or @c wxXML_CDATA_SECTION_NODE, or if the content
231 of the first child of such types is empty.
232 */
233 wxString GetNodeContent() const;
234
235 /**
236 Returns a pointer to the parent of this node or @NULL if this node has no
237 parent.
238 */
239 wxXmlNode* GetParent() const;
240
241 /**
242 Returns the type of this node.
243 */
244 wxXmlNodeType GetType() const;
245
246 /**
247 Returns @true if this node has a attribute named @a attrName.
248 */
249 bool HasAttribute(const wxString& attrName) const;
250
251 /**
252 Inserts the @a child node immediately before @a followingNode in the
253 children list.
254
255 @return @true if @a followingNode has been found and the @a child
256 node has been inserted.
257
258 @note
259 For historical reasons, @a followingNode may be @NULL. In that case,
260 then @a child is prepended to the list of children and becomes the
261 first child of this node, i.e. it behaves identically to using the
262 first children (as returned by GetChildren()) for @a followingNode).
263
264 @see AddChild(), InsertChildAfter()
265 */
266 virtual bool InsertChild(wxXmlNode* child, wxXmlNode* followingNode);
267
268 /**
269 Inserts the @a child node immediately after @a precedingNode in the
270 children list.
271
272 @return @true if @a precedingNode has been found and the @a child
273 node has been inserted.
274
275 @param child
276 The child to insert.
277 @param precedingNode
278 The node to insert @a child after. As a special case, this can be
279 @NULL if this node has no children yet -- in that case, @a child
280 will become this node's only child node.
281
282 @since 2.8.8
283
284 @see InsertChild(), AddChild()
285 */
286 virtual bool InsertChildAfter(wxXmlNode* child, wxXmlNode* precedingNode);
287
288 /**
289 Returns @true if the content of this node is a string containing only
290 whitespaces (spaces, tabs, new lines, etc).
291
292 Note that this function is locale-independent since the parsing of XML
293 documents must always produce the exact same tree regardless of the
294 locale it runs under.
295 */
296 bool IsWhitespaceOnly() const;
297
298 /**
299 Removes the given node from the children list.
300
301 Returns @true if the node was found and removed or @false if the node
302 could not be found.
303 Note that the caller is reponsible for deleting the removed node in order
304 to avoid memory leaks.
305 */
306 virtual bool RemoveChild(wxXmlNode* child);
307
308 /**
309 Sets as first attribute the given wxXmlAttribute object.
310
311 The caller is responsible for deleting any previously present attributes
312 attached to this node.
313 */
314 void SetAttributes(wxXmlAttribute* attr);
315
316 /**
317 Sets as first child the given node.
318
319 The caller is responsible for deleting any previously present children node.
320 */
321 void SetChildren(wxXmlNode* child);
322
323 /**
324 Sets the content of this node.
325 */
326 void SetContent(const wxString& con);
327
328 /**
329 Sets the name of this node.
330 */
331 void SetName(const wxString& name);
332
333 /**
334 Sets as sibling the given node.
335
336 The caller is responsible for deleting any previously present sibling node.
337 */
338 void SetNext(wxXmlNode* next);
339
340 /**
341 Sets a flag to indicate whether encoding conversion is necessary when saving. The default is @false.
342
343 You can improve saving efficiency considerably by setting this value.
344 */
345 void SetNoConversion(bool noconversion);
346
347 /**
348 Sets as parent the given node.
349
350 The caller is responsible for deleting any previously present parent node.
351 */
352 void SetParent(wxXmlNode* parent);
353
354 /**
355 Sets the type of this node.
356 */
357 void SetType(wxXmlNodeType type);
358
359 /**
360 See the copy constructor for more info.
361 */
362 wxXmlNode& operator=(const wxXmlNode& node);
363 };
364
365
366
367 /**
368 @class wxXmlAttribute
369
370 Represents a node attribute.
371
372 Example: in <tt>\<img src="hello.gif" id="3"/\></tt>, @c src is an attribute
373 with value @c hello.gif and @c id is an attribute with value @c 3.
374
375 @library{wxxml}
376 @category{xml}
377
378 @see wxXmlDocument, wxXmlNode
379 */
380 class wxXmlAttribute
381 {
382 public:
383 /**
384 Default constructor.
385 */
386 wxXmlAttribute();
387
388 /**
389 Creates the attribute with given @a name and @a value.
390 If @a next is not @NULL, then sets it as sibling of this attribute.
391 */
392 wxXmlAttribute(const wxString& name, const wxString& value,
393 wxXmlAttribute* next = NULL);
394
395 /**
396 The virtual destructor.
397 */
398 virtual ~wxXmlAttribute();
399
400 /**
401 Returns the name of this attribute.
402 */
403 wxString GetName() const;
404
405 /**
406 Returns the sibling of this attribute or @NULL if there are no siblings.
407 */
408 wxXmlAttribute* GetNext() const;
409
410 /**
411 Returns the value of this attribute.
412 */
413 wxString GetValue() const;
414
415 /**
416 Sets the name of this attribute.
417 */
418 void SetName(const wxString& name);
419
420 /**
421 Sets the sibling of this attribute.
422 */
423 void SetNext(wxXmlAttribute* next);
424
425 /**
426 Sets the value of this attribute.
427 */
428 void SetValue(const wxString& value);
429 };
430
431
432
433 /**
434 @class wxXmlDocument
435
436 This class holds XML data/document as parsed by XML parser in the root node.
437
438 wxXmlDocument internally uses the expat library which comes with wxWidgets to
439 parse the given stream.
440
441 A simple example of using XML classes is:
442
443 @code
444 wxXmlDocument doc;
445 if (!doc.Load("myfile.xml"))
446 return false;
447
448 // start processing the XML file
449 if (doc.GetRoot()->GetName() != "myroot-node")
450 return false;
451
452 wxXmlNode *child = doc.GetRoot()->GetChildren();
453 while (child) {
454
455 if (child->GetName() == "tag1") {
456
457 // process text enclosed by tag1/tag1
458 wxString content = child->GetNodeContent();
459
460 ...
461
462 // process attributes of tag1
463 wxString attrvalue1 =
464 child->GetAttribute("attr1", "default-value");
465 wxString attrvalue2 =
466 child->GetAttribute("attr2", "default-value");
467
468 ...
469
470 } else if (child->GetName() == "tag2") {
471
472 // process tag2 ...
473 }
474
475 child = child->GetNext();
476 }
477 @endcode
478
479 Note that if you want to preserve the original formatting of the loaded file
480 including whitespaces and indentation, you need to turn off whitespace-only
481 textnode removal and automatic indentation:
482
483 @code
484 wxXmlDocument doc;
485 doc.Load("myfile.xml", "UTF-8", wxXMLDOC_KEEP_WHITESPACE_NODES);
486
487 // myfile2.xml will be indentic to myfile.xml saving it this way:
488 doc.Save("myfile2.xml", wxXML_NO_INDENTATION);
489 @endcode
490
491 Using default parameters, you will get a reformatted document which in general
492 is different from the original loaded content:
493
494 @code
495 wxXmlDocument doc;
496 doc.Load("myfile.xml");
497 doc.Save("myfile2.xml"); // myfile2.xml != myfile.xml
498 @endcode
499
500 @library{wxxml}
501 @category{xml}
502
503 @see wxXmlNode, wxXmlAttribute
504 */
505 class wxXmlDocument : public wxObject
506 {
507 public:
508 /**
509 Default constructor.
510 */
511 wxXmlDocument();
512
513 /**
514 Copy constructor. Deep copies all the XML tree of the given document.
515 */
516 wxXmlDocument(const wxXmlDocument& doc);
517
518 /**
519 Loads the given filename using the given encoding. See Load().
520 */
521 wxXmlDocument(const wxString& filename,
522 const wxString& encoding = "UTF-8"));
523
524 /**
525 Loads the XML document from given stream using the given encoding. See Load().
526 */
527 wxXmlDocument(wxInputStream& stream,
528 const wxString& encoding = "UTF-8");
529
530 /**
531 Virtual destructor. Frees the document root node.
532 */
533 virtual ~wxXmlDocument();
534
535 /**
536 Detaches the document root node and returns it.
537
538 The document root node will be set to @NULL and thus IsOk() will
539 return @false after calling this function.
540
541 Note that the caller is reponsible for deleting the returned node in order
542 to avoid memory leaks.
543 */
544 wxXmlNode* DetachRoot();
545
546 /**
547 Returns encoding of in-memory representation of the document
548 (same as passed to Load() or constructor, defaults to UTF-8).
549
550 @note this is meaningless in Unicode build where data are stored as @c wchar_t*.
551 */
552 wxString GetEncoding() const;
553
554 /**
555 Returns encoding of document (may be empty).
556
557 @note This is the encoding original file was saved in, @b not the
558 encoding of in-memory representation!
559 */
560 const wxString& GetFileEncoding() const;
561
562 /**
563 Returns the root node of the document.
564 */
565 wxXmlNode* GetRoot() const;
566
567 /**
568 Returns the version of document.
569
570 This is the value in the @c \<?xml version="1.0"?\> header of the XML document.
571 If the version attribute was not explicitely given in the header, this function
572 returns an empty string.
573 */
574 const wxString& GetVersion() const;
575
576 /**
577 Returns @true if the document has been loaded successfully.
578 */
579 bool IsOk() const;
580
581
582 /**
583 Parses @a filename as an xml document and loads its data.
584
585 If @a flags does not contain wxXMLDOC_KEEP_WHITESPACE_NODES, then, while loading,
586 all nodes of type @c wxXML_TEXT_NODE (see wxXmlNode) are automatically skipped
587 if they contain whitespaces only.
588
589 The removal of these nodes makes the load process slightly faster and requires
590 less memory however makes impossible to recreate exactly the loaded text with a
591 Save() call later. Read the initial description of this class for more info.
592
593 Returns true on success, false otherwise.
594 */
595 virtual bool Load(const wxString& filename,
596 const wxString& encoding = "UTF-8", int flags = wxXMLDOC_NONE);
597
598 /**
599 Like Load(const wxString&, const wxString&, int) but takes the data from
600 given input stream.
601 */
602 virtual bool Load(wxInputStream& stream,
603 const wxString& encoding = "UTF-8", int flags = wxXMLDOC_NONE);
604
605 /**
606 Saves XML tree creating a file named with given string.
607
608 If @a indentstep is greater than or equal to zero, then, while saving,
609 an automatic indentation is added with steps composed by indentstep spaces.
610
611 If @a indentstep is @c wxXML_NO_INDENTATION, then, automatic indentation
612 is turned off.
613 */
614 virtual bool Save(const wxString& filename, int indentstep = 2) const;
615
616 /**
617 Saves XML tree in the given output stream.
618 See Save(const wxString&, int) for a description of @a indentstep.
619 */
620 virtual bool Save(wxOutputStream& stream, int indentstep = 2) const;
621
622 /**
623 Sets the enconding of the document.
624 */
625 void SetEncoding(const wxString& enc);
626
627 /**
628 Sets the enconding of the file which will be used to save the document.
629 */
630 void SetFileEncoding(const wxString& encoding);
631
632 /**
633 Sets the root node of this document. Deletes previous root node.
634 Use DetachRoot() and then SetRoot() if you want to replace the root
635 node without deleting the old document tree.
636 */
637 void SetRoot(wxXmlNode* node);
638
639 /**
640 Sets the version of the XML file which will be used to save the document.
641 */
642 void SetVersion(const wxString& version);
643
644 /**
645 Deep copies the given document.
646 */
647 wxXmlDocument& operator=(const wxXmlDocument& doc);
648 };
649