]> git.saurik.com Git - apple/cf.git/blob - CFXMLParser.h
CF-550.13.tar.gz
[apple/cf.git] / CFXMLParser.h
1 /*
2 * Copyright (c) 2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /* CFXMLParser.h
25 Copyright (c) 1998-2009, Apple Inc. All rights reserved.
26 */
27
28 /* CFXMLParser will be officially deprecated in a future release of Mac OS X. Clients should be
29 aware of the fact that CFXMLParser has some serious deficiencies in terms of both performance
30 and standards compliance and should migrate their XML parsing to NSXMLParser, NSXMLDocument, or
31 other XML parsing technologies that will suit their needs better than CFXMLParser.
32 */
33
34 #if !defined(__COREFOUNDATION_CFXMLPARSER__)
35 #define __COREFOUNDATION_CFXMLPARSER__ 1
36
37 #include <CoreFoundation/CFBase.h>
38 #include <CoreFoundation/CFArray.h>
39 #include <CoreFoundation/CFData.h>
40 #include <CoreFoundation/CFDictionary.h>
41 #include <CoreFoundation/CFTree.h>
42 #include <CoreFoundation/CFURL.h>
43 #include <CoreFoundation/CFXMLNode.h>
44
45 CF_EXTERN_C_BEGIN
46
47 typedef struct __CFXMLParser * CFXMLParserRef; /* Opaque parser handle; struct __CFXMLParser is not defined in this header. */
48
49 /* These are the various options you can configure the parser with. These are
50 chosen such that an option flag of 0 (kCFXMLParserNoOptions) leaves the XML
51 as "intact" as possible (reports all structures; performs no replacements).
52 Hence, to make the parser do the most work, returning only the pure element
53 tree, set the option flag to kCFXMLParserAllOptions.
54
55 kCFXMLParserValidateDocument -
56 validate the document against its grammar from the DTD, reporting any errors.
57 Currently not supported.
58
59 kCFXMLParserSkipMetaData -
60 silently skip over metadata constructs (the DTD and comments)
61
62 kCFXMLParserReplacePhysicalEntities -
63 replace declared entities like &lt;. Note that other than the 5 predefined
64 entities (lt, gt, quot, amp, apos), these must be defined in the DTD.
65 Currently not supported.
66
67 kCFXMLParserSkipWhitespace -
68 skip over all whitespace that does not abut non-whitespace character data.
69 In other words, given <foo> <bar> blah </bar></foo>, the whitespace between
70 foo's open tag and bar's open tag would be suppressed, but the whitespace
71 around blah would be preserved.
72
73 kCFXMLParserAddImpliedAttributes -
74 where the DTD specifies implied attribute-value pairs for a particular element,
75 add those pairs to any occurrences of the element in the element tree.
76 Currently not supported.
77 */
78
79 enum { /* Bit flags for the parseOptions argument of the parser/tree creation functions. */
80 kCFXMLParserValidateDocument = (1UL << 0), /* bit 0: validate against the DTD grammar (currently not supported) */
81 kCFXMLParserSkipMetaData = (1UL << 1), /* bit 1: silently skip the DTD and comments */
82 kCFXMLParserReplacePhysicalEntities = (1UL << 2), /* bit 2: replace declared entities (currently not supported) */
83 kCFXMLParserSkipWhitespace = (1UL << 3), /* bit 3: skip whitespace that does not abut non-whitespace character data */
84 kCFXMLParserResolveExternalEntities = (1UL << 4), /* bit 4: not described in this header; presumably enables resolving external entity references -- TODO confirm */
85 kCFXMLParserAddImpliedAttributes = (1UL << 5), /* bit 5: add DTD-implied attribute-value pairs (currently not supported) */
86 kCFXMLParserAllOptions = 0x00FFFFFF, /* low 24 bits set: all six flags above plus bits 6-23, which have no named flag here */
87 kCFXMLParserNoOptions = 0 /* no flags: reports all structures and performs no replacements */
88 };
89 typedef CFOptionFlags CFXMLParserOptions; /* NOTE(review): the creation functions in this header declare parseOptions as CFOptionFlags, not this alias */
90
91 /* This list is expected to grow */
92 enum { /* Parser status codes: negative = parse state, 0 = success, positive = error. */
93 kCFXMLStatusParseNotBegun = -2,
94 kCFXMLStatusParseInProgress = -1,
95 kCFXMLStatusParseSuccessful = 0,
96 kCFXMLErrorUnexpectedEOF = 1, /* first error code; the members below take consecutive values */
97 kCFXMLErrorUnknownEncoding, /* = 2 */
98 kCFXMLErrorEncodingConversionFailure, /* = 3 */
99 kCFXMLErrorMalformedProcessingInstruction, /* = 4 */
100 kCFXMLErrorMalformedDTD, /* = 5 */
101 kCFXMLErrorMalformedName, /* = 6 */
102 kCFXMLErrorMalformedCDSect, /* = 7 */
103 kCFXMLErrorMalformedCloseTag, /* = 8 */
104 kCFXMLErrorMalformedStartTag, /* = 9 */
105 kCFXMLErrorMalformedDocument, /* = 10 */
106 kCFXMLErrorElementlessDocument, /* = 11 */
107 kCFXMLErrorMalformedComment, /* = 12 */
108 kCFXMLErrorMalformedCharacterReference, /* = 13 */
109 kCFXMLErrorMalformedParsedCharacterData, /* = 14 */
110 kCFXMLErrorNoData /* = 15 */
111 };
112 typedef CFIndex CFXMLParserStatusCode; /* returned by CFXMLParserGetStatusCode; passed to the handleError callback and to CFXMLParserAbort */
113
114
115 /* These functions are called as a parse progresses.
116
117 createXMLStructure -
118 called as new XML structures are encountered by the parser. May return NULL to indicate
119 that the given structure should be skipped; if NULL is returned for a given structure,
120 only minimal parsing is done for that structure (enough to correctly determine its end,
121 and to extract any data necessary for the remainder of the parse, such as Entity definitions).
122 createXMLStructure (or indeed, any of the tree-creation callbacks) will not be called for any
123 children of the skipped structure. The only exception is that the top-most element will always
124 be reported even if NULL was returned for the document as a whole. NOTE: for performance reasons,
125 the node passed to createXMLStructure cannot be safely retained by the client; the node as
126 a whole must be copied (via CFXMLNodeCreateCopy), or its contents must be extracted and copied.
127
128 addChild -
129 called as children are parsed and are ready to be added to the tree. If createXMLStructure
130 returns NULL for a given structure, that structure is omitted entirely, and addChild will
131 NOT be called for either a NULL child or parent.
132
133 endXMLStructure -
134 called once a structure (and all its children) is completely parsed. As elements are encountered,
135 createXMLStructure is called for them first, then addChild to add the new structure to its parent,
136 then addChild (potentially several times) to add the new structure's children to it, then finally
137 endXMLStructure to show that the structure has been fully parsed.
138
139 createXMLStructure, addChild, and endXMLStructure are all REQUIRED TO BE NON-NULL.
140
141 resolveExternalEntity -
142 called when external entities are referenced (NOT when they are simply defined). If the function
143 pointer is NULL, the parser uses its internal routines to try and resolve the entity. If the
144 function pointer is set, and the function returns NULL, a place holder for the external entity
145 is inserted into the tree. In this manner, the parser's client can prevent any external network
146 or file accesses.
147
148 handleError - called as errors/warnings are encountered in the data stream. At some point, we will
149 have an enum of the expected errors, some of which will be fatal, others of which will not. If
150 the function pointer is NULL, the parser will silently attempt to recover. The
151 handleError function may always return false to force the parser to stop; if handleError returns
152 true, the parser will attempt to recover (fatal errors will still cause the parse to abort
153 immediately).
154 */
155
156 typedef void * (*CFXMLParserCreateXMLStructureCallBack)(CFXMLParserRef parser, CFXMLNodeRef nodeDesc, void *info); /* returns the client's structure for nodeDesc, or NULL to skip it; nodeDesc must be copied, not retained */
157 typedef void (*CFXMLParserAddChildCallBack)(CFXMLParserRef parser, void *parent, void *child, void *info); /* adds a parsed child to its parent; not called when either is NULL */
158 typedef void (*CFXMLParserEndXMLStructureCallBack)(CFXMLParserRef parser, void *xmlType, void *info); /* signals that a structure and all its children are fully parsed */
159 typedef CFDataRef (*CFXMLParserResolveExternalEntityCallBack)(CFXMLParserRef parser, CFXMLExternalID *extID, void *info); /* called when an external entity is referenced; a NULL return inserts a placeholder */
160 typedef Boolean (*CFXMLParserHandleErrorCallBack)(CFXMLParserRef parser, CFXMLParserStatusCode error, void *info); /* return false to stop the parse, true to let the parser attempt recovery */
161 typedef struct { /* Client callbacks invoked as a parse progresses. */
162 CFIndex version; /* NOTE(review): accepted values are not stated in this header -- confirm */
163 CFXMLParserCreateXMLStructureCallBack createXMLStructure; /* required: must be non-NULL */
164 CFXMLParserAddChildCallBack addChild; /* required: must be non-NULL */
165 CFXMLParserEndXMLStructureCallBack endXMLStructure; /* required: must be non-NULL */
166 CFXMLParserResolveExternalEntityCallBack resolveExternalEntity; /* optional: if NULL, the parser uses its internal routines to resolve the entity */
167 CFXMLParserHandleErrorCallBack handleError; /* optional: if NULL, the parser silently attempts to recover */
168 } CFXMLParserCallBacks;
169
170 typedef const void * (*CFXMLParserRetainCallBack)(const void *info); /* receives the info pointer; NOTE(review): presumably returns the retained pointer -- confirm */
171 typedef void (*CFXMLParserReleaseCallBack)(const void *info); /* receives the info pointer; NOTE(review): presumably balances retain -- confirm */
172 typedef CFStringRef (*CFXMLParserCopyDescriptionCallBack)(const void *info); /* receives the info pointer and returns a CFStringRef; ownership presumably follows the "Copy" naming -- confirm */
173 typedef struct { /* Client context; determines the info pointer passed to the parser callbacks. */
174 CFIndex version; /* NOTE(review): accepted values are not stated in this header -- confirm */
175 void * info; /* client-defined pointer; presumably the value each callback receives as its info argument */
176 CFXMLParserRetainCallBack retain; /* NOTE(review): header does not say whether this may be NULL -- confirm */
177 CFXMLParserReleaseCallBack release; /* NOTE(review): header does not say whether this may be NULL -- confirm */
178 CFXMLParserCopyDescriptionCallBack copyDescription; /* NOTE(review): header does not say whether this may be NULL -- confirm */
179 } CFXMLParserContext;
180
181 CF_EXPORT /* Returns a CFTypeID, presumably the one identifying CFXMLParser objects (per the CF GetTypeID naming convention). */
182 CFTypeID CFXMLParserGetTypeID(void);
183
184 /* Creates a parser which will parse the given data with the given options. xmlData may not be NULL.
185 dataSource should be the URL from which the data came, and may be NULL; it is used to resolve any
186 relative references found in xmlData. versionOfNodes determines which version CFXMLNodes are produced
187 by the parser; see CFXMLNode.h for more details. callBacks are the callbacks called by the parser as
188 the parse progresses; callBacks, callBacks->createXMLStructure, callBacks->addChild, and
189 callBacks->endXMLStructure must all be non-NULL. context determines what if any info pointer is
190 passed to the callbacks as the parse progresses; context may be NULL. */
191 CF_EXPORT /* parseOptions is declared as CFOptionFlags and carries the kCFXMLParser... option flags defined earlier in this header. */
192 CFXMLParserRef CFXMLParserCreate(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex versionOfNodes, CFXMLParserCallBacks *callBacks, CFXMLParserContext *context);
193
194 /* Arguments as above, except that the data to be parsed is loaded directly
195 from dataSource. dataSource may not be NULL. */
196 CF_EXPORT /* Same parameter list as CFXMLParserCreate minus xmlData. */
197 CFXMLParserRef CFXMLParserCreateWithDataFromURL(CFAllocatorRef allocator, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex versionOfNodes, CFXMLParserCallBacks *callBacks, CFXMLParserContext *context);
198
199 CF_EXPORT /* Out-parameter accessor: presumably copies the parser's context into *context -- TODO confirm. */
200 void CFXMLParserGetContext(CFXMLParserRef parser, CFXMLParserContext *context);
201
202 CF_EXPORT /* Out-parameter accessor: presumably copies the parser's callbacks into *callBacks -- TODO confirm. */
203 void CFXMLParserGetCallBacks(CFXMLParserRef parser, CFXMLParserCallBacks *callBacks);
204
205 CF_EXPORT /* Returns a CFURLRef, presumably the dataSource given at creation (which may be NULL for CFXMLParserCreate) -- TODO confirm. */
206 CFURLRef CFXMLParserGetSourceURL(CFXMLParserRef parser);
207
208 /* Returns the character index of the current parse location */
209 CF_EXPORT
210 CFIndex CFXMLParserGetLocation(CFXMLParserRef parser);
211
212 /* Returns the line number of the current parse location */
213 CF_EXPORT
214 CFIndex CFXMLParserGetLineNumber(CFXMLParserRef parser);
215
216 /* Returns the top-most object returned by the createXMLStructure callback */
217 CF_EXPORT /* The result is void * because createXMLStructure returns a client-defined void * structure. */
218 void *CFXMLParserGetDocument(CFXMLParserRef parser);
219
220 /* Get the status code or a user-readable description of the last error that occurred in a parse.
221 If no error has occurred, a null description string is returned. See the enum above for
222 possible status returns */
223 CF_EXPORT
224 CFXMLParserStatusCode CFXMLParserGetStatusCode(CFXMLParserRef parser);
225
226 CF_EXPORT /* Returns NULL when no error has occurred; NOTE(review): a non-NULL result is presumably caller-owned per the "Copy" naming -- confirm. */
227 CFStringRef CFXMLParserCopyErrorDescription(CFXMLParserRef parser);
228
229 /* Cause any in-progress parse to abort with the given error code and description. errorCode
230 must be positive, and errorDescription may not be NULL. Cannot be called asynchronously
231 (i.e. must be called from within a parser callback) */
232 CF_EXPORT
233 void CFXMLParserAbort(CFXMLParserRef parser, CFXMLParserStatusCode errorCode, CFStringRef errorDescription);
234
235 /* Starts a parse of the data the parser was created with; returns success or failure.
236 Upon success, use CFXMLParserGetDocument() to get the product of the parse. Upon
237 failure, use CFXMLParserGetStatusCode() or CFXMLParserCopyErrorDescription() to get
238 information about the error. It is an error to call CFXMLParserParse() while a
239 parse is already underway. */
240 CF_EXPORT
241 Boolean CFXMLParserParse(CFXMLParserRef parser);
242
243 /* These functions provide a higher-level interface. The XML data is parsed to a
244 special CFTree (a CFXMLTree) with known contexts and callbacks. See CFXMLNode.h
245 for full details on using a CFXMLTree and the CFXMLNodes contained therein.
246 */
247 /* Parse to a CFXMLTreeRef. parseOptions are as above. versionOfNodes determines
248 what version CFXMLNodes are used to populate the tree. */
249 CF_EXPORT
250 CFXMLTreeRef CFXMLTreeCreateFromData(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex versionOfNodes);
251
252 /* As above, with the additional by-reference pass of a CFDictionaryRef containing
253 various error information (see below). The caller is responsible for releasing the
254 returned dictionary. If the error dictionary is not desired, pass NULL. */
255 CF_EXPORT
256 CFXMLTreeRef CFXMLTreeCreateFromDataWithError(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex versionOfNodes, CFDictionaryRef *errorDict) AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER;
257
258 /* Loads the data to be parsed directly from dataSource. Arguments as above. */
259 CF_EXPORT
260 CFXMLTreeRef CFXMLTreeCreateWithDataFromURL(CFAllocatorRef allocator, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex versionOfNodes);
261
262 /* Generate the XMLData (ready to be written to whatever permanent storage is to be
263 used) from a CFXMLTree. Will NOT regenerate entity references (except those
264 required for syntactic correctness) if they were replaced at the parse time;
265 clients that wish this should walk the tree and re-insert any entity references
266 that should appear in the final output file. */
267 CF_EXPORT
268 CFDataRef CFXMLTreeCreateXMLData(CFAllocatorRef allocator, CFXMLTreeRef xmlTree);
269
270 /* Escaping and unescaping XML entities in CFStrings. The standard XML entities
271 are always replaced. */
272 /* Creates a CFString by replacing entities that appear in the entities dictionary.
273 Dictionary keys are the entities themselves, and the values should be CFStrings
274 containing the expansion. Pass NULL for entitiesDictionary to indicate no entities
275 other than the standard five. */
276 CF_EXPORT
277 CFStringRef CFXMLCreateStringByEscapingEntities(CFAllocatorRef allocator, CFStringRef string, CFDictionaryRef entitiesDictionary) AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER;
278
279 CF_EXPORT /* NOTE(review): no separate description is given; presumably the inverse operation, using the same entitiesDictionary convention -- confirm. */
280 CFStringRef CFXMLCreateStringByUnescapingEntities(CFAllocatorRef allocator, CFStringRef string, CFDictionaryRef entitiesDictionary) AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER;
281
282 /* CFXMLTreeCreateFromDataWithError error dictionary key constants. Each value description follows the key it documents. */
283 CF_EXPORT const CFStringRef kCFXMLTreeErrorDescription AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER;
284 /* value is a CFString containing the readable error string. */
285
286 CF_EXPORT const CFStringRef kCFXMLTreeErrorLineNumber AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER;
287 /* value is a CFNumber containing the line on which the error appears. */
288
289 CF_EXPORT const CFStringRef kCFXMLTreeErrorLocation AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER;
290 /* value is a CFNumber containing the byte location at which the error occurred. NOTE(review): CFXMLParserGetLocation is documented as a character index -- confirm which unit applies here. */
291
292 CF_EXPORT const CFStringRef kCFXMLTreeErrorStatusCode AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER;
293 /* value is a CFNumber containing the error status code. */
294
295 CF_EXTERN_C_END
296
297 #endif /* ! __COREFOUNDATION_CFXMLPARSER__ */
298