]> git.saurik.com Git - apple/cf.git/blob - CFXMLParser.h
CF-635.tar.gz
[apple/cf.git] / CFXMLParser.h
1 /*
2 * Copyright (c) 2011 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /* CFXMLParser.h
25 Copyright (c) 1998-2011, Apple Inc. All rights reserved.
26 */
27
28 /* CFXMLParser is deprecated. Clients should be aware of the fact that CFXMLParser has some serious
29 deficiencies in terms of both performance and standards compliance and should migrate their XML
30 parsing to NSXMLParser, NSXMLDocument, or other XML parsing technologies.
31 */
32
33 #if !defined(__COREFOUNDATION_CFXMLPARSER__)
34 #define __COREFOUNDATION_CFXMLPARSER__ 1
35
36 #include <CoreFoundation/CFBase.h>
37 #include <CoreFoundation/CFArray.h>
38 #include <CoreFoundation/CFData.h>
39 #include <CoreFoundation/CFDictionary.h>
40 #include <CoreFoundation/CFTree.h>
41 #include <CoreFoundation/CFURL.h>
42 #include <CoreFoundation/CFXMLNode.h>
43
44 CF_EXTERN_C_BEGIN
45
/* Opaque reference to a parser object. struct __CFXMLParser is not defined in this
   header, so clients handle a parser only through the functions declared below. */
46 typedef struct __CFXMLParser * CFXMLParserRef;
47
48 /* These are the various options you can configure the parser with. These are
49 chosen such that an option flag of 0 (kCFXMLParserNoOptions) leaves the XML
50 as "intact" as possible (reports all structures; performs no replacements).
51 Hence, to make the parser do the most work, returning only the pure element
52 tree, set the option flag to kCFXMLParserAllOptions.
53
54 kCFXMLParserValidateDocument -
55 validate the document against its grammar from the DTD, reporting any errors.
56 Currently not supported.
57
58 kCFXMLParserSkipMetaData -
59 silently skip over metadata constructs (the DTD and comments)
60
61 kCFXMLParserReplacePhysicalEntities -
62 replace declared entities like &lt;. Note that other than the 5 predefined
63 entities (lt, gt, quot, amp, apos), these must be defined in the DTD.
64 Currently not supported.
65
66 kCFXMLParserSkipWhitespace -
67 skip over all whitespace that does not abut non-whitespace character data.
68 In other words, given <foo> <bar> blah </bar></foo>, the whitespace between
69 foo's open tag and bar's open tag would be suppressed, but the whitespace
70 around blah would be preserved.
71
72 kCFXMLParserAddImpliedAttributes -
73 where the DTD specifies implied attribute-value pairs for a particular element,
74 add those pairs to any occurrences of the element in the element tree.
75 Currently not supported.
76 */
77
/* Parser option flags. Each named option occupies its own bit, so options can be
   combined in a single CFXMLParserOptions value. */
78 enum {
79 kCFXMLParserValidateDocument = (1UL << 0), /* currently not supported */
80 kCFXMLParserSkipMetaData = (1UL << 1),
81 kCFXMLParserReplacePhysicalEntities = (1UL << 2), /* currently not supported */
82 kCFXMLParserSkipWhitespace = (1UL << 3),
83 kCFXMLParserResolveExternalEntities = (1UL << 4), /* NOTE(review): not described in the option list above; presumably asks the parser to resolve external entities - confirm */
84 kCFXMLParserAddImpliedAttributes = (1UL << 5), /* currently not supported */
85 kCFXMLParserAllOptions = 0x00FFFFFF, /* low 24 bits set: the six flags above plus bits 6-23, which have no named flag in this header */
86 kCFXMLParserNoOptions = 0
87 };
88 typedef CFOptionFlags CFXMLParserOptions; /* type that carries a combination of the flags above */
89
90 /* This list is expected to grow */
/* Parse status and error codes: the negative values describe parse progress, 0 means
   success, and the kCFXMLError... values are positive, numbered consecutively from 1. */
91 enum {
92 kCFXMLStatusParseNotBegun = -2,
93 kCFXMLStatusParseInProgress = -1,
94 kCFXMLStatusParseSuccessful = 0,
95 kCFXMLErrorUnexpectedEOF = 1, /* first error code; the members below take 2, 3, ... implicitly */
96 kCFXMLErrorUnknownEncoding,
97 kCFXMLErrorEncodingConversionFailure,
98 kCFXMLErrorMalformedProcessingInstruction,
99 kCFXMLErrorMalformedDTD,
100 kCFXMLErrorMalformedName,
101 kCFXMLErrorMalformedCDSect,
102 kCFXMLErrorMalformedCloseTag,
103 kCFXMLErrorMalformedStartTag,
104 kCFXMLErrorMalformedDocument,
105 kCFXMLErrorElementlessDocument,
106 kCFXMLErrorMalformedComment,
107 kCFXMLErrorMalformedCharacterReference,
108 kCFXMLErrorMalformedParsedCharacterData,
109 kCFXMLErrorNoData /* 15 by implicit numbering */
110 };
111 typedef CFIndex CFXMLParserStatusCode; /* integer type that carries the values above */
112
113
114 /* These functions are called as a parse progresses.
115
116 createXMLStructure -
117 called as new XML structures are encountered by the parser. May return NULL to indicate
118 that the given structure should be skipped; if NULL is returned for a given structure,
119 only minimal parsing is done for that structure (enough to correctly determine its end,
120 and to extract any data necessary for the remainder of the parse, such as Entity definitions).
121 createXMLStructure (or indeed, any of the tree-creation callbacks) will not be called for any
122 children of the skipped structure. The only exception is that the top-most element will always
123 be reported even if NULL was returned for the document as a whole. NOTE: for performance reasons,
124 the node passed to createXMLStructure cannot be safely retained by the client; the node as
125 a whole must be copied (via CFXMLNodeCreateCopy), or its contents must be extracted and copied.
126
127 addChild -
128 called as children are parsed and are ready to be added to the tree. If createXMLStructure
129 returns NULL for a given structure, that structure is omitted entirely, and addChild will
130 NOT be called for either a NULL child or parent.
131
132 endXMLStructure -
133 called once a structure (and all its children) has been completely parsed. As elements are encountered,
134 createXMLStructure is called for them first, then addChild to add the new structure to its parent,
135 then addChild (potentially several times) to add the new structure's children to it, then finally
136 endXMLStructure to show that the structure has been fully parsed.
137
138 createXMLStructure, addChild, and endXMLStructure are all REQUIRED TO BE NON-NULL.
139
140 resolveExternalEntity -
141 called when external entities are referenced (NOT when they are simply defined). If the function
142 pointer is NULL, the parser uses its internal routines to try and resolve the entity. If the
143 function pointer is set, and the function returns NULL, a place holder for the external entity
144 is inserted into the tree. In this manner, the parser's client can prevent any external network
145 or file accesses.
146
147 handleError - called as errors/warnings are encountered in the data stream. The error is reported
148 as a CFXMLParserStatusCode (see the enum above); some errors are fatal, others are not. If
149 the function pointer is NULL, the parser will silently attempt to recover. The
150 handleError function may always return false to force the parser to stop; if handleError returns
151 true, the parser will attempt to recover (fatal errors will still cause the parse to abort
152 immediately).
153 */
154
/* Signatures of the client callbacks invoked during a parse, and the structure that bundles
   them. In every callback, info is the pointer supplied through the CFXMLParserContext. */
155 typedef void * (*CFXMLParserCreateXMLStructureCallBack)(CFXMLParserRef parser, CFXMLNodeRef nodeDesc, void *info); /* returns the client's object for the structure, or NULL to skip it; nodeDesc must be copied, not retained */
156 typedef void (*CFXMLParserAddChildCallBack)(CFXMLParserRef parser, void *parent, void *child, void *info); /* adds a parsed child to its parent; never called with a NULL parent or child */
157 typedef void (*CFXMLParserEndXMLStructureCallBack)(CFXMLParserRef parser, void *xmlType, void *info); /* called once xmlType and all its children are fully parsed */
158 typedef CFDataRef (*CFXMLParserResolveExternalEntityCallBack)(CFXMLParserRef parser, CFXMLExternalID *extID, void *info); /* returns the entity's data, or NULL to have a placeholder inserted into the tree */
159 typedef Boolean (*CFXMLParserHandleErrorCallBack)(CFXMLParserRef parser, CFXMLParserStatusCode error, void *info); /* return false to stop the parse, true to let the parser attempt recovery */
160 typedef struct {
161 CFIndex version; /* NOTE(review): expected value is not stated in this header - confirm */
162 CFXMLParserCreateXMLStructureCallBack createXMLStructure; /* required: must be non-NULL */
163 CFXMLParserAddChildCallBack addChild; /* required: must be non-NULL */
164 CFXMLParserEndXMLStructureCallBack endXMLStructure; /* required: must be non-NULL */
165 CFXMLParserResolveExternalEntityCallBack resolveExternalEntity; /* optional: if NULL the parser uses its internal routines */
166 CFXMLParserHandleErrorCallBack handleError; /* optional: if NULL the parser silently attempts to recover */
167 } CFXMLParserCallBacks;
168
/* Client context for a parser. The info pointer is what the parser hands to the parse
   callbacks as their info argument; the three function pointers operate on that pointer. */
169 typedef const void * (*CFXMLParserRetainCallBack)(const void *info);
170 typedef void (*CFXMLParserReleaseCallBack)(const void *info);
171 typedef CFStringRef (*CFXMLParserCopyDescriptionCallBack)(const void *info);
172 typedef struct {
173 CFIndex version; /* NOTE(review): expected value is not stated in this header - confirm */
174 void * info; /* client-defined pointer passed to the callbacks */
175 CFXMLParserRetainCallBack retain; /* presumably used to retain info while the parser holds it - confirm */
176 CFXMLParserReleaseCallBack release; /* presumably balances retain when the parser is done with info - confirm */
177 CFXMLParserCopyDescriptionCallBack copyDescription; /* presumably produces a description string for info - confirm */
178 } CFXMLParserContext;
179
/* Returns the CFTypeID associated with CFXMLParser objects. */
180 CF_EXPORT
181 CFTypeID CFXMLParserGetTypeID(void);
182
183 /* Creates a parser which will parse the given data with the given options. xmlData may not be NULL.
184 dataSource should be the URL from which the data came, and may be NULL; it is used to resolve any
185 relative references found in xmlData. versionOfNodes determines which version CFXMLNodes are produced
186 by the parser; see CFXMLNode.h for more details. callBacks are the callbacks called by the parser as
187 the parse progresses; callBacks, callBacks->createXMLStructure, callBacks->addChild, and
188 callBacks->endXMLStructure must all be non-NULL. context determines what if any info pointer is
189 passed to the callbacks as the parse progresses; context may be NULL. */
/* parseOptions is a combination of the CFXMLParserOptions flags. The parse itself is started
   with CFXMLParserParse(). NOTE(review): by the CF "Create" naming convention the caller
   presumably owns the returned parser and must release it - confirm. */
190 CF_EXPORT
191 CFXMLParserRef CFXMLParserCreate(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex versionOfNodes, CFXMLParserCallBacks *callBacks, CFXMLParserContext *context);
192
193 /* Same arguments as CFXMLParserCreate(), except that the data to be parsed is loaded directly
194 from dataSource rather than passed in; dataSource therefore may not be NULL. */
195 CF_EXPORT
196 CFXMLParserRef CFXMLParserCreateWithDataFromURL(CFAllocatorRef allocator, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex versionOfNodes, CFXMLParserCallBacks *callBacks, CFXMLParserContext *context);
197
/* Reports the parser's context through the caller-supplied CFXMLParserContext (the function
   itself returns nothing). NOTE(review): presumably context must be non-NULL - confirm. */
198 CF_EXPORT
199 void CFXMLParserGetContext(CFXMLParserRef parser, CFXMLParserContext *context);
200
/* Reports the parser's callbacks through the caller-supplied CFXMLParserCallBacks (the function
   itself returns nothing). NOTE(review): presumably callBacks must be non-NULL - confirm. */
201 CF_EXPORT
202 void CFXMLParserGetCallBacks(CFXMLParserRef parser, CFXMLParserCallBacks *callBacks);
203
/* Returns the parser's source URL. NOTE(review): presumably the dataSource given at creation,
   and NULL when none was supplied - confirm. */
204 CF_EXPORT
205 CFURLRef CFXMLParserGetSourceURL(CFXMLParserRef parser);
206
207 /* Returns the character index of the current parse location.
NOTE(review): kCFXMLTreeErrorLocation in this header is described as a byte location;
confirm which unit is actually reported. */
208 CF_EXPORT
209 CFIndex CFXMLParserGetLocation(CFXMLParserRef parser);
210
211 /* Returns the line number of the current parse location.
NOTE(review): the header does not say whether lines are counted from 0 or from 1 - confirm. */
212 CF_EXPORT
213 CFIndex CFXMLParserGetLineNumber(CFXMLParserRef parser);
214
215 /* Returns the top-most object returned by the createXMLStructure callback, i.e. a
client-defined pointer whose actual type is whatever that callback produces. */
216 CF_EXPORT
217 void *CFXMLParserGetDocument(CFXMLParserRef parser);
218
219 /* Get the status code of the parse; see the CFXMLParserStatusCode enum above for the
220 possible status returns. A user-readable description of the last error is available from
221 CFXMLParserCopyErrorDescription(); if no error has occurred, a null description string is returned. */
222 CF_EXPORT
223 CFXMLParserStatusCode CFXMLParserGetStatusCode(CFXMLParserRef parser);
224
/* Returns a user-readable description of the last error that occurred in a parse, or a null
   string if no error has occurred. NOTE(review): by the CF "Copy" naming convention the
   caller presumably owns the returned string - confirm. */
225 CF_EXPORT
226 CFStringRef CFXMLParserCopyErrorDescription(CFXMLParserRef parser);
227
228 /* Cause any in-progress parse to abort with the given error code and description. errorCode
229 must be positive, and errorDescription may not be NULL. Cannot be called asynchronously
230 (i.e. must be called from within a parser callback).
NOTE(review): the code and description are presumably what CFXMLParserGetStatusCode() and
CFXMLParserCopyErrorDescription() report afterwards - confirm. */
231 CF_EXPORT
232 void CFXMLParserAbort(CFXMLParserRef parser, CFXMLParserStatusCode errorCode, CFStringRef errorDescription);
233
234 /* Starts a parse of the data the parser was created with; returns success or failure.
235 Upon success, use CFXMLParserGetDocument() to get the product of the parse. Upon
236 failure, use CFXMLParserGetStatusCode() or CFXMLParserCopyErrorDescription() to get
237 information about the error. It is an error to call CFXMLParserParse() while a
238 parse is already underway. */
239 CF_EXPORT
240 Boolean CFXMLParserParse(CFXMLParserRef parser);
241
242 /* These functions provide a higher-level interface. The XML data is parsed to a
243 special CFTree (a CFXMLTree) with known contexts and callbacks. See CFXMLNode.h
244 for full details on using a CFXMLTree and the CFXMLNodes contained therein.
245 */
246 /* Parse xmlData to a CFXMLTreeRef. parseOptions are the CFXMLParserOptions flags described
247 above. versionOfNodes determines what version CFXMLNodes are used to populate the tree. */
248 CF_EXPORT
249 CFXMLTreeRef CFXMLTreeCreateFromData(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex versionOfNodes);
250
251 /* As CFXMLTreeCreateFromData(), with the additional by-reference pass of a CFDictionaryRef
252 containing various error information (see the kCFXMLTreeError... keys below). The caller is
253 responsible for releasing the returned dictionary. If the error dictionary is not desired, pass NULL. */
254 CF_EXPORT
255 CFXMLTreeRef CFXMLTreeCreateFromDataWithError(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex versionOfNodes, CFDictionaryRef *errorDict);
256
257 /* Loads the data to be parsed directly from dataSource. The remaining arguments are as for CFXMLTreeCreateFromData(). */
258 CF_EXPORT
259 CFXMLTreeRef CFXMLTreeCreateWithDataFromURL(CFAllocatorRef allocator, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex versionOfNodes);
260
261 /* Generate the XML data (ready to be written to whatever permanent storage is to be
262 used) from a CFXMLTree. Will NOT regenerate entity references (except those
263 required for syntactic correctness) if they were replaced at parse time;
264 clients that wish this should walk the tree and re-insert any entity references
265 that should appear in the final output file. */
266 CF_EXPORT
267 CFDataRef CFXMLTreeCreateXMLData(CFAllocatorRef allocator, CFXMLTreeRef xmlTree);
268
269 /* Escaping and unescaping XML entities in CFStrings. The standard XML entities
270 are always replaced. */
271 /* Creates a CFString by replacing entities that appear in the entities dictionary.
272 Dictionary keys are the entities themselves, and the values should be CFStrings
273 containing the expansion. Pass NULL for entitiesDictionary to indicate no entities
274 other than the standard five (lt, gt, quot, amp, apos). */
275 CF_EXPORT
276 CFStringRef CFXMLCreateStringByEscapingEntities(CFAllocatorRef allocator, CFStringRef string, CFDictionaryRef entitiesDictionary);
277
/* Counterpart of CFXMLCreateStringByEscapingEntities(). NOTE(review): presumably replaces
   entity references in string with their expansions, using entitiesDictionary in the same
   way (NULL meaning only the standard five) - confirm. */
278 CF_EXPORT
279 CFStringRef CFXMLCreateStringByUnescapingEntities(CFAllocatorRef allocator, CFStringRef string, CFDictionaryRef entitiesDictionary);
280
281 /* CFXMLTreeCreateFromDataWithError error dictionary key constants. */
282 CF_EXPORT const CFStringRef kCFXMLTreeErrorDescription;
283 /* Error dictionary key; its value is a CFString containing the readable error string. */
284
285 CF_EXPORT const CFStringRef kCFXMLTreeErrorLineNumber;
286 /* Error dictionary key; its value is a CFNumber containing the line on which the error appears. */
287
288 CF_EXPORT const CFStringRef kCFXMLTreeErrorLocation;
289 /* Error dictionary key; its value is a CFNumber containing the byte location at which the error occurred.
NOTE(review): CFXMLParserGetLocation() is described as returning a character index - confirm the unit. */
290
291 CF_EXPORT const CFStringRef kCFXMLTreeErrorStatusCode;
292 /* Error dictionary key; its value is a CFNumber containing the error status code (a CFXMLParserStatusCode value). */
293
294 CF_EXTERN_C_END
295
296 #endif /* ! __COREFOUNDATION_CFXMLPARSER__ */
297