]> git.saurik.com Git - apple/cf.git/blob - Parsing.subproj/CFXMLParser.h
CF-368.1.tar.gz
[apple/cf.git] / Parsing.subproj / CFXMLParser.h
1 /*
2 * Copyright (c) 2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23 /* CFXMLParser.h
24 Copyright (c) 1998-2005, Apple, Inc. All rights reserved.
25 */
26
27 #if !defined(__COREFOUNDATION_CFXMLPARSER__)
28 #define __COREFOUNDATION_CFXMLPARSER__ 1
29
30 #include <CoreFoundation/CFBase.h>
31 #include <CoreFoundation/CFArray.h>
32 #include <CoreFoundation/CFData.h>
33 #include <CoreFoundation/CFDictionary.h>
34 #include <CoreFoundation/CFTree.h>
35 #include <CoreFoundation/CFURL.h>
36 #include <CoreFoundation/CFXMLNode.h>
37
38 #if defined(__cplusplus)
39 extern "C" {
40 #endif
41
/* Reference to a parser instance. struct __CFXMLParser is not defined in this
   header, so clients can only use the reference as an opaque handle. */
42 typedef struct __CFXMLParser * CFXMLParserRef;
43
44 /* These are the various options you can configure the parser with. These are
45 chosen such that an option flag of 0 (kCFXMLParserNoOptions) leaves the XML
46 as "intact" as possible (reports all structures; performs no replacements).
47 Hence, to make the parser do the most work, returning only the pure element
48 tree, set the option flag to kCFXMLParserAllOptions.
49
50 kCFXMLParserValidateDocument -
51 validate the document against its grammar from the DTD, reporting any errors.
52 Currently not supported.
53
54 kCFXMLParserSkipMetaData -
55 silently skip over metadata constructs (the DTD and comments)
56
57 kCFXMLParserReplacePhysicalEntities -
58 replace declared entities like &lt;. Note that other than the 5 predefined
59 entities (lt, gt, quot, amp, apos), these must be defined in the DTD.
60 Currently not supported.
61
62 kCFXMLParserSkipWhitespace -
63 skip over all whitespace that does not abut non-whitespace character data.
64 In other words, given <foo> <bar> blah </bar></foo>, the whitespace between
65 foo's open tag and bar's open tag would be suppressed, but the whitespace
66 around blah would be preserved.
67
68 kCFXMLParserAddImpliedAttributes -
69 where the DTD specifies implied attribute-value pairs for a particular element,
70 add those pairs to any occurrences of the element in the element tree.
71 Currently not supported.
72 */
73
/* Parser option flags. Each named option occupies its own bit, so options can be
   combined with bitwise OR; the creation functions in this header take the
   combined value in their parseOptions (CFOptionFlags) argument. */
74 typedef enum {
75 kCFXMLParserValidateDocument = (1 << 0), /* validate against the DTD grammar; currently not supported */
76 kCFXMLParserSkipMetaData = (1 << 1), /* silently skip the DTD and comments */
77 kCFXMLParserReplacePhysicalEntities = (1 << 2), /* replace declared entities; currently not supported */
78 kCFXMLParserSkipWhitespace = (1 << 3), /* skip whitespace that does not abut non-whitespace character data */
79 kCFXMLParserResolveExternalEntities = (1 << 4), /* NOTE(review): not described in the comment above; presumably asks the parser to resolve external entity references -- confirm */
80 kCFXMLParserAddImpliedAttributes = (1 << 5), /* add DTD-implied attribute-value pairs; currently not supported */
81 kCFXMLParserAllOptions = 0x00FFFFFF, /* low 24 bits set; includes every flag defined above */
82 kCFXMLParserNoOptions = 0 /* leave the XML as intact as possible: report all structures, perform no replacements */
83 } CFXMLParserOptions;
84
85 /* Parse status and error codes. Negative values mean the parse has not begun
   or is still in progress, zero means success, and positive values are errors:
   kCFXMLErrorUnexpectedEOF is 1 and each later enumerator is one greater, up to
   kCFXMLErrorNoData (15). This list is expected to grow. */
86 typedef enum {
87 kCFXMLStatusParseNotBegun = -2,
88 kCFXMLStatusParseInProgress = -1,
89 kCFXMLStatusParseSuccessful = 0,
90 kCFXMLErrorUnexpectedEOF = 1,
91 kCFXMLErrorUnknownEncoding,
92 kCFXMLErrorEncodingConversionFailure,
93 kCFXMLErrorMalformedProcessingInstruction,
94 kCFXMLErrorMalformedDTD,
95 kCFXMLErrorMalformedName,
96 kCFXMLErrorMalformedCDSect,
97 kCFXMLErrorMalformedCloseTag,
98 kCFXMLErrorMalformedStartTag,
99 kCFXMLErrorMalformedDocument,
100 kCFXMLErrorElementlessDocument,
101 kCFXMLErrorMalformedComment,
102 kCFXMLErrorMalformedCharacterReference,
103 kCFXMLErrorMalformedParsedCharacterData,
104 kCFXMLErrorNoData
105 } CFXMLParserStatusCode;
106
107
108 /* These functions are called as a parse progresses.
109
110 createXMLStructure -
111 called as new XML structures are encountered by the parser. May return NULL to indicate
112 that the given structure should be skipped; if NULL is returned for a given structure,
113 only minimal parsing is done for that structure (enough to correctly determine its end,
114 and to extract any data necessary for the remainder of the parse, such as Entity definitions).
115 createXMLStructure (or indeed, any of the tree-creation callbacks) will not be called for any
116 children of the skipped structure. The only exception is that the top-most element will always
117 be reported even if NULL was returned for the document as a whole. NOTE: for performance reasons,
118 the node passed to createXMLStructure cannot be safely retained by the client; the node as
119 a whole must be copied (via CFXMLNodeCreateCopy), or its contents must be extracted and copied.
120
121 addChild -
122 called as children are parsed and are ready to be added to the tree. If createXMLStructure
123 returns NULL for a given structure, that structure is omitted entirely, and addChild will
124 NOT be called for either a NULL child or parent.
125
126 endXMLStructure -
127 called once a structure (and all its children) are completely parsed. As elements are encountered,
128 createXMLStructure is called for them first, then addChild to add the new structure to its parent,
129 then addChild (potentially several times) to add the new structure's children to it, then finally
130 endXMLStructure to show that the structure has been fully parsed.
131
132 createXMLStructure, addChild, and endXMLStructure are all REQUIRED TO BE NON-NULL.
133
134 resolveExternalEntity -
135 called when external entities are referenced (NOT when they are simply defined). If the function
136 pointer is NULL, the parser uses its internal routines to try and resolve the entity. If the
137 function pointer is set, and the function returns NULL, a place holder for the external entity
138 is inserted into the tree. In this manner, the parser's client can prevent any external network
139 or file accesses.
140
141 handleError - called as errors/warnings are encountered in the data stream. At some point, we will
142 have an enum of the expected errors, some of which will be fatal, others of which will not. If
143 the function pointer is NULL, the parser will silently attempt to recover. The
144 handleError function may always return false to force the parser to stop; if handleError returns
145 true, the parser will attempt to recover (fatal errors will still cause the parse to abort
146 immediately).
147 */
148
/* Callback signatures used by the parser. In every callback, parser is the parser
   making the call and info is the info pointer determined by the
   CFXMLParserContext supplied when the parser was created. */

/* Called as each new XML structure is encountered. The returned pointer is the
   client's object for that structure; returning NULL makes the parser skip the
   structure and its children. nodeDesc cannot be safely retained by the client;
   copy it (CFXMLNodeCreateCopy) or copy its contents instead. */
149 typedef void * (*CFXMLParserCreateXMLStructureCallBack)(CFXMLParserRef parser, CFXMLNodeRef nodeDesc, void *info);
/* Called when a parsed child is ready to be added to its parent. Not called when
   either parent or child would be NULL (presumably both are objects previously
   returned by createXMLStructure -- confirm). */
150 typedef void (*CFXMLParserAddChildCallBack)(CFXMLParserRef parser, void *parent, void *child, void *info);
/* Called once a structure and all of its children have been completely parsed.
   NOTE(review): xmlType is presumably the object createXMLStructure returned for
   that structure -- confirm. */
151 typedef void (*CFXMLParserEndXMLStructureCallBack)(CFXMLParserRef parser, void *xmlType, void *info);
/* Called when an external entity is referenced (not when it is merely defined).
   Returning NULL makes the parser insert a place holder for the entity.
   NOTE(review): a non-NULL result presumably holds the entity's data -- confirm. */
152 typedef CFDataRef (*CFXMLParserResolveExternalEntityCallBack)(CFXMLParserRef parser, CFXMLExternalID *extID, void *info);
/* Called as errors/warnings are encountered. Return false to force the parser to
   stop, or true to let it attempt to recover (fatal errors still abort the parse). */
153 typedef Boolean (*CFXMLParserHandleErrorCallBack)(CFXMLParserRef parser, CFXMLParserStatusCode error, void *info);
/* The set of callbacks handed to CFXMLParserCreate() and
   CFXMLParserCreateWithDataFromURL(). */
154 typedef struct {
155 CFIndex version; /* NOTE(review): structure version; accepted values are not stated in this header -- confirm */
156 CFXMLParserCreateXMLStructureCallBack createXMLStructure; /* required, must be non-NULL */
157 CFXMLParserAddChildCallBack addChild; /* required, must be non-NULL */
158 CFXMLParserEndXMLStructureCallBack endXMLStructure; /* required, must be non-NULL */
159 CFXMLParserResolveExternalEntityCallBack resolveExternalEntity; /* may be NULL: the parser then uses its internal routines */
160 CFXMLParserHandleErrorCallBack handleError; /* may be NULL: the parser then silently attempts to recover */
161 } CFXMLParserCallBacks;
162
/* Callback signatures for managing the client's info pointer. Each receives the
   info pointer stored in the CFXMLParserContext below.
   NOTE(review): when and how often the parser invokes these is not stated in
   this header; the names suggest retain/release lifetime management and a
   debugging description -- confirm against the implementation. */
163 typedef const void * (*CFXMLParserRetainCallBack)(const void *info);
164 typedef void (*CFXMLParserReleaseCallBack)(const void *info);
165 typedef CFStringRef (*CFXMLParserCopyDescriptionCallBack)(const void *info);
/* Client context for a parser. It determines which info pointer, if any, is
   passed to the parser callbacks as the parse progresses. */
166 typedef struct {
167 CFIndex version; /* NOTE(review): structure version; accepted values are not stated in this header -- confirm */
168 void * info; /* client-defined pointer handed to the callbacks */
169 CFXMLParserRetainCallBack retain; /* NOTE(review): presumably optional -- confirm whether NULL is allowed */
170 CFXMLParserReleaseCallBack release; /* NOTE(review): presumably optional -- confirm whether NULL is allowed */
171 CFXMLParserCopyDescriptionCallBack copyDescription; /* NOTE(review): presumably optional -- confirm whether NULL is allowed */
172 } CFXMLParserContext;
173
/* Returns a CFTypeID; presumably the type identifier of the parser type that
   CFXMLParserRef instances belong to -- confirm against the implementation. */
174 CF_EXPORT
175 CFTypeID CFXMLParserGetTypeID(void);
176
177 /* Creates a parser which will parse the given data with the given options. xmlData may not be NULL.
178 dataSource should be the URL from which the data came, and may be NULL; it is used to resolve any
179 relative references found in xmlData. versionOfNodes determines which version CFXMLNodes are produced
180 by the parser; see CFXMLNode.h for more details. callBacks are the callbacks called by the parser as
181 the parse progresses; callBacks, callBacks->createXMLStructure, callBacks->addChild, and
182 callBacks->endXMLStructure must all be non-NULL. context determines what if any info pointer is
183 passed to the callbacks as the parse progresses; context may be NULL.
   parseOptions is a bitwise OR of CFXMLParserOptions flags (kCFXMLParserNoOptions for none).
   Creating the parser does not start the parse; call CFXMLParserParse() for that. */
184 CF_EXPORT
185 CFXMLParserRef CFXMLParserCreate(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex versionOfNodes, CFXMLParserCallBacks *callBacks, CFXMLParserContext *context);
186
187 /* Arguments as for CFXMLParserCreate(), except that the data to be parsed is loaded directly
188 from dataSource. dataSource may not be NULL. */
189 CF_EXPORT
190 CFXMLParserRef CFXMLParserCreateWithDataFromURL(CFAllocatorRef allocator, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex versionOfNodes, CFXMLParserCallBacks *callBacks, CFXMLParserContext *context);
191
/* Out-parameter accessor for the parser's context. NOTE(review): presumably fills
   the CFXMLParserContext that context points to with the context the parser was
   created with -- confirm against the implementation. */
192 CF_EXPORT
193 void CFXMLParserGetContext(CFXMLParserRef parser, CFXMLParserContext *context);
194
/* Out-parameter accessor for the parser's callbacks. NOTE(review): presumably fills
   the CFXMLParserCallBacks that callBacks points to with the callbacks the parser
   was created with -- confirm against the implementation. */
195 CF_EXPORT
196 void CFXMLParserGetCallBacks(CFXMLParserRef parser, CFXMLParserCallBacks *callBacks);
197
/* Returns the parser's source URL. NOTE(review): presumably the dataSource passed
   at creation, which CFXMLParserCreate() allows to be NULL -- confirm. */
198 CF_EXPORT
199 CFURLRef CFXMLParserGetSourceURL(CFXMLParserRef parser);
200
201 /* Returns the character index of the current parse location.
   NOTE(review): the header does not say whether the index is zero-based -- confirm. */
202 CF_EXPORT
203 CFIndex CFXMLParserGetLocation(CFXMLParserRef parser);
204
205 /* Returns the line number of the current parse location.
   NOTE(review): the header does not say whether the first line is 0 or 1 -- confirm. */
206 CF_EXPORT
207 CFIndex CFXMLParserGetLineNumber(CFXMLParserRef parser);
208
209 /* Returns the top-most object returned by the createXMLStructure callback. After a
   successful CFXMLParserParse() this is the product of the parse. The pointer is
   whatever the client's callback returned, so its type is defined by the client. */
210 CF_EXPORT
211 void *CFXMLParserGetDocument(CFXMLParserRef parser);
212
213 /* Get the status code or a user-readable description of the last error that occurred in a parse.
214 If no error has occurred, a null description string is returned. See the CFXMLParserStatusCode
215 enum above for possible status returns */
216 CF_EXPORT
217 CFXMLParserStatusCode CFXMLParserGetStatusCode(CFXMLParserRef parser);
218
/* Returns the user-readable description of the last error, or NULL if no error has
   occurred. NOTE(review): "Copy" in the name suggests the caller owns the returned
   string and must release it -- confirm. */
219 CF_EXPORT
220 CFStringRef CFXMLParserCopyErrorDescription(CFXMLParserRef parser);
221
222 /* Cause any in-progress parse to abort with the given error code and description. errorCode
223 must be positive, and errorDescription may not be NULL. Cannot be called asynchronously
224 (i.e. must be called from within a parser callback). Because errorCode must be positive,
   the kCFXMLStatus... values (which are zero or negative) are not valid arguments. */
225 CF_EXPORT
226 void CFXMLParserAbort(CFXMLParserRef parser, CFXMLParserStatusCode errorCode, CFStringRef errorDescription);
227
228 /* Starts a parse of the data the parser was created with; returns success or failure.
229 Upon success, use CFXMLParserGetDocument() to get the product of the parse. Upon
230 failure, use CFXMLParserGetStatusCode() or CFXMLParserCopyErrorDescription() to get
231 information about the error. It is an error to call CFXMLParserParse() while a
232 parse is already underway. */
233 CF_EXPORT
234 Boolean CFXMLParserParse(CFXMLParserRef parser);
235
236 /* These functions provide a higher-level interface. The XML data is parsed to a
237 special CFTree (a CFXMLTree) with known contexts and callbacks. See CFXMLNode.h
238 for full details on using a CFXMLTree and the CFXMLNodes contained therein.
239 */
240 /* Parse to a CFXMLTreeRef. xmlData, dataSource and parseOptions are as for
241 CFXMLParserCreate(). versionOfNodes determines what version CFXMLNodes are used
   to populate the tree. */
242 CF_EXPORT
243 CFXMLTreeRef CFXMLTreeCreateFromData(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex versionOfNodes);
244
245 /* As CFXMLTreeCreateFromData(), with the additional by-reference pass of a CFDictionaryRef
246 containing various error information (see the kCFXMLTreeError... key constants declared at
247 the end of this header). The caller is responsible for releasing the returned dictionary.
   If the error dictionary is not desired, pass NULL. Declared as available on
   Mac OS X 10.3 and later. */
248 CF_EXPORT
249 CFXMLTreeRef CFXMLTreeCreateFromDataWithError(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex versionOfNodes, CFDictionaryRef *errorDict) AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER;
250
251 /* Loads the data to be parsed directly from dataSource. Arguments as for
   CFXMLTreeCreateFromData(). NOTE(review): dataSource presumably may not be NULL
   here, as with CFXMLParserCreateWithDataFromURL() -- confirm. */
252 CF_EXPORT
253 CFXMLTreeRef CFXMLTreeCreateWithDataFromURL(CFAllocatorRef allocator, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex versionOfNodes);
254
255 /* Generate the XML data (ready to be written to whatever permanent storage is to be
256 used) from a CFXMLTree. Will NOT regenerate entity references (except those
257 required for syntactic correctness) if they were replaced at the parse time;
258 clients that wish this should walk the tree and re-insert any entity references
259 that should appear in the final output file. */
260 CF_EXPORT
261 CFDataRef CFXMLTreeCreateXMLData(CFAllocatorRef allocator, CFXMLTreeRef xmlTree);
262
263 /* Escaping and unescaping XML entities in CFStrings. The standard XML entities
264 are always replaced. */
265 /* Creates a CFString by replacing entities that appear in the entities dictionary.
266 Dictionary keys are the entities themselves, and the values should be CFStrings
267 containing the expansion. Pass NULL for entitiesDictionary to indicate no entities
268 other than the standard five (lt, gt, quot, amp, apos). Declared as available on
   Mac OS X 10.3 and later. */
269 CF_EXPORT
270 CFStringRef CFXMLCreateStringByEscapingEntities(CFAllocatorRef allocator, CFStringRef string, CFDictionaryRef entitiesDictionary) AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER;
271
/* Counterpart of CFXMLCreateStringByEscapingEntities() taking the same arguments.
   NOTE(review): presumably performs the reverse substitution, replacing entity
   references in string with their expansions -- confirm against the implementation. */
272 CF_EXPORT
273 CFStringRef CFXMLCreateStringByUnescapingEntities(CFAllocatorRef allocator, CFStringRef string, CFDictionaryRef entitiesDictionary) AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER;
274
275 /* CFXMLTreeCreateFromDataWithError error dictionary key constants. Each comment
   below describes the value stored under the key declared just before it. */
276 CF_EXPORT const CFStringRef kCFXMLTreeErrorDescription AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER;
277 /* value is a CFString containing the readable error string. */
278
279 CF_EXPORT const CFStringRef kCFXMLTreeErrorLineNumber AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER;
280 /* value is a CFNumber containing the line on which the error appears. */
281
282 CF_EXPORT const CFStringRef kCFXMLTreeErrorLocation AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER;
283 /* value is a CFNumber containing the byte location at which the error occurred.
   NOTE(review): CFXMLParserGetLocation() above describes its result as a character
   index; confirm whether this value is a byte offset or a character offset. */
284
285 CF_EXPORT const CFStringRef kCFXMLTreeErrorStatusCode AVAILABLE_MAC_OS_X_VERSION_10_3_AND_LATER;
286 /* value is a CFNumber containing the error status code (a CFXMLParserStatusCode value,
   presumably -- confirm). */
287
288 #if defined(__cplusplus)
289 }
290 #endif
291
292 #endif /* ! __COREFOUNDATION_CFXMLPARSER__ */
293