]>
Commit | Line | Data |
---|---|---|
9ce05555 | 1 | /* |
8ca704e1 | 2 | * Copyright (c) 2011 Apple Inc. All rights reserved. |
9ce05555 A |
3 | * |
4 | * @APPLE_LICENSE_HEADER_START@ | |
5 | * | |
9ce05555 A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. Please obtain a copy of the License at | |
10 | * http://www.opensource.apple.com/apsl/ and read it before using this | |
11 | * file. | |
12 | * | |
13 | * The Original Code and all software distributed under the License are | |
14 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
15 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
16 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
17 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
18 | * Please see the License for the specific language governing rights and | |
19 | * limitations under the License. | |
20 | * | |
21 | * @APPLE_LICENSE_HEADER_END@ | |
22 | */ | |
f64f9b69 | 23 | |
9ce05555 | 24 | /* CFXMLParser.c |
8ca704e1 A |
25 | Copyright (c) 1999-2011, Apple Inc. All rights reserved. |
26 | Responsibility: David Smith | |
9ce05555 A |
27 | */ |
28 | ||
29 | #include <CoreFoundation/CFXMLParser.h> | |
d8925383 | 30 | #include <CoreFoundation/CFNumber.h> |
bd5b749c | 31 | #include "CFXMLInputStream.h" |
9ce05555 A |
32 | #include "CFUniChar.h" |
33 | #include "CFInternal.h" | |
34 | ||
35 | struct __CFXMLParser { | |
36 | CFRuntimeBase _cfBase; | |
37 | ||
38 | _CFXMLInputStream input; | |
39 | ||
40 | void **stack; | |
41 | void **top; | |
42 | UInt32 capacity; | |
43 | ||
44 | struct __CFXMLNode *node; // Our private node; we use it to report back information | |
45 | CFMutableDictionaryRef argDict; | |
46 | CFMutableArrayRef argArray; | |
47 | ||
48 | UInt32 options; | |
49 | CFXMLParserCallBacks callBacks; | |
50 | CFXMLParserContext context; | |
51 | ||
52 | CFXMLParserStatusCode status; | |
53 | CFStringRef errorString; | |
54 | }; | |
55 | ||
56 | static CFStringRef __CFXMLParserCopyDescription(CFTypeRef cf) { | |
bd5b749c A |
57 | const struct __CFXMLParser *parser = (const struct __CFXMLParser *)cf; |
58 | return CFStringCreateWithFormat(CFGetAllocator(cf), NULL, CFSTR("<CFXMLParser %p>"), parser); | |
9ce05555 A |
59 | } |
60 | ||
61 | static void __CFXMLParserDeallocate(CFTypeRef cf) { | |
62 | struct __CFXMLParser *parser = (struct __CFXMLParser *)cf; | |
63 | CFAllocatorRef alloc = CFGetAllocator(parser); | |
64 | _freeInputStream(&(parser->input)); | |
65 | if (parser->argDict) CFRelease(parser->argDict); | |
66 | if (parser->argArray) CFRelease(parser->argArray); | |
67 | if (parser->errorString) CFRelease(parser->errorString); | |
68 | if (parser->node) CFRelease(parser->node); | |
69 | CFAllocatorDeallocate(alloc, parser->stack); | |
70 | if (parser->context.info && parser->context.release) { | |
71 | parser->context.release(parser->context.info); | |
72 | } | |
73 | } | |
74 | ||
75 | static CFTypeID __kCFXMLParserTypeID = _kCFRuntimeNotATypeID; | |
76 | ||
77 | static const CFRuntimeClass __CFXMLParserClass = { | |
78 | 0, | |
79 | "CFXMLParser", | |
80 | NULL, // init | |
81 | NULL, // copy | |
82 | __CFXMLParserDeallocate, | |
83 | NULL, | |
84 | NULL, | |
85 | NULL, // | |
86 | __CFXMLParserCopyDescription | |
87 | }; | |
88 | ||
bd5b749c | 89 | static void __CFXMLParserInitialize(void) { |
9ce05555 A |
90 | __kCFXMLParserTypeID = _CFRuntimeRegisterClass(&__CFXMLParserClass); |
91 | } | |
92 | ||
93 | CFTypeID CFXMLParserGetTypeID(void) { | |
bd5b749c | 94 | if (_kCFRuntimeNotATypeID == __kCFXMLParserTypeID) __CFXMLParserInitialize(); |
9ce05555 A |
95 | return __kCFXMLParserTypeID; |
96 | } | |
97 | ||
9ce05555 A |
98 | void CFXMLParserGetContext(CFXMLParserRef parser, CFXMLParserContext *context) { |
99 | CFAssert1(parser != NULL, __kCFLogAssertion, "%s(): NULL parser not permitted", __PRETTY_FUNCTION__); | |
bd5b749c | 100 | __CFGenericValidateType(parser, CFXMLParserGetTypeID()); |
9ce05555 A |
101 | if (context) { |
102 | context->version = parser->context.version; | |
103 | context->info = parser->context.info; | |
bd5b749c A |
104 | context->retain = parser->context.retain; |
105 | context->release = parser->context.release; | |
106 | context->copyDescription = parser->context.copyDescription; | |
107 | UNFAULT_CALLBACK(context->retain); | |
108 | UNFAULT_CALLBACK(context->release); | |
109 | UNFAULT_CALLBACK(context->copyDescription); | |
9ce05555 A |
110 | } |
111 | } | |
112 | ||
113 | void CFXMLParserGetCallBacks(CFXMLParserRef parser, CFXMLParserCallBacks *callBacks) { | |
bd5b749c | 114 | __CFGenericValidateType(parser, CFXMLParserGetTypeID()); |
9ce05555 A |
115 | if (callBacks) { |
116 | callBacks->version = parser->callBacks.version; | |
bd5b749c A |
117 | callBacks->createXMLStructure = parser->callBacks.createXMLStructure; |
118 | callBacks->addChild = parser->callBacks.addChild; | |
119 | callBacks->endXMLStructure = parser->callBacks.endXMLStructure; | |
120 | callBacks->resolveExternalEntity = parser->callBacks.resolveExternalEntity; | |
121 | callBacks->handleError = parser->callBacks.handleError; | |
122 | UNFAULT_CALLBACK(callBacks->createXMLStructure); | |
123 | UNFAULT_CALLBACK(callBacks->addChild); | |
124 | UNFAULT_CALLBACK(callBacks->endXMLStructure); | |
125 | UNFAULT_CALLBACK(callBacks->resolveExternalEntity); | |
126 | UNFAULT_CALLBACK(callBacks->handleError); | |
9ce05555 A |
127 | } |
128 | } | |
129 | ||
9ce05555 | 130 | CFURLRef CFXMLParserGetSourceURL(CFXMLParserRef parser) { |
bd5b749c | 131 | __CFGenericValidateType(parser, CFXMLParserGetTypeID()); |
9ce05555 A |
132 | return parser->input.url; |
133 | } | |
134 | ||
135 | /* Returns the character index or line number of the current parse location */ | |
136 | CFIndex CFXMLParserGetLocation(CFXMLParserRef parser) { | |
bd5b749c | 137 | __CFGenericValidateType(parser, CFXMLParserGetTypeID()); |
9ce05555 A |
138 | return _inputStreamCurrentLocation(&parser->input); |
139 | } | |
140 | ||
141 | CFIndex CFXMLParserGetLineNumber(CFXMLParserRef parser) { | |
bd5b749c | 142 | __CFGenericValidateType(parser, CFXMLParserGetTypeID()); |
9ce05555 A |
143 | return _inputStreamCurrentLine(&parser->input); |
144 | } | |
145 | ||
146 | /* Returns the top-most object returned by the createXMLStructure callback */ | |
147 | void *CFXMLParserGetDocument(CFXMLParserRef parser) { | |
bd5b749c | 148 | __CFGenericValidateType(parser, CFXMLParserGetTypeID()); |
9ce05555 A |
149 | if (parser->capacity > 0) |
150 | return parser->stack[0]; | |
151 | else | |
152 | return NULL; | |
153 | } | |
154 | ||
155 | CFXMLParserStatusCode CFXMLParserGetStatusCode(CFXMLParserRef parser) { | |
bd5b749c | 156 | __CFGenericValidateType(parser, CFXMLParserGetTypeID()); |
9ce05555 A |
157 | return parser->status; |
158 | } | |
159 | ||
160 | CFStringRef CFXMLParserCopyErrorDescription(CFXMLParserRef parser) { | |
bd5b749c A |
161 | __CFGenericValidateType(parser, CFXMLParserGetTypeID()); |
162 | return (CFStringRef)CFRetain(parser->errorString); | |
9ce05555 A |
163 | } |
164 | ||
165 | void CFXMLParserAbort(CFXMLParserRef parser, CFXMLParserStatusCode errorCode, CFStringRef errorDescription) { | |
bd5b749c | 166 | __CFGenericValidateType(parser, CFXMLParserGetTypeID()); |
9ce05555 A |
167 | CFAssert1(errorCode > 0, __kCFLogAssertion, "%s(): errorCode must be greater than zero", __PRETTY_FUNCTION__); |
168 | CFAssert1(errorDescription != NULL, __kCFLogAssertion, "%s(): errorDescription may not be NULL", __PRETTY_FUNCTION__); | |
169 | __CFGenericValidateType(errorDescription, CFStringGetTypeID()); | |
170 | ||
171 | parser->status = errorCode; | |
172 | if (parser->errorString) CFRelease(parser->errorString); | |
bd5b749c | 173 | parser->errorString = (CFStringRef)CFStringCreateCopy(kCFAllocatorSystemDefault, errorDescription); |
9ce05555 A |
174 | } |
175 | ||
176 | ||
177 | static Boolean parseXML(CFXMLParserRef parser); | |
178 | static Boolean parseComment(CFXMLParserRef parser, Boolean report); | |
179 | static Boolean parseProcessingInstruction(CFXMLParserRef parser, Boolean report); | |
180 | static Boolean parseInlineDTD(CFXMLParserRef parser); | |
181 | static Boolean parseDTD(CFXMLParserRef parser); | |
182 | static Boolean parsePhysicalEntityReference(CFXMLParserRef parser); | |
183 | static Boolean parseCDSect(CFXMLParserRef parser); | |
184 | static Boolean parseEntityReference(CFXMLParserRef parser, Boolean report); | |
185 | static Boolean parsePCData(CFXMLParserRef parser); | |
186 | static Boolean parseWhitespace(CFXMLParserRef parser); | |
187 | static Boolean parseAttributeListDeclaration(CFXMLParserRef parser); | |
188 | static Boolean parseNotationDeclaration(CFXMLParserRef parser); | |
189 | static Boolean parseElementDeclaration(CFXMLParserRef parser); | |
190 | static Boolean parseEntityDeclaration(CFXMLParserRef parser); | |
191 | static Boolean parseExternalID(CFXMLParserRef parser, Boolean alsoAcceptPublicID, CFXMLExternalID *extID); | |
192 | static Boolean parseCloseTag(CFXMLParserRef parser, CFStringRef tag); | |
193 | static Boolean parseTagContent(CFXMLParserRef parser); | |
194 | static Boolean parseTag(CFXMLParserRef parser); | |
195 | static Boolean parseAttributes(CFXMLParserRef parser); | |
196 | static Boolean parseAttributeValue(CFXMLParserRef parser, CFMutableStringRef str); | |
197 | ||
198 | // Utilities; may need to make these accessible to the property list parser to avoid code duplication | |
199 | static void _CFReportError(CFXMLParserRef parser, CFXMLParserStatusCode errNum, const char *str); | |
200 | static Boolean reportNewLeaf(CFXMLParserRef parser); // Assumes parser->node has been set and is ready to go | |
201 | static void pushXMLNode(CFXMLParserRef parser, void *node); | |
202 | ||
203 | static CFXMLParserRef __CFXMLParserInit(CFAllocatorRef alloc, CFURLRef dataSource, CFOptionFlags options, CFDataRef xmlData, CFIndex version, CFXMLParserCallBacks *callBacks, CFXMLParserContext *context) { | |
bd5b749c | 204 | struct __CFXMLParser *parser = (struct __CFXMLParser *)_CFRuntimeCreateInstance(alloc, CFXMLParserGetTypeID(), sizeof(struct __CFXMLParser) - sizeof(CFRuntimeBase), NULL); |
9ce05555 A |
205 | struct __CFXMLNode *node = (struct __CFXMLNode *)_CFRuntimeCreateInstance(alloc, CFXMLNodeGetTypeID(), sizeof(struct __CFXMLNode) - sizeof(CFRuntimeBase), NULL); |
206 | UniChar *buf; | |
207 | if (parser && node) { | |
208 | alloc = CFGetAllocator(parser); | |
209 | _initializeInputStream(&(parser->input), alloc, dataSource, xmlData); | |
210 | parser->top = parser->stack; | |
211 | parser->stack = NULL; | |
212 | parser->capacity = 0; | |
213 | ||
bd5b749c | 214 | buf = (UniChar *)CFAllocatorAllocate(alloc, 128*sizeof(UniChar), 0); |
9ce05555 A |
215 | parser->node = node; |
216 | parser->node->dataString = CFStringCreateMutableWithExternalCharactersNoCopy(alloc, buf, 0, 128, alloc); | |
217 | parser->node->additionalData = NULL; | |
218 | parser->node->version = version; | |
219 | parser->argDict = NULL; // don't create these until necessary | |
220 | parser->argArray = NULL; | |
221 | ||
222 | parser->options = options; | |
223 | parser->callBacks = *callBacks; | |
224 | ||
225 | FAULT_CALLBACK((void **)&(parser->callBacks.createXMLStructure)); | |
226 | FAULT_CALLBACK((void **)&(parser->callBacks.addChild)); | |
227 | FAULT_CALLBACK((void **)&(parser->callBacks.endXMLStructure)); | |
228 | FAULT_CALLBACK((void **)&(parser->callBacks.resolveExternalEntity)); | |
229 | FAULT_CALLBACK((void **)&(parser->callBacks.handleError)); | |
230 | ||
231 | if (context) { | |
232 | parser->context = *context; | |
233 | if (parser->context.info && parser->context.retain) { | |
234 | parser->context.retain(parser->context.info); | |
235 | } | |
236 | } else { | |
237 | parser->context.version = 0; | |
238 | parser->context.info = NULL; | |
239 | parser->context.retain = NULL; | |
240 | parser->context.release = NULL; | |
241 | parser->context.copyDescription = NULL; | |
242 | } | |
243 | parser->status = kCFXMLStatusParseNotBegun; | |
244 | parser->errorString = NULL; | |
245 | } else { | |
246 | if (parser) CFRelease(parser); | |
247 | if (node) CFRelease(node); | |
248 | parser = NULL; | |
249 | } | |
250 | return parser; | |
251 | } | |
252 | ||
253 | CFXMLParserRef CFXMLParserCreate(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex versionOfNodes, CFXMLParserCallBacks *callBacks, CFXMLParserContext *context) { | |
254 | CFAssert1(xmlData != NULL, __kCFLogAssertion, "%s(): NULL data not permitted", __PRETTY_FUNCTION__); | |
255 | __CFGenericValidateType(xmlData, CFDataGetTypeID()); | |
256 | CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__); | |
257 | CFAssert1(callBacks != NULL && callBacks->createXMLStructure != NULL && callBacks->addChild != NULL && callBacks->endXMLStructure != NULL, __kCFLogAssertion, "%s(): callbacks createXMLStructure, addChild, and endXMLStructure must all be non-NULL", __PRETTY_FUNCTION__); | |
258 | CFAssert2(versionOfNodes <= 1, __kCFLogAssertion, "%s(): version number %d is higher than supported by CFXMLParser", __PRETTY_FUNCTION__, versionOfNodes); | |
259 | CFAssert1(versionOfNodes != 0, __kCFLogAssertion, "%s(): version number 0 is no longer supported by CFXMLParser", __PRETTY_FUNCTION__); | |
260 | return __CFXMLParserInit(allocator, dataSource, parseOptions, xmlData, versionOfNodes, callBacks, context); | |
261 | } | |
262 | ||
d8925383 A |
263 | CFXMLParserRef CFXMLParserCreateWithDataFromURL(CFAllocatorRef allocator, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex versionOfNodes, CFXMLParserCallBacks *callBacks, CFXMLParserContext *context) { |
264 | CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__); | |
265 | CFAssert1(callBacks != NULL && callBacks->createXMLStructure != NULL && callBacks->addChild != NULL && callBacks->endXMLStructure != NULL, __kCFLogAssertion, "%s(): callbacks createXMLStructure, addChild, and endXMLStructure must all be non-NULL", __PRETTY_FUNCTION__); | |
266 | CFAssert2(versionOfNodes <= 1, __kCFLogAssertion, "%s(): version number %d is higher than supported by CFXMLParser", __PRETTY_FUNCTION__, versionOfNodes); | |
267 | CFAssert1(versionOfNodes != 0, __kCFLogAssertion, "%s(): version number 0 is no longer supported by CFXMLParser", __PRETTY_FUNCTION__); | |
268 | ||
269 | return __CFXMLParserInit(allocator, dataSource, parseOptions, NULL, versionOfNodes, callBacks, context); | |
270 | } | |
9ce05555 A |
271 | |
272 | Boolean CFXMLParserParse(CFXMLParserRef parser) { | |
273 | CFXMLDocumentInfo docData; | |
bd5b749c | 274 | __CFGenericValidateType(parser, CFXMLParserGetTypeID()); |
9ce05555 A |
275 | if (parser->status != kCFXMLStatusParseNotBegun) return false; |
276 | parser->status = kCFXMLStatusParseInProgress; | |
277 | ||
278 | if (!_openInputStream(&parser->input)) { | |
279 | if (!parser->input.data) { | |
280 | // couldn't load URL | |
281 | parser->status = kCFXMLErrorNoData; | |
282 | parser->errorString = CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("No data found at %@"), CFURLGetString(parser->input.url)); | |
283 | } else { | |
284 | // couldn't figure out the encoding | |
285 | CFAssert(parser->input.encoding == kCFStringEncodingInvalidId, __kCFLogAssertion, "CFXMLParser internal error: input stream could not be opened"); | |
286 | parser->status = kCFXMLErrorUnknownEncoding; | |
287 | parser->errorString = CFStringCreateWithCString(CFGetAllocator(parser), "Encountered unknown encoding", kCFStringEncodingASCII); | |
288 | } | |
289 | if (parser->callBacks.handleError) { | |
290 | INVOKE_CALLBACK3(parser->callBacks.handleError, parser, parser->status, parser->context.info); | |
291 | } | |
292 | return false; | |
293 | } | |
294 | ||
295 | // Create the document | |
bd5b749c | 296 | parser->stack = (void **)CFAllocatorAllocate(CFGetAllocator(parser), 16 * sizeof(void *), 0); |
9ce05555 A |
297 | parser->capacity = 16; |
298 | parser->node->dataTypeID = kCFXMLNodeTypeDocument; | |
299 | docData.encoding = _inputStreamGetEncoding(&parser->input); | |
300 | docData.sourceURL = parser->input.url; | |
301 | parser->node->additionalData = &docData; | |
302 | parser->stack[0] = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info); | |
303 | parser->top = parser->stack; | |
304 | parser->node->additionalData = NULL; | |
305 | ||
306 | // Client may have called CFXMLParserAbort() during any callback, so we must always check to see if we have an error status after a callback | |
307 | if (parser->status != kCFXMLStatusParseInProgress) { | |
308 | _CFReportError(parser, parser->status, NULL); | |
309 | return false; | |
310 | } | |
311 | return parseXML(parser); | |
312 | } | |
313 | ||
314 | /* The next several functions are all intended to parse past a particular XML structure. They expect parser->curr to be set to the first content character of their structure (e.g. parseXMLComment expects parser->curr to be set just past "<!--"). They parse to the end of their structure, calling any necessary callbacks along the way, and advancing parser->curr as they go. They either return void (not possible for the parse to fail) or they return a Boolean (success/failure). The calling routines are expected to catch returned Booleans and fail immediately if false is returned. */ | |
315 | ||
316 | // [3] S ::= (#x20 | #x9 | #xD | #xA)+ | |
317 | static Boolean parseWhitespace(CFXMLParserRef parser) { | |
318 | CFIndex len; | |
319 | Boolean report = !(parser->options & kCFXMLParserSkipWhitespace); | |
320 | len = _inputStreamSkipWhitespace(&parser->input, report ? (CFMutableStringRef)(parser->node->dataString) : NULL); | |
321 | if (report && len) { | |
322 | parser->node->dataTypeID = kCFXMLNodeTypeWhitespace; | |
323 | parser->node->additionalData = NULL; | |
324 | return reportNewLeaf(parser); | |
325 | } else { | |
326 | return true; | |
327 | } | |
328 | } | |
329 | ||
330 | // parser should be just past "<!--" | |
331 | static Boolean parseComment(CFXMLParserRef parser, Boolean report) { | |
332 | const UniChar dashes[2] = {'-', '-'}; | |
333 | UniChar ch; | |
334 | report = report && (!(parser->options & kCFXMLParserSkipMetaData)); | |
335 | if (!_inputStreamScanToCharacters(&parser->input, dashes, 2, report ? (CFMutableStringRef)(parser->node->dataString) : NULL) || !_inputStreamGetCharacter(&parser->input, &ch)) { | |
336 | _CFReportError(parser, kCFXMLErrorUnexpectedEOF,"Found unexpected EOF while parsing comment"); | |
337 | return false; | |
338 | } else if (ch != '>') { | |
339 | _CFReportError(parser, kCFXMLErrorMalformedComment, "Found \"--\" within a comment"); | |
340 | return false; | |
341 | } else if (report) { | |
342 | parser->node->dataTypeID = kCFXMLNodeTypeComment; | |
343 | parser->node->additionalData = NULL; | |
344 | return reportNewLeaf(parser); | |
345 | } else { | |
346 | return true; | |
347 | } | |
348 | } | |
349 | ||
350 | /* | |
351 | [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' | |
352 | [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) | |
353 | */ | |
354 | // parser should be set to the first character after "<?" | |
355 | static Boolean parseProcessingInstruction(CFXMLParserRef parser, Boolean report) { | |
356 | const UniChar piTermination[2] = {'?', '>'}; | |
357 | CFMutableStringRef str; | |
358 | CFStringRef name; | |
359 | ||
360 | if (!_inputStreamScanXMLName(&parser->input, false, &name)) { | |
361 | _CFReportError(parser, kCFXMLErrorMalformedProcessingInstruction, "Found malformed processing instruction"); | |
362 | return false; | |
363 | } | |
364 | _inputStreamSkipWhitespace(&parser->input, NULL); | |
365 | str = (report && *parser->top) ? CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser)) : NULL; | |
366 | if (!_inputStreamScanToCharacters(&parser->input, piTermination, 2, str)) { | |
367 | _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing processing instruction"); | |
368 | if (str) CFRelease(str); | |
369 | return false; | |
370 | } | |
371 | ||
372 | if (str) { | |
373 | CFXMLProcessingInstructionInfo data; | |
374 | Boolean result; | |
375 | CFStringRef tmp = parser->node->dataString; | |
376 | parser->node->dataTypeID = kCFXMLNodeTypeProcessingInstruction; | |
377 | parser->node->dataString = name; | |
378 | data.dataString = str; | |
379 | parser->node->additionalData = &data; | |
380 | result = reportNewLeaf(parser); | |
381 | parser->node->additionalData = NULL; | |
382 | parser->node->dataString = tmp; | |
383 | CFRelease(str); | |
384 | return result; | |
385 | } else { | |
386 | return true; | |
387 | } | |
388 | } | |
389 | ||
390 | /* | |
391 | [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>' | |
392 | */ | |
393 | static const UniChar _DoctypeOpening[7] = {'D', 'O', 'C', 'T', 'Y', 'P', 'E'}; | |
394 | // first character should be immediately after the "<!" | |
395 | static Boolean parseDTD(CFXMLParserRef parser) { | |
396 | UniChar ch; | |
397 | Boolean success, hasExtID = false; | |
398 | CFXMLDocumentTypeInfo docData = {{NULL, NULL}}; | |
399 | void *dtdStructure = NULL; | |
400 | CFStringRef name; | |
401 | ||
402 | // First pass "DOCTYPE" | |
403 | success = _inputStreamMatchString(&parser->input, _DoctypeOpening, 7); | |
404 | success = success && _inputStreamSkipWhitespace(&parser->input, NULL) != 0; | |
405 | success = success && _inputStreamScanXMLName(&parser->input, false, &name); | |
406 | if (success) { | |
407 | _inputStreamSkipWhitespace(&parser->input, NULL); | |
408 | success = _inputStreamPeekCharacter(&parser->input, &ch); | |
409 | } else { | |
410 | // didn't make it past "DOCTYPE" successfully. | |
411 | _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found malformed DTD"); | |
412 | return false; | |
413 | } | |
414 | if (success && ch != '[' && ch != '>') { | |
415 | // ExternalID | |
416 | hasExtID = true; | |
417 | success = parseExternalID(parser, false, &(docData.externalID)); | |
418 | if (success) { | |
419 | _inputStreamSkipWhitespace(&parser->input, NULL); | |
420 | success = _inputStreamPeekCharacter(&parser->input, &ch); | |
421 | } | |
422 | } | |
423 | ||
424 | if (!(parser->options & kCFXMLParserSkipMetaData) && *(parser->top)) { | |
425 | CFStringRef tmp = parser->node->dataString; | |
426 | parser->node->dataTypeID = kCFXMLNodeTypeDocumentType; | |
427 | parser->node->dataString = name; | |
428 | parser->node->additionalData = &docData; | |
429 | dtdStructure = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info); | |
430 | if (dtdStructure && parser->status == kCFXMLStatusParseInProgress) { | |
431 | INVOKE_CALLBACK4(parser->callBacks.addChild, parser, *parser->top, dtdStructure, parser->context.info); | |
432 | } | |
433 | parser->node->additionalData = NULL; | |
434 | parser->node->dataString = tmp; | |
435 | if (parser->status != kCFXMLStatusParseInProgress) { | |
436 | // callback called CFXMLParserAbort() | |
437 | _CFReportError(parser, parser->status, NULL); | |
438 | return false; | |
439 | } | |
440 | } else { | |
441 | dtdStructure = NULL; | |
442 | } | |
443 | if (docData.externalID.publicID) CFRelease(docData.externalID.publicID); | |
444 | if (docData.externalID.systemID) CFRelease(docData.externalID.systemID); | |
445 | pushXMLNode(parser, dtdStructure); | |
446 | ||
447 | if (success && ch == '[') { | |
448 | // inline DTD | |
449 | _inputStreamGetCharacter(&parser->input, &ch); | |
450 | if (!parseInlineDTD(parser)) return false; | |
451 | _inputStreamSkipWhitespace(&parser->input, NULL); | |
452 | success = _inputStreamGetCharacter(&parser->input, &ch) && ch == '>'; | |
453 | } else if (success && ch == '>') { | |
454 | // End of the DTD | |
455 | _inputStreamGetCharacter(&parser->input, &ch); | |
456 | } | |
457 | if (!success) { | |
458 | if (_inputStreamAtEOF(&parser->input)) { | |
459 | _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing DTD"); | |
460 | } else { | |
461 | _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found malformed DTD"); | |
462 | } | |
463 | return false; | |
464 | } | |
465 | ||
466 | parser->top --; // Remove dtdStructure from the stack | |
467 | ||
468 | if (success && dtdStructure) { | |
469 | INVOKE_CALLBACK3(parser->callBacks.endXMLStructure, parser, dtdStructure, parser->context.info); | |
470 | if (parser->status != kCFXMLStatusParseInProgress) { | |
471 | _CFReportError(parser, parser->status, NULL); | |
472 | return false; | |
473 | } | |
474 | } | |
475 | return true; | |
476 | } | |
477 | ||
478 | /* | |
479 | [69] PEReference ::= '%' Name ';' | |
480 | */ | |
481 | static Boolean parsePhysicalEntityReference(CFXMLParserRef parser) { | |
482 | UniChar ch; | |
483 | CFStringRef name; | |
484 | if (!_inputStreamScanXMLName(&parser->input, false, &name)) { | |
485 | _CFReportError(parser, kCFXMLErrorMalformedName, "Found malformed name while parsing physical entity reference"); | |
486 | return false; | |
487 | } else if (!_inputStreamGetCharacter(&parser->input, &ch)) { | |
488 | _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing physical entity reference"); | |
489 | return false; | |
490 | } else if (ch != ';') { | |
491 | _CFReportError(parser, kCFXMLErrorMalformedName, "Found malformed name while parsing physical entity reference"); | |
492 | return false; | |
493 | } else if (!(parser->options & kCFXMLParserSkipMetaData) && *(parser->top)) { | |
494 | CFXMLEntityReferenceInfo myData; | |
495 | Boolean result; | |
496 | CFStringRef tmp = parser->node->dataString; | |
497 | parser->node->dataTypeID = kCFXMLNodeTypeEntityReference; | |
498 | parser->node->dataString = name; | |
499 | myData.entityType = kCFXMLEntityTypeParameter; | |
500 | parser->node->additionalData = &myData; | |
501 | result = reportNewLeaf(parser); | |
502 | parser->node->additionalData = NULL; | |
503 | parser->node->dataString = tmp; | |
504 | return result; | |
505 | } else { | |
506 | return true; | |
507 | } | |
508 | } | |
509 | ||
510 | /* | |
511 | [54] AttType ::= StringType | TokenizedType | EnumeratedType | |
512 | [55] StringType ::= 'CDATA' | |
513 | [56] TokenizedType ::= 'ID' | 'IDREF'| 'IDREFS'| 'ENTITY'| 'ENTITIES'| 'NMTOKEN'| 'NMTOKENS' | |
514 | [57] EnumeratedType ::= NotationType | Enumeration | |
515 | [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' | |
516 | [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' | |
517 | */ | |
518 | static Boolean parseEnumeration(CFXMLParserRef parser, Boolean useNMTokens) { | |
519 | UniChar ch; | |
520 | Boolean done = false; | |
521 | if (!_inputStreamGetCharacter(&parser->input, &ch)) { | |
522 | _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD"); | |
523 | return false; | |
524 | } else if (ch != '(') { | |
525 | _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD"); | |
526 | return false; | |
527 | } | |
528 | _inputStreamSkipWhitespace(&parser->input, NULL); | |
529 | if (!_inputStreamScanXMLName(&parser->input, useNMTokens, NULL)) { | |
530 | _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD"); | |
531 | return false; | |
532 | } | |
533 | while (!done) { | |
534 | _inputStreamSkipWhitespace(&parser->input, NULL); | |
535 | if (!_inputStreamGetCharacter(&parser->input, &ch)) { | |
536 | _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD"); | |
537 | return false; | |
538 | } else if (ch == ')') { | |
539 | done = true; | |
540 | } else if (ch == '|') { | |
541 | _inputStreamSkipWhitespace(&parser->input, NULL); | |
542 | if (!_inputStreamScanXMLName(&parser->input, useNMTokens, NULL)) { | |
543 | _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD"); | |
544 | return false; | |
545 | } | |
546 | } else { | |
547 | _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD"); | |
548 | return false; | |
549 | } | |
550 | } | |
551 | return true; | |
552 | } | |
553 | ||
554 | static Boolean parseAttributeType(CFXMLParserRef parser, CFMutableStringRef str) { | |
555 | Boolean success = false; | |
556 | static const UniChar attTypeStrings[6][8] = { | |
557 | {'C', 'D', 'A', 'T', 'A', '\0', '\0', '\0'}, | |
558 | {'I', 'D', 'R', 'E', 'F', 'S', '\0', '\0'}, | |
559 | {'E', 'N', 'T', 'I', 'T', 'Y', '\0', '\0'}, | |
560 | {'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S'}, | |
561 | {'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S'}, | |
562 | {'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N'} }; | |
563 | if (str) _inputStreamSetMark(&parser->input); | |
564 | if (_inputStreamMatchString(&parser->input, attTypeStrings[0], 5) || | |
565 | _inputStreamMatchString(&parser->input, attTypeStrings[1], 6) || | |
566 | _inputStreamMatchString(&parser->input, attTypeStrings[1], 5) || | |
567 | _inputStreamMatchString(&parser->input, attTypeStrings[1], 2) || | |
568 | _inputStreamMatchString(&parser->input, attTypeStrings[2], 6) || | |
569 | _inputStreamMatchString(&parser->input, attTypeStrings[3], 8) || | |
570 | _inputStreamMatchString(&parser->input, attTypeStrings[4], 8) || | |
571 | _inputStreamMatchString(&parser->input, attTypeStrings[4], 7)) { | |
572 | success = true; | |
573 | } else if (_inputStreamMatchString(&parser->input, attTypeStrings[5], 8)) { | |
574 | // Notation | |
575 | if (_inputStreamSkipWhitespace(&parser->input, NULL) == 0) { | |
576 | _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD"); | |
577 | success = false; | |
578 | } else { | |
579 | success = parseEnumeration(parser, false); | |
580 | } | |
581 | } else { | |
582 | success = parseEnumeration(parser, true); | |
583 | } | |
584 | if (str) { | |
585 | if (success) { | |
586 | _inputStreamGetCharactersFromMark(&parser->input, str); | |
587 | } | |
588 | _inputStreamClearMark(&parser->input); | |
589 | } | |
590 | return success; | |
591 | } | |
592 | ||
593 | /* [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) */ | |
594 | static Boolean parseAttributeDefaultDeclaration(CFXMLParserRef parser, CFMutableStringRef str) { | |
595 | const UniChar strings[3][8] = { | |
596 | {'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D'}, | |
597 | {'I', 'M', 'P', 'L', 'I', 'E', 'D', '\0'}, | |
598 | {'F', 'I', 'X', 'E', 'D', '\0', '\0', '\0'}}; | |
599 | UniChar ch; | |
600 | Boolean success; | |
601 | if (str) _inputStreamSetMark(&parser->input); | |
602 | if (!_inputStreamGetCharacter(&parser->input, &ch)) { | |
603 | _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD"); | |
604 | success = false; | |
605 | } else if (ch == '#') { | |
606 | if (_inputStreamMatchString(&parser->input, strings[0], 8) || | |
607 | _inputStreamMatchString(&parser->input, strings[1], 7)) { | |
608 | success = true; | |
609 | } else if (!_inputStreamMatchString(&parser->input, strings[2], 5) || _inputStreamSkipWhitespace(&parser->input, NULL) == 0) { | |
610 | _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD"); | |
611 | success = false; | |
612 | } else { | |
613 | // we fall through if "#FIXED" was matched, and at least one whitespace character was stripped. | |
614 | success = parseAttributeValue(parser, NULL); | |
615 | } | |
616 | } else { | |
617 | _inputStreamReturnCharacter(&parser->input, ch); | |
618 | success = parseAttributeValue(parser, NULL); | |
619 | } | |
620 | if (str) { | |
621 | if (success) { | |
622 | _inputStreamGetCharactersFromMark(&parser->input, str); | |
623 | } | |
624 | _inputStreamClearMark(&parser->input); | |
625 | } | |
626 | return success; | |
627 | } | |
628 | ||
629 | /* | |
630 | [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>' | |
631 | [53] AttDef ::= S Name S AttType S DefaultDecl | |
632 | */ | |
633 | static Boolean parseAttributeListDeclaration(CFXMLParserRef parser) { | |
634 | const UniChar attList[7] = {'A', 'T', 'T', 'L', 'I', 'S', 'T'}; | |
635 | CFXMLAttributeListDeclarationInfo attListData; | |
636 | CFXMLAttributeDeclarationInfo attributeArray[8], *attributes=attributeArray; | |
637 | CFIndex capacity = 8; | |
638 | UniChar ch; | |
639 | Boolean success = true; | |
640 | CFStringRef name; | |
641 | if (!_inputStreamMatchString(&parser->input, attList, 7) || | |
642 | _inputStreamSkipWhitespace(&parser->input, NULL) == 0 || | |
643 | !_inputStreamScanXMLName(&parser->input, false, &name)) { | |
644 | _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD"); | |
645 | return false; | |
646 | } | |
647 | attListData.numberOfAttributes = 0; | |
648 | if (!(*parser->top) || (parser->options & kCFXMLParserSkipMetaData)) { | |
649 | // Use this to mark that we don't need to collect attribute information to report to the client. Ultimately, we may want to collect this for our own use (for validation, for instance), but for now, the only reason we would create it would be for the client. -- REW, 2/9/2000 | |
650 | attributes = NULL; | |
651 | } | |
652 | while (_inputStreamPeekCharacter(&parser->input, &ch) && ch != '>' && _inputStreamSkipWhitespace(&parser->input, NULL) != 0) { | |
653 | CFXMLAttributeDeclarationInfo *attribute = NULL; | |
654 | if (_inputStreamPeekCharacter(&parser->input, &ch) && ch == '>') | |
655 | break; | |
656 | if (attributes) { | |
657 | if (capacity == attListData.numberOfAttributes) { | |
658 | capacity = 2*capacity; | |
659 | if (attributes != attributeArray) { | |
bd5b749c | 660 | attributes = (CFXMLAttributeDeclarationInfo *)CFAllocatorReallocate(CFGetAllocator(parser), attributes, capacity * sizeof(CFXMLAttributeDeclarationInfo), 0); |
9ce05555 | 661 | } else { |
bd5b749c | 662 | attributes = (CFXMLAttributeDeclarationInfo *)CFAllocatorAllocate(CFGetAllocator(parser), capacity * sizeof(CFXMLAttributeDeclarationInfo), 0); |
9ce05555 A |
663 | } |
664 | } | |
665 | attribute = &(attributes[attListData.numberOfAttributes]); | |
666 | // Much better if we can somehow create these strings immutable - then if the client (or we ourselves) has to copy them, they will end up multiply-retained, rather than having a new alloc and data copy performed. -- REW, 2/9/2000 | |
667 | attribute->typeString = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser)); | |
668 | attribute->defaultString = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser)); | |
669 | } | |
670 | if (!_inputStreamScanXMLName(&parser->input, false, &(attribute->attributeName)) || (_inputStreamSkipWhitespace(&parser->input, NULL) == 0)) { | |
671 | _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD"); | |
672 | success = false; | |
673 | break; | |
674 | } | |
675 | if (!parseAttributeType(parser, attribute ? (CFMutableStringRef)attribute->typeString : NULL)) { | |
676 | success = false; | |
677 | break; | |
678 | } | |
679 | if (_inputStreamSkipWhitespace(&parser->input, NULL) == 0) { | |
680 | _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD"); | |
681 | success = false; | |
682 | break; | |
683 | } | |
684 | if (!parseAttributeDefaultDeclaration(parser, attribute ? (CFMutableStringRef)attribute->defaultString : NULL)) { | |
685 | success = false; | |
686 | break; | |
687 | } | |
688 | attListData.numberOfAttributes ++; | |
689 | } | |
690 | if (success) { | |
691 | if (!_inputStreamGetCharacter(&parser->input, &ch)) { | |
692 | _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD"); | |
693 | success = false; | |
694 | } else if (ch != '>') { | |
695 | _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD"); | |
696 | success = false; | |
697 | } else if (attributes) { | |
698 | CFStringRef tmp = parser->node->dataString; | |
699 | parser->node->dataTypeID = kCFXMLNodeTypeAttributeListDeclaration; | |
700 | parser->node->dataString = name; | |
701 | attListData.attributes = attributes; | |
702 | parser->node->additionalData = (void *)&attListData; | |
703 | success = reportNewLeaf(parser); | |
704 | parser->node->additionalData = NULL; | |
705 | parser->node->dataString = tmp; | |
706 | } | |
707 | } | |
708 | if (attributes) { | |
709 | // Free up all that memory | |
710 | CFIndex idx; | |
711 | for (idx = 0; idx < attListData.numberOfAttributes; idx ++) { | |
712 | // Do not release attributeName here; it's a uniqued string from scanXMLName | |
713 | CFRelease(attributes[idx].typeString); | |
714 | CFRelease(attributes[idx].defaultString); | |
715 | } | |
716 | if (attributes != attributeArray) { | |
717 | CFAllocatorDeallocate(CFGetAllocator(parser), attributes); | |
718 | } | |
719 | } | |
720 | return success; | |
721 | } | |
722 | ||
723 | CF_INLINE Boolean parseSystemLiteral(CFXMLParserRef parser, CFXMLExternalID *extID) { | |
724 | Boolean success; | |
725 | if (extID) { | |
726 | CFMutableStringRef urlStr = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser)); | |
727 | if (_inputStreamScanQuotedString(&parser->input, urlStr)) { | |
728 | success = true; | |
729 | extID->systemID = CFURLCreateWithString(CFGetAllocator(parser), urlStr, parser->input.url); | |
730 | } else { | |
731 | extID->systemID = NULL; | |
732 | success = false; | |
733 | } | |
734 | CFRelease(urlStr); | |
735 | } else { | |
736 | success = _inputStreamScanQuotedString(&parser->input, NULL); | |
737 | } | |
738 | return success; | |
739 | } | |
740 | ||
741 | /* | |
742 | [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral | |
743 | [83] PublicID ::= 'PUBLIC' S PubidLiteral | |
744 | [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" | |
745 | [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] | |
746 | [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") | |
747 | */ | |
748 | // This does NOT report errors itself; caller can check to see if parser->input is at EOF to determine whether the formatting failed or unexpected EOF occurred. -- REW, 2/2/2000 | |
749 | static Boolean parseExternalID(CFXMLParserRef parser, Boolean alsoAcceptPublicID, CFXMLExternalID *extID) { | |
750 | const UniChar publicString[6] = {'P', 'U', 'B', 'L', 'I', 'C'}; | |
751 | const UniChar systemString[6] = {'S', 'Y', 'S', 'T', 'E', 'M'}; | |
752 | Boolean success; | |
753 | if (extID) { | |
754 | extID->systemID = NULL; | |
755 | extID->publicID = NULL; | |
756 | } | |
757 | if (_inputStreamMatchString(&parser->input, publicString, 6)) { | |
758 | success = _inputStreamSkipWhitespace(&parser->input, NULL) != 0; | |
759 | if (extID) { | |
760 | extID->publicID = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser)); | |
761 | success = success && _inputStreamScanQuotedString(&parser->input, (CFMutableStringRef)extID->publicID); | |
762 | } else { | |
763 | success = success && _inputStreamScanQuotedString(&parser->input, NULL); | |
764 | } | |
765 | if (success) { | |
766 | UniChar ch; | |
767 | if (alsoAcceptPublicID) { | |
768 | _inputStreamSetMark(&parser->input); // In case we need to roll back the parser | |
769 | } | |
770 | if (_inputStreamSkipWhitespace(&parser->input, NULL) == 0 | |
771 | || !_inputStreamPeekCharacter(&parser->input, &ch) | |
772 | || (ch != '\'' && ch != '\"') | |
773 | || !parseSystemLiteral(parser, extID)) { | |
774 | success = alsoAcceptPublicID; | |
775 | if (alsoAcceptPublicID) { | |
776 | _inputStreamBackUpToMark(&parser->input); | |
777 | } | |
778 | } else { | |
779 | success = true; | |
780 | } | |
781 | if (alsoAcceptPublicID) { | |
782 | _inputStreamClearMark(&parser->input); | |
783 | } | |
784 | } | |
785 | } else if (_inputStreamMatchString(&parser->input, systemString, 6)) { | |
786 | success = _inputStreamSkipWhitespace(&parser->input, NULL) != 0 && parseSystemLiteral(parser, extID); | |
787 | } else { | |
788 | success = false; | |
789 | } | |
790 | return success; | |
791 | } | |
792 | ||
793 | /* | |
794 | [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' | |
795 | */ | |
796 | static Boolean parseNotationDeclaration(CFXMLParserRef parser) { | |
797 | static UniChar notationString[8] = {'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N'}; | |
798 | Boolean report = *(parser->top) && !(parser->options & kCFXMLParserSkipMetaData); | |
799 | CFXMLNotationInfo notationData = {{NULL, NULL}}; | |
800 | CFStringRef name; | |
801 | Boolean success = | |
802 | _inputStreamMatchString(&parser->input, notationString, 8) && | |
803 | _inputStreamSkipWhitespace(&parser->input, NULL) != 0 && | |
804 | _inputStreamScanXMLName(&parser->input, false, report ? &name : NULL) && | |
805 | _inputStreamSkipWhitespace(&parser->input, NULL) != 0 && | |
806 | parseExternalID(parser, true, report ? &(notationData.externalID) : NULL); | |
807 | ||
808 | if (success) { | |
809 | UniChar ch; | |
810 | _inputStreamSkipWhitespace(&parser->input, NULL); | |
811 | success = (_inputStreamGetCharacter(&parser->input, &ch) && ch == '>'); | |
812 | } | |
813 | if (!success) { | |
814 | if (_inputStreamAtEOF(&parser->input)) { | |
815 | _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD"); | |
816 | } else { | |
817 | _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD"); | |
818 | } | |
819 | } else if (report) { | |
820 | CFStringRef tmp = parser->node->dataString; | |
821 | parser->node->dataTypeID = kCFXMLNodeTypeNotation; | |
822 | parser->node->dataString = name; | |
823 | parser->node->additionalData = ¬ationData; | |
824 | success = reportNewLeaf(parser); | |
825 | parser->node->additionalData = NULL; | |
826 | parser->node->dataString = tmp; | |
827 | } | |
828 | if (notationData.externalID.systemID) CFRelease(notationData.externalID.systemID); | |
829 | if (notationData.externalID.publicID) CFRelease(notationData.externalID.publicID); | |
830 | return success; | |
831 | } | |
832 | ||
833 | /* | |
834 | [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? | |
835 | [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')' | |
836 | [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' | |
837 | */ | |
838 | static Boolean parseChoiceOrSequence(CFXMLParserRef parser, Boolean pastParen) { | |
839 | UniChar ch, separator; | |
840 | if (!pastParen) { | |
841 | if (!_inputStreamGetCharacter(&parser->input, &ch) || ch != '(') return false; | |
842 | _inputStreamSkipWhitespace(&parser->input, NULL); | |
843 | } | |
844 | if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false; | |
845 | ||
846 | /* Now scanning cp, production [48] */ | |
847 | if (ch == '(') { | |
848 | if (!parseChoiceOrSequence(parser, false)) return false; | |
849 | } else { | |
850 | if (!_inputStreamScanXMLName(&parser->input, false, NULL)) return false; | |
851 | } | |
852 | if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false; | |
853 | if (ch == '?' || ch == '*' || ch == '+') _inputStreamGetCharacter(&parser->input, &ch); | |
854 | ||
855 | /* Now past cp */ | |
856 | _inputStreamSkipWhitespace(&parser->input, NULL); | |
857 | if (!_inputStreamGetCharacter(&parser->input, &ch)) return false; | |
858 | if (ch == ')') return true; | |
859 | if (ch != '|' && ch != ',') return false; | |
860 | separator = ch; | |
861 | while (ch == separator) { | |
862 | _inputStreamSkipWhitespace(&parser->input, NULL); | |
863 | if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false; | |
864 | if (ch != '(') { | |
865 | if (!_inputStreamScanXMLName(&parser->input, false, NULL)) return false; | |
866 | } else if (!parseChoiceOrSequence(parser, false)) { | |
867 | return false; | |
868 | } | |
869 | _inputStreamSkipWhitespace(&parser->input, NULL); | |
870 | if (!_inputStreamGetCharacter(&parser->input, &ch)) return false; | |
871 | } | |
872 | return ch == ')'; | |
873 | } | |
874 | ||
875 | /* | |
876 | [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')' | |
877 | */ | |
878 | static Boolean parseMixedElementContent(CFXMLParserRef parser) { | |
879 | static const UniChar pcdataString[7] = {'#', 'P', 'C', 'D', 'A', 'T', 'A'}; | |
880 | UniChar ch; | |
881 | if (!_inputStreamMatchString(&parser->input, pcdataString, 7)) return false; | |
882 | _inputStreamSkipWhitespace(&parser->input, NULL); | |
883 | if (!_inputStreamGetCharacter(&parser->input, &ch) && (ch == ')' || ch == '|')) return false; | |
884 | if (ch == ')') return true; | |
885 | ||
886 | while (ch == '|') { | |
887 | _inputStreamSkipWhitespace(&parser->input, NULL); | |
888 | if (!_inputStreamScanXMLName(&parser->input, false, NULL)) return false; | |
889 | _inputStreamSkipWhitespace(&parser->input, NULL); | |
890 | if (!_inputStreamGetCharacter(&parser->input, &ch)) return false; | |
891 | } | |
892 | if (ch != ')') return false; | |
893 | if (!_inputStreamGetCharacter(&parser->input, &ch) || ch != '*') return false; | |
894 | return true; | |
895 | } | |
896 | ||
897 | /* | |
898 | [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children | |
899 | [47] children ::= (choice | seq) ('?' | '*' | '+')? | |
900 | */ | |
901 | static Boolean parseElementContentSpec(CFXMLParserRef parser) { | |
902 | static const UniChar eltContentEmpty[5] = {'E', 'M', 'P', 'T', 'Y'}; | |
903 | static const UniChar eltContentAny[3] = {'A', 'N', 'Y'}; | |
904 | UniChar ch; | |
905 | if (_inputStreamMatchString(&parser->input, eltContentEmpty, 5) || _inputStreamMatchString(&parser->input, eltContentAny, 3)) { | |
906 | return true; | |
907 | } else if (!_inputStreamPeekCharacter(&parser->input, &ch) || ch != '(') { | |
908 | return false; | |
909 | } else { | |
910 | // We want to know if we have a Mixed per production [51]. If we don't, we will need to back up and call the parseChoiceOrSequence function. So we set the mark now. -- REW, 2/10/2000 | |
911 | _inputStreamGetCharacter(&parser->input, &ch); | |
912 | _inputStreamSkipWhitespace(&parser->input, NULL); | |
913 | if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false; | |
914 | if (ch == '#') { | |
915 | // Mixed | |
916 | return parseMixedElementContent(parser); | |
917 | } else { | |
918 | if (parseChoiceOrSequence(parser, true)) { | |
919 | if (_inputStreamPeekCharacter(&parser->input, &ch) && (ch == '*' || ch == '?' || ch == '+')) { | |
920 | _inputStreamGetCharacter(&parser->input, &ch); | |
921 | } | |
922 | return true; | |
923 | } else { | |
924 | return false; | |
925 | } | |
926 | } | |
927 | } | |
928 | } | |
929 | ||
930 | /* | |
931 | [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>' | |
932 | */ | |
933 | static Boolean parseElementDeclaration(CFXMLParserRef parser) { | |
934 | Boolean report = *(parser->top) && !(parser->options & kCFXMLParserSkipMetaData); | |
935 | Boolean success; | |
936 | static const UniChar eltChars[7] = {'E', 'L', 'E', 'M', 'E', 'N', 'T'}; | |
937 | UniChar ch = '>'; | |
938 | CFMutableStringRef contentDesc = NULL; | |
939 | CFStringRef name; | |
940 | success = _inputStreamMatchString(&parser->input, eltChars, 7) | |
941 | && _inputStreamSkipWhitespace(&parser->input, NULL) != 0 | |
942 | && _inputStreamScanXMLName(&parser->input, false, report ? &name : NULL) | |
943 | && _inputStreamSkipWhitespace(&parser->input, NULL) != 0; | |
944 | if (success) { | |
945 | if (report) _inputStreamSetMark(&parser->input); | |
946 | success = parseElementContentSpec(parser); | |
947 | if (success && report) { | |
948 | contentDesc = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser)); | |
949 | _inputStreamGetCharactersFromMark(&parser->input, contentDesc); | |
950 | } | |
951 | if (report) _inputStreamClearMark(&parser->input); | |
952 | if (success) _inputStreamSkipWhitespace(&parser->input, NULL); | |
953 | success = success && _inputStreamMatchString(&parser->input, &ch, 1); | |
954 | } | |
955 | if (!success) { | |
956 | if (_inputStreamAtEOF(&parser->input)) { | |
957 | _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD"); | |
958 | } else { | |
959 | _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD"); | |
960 | } | |
961 | } else if (report) { | |
962 | CFXMLElementTypeDeclarationInfo eltData; | |
963 | CFStringRef tmp = parser->node->dataString; | |
964 | parser->node->dataTypeID = kCFXMLNodeTypeElementTypeDeclaration; | |
965 | parser->node->dataString = name; | |
966 | eltData.contentDescription = contentDesc; | |
967 | parser->node->additionalData = &eltData; | |
968 | success = reportNewLeaf(parser); | |
969 | parser->node->additionalData = NULL; | |
970 | parser->node->dataString = tmp; | |
971 | } | |
972 | if (contentDesc) CFRelease(contentDesc); | |
973 | return success; | |
974 | } | |
975 | ||
976 | /* | |
977 | [70] EntityDecl ::= GEDecl | PEDecl | |
978 | [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' | |
979 | [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' | |
980 | [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) | |
981 | [74] PEDef ::= EntityValue | ExternalID | |
982 | [76] NDataDecl ::= S 'NDATA' S Name | |
983 | [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | "'" ([^%&'] | PEReference | Reference)* "'" | |
984 | */ | |
985 | static Boolean parseEntityDeclaration(CFXMLParserRef parser) { | |
986 | const UniChar entityStr[6] = {'E', 'N', 'T', 'I', 'T', 'Y'}; | |
987 | UniChar ch; | |
988 | Boolean isPEDecl = false; | |
989 | CFXMLEntityInfo entityData; | |
990 | CFStringRef name; | |
991 | Boolean report = *(parser->top) && !(parser->options & kCFXMLParserSkipMetaData); | |
992 | Boolean success = | |
993 | _inputStreamMatchString(&parser->input, entityStr, 6) && | |
994 | (_inputStreamSkipWhitespace(&parser->input, NULL) != 0) && | |
995 | _inputStreamPeekCharacter(&parser->input, &ch); | |
996 | ||
997 | entityData.replacementText = NULL; | |
998 | entityData.entityID.publicID = NULL; | |
999 | entityData.entityID.systemID = NULL; | |
1000 | entityData.notationName = NULL; | |
1001 | // We will set entityType immediately before reporting | |
1002 | ||
1003 | if (success && ch == '%') { | |
1004 | _inputStreamGetCharacter(&parser->input, &ch); | |
1005 | success = _inputStreamSkipWhitespace(&parser->input, NULL) != 0; | |
1006 | isPEDecl = true; | |
1007 | } | |
1008 | success = success && _inputStreamScanXMLName(&parser->input, false, report ? &name : NULL) && (_inputStreamSkipWhitespace(&parser->input, NULL) != 0) && _inputStreamPeekCharacter(&parser->input, &ch); | |
1009 | if (success && (ch == '\"' || ch == '\'')) { | |
1010 | // EntityValue | |
1011 | // This is not quite correct - the string scanned cannot contain '%' or '&' unless it's as part of a valid entity reference -- REW, 2/2/2000 | |
1012 | if (report) { | |
1013 | entityData.replacementText = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser)); | |
1014 | success = _inputStreamScanQuotedString(&parser->input, (CFMutableStringRef)entityData.replacementText); | |
1015 | } else { | |
1016 | success = _inputStreamScanQuotedString(&parser->input, NULL); | |
1017 | } | |
1018 | } else if (success) { | |
1019 | // ExternalID | |
1020 | success = parseExternalID(parser, false, report ? &(entityData.entityID) : NULL); | |
1021 | if (success && !isPEDecl && _inputStreamSkipWhitespace(&parser->input, NULL) != 0) { | |
1022 | // There could be an option NDataDecl | |
1023 | // Don't we need to set entityData.notationName? -- REW, 3/6/2000 | |
1024 | const UniChar nDataStr[5] = {'N', 'D', 'A', 'T', 'A'}; | |
1025 | if (_inputStreamMatchString(&parser->input, nDataStr, 5)) { | |
1026 | success = (_inputStreamSkipWhitespace(&parser->input, NULL) != 0) && _inputStreamScanXMLName(&parser->input, false, NULL); | |
1027 | } | |
1028 | } | |
1029 | } | |
1030 | if (success) { | |
1031 | _inputStreamSkipWhitespace(&parser->input, NULL); | |
1032 | success = _inputStreamGetCharacter(&parser->input, &ch) && ch == '>'; | |
1033 | } | |
1034 | if (!success) { | |
1035 | if (_inputStreamAtEOF(&parser->input)) { | |
1036 | _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD"); | |
1037 | } else { | |
1038 | _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD"); | |
1039 | } | |
1040 | } else { | |
1041 | CFStringRef tmp = parser->node->dataString; | |
1042 | if (isPEDecl) entityData.entityType = kCFXMLEntityTypeParameter; | |
1043 | else if (entityData.replacementText) entityData.entityType = kCFXMLEntityTypeParsedInternal; | |
1044 | else if (!entityData.notationName) entityData.entityType = kCFXMLEntityTypeParsedExternal; | |
1045 | else entityData.entityType = kCFXMLEntityTypeUnparsed; | |
1046 | parser->node->dataTypeID = kCFXMLNodeTypeEntity; | |
1047 | parser->node->dataString = name; | |
1048 | parser->node->additionalData = &entityData; | |
1049 | success = reportNewLeaf(parser); | |
1050 | parser->node->additionalData = NULL; | |
1051 | parser->node->dataString = tmp; | |
1052 | if (entityData.replacementText) CFRelease(entityData.replacementText); | |
1053 | } | |
1054 | if (entityData.entityID.publicID) CFRelease(entityData.entityID.publicID); | |
1055 | if (entityData.entityID.systemID) CFRelease(entityData.entityID.systemID); | |
1056 | return success; | |
1057 | } | |
1058 | ||
1059 | /* | |
1060 | [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>' | |
1061 | [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment | |
1062 | */ | |
1063 | // First character should be just past '[' | |
1064 | static Boolean parseInlineDTD(CFXMLParserRef parser) { | |
1065 | Boolean success = true; | |
1066 | while (success && !_inputStreamAtEOF(&parser->input)) { | |
1067 | UniChar ch; | |
1068 | ||
1069 | parseWhitespace(parser); | |
1070 | if (!_inputStreamGetCharacter(&parser->input, &ch)) break; | |
1071 | if (ch == '%') { | |
1072 | // PEReference | |
1073 | success = parsePhysicalEntityReference(parser); | |
1074 | } else if (ch == '<') { | |
1075 | // markupdecl | |
1076 | if (!_inputStreamGetCharacter(&parser->input, &ch)) { | |
1077 | _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD"); | |
1078 | return false; | |
1079 | } | |
1080 | if (ch == '?') { | |
1081 | // Processing Instruction | |
1082 | success = parseProcessingInstruction(parser, true); // We can safely pass true here, because *parser->top will be NULL if kCFXMLParserSkipMetaData is true | |
1083 | } else if (ch == '!') { | |
1084 | UniChar dashes[2] = {'-', '-'}; | |
1085 | if (_inputStreamMatchString(&parser->input, dashes, 2)) { | |
1086 | // Comment | |
1087 | success = parseComment(parser, true); | |
1088 | } else { | |
1089 | // elementdecl | AttListDecl | EntityDecl | NotationDecl | |
1090 | if (!_inputStreamPeekCharacter(&parser->input, &ch)) { | |
1091 | _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD"); | |
1092 | return false; | |
1093 | } else if (ch == 'A') { | |
1094 | // AttListDecl | |
1095 | success = parseAttributeListDeclaration(parser); | |
1096 | } else if (ch == 'N') { | |
1097 | success = parseNotationDeclaration(parser); | |
1098 | } else if (ch == 'E') { | |
1099 | // elementdecl | EntityDecl | |
1100 | _inputStreamGetCharacter(&parser->input, &ch); | |
1101 | if (!_inputStreamPeekCharacter(&parser->input, &ch)) { | |
1102 | _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD"); | |
1103 | return false; | |
1104 | } | |
1105 | _inputStreamReturnCharacter(&parser->input, 'E'); | |
1106 | if (ch == 'L') { | |
1107 | success = parseElementDeclaration(parser); | |
1108 | } else if (ch == 'N') { | |
1109 | success = parseEntityDeclaration(parser); | |
1110 | } else { | |
1111 | _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD"); | |
1112 | return false; | |
1113 | } | |
1114 | } else { | |
1115 | _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD"); | |
1116 | return false; | |
1117 | } | |
1118 | } | |
1119 | } else { | |
1120 | _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD"); | |
1121 | return false; | |
1122 | } | |
1123 | } else if (ch == ']') { | |
1124 | return true; | |
1125 | } else { | |
1126 | _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD"); | |
1127 | return false; | |
1128 | } | |
1129 | } | |
1130 | if (success) { | |
1131 | _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD"); | |
1132 | } | |
1133 | return false; | |
1134 | } | |
1135 | ||
1136 | /* | |
1137 | [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)* | |
1138 | */ | |
1139 | static Boolean parseTagContent(CFXMLParserRef parser) { | |
1140 | while (!_inputStreamAtEOF(&parser->input)) { | |
1141 | UniChar ch; | |
1142 | CFIndex numWhitespaceCharacters; | |
1143 | ||
1144 | _inputStreamSetMark(&parser->input); | |
1145 | numWhitespaceCharacters = _inputStreamSkipWhitespace(&parser->input, NULL); | |
1146 | // Don't report the whitespace yet; if the first thing we see is character data, we put the whitespace back and report it as part of the character data. | |
1147 | if (!_inputStreamGetCharacter(&parser->input, &ch)) break; // break == report unexpected EOF | |
1148 | ||
1149 | if (ch != '<' && ch != '&') { // CharData | |
1150 | // Back off the whitespace; we'll report it with the PCData | |
1151 | _inputStreamBackUpToMark(&parser->input); | |
1152 | _inputStreamClearMark(&parser->input); | |
1153 | if (!parsePCData(parser)) return false; | |
1154 | if(_inputStreamComposingErrorOccurred(&parser->input)) { | |
1155 | _CFReportError(parser, kCFXMLErrorEncodingConversionFailure, "Encountered string encoding error"); | |
1156 | return false; | |
1157 | } | |
1158 | continue; | |
1159 | } | |
1160 | ||
1161 | // element | Reference | CDSect | PI | Comment | |
1162 | // We can safely report any whitespace now | |
1163 | if (!(parser->options & kCFXMLParserSkipWhitespace) && numWhitespaceCharacters != 0 && *(parser->top)) { | |
1164 | _inputStreamReturnCharacter(&parser->input, ch); | |
1165 | _inputStreamGetCharactersFromMark(&parser->input, (CFMutableStringRef)(parser->node->dataString)); | |
1166 | parser->node->dataTypeID = kCFXMLNodeTypeWhitespace; | |
1167 | parser->node->additionalData = NULL; | |
1168 | if (!reportNewLeaf(parser)) return false; | |
1169 | _inputStreamGetCharacter(&parser->input, &ch); | |
1170 | } | |
1171 | _inputStreamClearMark(&parser->input); | |
1172 | ||
1173 | if (ch == '&') { | |
1174 | // Reference; for the time being, we don't worry about processing these; just report them as Entity references | |
1175 | if (!parseEntityReference(parser, true)) return false; | |
1176 | continue; | |
1177 | } | |
1178 | ||
1179 | // ch == '<'; element | CDSect | PI | Comment | |
1180 | if (!_inputStreamPeekCharacter(&parser->input, &ch)) break; | |
1181 | if (ch == '?') { // PI | |
1182 | _inputStreamGetCharacter(&parser->input, &ch); | |
1183 | if (!parseProcessingInstruction(parser, true)) | |
1184 | return false; | |
1185 | } else if (ch == '/') { // end tag; we're passing outside of content's production | |
1186 | _inputStreamReturnCharacter(&parser->input, '<'); // Back off to the '<' | |
1187 | return true; | |
1188 | } else if (ch != '!') { // element | |
1189 | if (!parseTag(parser)) return false; | |
1190 | } else { | |
1191 | // Comment | CDSect | |
1192 | UniChar dashes[3] = {'!', '-', '-'}; | |
1193 | if (_inputStreamMatchString(&parser->input, dashes, 3)) { | |
1194 | // Comment | |
1195 | if (!parseComment(parser, true)) return false; | |
1196 | } else { | |
1197 | // Should have a CDSect; back off the "<!" and call parseCDSect | |
1198 | _inputStreamReturnCharacter(&parser->input, '<'); | |
1199 | if (!parseCDSect(parser)) return false; | |
1200 | } | |
1201 | } | |
1202 | } | |
1203 | ||
1204 | if(_inputStreamComposingErrorOccurred(&parser->input)) { | |
1205 | _CFReportError(parser, kCFXMLErrorEncodingConversionFailure, "Encountered string encoding error"); | |
1206 | return false; | |
1207 | } | |
1208 | // Only way to get here is if premature EOF was found | |
1209 | //#warning CF:Include the tag name here | |
1210 | _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing tag content"); | |
1211 | return false; | |
1212 | } | |
1213 | ||
1214 | static Boolean parseCDSect(CFXMLParserRef parser) { | |
1215 | const UniChar _CDSectOpening[9] = {'<', '!', '[', 'C', 'D', 'A', 'T', 'A', '['}; | |
1216 | const UniChar _CDSectClose[3] = {']', ']', '>'}; | |
1217 | if (!_inputStreamMatchString(&parser->input, _CDSectOpening, 9)) { | |
1218 | _CFReportError(parser, kCFXMLErrorMalformedCDSect, "Encountered bad prefix to a presumed CDATA section"); | |
1219 | return false; | |
1220 | } | |
1221 | if (!_inputStreamScanToCharacters(&parser->input, _CDSectClose, 3, (CFMutableStringRef)(parser->node->dataString))) { | |
1222 | _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing CDATA section"); | |
1223 | return false; | |
1224 | } | |
1225 | ||
1226 | parser->node->dataTypeID = kCFXMLNodeTypeCDATASection; | |
1227 | parser->node->additionalData = NULL; | |
1228 | return reportNewLeaf(parser); | |
1229 | } | |
1230 | ||
1231 | /* | |
1232 | [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' | |
1233 | */ | |
1234 | static Boolean validateCharacterReference(CFStringRef str) { | |
1235 | Boolean isHex; | |
1236 | CFIndex idx, len = CFStringGetLength(str); | |
1237 | if (len < 2) return false; | |
1238 | if (CFStringGetCharacterAtIndex(str, 0) != '#') return false; | |
1239 | if (CFStringGetCharacterAtIndex(str, 1) == 'x') { | |
1240 | isHex = true; | |
1241 | idx = 2; | |
1242 | if (len == 2) return false; | |
1243 | } else { | |
1244 | isHex = false; | |
1245 | idx = 1; | |
1246 | } | |
1247 | ||
1248 | while (idx < len) { | |
1249 | UniChar ch; | |
1250 | ch = CFStringGetCharacterAtIndex(str, idx); | |
1251 | idx ++; | |
1252 | if (!(ch <= '9' && ch >= '0') && | |
1253 | !(isHex && ((ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F')))) { | |
1254 | break; | |
1255 | } | |
1256 | } | |
1257 | return (idx == len); | |
1258 | } | |
1259 | ||
1260 | /* | |
1261 | [67] Reference ::= EntityRef | CharRef | |
1262 | [68] EntityRef ::= '&' Name ';' | |
1263 | */ | |
1264 | static Boolean parseEntityReference(CFXMLParserRef parser, Boolean report) { | |
1265 | UniChar ch; | |
1266 | CFXMLEntityReferenceInfo entData; | |
1267 | CFStringRef name = NULL; | |
1268 | if (!_inputStreamPeekCharacter(&parser->input, &ch)) { | |
1269 | _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing EntityReference"); | |
1270 | return false; | |
1271 | } | |
1272 | if (ch == '#') { | |
1273 | ch = ';'; | |
1274 | if (!_inputStreamScanToCharacters(&parser->input, &ch, 1, (CFMutableStringRef)parser->node->dataString)) { | |
1275 | _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing EntityReference"); | |
1276 | return false; | |
1277 | } else if (!validateCharacterReference(parser->node->dataString)) { | |
1278 | _CFReportError(parser, kCFXMLErrorMalformedCharacterReference, "Encountered illegal character while parsing character reference"); | |
1279 | return false; | |
1280 | } | |
1281 | entData.entityType = kCFXMLEntityTypeCharacter; | |
1282 | name = parser->node->dataString; | |
1283 | } else if (!_inputStreamScanXMLName(&parser->input, false, report ? &name : NULL) || !_inputStreamGetCharacter(&parser->input, &ch) || ch != ';') { | |
1284 | if (_inputStreamAtEOF(&parser->input)) { | |
1285 | _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing EntityReference"); | |
1286 | return false; | |
1287 | } else { | |
1288 | _CFReportError(parser, kCFXMLErrorMalformedName, "Encountered malformed name while parsing EntityReference"); | |
1289 | return false; | |
1290 | } | |
1291 | } else { | |
1292 | entData.entityType = kCFXMLEntityTypeParsedInternal; | |
1293 | } | |
1294 | if (report) { | |
1295 | CFStringRef tmp = parser->node->dataString; | |
1296 | Boolean success; | |
1297 | parser->node->dataTypeID = kCFXMLNodeTypeEntityReference; | |
1298 | parser->node->dataString = name; | |
1299 | parser->node->additionalData = &entData; | |
1300 | success = reportNewLeaf(parser); | |
1301 | parser->node->additionalData = NULL; | |
1302 | parser->node->dataString = tmp; | |
1303 | return success; | |
1304 | } else { | |
1305 | return true; | |
1306 | } | |
1307 | } | |
1308 | ||
1309 | #if 0 | |
1310 | // Kept from old entity reference parsing.... | |
1311 | { | |
1312 | switch (*(parser->curr)) { | |
1313 | case 'l': // "lt" | |
1314 | if (len >= 3 && *(parser->curr+1) == 't' && *(parser->curr+2) == ';') { | |
1315 | ch = '<'; | |
1316 | parser->curr += 3; | |
1317 | break; | |
1318 | } | |
1319 | parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser)); | |
1320 | return; | |
1321 | case 'g': // "gt" | |
1322 | if (len >= 3 && *(parser->curr+1) == 't' && *(parser->curr+2) == ';') { | |
1323 | ch = '>'; | |
1324 | parser->curr += 3; | |
1325 | break; | |
1326 | } | |
1327 | parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser)); | |
1328 | return; | |
1329 | case 'a': // "apos" or "amp" | |
1330 | if (len < 4) { // Not enough characters for either conversion | |
1331 | parser->errorString = CFStringCreateWithCString(parser->allocator, "Encountered unexpected EOF", kCFStringEncodingASCII); | |
1332 | return; | |
1333 | } | |
1334 | if (*(parser->curr+1) == 'm') { | |
1335 | // "amp" | |
1336 | if (*(parser->curr+2) == 'p' && *(parser->curr+3) == ';') { | |
1337 | ch = '&'; | |
1338 | parser->curr += 4; | |
1339 | break; | |
1340 | } | |
1341 | } else if (*(parser->curr+1) == 'p') { | |
1342 | // "apos" | |
1343 | if (len > 4 && *(parser->curr+2) == 'o' && *(parser->curr+3) == 's' && *(parser->curr+4) == ';') { | |
1344 | ch = '\''; | |
1345 | parser->curr += 5; | |
1346 | break; | |
1347 | } | |
1348 | } | |
1349 | parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser)); | |
1350 | return; | |
1351 | case 'q': // "quote" | |
1352 | if (len >= 6 && *(parser->curr+1) == 'u' && *(parser->curr+2) == 'o' && *(parser->curr+3) == 't' && *(parser->curr+4) == 'e' && *(parser->curr+5) == ';') { | |
1353 | ch = '\"'; | |
1354 | parser->curr += 6; | |
1355 | break; | |
1356 | } | |
1357 | parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser)); | |
1358 | return; | |
1359 | case '#': | |
1360 | { | |
1361 | UniChar num = 0; | |
1362 | Boolean isHex = false; | |
1363 | if ( len < 4) { // Not enough characters to make it all fit! Need at least "&#d;" | |
1364 | parser->errorString = CFStringCreateWithCString(parser->allocator, "Encountered unexpected EOF", kCFStringEncodingASCII); | |
1365 | return; | |
1366 | } | |
1367 | parser->curr ++; | |
1368 | if (*(parser->curr) == 'x') { | |
1369 | isHex = true; | |
1370 | parser->curr ++; | |
1371 | } | |
1372 | while (parser->curr < parser->end) { | |
1373 | ch = *(parser->curr); | |
1374 | if (ch == ';') { | |
1375 | CFStringAppendCharacters(string, &num, 1); | |
1376 | parser->curr ++; | |
1377 | return; | |
1378 | } | |
1379 | if (!isHex) num = num*10; | |
1380 | else num = num << 4; | |
1381 | if (ch <= '9' && ch >= '0') { | |
1382 | num += (ch - '0'); | |
1383 | } else if (!isHex) { | |
1384 | parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unexpected character %c at line %d"), ch, lineNumber(parser)); | |
1385 | return; | |
1386 | } else if (ch >= 'a' && ch <= 'f') { | |
1387 | num += 10 + (ch - 'a'); | |
1388 | } else if (ch >= 'A' && ch <= 'F') { | |
1389 | num += 10 + (ch - 'A'); | |
1390 | } else { | |
1391 | parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unexpected character %c at line %d"), ch, lineNumber(parser)); | |
1392 | return; | |
1393 | } | |
1394 | } | |
1395 | parser->errorString = CFStringCreateWithCString(parser->allocator, "Encountered unexpected EOF", kCFStringEncodingASCII); | |
1396 | return; | |
1397 | } | |
1398 | default: | |
1399 | parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser)); | |
1400 | return; | |
1401 | } | |
1402 | CFStringAppendCharacters(string, &ch, 1); | |
1403 | } | |
1404 | #endif | |
1405 | ||
1406 | /* | |
1407 | [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) | |
1408 | */ | |
1409 | static Boolean parsePCData(CFXMLParserRef parser) { | |
1410 | UniChar ch; | |
1411 | Boolean done = false; | |
1412 | _inputStreamSetMark(&parser->input); | |
1413 | while (!done && _inputStreamGetCharacter(&parser->input, &ch)) { | |
1414 | switch (ch) { | |
1415 | case '<': | |
1416 | case '&': | |
1417 | _inputStreamReturnCharacter(&parser->input, ch); | |
1418 | done = true; | |
1419 | break; | |
1420 | case ']': | |
1421 | { | |
1422 | const UniChar endSequence[2] = {']', '>'}; | |
1423 | if (_inputStreamMatchString(&parser->input, endSequence, 2)) { | |
1424 | _CFReportError(parser, kCFXMLErrorMalformedParsedCharacterData, "Encountered \"]]>\" in parsed character data"); | |
1425 | _inputStreamClearMark(&parser->input); | |
1426 | return false; | |
1427 | } | |
1428 | break; | |
1429 | } | |
1430 | default: | |
1431 | ; | |
1432 | } | |
1433 | } | |
1434 | _inputStreamGetCharactersFromMark(&parser->input, (CFMutableStringRef)(parser->node->dataString)); | |
1435 | _inputStreamClearMark(&parser->input); | |
1436 | parser->node->dataTypeID = kCFXMLNodeTypeText; | |
1437 | parser->node->additionalData = NULL; | |
1438 | return reportNewLeaf(parser); | |
1439 | } | |
1440 | ||
1441 | /* | |
1442 | [42] ETag ::= '</' Name S? '>' | |
1443 | */ | |
1444 | static Boolean parseCloseTag(CFXMLParserRef parser, CFStringRef tag) { | |
1445 | const UniChar beginEndTag[2] = {'<', '/'}; | |
1446 | Boolean unexpectedEOF = false, mismatch = false; | |
1447 | CFStringRef closeTag; | |
1448 | ||
1449 | // We can get away with testing pointer equality between tag & closeTag because scanXMLName guarantees the strings it returns are unique. | |
1450 | if (_inputStreamMatchString(&parser->input, beginEndTag, 2) && _inputStreamScanXMLName(&parser->input, false, &closeTag) && closeTag == tag) { | |
1451 | ||
1452 | UniChar ch; | |
1453 | _inputStreamSkipWhitespace(&parser->input, NULL); | |
1454 | if (!_inputStreamGetCharacter(&parser->input, &ch)) { | |
1455 | unexpectedEOF = true; | |
1456 | } else if (ch != '>') { | |
1457 | mismatch = true; | |
1458 | } | |
1459 | } else if (_inputStreamAtEOF(&parser->input)) { | |
1460 | unexpectedEOF = true; | |
1461 | } else { | |
1462 | mismatch = true; | |
1463 | } | |
1464 | ||
1465 | if (unexpectedEOF || mismatch) { | |
1466 | if (unexpectedEOF) { | |
1467 | parser->errorString = CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered unexpected EOF while parsing close tag for <%@>"), tag); | |
1468 | parser->status = kCFXMLErrorUnexpectedEOF; | |
1469 | if(parser->callBacks.handleError) INVOKE_CALLBACK3(parser->callBacks.handleError, parser, kCFXMLErrorUnexpectedEOF, parser->context.info); | |
1470 | } else { | |
1471 | parser->errorString = CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered malformed close tag for <%@>"), tag); | |
1472 | parser->status = kCFXMLErrorMalformedCloseTag; | |
1473 | if(parser->callBacks.handleError) INVOKE_CALLBACK3(parser->callBacks.handleError, parser, kCFXMLErrorMalformedCloseTag, parser->context.info); | |
1474 | } | |
1475 | return false; | |
1476 | } | |
1477 | return true; | |
1478 | } | |
1479 | ||
1480 | /* | |
1481 | [39] element ::= EmptyElementTag | STag content ETag | |
1482 | [40] STag ::= '<' Name (S Attribute)* S? '>' | |
1483 | [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' | |
1484 | */ | |
1485 | static Boolean parseTag(CFXMLParserRef parser) { | |
1486 | UniChar ch; | |
1487 | void *tag; | |
1488 | CFXMLElementInfo data; | |
1489 | Boolean success = true; | |
1490 | CFStringRef tagName; | |
1491 | ||
1492 | if (!_inputStreamScanXMLName(&parser->input, false, &tagName)) { | |
1493 | _CFReportError(parser, kCFXMLErrorMalformedStartTag, "Encountered malformed start tag"); | |
1494 | return false; | |
1495 | } | |
1496 | ||
1497 | _inputStreamSkipWhitespace(&parser->input, NULL); | |
1498 | ||
1499 | if (!parseAttributes(parser)) return false; // parsed directly into parser->argDict ; parseAttributes consumes any trailing whitespace | |
1500 | data.attributes = parser->argDict; | |
1501 | data.attributeOrder = parser->argArray; | |
1502 | if (!_inputStreamGetCharacter(&parser->input, &ch)) { | |
1503 | _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF"); | |
1504 | return false; | |
1505 | } | |
1506 | if (ch == '/') { | |
1507 | data.isEmpty = true; | |
1508 | if (!_inputStreamGetCharacter(&parser->input, &ch)) { | |
1509 | _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF"); | |
1510 | return false; | |
1511 | } | |
1512 | } else { | |
1513 | data.isEmpty = false; | |
1514 | } | |
1515 | if (ch != '>') { | |
1516 | _CFReportError(parser, kCFXMLErrorMalformedStartTag, "Encountered malformed start tag"); | |
1517 | return false; | |
1518 | } | |
1519 | ||
1520 | if (*parser->top || parser->top == parser->stack) { | |
1521 | CFStringRef oldStr = parser->node->dataString; | |
1522 | parser->node->dataTypeID = kCFXMLNodeTypeElement; | |
1523 | parser->node->dataString = tagName; | |
1524 | parser->node->additionalData = &data; | |
1525 | tag = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info); | |
1526 | if (tag && parser->status == kCFXMLStatusParseInProgress) { | |
1527 | INVOKE_CALLBACK4(parser->callBacks.addChild, parser, *parser->top, tag, parser->context.info); | |
1528 | } | |
1529 | parser->node->additionalData = NULL; | |
1530 | parser->node->dataString = oldStr; | |
1531 | if (parser->status != kCFXMLStatusParseInProgress) { | |
1532 | // callback called CFXMLParserAbort() | |
1533 | _CFReportError(parser, parser->status, NULL); | |
1534 | return false; | |
1535 | } | |
1536 | } else { | |
1537 | tag = NULL; | |
1538 | } | |
1539 | ||
1540 | pushXMLNode(parser, tag); | |
1541 | if (!data.isEmpty) { | |
1542 | success = parseTagContent(parser); | |
1543 | if (success) { | |
1544 | success = parseCloseTag(parser, tagName); | |
1545 | } | |
1546 | } | |
1547 | parser->top --; | |
1548 | ||
1549 | if (success && tag) { | |
1550 | INVOKE_CALLBACK3(parser->callBacks.endXMLStructure, parser, tag, parser->context.info); | |
1551 | if (parser->status != kCFXMLStatusParseInProgress) { | |
1552 | _CFReportError(parser, parser->status, NULL); | |
1553 | return false; | |
1554 | } | |
1555 | } | |
1556 | return success; | |
1557 | } | |
1558 | ||
1559 | /* | |
1560 | [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" | |
1561 | [67] Reference ::= EntityRef | CharRef | |
1562 | [68] EntityRef ::= '&' Name ';' | |
1563 | */ | |
1564 | // For the moment, we don't worry about references in the attribute values. | |
1565 | static Boolean parseAttributeValue(CFXMLParserRef parser, CFMutableStringRef str) { | |
1566 | UniChar quote, ch; | |
1567 | Boolean success = _inputStreamGetCharacter(&parser->input, "e); | |
1568 | if (!success || (quote != '\'' && quote != '\"')) return false; | |
1569 | if (str) _inputStreamSetMark(&parser->input); | |
1570 | while (_inputStreamGetCharacter(&parser->input, &ch) && ch != quote) { | |
1571 | switch (ch) { | |
1572 | case '<': success = false; break; | |
1573 | case '&': | |
1574 | if (!parseEntityReference(parser, false)) { | |
1575 | success = false; | |
1576 | break; | |
1577 | } | |
1578 | default: | |
1579 | ; | |
1580 | } | |
1581 | } | |
1582 | ||
1583 | if (success && _inputStreamAtEOF(&parser->input)) { | |
1584 | success = false; | |
1585 | } | |
1586 | if (str) { | |
1587 | if (success) { | |
1588 | _inputStreamReturnCharacter(&parser->input, quote); | |
1589 | _inputStreamGetCharactersFromMark(&parser->input, str); | |
1590 | _inputStreamGetCharacter(&parser->input, &ch); | |
1591 | } | |
1592 | _inputStreamClearMark(&parser->input); | |
1593 | } | |
1594 | return success; | |
1595 | } | |
1596 | ||
1597 | /* | |
1598 | [40] STag ::= '<' Name (S Attribute)* S? '>' | |
1599 | [41] Attribute ::= Name Eq AttValue | |
1600 | [25] Eq ::= S? '=' S? | |
1601 | */ | |
1602 | ||
1603 | // Expects parser->curr to be at the first content character; will consume the trailing whitespace. | |
1604 | Boolean parseAttributes(CFXMLParserRef parser) { | |
1605 | UniChar ch; | |
1606 | CFMutableDictionaryRef dict; | |
1607 | CFMutableArrayRef array; | |
1608 | Boolean failure = false; | |
1609 | if (_inputStreamPeekCharacter(&parser->input, &ch) == '>') { | |
1610 | if (parser->argDict) { | |
1611 | CFDictionaryRemoveAllValues(parser->argDict); | |
1612 | CFArrayRemoveAllValues(parser->argArray); | |
1613 | } | |
1614 | return true; // No attributes; let caller deal with it | |
1615 | } | |
1616 | if (!parser->argDict) { | |
1617 | parser->argDict = CFDictionaryCreateMutable(CFGetAllocator(parser), 0, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks); | |
1618 | parser->argArray = CFArrayCreateMutable(CFGetAllocator(parser), 0, &kCFTypeArrayCallBacks); | |
1619 | } else { | |
1620 | CFDictionaryRemoveAllValues(parser->argDict); | |
1621 | CFArrayRemoveAllValues(parser->argArray); | |
1622 | } | |
1623 | dict = parser->argDict; | |
1624 | array = parser->argArray; | |
1625 | while (!failure && _inputStreamPeekCharacter(&parser->input, &ch) && ch != '>' && ch != '/') { | |
1626 | CFStringRef key; | |
1627 | CFMutableStringRef value; | |
1628 | if (!_inputStreamScanXMLName(&parser->input, false, &key)) { | |
1629 | failure = true; | |
1630 | break; | |
1631 | } | |
1632 | if (CFArrayGetFirstIndexOfValue(array, CFRangeMake(0, CFArrayGetCount(array)), key) != kCFNotFound) { | |
8ca704e1 A |
1633 | _CFReportError(parser, kCFXMLErrorMalformedStartTag, "Found repeated attribute"); |
1634 | return false; | |
9ce05555 A |
1635 | } |
1636 | _inputStreamSkipWhitespace(&parser->input, NULL); | |
1637 | if (!_inputStreamGetCharacter(&parser->input, &ch) || ch != '=') { | |
1638 | failure = true; | |
1639 | break; | |
1640 | } | |
1641 | _inputStreamSkipWhitespace(&parser->input, NULL); | |
1642 | value = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser)); | |
1643 | if (!parseAttributeValue(parser, value)) { | |
1644 | CFRelease(value); | |
1645 | failure = true; | |
1646 | break; | |
1647 | } | |
1648 | CFArrayAppendValue(array, key); | |
1649 | CFDictionarySetValue(dict, key, value); | |
1650 | CFRelease(value); | |
1651 | _inputStreamSkipWhitespace(&parser->input, NULL); | |
1652 | } | |
1653 | if (failure) { | |
1654 | //#warning CF:Include tag name in this error report | |
1655 | _CFReportError(parser, kCFXMLErrorMalformedStartTag, "Found illegal character while parsing element tag"); | |
1656 | return false; | |
1657 | } else if (_inputStreamAtEOF(&parser->input)) { | |
1658 | _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing element attributes"); | |
1659 | return false; | |
1660 | } else { | |
1661 | return true; | |
1662 | } | |
1663 | } | |
1664 | ||
1665 | /* | |
1666 | [1] document ::= prolog element Misc* | |
1667 | [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? | |
1668 | [27] Misc ::= Comment | PI | S | |
1669 | [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' | |
1670 | ||
1671 | We treat XMLDecl as a plain old PI, since PI is part of Misc. This changes the prolog and document productions to | |
1672 | [22-1] prolog ::= Misc* (doctypedecl Misc*)? | |
1673 | [1-1] document ::= Misc* (doctypedecl Misc*)? element Misc* | |
1674 | ||
1675 | NOTE: This function assumes parser->stack has a valid top. I.e. the document pointer has already been created! | |
1676 | */ | |
1677 | static Boolean parseXML(CFXMLParserRef parser) { | |
1678 | Boolean success = true, sawDTD = false, sawElement = false; | |
1679 | UniChar ch; | |
1680 | while (success && _inputStreamPeekCharacter(&parser->input, &ch)) { | |
1681 | switch (ch) { | |
1682 | case ' ': | |
1683 | case '\n': | |
1684 | case '\t': | |
1685 | case '\r': | |
1686 | success = parseWhitespace(parser); | |
1687 | break; | |
1688 | case '<': | |
1689 | _inputStreamGetCharacter(&parser->input, &ch); | |
1690 | if (!_inputStreamGetCharacter(&parser->input, &ch)) { | |
1691 | _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing top-level document"); | |
1692 | return false; | |
1693 | } | |
1694 | if (ch == '!') { | |
1695 | // Comment or DTD | |
1696 | UniChar dashes[2] = {'-', '-'}; | |
1697 | if (_inputStreamMatchString(&parser->input, dashes, 2)) { | |
1698 | // Comment | |
1699 | success = parseComment(parser, true); | |
1700 | } else { | |
1701 | // Should be DTD | |
1702 | if (sawDTD) { | |
1703 | _CFReportError(parser, kCFXMLErrorMalformedDocument, "Encountered a second DTD"); | |
1704 | return false; | |
1705 | } | |
1706 | success = parseDTD(parser); | |
1707 | if (success) sawDTD = true; | |
1708 | } | |
1709 | } else if (ch == '?') { | |
1710 | // Processing instruction | |
1711 | success = parseProcessingInstruction(parser, true); | |
1712 | } else { | |
1713 | // Tag or malformed | |
1714 | if (sawElement) { | |
1715 | _CFReportError(parser, kCFXMLErrorMalformedDocument, "Encountered second top-level element"); | |
1716 | return false; | |
1717 | } | |
1718 | _inputStreamReturnCharacter(&parser->input, ch); | |
1719 | success = parseTag(parser); | |
1720 | if (success) sawElement = true; | |
1721 | } | |
1722 | break; | |
1723 | default: { | |
1724 | parser->status = kCFXMLErrorMalformedDocument; | |
1725 | parser->errorString = ch < 256 ? | |
1726 | CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered unexpected character 0x%x (\'%c\') at top-level"), ch, ch) : | |
1727 | CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered unexpected Unicode character 0x%x at top-level"), ch); | |
1728 | ||
1729 | if (parser->callBacks.handleError) { | |
1730 | INVOKE_CALLBACK3(parser->callBacks.handleError, parser, parser->status, parser->context.info); | |
1731 | } | |
1732 | return false; | |
1733 | } | |
1734 | } | |
1735 | } | |
1736 | ||
1737 | if (!success) return false; | |
1738 | if (!sawElement) { | |
1739 | _CFReportError(parser, kCFXMLErrorElementlessDocument, "No element found in document"); | |
1740 | return false; | |
1741 | } | |
1742 | return true; | |
1743 | } | |
1744 | ||
1745 | static void _CFReportError(CFXMLParserRef parser, CFXMLParserStatusCode errNum, const char *str) { | |
1746 | if (str) { | |
1747 | parser->status = errNum; | |
1748 | parser->errorString = CFStringCreateWithCString(CFGetAllocator(parser), str, kCFStringEncodingASCII); | |
1749 | } | |
1750 | if (parser->callBacks.handleError) { | |
1751 | INVOKE_CALLBACK3(parser->callBacks.handleError, parser, errNum, parser->context.info); | |
1752 | } | |
1753 | } | |
1754 | ||
1755 | // Assumes parser->node has been set and is ready to go | |
1756 | static Boolean reportNewLeaf(CFXMLParserRef parser) { | |
1757 | void *xmlStruct; | |
1758 | if (*(parser->top) == NULL) return true; | |
1759 | ||
1760 | xmlStruct = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info); | |
1761 | if (xmlStruct && parser->status == kCFXMLStatusParseInProgress) { | |
1762 | INVOKE_CALLBACK4(parser->callBacks.addChild, parser, *(parser->top), xmlStruct, parser->context.info); | |
1763 | if (parser->status == kCFXMLStatusParseInProgress) INVOKE_CALLBACK3(parser->callBacks.endXMLStructure, parser, xmlStruct, parser->context.info); | |
1764 | } | |
1765 | if (parser->status != kCFXMLStatusParseInProgress) { | |
1766 | _CFReportError(parser, parser->status, NULL); | |
1767 | return false; | |
1768 | } | |
1769 | return true; | |
1770 | } | |
1771 | ||
1772 | static void pushXMLNode(CFXMLParserRef parser, void *node) { | |
1773 | parser->top ++; | |
1774 | if ((unsigned)(parser->top - parser->stack) == parser->capacity) { | |
bd5b749c | 1775 | parser->stack = (void **)CFAllocatorReallocate(CFGetAllocator(parser), parser->stack, 2 * parser->capacity * sizeof(void *), 0); |
9ce05555 A |
1776 | parser->top = parser->stack + parser->capacity; |
1777 | parser->capacity = 2*parser->capacity; | |
1778 | } | |
1779 | *(parser->top) = node; | |
1780 | } | |
1781 | ||
1782 | /**************************/ | |
1783 | /* Parsing to a CFXMLTree */ | |
1784 | /**************************/ | |
1785 | ||
1786 | static void *_XMLTreeCreateXMLStructure(CFXMLParserRef parser, CFXMLNodeRef node, void *context) { | |
1787 | CFXMLNodeRef myNode = CFXMLNodeCreateCopy(CFGetAllocator(parser), node); | |
1788 | CFXMLTreeRef tree = CFXMLTreeCreateWithNode(CFGetAllocator(parser), myNode); | |
1789 | CFRelease(myNode); | |
1790 | return (void *)tree; | |
1791 | } | |
1792 | ||
1793 | static void _XMLTreeAddChild(CFXMLParserRef parser, void *parent, void *child, void *context) { | |
1794 | CFTreeAppendChild((CFTreeRef)parent, (CFTreeRef)child); | |
1795 | } | |
1796 | ||
1797 | static void _XMLTreeEndXMLStructure(CFXMLParserRef parser, void *xmlType, void *context) { | |
1798 | CFXMLTreeRef node = (CFXMLTreeRef)xmlType; | |
1799 | if (CFTreeGetParent(node)) | |
1800 | CFRelease((CFXMLTreeRef)xmlType); | |
1801 | } | |
1802 | ||
d8925383 A |
1803 | CFXMLTreeRef CFXMLTreeCreateWithDataFromURL(CFAllocatorRef allocator, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex version) { |
1804 | CFXMLParserRef parser; | |
1805 | CFXMLParserCallBacks callbacks; | |
1806 | CFXMLTreeRef result; | |
1807 | ||
1808 | CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__); | |
9ce05555 | 1809 | |
d8925383 A |
1810 | callbacks.createXMLStructure = _XMLTreeCreateXMLStructure; |
1811 | callbacks.addChild = _XMLTreeAddChild; | |
1812 | callbacks.endXMLStructure = _XMLTreeEndXMLStructure; | |
1813 | callbacks.resolveExternalEntity = NULL; | |
1814 | callbacks.handleError = NULL; | |
1815 | parser = CFXMLParserCreateWithDataFromURL(allocator, dataSource, parseOptions, version, &callbacks, NULL); | |
1816 | ||
1817 | if (CFXMLParserParse(parser)) { | |
1818 | result = (CFXMLTreeRef)CFXMLParserGetDocument(parser); | |
1819 | } else { | |
1820 | result = (CFXMLTreeRef)CFXMLParserGetDocument(parser); | |
1821 | if (result) CFRelease(result); | |
1822 | result = NULL; | |
1823 | } | |
1824 | CFRelease(parser); | |
1825 | return result; | |
1826 | } | |
1827 | ||
1828 | CFXMLTreeRef CFXMLTreeCreateFromData(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex parserVersion) { | |
9ce05555 A |
1829 | return CFXMLTreeCreateFromDataWithError(allocator, xmlData, dataSource, parseOptions, parserVersion, NULL); |
1830 | } | |
1831 | ||
9ce05555 A |
1832 | CONST_STRING_DECL(kCFXMLTreeErrorDescription, "kCFXMLTreeErrorDescription"); |
1833 | CONST_STRING_DECL(kCFXMLTreeErrorLineNumber, "kCFXMLTreeErrorLineNumber"); | |
1834 | CONST_STRING_DECL(kCFXMLTreeErrorLocation, "kCFXMLTreeErrorLocation"); | |
1835 | CONST_STRING_DECL(kCFXMLTreeErrorStatusCode, "kCFXMLTreeErrorStatusCode"); | |
1836 | ||
1837 | CFXMLTreeRef CFXMLTreeCreateFromDataWithError(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex parserVersion, CFDictionaryRef *errorDict) { | |
1838 | CFXMLParserRef parser; | |
1839 | CFXMLParserCallBacks callbacks; | |
1840 | CFXMLTreeRef result; | |
1841 | ||
1842 | __CFGenericValidateType(xmlData, CFDataGetTypeID()); | |
1843 | CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__); | |
1844 | ||
1845 | callbacks.createXMLStructure = _XMLTreeCreateXMLStructure; | |
1846 | callbacks.addChild = _XMLTreeAddChild; | |
1847 | callbacks.endXMLStructure = _XMLTreeEndXMLStructure; | |
1848 | callbacks.resolveExternalEntity = NULL; | |
1849 | callbacks.handleError = NULL; | |
1850 | parser = CFXMLParserCreate(allocator, xmlData, dataSource, parseOptions, parserVersion, &callbacks, NULL); | |
1851 | ||
1852 | if (CFXMLParserParse(parser)) { | |
1853 | result = (CFXMLTreeRef)CFXMLParserGetDocument(parser); | |
1854 | } else { | |
1855 | if (errorDict) { // collect the error dictionary | |
1856 | *errorDict = CFDictionaryCreateMutable(allocator, 4, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks); | |
1857 | if (*errorDict) { | |
1858 | CFIndex rawnum; | |
1859 | CFNumberRef cfnum; | |
1860 | CFStringRef errstring; | |
1861 | ||
1862 | rawnum = CFXMLParserGetLocation(parser); | |
1863 | cfnum = CFNumberCreate(allocator, kCFNumberSInt32Type, &rawnum); | |
1864 | if(cfnum) { | |
1865 | CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorLocation, cfnum); | |
1866 | CFRelease(cfnum); | |
1867 | } | |
1868 | ||
1869 | rawnum = CFXMLParserGetLineNumber(parser); | |
1870 | cfnum = CFNumberCreate(allocator, kCFNumberSInt32Type, &rawnum); | |
1871 | if(cfnum) { | |
1872 | CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorLineNumber, cfnum); | |
1873 | CFRelease(cfnum); | |
1874 | } | |
1875 | ||
1876 | rawnum = CFXMLParserGetStatusCode(parser); | |
1877 | cfnum = CFNumberCreate(allocator, kCFNumberSInt32Type, &rawnum); | |
1878 | if(cfnum) { | |
1879 | CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorStatusCode, cfnum); | |
1880 | CFRelease(cfnum); | |
1881 | } | |
1882 | ||
1883 | errstring = CFXMLParserCopyErrorDescription(parser); | |
1884 | if(errstring) { | |
1885 | CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorDescription, errstring); | |
1886 | CFRelease(errstring); | |
1887 | } | |
1888 | } | |
1889 | } | |
1890 | result = (CFXMLTreeRef)CFXMLParserGetDocument(parser); | |
1891 | if (result) CFRelease(result); | |
1892 | result = NULL; | |
1893 | } | |
1894 | CFRelease(parser); | |
1895 | return result; | |
1896 | } | |
1897 | ||
1898 | /* | |
1899 | At the very least we need to do <, >, &, ", and '. In addition, we'll have to do everything else in the string. | |
1900 | We should also be handling items that are up over certain values correctly. | |
1901 | */ | |
1902 | CFStringRef CFXMLCreateStringByEscapingEntities(CFAllocatorRef allocator, CFStringRef string, CFDictionaryRef entitiesDictionary) { | |
1903 | CFAssert1(string != NULL, __kCFLogAssertion, "%s(): NULL string not permitted.", __PRETTY_FUNCTION__); | |
1904 | CFMutableStringRef newString = CFStringCreateMutable(allocator, 0); // unbounded mutable string | |
1905 | CFMutableCharacterSetRef startChars = CFCharacterSetCreateMutable(allocator); | |
1906 | ||
1907 | CFStringInlineBuffer inlineBuf; | |
1908 | CFIndex idx = 0; | |
1909 | CFIndex mark = idx; | |
1910 | CFIndex stringLength = CFStringGetLength(string); | |
1911 | UniChar uc; | |
1912 | ||
1913 | CFCharacterSetAddCharactersInString(startChars, CFSTR("&<>'\"")); | |
1914 | ||
1915 | CFStringInitInlineBuffer(string, &inlineBuf, CFRangeMake(0, stringLength)); | |
1916 | for(idx = 0; idx < stringLength; idx++) { | |
1917 | uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, idx); | |
1918 | if(CFCharacterSetIsCharacterMember(startChars, uc)) { | |
1919 | CFStringRef previousSubstring = CFStringCreateWithSubstring(allocator, string, CFRangeMake(mark, idx - mark)); | |
1920 | CFStringAppend(newString, previousSubstring); | |
1921 | CFRelease(previousSubstring); | |
1922 | switch(uc) { | |
1923 | case '&': | |
1924 | CFStringAppend(newString, CFSTR("&")); | |
1925 | break; | |
1926 | case '<': | |
1927 | CFStringAppend(newString, CFSTR("<")); | |
1928 | break; | |
1929 | case '>': | |
1930 | CFStringAppend(newString, CFSTR(">")); | |
1931 | break; | |
1932 | case '\'': | |
1933 | CFStringAppend(newString, CFSTR("'")); | |
1934 | break; | |
1935 | case '"': | |
1936 | CFStringAppend(newString, CFSTR(""")); | |
1937 | break; | |
1938 | } | |
1939 | mark = idx + 1; | |
1940 | } | |
1941 | } | |
d8925383 A |
1942 | // Copy the remainder to the output string before returning. |
1943 | CFStringRef remainder = CFStringCreateWithSubstring(allocator, string, CFRangeMake(mark, idx - mark)); | |
1944 | if (NULL != remainder) { | |
1945 | CFStringAppend(newString, remainder); | |
1946 | CFRelease(remainder); | |
1947 | } | |
1948 | ||
9ce05555 A |
1949 | CFRelease(startChars); |
1950 | return newString; | |
1951 | } | |
1952 | ||
1953 | CFStringRef CFXMLCreateStringByUnescapingEntities(CFAllocatorRef allocator, CFStringRef string, CFDictionaryRef entitiesDictionary) { | |
1954 | CFAssert1(string != NULL, __kCFLogAssertion, "%s(): NULL string not permitted.", __PRETTY_FUNCTION__); | |
1955 | ||
1956 | CFStringInlineBuffer inlineBuf; /* use this for fast traversal of the string in question */ | |
1957 | CFStringRef sub; | |
1958 | CFIndex lastChunkStart, length = CFStringGetLength(string); | |
1959 | CFIndex i, entityStart; | |
1960 | UniChar uc; | |
1961 | UInt32 entity; | |
1962 | int base; | |
1963 | CFMutableDictionaryRef fullReplDict = entitiesDictionary ? CFDictionaryCreateMutableCopy(allocator, 0, entitiesDictionary) : CFDictionaryCreateMutable(allocator, 0, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks); | |
1964 | ||
1965 | CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("amp"), (const void *)CFSTR("&")); | |
1966 | CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("quot"), (const void *)CFSTR("\"")); | |
1967 | CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("lt"), (const void *)CFSTR("<")); | |
1968 | CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("gt"), (const void *)CFSTR(">")); | |
1969 | CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("apos"), (const void *)CFSTR("'")); | |
1970 | ||
1971 | CFStringInitInlineBuffer(string, &inlineBuf, CFRangeMake(0, length - 1)); | |
1972 | CFMutableStringRef newString = CFStringCreateMutable(allocator, 0); | |
1973 | ||
1974 | lastChunkStart = 0; | |
1975 | // Scan through the string in its entirety | |
1976 | for(i = 0; i < length; ) { | |
1977 | uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++; // grab the next character and move i. | |
1978 | ||
1979 | if(uc == '&') { | |
1980 | entityStart = i - 1; | |
1981 | entity = 0xFFFF; // set this to a not-Unicode character as sentinel | |
1982 | // we've hit the beginning of an entity. Copy everything from lastChunkStart to this point. | |
1983 | if(lastChunkStart < i - 1) { | |
1984 | sub = CFStringCreateWithSubstring(allocator, string, CFRangeMake(lastChunkStart, (i - 1) - lastChunkStart)); | |
1985 | CFStringAppend(newString, sub); | |
1986 | CFRelease(sub); | |
1987 | } | |
1988 | ||
1989 | uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++; // grab the next character and move i. | |
1990 | // Now we can process the entity reference itself | |
1991 | if(uc == '#') { // this is a numeric entity. | |
1992 | base = 10; | |
1993 | entity = 0; | |
1994 | uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++; | |
1995 | ||
1996 | if(uc == 'x') { // only lowercase x allowed. Translating numeric entity as hexadecimal. | |
1997 | base = 16; | |
1998 | uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++; | |
1999 | } | |
2000 | ||
2001 | // process the provided digits 'til we're finished | |
2002 | while(true) { | |
2003 | if (uc >= '0' && uc <= '9') | |
2004 | entity = entity * base + (uc-'0'); | |
2005 | else if (uc >= 'a' && uc <= 'f' && base == 16) | |
2006 | entity = entity * base + (uc-'a'+10); | |
2007 | else if (uc >= 'A' && uc <= 'F' && base == 16) | |
2008 | entity = entity * base + (uc-'A'+10); | |
2009 | else break; | |
2010 | ||
2011 | if (i < length) { | |
2012 | uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++; | |
2013 | } | |
2014 | else | |
2015 | break; | |
2016 | } | |
2017 | } | |
2018 | ||
2019 | // Scan to the end of the entity | |
2020 | while(uc != ';' && i < length) { | |
2021 | uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++; | |
2022 | } | |
2023 | ||
2024 | if(0xFFFF != entity) { // it was numeric, and translated. | |
2025 | // Now, output the result fo the entity | |
2026 | if(entity >= 0x10000) { | |
2027 | UniChar characters[2] = { ((entity - 0x10000) >> 10) + 0xD800, ((entity - 0x10000) & 0x3ff) + 0xDC00 }; | |
2028 | CFStringAppendCharacters(newString, characters, 2); | |
2029 | } else { | |
2030 | UniChar character = entity; | |
2031 | CFStringAppendCharacters(newString, &character, 1); | |
2032 | } | |
2033 | } else { // it wasn't numeric. | |
2034 | sub = CFStringCreateWithSubstring(allocator, string, CFRangeMake(entityStart + 1, (i - entityStart - 2))); // This trims off the & and ; from the string, so we can use it against the dictionary itself. | |
bd5b749c | 2035 | CFStringRef replacementString = (CFStringRef)CFDictionaryGetValue(fullReplDict, sub); |
9ce05555 A |
2036 | if(replacementString) { |
2037 | CFStringAppend(newString, replacementString); | |
2038 | } else { | |
2039 | CFRelease(sub); // let the old substring go, since we didn't find it in the dictionary | |
2040 | sub = CFStringCreateWithSubstring(allocator, string, CFRangeMake(entityStart, (i - entityStart))); // create a new one, including the & and ; | |
2041 | CFStringAppend(newString, sub); // ...and append that. | |
2042 | } | |
2043 | CFRelease(sub); // in either case, release the most-recent "sub" | |
2044 | } | |
2045 | ||
2046 | // move the lastChunkStart to the beginning of the next chunk. | |
2047 | lastChunkStart = i; | |
2048 | } | |
2049 | } | |
2050 | if(lastChunkStart < length) { // we've come out of the loop, let's get the rest of the string and tack it on. | |
2051 | sub = CFStringCreateWithSubstring(allocator, string, CFRangeMake(lastChunkStart, i - lastChunkStart)); | |
2052 | CFStringAppend(newString, sub); | |
2053 | CFRelease(sub); | |
2054 | } | |
2055 | ||
2056 | CFRelease(fullReplDict); | |
2057 | ||
2058 | return newString; | |
2059 | } | |
2060 | ||
9ce05555 | 2061 |