2 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
23 * @APPLE_LICENSE_HEADER_END@
26 Copyright 1999-2002, Apple, Inc. All rights reserved.
27 Responsibility: Chris Parker
30 #include <CoreFoundation/CFXMLParser.h>
31 #include <CoreFoundation/CFNumber.h>
32 #include "CFXMLInputStream.h"
33 #include "CFUniChar.h"
34 #include "CFInternal.h"
36 struct __CFXMLParser
{
37 CFRuntimeBase _cfBase
;
39 _CFXMLInputStream input
;
45 struct __CFXMLNode
*node
; // Our private node; we use it to report back information
46 CFMutableDictionaryRef argDict
;
47 CFMutableArrayRef argArray
;
50 CFXMLParserCallBacks callBacks
;
51 CFXMLParserContext context
;
53 CFXMLParserStatusCode status
;
54 CFStringRef errorString
;
57 static CFStringRef
__CFXMLParserCopyDescription(CFTypeRef cf
) {
58 const struct __CFXMLParser
*parser
= cf
;
59 return CFStringCreateWithFormat(CFGetAllocator(cf
), NULL
, CFSTR("<CFXMLParser 0x%x>"), parser
);
62 static void __CFXMLParserDeallocate(CFTypeRef cf
) {
63 struct __CFXMLParser
*parser
= (struct __CFXMLParser
*)cf
;
64 CFAllocatorRef alloc
= CFGetAllocator(parser
);
65 _freeInputStream(&(parser
->input
));
66 if (parser
->argDict
) CFRelease(parser
->argDict
);
67 if (parser
->argArray
) CFRelease(parser
->argArray
);
68 if (parser
->errorString
) CFRelease(parser
->errorString
);
69 if (parser
->node
) CFRelease(parser
->node
);
70 CFAllocatorDeallocate(alloc
, parser
->stack
);
71 if (parser
->context
.info
&& parser
->context
.release
) {
72 parser
->context
.release(parser
->context
.info
);
76 static CFTypeID __kCFXMLParserTypeID
= _kCFRuntimeNotATypeID
;
78 static const CFRuntimeClass __CFXMLParserClass
= {
83 __CFXMLParserDeallocate
,
87 __CFXMLParserCopyDescription
90 __private_extern__
void __CFXMLParserInitialize(void) {
91 __kCFXMLParserTypeID
= _CFRuntimeRegisterClass(&__CFXMLParserClass
);
94 CFTypeID
CFXMLParserGetTypeID(void) {
95 return __kCFXMLParserTypeID
;
104 void CFXMLParserGetContext(CFXMLParserRef parser
, CFXMLParserContext
*context
) {
105 CFAssert1(parser
!= NULL
, __kCFLogAssertion
, "%s(): NULL parser not permitted", __PRETTY_FUNCTION__
);
106 __CFGenericValidateType(parser
, __kCFXMLParserTypeID
);
108 context
->version
= parser
->context
.version
;
109 context
->info
= parser
->context
.info
;
110 context
->retain
= (void *)((uintptr_t)parser
->context
.retain
& __mask
);
111 context
->release
= (void *)((uintptr_t)parser
->context
.release
& __mask
);
112 context
->copyDescription
= (void *)((uintptr_t)parser
->context
.copyDescription
& __mask
);
116 void CFXMLParserGetCallBacks(CFXMLParserRef parser
, CFXMLParserCallBacks
*callBacks
) {
117 __CFGenericValidateType(parser
, __kCFXMLParserTypeID
);
119 callBacks
->version
= parser
->callBacks
.version
;
120 callBacks
->createXMLStructure
= (void *)((uintptr_t)parser
->callBacks
.createXMLStructure
& __mask
);
121 callBacks
->addChild
= (void *)((uintptr_t)parser
->callBacks
.addChild
& __mask
);
122 callBacks
->endXMLStructure
= (void *)((uintptr_t)parser
->callBacks
.endXMLStructure
& __mask
);
123 callBacks
->resolveExternalEntity
= (void *)((uintptr_t)parser
->callBacks
.resolveExternalEntity
& __mask
);
124 callBacks
->handleError
= (void *)((uintptr_t)parser
->callBacks
.handleError
& __mask
);
130 CFURLRef
CFXMLParserGetSourceURL(CFXMLParserRef parser
) {
131 __CFGenericValidateType(parser
, __kCFXMLParserTypeID
);
132 return parser
->input
.url
;
135 /* Returns the character index or line number of the current parse location */
136 CFIndex
CFXMLParserGetLocation(CFXMLParserRef parser
) {
137 __CFGenericValidateType(parser
, __kCFXMLParserTypeID
);
138 return _inputStreamCurrentLocation(&parser
->input
);
141 CFIndex
CFXMLParserGetLineNumber(CFXMLParserRef parser
) {
142 __CFGenericValidateType(parser
, __kCFXMLParserTypeID
);
143 return _inputStreamCurrentLine(&parser
->input
);
146 /* Returns the top-most object returned by the createXMLStructure callback */
147 void *CFXMLParserGetDocument(CFXMLParserRef parser
) {
148 __CFGenericValidateType(parser
, __kCFXMLParserTypeID
);
149 if (parser
->capacity
> 0)
150 return parser
->stack
[0];
155 CFXMLParserStatusCode
CFXMLParserGetStatusCode(CFXMLParserRef parser
) {
156 __CFGenericValidateType(parser
, __kCFXMLParserTypeID
);
157 return parser
->status
;
160 CFStringRef
CFXMLParserCopyErrorDescription(CFXMLParserRef parser
) {
161 __CFGenericValidateType(parser
, __kCFXMLParserTypeID
);
162 return CFRetain(parser
->errorString
);
165 void CFXMLParserAbort(CFXMLParserRef parser
, CFXMLParserStatusCode errorCode
, CFStringRef errorDescription
) {
166 __CFGenericValidateType(parser
, __kCFXMLParserTypeID
);
167 CFAssert1(errorCode
> 0, __kCFLogAssertion
, "%s(): errorCode must be greater than zero", __PRETTY_FUNCTION__
);
168 CFAssert1(errorDescription
!= NULL
, __kCFLogAssertion
, "%s(): errorDescription may not be NULL", __PRETTY_FUNCTION__
);
169 __CFGenericValidateType(errorDescription
, CFStringGetTypeID());
171 parser
->status
= errorCode
;
172 if (parser
->errorString
) CFRelease(parser
->errorString
);
173 parser
->errorString
= CFStringCreateCopy(NULL
, errorDescription
);
177 static Boolean
parseXML(CFXMLParserRef parser
);
178 static Boolean
parseComment(CFXMLParserRef parser
, Boolean report
);
179 static Boolean
parseProcessingInstruction(CFXMLParserRef parser
, Boolean report
);
180 static Boolean
parseInlineDTD(CFXMLParserRef parser
);
181 static Boolean
parseDTD(CFXMLParserRef parser
);
182 static Boolean
parsePhysicalEntityReference(CFXMLParserRef parser
);
183 static Boolean
parseCDSect(CFXMLParserRef parser
);
184 static Boolean
parseEntityReference(CFXMLParserRef parser
, Boolean report
);
185 static Boolean
parsePCData(CFXMLParserRef parser
);
186 static Boolean
parseWhitespace(CFXMLParserRef parser
);
187 static Boolean
parseAttributeListDeclaration(CFXMLParserRef parser
);
188 static Boolean
parseNotationDeclaration(CFXMLParserRef parser
);
189 static Boolean
parseElementDeclaration(CFXMLParserRef parser
);
190 static Boolean
parseEntityDeclaration(CFXMLParserRef parser
);
191 static Boolean
parseExternalID(CFXMLParserRef parser
, Boolean alsoAcceptPublicID
, CFXMLExternalID
*extID
);
192 static Boolean
parseCloseTag(CFXMLParserRef parser
, CFStringRef tag
);
193 static Boolean
parseTagContent(CFXMLParserRef parser
);
194 static Boolean
parseTag(CFXMLParserRef parser
);
195 static Boolean
parseAttributes(CFXMLParserRef parser
);
196 static Boolean
parseAttributeValue(CFXMLParserRef parser
, CFMutableStringRef str
);
198 // Utilities; may need to make these accessible to the property list parser to avoid code duplication
199 static void _CFReportError(CFXMLParserRef parser
, CFXMLParserStatusCode errNum
, const char *str
);
200 static Boolean
reportNewLeaf(CFXMLParserRef parser
); // Assumes parser->node has been set and is ready to go
201 static void pushXMLNode(CFXMLParserRef parser
, void *node
);
203 static CFXMLParserRef
__CFXMLParserInit(CFAllocatorRef alloc
, CFURLRef dataSource
, CFOptionFlags options
, CFDataRef xmlData
, CFIndex version
, CFXMLParserCallBacks
*callBacks
, CFXMLParserContext
*context
) {
204 struct __CFXMLParser
*parser
= (struct __CFXMLParser
*)_CFRuntimeCreateInstance(alloc
, __kCFXMLParserTypeID
, sizeof(struct __CFXMLParser
) - sizeof(CFRuntimeBase
), NULL
);
205 struct __CFXMLNode
*node
= (struct __CFXMLNode
*)_CFRuntimeCreateInstance(alloc
, CFXMLNodeGetTypeID(), sizeof(struct __CFXMLNode
) - sizeof(CFRuntimeBase
), NULL
);
207 if (parser
&& node
) {
208 alloc
= CFGetAllocator(parser
);
209 _initializeInputStream(&(parser
->input
), alloc
, dataSource
, xmlData
);
210 parser
->top
= parser
->stack
;
211 parser
->stack
= NULL
;
212 parser
->capacity
= 0;
214 buf
= CFAllocatorAllocate(alloc
, 128*sizeof(UniChar
), 0);
216 parser
->node
->dataString
= CFStringCreateMutableWithExternalCharactersNoCopy(alloc
, buf
, 0, 128, alloc
);
217 parser
->node
->additionalData
= NULL
;
218 parser
->node
->version
= version
;
219 parser
->argDict
= NULL
; // don't create these until necessary
220 parser
->argArray
= NULL
;
222 parser
->options
= options
;
223 parser
->callBacks
= *callBacks
;
225 FAULT_CALLBACK((void **)&(parser
->callBacks
.createXMLStructure
));
226 FAULT_CALLBACK((void **)&(parser
->callBacks
.addChild
));
227 FAULT_CALLBACK((void **)&(parser
->callBacks
.endXMLStructure
));
228 FAULT_CALLBACK((void **)&(parser
->callBacks
.resolveExternalEntity
));
229 FAULT_CALLBACK((void **)&(parser
->callBacks
.handleError
));
232 parser
->context
= *context
;
233 if (parser
->context
.info
&& parser
->context
.retain
) {
234 parser
->context
.retain(parser
->context
.info
);
237 parser
->context
.version
= 0;
238 parser
->context
.info
= NULL
;
239 parser
->context
.retain
= NULL
;
240 parser
->context
.release
= NULL
;
241 parser
->context
.copyDescription
= NULL
;
243 parser
->status
= kCFXMLStatusParseNotBegun
;
244 parser
->errorString
= NULL
;
246 if (parser
) CFRelease(parser
);
247 if (node
) CFRelease(node
);
253 CFXMLParserRef
CFXMLParserCreate(CFAllocatorRef allocator
, CFDataRef xmlData
, CFURLRef dataSource
, CFOptionFlags parseOptions
, CFIndex versionOfNodes
, CFXMLParserCallBacks
*callBacks
, CFXMLParserContext
*context
) {
254 CFAssert1(xmlData
!= NULL
, __kCFLogAssertion
, "%s(): NULL data not permitted", __PRETTY_FUNCTION__
);
255 __CFGenericValidateType(xmlData
, CFDataGetTypeID());
256 CFAssert1(dataSource
== NULL
|| CFGetTypeID(dataSource
) == CFURLGetTypeID(), __kCFLogAssertion
, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__
);
257 CFAssert1(callBacks
!= NULL
&& callBacks
->createXMLStructure
!= NULL
&& callBacks
->addChild
!= NULL
&& callBacks
->endXMLStructure
!= NULL
, __kCFLogAssertion
, "%s(): callbacks createXMLStructure, addChild, and endXMLStructure must all be non-NULL", __PRETTY_FUNCTION__
);
258 CFAssert2(versionOfNodes
<= 1, __kCFLogAssertion
, "%s(): version number %d is higher than supported by CFXMLParser", __PRETTY_FUNCTION__
, versionOfNodes
);
259 CFAssert1(versionOfNodes
!= 0, __kCFLogAssertion
, "%s(): version number 0 is no longer supported by CFXMLParser", __PRETTY_FUNCTION__
);
260 return __CFXMLParserInit(allocator
, dataSource
, parseOptions
, xmlData
, versionOfNodes
, callBacks
, context
);
264 Boolean
CFXMLParserParse(CFXMLParserRef parser
) {
265 CFXMLDocumentInfo docData
;
266 __CFGenericValidateType(parser
, __kCFXMLParserTypeID
);
267 if (parser
->status
!= kCFXMLStatusParseNotBegun
) return false;
268 parser
->status
= kCFXMLStatusParseInProgress
;
270 if (!_openInputStream(&parser
->input
)) {
271 if (!parser
->input
.data
) {
273 parser
->status
= kCFXMLErrorNoData
;
274 parser
->errorString
= CFStringCreateWithFormat(CFGetAllocator(parser
), NULL
, CFSTR("No data found at %@"), CFURLGetString(parser
->input
.url
));
276 // couldn't figure out the encoding
277 CFAssert(parser
->input
.encoding
== kCFStringEncodingInvalidId
, __kCFLogAssertion
, "CFXMLParser internal error: input stream could not be opened");
278 parser
->status
= kCFXMLErrorUnknownEncoding
;
279 parser
->errorString
= CFStringCreateWithCString(CFGetAllocator(parser
), "Encountered unknown encoding", kCFStringEncodingASCII
);
281 if (parser
->callBacks
.handleError
) {
282 INVOKE_CALLBACK3(parser
->callBacks
.handleError
, parser
, parser
->status
, parser
->context
.info
);
287 // Create the document
288 parser
->stack
= CFAllocatorAllocate(CFGetAllocator(parser
), 16 * sizeof(void *), 0);
289 parser
->capacity
= 16;
290 parser
->node
->dataTypeID
= kCFXMLNodeTypeDocument
;
291 docData
.encoding
= _inputStreamGetEncoding(&parser
->input
);
292 docData
.sourceURL
= parser
->input
.url
;
293 parser
->node
->additionalData
= &docData
;
294 parser
->stack
[0] = (void *)INVOKE_CALLBACK3(parser
->callBacks
.createXMLStructure
, parser
, parser
->node
, parser
->context
.info
);
295 parser
->top
= parser
->stack
;
296 parser
->node
->additionalData
= NULL
;
298 // Client may have called CFXMLParserAbort() during any callback, so we must always check to see if we have an error status after a callback
299 if (parser
->status
!= kCFXMLStatusParseInProgress
) {
300 _CFReportError(parser
, parser
->status
, NULL
);
303 return parseXML(parser
);
306 /* The next several functions are all intended to parse past a particular XML structure. They expect parser->curr to be set to the first content character of their structure (e.g. parseXMLComment expects parser->curr to be set just past "<!--"). They parse to the end of their structure, calling any necessary callbacks along the way, and advancing parser->curr as they go. They either return void (not possible for the parse to fail) or they return a Boolean (success/failure). The calling routines are expected to catch returned Booleans and fail immediately if false is returned. */
308 // [3] S ::= (#x20 | #x9 | #xD | #xA)+
309 static Boolean
parseWhitespace(CFXMLParserRef parser
) {
311 Boolean report
= !(parser
->options
& kCFXMLParserSkipWhitespace
);
312 len
= _inputStreamSkipWhitespace(&parser
->input
, report
? (CFMutableStringRef
)(parser
->node
->dataString
) : NULL
);
314 parser
->node
->dataTypeID
= kCFXMLNodeTypeWhitespace
;
315 parser
->node
->additionalData
= NULL
;
316 return reportNewLeaf(parser
);
322 // parser should be just past "<!--"
323 static Boolean
parseComment(CFXMLParserRef parser
, Boolean report
) {
324 const UniChar dashes
[2] = {'-', '-'};
326 report
= report
&& (!(parser
->options
& kCFXMLParserSkipMetaData
));
327 if (!_inputStreamScanToCharacters(&parser
->input
, dashes
, 2, report
? (CFMutableStringRef
)(parser
->node
->dataString
) : NULL
) || !_inputStreamGetCharacter(&parser
->input
, &ch
)) {
328 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
,"Found unexpected EOF while parsing comment");
330 } else if (ch
!= '>') {
331 _CFReportError(parser
, kCFXMLErrorMalformedComment
, "Found \"--\" within a comment");
334 parser
->node
->dataTypeID
= kCFXMLNodeTypeComment
;
335 parser
->node
->additionalData
= NULL
;
336 return reportNewLeaf(parser
);
343 [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
344 [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
346 // parser should be set to the first character after "<?"
347 static Boolean
parseProcessingInstruction(CFXMLParserRef parser
, Boolean report
) {
348 const UniChar piTermination
[2] = {'?', '>'};
349 CFMutableStringRef str
;
352 if (!_inputStreamScanXMLName(&parser
->input
, false, &name
)) {
353 _CFReportError(parser
, kCFXMLErrorMalformedProcessingInstruction
, "Found malformed processing instruction");
356 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
357 str
= (report
&& *parser
->top
) ? CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
)) : NULL
;
358 if (!_inputStreamScanToCharacters(&parser
->input
, piTermination
, 2, str
)) {
359 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing processing instruction");
360 if (str
) CFRelease(str
);
365 CFXMLProcessingInstructionInfo data
;
367 CFStringRef tmp
= parser
->node
->dataString
;
368 parser
->node
->dataTypeID
= kCFXMLNodeTypeProcessingInstruction
;
369 parser
->node
->dataString
= name
;
370 data
.dataString
= str
;
371 parser
->node
->additionalData
= &data
;
372 result
= reportNewLeaf(parser
);
373 parser
->node
->additionalData
= NULL
;
374 parser
->node
->dataString
= tmp
;
383 [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
385 static const UniChar _DoctypeOpening
[7] = {'D', 'O', 'C', 'T', 'Y', 'P', 'E'};
386 // first character should be immediately after the "<!"
387 static Boolean
parseDTD(CFXMLParserRef parser
) {
389 Boolean success
, hasExtID
= false;
390 CFXMLDocumentTypeInfo docData
= {{NULL
, NULL
}};
391 void *dtdStructure
= NULL
;
394 // First pass "DOCTYPE"
395 success
= _inputStreamMatchString(&parser
->input
, _DoctypeOpening
, 7);
396 success
= success
&& _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0;
397 success
= success
&& _inputStreamScanXMLName(&parser
->input
, false, &name
);
399 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
400 success
= _inputStreamPeekCharacter(&parser
->input
, &ch
);
402 // didn't make it past "DOCTYPE" successfully.
403 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found malformed DTD");
406 if (success
&& ch
!= '[' && ch
!= '>') {
409 success
= parseExternalID(parser
, false, &(docData
.externalID
));
411 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
412 success
= _inputStreamPeekCharacter(&parser
->input
, &ch
);
416 if (!(parser
->options
& kCFXMLParserSkipMetaData
) && *(parser
->top
)) {
417 CFStringRef tmp
= parser
->node
->dataString
;
418 parser
->node
->dataTypeID
= kCFXMLNodeTypeDocumentType
;
419 parser
->node
->dataString
= name
;
420 parser
->node
->additionalData
= &docData
;
421 dtdStructure
= (void *)INVOKE_CALLBACK3(parser
->callBacks
.createXMLStructure
, parser
, parser
->node
, parser
->context
.info
);
422 if (dtdStructure
&& parser
->status
== kCFXMLStatusParseInProgress
) {
423 INVOKE_CALLBACK4(parser
->callBacks
.addChild
, parser
, *parser
->top
, dtdStructure
, parser
->context
.info
);
425 parser
->node
->additionalData
= NULL
;
426 parser
->node
->dataString
= tmp
;
427 if (parser
->status
!= kCFXMLStatusParseInProgress
) {
428 // callback called CFXMLParserAbort()
429 _CFReportError(parser
, parser
->status
, NULL
);
435 if (docData
.externalID
.publicID
) CFRelease(docData
.externalID
.publicID
);
436 if (docData
.externalID
.systemID
) CFRelease(docData
.externalID
.systemID
);
437 pushXMLNode(parser
, dtdStructure
);
439 if (success
&& ch
== '[') {
441 _inputStreamGetCharacter(&parser
->input
, &ch
);
442 if (!parseInlineDTD(parser
)) return false;
443 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
444 success
= _inputStreamGetCharacter(&parser
->input
, &ch
) && ch
== '>';
445 } else if (success
&& ch
== '>') {
447 _inputStreamGetCharacter(&parser
->input
, &ch
);
450 if (_inputStreamAtEOF(&parser
->input
)) {
451 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing DTD");
453 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found malformed DTD");
458 parser
->top
--; // Remove dtdStructure from the stack
460 if (success
&& dtdStructure
) {
461 INVOKE_CALLBACK3(parser
->callBacks
.endXMLStructure
, parser
, dtdStructure
, parser
->context
.info
);
462 if (parser
->status
!= kCFXMLStatusParseInProgress
) {
463 _CFReportError(parser
, parser
->status
, NULL
);
471 [69] PEReference ::= '%' Name ';'
473 static Boolean
parsePhysicalEntityReference(CFXMLParserRef parser
) {
476 if (!_inputStreamScanXMLName(&parser
->input
, false, &name
)) {
477 _CFReportError(parser
, kCFXMLErrorMalformedName
, "Found malformed name while parsing physical entity reference");
479 } else if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
480 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing physical entity reference");
482 } else if (ch
!= ';') {
483 _CFReportError(parser
, kCFXMLErrorMalformedName
, "Found malformed name while parsing physical entity reference");
485 } else if (!(parser
->options
& kCFXMLParserSkipMetaData
) && *(parser
->top
)) {
486 CFXMLEntityReferenceInfo myData
;
488 CFStringRef tmp
= parser
->node
->dataString
;
489 parser
->node
->dataTypeID
= kCFXMLNodeTypeEntityReference
;
490 parser
->node
->dataString
= name
;
491 myData
.entityType
= kCFXMLEntityTypeParameter
;
492 parser
->node
->additionalData
= &myData
;
493 result
= reportNewLeaf(parser
);
494 parser
->node
->additionalData
= NULL
;
495 parser
->node
->dataString
= tmp
;
503 [54] AttType ::= StringType | TokenizedType | EnumeratedType
504 [55] StringType ::= 'CDATA'
505 [56] TokenizedType ::= 'ID' | 'IDREF'| 'IDREFS'| 'ENTITY'| 'ENTITIES'| 'NMTOKEN'| 'NMTOKENS'
506 [57] EnumeratedType ::= NotationType | Enumeration
507 [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
508 [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
510 static Boolean
parseEnumeration(CFXMLParserRef parser
, Boolean useNMTokens
) {
512 Boolean done
= false;
513 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
514 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
516 } else if (ch
!= '(') {
517 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
520 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
521 if (!_inputStreamScanXMLName(&parser
->input
, useNMTokens
, NULL
)) {
522 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
526 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
527 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
528 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
530 } else if (ch
== ')') {
532 } else if (ch
== '|') {
533 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
534 if (!_inputStreamScanXMLName(&parser
->input
, useNMTokens
, NULL
)) {
535 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
539 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
546 static Boolean
parseAttributeType(CFXMLParserRef parser
, CFMutableStringRef str
) {
547 Boolean success
= false;
548 static const UniChar attTypeStrings
[6][8] = {
549 {'C', 'D', 'A', 'T', 'A', '\0', '\0', '\0'},
550 {'I', 'D', 'R', 'E', 'F', 'S', '\0', '\0'},
551 {'E', 'N', 'T', 'I', 'T', 'Y', '\0', '\0'},
552 {'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S'},
553 {'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S'},
554 {'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N'} };
555 if (str
) _inputStreamSetMark(&parser
->input
);
556 if (_inputStreamMatchString(&parser
->input
, attTypeStrings
[0], 5) ||
557 _inputStreamMatchString(&parser
->input
, attTypeStrings
[1], 6) ||
558 _inputStreamMatchString(&parser
->input
, attTypeStrings
[1], 5) ||
559 _inputStreamMatchString(&parser
->input
, attTypeStrings
[1], 2) ||
560 _inputStreamMatchString(&parser
->input
, attTypeStrings
[2], 6) ||
561 _inputStreamMatchString(&parser
->input
, attTypeStrings
[3], 8) ||
562 _inputStreamMatchString(&parser
->input
, attTypeStrings
[4], 8) ||
563 _inputStreamMatchString(&parser
->input
, attTypeStrings
[4], 7)) {
565 } else if (_inputStreamMatchString(&parser
->input
, attTypeStrings
[5], 8)) {
567 if (_inputStreamSkipWhitespace(&parser
->input
, NULL
) == 0) {
568 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
571 success
= parseEnumeration(parser
, false);
574 success
= parseEnumeration(parser
, true);
578 _inputStreamGetCharactersFromMark(&parser
->input
, str
);
580 _inputStreamClearMark(&parser
->input
);
585 /* [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) */
586 static Boolean
parseAttributeDefaultDeclaration(CFXMLParserRef parser
, CFMutableStringRef str
) {
587 const UniChar strings
[3][8] = {
588 {'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D'},
589 {'I', 'M', 'P', 'L', 'I', 'E', 'D', '\0'},
590 {'F', 'I', 'X', 'E', 'D', '\0', '\0', '\0'}};
593 if (str
) _inputStreamSetMark(&parser
->input
);
594 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
595 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
597 } else if (ch
== '#') {
598 if (_inputStreamMatchString(&parser
->input
, strings
[0], 8) ||
599 _inputStreamMatchString(&parser
->input
, strings
[1], 7)) {
601 } else if (!_inputStreamMatchString(&parser
->input
, strings
[2], 5) || _inputStreamSkipWhitespace(&parser
->input
, NULL
) == 0) {
602 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
605 // we fall through if "#FIXED" was matched, and at least one whitespace character was stripped.
606 success
= parseAttributeValue(parser
, NULL
);
609 _inputStreamReturnCharacter(&parser
->input
, ch
);
610 success
= parseAttributeValue(parser
, NULL
);
614 _inputStreamGetCharactersFromMark(&parser
->input
, str
);
616 _inputStreamClearMark(&parser
->input
);
622 [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
623 [53] AttDef ::= S Name S AttType S DefaultDecl
625 static Boolean
parseAttributeListDeclaration(CFXMLParserRef parser
) {
626 const UniChar attList
[7] = {'A', 'T', 'T', 'L', 'I', 'S', 'T'};
627 CFXMLAttributeListDeclarationInfo attListData
;
628 CFXMLAttributeDeclarationInfo attributeArray
[8], *attributes
=attributeArray
;
629 CFIndex capacity
= 8;
631 Boolean success
= true;
633 if (!_inputStreamMatchString(&parser
->input
, attList
, 7) ||
634 _inputStreamSkipWhitespace(&parser
->input
, NULL
) == 0 ||
635 !_inputStreamScanXMLName(&parser
->input
, false, &name
)) {
636 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
639 attListData
.numberOfAttributes
= 0;
640 if (!(*parser
->top
) || (parser
->options
& kCFXMLParserSkipMetaData
)) {
641 // Use this to mark that we don't need to collect attribute information to report to the client. Ultimately, we may want to collect this for our own use (for validation, for instance), but for now, the only reason we would create it would be for the client. -- REW, 2/9/2000
644 while (_inputStreamPeekCharacter(&parser
->input
, &ch
) && ch
!= '>' && _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0) {
645 CFXMLAttributeDeclarationInfo
*attribute
= NULL
;
646 if (_inputStreamPeekCharacter(&parser
->input
, &ch
) && ch
== '>')
649 if (capacity
== attListData
.numberOfAttributes
) {
650 capacity
= 2*capacity
;
651 if (attributes
!= attributeArray
) {
652 attributes
= CFAllocatorReallocate(CFGetAllocator(parser
), attributes
, capacity
* sizeof(CFXMLAttributeDeclarationInfo
), 0);
654 attributes
= CFAllocatorAllocate(CFGetAllocator(parser
), capacity
* sizeof(CFXMLAttributeDeclarationInfo
), 0);
657 attribute
= &(attributes
[attListData
.numberOfAttributes
]);
658 // Much better if we can somehow create these strings immutable - then if the client (or we ourselves) has to copy them, they will end up multiply-retained, rather than having a new alloc and data copy performed. -- REW, 2/9/2000
659 attribute
->typeString
= CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
));
660 attribute
->defaultString
= CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
));
662 if (!_inputStreamScanXMLName(&parser
->input
, false, &(attribute
->attributeName
)) || (_inputStreamSkipWhitespace(&parser
->input
, NULL
) == 0)) {
663 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
667 if (!parseAttributeType(parser
, attribute
? (CFMutableStringRef
)attribute
->typeString
: NULL
)) {
671 if (_inputStreamSkipWhitespace(&parser
->input
, NULL
) == 0) {
672 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
676 if (!parseAttributeDefaultDeclaration(parser
, attribute
? (CFMutableStringRef
)attribute
->defaultString
: NULL
)) {
680 attListData
.numberOfAttributes
++;
683 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
684 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
686 } else if (ch
!= '>') {
687 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
689 } else if (attributes
) {
690 CFStringRef tmp
= parser
->node
->dataString
;
691 parser
->node
->dataTypeID
= kCFXMLNodeTypeAttributeListDeclaration
;
692 parser
->node
->dataString
= name
;
693 attListData
.attributes
= attributes
;
694 parser
->node
->additionalData
= (void *)&attListData
;
695 success
= reportNewLeaf(parser
);
696 parser
->node
->additionalData
= NULL
;
697 parser
->node
->dataString
= tmp
;
701 // Free up all that memory
703 for (idx
= 0; idx
< attListData
.numberOfAttributes
; idx
++) {
704 // Do not release attributeName here; it's a uniqued string from scanXMLName
705 CFRelease(attributes
[idx
].typeString
);
706 CFRelease(attributes
[idx
].defaultString
);
708 if (attributes
!= attributeArray
) {
709 CFAllocatorDeallocate(CFGetAllocator(parser
), attributes
);
715 CF_INLINE Boolean
parseSystemLiteral(CFXMLParserRef parser
, CFXMLExternalID
*extID
) {
718 CFMutableStringRef urlStr
= CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
));
719 if (_inputStreamScanQuotedString(&parser
->input
, urlStr
)) {
721 extID
->systemID
= CFURLCreateWithString(CFGetAllocator(parser
), urlStr
, parser
->input
.url
);
723 extID
->systemID
= NULL
;
728 success
= _inputStreamScanQuotedString(&parser
->input
, NULL
);
734 [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
735 [83] PublicID ::= 'PUBLIC' S PubidLiteral
736 [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
737 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
738 [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
740 // This does NOT report errors itself; caller can check to see if parser->input is at EOF to determine whether the formatting failed or unexpected EOF occurred. -- REW, 2/2/2000
741 static Boolean
parseExternalID(CFXMLParserRef parser
, Boolean alsoAcceptPublicID
, CFXMLExternalID
*extID
) {
742 const UniChar publicString
[6] = {'P', 'U', 'B', 'L', 'I', 'C'};
743 const UniChar systemString
[6] = {'S', 'Y', 'S', 'T', 'E', 'M'};
746 extID
->systemID
= NULL
;
747 extID
->publicID
= NULL
;
749 if (_inputStreamMatchString(&parser
->input
, publicString
, 6)) {
750 success
= _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0;
752 extID
->publicID
= CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
));
753 success
= success
&& _inputStreamScanQuotedString(&parser
->input
, (CFMutableStringRef
)extID
->publicID
);
755 success
= success
&& _inputStreamScanQuotedString(&parser
->input
, NULL
);
759 if (alsoAcceptPublicID
) {
760 _inputStreamSetMark(&parser
->input
); // In case we need to roll back the parser
762 if (_inputStreamSkipWhitespace(&parser
->input
, NULL
) == 0
763 || !_inputStreamPeekCharacter(&parser
->input
, &ch
)
764 || (ch
!= '\'' && ch
!= '\"')
765 || !parseSystemLiteral(parser
, extID
)) {
766 success
= alsoAcceptPublicID
;
767 if (alsoAcceptPublicID
) {
768 _inputStreamBackUpToMark(&parser
->input
);
773 if (alsoAcceptPublicID
) {
774 _inputStreamClearMark(&parser
->input
);
777 } else if (_inputStreamMatchString(&parser
->input
, systemString
, 6)) {
778 success
= _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0 && parseSystemLiteral(parser
, extID
);
786 [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
788 static Boolean
parseNotationDeclaration(CFXMLParserRef parser
) {
789 static UniChar notationString
[8] = {'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N'};
790 Boolean report
= *(parser
->top
) && !(parser
->options
& kCFXMLParserSkipMetaData
);
791 CFXMLNotationInfo notationData
= {{NULL
, NULL
}};
794 _inputStreamMatchString(&parser
->input
, notationString
, 8) &&
795 _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0 &&
796 _inputStreamScanXMLName(&parser
->input
, false, report
? &name
: NULL
) &&
797 _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0 &&
798 parseExternalID(parser
, true, report
? &(notationData
.externalID
) : NULL
);
802 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
803 success
= (_inputStreamGetCharacter(&parser
->input
, &ch
) && ch
== '>');
806 if (_inputStreamAtEOF(&parser
->input
)) {
807 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
809 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
812 CFStringRef tmp
= parser
->node
->dataString
;
813 parser
->node
->dataTypeID
= kCFXMLNodeTypeNotation
;
814 parser
->node
->dataString
= name
;
815 parser
->node
->additionalData
= ¬ationData
;
816 success
= reportNewLeaf(parser
);
817 parser
->node
->additionalData
= NULL
;
818 parser
->node
->dataString
= tmp
;
820 if (notationData
.externalID
.systemID
) CFRelease(notationData
.externalID
.systemID
);
821 if (notationData
.externalID
.publicID
) CFRelease(notationData
.externalID
.publicID
);
826 [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
827 [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
828 [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
830 static Boolean
parseChoiceOrSequence(CFXMLParserRef parser
, Boolean pastParen
) {
831 UniChar ch
, separator
;
833 if (!_inputStreamGetCharacter(&parser
->input
, &ch
) || ch
!= '(') return false;
834 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
836 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) return false;
838 /* Now scanning cp, production [48] */
840 if (!parseChoiceOrSequence(parser
, false)) return false;
842 if (!_inputStreamScanXMLName(&parser
->input
, false, NULL
)) return false;
844 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) return false;
845 if (ch
== '?' || ch
== '*' || ch
== '+') _inputStreamGetCharacter(&parser
->input
, &ch
);
848 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
849 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) return false;
850 if (ch
== ')') return true;
851 if (ch
!= '|' && ch
!= ',') return false;
853 while (ch
== separator
) {
854 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
855 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) return false;
857 if (!_inputStreamScanXMLName(&parser
->input
, false, NULL
)) return false;
858 } else if (!parseChoiceOrSequence(parser
, false)) {
861 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
862 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) return false;
868 [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')'
870 static Boolean
parseMixedElementContent(CFXMLParserRef parser
) {
871 static const UniChar pcdataString
[7] = {'#', 'P', 'C', 'D', 'A', 'T', 'A'};
873 if (!_inputStreamMatchString(&parser
->input
, pcdataString
, 7)) return false;
874 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
875 if (!_inputStreamGetCharacter(&parser
->input
, &ch
) && (ch
== ')' || ch
== '|')) return false;
876 if (ch
== ')') return true;
879 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
880 if (!_inputStreamScanXMLName(&parser
->input
, false, NULL
)) return false;
881 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
882 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) return false;
884 if (ch
!= ')') return false;
885 if (!_inputStreamGetCharacter(&parser
->input
, &ch
) || ch
!= '*') return false;
890 [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
891 [47] children ::= (choice | seq) ('?' | '*' | '+')?
893 static Boolean
parseElementContentSpec(CFXMLParserRef parser
) {
894 static const UniChar eltContentEmpty
[5] = {'E', 'M', 'P', 'T', 'Y'};
895 static const UniChar eltContentAny
[3] = {'A', 'N', 'Y'};
897 if (_inputStreamMatchString(&parser
->input
, eltContentEmpty
, 5) || _inputStreamMatchString(&parser
->input
, eltContentAny
, 3)) {
899 } else if (!_inputStreamPeekCharacter(&parser
->input
, &ch
) || ch
!= '(') {
902 // We want to know if we have a Mixed per production [51]. If we don't, we will need to back up and call the parseChoiceOrSequence function. So we set the mark now. -- REW, 2/10/2000
903 _inputStreamGetCharacter(&parser
->input
, &ch
);
904 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
905 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) return false;
908 return parseMixedElementContent(parser
);
910 if (parseChoiceOrSequence(parser
, true)) {
911 if (_inputStreamPeekCharacter(&parser
->input
, &ch
) && (ch
== '*' || ch
== '?' || ch
== '+')) {
912 _inputStreamGetCharacter(&parser
->input
, &ch
);
923 [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
925 static Boolean
parseElementDeclaration(CFXMLParserRef parser
) {
926 Boolean report
= *(parser
->top
) && !(parser
->options
& kCFXMLParserSkipMetaData
);
928 static const UniChar eltChars
[7] = {'E', 'L', 'E', 'M', 'E', 'N', 'T'};
930 CFMutableStringRef contentDesc
= NULL
;
932 success
= _inputStreamMatchString(&parser
->input
, eltChars
, 7)
933 && _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0
934 && _inputStreamScanXMLName(&parser
->input
, false, report
? &name
: NULL
)
935 && _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0;
937 if (report
) _inputStreamSetMark(&parser
->input
);
938 success
= parseElementContentSpec(parser
);
939 if (success
&& report
) {
940 contentDesc
= CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
));
941 _inputStreamGetCharactersFromMark(&parser
->input
, contentDesc
);
943 if (report
) _inputStreamClearMark(&parser
->input
);
944 if (success
) _inputStreamSkipWhitespace(&parser
->input
, NULL
);
945 success
= success
&& _inputStreamMatchString(&parser
->input
, &ch
, 1);
948 if (_inputStreamAtEOF(&parser
->input
)) {
949 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
951 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
954 CFXMLElementTypeDeclarationInfo eltData
;
955 CFStringRef tmp
= parser
->node
->dataString
;
956 parser
->node
->dataTypeID
= kCFXMLNodeTypeElementTypeDeclaration
;
957 parser
->node
->dataString
= name
;
958 eltData
.contentDescription
= contentDesc
;
959 parser
->node
->additionalData
= &eltData
;
960 success
= reportNewLeaf(parser
);
961 parser
->node
->additionalData
= NULL
;
962 parser
->node
->dataString
= tmp
;
964 if (contentDesc
) CFRelease(contentDesc
);
969 [70] EntityDecl ::= GEDecl | PEDecl
970 [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
971 [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
972 [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
973 [74] PEDef ::= EntityValue | ExternalID
974 [76] NDataDecl ::= S 'NDATA' S Name
975 [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | "'" ([^%&'] | PEReference | Reference)* "'"
977 static Boolean
parseEntityDeclaration(CFXMLParserRef parser
) {
978 const UniChar entityStr
[6] = {'E', 'N', 'T', 'I', 'T', 'Y'};
980 Boolean isPEDecl
= false;
981 CFXMLEntityInfo entityData
;
983 Boolean report
= *(parser
->top
) && !(parser
->options
& kCFXMLParserSkipMetaData
);
985 _inputStreamMatchString(&parser
->input
, entityStr
, 6) &&
986 (_inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0) &&
987 _inputStreamPeekCharacter(&parser
->input
, &ch
);
989 entityData
.replacementText
= NULL
;
990 entityData
.entityID
.publicID
= NULL
;
991 entityData
.entityID
.systemID
= NULL
;
992 entityData
.notationName
= NULL
;
993 // We will set entityType immediately before reporting
995 if (success
&& ch
== '%') {
996 _inputStreamGetCharacter(&parser
->input
, &ch
);
997 success
= _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0;
1000 success
= success
&& _inputStreamScanXMLName(&parser
->input
, false, report
? &name
: NULL
) && (_inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0) && _inputStreamPeekCharacter(&parser
->input
, &ch
);
1001 if (success
&& (ch
== '\"' || ch
== '\'')) {
1003 // This is not quite correct - the string scanned cannot contain '%' or '&' unless it's as part of a valid entity reference -- REW, 2/2/2000
1005 entityData
.replacementText
= CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
));
1006 success
= _inputStreamScanQuotedString(&parser
->input
, (CFMutableStringRef
)entityData
.replacementText
);
1008 success
= _inputStreamScanQuotedString(&parser
->input
, NULL
);
1010 } else if (success
) {
1012 success
= parseExternalID(parser
, false, report
? &(entityData
.entityID
) : NULL
);
1013 if (success
&& !isPEDecl
&& _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0) {
1014 // There could be an option NDataDecl
1015 // Don't we need to set entityData.notationName? -- REW, 3/6/2000
1016 const UniChar nDataStr
[5] = {'N', 'D', 'A', 'T', 'A'};
1017 if (_inputStreamMatchString(&parser
->input
, nDataStr
, 5)) {
1018 success
= (_inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0) && _inputStreamScanXMLName(&parser
->input
, false, NULL
);
1023 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
1024 success
= _inputStreamGetCharacter(&parser
->input
, &ch
) && ch
== '>';
1027 if (_inputStreamAtEOF(&parser
->input
)) {
1028 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
1030 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
1033 CFStringRef tmp
= parser
->node
->dataString
;
1034 if (isPEDecl
) entityData
.entityType
= kCFXMLEntityTypeParameter
;
1035 else if (entityData
.replacementText
) entityData
.entityType
= kCFXMLEntityTypeParsedInternal
;
1036 else if (!entityData
.notationName
) entityData
.entityType
= kCFXMLEntityTypeParsedExternal
;
1037 else entityData
.entityType
= kCFXMLEntityTypeUnparsed
;
1038 parser
->node
->dataTypeID
= kCFXMLNodeTypeEntity
;
1039 parser
->node
->dataString
= name
;
1040 parser
->node
->additionalData
= &entityData
;
1041 success
= reportNewLeaf(parser
);
1042 parser
->node
->additionalData
= NULL
;
1043 parser
->node
->dataString
= tmp
;
1044 if (entityData
.replacementText
) CFRelease(entityData
.replacementText
);
1046 if (entityData
.entityID
.publicID
) CFRelease(entityData
.entityID
.publicID
);
1047 if (entityData
.entityID
.systemID
) CFRelease(entityData
.entityID
.systemID
);
1052 [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1053 [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
1055 // First character should be just past '['
1056 static Boolean
parseInlineDTD(CFXMLParserRef parser
) {
1057 Boolean success
= true;
1058 while (success
&& !_inputStreamAtEOF(&parser
->input
)) {
1061 parseWhitespace(parser
);
1062 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) break;
1065 success
= parsePhysicalEntityReference(parser
);
1066 } else if (ch
== '<') {
1068 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
1069 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
1073 // Processing Instruction
1074 success
= parseProcessingInstruction(parser
, true); // We can safely pass true here, because *parser->top will be NULL if kCFXMLParserSkipMetaData is true
1075 } else if (ch
== '!') {
1076 UniChar dashes
[2] = {'-', '-'};
1077 if (_inputStreamMatchString(&parser
->input
, dashes
, 2)) {
1079 success
= parseComment(parser
, true);
1081 // elementdecl | AttListDecl | EntityDecl | NotationDecl
1082 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) {
1083 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
1085 } else if (ch
== 'A') {
1087 success
= parseAttributeListDeclaration(parser
);
1088 } else if (ch
== 'N') {
1089 success
= parseNotationDeclaration(parser
);
1090 } else if (ch
== 'E') {
1091 // elementdecl | EntityDecl
1092 _inputStreamGetCharacter(&parser
->input
, &ch
);
1093 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) {
1094 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
1097 _inputStreamReturnCharacter(&parser
->input
, 'E');
1099 success
= parseElementDeclaration(parser
);
1100 } else if (ch
== 'N') {
1101 success
= parseEntityDeclaration(parser
);
1103 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
1107 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
1112 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
1115 } else if (ch
== ']') {
1118 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
1123 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
1129 [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1131 static Boolean
parseTagContent(CFXMLParserRef parser
) {
1132 while (!_inputStreamAtEOF(&parser
->input
)) {
1134 CFIndex numWhitespaceCharacters
;
1136 _inputStreamSetMark(&parser
->input
);
1137 numWhitespaceCharacters
= _inputStreamSkipWhitespace(&parser
->input
, NULL
);
1138 // Don't report the whitespace yet; if the first thing we see is character data, we put the whitespace back and report it as part of the character data.
1139 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) break; // break == report unexpected EOF
1141 if (ch
!= '<' && ch
!= '&') { // CharData
1142 // Back off the whitespace; we'll report it with the PCData
1143 _inputStreamBackUpToMark(&parser
->input
);
1144 _inputStreamClearMark(&parser
->input
);
1145 if (!parsePCData(parser
)) return false;
1146 if(_inputStreamComposingErrorOccurred(&parser
->input
)) {
1147 _CFReportError(parser
, kCFXMLErrorEncodingConversionFailure
, "Encountered string encoding error");
1153 // element | Reference | CDSect | PI | Comment
1154 // We can safely report any whitespace now
1155 if (!(parser
->options
& kCFXMLParserSkipWhitespace
) && numWhitespaceCharacters
!= 0 && *(parser
->top
)) {
1156 _inputStreamReturnCharacter(&parser
->input
, ch
);
1157 _inputStreamGetCharactersFromMark(&parser
->input
, (CFMutableStringRef
)(parser
->node
->dataString
));
1158 parser
->node
->dataTypeID
= kCFXMLNodeTypeWhitespace
;
1159 parser
->node
->additionalData
= NULL
;
1160 if (!reportNewLeaf(parser
)) return false;
1161 _inputStreamGetCharacter(&parser
->input
, &ch
);
1163 _inputStreamClearMark(&parser
->input
);
1166 // Reference; for the time being, we don't worry about processing these; just report them as Entity references
1167 if (!parseEntityReference(parser
, true)) return false;
1171 // ch == '<'; element | CDSect | PI | Comment
1172 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) break;
1173 if (ch
== '?') { // PI
1174 _inputStreamGetCharacter(&parser
->input
, &ch
);
1175 if (!parseProcessingInstruction(parser
, true))
1177 } else if (ch
== '/') { // end tag; we're passing outside of content's production
1178 _inputStreamReturnCharacter(&parser
->input
, '<'); // Back off to the '<'
1180 } else if (ch
!= '!') { // element
1181 if (!parseTag(parser
)) return false;
1184 UniChar dashes
[3] = {'!', '-', '-'};
1185 if (_inputStreamMatchString(&parser
->input
, dashes
, 3)) {
1187 if (!parseComment(parser
, true)) return false;
1189 // Should have a CDSect; back off the "<!" and call parseCDSect
1190 _inputStreamReturnCharacter(&parser
->input
, '<');
1191 if (!parseCDSect(parser
)) return false;
1196 if(_inputStreamComposingErrorOccurred(&parser
->input
)) {
1197 _CFReportError(parser
, kCFXMLErrorEncodingConversionFailure
, "Encountered string encoding error");
1200 // Only way to get here is if premature EOF was found
1201 //#warning CF:Include the tag name here
1202 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing tag content");
1206 static Boolean
parseCDSect(CFXMLParserRef parser
) {
1207 const UniChar _CDSectOpening
[9] = {'<', '!', '[', 'C', 'D', 'A', 'T', 'A', '['};
1208 const UniChar _CDSectClose
[3] = {']', ']', '>'};
1209 if (!_inputStreamMatchString(&parser
->input
, _CDSectOpening
, 9)) {
1210 _CFReportError(parser
, kCFXMLErrorMalformedCDSect
, "Encountered bad prefix to a presumed CDATA section");
1213 if (!_inputStreamScanToCharacters(&parser
->input
, _CDSectClose
, 3, (CFMutableStringRef
)(parser
->node
->dataString
))) {
1214 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing CDATA section");
1218 parser
->node
->dataTypeID
= kCFXMLNodeTypeCDATASection
;
1219 parser
->node
->additionalData
= NULL
;
1220 return reportNewLeaf(parser
);
1224 [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1226 static Boolean
validateCharacterReference(CFStringRef str
) {
1228 CFIndex idx
, len
= CFStringGetLength(str
);
1229 if (len
< 2) return false;
1230 if (CFStringGetCharacterAtIndex(str
, 0) != '#') return false;
1231 if (CFStringGetCharacterAtIndex(str
, 1) == 'x') {
1234 if (len
== 2) return false;
1242 ch
= CFStringGetCharacterAtIndex(str
, idx
);
1244 if (!(ch
<= '9' && ch
>= '0') &&
1245 !(isHex
&& ((ch
>= 'a' && ch
<= 'f') || (ch
>= 'A' && ch
<= 'F')))) {
1249 return (idx
== len
);
1253 [67] Reference ::= EntityRef | CharRef
1254 [68] EntityRef ::= '&' Name ';'
1256 static Boolean
parseEntityReference(CFXMLParserRef parser
, Boolean report
) {
1258 CFXMLEntityReferenceInfo entData
;
1259 CFStringRef name
= NULL
;
1260 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) {
1261 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing EntityReference");
1266 if (!_inputStreamScanToCharacters(&parser
->input
, &ch
, 1, (CFMutableStringRef
)parser
->node
->dataString
)) {
1267 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing EntityReference");
1269 } else if (!validateCharacterReference(parser
->node
->dataString
)) {
1270 _CFReportError(parser
, kCFXMLErrorMalformedCharacterReference
, "Encountered illegal character while parsing character reference");
1273 entData
.entityType
= kCFXMLEntityTypeCharacter
;
1274 name
= parser
->node
->dataString
;
1275 } else if (!_inputStreamScanXMLName(&parser
->input
, false, report
? &name
: NULL
) || !_inputStreamGetCharacter(&parser
->input
, &ch
) || ch
!= ';') {
1276 if (_inputStreamAtEOF(&parser
->input
)) {
1277 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing EntityReference");
1280 _CFReportError(parser
, kCFXMLErrorMalformedName
, "Encountered malformed name while parsing EntityReference");
1284 entData
.entityType
= kCFXMLEntityTypeParsedInternal
;
1287 CFStringRef tmp
= parser
->node
->dataString
;
1289 parser
->node
->dataTypeID
= kCFXMLNodeTypeEntityReference
;
1290 parser
->node
->dataString
= name
;
1291 parser
->node
->additionalData
= &entData
;
1292 success
= reportNewLeaf(parser
);
1293 parser
->node
->additionalData
= NULL
;
1294 parser
->node
->dataString
= tmp
;
1302 // Kept from old entity reference parsing....
1304 switch (*(parser
->curr
)) {
1306 if (len
>= 3 && *(parser
->curr
+1) == 't' && *(parser
->curr
+2) == ';') {
1311 parser
->errorString
= CFStringCreateWithFormat(parser
->allocator
, NULL
, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser
));
1314 if (len
>= 3 && *(parser
->curr
+1) == 't' && *(parser
->curr
+2) == ';') {
1319 parser
->errorString
= CFStringCreateWithFormat(parser
->allocator
, NULL
, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser
));
1321 case 'a': // "apos" or "amp"
1322 if (len
< 4) { // Not enough characters for either conversion
1323 parser
->errorString
= CFStringCreateWithCString(parser
->allocator
, "Encountered unexpected EOF", kCFStringEncodingASCII
);
1326 if (*(parser
->curr
+1) == 'm') {
1328 if (*(parser
->curr
+2) == 'p' && *(parser
->curr
+3) == ';') {
1333 } else if (*(parser
->curr
+1) == 'p') {
1335 if (len
> 4 && *(parser
->curr
+2) == 'o' && *(parser
->curr
+3) == 's' && *(parser
->curr
+4) == ';') {
1341 parser
->errorString
= CFStringCreateWithFormat(parser
->allocator
, NULL
, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser
));
1343 case 'q': // "quote"
1344 if (len
>= 6 && *(parser
->curr
+1) == 'u' && *(parser
->curr
+2) == 'o' && *(parser
->curr
+3) == 't' && *(parser
->curr
+4) == 'e' && *(parser
->curr
+5) == ';') {
1349 parser
->errorString
= CFStringCreateWithFormat(parser
->allocator
, NULL
, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser
));
1354 Boolean isHex
= false;
1355 if ( len
< 4) { // Not enough characters to make it all fit! Need at least "&#d;"
1356 parser
->errorString
= CFStringCreateWithCString(parser
->allocator
, "Encountered unexpected EOF", kCFStringEncodingASCII
);
1360 if (*(parser
->curr
) == 'x') {
1364 while (parser
->curr
< parser
->end
) {
1365 ch
= *(parser
->curr
);
1367 CFStringAppendCharacters(string
, &num
, 1);
1371 if (!isHex
) num
= num
*10;
1372 else num
= num
<< 4;
1373 if (ch
<= '9' && ch
>= '0') {
1375 } else if (!isHex
) {
1376 parser
->errorString
= CFStringCreateWithFormat(parser
->allocator
, NULL
, CFSTR("Encountered unexpected character %c at line %d"), ch
, lineNumber(parser
));
1378 } else if (ch
>= 'a' && ch
<= 'f') {
1379 num
+= 10 + (ch
- 'a');
1380 } else if (ch
>= 'A' && ch
<= 'F') {
1381 num
+= 10 + (ch
- 'A');
1383 parser
->errorString
= CFStringCreateWithFormat(parser
->allocator
, NULL
, CFSTR("Encountered unexpected character %c at line %d"), ch
, lineNumber(parser
));
1387 parser
->errorString
= CFStringCreateWithCString(parser
->allocator
, "Encountered unexpected EOF", kCFStringEncodingASCII
);
1391 parser
->errorString
= CFStringCreateWithFormat(parser
->allocator
, NULL
, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser
));
1394 CFStringAppendCharacters(string
, &ch
, 1);
1399 [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
1401 static Boolean
parsePCData(CFXMLParserRef parser
) {
1403 Boolean done
= false;
1404 _inputStreamSetMark(&parser
->input
);
1405 while (!done
&& _inputStreamGetCharacter(&parser
->input
, &ch
)) {
1409 _inputStreamReturnCharacter(&parser
->input
, ch
);
1414 const UniChar endSequence
[2] = {']', '>'};
1415 if (_inputStreamMatchString(&parser
->input
, endSequence
, 2)) {
1416 _CFReportError(parser
, kCFXMLErrorMalformedParsedCharacterData
, "Encountered \"]]>\" in parsed character data");
1417 _inputStreamClearMark(&parser
->input
);
1426 _inputStreamGetCharactersFromMark(&parser
->input
, (CFMutableStringRef
)(parser
->node
->dataString
));
1427 _inputStreamClearMark(&parser
->input
);
1428 parser
->node
->dataTypeID
= kCFXMLNodeTypeText
;
1429 parser
->node
->additionalData
= NULL
;
1430 return reportNewLeaf(parser
);
1434 [42] ETag ::= '</' Name S? '>'
1436 static Boolean
parseCloseTag(CFXMLParserRef parser
, CFStringRef tag
) {
1437 const UniChar beginEndTag
[2] = {'<', '/'};
1438 Boolean unexpectedEOF
= false, mismatch
= false;
1439 CFStringRef closeTag
;
1441 // We can get away with testing pointer equality between tag & closeTag because scanXMLName guarantees the strings it returns are unique.
1442 if (_inputStreamMatchString(&parser
->input
, beginEndTag
, 2) && _inputStreamScanXMLName(&parser
->input
, false, &closeTag
) && closeTag
== tag
) {
1445 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
1446 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
1447 unexpectedEOF
= true;
1448 } else if (ch
!= '>') {
1451 } else if (_inputStreamAtEOF(&parser
->input
)) {
1452 unexpectedEOF
= true;
1457 if (unexpectedEOF
|| mismatch
) {
1458 if (unexpectedEOF
) {
1459 parser
->errorString
= CFStringCreateWithFormat(CFGetAllocator(parser
), NULL
, CFSTR("Encountered unexpected EOF while parsing close tag for <%@>"), tag
);
1460 parser
->status
= kCFXMLErrorUnexpectedEOF
;
1461 if(parser
->callBacks
.handleError
) INVOKE_CALLBACK3(parser
->callBacks
.handleError
, parser
, kCFXMLErrorUnexpectedEOF
, parser
->context
.info
);
1463 parser
->errorString
= CFStringCreateWithFormat(CFGetAllocator(parser
), NULL
, CFSTR("Encountered malformed close tag for <%@>"), tag
);
1464 parser
->status
= kCFXMLErrorMalformedCloseTag
;
1465 if(parser
->callBacks
.handleError
) INVOKE_CALLBACK3(parser
->callBacks
.handleError
, parser
, kCFXMLErrorMalformedCloseTag
, parser
->context
.info
);
1473 [39] element ::= EmptyElementTag | STag content ETag
1474 [40] STag ::= '<' Name (S Attribute)* S? '>'
1475 [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
1477 static Boolean
parseTag(CFXMLParserRef parser
) {
1480 CFXMLElementInfo data
;
1481 Boolean success
= true;
1482 CFStringRef tagName
;
1484 if (!_inputStreamScanXMLName(&parser
->input
, false, &tagName
)) {
1485 _CFReportError(parser
, kCFXMLErrorMalformedStartTag
, "Encountered malformed start tag");
1489 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
1491 if (!parseAttributes(parser
)) return false; // parsed directly into parser->argDict ; parseAttributes consumes any trailing whitespace
1492 data
.attributes
= parser
->argDict
;
1493 data
.attributeOrder
= parser
->argArray
;
1494 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
1495 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF");
1499 data
.isEmpty
= true;
1500 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
1501 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF");
1505 data
.isEmpty
= false;
1508 _CFReportError(parser
, kCFXMLErrorMalformedStartTag
, "Encountered malformed start tag");
1512 if (*parser
->top
|| parser
->top
== parser
->stack
) {
1513 CFStringRef oldStr
= parser
->node
->dataString
;
1514 parser
->node
->dataTypeID
= kCFXMLNodeTypeElement
;
1515 parser
->node
->dataString
= tagName
;
1516 parser
->node
->additionalData
= &data
;
1517 tag
= (void *)INVOKE_CALLBACK3(parser
->callBacks
.createXMLStructure
, parser
, parser
->node
, parser
->context
.info
);
1518 if (tag
&& parser
->status
== kCFXMLStatusParseInProgress
) {
1519 INVOKE_CALLBACK4(parser
->callBacks
.addChild
, parser
, *parser
->top
, tag
, parser
->context
.info
);
1521 parser
->node
->additionalData
= NULL
;
1522 parser
->node
->dataString
= oldStr
;
1523 if (parser
->status
!= kCFXMLStatusParseInProgress
) {
1524 // callback called CFXMLParserAbort()
1525 _CFReportError(parser
, parser
->status
, NULL
);
1532 pushXMLNode(parser
, tag
);
1533 if (!data
.isEmpty
) {
1534 success
= parseTagContent(parser
);
1536 success
= parseCloseTag(parser
, tagName
);
1541 if (success
&& tag
) {
1542 INVOKE_CALLBACK3(parser
->callBacks
.endXMLStructure
, parser
, tag
, parser
->context
.info
);
1543 if (parser
->status
!= kCFXMLStatusParseInProgress
) {
1544 _CFReportError(parser
, parser
->status
, NULL
);
1552 [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
1553 [67] Reference ::= EntityRef | CharRef
1554 [68] EntityRef ::= '&' Name ';'
1556 // For the moment, we don't worry about references in the attribute values.
1557 static Boolean
parseAttributeValue(CFXMLParserRef parser
, CFMutableStringRef str
) {
1559 Boolean success
= _inputStreamGetCharacter(&parser
->input
, "e
);
1560 if (!success
|| (quote
!= '\'' && quote
!= '\"')) return false;
1561 if (str
) _inputStreamSetMark(&parser
->input
);
1562 while (_inputStreamGetCharacter(&parser
->input
, &ch
) && ch
!= quote
) {
1564 case '<': success
= false; break;
1566 if (!parseEntityReference(parser
, false)) {
1575 if (success
&& _inputStreamAtEOF(&parser
->input
)) {
1580 _inputStreamReturnCharacter(&parser
->input
, quote
);
1581 _inputStreamGetCharactersFromMark(&parser
->input
, str
);
1582 _inputStreamGetCharacter(&parser
->input
, &ch
);
1584 _inputStreamClearMark(&parser
->input
);
1590 [40] STag ::= '<' Name (S Attribute)* S? '>'
1591 [41] Attribute ::= Name Eq AttValue
1592 [25] Eq ::= S? '=' S?
1595 // Expects parser->curr to be at the first content character; will consume the trailing whitespace.
1596 Boolean
parseAttributes(CFXMLParserRef parser
) {
1598 CFMutableDictionaryRef dict
;
1599 CFMutableArrayRef array
;
1600 Boolean failure
= false;
1601 if (_inputStreamPeekCharacter(&parser
->input
, &ch
) == '>') {
1602 if (parser
->argDict
) {
1603 CFDictionaryRemoveAllValues(parser
->argDict
);
1604 CFArrayRemoveAllValues(parser
->argArray
);
1606 return true; // No attributes; let caller deal with it
1608 if (!parser
->argDict
) {
1609 parser
->argDict
= CFDictionaryCreateMutable(CFGetAllocator(parser
), 0, &kCFTypeDictionaryKeyCallBacks
, &kCFTypeDictionaryValueCallBacks
);
1610 parser
->argArray
= CFArrayCreateMutable(CFGetAllocator(parser
), 0, &kCFTypeArrayCallBacks
);
1612 CFDictionaryRemoveAllValues(parser
->argDict
);
1613 CFArrayRemoveAllValues(parser
->argArray
);
1615 dict
= parser
->argDict
;
1616 array
= parser
->argArray
;
1617 while (!failure
&& _inputStreamPeekCharacter(&parser
->input
, &ch
) && ch
!= '>' && ch
!= '/') {
1619 CFMutableStringRef value
;
1620 if (!_inputStreamScanXMLName(&parser
->input
, false, &key
)) {
1624 if (CFArrayGetFirstIndexOfValue(array
, CFRangeMake(0, CFArrayGetCount(array
)), key
) != kCFNotFound
) {
1625 _CFReportError(parser
, kCFXMLErrorMalformedStartTag
, "Found repeated attribute");
1628 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
1629 if (!_inputStreamGetCharacter(&parser
->input
, &ch
) || ch
!= '=') {
1633 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
1634 value
= CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
));
1635 if (!parseAttributeValue(parser
, value
)) {
1640 CFArrayAppendValue(array
, key
);
1641 CFDictionarySetValue(dict
, key
, value
);
1643 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
1646 //#warning CF:Include tag name in this error report
1647 _CFReportError(parser
, kCFXMLErrorMalformedStartTag
, "Found illegal character while parsing element tag");
1649 } else if (_inputStreamAtEOF(&parser
->input
)) {
1650 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing element attributes");
1658 [1] document ::= prolog element Misc*
1659 [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1660 [27] Misc ::= Comment | PI | S
1661 [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1663 We treat XMLDecl as a plain old PI, since PI is part of Misc. This changes the prolog and document productions to
1664 [22-1] prolog ::= Misc* (doctypedecl Misc*)?
1665 [1-1] document ::= Misc* (doctypedecl Misc*)? element Misc*
1667 NOTE: This function assumes parser->stack has a valid top. I.e. the document pointer has already been created!
1669 static Boolean
parseXML(CFXMLParserRef parser
) {
1670 Boolean success
= true, sawDTD
= false, sawElement
= false;
1672 while (success
&& _inputStreamPeekCharacter(&parser
->input
, &ch
)) {
1678 success
= parseWhitespace(parser
);
1681 _inputStreamGetCharacter(&parser
->input
, &ch
);
1682 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
1683 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing top-level document");
1688 UniChar dashes
[2] = {'-', '-'};
1689 if (_inputStreamMatchString(&parser
->input
, dashes
, 2)) {
1691 success
= parseComment(parser
, true);
1695 _CFReportError(parser
, kCFXMLErrorMalformedDocument
, "Encountered a second DTD");
1698 success
= parseDTD(parser
);
1699 if (success
) sawDTD
= true;
1701 } else if (ch
== '?') {
1702 // Processing instruction
1703 success
= parseProcessingInstruction(parser
, true);
1707 _CFReportError(parser
, kCFXMLErrorMalformedDocument
, "Encountered second top-level element");
1710 _inputStreamReturnCharacter(&parser
->input
, ch
);
1711 success
= parseTag(parser
);
1712 if (success
) sawElement
= true;
1716 parser
->status
= kCFXMLErrorMalformedDocument
;
1717 parser
->errorString
= ch
< 256 ?
1718 CFStringCreateWithFormat(CFGetAllocator(parser
), NULL
, CFSTR("Encountered unexpected character 0x%x (\'%c\') at top-level"), ch
, ch
) :
1719 CFStringCreateWithFormat(CFGetAllocator(parser
), NULL
, CFSTR("Encountered unexpected Unicode character 0x%x at top-level"), ch
);
1721 if (parser
->callBacks
.handleError
) {
1722 INVOKE_CALLBACK3(parser
->callBacks
.handleError
, parser
, parser
->status
, parser
->context
.info
);
1729 if (!success
) return false;
1731 _CFReportError(parser
, kCFXMLErrorElementlessDocument
, "No element found in document");
1737 static void _CFReportError(CFXMLParserRef parser
, CFXMLParserStatusCode errNum
, const char *str
) {
1739 parser
->status
= errNum
;
1740 parser
->errorString
= CFStringCreateWithCString(CFGetAllocator(parser
), str
, kCFStringEncodingASCII
);
1742 if (parser
->callBacks
.handleError
) {
1743 INVOKE_CALLBACK3(parser
->callBacks
.handleError
, parser
, errNum
, parser
->context
.info
);
1747 // Assumes parser->node has been set and is ready to go
1748 static Boolean
reportNewLeaf(CFXMLParserRef parser
) {
1750 if (*(parser
->top
) == NULL
) return true;
1752 xmlStruct
= (void *)INVOKE_CALLBACK3(parser
->callBacks
.createXMLStructure
, parser
, parser
->node
, parser
->context
.info
);
1753 if (xmlStruct
&& parser
->status
== kCFXMLStatusParseInProgress
) {
1754 INVOKE_CALLBACK4(parser
->callBacks
.addChild
, parser
, *(parser
->top
), xmlStruct
, parser
->context
.info
);
1755 if (parser
->status
== kCFXMLStatusParseInProgress
) INVOKE_CALLBACK3(parser
->callBacks
.endXMLStructure
, parser
, xmlStruct
, parser
->context
.info
);
1757 if (parser
->status
!= kCFXMLStatusParseInProgress
) {
1758 _CFReportError(parser
, parser
->status
, NULL
);
1764 static void pushXMLNode(CFXMLParserRef parser
, void *node
) {
1766 if ((unsigned)(parser
->top
- parser
->stack
) == parser
->capacity
) {
1767 parser
->stack
= CFAllocatorReallocate(CFGetAllocator(parser
), parser
->stack
, 2 * parser
->capacity
* sizeof(void *), 0);
1768 parser
->top
= parser
->stack
+ parser
->capacity
;
1769 parser
->capacity
= 2*parser
->capacity
;
1771 *(parser
->top
) = node
;
1774 /**************************/
1775 /* Parsing to a CFXMLTree */
1776 /**************************/
1778 static void *_XMLTreeCreateXMLStructure(CFXMLParserRef parser
, CFXMLNodeRef node
, void *context
) {
1779 CFXMLNodeRef myNode
= CFXMLNodeCreateCopy(CFGetAllocator(parser
), node
);
1780 CFXMLTreeRef tree
= CFXMLTreeCreateWithNode(CFGetAllocator(parser
), myNode
);
1782 return (void *)tree
;
1785 static void _XMLTreeAddChild(CFXMLParserRef parser
, void *parent
, void *child
, void *context
) {
1786 CFTreeAppendChild((CFTreeRef
)parent
, (CFTreeRef
)child
);
1789 static void _XMLTreeEndXMLStructure(CFXMLParserRef parser
, void *xmlType
, void *context
) {
1790 CFXMLTreeRef node
= (CFXMLTreeRef
)xmlType
;
1791 if (CFTreeGetParent(node
))
1792 CFRelease((CFXMLTreeRef
)xmlType
);
1796 CFXMLTreeRef
CFXMLTreeCreateFromData(CFAllocatorRef allocator
, CFDataRef xmlData
, CFURLRef dataSource
, UInt32 parseOptions
, CFIndex parserVersion
) {
1797 return CFXMLTreeCreateFromDataWithError(allocator
, xmlData
, dataSource
, parseOptions
, parserVersion
, NULL
);
1800 #if MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_3
/* Keys under which CFXMLTreeCreateFromDataWithError stores the failure details in its error
   dictionary: the error description string, and CFNumbers for the line number, the location
   and the parser status code.  Only declared when building against the 10.3 SDK or later. */
1802 CONST_STRING_DECL(kCFXMLTreeErrorDescription
, "kCFXMLTreeErrorDescription");
1803 CONST_STRING_DECL(kCFXMLTreeErrorLineNumber
, "kCFXMLTreeErrorLineNumber");
1804 CONST_STRING_DECL(kCFXMLTreeErrorLocation
, "kCFXMLTreeErrorLocation");
1805 CONST_STRING_DECL(kCFXMLTreeErrorStatusCode
, "kCFXMLTreeErrorStatusCode");
1807 CFXMLTreeRef
CFXMLTreeCreateFromDataWithError(CFAllocatorRef allocator
, CFDataRef xmlData
, CFURLRef dataSource
, CFOptionFlags parseOptions
, CFIndex parserVersion
, CFDictionaryRef
*errorDict
) {
1808 CFXMLParserRef parser
;
1809 CFXMLParserCallBacks callbacks
;
1810 CFXMLTreeRef result
;
1812 __CFGenericValidateType(xmlData
, CFDataGetTypeID());
1813 CFAssert1(dataSource
== NULL
|| CFGetTypeID(dataSource
) == CFURLGetTypeID(), __kCFLogAssertion
, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__
);
1815 callbacks
.createXMLStructure
= _XMLTreeCreateXMLStructure
;
1816 callbacks
.addChild
= _XMLTreeAddChild
;
1817 callbacks
.endXMLStructure
= _XMLTreeEndXMLStructure
;
1818 callbacks
.resolveExternalEntity
= NULL
;
1819 callbacks
.handleError
= NULL
;
1820 parser
= CFXMLParserCreate(allocator
, xmlData
, dataSource
, parseOptions
, parserVersion
, &callbacks
, NULL
);
1822 if (CFXMLParserParse(parser
)) {
1823 result
= (CFXMLTreeRef
)CFXMLParserGetDocument(parser
);
1825 if (errorDict
) { // collect the error dictionary
1826 *errorDict
= CFDictionaryCreateMutable(allocator
, 4, &kCFTypeDictionaryKeyCallBacks
, &kCFTypeDictionaryValueCallBacks
);
1830 CFStringRef errstring
;
1832 rawnum
= CFXMLParserGetLocation(parser
);
1833 cfnum
= CFNumberCreate(allocator
, kCFNumberSInt32Type
, &rawnum
);
1835 CFDictionaryAddValue((CFMutableDictionaryRef
)*errorDict
, kCFXMLTreeErrorLocation
, cfnum
);
1839 rawnum
= CFXMLParserGetLineNumber(parser
);
1840 cfnum
= CFNumberCreate(allocator
, kCFNumberSInt32Type
, &rawnum
);
1842 CFDictionaryAddValue((CFMutableDictionaryRef
)*errorDict
, kCFXMLTreeErrorLineNumber
, cfnum
);
1846 rawnum
= CFXMLParserGetStatusCode(parser
);
1847 cfnum
= CFNumberCreate(allocator
, kCFNumberSInt32Type
, &rawnum
);
1849 CFDictionaryAddValue((CFMutableDictionaryRef
)*errorDict
, kCFXMLTreeErrorStatusCode
, cfnum
);
1853 errstring
= CFXMLParserCopyErrorDescription(parser
);
1855 CFDictionaryAddValue((CFMutableDictionaryRef
)*errorDict
, kCFXMLTreeErrorDescription
, errstring
);
1856 CFRelease(errstring
);
1860 result
= (CFXMLTreeRef
)CFXMLParserGetDocument(parser
);
1861 if (result
) CFRelease(result
);
1869 At the very least we need to do <, >, &, ", and '. In addition, we'll have to do everything else in the string.
1870 We should also be handling items that are up over certain values correctly.
1872 CFStringRef
CFXMLCreateStringByEscapingEntities(CFAllocatorRef allocator
, CFStringRef string
, CFDictionaryRef entitiesDictionary
) {
1873 CFAssert1(string
!= NULL
, __kCFLogAssertion
, "%s(): NULL string not permitted.", __PRETTY_FUNCTION__
);
1874 CFMutableStringRef newString
= CFStringCreateMutable(allocator
, 0); // unbounded mutable string
1875 CFMutableCharacterSetRef startChars
= CFCharacterSetCreateMutable(allocator
);
1877 CFStringInlineBuffer inlineBuf
;
1880 CFIndex stringLength
= CFStringGetLength(string
);
1883 CFCharacterSetAddCharactersInString(startChars
, CFSTR("&<>'\""));
1885 CFStringInitInlineBuffer(string
, &inlineBuf
, CFRangeMake(0, stringLength
));
1886 for(idx
= 0; idx
< stringLength
; idx
++) {
1887 uc
= CFStringGetCharacterFromInlineBuffer(&inlineBuf
, idx
);
1888 if(CFCharacterSetIsCharacterMember(startChars
, uc
)) {
1889 CFStringRef previousSubstring
= CFStringCreateWithSubstring(allocator
, string
, CFRangeMake(mark
, idx
- mark
));
1890 CFStringAppend(newString
, previousSubstring
);
1891 CFRelease(previousSubstring
);
1894 CFStringAppend(newString
, CFSTR("&"));
1897 CFStringAppend(newString
, CFSTR("<"));
1900 CFStringAppend(newString
, CFSTR(">"));
1903 CFStringAppend(newString
, CFSTR("'"));
1906 CFStringAppend(newString
, CFSTR("""));
1912 CFRelease(startChars
);
1916 CFStringRef
CFXMLCreateStringByUnescapingEntities(CFAllocatorRef allocator
, CFStringRef string
, CFDictionaryRef entitiesDictionary
) {
1917 CFAssert1(string
!= NULL
, __kCFLogAssertion
, "%s(): NULL string not permitted.", __PRETTY_FUNCTION__
);
1919 CFStringInlineBuffer inlineBuf
; /* use this for fast traversal of the string in question */
1921 CFIndex lastChunkStart
, length
= CFStringGetLength(string
);
1922 CFIndex i
, entityStart
;
1926 CFMutableDictionaryRef fullReplDict
= entitiesDictionary
? CFDictionaryCreateMutableCopy(allocator
, 0, entitiesDictionary
) : CFDictionaryCreateMutable(allocator
, 0, &kCFTypeDictionaryKeyCallBacks
, &kCFTypeDictionaryValueCallBacks
);
1928 CFDictionaryAddValue(fullReplDict
, (const void *)CFSTR("amp"), (const void *)CFSTR("&"));
1929 CFDictionaryAddValue(fullReplDict
, (const void *)CFSTR("quot"), (const void *)CFSTR("\""));
1930 CFDictionaryAddValue(fullReplDict
, (const void *)CFSTR("lt"), (const void *)CFSTR("<"));
1931 CFDictionaryAddValue(fullReplDict
, (const void *)CFSTR("gt"), (const void *)CFSTR(">"));
1932 CFDictionaryAddValue(fullReplDict
, (const void *)CFSTR("apos"), (const void *)CFSTR("'"));
1934 CFStringInitInlineBuffer(string
, &inlineBuf
, CFRangeMake(0, length
- 1));
1935 CFMutableStringRef newString
= CFStringCreateMutable(allocator
, 0);
1938 // Scan through the string in its entirety
1939 for(i
= 0; i
< length
; ) {
1940 uc
= CFStringGetCharacterFromInlineBuffer(&inlineBuf
, i
); i
++; // grab the next character and move i.
1943 entityStart
= i
- 1;
1944 entity
= 0xFFFF; // set this to a not-Unicode character as sentinel
1945 // we've hit the beginning of an entity. Copy everything from lastChunkStart to this point.
1946 if(lastChunkStart
< i
- 1) {
1947 sub
= CFStringCreateWithSubstring(allocator
, string
, CFRangeMake(lastChunkStart
, (i
- 1) - lastChunkStart
));
1948 CFStringAppend(newString
, sub
);
1952 uc
= CFStringGetCharacterFromInlineBuffer(&inlineBuf
, i
); i
++; // grab the next character and move i.
1953 // Now we can process the entity reference itself
1954 if(uc
== '#') { // this is a numeric entity.
1957 uc
= CFStringGetCharacterFromInlineBuffer(&inlineBuf
, i
); i
++;
1959 if(uc
== 'x') { // only lowercase x allowed. Translating numeric entity as hexadecimal.
1961 uc
= CFStringGetCharacterFromInlineBuffer(&inlineBuf
, i
); i
++;
1964 // process the provided digits 'til we're finished
1966 if (uc
>= '0' && uc
<= '9')
1967 entity
= entity
* base
+ (uc
-'0');
1968 else if (uc
>= 'a' && uc
<= 'f' && base
== 16)
1969 entity
= entity
* base
+ (uc
-'a'+10);
1970 else if (uc
>= 'A' && uc
<= 'F' && base
== 16)
1971 entity
= entity
* base
+ (uc
-'A'+10);
1975 uc
= CFStringGetCharacterFromInlineBuffer(&inlineBuf
, i
); i
++;
1982 // Scan to the end of the entity
1983 while(uc
!= ';' && i
< length
) {
1984 uc
= CFStringGetCharacterFromInlineBuffer(&inlineBuf
, i
); i
++;
1987 if(0xFFFF != entity
) { // it was numeric, and translated.
1988 // Now, output the result fo the entity
1989 if(entity
>= 0x10000) {
1990 UniChar characters
[2] = { ((entity
- 0x10000) >> 10) + 0xD800, ((entity
- 0x10000) & 0x3ff) + 0xDC00 };
1991 CFStringAppendCharacters(newString
, characters
, 2);
1993 UniChar character
= entity
;
1994 CFStringAppendCharacters(newString
, &character
, 1);
1996 } else { // it wasn't numeric.
1997 sub
= CFStringCreateWithSubstring(allocator
, string
, CFRangeMake(entityStart
+ 1, (i
- entityStart
- 2))); // This trims off the & and ; from the string, so we can use it against the dictionary itself.
1998 CFStringRef replacementString
= CFDictionaryGetValue(fullReplDict
, sub
);
1999 if(replacementString
) {
2000 CFStringAppend(newString
, replacementString
);
2002 CFRelease(sub
); // let the old substring go, since we didn't find it in the dictionary
2003 sub
= CFStringCreateWithSubstring(allocator
, string
, CFRangeMake(entityStart
, (i
- entityStart
))); // create a new one, including the & and ;
2004 CFStringAppend(newString
, sub
); // ...and append that.
2006 CFRelease(sub
); // in either case, release the most-recent "sub"
2009 // move the lastChunkStart to the beginning of the next chunk.
2013 if(lastChunkStart
< length
) { // we've come out of the loop, let's get the rest of the string and tack it on.
2014 sub
= CFStringCreateWithSubstring(allocator
, string
, CFRangeMake(lastChunkStart
, i
- lastChunkStart
));
2015 CFStringAppend(newString
, sub
);
2019 CFRelease(fullReplDict
);