2 * Copyright (c) 2005 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
24 Copyright 1999-2002, Apple, Inc. All rights reserved.
25 Responsibility: Chris Parker
28 #include <CoreFoundation/CFXMLParser.h>
29 #include "CFXMLInputStream.h"
30 #include <CoreFoundation/CFNumber.h>
31 #include "CFUniChar.h"
32 #include "CFInternal.h"
34 struct __CFXMLParser
{
35 CFRuntimeBase _cfBase
;
37 _CFXMLInputStream input
;
43 struct __CFXMLNode
*node
; // Our private node; we use it to report back information
44 CFMutableDictionaryRef argDict
;
45 CFMutableArrayRef argArray
;
48 CFXMLParserCallBacks callBacks
;
49 CFXMLParserContext context
;
51 CFXMLParserStatusCode status
;
52 CFStringRef errorString
;
55 static CFStringRef
__CFXMLParserCopyDescription(CFTypeRef cf
) {
56 const struct __CFXMLParser
*parser
= cf
;
57 return CFStringCreateWithFormat(CFGetAllocator(cf
), NULL
, CFSTR("<CFXMLParser 0x%x>"), parser
);
60 static void __CFXMLParserDeallocate(CFTypeRef cf
) {
61 struct __CFXMLParser
*parser
= (struct __CFXMLParser
*)cf
;
62 CFAllocatorRef alloc
= CFGetAllocator(parser
);
63 _freeInputStream(&(parser
->input
));
64 if (parser
->argDict
) CFRelease(parser
->argDict
);
65 if (parser
->argArray
) CFRelease(parser
->argArray
);
66 if (parser
->errorString
) CFRelease(parser
->errorString
);
67 if (parser
->node
) CFRelease(parser
->node
);
68 CFAllocatorDeallocate(alloc
, parser
->stack
);
69 if (parser
->context
.info
&& parser
->context
.release
) {
70 parser
->context
.release(parser
->context
.info
);
74 static CFTypeID __kCFXMLParserTypeID
= _kCFRuntimeNotATypeID
;
76 static const CFRuntimeClass __CFXMLParserClass
= {
81 __CFXMLParserDeallocate
,
85 __CFXMLParserCopyDescription
88 __private_extern__
void __CFXMLParserInitialize(void) {
89 __kCFXMLParserTypeID
= _CFRuntimeRegisterClass(&__CFXMLParserClass
);
92 CFTypeID
CFXMLParserGetTypeID(void) {
93 return __kCFXMLParserTypeID
;
102 void CFXMLParserGetContext(CFXMLParserRef parser
, CFXMLParserContext
*context
) {
103 CFAssert1(parser
!= NULL
, __kCFLogAssertion
, "%s(): NULL parser not permitted", __PRETTY_FUNCTION__
);
104 __CFGenericValidateType(parser
, __kCFXMLParserTypeID
);
106 context
->version
= parser
->context
.version
;
107 context
->info
= parser
->context
.info
;
108 context
->retain
= (void *)((uintptr_t)parser
->context
.retain
& __mask
);
109 context
->release
= (void *)((uintptr_t)parser
->context
.release
& __mask
);
110 context
->copyDescription
= (void *)((uintptr_t)parser
->context
.copyDescription
& __mask
);
114 void CFXMLParserGetCallBacks(CFXMLParserRef parser
, CFXMLParserCallBacks
*callBacks
) {
115 __CFGenericValidateType(parser
, __kCFXMLParserTypeID
);
117 callBacks
->version
= parser
->callBacks
.version
;
118 callBacks
->createXMLStructure
= (void *)((uintptr_t)parser
->callBacks
.createXMLStructure
& __mask
);
119 callBacks
->addChild
= (void *)((uintptr_t)parser
->callBacks
.addChild
& __mask
);
120 callBacks
->endXMLStructure
= (void *)((uintptr_t)parser
->callBacks
.endXMLStructure
& __mask
);
121 callBacks
->resolveExternalEntity
= (void *)((uintptr_t)parser
->callBacks
.resolveExternalEntity
& __mask
);
122 callBacks
->handleError
= (void *)((uintptr_t)parser
->callBacks
.handleError
& __mask
);
128 CFURLRef
CFXMLParserGetSourceURL(CFXMLParserRef parser
) {
129 __CFGenericValidateType(parser
, __kCFXMLParserTypeID
);
130 return parser
->input
.url
;
133 /* Returns the character index or line number of the current parse location */
134 CFIndex
CFXMLParserGetLocation(CFXMLParserRef parser
) {
135 __CFGenericValidateType(parser
, __kCFXMLParserTypeID
);
136 return _inputStreamCurrentLocation(&parser
->input
);
139 CFIndex
CFXMLParserGetLineNumber(CFXMLParserRef parser
) {
140 __CFGenericValidateType(parser
, __kCFXMLParserTypeID
);
141 return _inputStreamCurrentLine(&parser
->input
);
144 /* Returns the top-most object returned by the createXMLStructure callback */
145 void *CFXMLParserGetDocument(CFXMLParserRef parser
) {
146 __CFGenericValidateType(parser
, __kCFXMLParserTypeID
);
147 if (parser
->capacity
> 0)
148 return parser
->stack
[0];
153 CFXMLParserStatusCode
CFXMLParserGetStatusCode(CFXMLParserRef parser
) {
154 __CFGenericValidateType(parser
, __kCFXMLParserTypeID
);
155 return parser
->status
;
158 CFStringRef
CFXMLParserCopyErrorDescription(CFXMLParserRef parser
) {
159 __CFGenericValidateType(parser
, __kCFXMLParserTypeID
);
160 return CFRetain(parser
->errorString
);
163 void CFXMLParserAbort(CFXMLParserRef parser
, CFXMLParserStatusCode errorCode
, CFStringRef errorDescription
) {
164 __CFGenericValidateType(parser
, __kCFXMLParserTypeID
);
165 CFAssert1(errorCode
> 0, __kCFLogAssertion
, "%s(): errorCode must be greater than zero", __PRETTY_FUNCTION__
);
166 CFAssert1(errorDescription
!= NULL
, __kCFLogAssertion
, "%s(): errorDescription may not be NULL", __PRETTY_FUNCTION__
);
167 __CFGenericValidateType(errorDescription
, CFStringGetTypeID());
169 parser
->status
= errorCode
;
170 if (parser
->errorString
) CFRelease(parser
->errorString
);
171 parser
->errorString
= CFStringCreateCopy(NULL
, errorDescription
);
175 static Boolean
parseXML(CFXMLParserRef parser
);
176 static Boolean
parseComment(CFXMLParserRef parser
, Boolean report
);
177 static Boolean
parseProcessingInstruction(CFXMLParserRef parser
, Boolean report
);
178 static Boolean
parseInlineDTD(CFXMLParserRef parser
);
179 static Boolean
parseDTD(CFXMLParserRef parser
);
180 static Boolean
parsePhysicalEntityReference(CFXMLParserRef parser
);
181 static Boolean
parseCDSect(CFXMLParserRef parser
);
182 static Boolean
parseEntityReference(CFXMLParserRef parser
, Boolean report
);
183 static Boolean
parsePCData(CFXMLParserRef parser
);
184 static Boolean
parseWhitespace(CFXMLParserRef parser
);
185 static Boolean
parseAttributeListDeclaration(CFXMLParserRef parser
);
186 static Boolean
parseNotationDeclaration(CFXMLParserRef parser
);
187 static Boolean
parseElementDeclaration(CFXMLParserRef parser
);
188 static Boolean
parseEntityDeclaration(CFXMLParserRef parser
);
189 static Boolean
parseExternalID(CFXMLParserRef parser
, Boolean alsoAcceptPublicID
, CFXMLExternalID
*extID
);
190 static Boolean
parseCloseTag(CFXMLParserRef parser
, CFStringRef tag
);
191 static Boolean
parseTagContent(CFXMLParserRef parser
);
192 static Boolean
parseTag(CFXMLParserRef parser
);
193 static Boolean
parseAttributes(CFXMLParserRef parser
);
194 static Boolean
parseAttributeValue(CFXMLParserRef parser
, CFMutableStringRef str
);
196 // Utilities; may need to make these accessible to the property list parser to avoid code duplication
197 static void _CFReportError(CFXMLParserRef parser
, CFXMLParserStatusCode errNum
, const char *str
);
198 static Boolean
reportNewLeaf(CFXMLParserRef parser
); // Assumes parser->node has been set and is ready to go
199 static void pushXMLNode(CFXMLParserRef parser
, void *node
);
201 static CFXMLParserRef
__CFXMLParserInit(CFAllocatorRef alloc
, CFURLRef dataSource
, CFOptionFlags options
, CFDataRef xmlData
, CFIndex version
, CFXMLParserCallBacks
*callBacks
, CFXMLParserContext
*context
) {
202 struct __CFXMLParser
*parser
= (struct __CFXMLParser
*)_CFRuntimeCreateInstance(alloc
, __kCFXMLParserTypeID
, sizeof(struct __CFXMLParser
) - sizeof(CFRuntimeBase
), NULL
);
203 struct __CFXMLNode
*node
= (struct __CFXMLNode
*)_CFRuntimeCreateInstance(alloc
, CFXMLNodeGetTypeID(), sizeof(struct __CFXMLNode
) - sizeof(CFRuntimeBase
), NULL
);
205 if (parser
&& node
) {
206 alloc
= CFGetAllocator(parser
);
207 _initializeInputStream(&(parser
->input
), alloc
, dataSource
, xmlData
);
208 parser
->top
= parser
->stack
;
209 parser
->stack
= NULL
;
210 parser
->capacity
= 0;
212 buf
= CFAllocatorAllocate(alloc
, 128*sizeof(UniChar
), 0);
214 parser
->node
->dataString
= CFStringCreateMutableWithExternalCharactersNoCopy(alloc
, buf
, 0, 128, alloc
);
215 parser
->node
->additionalData
= NULL
;
216 parser
->node
->version
= version
;
217 parser
->argDict
= NULL
; // don't create these until necessary
218 parser
->argArray
= NULL
;
220 parser
->options
= options
;
221 parser
->callBacks
= *callBacks
;
223 FAULT_CALLBACK((void **)&(parser
->callBacks
.createXMLStructure
));
224 FAULT_CALLBACK((void **)&(parser
->callBacks
.addChild
));
225 FAULT_CALLBACK((void **)&(parser
->callBacks
.endXMLStructure
));
226 FAULT_CALLBACK((void **)&(parser
->callBacks
.resolveExternalEntity
));
227 FAULT_CALLBACK((void **)&(parser
->callBacks
.handleError
));
230 parser
->context
= *context
;
231 if (parser
->context
.info
&& parser
->context
.retain
) {
232 parser
->context
.retain(parser
->context
.info
);
235 parser
->context
.version
= 0;
236 parser
->context
.info
= NULL
;
237 parser
->context
.retain
= NULL
;
238 parser
->context
.release
= NULL
;
239 parser
->context
.copyDescription
= NULL
;
241 parser
->status
= kCFXMLStatusParseNotBegun
;
242 parser
->errorString
= NULL
;
244 if (parser
) CFRelease(parser
);
245 if (node
) CFRelease(node
);
251 CFXMLParserRef
CFXMLParserCreate(CFAllocatorRef allocator
, CFDataRef xmlData
, CFURLRef dataSource
, CFOptionFlags parseOptions
, CFIndex versionOfNodes
, CFXMLParserCallBacks
*callBacks
, CFXMLParserContext
*context
) {
252 CFAssert1(xmlData
!= NULL
, __kCFLogAssertion
, "%s(): NULL data not permitted", __PRETTY_FUNCTION__
);
253 __CFGenericValidateType(xmlData
, CFDataGetTypeID());
254 CFAssert1(dataSource
== NULL
|| CFGetTypeID(dataSource
) == CFURLGetTypeID(), __kCFLogAssertion
, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__
);
255 CFAssert1(callBacks
!= NULL
&& callBacks
->createXMLStructure
!= NULL
&& callBacks
->addChild
!= NULL
&& callBacks
->endXMLStructure
!= NULL
, __kCFLogAssertion
, "%s(): callbacks createXMLStructure, addChild, and endXMLStructure must all be non-NULL", __PRETTY_FUNCTION__
);
256 CFAssert2(versionOfNodes
<= 1, __kCFLogAssertion
, "%s(): version number %d is higher than supported by CFXMLParser", __PRETTY_FUNCTION__
, versionOfNodes
);
257 CFAssert1(versionOfNodes
!= 0, __kCFLogAssertion
, "%s(): version number 0 is no longer supported by CFXMLParser", __PRETTY_FUNCTION__
);
258 return __CFXMLParserInit(allocator
, dataSource
, parseOptions
, xmlData
, versionOfNodes
, callBacks
, context
);
261 CFXMLParserRef
CFXMLParserCreateWithDataFromURL(CFAllocatorRef allocator
, CFURLRef dataSource
, CFOptionFlags parseOptions
, CFIndex versionOfNodes
, CFXMLParserCallBacks
*callBacks
, CFXMLParserContext
*context
) {
262 CFAssert1(dataSource
== NULL
|| CFGetTypeID(dataSource
) == CFURLGetTypeID(), __kCFLogAssertion
, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__
);
263 CFAssert1(callBacks
!= NULL
&& callBacks
->createXMLStructure
!= NULL
&& callBacks
->addChild
!= NULL
&& callBacks
->endXMLStructure
!= NULL
, __kCFLogAssertion
, "%s(): callbacks createXMLStructure, addChild, and endXMLStructure must all be non-NULL", __PRETTY_FUNCTION__
);
264 CFAssert2(versionOfNodes
<= 1, __kCFLogAssertion
, "%s(): version number %d is higher than supported by CFXMLParser", __PRETTY_FUNCTION__
, versionOfNodes
);
265 CFAssert1(versionOfNodes
!= 0, __kCFLogAssertion
, "%s(): version number 0 is no longer supported by CFXMLParser", __PRETTY_FUNCTION__
);
267 return __CFXMLParserInit(allocator
, dataSource
, parseOptions
, NULL
, versionOfNodes
, callBacks
, context
);
270 Boolean
CFXMLParserParse(CFXMLParserRef parser
) {
271 CFXMLDocumentInfo docData
;
272 __CFGenericValidateType(parser
, __kCFXMLParserTypeID
);
273 if (parser
->status
!= kCFXMLStatusParseNotBegun
) return false;
274 parser
->status
= kCFXMLStatusParseInProgress
;
276 if (!_openInputStream(&parser
->input
)) {
277 if (!parser
->input
.data
) {
279 parser
->status
= kCFXMLErrorNoData
;
280 parser
->errorString
= CFStringCreateWithFormat(CFGetAllocator(parser
), NULL
, CFSTR("No data found at %@"), CFURLGetString(parser
->input
.url
));
282 // couldn't figure out the encoding
283 CFAssert(parser
->input
.encoding
== kCFStringEncodingInvalidId
, __kCFLogAssertion
, "CFXMLParser internal error: input stream could not be opened");
284 parser
->status
= kCFXMLErrorUnknownEncoding
;
285 parser
->errorString
= CFStringCreateWithCString(CFGetAllocator(parser
), "Encountered unknown encoding", kCFStringEncodingASCII
);
287 if (parser
->callBacks
.handleError
) {
288 INVOKE_CALLBACK3(parser
->callBacks
.handleError
, parser
, parser
->status
, parser
->context
.info
);
293 // Create the document
294 parser
->stack
= CFAllocatorAllocate(CFGetAllocator(parser
), 16 * sizeof(void *), 0);
295 parser
->capacity
= 16;
296 parser
->node
->dataTypeID
= kCFXMLNodeTypeDocument
;
297 docData
.encoding
= _inputStreamGetEncoding(&parser
->input
);
298 docData
.sourceURL
= parser
->input
.url
;
299 parser
->node
->additionalData
= &docData
;
300 parser
->stack
[0] = (void *)INVOKE_CALLBACK3(parser
->callBacks
.createXMLStructure
, parser
, parser
->node
, parser
->context
.info
);
301 parser
->top
= parser
->stack
;
302 parser
->node
->additionalData
= NULL
;
304 // Client may have called CFXMLParserAbort() during any callback, so we must always check to see if we have an error status after a callback
305 if (parser
->status
!= kCFXMLStatusParseInProgress
) {
306 _CFReportError(parser
, parser
->status
, NULL
);
309 return parseXML(parser
);
312 /* The next several functions are all intended to parse past a particular XML structure. They expect parser->curr to be set to the first content character of their structure (e.g. parseXMLComment expects parser->curr to be set just past "<!--"). They parse to the end of their structure, calling any necessary callbacks along the way, and advancing parser->curr as they go. They either return void (not possible for the parse to fail) or they return a Boolean (success/failure). The calling routines are expected to catch returned Booleans and fail immediately if false is returned. */
314 // [3] S ::= (#x20 | #x9 | #xD | #xA)+
315 static Boolean
parseWhitespace(CFXMLParserRef parser
) {
317 Boolean report
= !(parser
->options
& kCFXMLParserSkipWhitespace
);
318 len
= _inputStreamSkipWhitespace(&parser
->input
, report
? (CFMutableStringRef
)(parser
->node
->dataString
) : NULL
);
320 parser
->node
->dataTypeID
= kCFXMLNodeTypeWhitespace
;
321 parser
->node
->additionalData
= NULL
;
322 return reportNewLeaf(parser
);
328 // parser should be just past "<!--"
329 static Boolean
parseComment(CFXMLParserRef parser
, Boolean report
) {
330 const UniChar dashes
[2] = {'-', '-'};
332 report
= report
&& (!(parser
->options
& kCFXMLParserSkipMetaData
));
333 if (!_inputStreamScanToCharacters(&parser
->input
, dashes
, 2, report
? (CFMutableStringRef
)(parser
->node
->dataString
) : NULL
) || !_inputStreamGetCharacter(&parser
->input
, &ch
)) {
334 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
,"Found unexpected EOF while parsing comment");
336 } else if (ch
!= '>') {
337 _CFReportError(parser
, kCFXMLErrorMalformedComment
, "Found \"--\" within a comment");
340 parser
->node
->dataTypeID
= kCFXMLNodeTypeComment
;
341 parser
->node
->additionalData
= NULL
;
342 return reportNewLeaf(parser
);
349 [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
350 [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
352 // parser should be set to the first character after "<?"
353 static Boolean
parseProcessingInstruction(CFXMLParserRef parser
, Boolean report
) {
354 const UniChar piTermination
[2] = {'?', '>'};
355 CFMutableStringRef str
;
358 if (!_inputStreamScanXMLName(&parser
->input
, false, &name
)) {
359 _CFReportError(parser
, kCFXMLErrorMalformedProcessingInstruction
, "Found malformed processing instruction");
362 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
363 str
= (report
&& *parser
->top
) ? CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
)) : NULL
;
364 if (!_inputStreamScanToCharacters(&parser
->input
, piTermination
, 2, str
)) {
365 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing processing instruction");
366 if (str
) CFRelease(str
);
371 CFXMLProcessingInstructionInfo data
;
373 CFStringRef tmp
= parser
->node
->dataString
;
374 parser
->node
->dataTypeID
= kCFXMLNodeTypeProcessingInstruction
;
375 parser
->node
->dataString
= name
;
376 data
.dataString
= str
;
377 parser
->node
->additionalData
= &data
;
378 result
= reportNewLeaf(parser
);
379 parser
->node
->additionalData
= NULL
;
380 parser
->node
->dataString
= tmp
;
389 [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
391 static const UniChar _DoctypeOpening
[7] = {'D', 'O', 'C', 'T', 'Y', 'P', 'E'};
392 // first character should be immediately after the "<!"
393 static Boolean
parseDTD(CFXMLParserRef parser
) {
395 Boolean success
, hasExtID
= false;
396 CFXMLDocumentTypeInfo docData
= {{NULL
, NULL
}};
397 void *dtdStructure
= NULL
;
400 // First pass "DOCTYPE"
401 success
= _inputStreamMatchString(&parser
->input
, _DoctypeOpening
, 7);
402 success
= success
&& _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0;
403 success
= success
&& _inputStreamScanXMLName(&parser
->input
, false, &name
);
405 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
406 success
= _inputStreamPeekCharacter(&parser
->input
, &ch
);
408 // didn't make it past "DOCTYPE" successfully.
409 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found malformed DTD");
412 if (success
&& ch
!= '[' && ch
!= '>') {
415 success
= parseExternalID(parser
, false, &(docData
.externalID
));
417 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
418 success
= _inputStreamPeekCharacter(&parser
->input
, &ch
);
422 if (!(parser
->options
& kCFXMLParserSkipMetaData
) && *(parser
->top
)) {
423 CFStringRef tmp
= parser
->node
->dataString
;
424 parser
->node
->dataTypeID
= kCFXMLNodeTypeDocumentType
;
425 parser
->node
->dataString
= name
;
426 parser
->node
->additionalData
= &docData
;
427 dtdStructure
= (void *)INVOKE_CALLBACK3(parser
->callBacks
.createXMLStructure
, parser
, parser
->node
, parser
->context
.info
);
428 if (dtdStructure
&& parser
->status
== kCFXMLStatusParseInProgress
) {
429 INVOKE_CALLBACK4(parser
->callBacks
.addChild
, parser
, *parser
->top
, dtdStructure
, parser
->context
.info
);
431 parser
->node
->additionalData
= NULL
;
432 parser
->node
->dataString
= tmp
;
433 if (parser
->status
!= kCFXMLStatusParseInProgress
) {
434 // callback called CFXMLParserAbort()
435 _CFReportError(parser
, parser
->status
, NULL
);
441 if (docData
.externalID
.publicID
) CFRelease(docData
.externalID
.publicID
);
442 if (docData
.externalID
.systemID
) CFRelease(docData
.externalID
.systemID
);
443 pushXMLNode(parser
, dtdStructure
);
445 if (success
&& ch
== '[') {
447 _inputStreamGetCharacter(&parser
->input
, &ch
);
448 if (!parseInlineDTD(parser
)) return false;
449 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
450 success
= _inputStreamGetCharacter(&parser
->input
, &ch
) && ch
== '>';
451 } else if (success
&& ch
== '>') {
453 _inputStreamGetCharacter(&parser
->input
, &ch
);
456 if (_inputStreamAtEOF(&parser
->input
)) {
457 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing DTD");
459 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found malformed DTD");
464 parser
->top
--; // Remove dtdStructure from the stack
466 if (success
&& dtdStructure
) {
467 INVOKE_CALLBACK3(parser
->callBacks
.endXMLStructure
, parser
, dtdStructure
, parser
->context
.info
);
468 if (parser
->status
!= kCFXMLStatusParseInProgress
) {
469 _CFReportError(parser
, parser
->status
, NULL
);
477 [69] PEReference ::= '%' Name ';'
479 static Boolean
parsePhysicalEntityReference(CFXMLParserRef parser
) {
482 if (!_inputStreamScanXMLName(&parser
->input
, false, &name
)) {
483 _CFReportError(parser
, kCFXMLErrorMalformedName
, "Found malformed name while parsing physical entity reference");
485 } else if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
486 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing physical entity reference");
488 } else if (ch
!= ';') {
489 _CFReportError(parser
, kCFXMLErrorMalformedName
, "Found malformed name while parsing physical entity reference");
491 } else if (!(parser
->options
& kCFXMLParserSkipMetaData
) && *(parser
->top
)) {
492 CFXMLEntityReferenceInfo myData
;
494 CFStringRef tmp
= parser
->node
->dataString
;
495 parser
->node
->dataTypeID
= kCFXMLNodeTypeEntityReference
;
496 parser
->node
->dataString
= name
;
497 myData
.entityType
= kCFXMLEntityTypeParameter
;
498 parser
->node
->additionalData
= &myData
;
499 result
= reportNewLeaf(parser
);
500 parser
->node
->additionalData
= NULL
;
501 parser
->node
->dataString
= tmp
;
509 [54] AttType ::= StringType | TokenizedType | EnumeratedType
510 [55] StringType ::= 'CDATA'
511 [56] TokenizedType ::= 'ID' | 'IDREF'| 'IDREFS'| 'ENTITY'| 'ENTITIES'| 'NMTOKEN'| 'NMTOKENS'
512 [57] EnumeratedType ::= NotationType | Enumeration
513 [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
514 [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
516 static Boolean
parseEnumeration(CFXMLParserRef parser
, Boolean useNMTokens
) {
518 Boolean done
= false;
519 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
520 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
522 } else if (ch
!= '(') {
523 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
526 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
527 if (!_inputStreamScanXMLName(&parser
->input
, useNMTokens
, NULL
)) {
528 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
532 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
533 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
534 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
536 } else if (ch
== ')') {
538 } else if (ch
== '|') {
539 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
540 if (!_inputStreamScanXMLName(&parser
->input
, useNMTokens
, NULL
)) {
541 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
545 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
552 static Boolean
parseAttributeType(CFXMLParserRef parser
, CFMutableStringRef str
) {
553 Boolean success
= false;
554 static const UniChar attTypeStrings
[6][8] = {
555 {'C', 'D', 'A', 'T', 'A', '\0', '\0', '\0'},
556 {'I', 'D', 'R', 'E', 'F', 'S', '\0', '\0'},
557 {'E', 'N', 'T', 'I', 'T', 'Y', '\0', '\0'},
558 {'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S'},
559 {'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S'},
560 {'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N'} };
561 if (str
) _inputStreamSetMark(&parser
->input
);
562 if (_inputStreamMatchString(&parser
->input
, attTypeStrings
[0], 5) ||
563 _inputStreamMatchString(&parser
->input
, attTypeStrings
[1], 6) ||
564 _inputStreamMatchString(&parser
->input
, attTypeStrings
[1], 5) ||
565 _inputStreamMatchString(&parser
->input
, attTypeStrings
[1], 2) ||
566 _inputStreamMatchString(&parser
->input
, attTypeStrings
[2], 6) ||
567 _inputStreamMatchString(&parser
->input
, attTypeStrings
[3], 8) ||
568 _inputStreamMatchString(&parser
->input
, attTypeStrings
[4], 8) ||
569 _inputStreamMatchString(&parser
->input
, attTypeStrings
[4], 7)) {
571 } else if (_inputStreamMatchString(&parser
->input
, attTypeStrings
[5], 8)) {
573 if (_inputStreamSkipWhitespace(&parser
->input
, NULL
) == 0) {
574 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
577 success
= parseEnumeration(parser
, false);
580 success
= parseEnumeration(parser
, true);
584 _inputStreamGetCharactersFromMark(&parser
->input
, str
);
586 _inputStreamClearMark(&parser
->input
);
591 /* [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) */
592 static Boolean
parseAttributeDefaultDeclaration(CFXMLParserRef parser
, CFMutableStringRef str
) {
593 const UniChar strings
[3][8] = {
594 {'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D'},
595 {'I', 'M', 'P', 'L', 'I', 'E', 'D', '\0'},
596 {'F', 'I', 'X', 'E', 'D', '\0', '\0', '\0'}};
599 if (str
) _inputStreamSetMark(&parser
->input
);
600 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
601 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
603 } else if (ch
== '#') {
604 if (_inputStreamMatchString(&parser
->input
, strings
[0], 8) ||
605 _inputStreamMatchString(&parser
->input
, strings
[1], 7)) {
607 } else if (!_inputStreamMatchString(&parser
->input
, strings
[2], 5) || _inputStreamSkipWhitespace(&parser
->input
, NULL
) == 0) {
608 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
611 // we fall through if "#FIXED" was matched, and at least one whitespace character was stripped.
612 success
= parseAttributeValue(parser
, NULL
);
615 _inputStreamReturnCharacter(&parser
->input
, ch
);
616 success
= parseAttributeValue(parser
, NULL
);
620 _inputStreamGetCharactersFromMark(&parser
->input
, str
);
622 _inputStreamClearMark(&parser
->input
);
628 [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
629 [53] AttDef ::= S Name S AttType S DefaultDecl
631 static Boolean
parseAttributeListDeclaration(CFXMLParserRef parser
) {
632 const UniChar attList
[7] = {'A', 'T', 'T', 'L', 'I', 'S', 'T'};
633 CFXMLAttributeListDeclarationInfo attListData
;
634 CFXMLAttributeDeclarationInfo attributeArray
[8], *attributes
=attributeArray
;
635 CFIndex capacity
= 8;
637 Boolean success
= true;
639 if (!_inputStreamMatchString(&parser
->input
, attList
, 7) ||
640 _inputStreamSkipWhitespace(&parser
->input
, NULL
) == 0 ||
641 !_inputStreamScanXMLName(&parser
->input
, false, &name
)) {
642 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
645 attListData
.numberOfAttributes
= 0;
646 if (!(*parser
->top
) || (parser
->options
& kCFXMLParserSkipMetaData
)) {
647 // Use this to mark that we don't need to collect attribute information to report to the client. Ultimately, we may want to collect this for our own use (for validation, for instance), but for now, the only reason we would create it would be for the client. -- REW, 2/9/2000
650 while (_inputStreamPeekCharacter(&parser
->input
, &ch
) && ch
!= '>' && _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0) {
651 CFXMLAttributeDeclarationInfo
*attribute
= NULL
;
652 if (_inputStreamPeekCharacter(&parser
->input
, &ch
) && ch
== '>')
655 if (capacity
== attListData
.numberOfAttributes
) {
656 capacity
= 2*capacity
;
657 if (attributes
!= attributeArray
) {
658 attributes
= CFAllocatorReallocate(CFGetAllocator(parser
), attributes
, capacity
* sizeof(CFXMLAttributeDeclarationInfo
), 0);
660 attributes
= CFAllocatorAllocate(CFGetAllocator(parser
), capacity
* sizeof(CFXMLAttributeDeclarationInfo
), 0);
663 attribute
= &(attributes
[attListData
.numberOfAttributes
]);
664 // Much better if we can somehow create these strings immutable - then if the client (or we ourselves) has to copy them, they will end up multiply-retained, rather than having a new alloc and data copy performed. -- REW, 2/9/2000
665 attribute
->typeString
= CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
));
666 attribute
->defaultString
= CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
));
668 if (!_inputStreamScanXMLName(&parser
->input
, false, &(attribute
->attributeName
)) || (_inputStreamSkipWhitespace(&parser
->input
, NULL
) == 0)) {
669 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
673 if (!parseAttributeType(parser
, attribute
? (CFMutableStringRef
)attribute
->typeString
: NULL
)) {
677 if (_inputStreamSkipWhitespace(&parser
->input
, NULL
) == 0) {
678 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
682 if (!parseAttributeDefaultDeclaration(parser
, attribute
? (CFMutableStringRef
)attribute
->defaultString
: NULL
)) {
686 attListData
.numberOfAttributes
++;
689 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
690 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
692 } else if (ch
!= '>') {
693 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
695 } else if (attributes
) {
696 CFStringRef tmp
= parser
->node
->dataString
;
697 parser
->node
->dataTypeID
= kCFXMLNodeTypeAttributeListDeclaration
;
698 parser
->node
->dataString
= name
;
699 attListData
.attributes
= attributes
;
700 parser
->node
->additionalData
= (void *)&attListData
;
701 success
= reportNewLeaf(parser
);
702 parser
->node
->additionalData
= NULL
;
703 parser
->node
->dataString
= tmp
;
707 // Free up all that memory
709 for (idx
= 0; idx
< attListData
.numberOfAttributes
; idx
++) {
710 // Do not release attributeName here; it's a uniqued string from scanXMLName
711 CFRelease(attributes
[idx
].typeString
);
712 CFRelease(attributes
[idx
].defaultString
);
714 if (attributes
!= attributeArray
) {
715 CFAllocatorDeallocate(CFGetAllocator(parser
), attributes
);
721 CF_INLINE Boolean
parseSystemLiteral(CFXMLParserRef parser
, CFXMLExternalID
*extID
) {
724 CFMutableStringRef urlStr
= CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
));
725 if (_inputStreamScanQuotedString(&parser
->input
, urlStr
)) {
727 extID
->systemID
= CFURLCreateWithString(CFGetAllocator(parser
), urlStr
, parser
->input
.url
);
729 extID
->systemID
= NULL
;
734 success
= _inputStreamScanQuotedString(&parser
->input
, NULL
);
740 [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
741 [83] PublicID ::= 'PUBLIC' S PubidLiteral
742 [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
743 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
744 [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
746 // This does NOT report errors itself; caller can check to see if parser->input is at EOF to determine whether the formatting failed or unexpected EOF occurred. -- REW, 2/2/2000
// parseExternalID: parses an ExternalID (production [75]); when alsoAcceptPublicID is true a
// bare PublicID (production [83]) is accepted as well.
//   parser             - parser whose input stream is consumed
//   alsoAcceptPublicID - if true, "PUBLIC" + literal without a following system literal succeeds
//   extID              - receives publicID / systemID
// The keyword is matched against "PUBLIC" or "SYSTEM"; the public literal is scanned into
// extID->publicID and the system literal is delegated to parseSystemLiteral.
// NOTE(review): lines are missing from this listing (e.g. 750-751, 757, 760, 762-764), so the
// guards around the extID writes and the final return cannot be confirmed from here.
747 static Boolean
parseExternalID(CFXMLParserRef parser
, Boolean alsoAcceptPublicID
, CFXMLExternalID
*extID
) {
748 const UniChar publicString
[6] = {'P', 'U', 'B', 'L', 'I', 'C'};
749 const UniChar systemString
[6] = {'S', 'Y', 'S', 'T', 'E', 'M'};
// Start from a clean result so callers can release whatever is non-NULL afterwards.
752 extID
->systemID
= NULL
;
753 extID
->publicID
= NULL
;
755 if (_inputStreamMatchString(&parser
->input
, publicString
, 6)) {
// At least one whitespace character must follow the keyword.
756 success
= _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0;
// Capturing variant: the public literal is scanned into a fresh mutable string.
758 extID
->publicID
= CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
));
759 success
= success
&& _inputStreamScanQuotedString(&parser
->input
, (CFMutableStringRef
)extID
->publicID
);
// Non-capturing variant: the literal is scanned and discarded.
761 success
= success
&& _inputStreamScanQuotedString(&parser
->input
, NULL
);
765 if (alsoAcceptPublicID
) {
766 _inputStreamSetMark(&parser
->input
); // In case we need to roll back the parser
768 if (_inputStreamSkipWhitespace(&parser
->input
, NULL
) == 0
769 || !_inputStreamPeekCharacter(&parser
->input
, &ch
)
770 || (ch
!= '\'' && ch
!= '\"')
771 || !parseSystemLiteral(parser
, extID
)) {
// No system literal followed: acceptable only in PublicID mode, so success mirrors the flag
// and the stream is rewound to the mark taken above.
772 success
= alsoAcceptPublicID
;
773 if (alsoAcceptPublicID
) {
774 _inputStreamBackUpToMark(&parser
->input
);
// The mark is dropped again in PublicID mode.
779 if (alsoAcceptPublicID
) {
780 _inputStreamClearMark(&parser
->input
);
783 } else if (_inputStreamMatchString(&parser
->input
, systemString
, 6)) {
// "SYSTEM" requires whitespace followed by a system literal.
784 success
= _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0 && parseSystemLiteral(parser
, extID
);
792 [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
794 static Boolean
parseNotationDeclaration(CFXMLParserRef parser
) {
795 static UniChar notationString
[8] = {'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N'};
796 Boolean report
= *(parser
->top
) && !(parser
->options
& kCFXMLParserSkipMetaData
);
797 CFXMLNotationInfo notationData
= {{NULL
, NULL
}};
800 _inputStreamMatchString(&parser
->input
, notationString
, 8) &&
801 _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0 &&
802 _inputStreamScanXMLName(&parser
->input
, false, report
? &name
: NULL
) &&
803 _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0 &&
804 parseExternalID(parser
, true, report
? &(notationData
.externalID
) : NULL
);
808 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
809 success
= (_inputStreamGetCharacter(&parser
->input
, &ch
) && ch
== '>');
812 if (_inputStreamAtEOF(&parser
->input
)) {
813 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
815 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
818 CFStringRef tmp
= parser
->node
->dataString
;
819 parser
->node
->dataTypeID
= kCFXMLNodeTypeNotation
;
820 parser
->node
->dataString
= name
;
821 parser
->node
->additionalData
= ¬ationData
;
822 success
= reportNewLeaf(parser
);
823 parser
->node
->additionalData
= NULL
;
824 parser
->node
->dataString
= tmp
;
826 if (notationData
.externalID
.systemID
) CFRelease(notationData
.externalID
.systemID
);
827 if (notationData
.externalID
.publicID
) CFRelease(notationData
.externalID
.publicID
);
832 [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
833 [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
834 [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
// parseChoiceOrSequence: validates and consumes a parenthesised choice or seq group
// (productions [49]/[50]), recursing for nested groups.
//   parser    - parser whose input stream is consumed
//   pastParen - NOTE(review): presumably true when the caller already consumed the opening '(';
//               the line that tests it is not part of this listing
// Names are scanned with a NULL destination, so nothing is captured. The visible exits return
// false on EOF or on a character that does not fit the grammar, and true once ')' is read.
836 static Boolean
parseChoiceOrSequence(CFXMLParserRef parser
, Boolean pastParen
) {
837 UniChar ch
, separator
;
// Consume the opening '(' and any whitespace after it.
839 if (!_inputStreamGetCharacter(&parser
->input
, &ch
) || ch
!= '(') return false;
840 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
842 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) return false;
844 /* Now scanning cp, production [48] */
// A cp is either a nested group (recursive call) or a plain Name.
846 if (!parseChoiceOrSequence(parser
, false)) return false;
848 if (!_inputStreamScanXMLName(&parser
->input
, false, NULL
)) return false;
// Optional occurrence indicator after the cp.
850 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) return false;
851 if (ch
== '?' || ch
== '*' || ch
== '+') _inputStreamGetCharacter(&parser
->input
, &ch
);
854 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
855 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) return false;
// A single-item group ends here; anything other than a separator is an error.
856 if (ch
== ')') return true;
857 if (ch
!= '|' && ch
!= ',') return false;
// NOTE(review): `separator` is assigned on a line not shown here - presumably the first '|' or
// ',' seen, so that the loop only continues while the same separator is used.
859 while (ch
== separator
) {
860 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
861 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) return false;
863 if (!_inputStreamScanXMLName(&parser
->input
, false, NULL
)) return false;
864 } else if (!parseChoiceOrSequence(parser
, false)) {
867 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
868 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) return false;
874 [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')'
876 static Boolean
parseMixedElementContent(CFXMLParserRef parser
) {
877 static const UniChar pcdataString
[7] = {'#', 'P', 'C', 'D', 'A', 'T', 'A'};
879 if (!_inputStreamMatchString(&parser
->input
, pcdataString
, 7)) return false;
880 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
881 if (!_inputStreamGetCharacter(&parser
->input
, &ch
) && (ch
== ')' || ch
== '|')) return false;
882 if (ch
== ')') return true;
885 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
886 if (!_inputStreamScanXMLName(&parser
->input
, false, NULL
)) return false;
887 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
888 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) return false;
890 if (ch
!= ')') return false;
891 if (!_inputStreamGetCharacter(&parser
->input
, &ch
) || ch
!= '*') return false;
896 [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
897 [47] children ::= (choice | seq) ('?' | '*' | '+')?
// parseElementContentSpec: validates and consumes a contentspec (production [46]): the keyword
// EMPTY or ANY, a Mixed group (delegated to parseMixedElementContent), or a children group
// (delegated to parseChoiceOrSequence) with an optional '*', '?' or '+' suffix.
// NOTE(review): the returns of most branches and the test that selects the Mixed path are on
// lines missing from this listing.
899 static Boolean
parseElementContentSpec(CFXMLParserRef parser
) {
900 static const UniChar eltContentEmpty
[5] = {'E', 'M', 'P', 'T', 'Y'};
901 static const UniChar eltContentAny
[3] = {'A', 'N', 'Y'};
903 if (_inputStreamMatchString(&parser
->input
, eltContentEmpty
, 5) || _inputStreamMatchString(&parser
->input
, eltContentAny
, 3)) {
// Anything that is not a keyword must open with '('.
905 } else if (!_inputStreamPeekCharacter(&parser
->input
, &ch
) || ch
!= '(') {
908 // We want to know if we have a Mixed per production [51]. If we don't, we will need to back up and call the parseChoiceOrSequence function. So we set the mark now. -- REW, 2/10/2000
// Consume the '(' that was just peeked, skip whitespace, and look at what follows.
909 _inputStreamGetCharacter(&parser
->input
, &ch
);
910 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
911 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) return false;
914 return parseMixedElementContent(parser
);
// pastParen is passed as true here, after the '(' was consumed above.
916 if (parseChoiceOrSequence(parser
, true)) {
// Optional occurrence indicator on the whole group (production [47]).
917 if (_inputStreamPeekCharacter(&parser
->input
, &ch
) && (ch
== '*' || ch
== '?' || ch
== '+')) {
918 _inputStreamGetCharacter(&parser
->input
, &ch
);
929 [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
// parseElementDeclaration: parses an element type declaration (production [45]) starting at
// the "ELEMENT" keyword.
// `report` is true only when there is a current parent structure (*parser->top) and
// kCFXMLParserSkipMetaData is not set. In that case the raw contentspec text is captured
// between a stream mark and the current position and delivered through reportNewLeaf() as a
// kCFXMLNodeTypeElementTypeDeclaration node whose dataString is the element name.
// Failures are reported through _CFReportError() as unexpected EOF or malformed DTD.
// NOTE(review): declarations of `success`, `ch` and `name`, and the final return, are on lines
// missing from this listing.
931 static Boolean
parseElementDeclaration(CFXMLParserRef parser
) {
932 Boolean report
= *(parser
->top
) && !(parser
->options
& kCFXMLParserSkipMetaData
);
934 static const UniChar eltChars
[7] = {'E', 'L', 'E', 'M', 'E', 'N', 'T'};
936 CFMutableStringRef contentDesc
= NULL
;
// Keyword, mandatory whitespace, element name, mandatory whitespace.
938 success
= _inputStreamMatchString(&parser
->input
, eltChars
, 7)
939 && _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0
940 && _inputStreamScanXMLName(&parser
->input
, false, report
? &name
: NULL
)
941 && _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0;
// The mark brackets the contentspec so its text can be copied out verbatim afterwards.
943 if (report
) _inputStreamSetMark(&parser
->input
);
944 success
= parseElementContentSpec(parser
);
945 if (success
&& report
) {
946 contentDesc
= CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
));
947 _inputStreamGetCharactersFromMark(&parser
->input
, contentDesc
);
949 if (report
) _inputStreamClearMark(&parser
->input
);
950 if (success
) _inputStreamSkipWhitespace(&parser
->input
, NULL
);
// ch is used as a one-character pattern. NOTE(review): its initial value is set on a line not
// shown here - presumably '>', the terminator required by production [45].
951 success
= success
&& _inputStreamMatchString(&parser
->input
, &ch
, 1);
954 if (_inputStreamAtEOF(&parser
->input
)) {
955 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
957 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
// Reporting path: the private node temporarily carries the element name and content
// description; its previous dataString is restored after the callback.
960 CFXMLElementTypeDeclarationInfo eltData
;
961 CFStringRef tmp
= parser
->node
->dataString
;
962 parser
->node
->dataTypeID
= kCFXMLNodeTypeElementTypeDeclaration
;
963 parser
->node
->dataString
= name
;
964 eltData
.contentDescription
= contentDesc
;
965 parser
->node
->additionalData
= &eltData
;
966 success
= reportNewLeaf(parser
);
967 parser
->node
->additionalData
= NULL
;
968 parser
->node
->dataString
= tmp
;
970 if (contentDesc
) CFRelease(contentDesc
);
975 [70] EntityDecl ::= GEDecl | PEDecl
976 [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
977 [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
978 [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
979 [74] PEDef ::= EntityValue | ExternalID
980 [76] NDataDecl ::= S 'NDATA' S Name
981 [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | "'" ([^%&'] | PEReference | Reference)* "'"
// parseEntityDeclaration: parses a general or parameter entity declaration (productions
// [70]-[76]) starting at the "ENTITY" keyword.
// A '%' after the keyword is consumed on the parameter-entity path. The entity value is
// either a quoted string (captured as entityData.replacementText on the capturing path) or an
// external ID parsed by parseExternalID, optionally followed by "NDATA" Name for non-parameter
// entities. `report` is true only when there is a current parent structure (*parser->top) and
// kCFXMLParserSkipMetaData is not set; the result is then delivered through reportNewLeaf() as
// a kCFXMLNodeTypeEntity node.
// NOTE(review): several lines are missing from this listing (declarations of `ch`, `name`,
// `success`; where isPEDecl is set; some branch headers; the final return).
983 static Boolean
parseEntityDeclaration(CFXMLParserRef parser
) {
984 const UniChar entityStr
[6] = {'E', 'N', 'T', 'I', 'T', 'Y'};
986 Boolean isPEDecl
= false;
987 CFXMLEntityInfo entityData
;
989 Boolean report
= *(parser
->top
) && !(parser
->options
& kCFXMLParserSkipMetaData
);
// Keyword, mandatory whitespace, then peek at what follows.
991 _inputStreamMatchString(&parser
->input
, entityStr
, 6) &&
992 (_inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0) &&
993 _inputStreamPeekCharacter(&parser
->input
, &ch
);
// Clear every owned field so the cleanup at the end can release whatever is non-NULL.
995 entityData
.replacementText
= NULL
;
996 entityData
.entityID
.publicID
= NULL
;
997 entityData
.entityID
.systemID
= NULL
;
998 entityData
.notationName
= NULL
;
999 // We will set entityType immediately before reporting
// '%' introduces a parameter entity; it must be followed by whitespace.
1001 if (success
&& ch
== '%') {
1002 _inputStreamGetCharacter(&parser
->input
, &ch
);
1003 success
= _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0;
// Entity name, mandatory whitespace, then peek at the first character of the definition.
1006 success
= success
&& _inputStreamScanXMLName(&parser
->input
, false, report
? &name
: NULL
) && (_inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0) && _inputStreamPeekCharacter(&parser
->input
, &ch
);
1007 if (success
&& (ch
== '\"' || ch
== '\'')) {
1009 // This is not quite correct - the string scanned cannot contain '%' or '&' unless it's as part of a valid entity reference -- REW, 2/2/2000
1011 entityData
.replacementText
= CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
));
1012 success
= _inputStreamScanQuotedString(&parser
->input
, (CFMutableStringRef
)entityData
.replacementText
);
1014 success
= _inputStreamScanQuotedString(&parser
->input
, NULL
);
1016 } else if (success
) {
// Not a quoted value: an external ID is required (PublicID alone is not accepted here).
1018 success
= parseExternalID(parser
, false, report
? &(entityData
.entityID
) : NULL
);
1019 if (success
&& !isPEDecl
&& _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0) {
1020 // There could be an optional NDataDecl
1021 // Don't we need to set entityData.notationName? -- REW, 3/6/2000
1022 const UniChar nDataStr
[5] = {'N', 'D', 'A', 'T', 'A'};
1023 if (_inputStreamMatchString(&parser
->input
, nDataStr
, 5)) {
// The notation name is scanned with a NULL destination, so it is validated but not kept;
// entityData.notationName is never assigned in the lines visible here.
1024 success
= (_inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0) && _inputStreamScanXMLName(&parser
->input
, false, NULL
);
// Optional whitespace, then the closing '>'.
1029 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
1030 success
= _inputStreamGetCharacter(&parser
->input
, &ch
) && ch
== '>';
1033 if (_inputStreamAtEOF(&parser
->input
)) {
1034 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
1036 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
// Reporting path: choose the entity type from what was parsed, then lend the private node the
// entity name and info for the duration of the callback.
1039 CFStringRef tmp
= parser
->node
->dataString
;
1040 if (isPEDecl
) entityData
.entityType
= kCFXMLEntityTypeParameter
;
1041 else if (entityData
.replacementText
) entityData
.entityType
= kCFXMLEntityTypeParsedInternal
;
1042 else if (!entityData
.notationName
) entityData
.entityType
= kCFXMLEntityTypeParsedExternal
;
1043 else entityData
.entityType
= kCFXMLEntityTypeUnparsed
;
1044 parser
->node
->dataTypeID
= kCFXMLNodeTypeEntity
;
1045 parser
->node
->dataString
= name
;
1046 parser
->node
->additionalData
= &entityData
;
1047 success
= reportNewLeaf(parser
);
1048 parser
->node
->additionalData
= NULL
;
1049 parser
->node
->dataString
= tmp
;
1050 if (entityData
.replacementText
) CFRelease(entityData
.replacementText
);
1052 if (entityData
.entityID
.publicID
) CFRelease(entityData
.entityID
.publicID
);
1053 if (entityData
.entityID
.systemID
) CFRelease(entityData
.entityID
.systemID
);
1058 [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1059 [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
1061 // First character should be just past '['
// parseInlineDTD: parses the internal DTD subset (productions [28]/[29]), looping until a
// step fails or the input reaches EOF.
// Dispatch is by leading characters: "<?" is a processing instruction, "<!--" a comment, and
// "<!" followed by a keyword is a declaration chosen by its first letter ('A' attribute list,
// 'N' notation, 'E' element or entity). Unexpected characters are reported as
// kCFXMLErrorMalformedDTD and premature EOF as kCFXMLErrorUnexpectedEOF.
// NOTE(review): a number of lines are missing from this listing, including the test that leads
// to parsePhysicalEntityReference, the body of the ']' branch and the return statements.
1062 static Boolean
parseInlineDTD(CFXMLParserRef parser
) {
1063 Boolean success
= true;
1064 while (success
&& !_inputStreamAtEOF(&parser
->input
)) {
1067 parseWhitespace(parser
);
1068 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) break;
1071 success
= parsePhysicalEntityReference(parser
);
1072 } else if (ch
== '<') {
1074 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
1075 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
1079 // Processing Instruction
1080 success
= parseProcessingInstruction(parser
, true); // We can safely pass true here, because *parser->top will be NULL if kCFXMLParserSkipMetaData is true
1081 } else if (ch
== '!') {
1082 UniChar dashes
[2] = {'-', '-'};
1083 if (_inputStreamMatchString(&parser
->input
, dashes
, 2)) {
1085 success
= parseComment(parser
, true);
1087 // elementdecl | AttListDecl | EntityDecl | NotationDecl
1088 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) {
1089 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
1091 } else if (ch
== 'A') {
1093 success
= parseAttributeListDeclaration(parser
);
1094 } else if (ch
== 'N') {
1095 success
= parseNotationDeclaration(parser
);
1096 } else if (ch
== 'E') {
1097 // elementdecl | EntityDecl
// Both keywords start with 'E': read past it, peek at the second letter, then push the 'E'
// back so the declaration parser sees the complete keyword.
1098 _inputStreamGetCharacter(&parser
->input
, &ch
);
1099 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) {
1100 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
1103 _inputStreamReturnCharacter(&parser
->input
, 'E');
1105 success
= parseElementDeclaration(parser
);
// Second letter 'N' selects the entity declaration.
1106 } else if (ch
== 'N') {
1107 success
= parseEntityDeclaration(parser
);
1109 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
1113 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
1118 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
1121 } else if (ch
== ']') {
1124 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
1129 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
1135 [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
// parseTagContent: parses element content (production [43]) until an end tag is seen or the
// input runs out.
// Leading whitespace is skipped under a stream mark so it can either be folded back into
// following character data or reported as its own kCFXMLNodeTypeWhitespace leaf. After that
// the first character decides: anything other than '<' or '&' is character data
// (parsePCData), '&' is an entity reference, and '<' leads to a processing instruction, end
// tag, child element, comment or CDATA section.
// Leaving the loop is the premature-EOF path: an encoding error or kCFXMLErrorUnexpectedEOF is
// reported (see the comments at 1206).
// NOTE(review): several lines, including most closing braces and the returns, are missing
// from this listing.
1137 static Boolean
parseTagContent(CFXMLParserRef parser
) {
1138 while (!_inputStreamAtEOF(&parser
->input
)) {
1140 CFIndex numWhitespaceCharacters
;
1142 _inputStreamSetMark(&parser
->input
);
1143 numWhitespaceCharacters
= _inputStreamSkipWhitespace(&parser
->input
, NULL
);
1144 // Don't report the whitespace yet; if the first thing we see is character data, we put the whitespace back and report it as part of the character data.
1145 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) break; // break == report unexpected EOF
1147 if (ch
!= '<' && ch
!= '&') { // CharData
1148 // Back off the whitespace; we'll report it with the PCData
1149 _inputStreamBackUpToMark(&parser
->input
);
1150 _inputStreamClearMark(&parser
->input
);
1151 if (!parsePCData(parser
)) return false;
1152 if(_inputStreamComposingErrorOccurred(&parser
->input
)) {
1153 _CFReportError(parser
, kCFXMLErrorEncodingConversionFailure
, "Encountered string encoding error");
1159 // element | Reference | CDSect | PI | Comment
1160 // We can safely report any whitespace now
1161 if (!(parser
->options
& kCFXMLParserSkipWhitespace
) && numWhitespaceCharacters
!= 0 && *(parser
->top
)) {
// Push the markup character back so the text copied from the mark is whitespace only, then
// read that character again once the whitespace leaf has been reported.
1162 _inputStreamReturnCharacter(&parser
->input
, ch
);
1163 _inputStreamGetCharactersFromMark(&parser
->input
, (CFMutableStringRef
)(parser
->node
->dataString
));
1164 parser
->node
->dataTypeID
= kCFXMLNodeTypeWhitespace
;
1165 parser
->node
->additionalData
= NULL
;
1166 if (!reportNewLeaf(parser
)) return false;
1167 _inputStreamGetCharacter(&parser
->input
, &ch
);
1169 _inputStreamClearMark(&parser
->input
);
1172 // Reference; for the time being, we don't worry about processing these; just report them as Entity references
1173 if (!parseEntityReference(parser
, true)) return false;
1177 // ch == '<'; element | CDSect | PI | Comment
1178 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) break;
1179 if (ch
== '?') { // PI
1180 _inputStreamGetCharacter(&parser
->input
, &ch
);
1181 if (!parseProcessingInstruction(parser
, true))
1183 } else if (ch
== '/') { // end tag; we're passing outside of content's production
1184 _inputStreamReturnCharacter(&parser
->input
, '<'); // Back off to the '<'
1186 } else if (ch
!= '!') { // element
1187 if (!parseTag(parser
)) return false;
// "<!" is either a comment ("<!--") or, failing that, a CDATA section.
1190 UniChar dashes
[3] = {'!', '-', '-'};
1191 if (_inputStreamMatchString(&parser
->input
, dashes
, 3)) {
1193 if (!parseComment(parser
, true)) return false;
1195 // Should have a CDSect; back off the "<!" and call parseCDSect
1196 _inputStreamReturnCharacter(&parser
->input
, '<');
1197 if (!parseCDSect(parser
)) return false;
1202 if(_inputStreamComposingErrorOccurred(&parser
->input
)) {
1203 _CFReportError(parser
, kCFXMLErrorEncodingConversionFailure
, "Encountered string encoding error");
1206 // Only way to get here is if premature EOF was found
1207 //#warning CF:Include the tag name here
1208 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing tag content");
// parseCDSect: parses a CDATA section. The stream must be positioned at "<![CDATA[".
// Everything up to the closing "]]>" is scanned into parser->node->dataString and reported
// through reportNewLeaf() as a kCFXMLNodeTypeCDATASection node; the function returns that
// call's result.
// A wrong opening sequence is reported as kCFXMLErrorMalformedCDSect and a missing terminator
// as kCFXMLErrorUnexpectedEOF. NOTE(review): the returns following those two reports are on
// lines missing from this listing.
1212 static Boolean
parseCDSect(CFXMLParserRef parser
) {
1213 const UniChar _CDSectOpening
[9] = {'<', '!', '[', 'C', 'D', 'A', 'T', 'A', '['};
1214 const UniChar _CDSectClose
[3] = {']', ']', '>'};
1215 if (!_inputStreamMatchString(&parser
->input
, _CDSectOpening
, 9)) {
1216 _CFReportError(parser
, kCFXMLErrorMalformedCDSect
, "Encountered bad prefix to a presumed CDATA section");
1219 if (!_inputStreamScanToCharacters(&parser
->input
, _CDSectClose
, 3, (CFMutableStringRef
)(parser
->node
->dataString
))) {
1220 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing CDATA section");
1224 parser
->node
->dataTypeID
= kCFXMLNodeTypeCDATASection
;
1225 parser
->node
->additionalData
= NULL
;
1226 return reportNewLeaf(parser
);
1230 [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1232 static Boolean
validateCharacterReference(CFStringRef str
) {
1234 CFIndex idx
, len
= CFStringGetLength(str
);
1235 if (len
< 2) return false;
1236 if (CFStringGetCharacterAtIndex(str
, 0) != '#') return false;
1237 if (CFStringGetCharacterAtIndex(str
, 1) == 'x') {
1240 if (len
== 2) return false;
1248 ch
= CFStringGetCharacterAtIndex(str
, idx
);
1250 if (!(ch
<= '9' && ch
>= '0') &&
1251 !(isHex
&& ((ch
>= 'a' && ch
<= 'f') || (ch
>= 'A' && ch
<= 'F')))) {
1255 return (idx
== len
);
1259 [67] Reference ::= EntityRef | CharRef
1260 [68] EntityRef ::= '&' Name ';'
// parseEntityReference: parses a Reference (productions [66]-[68]).
//   parser - parser whose input stream is consumed
//   report - when true the scanned name is captured (it is passed to _inputStreamScanXMLName);
//            NOTE(review): presumably it also gates the reportNewLeaf() call below, but that
//            test is on a line missing from this listing
// Two forms are handled. For a character reference the text up to a terminator is scanned
// into parser->node->dataString, checked with validateCharacterReference(), and typed
// kCFXMLEntityTypeCharacter. Otherwise an XML name followed by ';' is required and the type is
// kCFXMLEntityTypeParsedInternal. The result is delivered as a kCFXMLNodeTypeEntityReference
// node. Errors are reported as unexpected EOF, malformed character reference or malformed name.
// NOTE(review): the declarations of `ch` and `success`, the test that selects the character
// reference path, and the returns are not visible here.
1262 static Boolean
parseEntityReference(CFXMLParserRef parser
, Boolean report
) {
1264 CFXMLEntityReferenceInfo entData
;
1265 CFStringRef name
= NULL
;
1266 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) {
1267 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing EntityReference");
// ch is used as a one-character terminator pattern; presumably it was set to ';' on a line
// not shown here.
1272 if (!_inputStreamScanToCharacters(&parser
->input
, &ch
, 1, (CFMutableStringRef
)parser
->node
->dataString
)) {
1273 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing EntityReference");
1275 } else if (!validateCharacterReference(parser
->node
->dataString
)) {
1276 _CFReportError(parser
, kCFXMLErrorMalformedCharacterReference
, "Encountered illegal character while parsing character reference");
1279 entData
.entityType
= kCFXMLEntityTypeCharacter
;
1280 name
= parser
->node
->dataString
;
// Named reference: Name immediately followed by ';'.
1281 } else if (!_inputStreamScanXMLName(&parser
->input
, false, report
? &name
: NULL
) || !_inputStreamGetCharacter(&parser
->input
, &ch
) || ch
!= ';') {
1282 if (_inputStreamAtEOF(&parser
->input
)) {
1283 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing EntityReference");
1286 _CFReportError(parser
, kCFXMLErrorMalformedName
, "Encountered malformed name while parsing EntityReference");
1290 entData
.entityType
= kCFXMLEntityTypeParsedInternal
;
// Reporting: the private node temporarily carries the reference name and info; its previous
// dataString is restored after the callback.
1293 CFStringRef tmp
= parser
->node
->dataString
;
1295 parser
->node
->dataTypeID
= kCFXMLNodeTypeEntityReference
;
1296 parser
->node
->dataString
= name
;
1297 parser
->node
->additionalData
= &entData
;
1298 success
= reportNewLeaf(parser
);
1299 parser
->node
->additionalData
= NULL
;
1300 parser
->node
->dataString
= tmp
;
1308 // Kept from old entity reference parsing....
1310 switch (*(parser
->curr
)) {
1312 if (len
>= 3 && *(parser
->curr
+1) == 't' && *(parser
->curr
+2) == ';') {
1317 parser
->errorString
= CFStringCreateWithFormat(parser
->allocator
, NULL
, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser
));
1320 if (len
>= 3 && *(parser
->curr
+1) == 't' && *(parser
->curr
+2) == ';') {
1325 parser
->errorString
= CFStringCreateWithFormat(parser
->allocator
, NULL
, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser
));
1327 case 'a': // "apos" or "amp"
1328 if (len
< 4) { // Not enough characters for either conversion
1329 parser
->errorString
= CFStringCreateWithCString(parser
->allocator
, "Encountered unexpected EOF", kCFStringEncodingASCII
);
1332 if (*(parser
->curr
+1) == 'm') {
1334 if (*(parser
->curr
+2) == 'p' && *(parser
->curr
+3) == ';') {
1339 } else if (*(parser
->curr
+1) == 'p') {
1341 if (len
> 4 && *(parser
->curr
+2) == 'o' && *(parser
->curr
+3) == 's' && *(parser
->curr
+4) == ';') {
1347 parser
->errorString
= CFStringCreateWithFormat(parser
->allocator
, NULL
, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser
));
1349 case 'q': // "quote"
1350 if (len
>= 6 && *(parser
->curr
+1) == 'u' && *(parser
->curr
+2) == 'o' && *(parser
->curr
+3) == 't' && *(parser
->curr
+4) == 'e' && *(parser
->curr
+5) == ';') {
1355 parser
->errorString
= CFStringCreateWithFormat(parser
->allocator
, NULL
, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser
));
1360 Boolean isHex
= false;
1361 if ( len
< 4) { // Not enough characters to make it all fit! Need at least "&#d;"
1362 parser
->errorString
= CFStringCreateWithCString(parser
->allocator
, "Encountered unexpected EOF", kCFStringEncodingASCII
);
1366 if (*(parser
->curr
) == 'x') {
1370 while (parser
->curr
< parser
->end
) {
1371 ch
= *(parser
->curr
);
1373 CFStringAppendCharacters(string
, &num
, 1);
1377 if (!isHex
) num
= num
*10;
1378 else num
= num
<< 4;
1379 if (ch
<= '9' && ch
>= '0') {
1381 } else if (!isHex
) {
1382 parser
->errorString
= CFStringCreateWithFormat(parser
->allocator
, NULL
, CFSTR("Encountered unexpected character %c at line %d"), ch
, lineNumber(parser
));
1384 } else if (ch
>= 'a' && ch
<= 'f') {
1385 num
+= 10 + (ch
- 'a');
1386 } else if (ch
>= 'A' && ch
<= 'F') {
1387 num
+= 10 + (ch
- 'A');
1389 parser
->errorString
= CFStringCreateWithFormat(parser
->allocator
, NULL
, CFSTR("Encountered unexpected character %c at line %d"), ch
, lineNumber(parser
));
1393 parser
->errorString
= CFStringCreateWithCString(parser
->allocator
, "Encountered unexpected EOF", kCFStringEncodingASCII
);
1397 parser
->errorString
= CFStringCreateWithFormat(parser
->allocator
, NULL
, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser
));
1400 CFStringAppendCharacters(string
, &ch
, 1);
1405 [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
// parsePCData: parses a run of character data (production [14]).
// Characters are consumed under a stream mark; the text between the mark and the stopping
// point is copied into parser->node->dataString and reported through reportNewLeaf() as a
// kCFXMLNodeTypeText node, whose result is returned.
// The sequence "]]>" inside character data is reported as
// kCFXMLErrorMalformedParsedCharacterData.
// NOTE(review): the switch/branch lines that decide when to stop, and the declaration of
// `ch`, are missing from this listing.
1407 static Boolean
parsePCData(CFXMLParserRef parser
) {
1409 Boolean done
= false;
1410 _inputStreamSetMark(&parser
->input
);
1411 while (!done
&& _inputStreamGetCharacter(&parser
->input
, &ch
)) {
// The character just read is pushed back so that it is not part of the text run.
1415 _inputStreamReturnCharacter(&parser
->input
, ch
);
// Looks for the rest of "]]>"; presumably reached after a ']' has been read - confirm against
// the full source.
1420 const UniChar endSequence
[2] = {']', '>'};
1421 if (_inputStreamMatchString(&parser
->input
, endSequence
, 2)) {
1422 _CFReportError(parser
, kCFXMLErrorMalformedParsedCharacterData
, "Encountered \"]]>\" in parsed character data");
1423 _inputStreamClearMark(&parser
->input
);
1432 _inputStreamGetCharactersFromMark(&parser
->input
, (CFMutableStringRef
)(parser
->node
->dataString
));
1433 _inputStreamClearMark(&parser
->input
);
1434 parser
->node
->dataTypeID
= kCFXMLNodeTypeText
;
1435 parser
->node
->additionalData
= NULL
;
1436 return reportNewLeaf(parser
);
1440 [42] ETag ::= '</' Name S? '>'
// parseCloseTag: parses an end tag (production [42]) and checks that it closes `tag`.
//   parser - parser whose input stream is consumed
//   tag    - name of the element being closed; compared by pointer with the scanned name
// Two failure kinds are tracked: running out of input (unexpectedEOF) and any other mismatch.
// On failure parser->errorString and parser->status are set directly and the handleError
// callback, if any, is invoked with the matching status code.
// NOTE(review): the lines that set `mismatch`, declare `ch`, and return are missing from this
// listing.
1442 static Boolean
parseCloseTag(CFXMLParserRef parser
, CFStringRef tag
) {
1443 const UniChar beginEndTag
[2] = {'<', '/'};
1444 Boolean unexpectedEOF
= false, mismatch
= false;
1445 CFStringRef closeTag
;
1447 // We can get away with testing pointer equality between tag & closeTag because scanXMLName guarantees the strings it returns are unique.
1448 if (_inputStreamMatchString(&parser
->input
, beginEndTag
, 2) && _inputStreamScanXMLName(&parser
->input
, false, &closeTag
) && closeTag
== tag
) {
// Name matched: optional whitespace, then '>' must follow.
1451 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
1452 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
1453 unexpectedEOF
= true;
1454 } else if (ch
!= '>') {
1457 } else if (_inputStreamAtEOF(&parser
->input
)) {
1458 unexpectedEOF
= true;
1463 if (unexpectedEOF
|| mismatch
) {
1464 if (unexpectedEOF
) {
1465 parser
->errorString
= CFStringCreateWithFormat(CFGetAllocator(parser
), NULL
, CFSTR("Encountered unexpected EOF while parsing close tag for <%@>"), tag
);
1466 parser
->status
= kCFXMLErrorUnexpectedEOF
;
1467 if(parser
->callBacks
.handleError
) INVOKE_CALLBACK3(parser
->callBacks
.handleError
, parser
, kCFXMLErrorUnexpectedEOF
, parser
->context
.info
);
1469 parser
->errorString
= CFStringCreateWithFormat(CFGetAllocator(parser
), NULL
, CFSTR("Encountered malformed close tag for <%@>"), tag
);
1470 parser
->status
= kCFXMLErrorMalformedCloseTag
;
1471 if(parser
->callBacks
.handleError
) INVOKE_CALLBACK3(parser
->callBacks
.handleError
, parser
, kCFXMLErrorMalformedCloseTag
, parser
->context
.info
);
1479 [39] element ::= EmptyElementTag | STag content ETag
1480 [40] STag ::= '<' Name (S Attribute)* S? '>'
1481 [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
// parseTag: parses one element (productions [39], [40], [44]) with the stream positioned just
// after the opening '<'.
// Steps visible here: scan the tag name, skip whitespace, parse attributes into
// parser->argDict / parser->argArray, then read the tag terminator and record data.isEmpty.
// If there is a current parent structure, or the stack is at its base, the
// createXMLStructure callback is invoked with the private node describing the element, and
// the result is attached to its parent through addChild. The new structure is pushed, content
// and the matching close tag are parsed for non-empty elements, and endXMLStructure is invoked
// on success.
// A status other than kCFXMLStatusParseInProgress after a callback is reported through
// _CFReportError() (the existing comment at 1530 attributes this to CFXMLParserAbort()).
// NOTE(review): the declarations of `ch` and `tag`, the tests on the terminator character, the
// pop of the node stack and the returns are on lines missing from this listing.
1483 static Boolean
parseTag(CFXMLParserRef parser
) {
1486 CFXMLElementInfo data
;
1487 Boolean success
= true;
1488 CFStringRef tagName
;
1490 if (!_inputStreamScanXMLName(&parser
->input
, false, &tagName
)) {
1491 _CFReportError(parser
, kCFXMLErrorMalformedStartTag
, "Encountered malformed start tag");
1495 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
1497 if (!parseAttributes(parser
)) return false; // parsed directly into parser->argDict ; parseAttributes consumes any trailing whitespace
1498 data
.attributes
= parser
->argDict
;
1499 data
.attributeOrder
= parser
->argArray
;
1500 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
1501 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF");
// Empty-element form: one more character is read after the first terminator character.
1505 data
.isEmpty
= true;
1506 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
1507 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF");
1511 data
.isEmpty
= false;
1514 _CFReportError(parser
, kCFXMLErrorMalformedStartTag
, "Encountered malformed start tag");
// Create the client structure only when there is a parent to attach to or this is the base
// of the stack.
1518 if (*parser
->top
|| parser
->top
== parser
->stack
) {
1519 CFStringRef oldStr
= parser
->node
->dataString
;
1520 parser
->node
->dataTypeID
= kCFXMLNodeTypeElement
;
1521 parser
->node
->dataString
= tagName
;
1522 parser
->node
->additionalData
= &data
;
1523 tag
= (void *)INVOKE_CALLBACK3(parser
->callBacks
.createXMLStructure
, parser
, parser
->node
, parser
->context
.info
);
1524 if (tag
&& parser
->status
== kCFXMLStatusParseInProgress
) {
1525 INVOKE_CALLBACK4(parser
->callBacks
.addChild
, parser
, *parser
->top
, tag
, parser
->context
.info
);
// Restore the private node to its previous state after the callbacks.
1527 parser
->node
->additionalData
= NULL
;
1528 parser
->node
->dataString
= oldStr
;
1529 if (parser
->status
!= kCFXMLStatusParseInProgress
) {
1530 // callback called CFXMLParserAbort()
1531 _CFReportError(parser
, parser
->status
, NULL
);
1538 pushXMLNode(parser
, tag
);
1539 if (!data
.isEmpty
) {
1540 success
= parseTagContent(parser
);
1542 success
= parseCloseTag(parser
, tagName
);
1547 if (success
&& tag
) {
1548 INVOKE_CALLBACK3(parser
->callBacks
.endXMLStructure
, parser
, tag
, parser
->context
.info
);
1549 if (parser
->status
!= kCFXMLStatusParseInProgress
) {
1550 _CFReportError(parser
, parser
->status
, NULL
);
1558 [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
1559 [67] Reference ::= EntityRef | CharRef
1560 [68] EntityRef ::= '&' Name ';'
1562 // For the moment, we don't worry about references in the attribute values.
1563 static Boolean
parseAttributeValue(CFXMLParserRef parser
, CFMutableStringRef str
) {
1565 Boolean success
= _inputStreamGetCharacter(&parser
->input
, "e
);
1566 if (!success
|| (quote
!= '\'' && quote
!= '\"')) return false;
1567 if (str
) _inputStreamSetMark(&parser
->input
);
1568 while (_inputStreamGetCharacter(&parser
->input
, &ch
) && ch
!= quote
) {
1570 case '<': success
= false; break;
1572 if (!parseEntityReference(parser
, false)) {
1581 if (success
&& _inputStreamAtEOF(&parser
->input
)) {
1586 _inputStreamReturnCharacter(&parser
->input
, quote
);
1587 _inputStreamGetCharactersFromMark(&parser
->input
, str
);
1588 _inputStreamGetCharacter(&parser
->input
, &ch
);
1590 _inputStreamClearMark(&parser
->input
);
1596 [40] STag ::= '<' Name (S Attribute)* S? '>'
1597 [41] Attribute ::= Name Eq AttValue
1598 [25] Eq ::= S? '=' S?
1601 // Expects parser->curr to be at the first content character; will consume the trailing whitespace.
1602 Boolean
parseAttributes(CFXMLParserRef parser
) {
1604 CFMutableDictionaryRef dict
;
1605 CFMutableArrayRef array
;
1606 Boolean failure
= false;
1607 if (_inputStreamPeekCharacter(&parser
->input
, &ch
) == '>') {
1608 if (parser
->argDict
) {
1609 CFDictionaryRemoveAllValues(parser
->argDict
);
1610 CFArrayRemoveAllValues(parser
->argArray
);
1612 return true; // No attributes; let caller deal with it
1614 if (!parser
->argDict
) {
1615 parser
->argDict
= CFDictionaryCreateMutable(CFGetAllocator(parser
), 0, &kCFTypeDictionaryKeyCallBacks
, &kCFTypeDictionaryValueCallBacks
);
1616 parser
->argArray
= CFArrayCreateMutable(CFGetAllocator(parser
), 0, &kCFTypeArrayCallBacks
);
1618 CFDictionaryRemoveAllValues(parser
->argDict
);
1619 CFArrayRemoveAllValues(parser
->argArray
);
1621 dict
= parser
->argDict
;
1622 array
= parser
->argArray
;
1623 while (!failure
&& _inputStreamPeekCharacter(&parser
->input
, &ch
) && ch
!= '>' && ch
!= '/') {
1625 CFMutableStringRef value
;
1626 if (!_inputStreamScanXMLName(&parser
->input
, false, &key
)) {
1630 if (CFArrayGetFirstIndexOfValue(array
, CFRangeMake(0, CFArrayGetCount(array
)), key
) != kCFNotFound
) {
1631 _CFReportError(parser
, kCFXMLErrorMalformedStartTag
, "Found repeated attribute");
1634 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
1635 if (!_inputStreamGetCharacter(&parser
->input
, &ch
) || ch
!= '=') {
1639 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
1640 value
= CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
));
1641 if (!parseAttributeValue(parser
, value
)) {
1646 CFArrayAppendValue(array
, key
);
1647 CFDictionarySetValue(dict
, key
, value
);
1649 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
1652 //#warning CF:Include tag name in this error report
1653 _CFReportError(parser
, kCFXMLErrorMalformedStartTag
, "Found illegal character while parsing element tag");
1655 } else if (_inputStreamAtEOF(&parser
->input
)) {
1656 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing element attributes");
1664 [1] document ::= prolog element Misc*
1665 [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1666 [27] Misc ::= Comment | PI | S
1667 [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1669 We treat XMLDecl as a plain old PI, since PI is part of Misc. This changes the prolog and document productions to
1670 [22-1] prolog ::= Misc* (doctypedecl Misc*)?
1671 [1-1] document ::= Misc* (doctypedecl Misc*)? element Misc*
1673 NOTE: This function assumes parser->stack has a valid top. I.e. the document pointer has already been created!
1675 static Boolean
parseXML(CFXMLParserRef parser
) {
1676 Boolean success
= true, sawDTD
= false, sawElement
= false;
1678 while (success
&& _inputStreamPeekCharacter(&parser
->input
, &ch
)) {
1684 success
= parseWhitespace(parser
);
1687 _inputStreamGetCharacter(&parser
->input
, &ch
);
1688 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
1689 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing top-level document");
1694 UniChar dashes
[2] = {'-', '-'};
1695 if (_inputStreamMatchString(&parser
->input
, dashes
, 2)) {
1697 success
= parseComment(parser
, true);
1701 _CFReportError(parser
, kCFXMLErrorMalformedDocument
, "Encountered a second DTD");
1704 success
= parseDTD(parser
);
1705 if (success
) sawDTD
= true;
1707 } else if (ch
== '?') {
1708 // Processing instruction
1709 success
= parseProcessingInstruction(parser
, true);
1713 _CFReportError(parser
, kCFXMLErrorMalformedDocument
, "Encountered second top-level element");
1716 _inputStreamReturnCharacter(&parser
->input
, ch
);
1717 success
= parseTag(parser
);
1718 if (success
) sawElement
= true;
1722 parser
->status
= kCFXMLErrorMalformedDocument
;
1723 parser
->errorString
= ch
< 256 ?
1724 CFStringCreateWithFormat(CFGetAllocator(parser
), NULL
, CFSTR("Encountered unexpected character 0x%x (\'%c\') at top-level"), ch
, ch
) :
1725 CFStringCreateWithFormat(CFGetAllocator(parser
), NULL
, CFSTR("Encountered unexpected Unicode character 0x%x at top-level"), ch
);
1727 if (parser
->callBacks
.handleError
) {
1728 INVOKE_CALLBACK3(parser
->callBacks
.handleError
, parser
, parser
->status
, parser
->context
.info
);
1735 if (!success
) return false;
1737 _CFReportError(parser
, kCFXMLErrorElementlessDocument
, "No element found in document");
1743 static void _CFReportError(CFXMLParserRef parser
, CFXMLParserStatusCode errNum
, const char *str
) {
1745 parser
->status
= errNum
;
1746 parser
->errorString
= CFStringCreateWithCString(CFGetAllocator(parser
), str
, kCFStringEncodingASCII
);
1748 if (parser
->callBacks
.handleError
) {
1749 INVOKE_CALLBACK3(parser
->callBacks
.handleError
, parser
, errNum
, parser
->context
.info
);
1753 // Assumes parser->node has been set and is ready to go
1754 static Boolean
reportNewLeaf(CFXMLParserRef parser
) {
1756 if (*(parser
->top
) == NULL
) return true;
1758 xmlStruct
= (void *)INVOKE_CALLBACK3(parser
->callBacks
.createXMLStructure
, parser
, parser
->node
, parser
->context
.info
);
1759 if (xmlStruct
&& parser
->status
== kCFXMLStatusParseInProgress
) {
1760 INVOKE_CALLBACK4(parser
->callBacks
.addChild
, parser
, *(parser
->top
), xmlStruct
, parser
->context
.info
);
1761 if (parser
->status
== kCFXMLStatusParseInProgress
) INVOKE_CALLBACK3(parser
->callBacks
.endXMLStructure
, parser
, xmlStruct
, parser
->context
.info
);
1763 if (parser
->status
!= kCFXMLStatusParseInProgress
) {
1764 _CFReportError(parser
, parser
->status
, NULL
);
1770 static void pushXMLNode(CFXMLParserRef parser
, void *node
) {
1772 if ((unsigned)(parser
->top
- parser
->stack
) == parser
->capacity
) {
1773 parser
->stack
= CFAllocatorReallocate(CFGetAllocator(parser
), parser
->stack
, 2 * parser
->capacity
* sizeof(void *), 0);
1774 parser
->top
= parser
->stack
+ parser
->capacity
;
1775 parser
->capacity
= 2*parser
->capacity
;
1777 *(parser
->top
) = node
;
/**************************/
/* Parsing to a CFXMLTree */
/**************************/
1784 static void *_XMLTreeCreateXMLStructure(CFXMLParserRef parser
, CFXMLNodeRef node
, void *context
) {
1785 CFXMLNodeRef myNode
= CFXMLNodeCreateCopy(CFGetAllocator(parser
), node
);
1786 CFXMLTreeRef tree
= CFXMLTreeCreateWithNode(CFGetAllocator(parser
), myNode
);
1788 return (void *)tree
;
1791 static void _XMLTreeAddChild(CFXMLParserRef parser
, void *parent
, void *child
, void *context
) {
1792 CFTreeAppendChild((CFTreeRef
)parent
, (CFTreeRef
)child
);
1795 static void _XMLTreeEndXMLStructure(CFXMLParserRef parser
, void *xmlType
, void *context
) {
1796 CFXMLTreeRef node
= (CFXMLTreeRef
)xmlType
;
1797 if (CFTreeGetParent(node
))
1798 CFRelease((CFXMLTreeRef
)xmlType
);
1801 CFXMLTreeRef
CFXMLTreeCreateWithDataFromURL(CFAllocatorRef allocator
, CFURLRef dataSource
, CFOptionFlags parseOptions
, CFIndex version
) {
1802 CFXMLParserRef parser
;
1803 CFXMLParserCallBacks callbacks
;
1804 CFXMLTreeRef result
;
1806 CFAssert1(dataSource
== NULL
|| CFGetTypeID(dataSource
) == CFURLGetTypeID(), __kCFLogAssertion
, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__
);
1808 callbacks
.createXMLStructure
= _XMLTreeCreateXMLStructure
;
1809 callbacks
.addChild
= _XMLTreeAddChild
;
1810 callbacks
.endXMLStructure
= _XMLTreeEndXMLStructure
;
1811 callbacks
.resolveExternalEntity
= NULL
;
1812 callbacks
.handleError
= NULL
;
1813 parser
= CFXMLParserCreateWithDataFromURL(allocator
, dataSource
, parseOptions
, version
, &callbacks
, NULL
);
1815 if (CFXMLParserParse(parser
)) {
1816 result
= (CFXMLTreeRef
)CFXMLParserGetDocument(parser
);
1818 result
= (CFXMLTreeRef
)CFXMLParserGetDocument(parser
);
1819 if (result
) CFRelease(result
);
1826 CFXMLTreeRef
CFXMLTreeCreateFromData(CFAllocatorRef allocator
, CFDataRef xmlData
, CFURLRef dataSource
, CFOptionFlags parseOptions
, CFIndex parserVersion
) {
1827 return CFXMLTreeCreateFromDataWithError(allocator
, xmlData
, dataSource
, parseOptions
, parserVersion
, NULL
);
// Compiled only when building against the 10.3 SDK or later.
// NOTE(review): the matching #endif is not visible in this part of the file.
#if MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_3

// Keys of the error dictionary filled in by CFXMLTreeCreateFromDataWithError().
// Each constant's string contents are the same as its symbol name.
CONST_STRING_DECL(kCFXMLTreeErrorDescription, "kCFXMLTreeErrorDescription");
CONST_STRING_DECL(kCFXMLTreeErrorLineNumber, "kCFXMLTreeErrorLineNumber");
CONST_STRING_DECL(kCFXMLTreeErrorLocation, "kCFXMLTreeErrorLocation");
CONST_STRING_DECL(kCFXMLTreeErrorStatusCode, "kCFXMLTreeErrorStatusCode");
1837 CFXMLTreeRef
CFXMLTreeCreateFromDataWithError(CFAllocatorRef allocator
, CFDataRef xmlData
, CFURLRef dataSource
, CFOptionFlags parseOptions
, CFIndex parserVersion
, CFDictionaryRef
*errorDict
) {
1838 CFXMLParserRef parser
;
1839 CFXMLParserCallBacks callbacks
;
1840 CFXMLTreeRef result
;
1842 __CFGenericValidateType(xmlData
, CFDataGetTypeID());
1843 CFAssert1(dataSource
== NULL
|| CFGetTypeID(dataSource
) == CFURLGetTypeID(), __kCFLogAssertion
, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__
);
1845 callbacks
.createXMLStructure
= _XMLTreeCreateXMLStructure
;
1846 callbacks
.addChild
= _XMLTreeAddChild
;
1847 callbacks
.endXMLStructure
= _XMLTreeEndXMLStructure
;
1848 callbacks
.resolveExternalEntity
= NULL
;
1849 callbacks
.handleError
= NULL
;
1850 parser
= CFXMLParserCreate(allocator
, xmlData
, dataSource
, parseOptions
, parserVersion
, &callbacks
, NULL
);
1852 if (CFXMLParserParse(parser
)) {
1853 result
= (CFXMLTreeRef
)CFXMLParserGetDocument(parser
);
1855 if (errorDict
) { // collect the error dictionary
1856 *errorDict
= CFDictionaryCreateMutable(allocator
, 4, &kCFTypeDictionaryKeyCallBacks
, &kCFTypeDictionaryValueCallBacks
);
1860 CFStringRef errstring
;
1862 rawnum
= CFXMLParserGetLocation(parser
);
1863 cfnum
= CFNumberCreate(allocator
, kCFNumberSInt32Type
, &rawnum
);
1865 CFDictionaryAddValue((CFMutableDictionaryRef
)*errorDict
, kCFXMLTreeErrorLocation
, cfnum
);
1869 rawnum
= CFXMLParserGetLineNumber(parser
);
1870 cfnum
= CFNumberCreate(allocator
, kCFNumberSInt32Type
, &rawnum
);
1872 CFDictionaryAddValue((CFMutableDictionaryRef
)*errorDict
, kCFXMLTreeErrorLineNumber
, cfnum
);
1876 rawnum
= CFXMLParserGetStatusCode(parser
);
1877 cfnum
= CFNumberCreate(allocator
, kCFNumberSInt32Type
, &rawnum
);
1879 CFDictionaryAddValue((CFMutableDictionaryRef
)*errorDict
, kCFXMLTreeErrorStatusCode
, cfnum
);
1883 errstring
= CFXMLParserCopyErrorDescription(parser
);
1885 CFDictionaryAddValue((CFMutableDictionaryRef
)*errorDict
, kCFXMLTreeErrorDescription
, errstring
);
1886 CFRelease(errstring
);
1890 result
= (CFXMLTreeRef
)CFXMLParserGetDocument(parser
);
1891 if (result
) CFRelease(result
);
1899 At the very least we need to do <, >, &, ", and '. In addition, we'll have to do everything else in the string.
1900 We should also be handling items that are up over certain values correctly.
1902 CFStringRef
CFXMLCreateStringByEscapingEntities(CFAllocatorRef allocator
, CFStringRef string
, CFDictionaryRef entitiesDictionary
) {
1903 CFAssert1(string
!= NULL
, __kCFLogAssertion
, "%s(): NULL string not permitted.", __PRETTY_FUNCTION__
);
1904 CFMutableStringRef newString
= CFStringCreateMutable(allocator
, 0); // unbounded mutable string
1905 CFMutableCharacterSetRef startChars
= CFCharacterSetCreateMutable(allocator
);
1907 CFStringInlineBuffer inlineBuf
;
1910 CFIndex stringLength
= CFStringGetLength(string
);
1913 CFCharacterSetAddCharactersInString(startChars
, CFSTR("&<>'\""));
1915 CFStringInitInlineBuffer(string
, &inlineBuf
, CFRangeMake(0, stringLength
));
1916 for(idx
= 0; idx
< stringLength
; idx
++) {
1917 uc
= CFStringGetCharacterFromInlineBuffer(&inlineBuf
, idx
);
1918 if(CFCharacterSetIsCharacterMember(startChars
, uc
)) {
1919 CFStringRef previousSubstring
= CFStringCreateWithSubstring(allocator
, string
, CFRangeMake(mark
, idx
- mark
));
1920 CFStringAppend(newString
, previousSubstring
);
1921 CFRelease(previousSubstring
);
1924 CFStringAppend(newString
, CFSTR("&"));
1927 CFStringAppend(newString
, CFSTR("<"));
1930 CFStringAppend(newString
, CFSTR(">"));
1933 CFStringAppend(newString
, CFSTR("'"));
1936 CFStringAppend(newString
, CFSTR("""));
1942 // Copy the remainder to the output string before returning.
1943 CFStringRef remainder
= CFStringCreateWithSubstring(allocator
, string
, CFRangeMake(mark
, idx
- mark
));
1944 if (NULL
!= remainder
) {
1945 CFStringAppend(newString
, remainder
);
1946 CFRelease(remainder
);
1949 CFRelease(startChars
);
1953 CFStringRef
CFXMLCreateStringByUnescapingEntities(CFAllocatorRef allocator
, CFStringRef string
, CFDictionaryRef entitiesDictionary
) {
1954 CFAssert1(string
!= NULL
, __kCFLogAssertion
, "%s(): NULL string not permitted.", __PRETTY_FUNCTION__
);
1956 CFStringInlineBuffer inlineBuf
; /* use this for fast traversal of the string in question */
1958 CFIndex lastChunkStart
, length
= CFStringGetLength(string
);
1959 CFIndex i
, entityStart
;
1963 CFMutableDictionaryRef fullReplDict
= entitiesDictionary
? CFDictionaryCreateMutableCopy(allocator
, 0, entitiesDictionary
) : CFDictionaryCreateMutable(allocator
, 0, &kCFTypeDictionaryKeyCallBacks
, &kCFTypeDictionaryValueCallBacks
);
1965 CFDictionaryAddValue(fullReplDict
, (const void *)CFSTR("amp"), (const void *)CFSTR("&"));
1966 CFDictionaryAddValue(fullReplDict
, (const void *)CFSTR("quot"), (const void *)CFSTR("\""));
1967 CFDictionaryAddValue(fullReplDict
, (const void *)CFSTR("lt"), (const void *)CFSTR("<"));
1968 CFDictionaryAddValue(fullReplDict
, (const void *)CFSTR("gt"), (const void *)CFSTR(">"));
1969 CFDictionaryAddValue(fullReplDict
, (const void *)CFSTR("apos"), (const void *)CFSTR("'"));
1971 CFStringInitInlineBuffer(string
, &inlineBuf
, CFRangeMake(0, length
- 1));
1972 CFMutableStringRef newString
= CFStringCreateMutable(allocator
, 0);
1975 // Scan through the string in its entirety
1976 for(i
= 0; i
< length
; ) {
1977 uc
= CFStringGetCharacterFromInlineBuffer(&inlineBuf
, i
); i
++; // grab the next character and move i.
1980 entityStart
= i
- 1;
1981 entity
= 0xFFFF; // set this to a not-Unicode character as sentinel
1982 // we've hit the beginning of an entity. Copy everything from lastChunkStart to this point.
1983 if(lastChunkStart
< i
- 1) {
1984 sub
= CFStringCreateWithSubstring(allocator
, string
, CFRangeMake(lastChunkStart
, (i
- 1) - lastChunkStart
));
1985 CFStringAppend(newString
, sub
);
1989 uc
= CFStringGetCharacterFromInlineBuffer(&inlineBuf
, i
); i
++; // grab the next character and move i.
1990 // Now we can process the entity reference itself
1991 if(uc
== '#') { // this is a numeric entity.
1994 uc
= CFStringGetCharacterFromInlineBuffer(&inlineBuf
, i
); i
++;
1996 if(uc
== 'x') { // only lowercase x allowed. Translating numeric entity as hexadecimal.
1998 uc
= CFStringGetCharacterFromInlineBuffer(&inlineBuf
, i
); i
++;
2001 // process the provided digits 'til we're finished
2003 if (uc
>= '0' && uc
<= '9')
2004 entity
= entity
* base
+ (uc
-'0');
2005 else if (uc
>= 'a' && uc
<= 'f' && base
== 16)
2006 entity
= entity
* base
+ (uc
-'a'+10);
2007 else if (uc
>= 'A' && uc
<= 'F' && base
== 16)
2008 entity
= entity
* base
+ (uc
-'A'+10);
2012 uc
= CFStringGetCharacterFromInlineBuffer(&inlineBuf
, i
); i
++;
2019 // Scan to the end of the entity
2020 while(uc
!= ';' && i
< length
) {
2021 uc
= CFStringGetCharacterFromInlineBuffer(&inlineBuf
, i
); i
++;
2024 if(0xFFFF != entity
) { // it was numeric, and translated.
2025 // Now, output the result fo the entity
2026 if(entity
>= 0x10000) {
2027 UniChar characters
[2] = { ((entity
- 0x10000) >> 10) + 0xD800, ((entity
- 0x10000) & 0x3ff) + 0xDC00 };
2028 CFStringAppendCharacters(newString
, characters
, 2);
2030 UniChar character
= entity
;
2031 CFStringAppendCharacters(newString
, &character
, 1);
2033 } else { // it wasn't numeric.
2034 sub
= CFStringCreateWithSubstring(allocator
, string
, CFRangeMake(entityStart
+ 1, (i
- entityStart
- 2))); // This trims off the & and ; from the string, so we can use it against the dictionary itself.
2035 CFStringRef replacementString
= CFDictionaryGetValue(fullReplDict
, sub
);
2036 if(replacementString
) {
2037 CFStringAppend(newString
, replacementString
);
2039 CFRelease(sub
); // let the old substring go, since we didn't find it in the dictionary
2040 sub
= CFStringCreateWithSubstring(allocator
, string
, CFRangeMake(entityStart
, (i
- entityStart
))); // create a new one, including the & and ;
2041 CFStringAppend(newString
, sub
); // ...and append that.
2043 CFRelease(sub
); // in either case, release the most-recent "sub"
2046 // move the lastChunkStart to the beginning of the next chunk.
2050 if(lastChunkStart
< length
) { // we've come out of the loop, let's get the rest of the string and tack it on.
2051 sub
= CFStringCreateWithSubstring(allocator
, string
, CFRangeMake(lastChunkStart
, i
- lastChunkStart
));
2052 CFStringAppend(newString
, sub
);
2056 CFRelease(fullReplDict
);