/*
 * Copyright (c) 2008 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
	Copyright 1999-2002, Apple, Inc. All rights reserved.
	Responsibility: Chris Parker
*/
28 #include <CoreFoundation/CFXMLParser.h>
29 #include <CoreFoundation/CFNumber.h>
30 #include "CFXMLInputStream.h"
31 #include "CFUniChar.h"
32 #include "CFInternal.h"
34 struct __CFXMLParser
{
35 CFRuntimeBase _cfBase
;
37 _CFXMLInputStream input
;
43 struct __CFXMLNode
*node
; // Our private node; we use it to report back information
44 CFMutableDictionaryRef argDict
;
45 CFMutableArrayRef argArray
;
48 CFXMLParserCallBacks callBacks
;
49 CFXMLParserContext context
;
51 CFXMLParserStatusCode status
;
52 CFStringRef errorString
;
55 static CFStringRef
__CFXMLParserCopyDescription(CFTypeRef cf
) {
56 const struct __CFXMLParser
*parser
= (const struct __CFXMLParser
*)cf
;
57 return CFStringCreateWithFormat(CFGetAllocator(cf
), NULL
, CFSTR("<CFXMLParser %p>"), parser
);
60 static void __CFXMLParserDeallocate(CFTypeRef cf
) {
61 struct __CFXMLParser
*parser
= (struct __CFXMLParser
*)cf
;
62 CFAllocatorRef alloc
= CFGetAllocator(parser
);
63 _freeInputStream(&(parser
->input
));
64 if (parser
->argDict
) CFRelease(parser
->argDict
);
65 if (parser
->argArray
) CFRelease(parser
->argArray
);
66 if (parser
->errorString
) CFRelease(parser
->errorString
);
67 if (parser
->node
) CFRelease(parser
->node
);
68 CFAllocatorDeallocate(alloc
, parser
->stack
);
69 if (parser
->context
.info
&& parser
->context
.release
) {
70 parser
->context
.release(parser
->context
.info
);
74 static CFTypeID __kCFXMLParserTypeID
= _kCFRuntimeNotATypeID
;
76 static const CFRuntimeClass __CFXMLParserClass
= {
81 __CFXMLParserDeallocate
,
85 __CFXMLParserCopyDescription
88 static void __CFXMLParserInitialize(void) {
89 __kCFXMLParserTypeID
= _CFRuntimeRegisterClass(&__CFXMLParserClass
);
92 CFTypeID
CFXMLParserGetTypeID(void) {
93 if (_kCFRuntimeNotATypeID
== __kCFXMLParserTypeID
) __CFXMLParserInitialize();
94 return __kCFXMLParserTypeID
;
97 void CFXMLParserGetContext(CFXMLParserRef parser
, CFXMLParserContext
*context
) {
98 CFAssert1(parser
!= NULL
, __kCFLogAssertion
, "%s(): NULL parser not permitted", __PRETTY_FUNCTION__
);
99 __CFGenericValidateType(parser
, CFXMLParserGetTypeID());
101 context
->version
= parser
->context
.version
;
102 context
->info
= parser
->context
.info
;
103 context
->retain
= parser
->context
.retain
;
104 context
->release
= parser
->context
.release
;
105 context
->copyDescription
= parser
->context
.copyDescription
;
106 UNFAULT_CALLBACK(context
->retain
);
107 UNFAULT_CALLBACK(context
->release
);
108 UNFAULT_CALLBACK(context
->copyDescription
);
112 void CFXMLParserGetCallBacks(CFXMLParserRef parser
, CFXMLParserCallBacks
*callBacks
) {
113 __CFGenericValidateType(parser
, CFXMLParserGetTypeID());
115 callBacks
->version
= parser
->callBacks
.version
;
116 callBacks
->createXMLStructure
= parser
->callBacks
.createXMLStructure
;
117 callBacks
->addChild
= parser
->callBacks
.addChild
;
118 callBacks
->endXMLStructure
= parser
->callBacks
.endXMLStructure
;
119 callBacks
->resolveExternalEntity
= parser
->callBacks
.resolveExternalEntity
;
120 callBacks
->handleError
= parser
->callBacks
.handleError
;
121 UNFAULT_CALLBACK(callBacks
->createXMLStructure
);
122 UNFAULT_CALLBACK(callBacks
->addChild
);
123 UNFAULT_CALLBACK(callBacks
->endXMLStructure
);
124 UNFAULT_CALLBACK(callBacks
->resolveExternalEntity
);
125 UNFAULT_CALLBACK(callBacks
->handleError
);
129 CFURLRef
CFXMLParserGetSourceURL(CFXMLParserRef parser
) {
130 __CFGenericValidateType(parser
, CFXMLParserGetTypeID());
131 return parser
->input
.url
;
134 /* Returns the character index or line number of the current parse location */
135 CFIndex
CFXMLParserGetLocation(CFXMLParserRef parser
) {
136 __CFGenericValidateType(parser
, CFXMLParserGetTypeID());
137 return _inputStreamCurrentLocation(&parser
->input
);
140 CFIndex
CFXMLParserGetLineNumber(CFXMLParserRef parser
) {
141 __CFGenericValidateType(parser
, CFXMLParserGetTypeID());
142 return _inputStreamCurrentLine(&parser
->input
);
145 /* Returns the top-most object returned by the createXMLStructure callback */
146 void *CFXMLParserGetDocument(CFXMLParserRef parser
) {
147 __CFGenericValidateType(parser
, CFXMLParserGetTypeID());
148 if (parser
->capacity
> 0)
149 return parser
->stack
[0];
154 CFXMLParserStatusCode
CFXMLParserGetStatusCode(CFXMLParserRef parser
) {
155 __CFGenericValidateType(parser
, CFXMLParserGetTypeID());
156 return parser
->status
;
159 CFStringRef
CFXMLParserCopyErrorDescription(CFXMLParserRef parser
) {
160 __CFGenericValidateType(parser
, CFXMLParserGetTypeID());
161 return (CFStringRef
)CFRetain(parser
->errorString
);
164 void CFXMLParserAbort(CFXMLParserRef parser
, CFXMLParserStatusCode errorCode
, CFStringRef errorDescription
) {
165 __CFGenericValidateType(parser
, CFXMLParserGetTypeID());
166 CFAssert1(errorCode
> 0, __kCFLogAssertion
, "%s(): errorCode must be greater than zero", __PRETTY_FUNCTION__
);
167 CFAssert1(errorDescription
!= NULL
, __kCFLogAssertion
, "%s(): errorDescription may not be NULL", __PRETTY_FUNCTION__
);
168 __CFGenericValidateType(errorDescription
, CFStringGetTypeID());
170 parser
->status
= errorCode
;
171 if (parser
->errorString
) CFRelease(parser
->errorString
);
172 parser
->errorString
= (CFStringRef
)CFStringCreateCopy(kCFAllocatorSystemDefault
, errorDescription
);
176 static Boolean
parseXML(CFXMLParserRef parser
);
177 static Boolean
parseComment(CFXMLParserRef parser
, Boolean report
);
178 static Boolean
parseProcessingInstruction(CFXMLParserRef parser
, Boolean report
);
179 static Boolean
parseInlineDTD(CFXMLParserRef parser
);
180 static Boolean
parseDTD(CFXMLParserRef parser
);
181 static Boolean
parsePhysicalEntityReference(CFXMLParserRef parser
);
182 static Boolean
parseCDSect(CFXMLParserRef parser
);
183 static Boolean
parseEntityReference(CFXMLParserRef parser
, Boolean report
);
184 static Boolean
parsePCData(CFXMLParserRef parser
);
185 static Boolean
parseWhitespace(CFXMLParserRef parser
);
186 static Boolean
parseAttributeListDeclaration(CFXMLParserRef parser
);
187 static Boolean
parseNotationDeclaration(CFXMLParserRef parser
);
188 static Boolean
parseElementDeclaration(CFXMLParserRef parser
);
189 static Boolean
parseEntityDeclaration(CFXMLParserRef parser
);
190 static Boolean
parseExternalID(CFXMLParserRef parser
, Boolean alsoAcceptPublicID
, CFXMLExternalID
*extID
);
191 static Boolean
parseCloseTag(CFXMLParserRef parser
, CFStringRef tag
);
192 static Boolean
parseTagContent(CFXMLParserRef parser
);
193 static Boolean
parseTag(CFXMLParserRef parser
);
194 static Boolean
parseAttributes(CFXMLParserRef parser
);
195 static Boolean
parseAttributeValue(CFXMLParserRef parser
, CFMutableStringRef str
);
197 // Utilities; may need to make these accessible to the property list parser to avoid code duplication
198 static void _CFReportError(CFXMLParserRef parser
, CFXMLParserStatusCode errNum
, const char *str
);
199 static Boolean
reportNewLeaf(CFXMLParserRef parser
); // Assumes parser->node has been set and is ready to go
200 static void pushXMLNode(CFXMLParserRef parser
, void *node
);
202 static CFXMLParserRef
__CFXMLParserInit(CFAllocatorRef alloc
, CFURLRef dataSource
, CFOptionFlags options
, CFDataRef xmlData
, CFIndex version
, CFXMLParserCallBacks
*callBacks
, CFXMLParserContext
*context
) {
203 struct __CFXMLParser
*parser
= (struct __CFXMLParser
*)_CFRuntimeCreateInstance(alloc
, CFXMLParserGetTypeID(), sizeof(struct __CFXMLParser
) - sizeof(CFRuntimeBase
), NULL
);
204 struct __CFXMLNode
*node
= (struct __CFXMLNode
*)_CFRuntimeCreateInstance(alloc
, CFXMLNodeGetTypeID(), sizeof(struct __CFXMLNode
) - sizeof(CFRuntimeBase
), NULL
);
206 if (parser
&& node
) {
207 alloc
= CFGetAllocator(parser
);
208 _initializeInputStream(&(parser
->input
), alloc
, dataSource
, xmlData
);
209 parser
->top
= parser
->stack
;
210 parser
->stack
= NULL
;
211 parser
->capacity
= 0;
213 buf
= (UniChar
*)CFAllocatorAllocate(alloc
, 128*sizeof(UniChar
), 0);
215 parser
->node
->dataString
= CFStringCreateMutableWithExternalCharactersNoCopy(alloc
, buf
, 0, 128, alloc
);
216 parser
->node
->additionalData
= NULL
;
217 parser
->node
->version
= version
;
218 parser
->argDict
= NULL
; // don't create these until necessary
219 parser
->argArray
= NULL
;
221 parser
->options
= options
;
222 parser
->callBacks
= *callBacks
;
224 FAULT_CALLBACK((void **)&(parser
->callBacks
.createXMLStructure
));
225 FAULT_CALLBACK((void **)&(parser
->callBacks
.addChild
));
226 FAULT_CALLBACK((void **)&(parser
->callBacks
.endXMLStructure
));
227 FAULT_CALLBACK((void **)&(parser
->callBacks
.resolveExternalEntity
));
228 FAULT_CALLBACK((void **)&(parser
->callBacks
.handleError
));
231 parser
->context
= *context
;
232 if (parser
->context
.info
&& parser
->context
.retain
) {
233 parser
->context
.retain(parser
->context
.info
);
236 parser
->context
.version
= 0;
237 parser
->context
.info
= NULL
;
238 parser
->context
.retain
= NULL
;
239 parser
->context
.release
= NULL
;
240 parser
->context
.copyDescription
= NULL
;
242 parser
->status
= kCFXMLStatusParseNotBegun
;
243 parser
->errorString
= NULL
;
245 if (parser
) CFRelease(parser
);
246 if (node
) CFRelease(node
);
252 CFXMLParserRef
CFXMLParserCreate(CFAllocatorRef allocator
, CFDataRef xmlData
, CFURLRef dataSource
, CFOptionFlags parseOptions
, CFIndex versionOfNodes
, CFXMLParserCallBacks
*callBacks
, CFXMLParserContext
*context
) {
253 CFAssert1(xmlData
!= NULL
, __kCFLogAssertion
, "%s(): NULL data not permitted", __PRETTY_FUNCTION__
);
254 __CFGenericValidateType(xmlData
, CFDataGetTypeID());
255 CFAssert1(dataSource
== NULL
|| CFGetTypeID(dataSource
) == CFURLGetTypeID(), __kCFLogAssertion
, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__
);
256 CFAssert1(callBacks
!= NULL
&& callBacks
->createXMLStructure
!= NULL
&& callBacks
->addChild
!= NULL
&& callBacks
->endXMLStructure
!= NULL
, __kCFLogAssertion
, "%s(): callbacks createXMLStructure, addChild, and endXMLStructure must all be non-NULL", __PRETTY_FUNCTION__
);
257 CFAssert2(versionOfNodes
<= 1, __kCFLogAssertion
, "%s(): version number %d is higher than supported by CFXMLParser", __PRETTY_FUNCTION__
, versionOfNodes
);
258 CFAssert1(versionOfNodes
!= 0, __kCFLogAssertion
, "%s(): version number 0 is no longer supported by CFXMLParser", __PRETTY_FUNCTION__
);
259 return __CFXMLParserInit(allocator
, dataSource
, parseOptions
, xmlData
, versionOfNodes
, callBacks
, context
);
262 CFXMLParserRef
CFXMLParserCreateWithDataFromURL(CFAllocatorRef allocator
, CFURLRef dataSource
, CFOptionFlags parseOptions
, CFIndex versionOfNodes
, CFXMLParserCallBacks
*callBacks
, CFXMLParserContext
*context
) {
263 CFAssert1(dataSource
== NULL
|| CFGetTypeID(dataSource
) == CFURLGetTypeID(), __kCFLogAssertion
, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__
);
264 CFAssert1(callBacks
!= NULL
&& callBacks
->createXMLStructure
!= NULL
&& callBacks
->addChild
!= NULL
&& callBacks
->endXMLStructure
!= NULL
, __kCFLogAssertion
, "%s(): callbacks createXMLStructure, addChild, and endXMLStructure must all be non-NULL", __PRETTY_FUNCTION__
);
265 CFAssert2(versionOfNodes
<= 1, __kCFLogAssertion
, "%s(): version number %d is higher than supported by CFXMLParser", __PRETTY_FUNCTION__
, versionOfNodes
);
266 CFAssert1(versionOfNodes
!= 0, __kCFLogAssertion
, "%s(): version number 0 is no longer supported by CFXMLParser", __PRETTY_FUNCTION__
);
268 return __CFXMLParserInit(allocator
, dataSource
, parseOptions
, NULL
, versionOfNodes
, callBacks
, context
);
271 Boolean
CFXMLParserParse(CFXMLParserRef parser
) {
272 CFXMLDocumentInfo docData
;
273 __CFGenericValidateType(parser
, CFXMLParserGetTypeID());
274 if (parser
->status
!= kCFXMLStatusParseNotBegun
) return false;
275 parser
->status
= kCFXMLStatusParseInProgress
;
277 if (!_openInputStream(&parser
->input
)) {
278 if (!parser
->input
.data
) {
280 parser
->status
= kCFXMLErrorNoData
;
281 parser
->errorString
= CFStringCreateWithFormat(CFGetAllocator(parser
), NULL
, CFSTR("No data found at %@"), CFURLGetString(parser
->input
.url
));
283 // couldn't figure out the encoding
284 CFAssert(parser
->input
.encoding
== kCFStringEncodingInvalidId
, __kCFLogAssertion
, "CFXMLParser internal error: input stream could not be opened");
285 parser
->status
= kCFXMLErrorUnknownEncoding
;
286 parser
->errorString
= CFStringCreateWithCString(CFGetAllocator(parser
), "Encountered unknown encoding", kCFStringEncodingASCII
);
288 if (parser
->callBacks
.handleError
) {
289 INVOKE_CALLBACK3(parser
->callBacks
.handleError
, parser
, parser
->status
, parser
->context
.info
);
294 // Create the document
295 parser
->stack
= (void **)CFAllocatorAllocate(CFGetAllocator(parser
), 16 * sizeof(void *), 0);
296 parser
->capacity
= 16;
297 parser
->node
->dataTypeID
= kCFXMLNodeTypeDocument
;
298 docData
.encoding
= _inputStreamGetEncoding(&parser
->input
);
299 docData
.sourceURL
= parser
->input
.url
;
300 parser
->node
->additionalData
= &docData
;
301 parser
->stack
[0] = (void *)INVOKE_CALLBACK3(parser
->callBacks
.createXMLStructure
, parser
, parser
->node
, parser
->context
.info
);
302 parser
->top
= parser
->stack
;
303 parser
->node
->additionalData
= NULL
;
305 // Client may have called CFXMLParserAbort() during any callback, so we must always check to see if we have an error status after a callback
306 if (parser
->status
!= kCFXMLStatusParseInProgress
) {
307 _CFReportError(parser
, parser
->status
, NULL
);
310 return parseXML(parser
);
/* The next several functions are all intended to parse past a particular XML structure.  They expect parser->curr to be set to the first content character of their structure (e.g. parseComment expects parser->curr to be set just past "<!--").  They parse to the end of their structure, calling any necessary callbacks along the way, and advancing parser->curr as they go.  They either return void (not possible for the parse to fail) or they return a Boolean (success/failure).  The calling routines are expected to catch returned Booleans and fail immediately if false is returned. */
315 // [3] S ::= (#x20 | #x9 | #xD | #xA)+
316 static Boolean
parseWhitespace(CFXMLParserRef parser
) {
318 Boolean report
= !(parser
->options
& kCFXMLParserSkipWhitespace
);
319 len
= _inputStreamSkipWhitespace(&parser
->input
, report
? (CFMutableStringRef
)(parser
->node
->dataString
) : NULL
);
321 parser
->node
->dataTypeID
= kCFXMLNodeTypeWhitespace
;
322 parser
->node
->additionalData
= NULL
;
323 return reportNewLeaf(parser
);
329 // parser should be just past "<!--"
330 static Boolean
parseComment(CFXMLParserRef parser
, Boolean report
) {
331 const UniChar dashes
[2] = {'-', '-'};
333 report
= report
&& (!(parser
->options
& kCFXMLParserSkipMetaData
));
334 if (!_inputStreamScanToCharacters(&parser
->input
, dashes
, 2, report
? (CFMutableStringRef
)(parser
->node
->dataString
) : NULL
) || !_inputStreamGetCharacter(&parser
->input
, &ch
)) {
335 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
,"Found unexpected EOF while parsing comment");
337 } else if (ch
!= '>') {
338 _CFReportError(parser
, kCFXMLErrorMalformedComment
, "Found \"--\" within a comment");
341 parser
->node
->dataTypeID
= kCFXMLNodeTypeComment
;
342 parser
->node
->additionalData
= NULL
;
343 return reportNewLeaf(parser
);
350 [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
351 [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
353 // parser should be set to the first character after "<?"
354 static Boolean
parseProcessingInstruction(CFXMLParserRef parser
, Boolean report
) {
355 const UniChar piTermination
[2] = {'?', '>'};
356 CFMutableStringRef str
;
359 if (!_inputStreamScanXMLName(&parser
->input
, false, &name
)) {
360 _CFReportError(parser
, kCFXMLErrorMalformedProcessingInstruction
, "Found malformed processing instruction");
363 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
364 str
= (report
&& *parser
->top
) ? CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
)) : NULL
;
365 if (!_inputStreamScanToCharacters(&parser
->input
, piTermination
, 2, str
)) {
366 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing processing instruction");
367 if (str
) CFRelease(str
);
372 CFXMLProcessingInstructionInfo data
;
374 CFStringRef tmp
= parser
->node
->dataString
;
375 parser
->node
->dataTypeID
= kCFXMLNodeTypeProcessingInstruction
;
376 parser
->node
->dataString
= name
;
377 data
.dataString
= str
;
378 parser
->node
->additionalData
= &data
;
379 result
= reportNewLeaf(parser
);
380 parser
->node
->additionalData
= NULL
;
381 parser
->node
->dataString
= tmp
;
390 [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
392 static const UniChar _DoctypeOpening
[7] = {'D', 'O', 'C', 'T', 'Y', 'P', 'E'};
393 // first character should be immediately after the "<!"
394 static Boolean
parseDTD(CFXMLParserRef parser
) {
396 Boolean success
, hasExtID
= false;
397 CFXMLDocumentTypeInfo docData
= {{NULL
, NULL
}};
398 void *dtdStructure
= NULL
;
401 // First pass "DOCTYPE"
402 success
= _inputStreamMatchString(&parser
->input
, _DoctypeOpening
, 7);
403 success
= success
&& _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0;
404 success
= success
&& _inputStreamScanXMLName(&parser
->input
, false, &name
);
406 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
407 success
= _inputStreamPeekCharacter(&parser
->input
, &ch
);
409 // didn't make it past "DOCTYPE" successfully.
410 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found malformed DTD");
413 if (success
&& ch
!= '[' && ch
!= '>') {
416 success
= parseExternalID(parser
, false, &(docData
.externalID
));
418 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
419 success
= _inputStreamPeekCharacter(&parser
->input
, &ch
);
423 if (!(parser
->options
& kCFXMLParserSkipMetaData
) && *(parser
->top
)) {
424 CFStringRef tmp
= parser
->node
->dataString
;
425 parser
->node
->dataTypeID
= kCFXMLNodeTypeDocumentType
;
426 parser
->node
->dataString
= name
;
427 parser
->node
->additionalData
= &docData
;
428 dtdStructure
= (void *)INVOKE_CALLBACK3(parser
->callBacks
.createXMLStructure
, parser
, parser
->node
, parser
->context
.info
);
429 if (dtdStructure
&& parser
->status
== kCFXMLStatusParseInProgress
) {
430 INVOKE_CALLBACK4(parser
->callBacks
.addChild
, parser
, *parser
->top
, dtdStructure
, parser
->context
.info
);
432 parser
->node
->additionalData
= NULL
;
433 parser
->node
->dataString
= tmp
;
434 if (parser
->status
!= kCFXMLStatusParseInProgress
) {
435 // callback called CFXMLParserAbort()
436 _CFReportError(parser
, parser
->status
, NULL
);
442 if (docData
.externalID
.publicID
) CFRelease(docData
.externalID
.publicID
);
443 if (docData
.externalID
.systemID
) CFRelease(docData
.externalID
.systemID
);
444 pushXMLNode(parser
, dtdStructure
);
446 if (success
&& ch
== '[') {
448 _inputStreamGetCharacter(&parser
->input
, &ch
);
449 if (!parseInlineDTD(parser
)) return false;
450 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
451 success
= _inputStreamGetCharacter(&parser
->input
, &ch
) && ch
== '>';
452 } else if (success
&& ch
== '>') {
454 _inputStreamGetCharacter(&parser
->input
, &ch
);
457 if (_inputStreamAtEOF(&parser
->input
)) {
458 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing DTD");
460 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found malformed DTD");
465 parser
->top
--; // Remove dtdStructure from the stack
467 if (success
&& dtdStructure
) {
468 INVOKE_CALLBACK3(parser
->callBacks
.endXMLStructure
, parser
, dtdStructure
, parser
->context
.info
);
469 if (parser
->status
!= kCFXMLStatusParseInProgress
) {
470 _CFReportError(parser
, parser
->status
, NULL
);
478 [69] PEReference ::= '%' Name ';'
480 static Boolean
parsePhysicalEntityReference(CFXMLParserRef parser
) {
483 if (!_inputStreamScanXMLName(&parser
->input
, false, &name
)) {
484 _CFReportError(parser
, kCFXMLErrorMalformedName
, "Found malformed name while parsing physical entity reference");
486 } else if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
487 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing physical entity reference");
489 } else if (ch
!= ';') {
490 _CFReportError(parser
, kCFXMLErrorMalformedName
, "Found malformed name while parsing physical entity reference");
492 } else if (!(parser
->options
& kCFXMLParserSkipMetaData
) && *(parser
->top
)) {
493 CFXMLEntityReferenceInfo myData
;
495 CFStringRef tmp
= parser
->node
->dataString
;
496 parser
->node
->dataTypeID
= kCFXMLNodeTypeEntityReference
;
497 parser
->node
->dataString
= name
;
498 myData
.entityType
= kCFXMLEntityTypeParameter
;
499 parser
->node
->additionalData
= &myData
;
500 result
= reportNewLeaf(parser
);
501 parser
->node
->additionalData
= NULL
;
502 parser
->node
->dataString
= tmp
;
510 [54] AttType ::= StringType | TokenizedType | EnumeratedType
511 [55] StringType ::= 'CDATA'
512 [56] TokenizedType ::= 'ID' | 'IDREF'| 'IDREFS'| 'ENTITY'| 'ENTITIES'| 'NMTOKEN'| 'NMTOKENS'
513 [57] EnumeratedType ::= NotationType | Enumeration
514 [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
515 [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
517 static Boolean
parseEnumeration(CFXMLParserRef parser
, Boolean useNMTokens
) {
519 Boolean done
= false;
520 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
521 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
523 } else if (ch
!= '(') {
524 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
527 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
528 if (!_inputStreamScanXMLName(&parser
->input
, useNMTokens
, NULL
)) {
529 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
533 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
534 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
535 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
537 } else if (ch
== ')') {
539 } else if (ch
== '|') {
540 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
541 if (!_inputStreamScanXMLName(&parser
->input
, useNMTokens
, NULL
)) {
542 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
546 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
553 static Boolean
parseAttributeType(CFXMLParserRef parser
, CFMutableStringRef str
) {
554 Boolean success
= false;
555 static const UniChar attTypeStrings
[6][8] = {
556 {'C', 'D', 'A', 'T', 'A', '\0', '\0', '\0'},
557 {'I', 'D', 'R', 'E', 'F', 'S', '\0', '\0'},
558 {'E', 'N', 'T', 'I', 'T', 'Y', '\0', '\0'},
559 {'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S'},
560 {'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S'},
561 {'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N'} };
562 if (str
) _inputStreamSetMark(&parser
->input
);
563 if (_inputStreamMatchString(&parser
->input
, attTypeStrings
[0], 5) ||
564 _inputStreamMatchString(&parser
->input
, attTypeStrings
[1], 6) ||
565 _inputStreamMatchString(&parser
->input
, attTypeStrings
[1], 5) ||
566 _inputStreamMatchString(&parser
->input
, attTypeStrings
[1], 2) ||
567 _inputStreamMatchString(&parser
->input
, attTypeStrings
[2], 6) ||
568 _inputStreamMatchString(&parser
->input
, attTypeStrings
[3], 8) ||
569 _inputStreamMatchString(&parser
->input
, attTypeStrings
[4], 8) ||
570 _inputStreamMatchString(&parser
->input
, attTypeStrings
[4], 7)) {
572 } else if (_inputStreamMatchString(&parser
->input
, attTypeStrings
[5], 8)) {
574 if (_inputStreamSkipWhitespace(&parser
->input
, NULL
) == 0) {
575 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
578 success
= parseEnumeration(parser
, false);
581 success
= parseEnumeration(parser
, true);
585 _inputStreamGetCharactersFromMark(&parser
->input
, str
);
587 _inputStreamClearMark(&parser
->input
);
592 /* [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) */
593 static Boolean
parseAttributeDefaultDeclaration(CFXMLParserRef parser
, CFMutableStringRef str
) {
594 const UniChar strings
[3][8] = {
595 {'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D'},
596 {'I', 'M', 'P', 'L', 'I', 'E', 'D', '\0'},
597 {'F', 'I', 'X', 'E', 'D', '\0', '\0', '\0'}};
600 if (str
) _inputStreamSetMark(&parser
->input
);
601 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
602 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
604 } else if (ch
== '#') {
605 if (_inputStreamMatchString(&parser
->input
, strings
[0], 8) ||
606 _inputStreamMatchString(&parser
->input
, strings
[1], 7)) {
608 } else if (!_inputStreamMatchString(&parser
->input
, strings
[2], 5) || _inputStreamSkipWhitespace(&parser
->input
, NULL
) == 0) {
609 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
612 // we fall through if "#FIXED" was matched, and at least one whitespace character was stripped.
613 success
= parseAttributeValue(parser
, NULL
);
616 _inputStreamReturnCharacter(&parser
->input
, ch
);
617 success
= parseAttributeValue(parser
, NULL
);
621 _inputStreamGetCharactersFromMark(&parser
->input
, str
);
623 _inputStreamClearMark(&parser
->input
);
629 [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
630 [53] AttDef ::= S Name S AttType S DefaultDecl
632 static Boolean
parseAttributeListDeclaration(CFXMLParserRef parser
) {
633 const UniChar attList
[7] = {'A', 'T', 'T', 'L', 'I', 'S', 'T'};
634 CFXMLAttributeListDeclarationInfo attListData
;
635 CFXMLAttributeDeclarationInfo attributeArray
[8], *attributes
=attributeArray
;
636 CFIndex capacity
= 8;
638 Boolean success
= true;
640 if (!_inputStreamMatchString(&parser
->input
, attList
, 7) ||
641 _inputStreamSkipWhitespace(&parser
->input
, NULL
) == 0 ||
642 !_inputStreamScanXMLName(&parser
->input
, false, &name
)) {
643 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
646 attListData
.numberOfAttributes
= 0;
647 if (!(*parser
->top
) || (parser
->options
& kCFXMLParserSkipMetaData
)) {
648 // Use this to mark that we don't need to collect attribute information to report to the client. Ultimately, we may want to collect this for our own use (for validation, for instance), but for now, the only reason we would create it would be for the client. -- REW, 2/9/2000
651 while (_inputStreamPeekCharacter(&parser
->input
, &ch
) && ch
!= '>' && _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0) {
652 CFXMLAttributeDeclarationInfo
*attribute
= NULL
;
653 if (_inputStreamPeekCharacter(&parser
->input
, &ch
) && ch
== '>')
656 if (capacity
== attListData
.numberOfAttributes
) {
657 capacity
= 2*capacity
;
658 if (attributes
!= attributeArray
) {
659 attributes
= (CFXMLAttributeDeclarationInfo
*)CFAllocatorReallocate(CFGetAllocator(parser
), attributes
, capacity
* sizeof(CFXMLAttributeDeclarationInfo
), 0);
661 attributes
= (CFXMLAttributeDeclarationInfo
*)CFAllocatorAllocate(CFGetAllocator(parser
), capacity
* sizeof(CFXMLAttributeDeclarationInfo
), 0);
664 attribute
= &(attributes
[attListData
.numberOfAttributes
]);
665 // Much better if we can somehow create these strings immutable - then if the client (or we ourselves) has to copy them, they will end up multiply-retained, rather than having a new alloc and data copy performed. -- REW, 2/9/2000
666 attribute
->typeString
= CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
));
667 attribute
->defaultString
= CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
));
669 if (!_inputStreamScanXMLName(&parser
->input
, false, &(attribute
->attributeName
)) || (_inputStreamSkipWhitespace(&parser
->input
, NULL
) == 0)) {
670 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
674 if (!parseAttributeType(parser
, attribute
? (CFMutableStringRef
)attribute
->typeString
: NULL
)) {
678 if (_inputStreamSkipWhitespace(&parser
->input
, NULL
) == 0) {
679 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
683 if (!parseAttributeDefaultDeclaration(parser
, attribute
? (CFMutableStringRef
)attribute
->defaultString
: NULL
)) {
687 attListData
.numberOfAttributes
++;
690 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
691 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
693 } else if (ch
!= '>') {
694 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
696 } else if (attributes
) {
697 CFStringRef tmp
= parser
->node
->dataString
;
698 parser
->node
->dataTypeID
= kCFXMLNodeTypeAttributeListDeclaration
;
699 parser
->node
->dataString
= name
;
700 attListData
.attributes
= attributes
;
701 parser
->node
->additionalData
= (void *)&attListData
;
702 success
= reportNewLeaf(parser
);
703 parser
->node
->additionalData
= NULL
;
704 parser
->node
->dataString
= tmp
;
708 // Free up all that memory
710 for (idx
= 0; idx
< attListData
.numberOfAttributes
; idx
++) {
711 // Do not release attributeName here; it's a uniqued string from scanXMLName
712 CFRelease(attributes
[idx
].typeString
);
713 CFRelease(attributes
[idx
].defaultString
);
715 if (attributes
!= attributeArray
) {
716 CFAllocatorDeallocate(CFGetAllocator(parser
), attributes
);
722 CF_INLINE Boolean
parseSystemLiteral(CFXMLParserRef parser
, CFXMLExternalID
*extID
) {
725 CFMutableStringRef urlStr
= CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
));
726 if (_inputStreamScanQuotedString(&parser
->input
, urlStr
)) {
728 extID
->systemID
= CFURLCreateWithString(CFGetAllocator(parser
), urlStr
, parser
->input
.url
);
730 extID
->systemID
= NULL
;
735 success
= _inputStreamScanQuotedString(&parser
->input
, NULL
);
741 [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
742 [83] PublicID ::= 'PUBLIC' S PubidLiteral
743 [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
744 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
745 [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
747 // This does NOT report errors itself; caller can check to see if parser->input is at EOF to determine whether the formatting failed or unexpected EOF occurred. -- REW, 2/2/2000
748 static Boolean
parseExternalID(CFXMLParserRef parser
, Boolean alsoAcceptPublicID
, CFXMLExternalID
*extID
) {
749 const UniChar publicString
[6] = {'P', 'U', 'B', 'L', 'I', 'C'};
750 const UniChar systemString
[6] = {'S', 'Y', 'S', 'T', 'E', 'M'};
753 extID
->systemID
= NULL
;
754 extID
->publicID
= NULL
;
756 if (_inputStreamMatchString(&parser
->input
, publicString
, 6)) {
757 success
= _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0;
759 extID
->publicID
= CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
));
760 success
= success
&& _inputStreamScanQuotedString(&parser
->input
, (CFMutableStringRef
)extID
->publicID
);
762 success
= success
&& _inputStreamScanQuotedString(&parser
->input
, NULL
);
766 if (alsoAcceptPublicID
) {
767 _inputStreamSetMark(&parser
->input
); // In case we need to roll back the parser
769 if (_inputStreamSkipWhitespace(&parser
->input
, NULL
) == 0
770 || !_inputStreamPeekCharacter(&parser
->input
, &ch
)
771 || (ch
!= '\'' && ch
!= '\"')
772 || !parseSystemLiteral(parser
, extID
)) {
773 success
= alsoAcceptPublicID
;
774 if (alsoAcceptPublicID
) {
775 _inputStreamBackUpToMark(&parser
->input
);
780 if (alsoAcceptPublicID
) {
781 _inputStreamClearMark(&parser
->input
);
784 } else if (_inputStreamMatchString(&parser
->input
, systemString
, 6)) {
785 success
= _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0 && parseSystemLiteral(parser
, extID
);
793 [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
795 static Boolean
parseNotationDeclaration(CFXMLParserRef parser
) {
796 static UniChar notationString
[8] = {'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N'};
797 Boolean report
= *(parser
->top
) && !(parser
->options
& kCFXMLParserSkipMetaData
);
798 CFXMLNotationInfo notationData
= {{NULL
, NULL
}};
801 _inputStreamMatchString(&parser
->input
, notationString
, 8) &&
802 _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0 &&
803 _inputStreamScanXMLName(&parser
->input
, false, report
? &name
: NULL
) &&
804 _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0 &&
805 parseExternalID(parser
, true, report
? &(notationData
.externalID
) : NULL
);
809 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
810 success
= (_inputStreamGetCharacter(&parser
->input
, &ch
) && ch
== '>');
813 if (_inputStreamAtEOF(&parser
->input
)) {
814 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
816 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
819 CFStringRef tmp
= parser
->node
->dataString
;
820 parser
->node
->dataTypeID
= kCFXMLNodeTypeNotation
;
821 parser
->node
->dataString
= name
;
822 parser
->node
->additionalData
= ¬ationData
;
823 success
= reportNewLeaf(parser
);
824 parser
->node
->additionalData
= NULL
;
825 parser
->node
->dataString
= tmp
;
827 if (notationData
.externalID
.systemID
) CFRelease(notationData
.externalID
.systemID
);
828 if (notationData
.externalID
.publicID
) CFRelease(notationData
.externalID
.publicID
);
833 [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
834 [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
835 [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
837 static Boolean
parseChoiceOrSequence(CFXMLParserRef parser
, Boolean pastParen
) {
838 UniChar ch
, separator
;
840 if (!_inputStreamGetCharacter(&parser
->input
, &ch
) || ch
!= '(') return false;
841 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
843 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) return false;
845 /* Now scanning cp, production [48] */
847 if (!parseChoiceOrSequence(parser
, false)) return false;
849 if (!_inputStreamScanXMLName(&parser
->input
, false, NULL
)) return false;
851 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) return false;
852 if (ch
== '?' || ch
== '*' || ch
== '+') _inputStreamGetCharacter(&parser
->input
, &ch
);
855 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
856 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) return false;
857 if (ch
== ')') return true;
858 if (ch
!= '|' && ch
!= ',') return false;
860 while (ch
== separator
) {
861 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
862 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) return false;
864 if (!_inputStreamScanXMLName(&parser
->input
, false, NULL
)) return false;
865 } else if (!parseChoiceOrSequence(parser
, false)) {
868 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
869 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) return false;
875 [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')'
877 static Boolean
parseMixedElementContent(CFXMLParserRef parser
) {
878 static const UniChar pcdataString
[7] = {'#', 'P', 'C', 'D', 'A', 'T', 'A'};
880 if (!_inputStreamMatchString(&parser
->input
, pcdataString
, 7)) return false;
881 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
882 if (!_inputStreamGetCharacter(&parser
->input
, &ch
) && (ch
== ')' || ch
== '|')) return false;
883 if (ch
== ')') return true;
886 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
887 if (!_inputStreamScanXMLName(&parser
->input
, false, NULL
)) return false;
888 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
889 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) return false;
891 if (ch
!= ')') return false;
892 if (!_inputStreamGetCharacter(&parser
->input
, &ch
) || ch
!= '*') return false;
897 [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
898 [47] children ::= (choice | seq) ('?' | '*' | '+')?
900 static Boolean
parseElementContentSpec(CFXMLParserRef parser
) {
901 static const UniChar eltContentEmpty
[5] = {'E', 'M', 'P', 'T', 'Y'};
902 static const UniChar eltContentAny
[3] = {'A', 'N', 'Y'};
904 if (_inputStreamMatchString(&parser
->input
, eltContentEmpty
, 5) || _inputStreamMatchString(&parser
->input
, eltContentAny
, 3)) {
906 } else if (!_inputStreamPeekCharacter(&parser
->input
, &ch
) || ch
!= '(') {
909 // We want to know if we have a Mixed per production [51]. If we don't, we will need to back up and call the parseChoiceOrSequence function. So we set the mark now. -- REW, 2/10/2000
910 _inputStreamGetCharacter(&parser
->input
, &ch
);
911 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
912 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) return false;
915 return parseMixedElementContent(parser
);
917 if (parseChoiceOrSequence(parser
, true)) {
918 if (_inputStreamPeekCharacter(&parser
->input
, &ch
) && (ch
== '*' || ch
== '?' || ch
== '+')) {
919 _inputStreamGetCharacter(&parser
->input
, &ch
);
930 [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
932 static Boolean
parseElementDeclaration(CFXMLParserRef parser
) {
933 Boolean report
= *(parser
->top
) && !(parser
->options
& kCFXMLParserSkipMetaData
);
935 static const UniChar eltChars
[7] = {'E', 'L', 'E', 'M', 'E', 'N', 'T'};
937 CFMutableStringRef contentDesc
= NULL
;
939 success
= _inputStreamMatchString(&parser
->input
, eltChars
, 7)
940 && _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0
941 && _inputStreamScanXMLName(&parser
->input
, false, report
? &name
: NULL
)
942 && _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0;
944 if (report
) _inputStreamSetMark(&parser
->input
);
945 success
= parseElementContentSpec(parser
);
946 if (success
&& report
) {
947 contentDesc
= CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
));
948 _inputStreamGetCharactersFromMark(&parser
->input
, contentDesc
);
950 if (report
) _inputStreamClearMark(&parser
->input
);
951 if (success
) _inputStreamSkipWhitespace(&parser
->input
, NULL
);
952 success
= success
&& _inputStreamMatchString(&parser
->input
, &ch
, 1);
955 if (_inputStreamAtEOF(&parser
->input
)) {
956 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
958 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
961 CFXMLElementTypeDeclarationInfo eltData
;
962 CFStringRef tmp
= parser
->node
->dataString
;
963 parser
->node
->dataTypeID
= kCFXMLNodeTypeElementTypeDeclaration
;
964 parser
->node
->dataString
= name
;
965 eltData
.contentDescription
= contentDesc
;
966 parser
->node
->additionalData
= &eltData
;
967 success
= reportNewLeaf(parser
);
968 parser
->node
->additionalData
= NULL
;
969 parser
->node
->dataString
= tmp
;
971 if (contentDesc
) CFRelease(contentDesc
);
976 [70] EntityDecl ::= GEDecl | PEDecl
977 [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
978 [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
979 [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
980 [74] PEDef ::= EntityValue | ExternalID
981 [76] NDataDecl ::= S 'NDATA' S Name
982 [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | "'" ([^%&'] | PEReference | Reference)* "'"
984 static Boolean
parseEntityDeclaration(CFXMLParserRef parser
) {
985 const UniChar entityStr
[6] = {'E', 'N', 'T', 'I', 'T', 'Y'};
987 Boolean isPEDecl
= false;
988 CFXMLEntityInfo entityData
;
990 Boolean report
= *(parser
->top
) && !(parser
->options
& kCFXMLParserSkipMetaData
);
992 _inputStreamMatchString(&parser
->input
, entityStr
, 6) &&
993 (_inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0) &&
994 _inputStreamPeekCharacter(&parser
->input
, &ch
);
996 entityData
.replacementText
= NULL
;
997 entityData
.entityID
.publicID
= NULL
;
998 entityData
.entityID
.systemID
= NULL
;
999 entityData
.notationName
= NULL
;
1000 // We will set entityType immediately before reporting
1002 if (success
&& ch
== '%') {
1003 _inputStreamGetCharacter(&parser
->input
, &ch
);
1004 success
= _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0;
1007 success
= success
&& _inputStreamScanXMLName(&parser
->input
, false, report
? &name
: NULL
) && (_inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0) && _inputStreamPeekCharacter(&parser
->input
, &ch
);
1008 if (success
&& (ch
== '\"' || ch
== '\'')) {
1010 // This is not quite correct - the string scanned cannot contain '%' or '&' unless it's as part of a valid entity reference -- REW, 2/2/2000
1012 entityData
.replacementText
= CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
));
1013 success
= _inputStreamScanQuotedString(&parser
->input
, (CFMutableStringRef
)entityData
.replacementText
);
1015 success
= _inputStreamScanQuotedString(&parser
->input
, NULL
);
1017 } else if (success
) {
1019 success
= parseExternalID(parser
, false, report
? &(entityData
.entityID
) : NULL
);
1020 if (success
&& !isPEDecl
&& _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0) {
1021 // There could be an option NDataDecl
1022 // Don't we need to set entityData.notationName? -- REW, 3/6/2000
1023 const UniChar nDataStr
[5] = {'N', 'D', 'A', 'T', 'A'};
1024 if (_inputStreamMatchString(&parser
->input
, nDataStr
, 5)) {
1025 success
= (_inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0) && _inputStreamScanXMLName(&parser
->input
, false, NULL
);
1030 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
1031 success
= _inputStreamGetCharacter(&parser
->input
, &ch
) && ch
== '>';
1034 if (_inputStreamAtEOF(&parser
->input
)) {
1035 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
1037 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
1040 CFStringRef tmp
= parser
->node
->dataString
;
1041 if (isPEDecl
) entityData
.entityType
= kCFXMLEntityTypeParameter
;
1042 else if (entityData
.replacementText
) entityData
.entityType
= kCFXMLEntityTypeParsedInternal
;
1043 else if (!entityData
.notationName
) entityData
.entityType
= kCFXMLEntityTypeParsedExternal
;
1044 else entityData
.entityType
= kCFXMLEntityTypeUnparsed
;
1045 parser
->node
->dataTypeID
= kCFXMLNodeTypeEntity
;
1046 parser
->node
->dataString
= name
;
1047 parser
->node
->additionalData
= &entityData
;
1048 success
= reportNewLeaf(parser
);
1049 parser
->node
->additionalData
= NULL
;
1050 parser
->node
->dataString
= tmp
;
1051 if (entityData
.replacementText
) CFRelease(entityData
.replacementText
);
1053 if (entityData
.entityID
.publicID
) CFRelease(entityData
.entityID
.publicID
);
1054 if (entityData
.entityID
.systemID
) CFRelease(entityData
.entityID
.systemID
);
1059 [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1060 [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
1062 // First character should be just past '['
1063 static Boolean
parseInlineDTD(CFXMLParserRef parser
) {
1064 Boolean success
= true;
1065 while (success
&& !_inputStreamAtEOF(&parser
->input
)) {
1068 parseWhitespace(parser
);
1069 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) break;
1072 success
= parsePhysicalEntityReference(parser
);
1073 } else if (ch
== '<') {
1075 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
1076 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
1080 // Processing Instruction
1081 success
= parseProcessingInstruction(parser
, true); // We can safely pass true here, because *parser->top will be NULL if kCFXMLParserSkipMetaData is true
1082 } else if (ch
== '!') {
1083 UniChar dashes
[2] = {'-', '-'};
1084 if (_inputStreamMatchString(&parser
->input
, dashes
, 2)) {
1086 success
= parseComment(parser
, true);
1088 // elementdecl | AttListDecl | EntityDecl | NotationDecl
1089 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) {
1090 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
1092 } else if (ch
== 'A') {
1094 success
= parseAttributeListDeclaration(parser
);
1095 } else if (ch
== 'N') {
1096 success
= parseNotationDeclaration(parser
);
1097 } else if (ch
== 'E') {
1098 // elementdecl | EntityDecl
1099 _inputStreamGetCharacter(&parser
->input
, &ch
);
1100 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) {
1101 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
1104 _inputStreamReturnCharacter(&parser
->input
, 'E');
1106 success
= parseElementDeclaration(parser
);
1107 } else if (ch
== 'N') {
1108 success
= parseEntityDeclaration(parser
);
1110 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
1114 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
1119 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
1122 } else if (ch
== ']') {
1125 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
1130 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
1136 [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1138 static Boolean
parseTagContent(CFXMLParserRef parser
) {
1139 while (!_inputStreamAtEOF(&parser
->input
)) {
1141 CFIndex numWhitespaceCharacters
;
1143 _inputStreamSetMark(&parser
->input
);
1144 numWhitespaceCharacters
= _inputStreamSkipWhitespace(&parser
->input
, NULL
);
1145 // Don't report the whitespace yet; if the first thing we see is character data, we put the whitespace back and report it as part of the character data.
1146 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) break; // break == report unexpected EOF
1148 if (ch
!= '<' && ch
!= '&') { // CharData
1149 // Back off the whitespace; we'll report it with the PCData
1150 _inputStreamBackUpToMark(&parser
->input
);
1151 _inputStreamClearMark(&parser
->input
);
1152 if (!parsePCData(parser
)) return false;
1153 if(_inputStreamComposingErrorOccurred(&parser
->input
)) {
1154 _CFReportError(parser
, kCFXMLErrorEncodingConversionFailure
, "Encountered string encoding error");
1160 // element | Reference | CDSect | PI | Comment
1161 // We can safely report any whitespace now
1162 if (!(parser
->options
& kCFXMLParserSkipWhitespace
) && numWhitespaceCharacters
!= 0 && *(parser
->top
)) {
1163 _inputStreamReturnCharacter(&parser
->input
, ch
);
1164 _inputStreamGetCharactersFromMark(&parser
->input
, (CFMutableStringRef
)(parser
->node
->dataString
));
1165 parser
->node
->dataTypeID
= kCFXMLNodeTypeWhitespace
;
1166 parser
->node
->additionalData
= NULL
;
1167 if (!reportNewLeaf(parser
)) return false;
1168 _inputStreamGetCharacter(&parser
->input
, &ch
);
1170 _inputStreamClearMark(&parser
->input
);
1173 // Reference; for the time being, we don't worry about processing these; just report them as Entity references
1174 if (!parseEntityReference(parser
, true)) return false;
1178 // ch == '<'; element | CDSect | PI | Comment
1179 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) break;
1180 if (ch
== '?') { // PI
1181 _inputStreamGetCharacter(&parser
->input
, &ch
);
1182 if (!parseProcessingInstruction(parser
, true))
1184 } else if (ch
== '/') { // end tag; we're passing outside of content's production
1185 _inputStreamReturnCharacter(&parser
->input
, '<'); // Back off to the '<'
1187 } else if (ch
!= '!') { // element
1188 if (!parseTag(parser
)) return false;
1191 UniChar dashes
[3] = {'!', '-', '-'};
1192 if (_inputStreamMatchString(&parser
->input
, dashes
, 3)) {
1194 if (!parseComment(parser
, true)) return false;
1196 // Should have a CDSect; back off the "<!" and call parseCDSect
1197 _inputStreamReturnCharacter(&parser
->input
, '<');
1198 if (!parseCDSect(parser
)) return false;
1203 if(_inputStreamComposingErrorOccurred(&parser
->input
)) {
1204 _CFReportError(parser
, kCFXMLErrorEncodingConversionFailure
, "Encountered string encoding error");
1207 // Only way to get here is if premature EOF was found
1208 //#warning CF:Include the tag name here
1209 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing tag content");
1213 static Boolean
parseCDSect(CFXMLParserRef parser
) {
1214 const UniChar _CDSectOpening
[9] = {'<', '!', '[', 'C', 'D', 'A', 'T', 'A', '['};
1215 const UniChar _CDSectClose
[3] = {']', ']', '>'};
1216 if (!_inputStreamMatchString(&parser
->input
, _CDSectOpening
, 9)) {
1217 _CFReportError(parser
, kCFXMLErrorMalformedCDSect
, "Encountered bad prefix to a presumed CDATA section");
1220 if (!_inputStreamScanToCharacters(&parser
->input
, _CDSectClose
, 3, (CFMutableStringRef
)(parser
->node
->dataString
))) {
1221 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing CDATA section");
1225 parser
->node
->dataTypeID
= kCFXMLNodeTypeCDATASection
;
1226 parser
->node
->additionalData
= NULL
;
1227 return reportNewLeaf(parser
);
1231 [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1233 static Boolean
validateCharacterReference(CFStringRef str
) {
1235 CFIndex idx
, len
= CFStringGetLength(str
);
1236 if (len
< 2) return false;
1237 if (CFStringGetCharacterAtIndex(str
, 0) != '#') return false;
1238 if (CFStringGetCharacterAtIndex(str
, 1) == 'x') {
1241 if (len
== 2) return false;
1249 ch
= CFStringGetCharacterAtIndex(str
, idx
);
1251 if (!(ch
<= '9' && ch
>= '0') &&
1252 !(isHex
&& ((ch
>= 'a' && ch
<= 'f') || (ch
>= 'A' && ch
<= 'F')))) {
1256 return (idx
== len
);
1260 [67] Reference ::= EntityRef | CharRef
1261 [68] EntityRef ::= '&' Name ';'
1263 static Boolean
parseEntityReference(CFXMLParserRef parser
, Boolean report
) {
1265 CFXMLEntityReferenceInfo entData
;
1266 CFStringRef name
= NULL
;
1267 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) {
1268 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing EntityReference");
1273 if (!_inputStreamScanToCharacters(&parser
->input
, &ch
, 1, (CFMutableStringRef
)parser
->node
->dataString
)) {
1274 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing EntityReference");
1276 } else if (!validateCharacterReference(parser
->node
->dataString
)) {
1277 _CFReportError(parser
, kCFXMLErrorMalformedCharacterReference
, "Encountered illegal character while parsing character reference");
1280 entData
.entityType
= kCFXMLEntityTypeCharacter
;
1281 name
= parser
->node
->dataString
;
1282 } else if (!_inputStreamScanXMLName(&parser
->input
, false, report
? &name
: NULL
) || !_inputStreamGetCharacter(&parser
->input
, &ch
) || ch
!= ';') {
1283 if (_inputStreamAtEOF(&parser
->input
)) {
1284 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing EntityReference");
1287 _CFReportError(parser
, kCFXMLErrorMalformedName
, "Encountered malformed name while parsing EntityReference");
1291 entData
.entityType
= kCFXMLEntityTypeParsedInternal
;
1294 CFStringRef tmp
= parser
->node
->dataString
;
1296 parser
->node
->dataTypeID
= kCFXMLNodeTypeEntityReference
;
1297 parser
->node
->dataString
= name
;
1298 parser
->node
->additionalData
= &entData
;
1299 success
= reportNewLeaf(parser
);
1300 parser
->node
->additionalData
= NULL
;
1301 parser
->node
->dataString
= tmp
;
1309 // Kept from old entity reference parsing....
1311 switch (*(parser
->curr
)) {
1313 if (len
>= 3 && *(parser
->curr
+1) == 't' && *(parser
->curr
+2) == ';') {
1318 parser
->errorString
= CFStringCreateWithFormat(parser
->allocator
, NULL
, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser
));
1321 if (len
>= 3 && *(parser
->curr
+1) == 't' && *(parser
->curr
+2) == ';') {
1326 parser
->errorString
= CFStringCreateWithFormat(parser
->allocator
, NULL
, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser
));
1328 case 'a': // "apos" or "amp"
1329 if (len
< 4) { // Not enough characters for either conversion
1330 parser
->errorString
= CFStringCreateWithCString(parser
->allocator
, "Encountered unexpected EOF", kCFStringEncodingASCII
);
1333 if (*(parser
->curr
+1) == 'm') {
1335 if (*(parser
->curr
+2) == 'p' && *(parser
->curr
+3) == ';') {
1340 } else if (*(parser
->curr
+1) == 'p') {
1342 if (len
> 4 && *(parser
->curr
+2) == 'o' && *(parser
->curr
+3) == 's' && *(parser
->curr
+4) == ';') {
1348 parser
->errorString
= CFStringCreateWithFormat(parser
->allocator
, NULL
, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser
));
1350 case 'q': // "quote"
1351 if (len
>= 6 && *(parser
->curr
+1) == 'u' && *(parser
->curr
+2) == 'o' && *(parser
->curr
+3) == 't' && *(parser
->curr
+4) == 'e' && *(parser
->curr
+5) == ';') {
1356 parser
->errorString
= CFStringCreateWithFormat(parser
->allocator
, NULL
, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser
));
1361 Boolean isHex
= false;
1362 if ( len
< 4) { // Not enough characters to make it all fit! Need at least "&#d;"
1363 parser
->errorString
= CFStringCreateWithCString(parser
->allocator
, "Encountered unexpected EOF", kCFStringEncodingASCII
);
1367 if (*(parser
->curr
) == 'x') {
1371 while (parser
->curr
< parser
->end
) {
1372 ch
= *(parser
->curr
);
1374 CFStringAppendCharacters(string
, &num
, 1);
1378 if (!isHex
) num
= num
*10;
1379 else num
= num
<< 4;
1380 if (ch
<= '9' && ch
>= '0') {
1382 } else if (!isHex
) {
1383 parser
->errorString
= CFStringCreateWithFormat(parser
->allocator
, NULL
, CFSTR("Encountered unexpected character %c at line %d"), ch
, lineNumber(parser
));
1385 } else if (ch
>= 'a' && ch
<= 'f') {
1386 num
+= 10 + (ch
- 'a');
1387 } else if (ch
>= 'A' && ch
<= 'F') {
1388 num
+= 10 + (ch
- 'A');
1390 parser
->errorString
= CFStringCreateWithFormat(parser
->allocator
, NULL
, CFSTR("Encountered unexpected character %c at line %d"), ch
, lineNumber(parser
));
1394 parser
->errorString
= CFStringCreateWithCString(parser
->allocator
, "Encountered unexpected EOF", kCFStringEncodingASCII
);
1398 parser
->errorString
= CFStringCreateWithFormat(parser
->allocator
, NULL
, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser
));
1401 CFStringAppendCharacters(string
, &ch
, 1);
1406 [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
1408 static Boolean
parsePCData(CFXMLParserRef parser
) {
1410 Boolean done
= false;
1411 _inputStreamSetMark(&parser
->input
);
1412 while (!done
&& _inputStreamGetCharacter(&parser
->input
, &ch
)) {
1416 _inputStreamReturnCharacter(&parser
->input
, ch
);
1421 const UniChar endSequence
[2] = {']', '>'};
1422 if (_inputStreamMatchString(&parser
->input
, endSequence
, 2)) {
1423 _CFReportError(parser
, kCFXMLErrorMalformedParsedCharacterData
, "Encountered \"]]>\" in parsed character data");
1424 _inputStreamClearMark(&parser
->input
);
1433 _inputStreamGetCharactersFromMark(&parser
->input
, (CFMutableStringRef
)(parser
->node
->dataString
));
1434 _inputStreamClearMark(&parser
->input
);
1435 parser
->node
->dataTypeID
= kCFXMLNodeTypeText
;
1436 parser
->node
->additionalData
= NULL
;
1437 return reportNewLeaf(parser
);
1441 [42] ETag ::= '</' Name S? '>'
1443 static Boolean
parseCloseTag(CFXMLParserRef parser
, CFStringRef tag
) {
1444 const UniChar beginEndTag
[2] = {'<', '/'};
1445 Boolean unexpectedEOF
= false, mismatch
= false;
1446 CFStringRef closeTag
;
1448 // We can get away with testing pointer equality between tag & closeTag because scanXMLName guarantees the strings it returns are unique.
1449 if (_inputStreamMatchString(&parser
->input
, beginEndTag
, 2) && _inputStreamScanXMLName(&parser
->input
, false, &closeTag
) && closeTag
== tag
) {
1452 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
1453 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
1454 unexpectedEOF
= true;
1455 } else if (ch
!= '>') {
1458 } else if (_inputStreamAtEOF(&parser
->input
)) {
1459 unexpectedEOF
= true;
1464 if (unexpectedEOF
|| mismatch
) {
1465 if (unexpectedEOF
) {
1466 parser
->errorString
= CFStringCreateWithFormat(CFGetAllocator(parser
), NULL
, CFSTR("Encountered unexpected EOF while parsing close tag for <%@>"), tag
);
1467 parser
->status
= kCFXMLErrorUnexpectedEOF
;
1468 if(parser
->callBacks
.handleError
) INVOKE_CALLBACK3(parser
->callBacks
.handleError
, parser
, kCFXMLErrorUnexpectedEOF
, parser
->context
.info
);
1470 parser
->errorString
= CFStringCreateWithFormat(CFGetAllocator(parser
), NULL
, CFSTR("Encountered malformed close tag for <%@>"), tag
);
1471 parser
->status
= kCFXMLErrorMalformedCloseTag
;
1472 if(parser
->callBacks
.handleError
) INVOKE_CALLBACK3(parser
->callBacks
.handleError
, parser
, kCFXMLErrorMalformedCloseTag
, parser
->context
.info
);
1480 [39] element ::= EmptyElementTag | STag content ETag
1481 [40] STag ::= '<' Name (S Attribute)* S? '>'
1482 [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
1484 static Boolean
parseTag(CFXMLParserRef parser
) {
1487 CFXMLElementInfo data
;
1488 Boolean success
= true;
1489 CFStringRef tagName
;
1491 if (!_inputStreamScanXMLName(&parser
->input
, false, &tagName
)) {
1492 _CFReportError(parser
, kCFXMLErrorMalformedStartTag
, "Encountered malformed start tag");
1496 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
1498 if (!parseAttributes(parser
)) return false; // parsed directly into parser->argDict ; parseAttributes consumes any trailing whitespace
1499 data
.attributes
= parser
->argDict
;
1500 data
.attributeOrder
= parser
->argArray
;
1501 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
1502 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF");
1506 data
.isEmpty
= true;
1507 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
1508 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF");
1512 data
.isEmpty
= false;
1515 _CFReportError(parser
, kCFXMLErrorMalformedStartTag
, "Encountered malformed start tag");
1519 if (*parser
->top
|| parser
->top
== parser
->stack
) {
1520 CFStringRef oldStr
= parser
->node
->dataString
;
1521 parser
->node
->dataTypeID
= kCFXMLNodeTypeElement
;
1522 parser
->node
->dataString
= tagName
;
1523 parser
->node
->additionalData
= &data
;
1524 tag
= (void *)INVOKE_CALLBACK3(parser
->callBacks
.createXMLStructure
, parser
, parser
->node
, parser
->context
.info
);
1525 if (tag
&& parser
->status
== kCFXMLStatusParseInProgress
) {
1526 INVOKE_CALLBACK4(parser
->callBacks
.addChild
, parser
, *parser
->top
, tag
, parser
->context
.info
);
1528 parser
->node
->additionalData
= NULL
;
1529 parser
->node
->dataString
= oldStr
;
1530 if (parser
->status
!= kCFXMLStatusParseInProgress
) {
1531 // callback called CFXMLParserAbort()
1532 _CFReportError(parser
, parser
->status
, NULL
);
1539 pushXMLNode(parser
, tag
);
1540 if (!data
.isEmpty
) {
1541 success
= parseTagContent(parser
);
1543 success
= parseCloseTag(parser
, tagName
);
1548 if (success
&& tag
) {
1549 INVOKE_CALLBACK3(parser
->callBacks
.endXMLStructure
, parser
, tag
, parser
->context
.info
);
1550 if (parser
->status
!= kCFXMLStatusParseInProgress
) {
1551 _CFReportError(parser
, parser
->status
, NULL
);
1559 [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
1560 [67] Reference ::= EntityRef | CharRef
1561 [68] EntityRef ::= '&' Name ';'
1563 // For the moment, we don't worry about references in the attribute values.
1564 static Boolean
parseAttributeValue(CFXMLParserRef parser
, CFMutableStringRef str
) {
1566 Boolean success
= _inputStreamGetCharacter(&parser
->input
, "e
);
1567 if (!success
|| (quote
!= '\'' && quote
!= '\"')) return false;
1568 if (str
) _inputStreamSetMark(&parser
->input
);
1569 while (_inputStreamGetCharacter(&parser
->input
, &ch
) && ch
!= quote
) {
1571 case '<': success
= false; break;
1573 if (!parseEntityReference(parser
, false)) {
1582 if (success
&& _inputStreamAtEOF(&parser
->input
)) {
1587 _inputStreamReturnCharacter(&parser
->input
, quote
);
1588 _inputStreamGetCharactersFromMark(&parser
->input
, str
);
1589 _inputStreamGetCharacter(&parser
->input
, &ch
);
1591 _inputStreamClearMark(&parser
->input
);
1597 [40] STag ::= '<' Name (S Attribute)* S? '>'
1598 [41] Attribute ::= Name Eq AttValue
1599 [25] Eq ::= S? '=' S?
1602 // Expects parser->curr to be at the first content character; will consume the trailing whitespace.
/*
 * parseAttributes: gathers the attributes of the tag currently being parsed.
 *
 * Attribute names are appended to parser->argArray and name/value pairs are
 * stored in parser->argDict; both containers are created on first use and
 * emptied on every later call.  If the next character is already '>', the
 * containers (when they exist) are cleared and true is returned immediately.
 *
 * Otherwise the loop runs until '>' or '/' is peeked, the input runs out, or
 * `failure` is set.  Each pass scans an XML name, reports
 * kCFXMLErrorMalformedStartTag if that name is already in the array, skips
 * whitespace, reads a character that must be '=', skips whitespace again,
 * and hands a fresh mutable string to parseAttributeValue.
 *
 * After the loop, errors are reported through _CFReportError:
 * kCFXMLErrorMalformedStartTag for an illegal character, or
 * kCFXMLErrorUnexpectedEOF when the input stream is at EOF.
 */
1603 Boolean
parseAttributes(CFXMLParserRef parser
) {
1605 CFMutableDictionaryRef dict
;
1606 CFMutableArrayRef array
;
1607 Boolean failure
= false;
/* Fast path: the tag closes right away, so there are no attributes. */
1608 if (_inputStreamPeekCharacter(&parser
->input
, &ch
) == '>') {
1609 if (parser
->argDict
) {
1610 CFDictionaryRemoveAllValues(parser
->argDict
);
1611 CFArrayRemoveAllValues(parser
->argArray
);
1613 return true; // No attributes; let caller deal with it
/* Lazily create the attribute containers, or reset the ones left from a previous tag. */
1615 if (!parser
->argDict
) {
1616 parser
->argDict
= CFDictionaryCreateMutable(CFGetAllocator(parser
), 0, &kCFTypeDictionaryKeyCallBacks
, &kCFTypeDictionaryValueCallBacks
);
1617 parser
->argArray
= CFArrayCreateMutable(CFGetAllocator(parser
), 0, &kCFTypeArrayCallBacks
);
1619 CFDictionaryRemoveAllValues(parser
->argDict
);
1620 CFArrayRemoveAllValues(parser
->argArray
);
1622 dict
= parser
->argDict
;
1623 array
= parser
->argArray
;
/* One attribute (Name Eq AttValue) per iteration. */
1624 while (!failure
&& _inputStreamPeekCharacter(&parser
->input
, &ch
) && ch
!= '>' && ch
!= '/') {
1626 CFMutableStringRef value
;
1627 if (!_inputStreamScanXMLName(&parser
->input
, false, &key
)) {
/* A name that is already in the array means the attribute was repeated. */
1631 if (CFArrayGetFirstIndexOfValue(array
, CFRangeMake(0, CFArrayGetCount(array
)), key
) != kCFNotFound
) {
1632 _CFReportError(parser
, kCFXMLErrorMalformedStartTag
, "Found repeated attribute");
/* Eq ::= S? '=' S? */
1635 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
1636 if (!_inputStreamGetCharacter(&parser
->input
, &ch
) || ch
!= '=') {
1640 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
1641 value
= CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
));
1642 if (!parseAttributeValue(parser
, value
)) {
/* Record the attribute: the array keeps the names in order of appearance, the dictionary maps name to value. */
1647 CFArrayAppendValue(array
, key
);
1648 CFDictionarySetValue(dict
, key
, value
);
1650 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
1653 //#warning CF:Include tag name in this error report
1654 _CFReportError(parser
, kCFXMLErrorMalformedStartTag
, "Found illegal character while parsing element tag");
1656 } else if (_inputStreamAtEOF(&parser
->input
)) {
1657 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing element attributes");
1665 [1] document ::= prolog element Misc*
1666 [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1667 [27] Misc ::= Comment | PI | S
1668 [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1670 We treat XMLDecl as a plain old PI, since PI is part of Misc. This changes the prolog and document productions to
1671 [22-1] prolog ::= Misc* (doctypedecl Misc*)?
1672 [1-1] document ::= Misc* (doctypedecl Misc*)? element Misc*
1674 NOTE: This function assumes parser->stack has a valid top. I.e. the document pointer has already been created!
/*
 * parseXML: top-level driver for a document.
 *
 * Peeks one character at a time while `success` holds and hands the input to
 * the matching sub-parser: parseWhitespace, parseComment (when "--" follows),
 * parseDTD, parseProcessingInstruction (after '?'), or parseTag.  sawDTD and
 * sawElement are set once the DTD / root element has parsed successfully and
 * back the "Encountered a second DTD" and "Encountered second top-level
 * element" errors.
 *
 * A character that no branch handles sets parser->status to
 * kCFXMLErrorMalformedDocument, builds an error string that includes the
 * character (printed with %c as well when it is below 256) and invokes the
 * handleError callback directly.  Running out of input right after the first
 * character of a markup construct reports kCFXMLErrorUnexpectedEOF.  A
 * document without an element reports kCFXMLErrorElementlessDocument.
 */
1676 static Boolean
parseXML(CFXMLParserRef parser
) {
1677 Boolean success
= true, sawDTD
= false, sawElement
= false;
1679 while (success
&& _inputStreamPeekCharacter(&parser
->input
, &ch
)) {
1685 success
= parseWhitespace(parser
);
/* Consume the peeked character, then fetch the one after it to decide what kind of markup this is. */
1688 _inputStreamGetCharacter(&parser
->input
, &ch
);
1689 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
1690 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing top-level document");
/* "--" here means a comment; otherwise the construct is handled as a DTD. */
1695 UniChar dashes
[2] = {'-', '-'};
1696 if (_inputStreamMatchString(&parser
->input
, dashes
, 2)) {
1698 success
= parseComment(parser
, true);
1702 _CFReportError(parser
, kCFXMLErrorMalformedDocument
, "Encountered a second DTD");
1705 success
= parseDTD(parser
);
1706 if (success
) sawDTD
= true;
1708 } else if (ch
== '?') {
1709 // Processing instruction
1710 success
= parseProcessingInstruction(parser
, true);
1714 _CFReportError(parser
, kCFXMLErrorMalformedDocument
, "Encountered second top-level element");
/* Put the character back so parseTag sees it. */
1717 _inputStreamReturnCharacter(&parser
->input
, ch
);
1718 success
= parseTag(parser
);
1719 if (success
) sawElement
= true;
/* Unexpected character: the error string is built here because it embeds the character, then handleError is called by hand. */
1723 parser
->status
= kCFXMLErrorMalformedDocument
;
1724 parser
->errorString
= ch
< 256 ?
1725 CFStringCreateWithFormat(CFGetAllocator(parser
), NULL
, CFSTR("Encountered unexpected character 0x%x (\'%c\') at top-level"), ch
, ch
) :
1726 CFStringCreateWithFormat(CFGetAllocator(parser
), NULL
, CFSTR("Encountered unexpected Unicode character 0x%x at top-level"), ch
);
1728 if (parser
->callBacks
.handleError
) {
1729 INVOKE_CALLBACK3(parser
->callBacks
.handleError
, parser
, parser
->status
, parser
->context
.info
);
1736 if (!success
) return false;
1738 _CFReportError(parser
, kCFXMLErrorElementlessDocument
, "No element found in document");
1744 static void _CFReportError(CFXMLParserRef parser
, CFXMLParserStatusCode errNum
, const char *str
) {
1746 parser
->status
= errNum
;
1747 parser
->errorString
= CFStringCreateWithCString(CFGetAllocator(parser
), str
, kCFStringEncodingASCII
);
1749 if (parser
->callBacks
.handleError
) {
1750 INVOKE_CALLBACK3(parser
->callBacks
.handleError
, parser
, errNum
, parser
->context
.info
);
1754 // Assumes parser->node has been set and is ready to go
1755 static Boolean
reportNewLeaf(CFXMLParserRef parser
) {
1757 if (*(parser
->top
) == NULL
) return true;
1759 xmlStruct
= (void *)INVOKE_CALLBACK3(parser
->callBacks
.createXMLStructure
, parser
, parser
->node
, parser
->context
.info
);
1760 if (xmlStruct
&& parser
->status
== kCFXMLStatusParseInProgress
) {
1761 INVOKE_CALLBACK4(parser
->callBacks
.addChild
, parser
, *(parser
->top
), xmlStruct
, parser
->context
.info
);
1762 if (parser
->status
== kCFXMLStatusParseInProgress
) INVOKE_CALLBACK3(parser
->callBacks
.endXMLStructure
, parser
, xmlStruct
, parser
->context
.info
);
1764 if (parser
->status
!= kCFXMLStatusParseInProgress
) {
1765 _CFReportError(parser
, parser
->status
, NULL
);
1771 static void pushXMLNode(CFXMLParserRef parser
, void *node
) {
1773 if ((unsigned)(parser
->top
- parser
->stack
) == parser
->capacity
) {
1774 parser
->stack
= (void **)CFAllocatorReallocate(CFGetAllocator(parser
), parser
->stack
, 2 * parser
->capacity
* sizeof(void *), 0);
1775 parser
->top
= parser
->stack
+ parser
->capacity
;
1776 parser
->capacity
= 2*parser
->capacity
;
1778 *(parser
->top
) = node
;
1781 /**************************/
1782 /* Parsing to a CFXMLTree */
1783 /**************************/
1785 static void *_XMLTreeCreateXMLStructure(CFXMLParserRef parser
, CFXMLNodeRef node
, void *context
) {
1786 CFXMLNodeRef myNode
= CFXMLNodeCreateCopy(CFGetAllocator(parser
), node
);
1787 CFXMLTreeRef tree
= CFXMLTreeCreateWithNode(CFGetAllocator(parser
), myNode
);
1789 return (void *)tree
;
1792 static void _XMLTreeAddChild(CFXMLParserRef parser
, void *parent
, void *child
, void *context
) {
1793 CFTreeAppendChild((CFTreeRef
)parent
, (CFTreeRef
)child
);
1796 static void _XMLTreeEndXMLStructure(CFXMLParserRef parser
, void *xmlType
, void *context
) {
1797 CFXMLTreeRef node
= (CFXMLTreeRef
)xmlType
;
1798 if (CFTreeGetParent(node
))
1799 CFRelease((CFXMLTreeRef
)xmlType
);
1802 CFXMLTreeRef
CFXMLTreeCreateWithDataFromURL(CFAllocatorRef allocator
, CFURLRef dataSource
, CFOptionFlags parseOptions
, CFIndex version
) {
1803 CFXMLParserRef parser
;
1804 CFXMLParserCallBacks callbacks
;
1805 CFXMLTreeRef result
;
1807 CFAssert1(dataSource
== NULL
|| CFGetTypeID(dataSource
) == CFURLGetTypeID(), __kCFLogAssertion
, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__
);
1809 callbacks
.createXMLStructure
= _XMLTreeCreateXMLStructure
;
1810 callbacks
.addChild
= _XMLTreeAddChild
;
1811 callbacks
.endXMLStructure
= _XMLTreeEndXMLStructure
;
1812 callbacks
.resolveExternalEntity
= NULL
;
1813 callbacks
.handleError
= NULL
;
1814 parser
= CFXMLParserCreateWithDataFromURL(allocator
, dataSource
, parseOptions
, version
, &callbacks
, NULL
);
1816 if (CFXMLParserParse(parser
)) {
1817 result
= (CFXMLTreeRef
)CFXMLParserGetDocument(parser
);
1819 result
= (CFXMLTreeRef
)CFXMLParserGetDocument(parser
);
1820 if (result
) CFRelease(result
);
1827 CFXMLTreeRef
CFXMLTreeCreateFromData(CFAllocatorRef allocator
, CFDataRef xmlData
, CFURLRef dataSource
, CFOptionFlags parseOptions
, CFIndex parserVersion
) {
1828 return CFXMLTreeCreateFromDataWithError(allocator
, xmlData
, dataSource
, parseOptions
, parserVersion
, NULL
);
1831 CONST_STRING_DECL(kCFXMLTreeErrorDescription
, "kCFXMLTreeErrorDescription");
1832 CONST_STRING_DECL(kCFXMLTreeErrorLineNumber
, "kCFXMLTreeErrorLineNumber");
1833 CONST_STRING_DECL(kCFXMLTreeErrorLocation
, "kCFXMLTreeErrorLocation");
1834 CONST_STRING_DECL(kCFXMLTreeErrorStatusCode
, "kCFXMLTreeErrorStatusCode");
1836 CFXMLTreeRef
CFXMLTreeCreateFromDataWithError(CFAllocatorRef allocator
, CFDataRef xmlData
, CFURLRef dataSource
, CFOptionFlags parseOptions
, CFIndex parserVersion
, CFDictionaryRef
*errorDict
) {
1837 CFXMLParserRef parser
;
1838 CFXMLParserCallBacks callbacks
;
1839 CFXMLTreeRef result
;
1841 __CFGenericValidateType(xmlData
, CFDataGetTypeID());
1842 CFAssert1(dataSource
== NULL
|| CFGetTypeID(dataSource
) == CFURLGetTypeID(), __kCFLogAssertion
, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__
);
1844 callbacks
.createXMLStructure
= _XMLTreeCreateXMLStructure
;
1845 callbacks
.addChild
= _XMLTreeAddChild
;
1846 callbacks
.endXMLStructure
= _XMLTreeEndXMLStructure
;
1847 callbacks
.resolveExternalEntity
= NULL
;
1848 callbacks
.handleError
= NULL
;
1849 parser
= CFXMLParserCreate(allocator
, xmlData
, dataSource
, parseOptions
, parserVersion
, &callbacks
, NULL
);
1851 if (CFXMLParserParse(parser
)) {
1852 result
= (CFXMLTreeRef
)CFXMLParserGetDocument(parser
);
1854 if (errorDict
) { // collect the error dictionary
1855 *errorDict
= CFDictionaryCreateMutable(allocator
, 4, &kCFTypeDictionaryKeyCallBacks
, &kCFTypeDictionaryValueCallBacks
);
1859 CFStringRef errstring
;
1861 rawnum
= CFXMLParserGetLocation(parser
);
1862 cfnum
= CFNumberCreate(allocator
, kCFNumberSInt32Type
, &rawnum
);
1864 CFDictionaryAddValue((CFMutableDictionaryRef
)*errorDict
, kCFXMLTreeErrorLocation
, cfnum
);
1868 rawnum
= CFXMLParserGetLineNumber(parser
);
1869 cfnum
= CFNumberCreate(allocator
, kCFNumberSInt32Type
, &rawnum
);
1871 CFDictionaryAddValue((CFMutableDictionaryRef
)*errorDict
, kCFXMLTreeErrorLineNumber
, cfnum
);
1875 rawnum
= CFXMLParserGetStatusCode(parser
);
1876 cfnum
= CFNumberCreate(allocator
, kCFNumberSInt32Type
, &rawnum
);
1878 CFDictionaryAddValue((CFMutableDictionaryRef
)*errorDict
, kCFXMLTreeErrorStatusCode
, cfnum
);
1882 errstring
= CFXMLParserCopyErrorDescription(parser
);
1884 CFDictionaryAddValue((CFMutableDictionaryRef
)*errorDict
, kCFXMLTreeErrorDescription
, errstring
);
1885 CFRelease(errstring
);
1889 result
= (CFXMLTreeRef
)CFXMLParserGetDocument(parser
);
1890 if (result
) CFRelease(result
);
1898 At the very least we need to do <, >, &, ", and '. In addition, we'll have to do everything else in the string.
1899 We should also be handling items that are up over certain values correctly.
1901 CFStringRef
CFXMLCreateStringByEscapingEntities(CFAllocatorRef allocator
, CFStringRef string
, CFDictionaryRef entitiesDictionary
) {
1902 CFAssert1(string
!= NULL
, __kCFLogAssertion
, "%s(): NULL string not permitted.", __PRETTY_FUNCTION__
);
1903 CFMutableStringRef newString
= CFStringCreateMutable(allocator
, 0); // unbounded mutable string
1904 CFMutableCharacterSetRef startChars
= CFCharacterSetCreateMutable(allocator
);
1906 CFStringInlineBuffer inlineBuf
;
1909 CFIndex stringLength
= CFStringGetLength(string
);
1912 CFCharacterSetAddCharactersInString(startChars
, CFSTR("&<>'\""));
1914 CFStringInitInlineBuffer(string
, &inlineBuf
, CFRangeMake(0, stringLength
));
1915 for(idx
= 0; idx
< stringLength
; idx
++) {
1916 uc
= CFStringGetCharacterFromInlineBuffer(&inlineBuf
, idx
);
1917 if(CFCharacterSetIsCharacterMember(startChars
, uc
)) {
1918 CFStringRef previousSubstring
= CFStringCreateWithSubstring(allocator
, string
, CFRangeMake(mark
, idx
- mark
));
1919 CFStringAppend(newString
, previousSubstring
);
1920 CFRelease(previousSubstring
);
1923 CFStringAppend(newString
, CFSTR("&"));
1926 CFStringAppend(newString
, CFSTR("<"));
1929 CFStringAppend(newString
, CFSTR(">"));
1932 CFStringAppend(newString
, CFSTR("'"));
1935 CFStringAppend(newString
, CFSTR("""));
1941 // Copy the remainder to the output string before returning.
1942 CFStringRef remainder
= CFStringCreateWithSubstring(allocator
, string
, CFRangeMake(mark
, idx
- mark
));
1943 if (NULL
!= remainder
) {
1944 CFStringAppend(newString
, remainder
);
1945 CFRelease(remainder
);
1948 CFRelease(startChars
);
1952 CFStringRef
CFXMLCreateStringByUnescapingEntities(CFAllocatorRef allocator
, CFStringRef string
, CFDictionaryRef entitiesDictionary
) {
1953 CFAssert1(string
!= NULL
, __kCFLogAssertion
, "%s(): NULL string not permitted.", __PRETTY_FUNCTION__
);
1955 CFStringInlineBuffer inlineBuf
; /* use this for fast traversal of the string in question */
1957 CFIndex lastChunkStart
, length
= CFStringGetLength(string
);
1958 CFIndex i
, entityStart
;
1962 CFMutableDictionaryRef fullReplDict
= entitiesDictionary
? CFDictionaryCreateMutableCopy(allocator
, 0, entitiesDictionary
) : CFDictionaryCreateMutable(allocator
, 0, &kCFTypeDictionaryKeyCallBacks
, &kCFTypeDictionaryValueCallBacks
);
1964 CFDictionaryAddValue(fullReplDict
, (const void *)CFSTR("amp"), (const void *)CFSTR("&"));
1965 CFDictionaryAddValue(fullReplDict
, (const void *)CFSTR("quot"), (const void *)CFSTR("\""));
1966 CFDictionaryAddValue(fullReplDict
, (const void *)CFSTR("lt"), (const void *)CFSTR("<"));
1967 CFDictionaryAddValue(fullReplDict
, (const void *)CFSTR("gt"), (const void *)CFSTR(">"));
1968 CFDictionaryAddValue(fullReplDict
, (const void *)CFSTR("apos"), (const void *)CFSTR("'"));
1970 CFStringInitInlineBuffer(string
, &inlineBuf
, CFRangeMake(0, length
- 1));
1971 CFMutableStringRef newString
= CFStringCreateMutable(allocator
, 0);
1974 // Scan through the string in its entirety
1975 for(i
= 0; i
< length
; ) {
1976 uc
= CFStringGetCharacterFromInlineBuffer(&inlineBuf
, i
); i
++; // grab the next character and move i.
1979 entityStart
= i
- 1;
1980 entity
= 0xFFFF; // set this to a not-Unicode character as sentinel
1981 // we've hit the beginning of an entity. Copy everything from lastChunkStart to this point.
1982 if(lastChunkStart
< i
- 1) {
1983 sub
= CFStringCreateWithSubstring(allocator
, string
, CFRangeMake(lastChunkStart
, (i
- 1) - lastChunkStart
));
1984 CFStringAppend(newString
, sub
);
1988 uc
= CFStringGetCharacterFromInlineBuffer(&inlineBuf
, i
); i
++; // grab the next character and move i.
1989 // Now we can process the entity reference itself
1990 if(uc
== '#') { // this is a numeric entity.
1993 uc
= CFStringGetCharacterFromInlineBuffer(&inlineBuf
, i
); i
++;
1995 if(uc
== 'x') { // only lowercase x allowed. Translating numeric entity as hexadecimal.
1997 uc
= CFStringGetCharacterFromInlineBuffer(&inlineBuf
, i
); i
++;
2000 // process the provided digits 'til we're finished
2002 if (uc
>= '0' && uc
<= '9')
2003 entity
= entity
* base
+ (uc
-'0');
2004 else if (uc
>= 'a' && uc
<= 'f' && base
== 16)
2005 entity
= entity
* base
+ (uc
-'a'+10);
2006 else if (uc
>= 'A' && uc
<= 'F' && base
== 16)
2007 entity
= entity
* base
+ (uc
-'A'+10);
2011 uc
= CFStringGetCharacterFromInlineBuffer(&inlineBuf
, i
); i
++;
2018 // Scan to the end of the entity
2019 while(uc
!= ';' && i
< length
) {
2020 uc
= CFStringGetCharacterFromInlineBuffer(&inlineBuf
, i
); i
++;
2023 if(0xFFFF != entity
) { // it was numeric, and translated.
2024 // Now, output the result fo the entity
2025 if(entity
>= 0x10000) {
2026 UniChar characters
[2] = { ((entity
- 0x10000) >> 10) + 0xD800, ((entity
- 0x10000) & 0x3ff) + 0xDC00 };
2027 CFStringAppendCharacters(newString
, characters
, 2);
2029 UniChar character
= entity
;
2030 CFStringAppendCharacters(newString
, &character
, 1);
2032 } else { // it wasn't numeric.
2033 sub
= CFStringCreateWithSubstring(allocator
, string
, CFRangeMake(entityStart
+ 1, (i
- entityStart
- 2))); // This trims off the & and ; from the string, so we can use it against the dictionary itself.
2034 CFStringRef replacementString
= (CFStringRef
)CFDictionaryGetValue(fullReplDict
, sub
);
2035 if(replacementString
) {
2036 CFStringAppend(newString
, replacementString
);
2038 CFRelease(sub
); // let the old substring go, since we didn't find it in the dictionary
2039 sub
= CFStringCreateWithSubstring(allocator
, string
, CFRangeMake(entityStart
, (i
- entityStart
))); // create a new one, including the & and ;
2040 CFStringAppend(newString
, sub
); // ...and append that.
2042 CFRelease(sub
); // in either case, release the most-recent "sub"
2045 // move the lastChunkStart to the beginning of the next chunk.
2049 if(lastChunkStart
< length
) { // we've come out of the loop, let's get the rest of the string and tack it on.
2050 sub
= CFStringCreateWithSubstring(allocator
, string
, CFRangeMake(lastChunkStart
, i
- lastChunkStart
));
2051 CFStringAppend(newString
, sub
);
2055 CFRelease(fullReplDict
);