2 * Copyright (c) 2009 Apple Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
25 Copyright (c) 1999-2009, Apple Inc. All rights reserved.
26 Responsibility: Chris Parker
29 #include <CoreFoundation/CFXMLParser.h>
30 #include <CoreFoundation/CFNumber.h>
31 #include "CFXMLInputStream.h"
32 #include "CFUniChar.h"
33 #include "CFInternal.h"
/*
 * Instance layout of a CFXMLParser object.
 * NOTE(review): code elsewhere in this file also reads and writes
 * parser->stack, parser->top, parser->capacity and parser->options; their
 * declarations are not visible in this excerpt - confirm against the full file.
 */
35 struct __CFXMLParser
{
// CF runtime header; instances are created through _CFRuntimeCreateInstance().
36 CFRuntimeBase _cfBase
;
// Input stream state; set up by _initializeInputStream() and torn down by _freeInputStream().
38 _CFXMLInputStream input
;
44 struct __CFXMLNode
*node
; // Our private node; we use it to report back information
// Scratch containers; left NULL by the initializer and released on deallocation if non-NULL.
45 CFMutableDictionaryRef argDict
;
46 CFMutableArrayRef argArray
;
// Client-supplied callbacks, copied by value when the parser is created.
49 CFXMLParserCallBacks callBacks
;
// Client context; info is retained/released through context.retain/context.release when both are set.
50 CFXMLParserContext context
;
// Current parse status; starts out as kCFXMLStatusParseNotBegun.
52 CFXMLParserStatusCode status
;
// Description of the recorded error, or NULL when none has been recorded.
53 CFStringRef errorString
;
56 static CFStringRef
__CFXMLParserCopyDescription(CFTypeRef cf
) {
57 const struct __CFXMLParser
*parser
= (const struct __CFXMLParser
*)cf
;
58 return CFStringCreateWithFormat(CFGetAllocator(cf
), NULL
, CFSTR("<CFXMLParser %p>"), parser
);
61 static void __CFXMLParserDeallocate(CFTypeRef cf
) {
62 struct __CFXMLParser
*parser
= (struct __CFXMLParser
*)cf
;
63 CFAllocatorRef alloc
= CFGetAllocator(parser
);
64 _freeInputStream(&(parser
->input
));
65 if (parser
->argDict
) CFRelease(parser
->argDict
);
66 if (parser
->argArray
) CFRelease(parser
->argArray
);
67 if (parser
->errorString
) CFRelease(parser
->errorString
);
68 if (parser
->node
) CFRelease(parser
->node
);
69 CFAllocatorDeallocate(alloc
, parser
->stack
);
70 if (parser
->context
.info
&& parser
->context
.release
) {
71 parser
->context
.release(parser
->context
.info
);
75 static CFTypeID __kCFXMLParserTypeID
= _kCFRuntimeNotATypeID
;
77 static const CFRuntimeClass __CFXMLParserClass
= {
82 __CFXMLParserDeallocate
,
86 __CFXMLParserCopyDescription
89 static void __CFXMLParserInitialize(void) {
90 __kCFXMLParserTypeID
= _CFRuntimeRegisterClass(&__CFXMLParserClass
);
93 CFTypeID
CFXMLParserGetTypeID(void) {
94 if (_kCFRuntimeNotATypeID
== __kCFXMLParserTypeID
) __CFXMLParserInitialize();
95 return __kCFXMLParserTypeID
;
98 void CFXMLParserGetContext(CFXMLParserRef parser
, CFXMLParserContext
*context
) {
99 CFAssert1(parser
!= NULL
, __kCFLogAssertion
, "%s(): NULL parser not permitted", __PRETTY_FUNCTION__
);
100 __CFGenericValidateType(parser
, CFXMLParserGetTypeID());
102 context
->version
= parser
->context
.version
;
103 context
->info
= parser
->context
.info
;
104 context
->retain
= parser
->context
.retain
;
105 context
->release
= parser
->context
.release
;
106 context
->copyDescription
= parser
->context
.copyDescription
;
107 UNFAULT_CALLBACK(context
->retain
);
108 UNFAULT_CALLBACK(context
->release
);
109 UNFAULT_CALLBACK(context
->copyDescription
);
113 void CFXMLParserGetCallBacks(CFXMLParserRef parser
, CFXMLParserCallBacks
*callBacks
) {
114 __CFGenericValidateType(parser
, CFXMLParserGetTypeID());
116 callBacks
->version
= parser
->callBacks
.version
;
117 callBacks
->createXMLStructure
= parser
->callBacks
.createXMLStructure
;
118 callBacks
->addChild
= parser
->callBacks
.addChild
;
119 callBacks
->endXMLStructure
= parser
->callBacks
.endXMLStructure
;
120 callBacks
->resolveExternalEntity
= parser
->callBacks
.resolveExternalEntity
;
121 callBacks
->handleError
= parser
->callBacks
.handleError
;
122 UNFAULT_CALLBACK(callBacks
->createXMLStructure
);
123 UNFAULT_CALLBACK(callBacks
->addChild
);
124 UNFAULT_CALLBACK(callBacks
->endXMLStructure
);
125 UNFAULT_CALLBACK(callBacks
->resolveExternalEntity
);
126 UNFAULT_CALLBACK(callBacks
->handleError
);
130 CFURLRef
CFXMLParserGetSourceURL(CFXMLParserRef parser
) {
131 __CFGenericValidateType(parser
, CFXMLParserGetTypeID());
132 return parser
->input
.url
;
135 /* Returns the character index or line number of the current parse location */
136 CFIndex
CFXMLParserGetLocation(CFXMLParserRef parser
) {
137 __CFGenericValidateType(parser
, CFXMLParserGetTypeID());
138 return _inputStreamCurrentLocation(&parser
->input
);
141 CFIndex
CFXMLParserGetLineNumber(CFXMLParserRef parser
) {
142 __CFGenericValidateType(parser
, CFXMLParserGetTypeID());
143 return _inputStreamCurrentLine(&parser
->input
);
146 /* Returns the top-most object returned by the createXMLStructure callback */
147 void *CFXMLParserGetDocument(CFXMLParserRef parser
) {
148 __CFGenericValidateType(parser
, CFXMLParserGetTypeID());
149 if (parser
->capacity
> 0)
150 return parser
->stack
[0];
155 CFXMLParserStatusCode
CFXMLParserGetStatusCode(CFXMLParserRef parser
) {
156 __CFGenericValidateType(parser
, CFXMLParserGetTypeID());
157 return parser
->status
;
160 CFStringRef
CFXMLParserCopyErrorDescription(CFXMLParserRef parser
) {
161 __CFGenericValidateType(parser
, CFXMLParserGetTypeID());
162 return (CFStringRef
)CFRetain(parser
->errorString
);
165 void CFXMLParserAbort(CFXMLParserRef parser
, CFXMLParserStatusCode errorCode
, CFStringRef errorDescription
) {
166 __CFGenericValidateType(parser
, CFXMLParserGetTypeID());
167 CFAssert1(errorCode
> 0, __kCFLogAssertion
, "%s(): errorCode must be greater than zero", __PRETTY_FUNCTION__
);
168 CFAssert1(errorDescription
!= NULL
, __kCFLogAssertion
, "%s(): errorDescription may not be NULL", __PRETTY_FUNCTION__
);
169 __CFGenericValidateType(errorDescription
, CFStringGetTypeID());
171 parser
->status
= errorCode
;
172 if (parser
->errorString
) CFRelease(parser
->errorString
);
173 parser
->errorString
= (CFStringRef
)CFStringCreateCopy(kCFAllocatorSystemDefault
, errorDescription
);
177 static Boolean
parseXML(CFXMLParserRef parser
);
178 static Boolean
parseComment(CFXMLParserRef parser
, Boolean report
);
179 static Boolean
parseProcessingInstruction(CFXMLParserRef parser
, Boolean report
);
180 static Boolean
parseInlineDTD(CFXMLParserRef parser
);
181 static Boolean
parseDTD(CFXMLParserRef parser
);
182 static Boolean
parsePhysicalEntityReference(CFXMLParserRef parser
);
183 static Boolean
parseCDSect(CFXMLParserRef parser
);
184 static Boolean
parseEntityReference(CFXMLParserRef parser
, Boolean report
);
185 static Boolean
parsePCData(CFXMLParserRef parser
);
186 static Boolean
parseWhitespace(CFXMLParserRef parser
);
187 static Boolean
parseAttributeListDeclaration(CFXMLParserRef parser
);
188 static Boolean
parseNotationDeclaration(CFXMLParserRef parser
);
189 static Boolean
parseElementDeclaration(CFXMLParserRef parser
);
190 static Boolean
parseEntityDeclaration(CFXMLParserRef parser
);
191 static Boolean
parseExternalID(CFXMLParserRef parser
, Boolean alsoAcceptPublicID
, CFXMLExternalID
*extID
);
192 static Boolean
parseCloseTag(CFXMLParserRef parser
, CFStringRef tag
);
193 static Boolean
parseTagContent(CFXMLParserRef parser
);
194 static Boolean
parseTag(CFXMLParserRef parser
);
195 static Boolean
parseAttributes(CFXMLParserRef parser
);
196 static Boolean
parseAttributeValue(CFXMLParserRef parser
, CFMutableStringRef str
);
198 // Utilities; may need to make these accessible to the property list parser to avoid code duplication
199 static void _CFReportError(CFXMLParserRef parser
, CFXMLParserStatusCode errNum
, const char *str
);
200 static Boolean
reportNewLeaf(CFXMLParserRef parser
); // Assumes parser->node has been set and is ready to go
201 static void pushXMLNode(CFXMLParserRef parser
, void *node
);
203 static CFXMLParserRef
__CFXMLParserInit(CFAllocatorRef alloc
, CFURLRef dataSource
, CFOptionFlags options
, CFDataRef xmlData
, CFIndex version
, CFXMLParserCallBacks
*callBacks
, CFXMLParserContext
*context
) {
204 struct __CFXMLParser
*parser
= (struct __CFXMLParser
*)_CFRuntimeCreateInstance(alloc
, CFXMLParserGetTypeID(), sizeof(struct __CFXMLParser
) - sizeof(CFRuntimeBase
), NULL
);
205 struct __CFXMLNode
*node
= (struct __CFXMLNode
*)_CFRuntimeCreateInstance(alloc
, CFXMLNodeGetTypeID(), sizeof(struct __CFXMLNode
) - sizeof(CFRuntimeBase
), NULL
);
207 if (parser
&& node
) {
208 alloc
= CFGetAllocator(parser
);
209 _initializeInputStream(&(parser
->input
), alloc
, dataSource
, xmlData
);
210 parser
->top
= parser
->stack
;
211 parser
->stack
= NULL
;
212 parser
->capacity
= 0;
214 buf
= (UniChar
*)CFAllocatorAllocate(alloc
, 128*sizeof(UniChar
), 0);
216 parser
->node
->dataString
= CFStringCreateMutableWithExternalCharactersNoCopy(alloc
, buf
, 0, 128, alloc
);
217 parser
->node
->additionalData
= NULL
;
218 parser
->node
->version
= version
;
219 parser
->argDict
= NULL
; // don't create these until necessary
220 parser
->argArray
= NULL
;
222 parser
->options
= options
;
223 parser
->callBacks
= *callBacks
;
225 FAULT_CALLBACK((void **)&(parser
->callBacks
.createXMLStructure
));
226 FAULT_CALLBACK((void **)&(parser
->callBacks
.addChild
));
227 FAULT_CALLBACK((void **)&(parser
->callBacks
.endXMLStructure
));
228 FAULT_CALLBACK((void **)&(parser
->callBacks
.resolveExternalEntity
));
229 FAULT_CALLBACK((void **)&(parser
->callBacks
.handleError
));
232 parser
->context
= *context
;
233 if (parser
->context
.info
&& parser
->context
.retain
) {
234 parser
->context
.retain(parser
->context
.info
);
237 parser
->context
.version
= 0;
238 parser
->context
.info
= NULL
;
239 parser
->context
.retain
= NULL
;
240 parser
->context
.release
= NULL
;
241 parser
->context
.copyDescription
= NULL
;
243 parser
->status
= kCFXMLStatusParseNotBegun
;
244 parser
->errorString
= NULL
;
246 if (parser
) CFRelease(parser
);
247 if (node
) CFRelease(node
);
253 CFXMLParserRef
CFXMLParserCreate(CFAllocatorRef allocator
, CFDataRef xmlData
, CFURLRef dataSource
, CFOptionFlags parseOptions
, CFIndex versionOfNodes
, CFXMLParserCallBacks
*callBacks
, CFXMLParserContext
*context
) {
254 CFAssert1(xmlData
!= NULL
, __kCFLogAssertion
, "%s(): NULL data not permitted", __PRETTY_FUNCTION__
);
255 __CFGenericValidateType(xmlData
, CFDataGetTypeID());
256 CFAssert1(dataSource
== NULL
|| CFGetTypeID(dataSource
) == CFURLGetTypeID(), __kCFLogAssertion
, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__
);
257 CFAssert1(callBacks
!= NULL
&& callBacks
->createXMLStructure
!= NULL
&& callBacks
->addChild
!= NULL
&& callBacks
->endXMLStructure
!= NULL
, __kCFLogAssertion
, "%s(): callbacks createXMLStructure, addChild, and endXMLStructure must all be non-NULL", __PRETTY_FUNCTION__
);
258 CFAssert2(versionOfNodes
<= 1, __kCFLogAssertion
, "%s(): version number %d is higher than supported by CFXMLParser", __PRETTY_FUNCTION__
, versionOfNodes
);
259 CFAssert1(versionOfNodes
!= 0, __kCFLogAssertion
, "%s(): version number 0 is no longer supported by CFXMLParser", __PRETTY_FUNCTION__
);
260 return __CFXMLParserInit(allocator
, dataSource
, parseOptions
, xmlData
, versionOfNodes
, callBacks
, context
);
263 CFXMLParserRef
CFXMLParserCreateWithDataFromURL(CFAllocatorRef allocator
, CFURLRef dataSource
, CFOptionFlags parseOptions
, CFIndex versionOfNodes
, CFXMLParserCallBacks
*callBacks
, CFXMLParserContext
*context
) {
264 CFAssert1(dataSource
== NULL
|| CFGetTypeID(dataSource
) == CFURLGetTypeID(), __kCFLogAssertion
, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__
);
265 CFAssert1(callBacks
!= NULL
&& callBacks
->createXMLStructure
!= NULL
&& callBacks
->addChild
!= NULL
&& callBacks
->endXMLStructure
!= NULL
, __kCFLogAssertion
, "%s(): callbacks createXMLStructure, addChild, and endXMLStructure must all be non-NULL", __PRETTY_FUNCTION__
);
266 CFAssert2(versionOfNodes
<= 1, __kCFLogAssertion
, "%s(): version number %d is higher than supported by CFXMLParser", __PRETTY_FUNCTION__
, versionOfNodes
);
267 CFAssert1(versionOfNodes
!= 0, __kCFLogAssertion
, "%s(): version number 0 is no longer supported by CFXMLParser", __PRETTY_FUNCTION__
);
269 return __CFXMLParserInit(allocator
, dataSource
, parseOptions
, NULL
, versionOfNodes
, callBacks
, context
);
272 Boolean
CFXMLParserParse(CFXMLParserRef parser
) {
273 CFXMLDocumentInfo docData
;
274 __CFGenericValidateType(parser
, CFXMLParserGetTypeID());
275 if (parser
->status
!= kCFXMLStatusParseNotBegun
) return false;
276 parser
->status
= kCFXMLStatusParseInProgress
;
278 if (!_openInputStream(&parser
->input
)) {
279 if (!parser
->input
.data
) {
281 parser
->status
= kCFXMLErrorNoData
;
282 parser
->errorString
= CFStringCreateWithFormat(CFGetAllocator(parser
), NULL
, CFSTR("No data found at %@"), CFURLGetString(parser
->input
.url
));
284 // couldn't figure out the encoding
285 CFAssert(parser
->input
.encoding
== kCFStringEncodingInvalidId
, __kCFLogAssertion
, "CFXMLParser internal error: input stream could not be opened");
286 parser
->status
= kCFXMLErrorUnknownEncoding
;
287 parser
->errorString
= CFStringCreateWithCString(CFGetAllocator(parser
), "Encountered unknown encoding", kCFStringEncodingASCII
);
289 if (parser
->callBacks
.handleError
) {
290 INVOKE_CALLBACK3(parser
->callBacks
.handleError
, parser
, parser
->status
, parser
->context
.info
);
295 // Create the document
296 parser
->stack
= (void **)CFAllocatorAllocate(CFGetAllocator(parser
), 16 * sizeof(void *), 0);
297 parser
->capacity
= 16;
298 parser
->node
->dataTypeID
= kCFXMLNodeTypeDocument
;
299 docData
.encoding
= _inputStreamGetEncoding(&parser
->input
);
300 docData
.sourceURL
= parser
->input
.url
;
301 parser
->node
->additionalData
= &docData
;
302 parser
->stack
[0] = (void *)INVOKE_CALLBACK3(parser
->callBacks
.createXMLStructure
, parser
, parser
->node
, parser
->context
.info
);
303 parser
->top
= parser
->stack
;
304 parser
->node
->additionalData
= NULL
;
306 // Client may have called CFXMLParserAbort() during any callback, so we must always check to see if we have an error status after a callback
307 if (parser
->status
!= kCFXMLStatusParseInProgress
) {
308 _CFReportError(parser
, parser
->status
, NULL
);
311 return parseXML(parser
);
314 /* The next several functions are all intended to parse past a particular XML structure. They expect parser->curr to be set to the first content character of their structure (e.g. parseComment expects parser->curr to be set just past "<!--"). They parse to the end of their structure, calling any necessary callbacks along the way, and advancing parser->curr as they go. They either return void (not possible for the parse to fail) or they return a Boolean (success/failure). The calling routines are expected to catch returned Booleans and fail immediately if false is returned. */
316 // [3] S ::= (#x20 | #x9 | #xD | #xA)+
/*
 * Consumes a run of whitespace from the parser's input stream.
 * Unless the kCFXMLParserSkipWhitespace option is set, the skipped characters
 * are collected into parser->node->dataString, and the private node is set up
 * as a kCFXMLNodeTypeWhitespace leaf and handed to reportNewLeaf().
 * NOTE(review): the declaration of `len` and any test on it are not visible
 * in this excerpt - confirm under which condition the leaf is reported and
 * what is returned otherwise.
 */
317 static Boolean
parseWhitespace(CFXMLParserRef parser
) {
// Whitespace is only reported to the client when it has not asked for it to be skipped.
319 Boolean report
= !(parser
->options
& kCFXMLParserSkipWhitespace
);
// Pass the node's string as the capture buffer only when reporting; NULL means skip without collecting.
320 len
= _inputStreamSkipWhitespace(&parser
->input
, report
? (CFMutableStringRef
)(parser
->node
->dataString
) : NULL
);
322 parser
->node
->dataTypeID
= kCFXMLNodeTypeWhitespace
;
323 parser
->node
->additionalData
= NULL
;
324 return reportNewLeaf(parser
);
330 // parser should be just past "<!--"
/*
 * Parses the remainder of an XML comment; the input is expected to be
 * positioned just past the comment opener.
 *
 * parser - the parser whose input stream is consumed.
 * report - whether the caller wants the comment reported; it is additionally
 *          suppressed when kCFXMLParserSkipMetaData is set.
 *
 * Scans up to the first "--", collecting the text into
 * parser->node->dataString when reporting, then requires the next character
 * to be '>'. Reports kCFXMLErrorUnexpectedEOF if the input ends first and
 * kCFXMLErrorMalformedComment if "--" is followed by anything else. On
 * success the private node is set up as a kCFXMLNodeTypeComment leaf for
 * reportNewLeaf().
 * NOTE(review): the declaration of `ch`, the return statements of the error
 * branches and the condition guarding the final branch are not visible in
 * this excerpt - confirm against the full file.
 */
331 static Boolean
parseComment(CFXMLParserRef parser
, Boolean report
) {
// Terminator to scan for.
332 const UniChar dashes
[2] = {'-', '-'};
// Comments count as meta data, so the skip-meta-data option overrides the caller's request.
334 report
= report
&& (!(parser
->options
& kCFXMLParserSkipMetaData
));
// Both steps must succeed: find "--", then read the character that follows it.
335 if (!_inputStreamScanToCharacters(&parser
->input
, dashes
, 2, report
? (CFMutableStringRef
)(parser
->node
->dataString
) : NULL
) || !_inputStreamGetCharacter(&parser
->input
, &ch
)) {
336 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
,"Found unexpected EOF while parsing comment");
// "--" is only legal as part of the closing "-->".
338 } else if (ch
!= '>') {
339 _CFReportError(parser
, kCFXMLErrorMalformedComment
, "Found \"--\" within a comment");
342 parser
->node
->dataTypeID
= kCFXMLNodeTypeComment
;
343 parser
->node
->additionalData
= NULL
;
344 return reportNewLeaf(parser
);
351 [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
352 [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
354 // parser should be set to the first character after "<?"
355 static Boolean
parseProcessingInstruction(CFXMLParserRef parser
, Boolean report
) {
356 const UniChar piTermination
[2] = {'?', '>'};
357 CFMutableStringRef str
;
360 if (!_inputStreamScanXMLName(&parser
->input
, false, &name
)) {
361 _CFReportError(parser
, kCFXMLErrorMalformedProcessingInstruction
, "Found malformed processing instruction");
364 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
365 str
= (report
&& *parser
->top
) ? CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
)) : NULL
;
366 if (!_inputStreamScanToCharacters(&parser
->input
, piTermination
, 2, str
)) {
367 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing processing instruction");
368 if (str
) CFRelease(str
);
373 CFXMLProcessingInstructionInfo data
;
375 CFStringRef tmp
= parser
->node
->dataString
;
376 parser
->node
->dataTypeID
= kCFXMLNodeTypeProcessingInstruction
;
377 parser
->node
->dataString
= name
;
378 data
.dataString
= str
;
379 parser
->node
->additionalData
= &data
;
380 result
= reportNewLeaf(parser
);
381 parser
->node
->additionalData
= NULL
;
382 parser
->node
->dataString
= tmp
;
391 [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
393 static const UniChar _DoctypeOpening
[7] = {'D', 'O', 'C', 'T', 'Y', 'P', 'E'};
394 // first character should be immediately after the "<!"
395 static Boolean
parseDTD(CFXMLParserRef parser
) {
397 Boolean success
, hasExtID
= false;
398 CFXMLDocumentTypeInfo docData
= {{NULL
, NULL
}};
399 void *dtdStructure
= NULL
;
402 // First pass "DOCTYPE"
403 success
= _inputStreamMatchString(&parser
->input
, _DoctypeOpening
, 7);
404 success
= success
&& _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0;
405 success
= success
&& _inputStreamScanXMLName(&parser
->input
, false, &name
);
407 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
408 success
= _inputStreamPeekCharacter(&parser
->input
, &ch
);
410 // didn't make it past "DOCTYPE" successfully.
411 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found malformed DTD");
414 if (success
&& ch
!= '[' && ch
!= '>') {
417 success
= parseExternalID(parser
, false, &(docData
.externalID
));
419 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
420 success
= _inputStreamPeekCharacter(&parser
->input
, &ch
);
424 if (!(parser
->options
& kCFXMLParserSkipMetaData
) && *(parser
->top
)) {
425 CFStringRef tmp
= parser
->node
->dataString
;
426 parser
->node
->dataTypeID
= kCFXMLNodeTypeDocumentType
;
427 parser
->node
->dataString
= name
;
428 parser
->node
->additionalData
= &docData
;
429 dtdStructure
= (void *)INVOKE_CALLBACK3(parser
->callBacks
.createXMLStructure
, parser
, parser
->node
, parser
->context
.info
);
430 if (dtdStructure
&& parser
->status
== kCFXMLStatusParseInProgress
) {
431 INVOKE_CALLBACK4(parser
->callBacks
.addChild
, parser
, *parser
->top
, dtdStructure
, parser
->context
.info
);
433 parser
->node
->additionalData
= NULL
;
434 parser
->node
->dataString
= tmp
;
435 if (parser
->status
!= kCFXMLStatusParseInProgress
) {
436 // callback called CFXMLParserAbort()
437 _CFReportError(parser
, parser
->status
, NULL
);
443 if (docData
.externalID
.publicID
) CFRelease(docData
.externalID
.publicID
);
444 if (docData
.externalID
.systemID
) CFRelease(docData
.externalID
.systemID
);
445 pushXMLNode(parser
, dtdStructure
);
447 if (success
&& ch
== '[') {
449 _inputStreamGetCharacter(&parser
->input
, &ch
);
450 if (!parseInlineDTD(parser
)) return false;
451 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
452 success
= _inputStreamGetCharacter(&parser
->input
, &ch
) && ch
== '>';
453 } else if (success
&& ch
== '>') {
455 _inputStreamGetCharacter(&parser
->input
, &ch
);
458 if (_inputStreamAtEOF(&parser
->input
)) {
459 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing DTD");
461 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found malformed DTD");
466 parser
->top
--; // Remove dtdStructure from the stack
468 if (success
&& dtdStructure
) {
469 INVOKE_CALLBACK3(parser
->callBacks
.endXMLStructure
, parser
, dtdStructure
, parser
->context
.info
);
470 if (parser
->status
!= kCFXMLStatusParseInProgress
) {
471 _CFReportError(parser
, parser
->status
, NULL
);
479 [69] PEReference ::= '%' Name ';'
481 static Boolean
parsePhysicalEntityReference(CFXMLParserRef parser
) {
484 if (!_inputStreamScanXMLName(&parser
->input
, false, &name
)) {
485 _CFReportError(parser
, kCFXMLErrorMalformedName
, "Found malformed name while parsing physical entity reference");
487 } else if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
488 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing physical entity reference");
490 } else if (ch
!= ';') {
491 _CFReportError(parser
, kCFXMLErrorMalformedName
, "Found malformed name while parsing physical entity reference");
493 } else if (!(parser
->options
& kCFXMLParserSkipMetaData
) && *(parser
->top
)) {
494 CFXMLEntityReferenceInfo myData
;
496 CFStringRef tmp
= parser
->node
->dataString
;
497 parser
->node
->dataTypeID
= kCFXMLNodeTypeEntityReference
;
498 parser
->node
->dataString
= name
;
499 myData
.entityType
= kCFXMLEntityTypeParameter
;
500 parser
->node
->additionalData
= &myData
;
501 result
= reportNewLeaf(parser
);
502 parser
->node
->additionalData
= NULL
;
503 parser
->node
->dataString
= tmp
;
511 [54] AttType ::= StringType | TokenizedType | EnumeratedType
512 [55] StringType ::= 'CDATA'
513 [56] TokenizedType ::= 'ID' | 'IDREF'| 'IDREFS'| 'ENTITY'| 'ENTITIES'| 'NMTOKEN'| 'NMTOKENS'
514 [57] EnumeratedType ::= NotationType | Enumeration
515 [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
516 [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
518 static Boolean
parseEnumeration(CFXMLParserRef parser
, Boolean useNMTokens
) {
520 Boolean done
= false;
521 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
522 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
524 } else if (ch
!= '(') {
525 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
528 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
529 if (!_inputStreamScanXMLName(&parser
->input
, useNMTokens
, NULL
)) {
530 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
534 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
535 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
536 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
538 } else if (ch
== ')') {
540 } else if (ch
== '|') {
541 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
542 if (!_inputStreamScanXMLName(&parser
->input
, useNMTokens
, NULL
)) {
543 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
547 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
554 static Boolean
parseAttributeType(CFXMLParserRef parser
, CFMutableStringRef str
) {
555 Boolean success
= false;
556 static const UniChar attTypeStrings
[6][8] = {
557 {'C', 'D', 'A', 'T', 'A', '\0', '\0', '\0'},
558 {'I', 'D', 'R', 'E', 'F', 'S', '\0', '\0'},
559 {'E', 'N', 'T', 'I', 'T', 'Y', '\0', '\0'},
560 {'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S'},
561 {'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S'},
562 {'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N'} };
563 if (str
) _inputStreamSetMark(&parser
->input
);
564 if (_inputStreamMatchString(&parser
->input
, attTypeStrings
[0], 5) ||
565 _inputStreamMatchString(&parser
->input
, attTypeStrings
[1], 6) ||
566 _inputStreamMatchString(&parser
->input
, attTypeStrings
[1], 5) ||
567 _inputStreamMatchString(&parser
->input
, attTypeStrings
[1], 2) ||
568 _inputStreamMatchString(&parser
->input
, attTypeStrings
[2], 6) ||
569 _inputStreamMatchString(&parser
->input
, attTypeStrings
[3], 8) ||
570 _inputStreamMatchString(&parser
->input
, attTypeStrings
[4], 8) ||
571 _inputStreamMatchString(&parser
->input
, attTypeStrings
[4], 7)) {
573 } else if (_inputStreamMatchString(&parser
->input
, attTypeStrings
[5], 8)) {
575 if (_inputStreamSkipWhitespace(&parser
->input
, NULL
) == 0) {
576 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
579 success
= parseEnumeration(parser
, false);
582 success
= parseEnumeration(parser
, true);
586 _inputStreamGetCharactersFromMark(&parser
->input
, str
);
588 _inputStreamClearMark(&parser
->input
);
593 /* [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) */
594 static Boolean
parseAttributeDefaultDeclaration(CFXMLParserRef parser
, CFMutableStringRef str
) {
595 const UniChar strings
[3][8] = {
596 {'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D'},
597 {'I', 'M', 'P', 'L', 'I', 'E', 'D', '\0'},
598 {'F', 'I', 'X', 'E', 'D', '\0', '\0', '\0'}};
601 if (str
) _inputStreamSetMark(&parser
->input
);
602 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
603 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
605 } else if (ch
== '#') {
606 if (_inputStreamMatchString(&parser
->input
, strings
[0], 8) ||
607 _inputStreamMatchString(&parser
->input
, strings
[1], 7)) {
609 } else if (!_inputStreamMatchString(&parser
->input
, strings
[2], 5) || _inputStreamSkipWhitespace(&parser
->input
, NULL
) == 0) {
610 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
613 // we fall through if "#FIXED" was matched, and at least one whitespace character was stripped.
614 success
= parseAttributeValue(parser
, NULL
);
617 _inputStreamReturnCharacter(&parser
->input
, ch
);
618 success
= parseAttributeValue(parser
, NULL
);
622 _inputStreamGetCharactersFromMark(&parser
->input
, str
);
624 _inputStreamClearMark(&parser
->input
);
630 [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
631 [53] AttDef ::= S Name S AttType S DefaultDecl
633 static Boolean
parseAttributeListDeclaration(CFXMLParserRef parser
) {
634 const UniChar attList
[7] = {'A', 'T', 'T', 'L', 'I', 'S', 'T'};
635 CFXMLAttributeListDeclarationInfo attListData
;
636 CFXMLAttributeDeclarationInfo attributeArray
[8], *attributes
=attributeArray
;
637 CFIndex capacity
= 8;
639 Boolean success
= true;
641 if (!_inputStreamMatchString(&parser
->input
, attList
, 7) ||
642 _inputStreamSkipWhitespace(&parser
->input
, NULL
) == 0 ||
643 !_inputStreamScanXMLName(&parser
->input
, false, &name
)) {
644 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
647 attListData
.numberOfAttributes
= 0;
648 if (!(*parser
->top
) || (parser
->options
& kCFXMLParserSkipMetaData
)) {
649 // Use this to mark that we don't need to collect attribute information to report to the client. Ultimately, we may want to collect this for our own use (for validation, for instance), but for now, the only reason we would create it would be for the client. -- REW, 2/9/2000
652 while (_inputStreamPeekCharacter(&parser
->input
, &ch
) && ch
!= '>' && _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0) {
653 CFXMLAttributeDeclarationInfo
*attribute
= NULL
;
654 if (_inputStreamPeekCharacter(&parser
->input
, &ch
) && ch
== '>')
657 if (capacity
== attListData
.numberOfAttributes
) {
658 capacity
= 2*capacity
;
659 if (attributes
!= attributeArray
) {
660 attributes
= (CFXMLAttributeDeclarationInfo
*)CFAllocatorReallocate(CFGetAllocator(parser
), attributes
, capacity
* sizeof(CFXMLAttributeDeclarationInfo
), 0);
662 attributes
= (CFXMLAttributeDeclarationInfo
*)CFAllocatorAllocate(CFGetAllocator(parser
), capacity
* sizeof(CFXMLAttributeDeclarationInfo
), 0);
665 attribute
= &(attributes
[attListData
.numberOfAttributes
]);
666 // Much better if we can somehow create these strings immutable - then if the client (or we ourselves) has to copy them, they will end up multiply-retained, rather than having a new alloc and data copy performed. -- REW, 2/9/2000
667 attribute
->typeString
= CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
));
668 attribute
->defaultString
= CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
));
670 if (!_inputStreamScanXMLName(&parser
->input
, false, &(attribute
->attributeName
)) || (_inputStreamSkipWhitespace(&parser
->input
, NULL
) == 0)) {
671 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
675 if (!parseAttributeType(parser
, attribute
? (CFMutableStringRef
)attribute
->typeString
: NULL
)) {
679 if (_inputStreamSkipWhitespace(&parser
->input
, NULL
) == 0) {
680 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
684 if (!parseAttributeDefaultDeclaration(parser
, attribute
? (CFMutableStringRef
)attribute
->defaultString
: NULL
)) {
688 attListData
.numberOfAttributes
++;
691 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
692 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
694 } else if (ch
!= '>') {
695 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
697 } else if (attributes
) {
698 CFStringRef tmp
= parser
->node
->dataString
;
699 parser
->node
->dataTypeID
= kCFXMLNodeTypeAttributeListDeclaration
;
700 parser
->node
->dataString
= name
;
701 attListData
.attributes
= attributes
;
702 parser
->node
->additionalData
= (void *)&attListData
;
703 success
= reportNewLeaf(parser
);
704 parser
->node
->additionalData
= NULL
;
705 parser
->node
->dataString
= tmp
;
709 // Free up all that memory
711 for (idx
= 0; idx
< attListData
.numberOfAttributes
; idx
++) {
712 // Do not release attributeName here; it's a uniqued string from scanXMLName
713 CFRelease(attributes
[idx
].typeString
);
714 CFRelease(attributes
[idx
].defaultString
);
716 if (attributes
!= attributeArray
) {
717 CFAllocatorDeallocate(CFGetAllocator(parser
), attributes
);
/*
 * Scans a quoted system literal from the input stream.
 *
 * parser - the parser whose input stream is consumed.
 * extID  - external ID whose systemID field receives the result.
 *
 * When the quoted string is scanned successfully, extID->systemID is set to
 * a URL created from it relative to the parser's source URL; otherwise
 * extID->systemID is set to NULL. A second path scans the quoted string
 * without capturing it.
 * NOTE(review): the declaration of `success`, the condition selecting between
 * the capturing and non-capturing paths (presumably whether extID is NULL),
 * the release of urlStr and the return statement are not visible in this
 * excerpt - confirm against the full file.
 */
723 CF_INLINE Boolean
parseSystemLiteral(CFXMLParserRef parser
, CFXMLExternalID
*extID
) {
// Temporary buffer that receives the literal's characters.
726 CFMutableStringRef urlStr
= CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
));
727 if (_inputStreamScanQuotedString(&parser
->input
, urlStr
)) {
// The literal is resolved against the URL of the document being parsed.
729 extID
->systemID
= CFURLCreateWithString(CFGetAllocator(parser
), urlStr
, parser
->input
.url
);
731 extID
->systemID
= NULL
;
// Non-capturing path: only advance past the quoted string.
736 success
= _inputStreamScanQuotedString(&parser
->input
, NULL
);
742 [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
743 [83] PublicID ::= 'PUBLIC' S PubidLiteral
744 [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
745 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
746 [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
748 // This does NOT report errors itself; caller can check to see if parser->input is at EOF to determine whether the formatting failed or unexpected EOF occurred. -- REW, 2/2/2000
749 static Boolean
parseExternalID(CFXMLParserRef parser
, Boolean alsoAcceptPublicID
, CFXMLExternalID
*extID
) {
750 const UniChar publicString
[6] = {'P', 'U', 'B', 'L', 'I', 'C'};
751 const UniChar systemString
[6] = {'S', 'Y', 'S', 'T', 'E', 'M'};
754 extID
->systemID
= NULL
;
755 extID
->publicID
= NULL
;
757 if (_inputStreamMatchString(&parser
->input
, publicString
, 6)) {
758 success
= _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0;
760 extID
->publicID
= CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
));
761 success
= success
&& _inputStreamScanQuotedString(&parser
->input
, (CFMutableStringRef
)extID
->publicID
);
763 success
= success
&& _inputStreamScanQuotedString(&parser
->input
, NULL
);
767 if (alsoAcceptPublicID
) {
768 _inputStreamSetMark(&parser
->input
); // In case we need to roll back the parser
770 if (_inputStreamSkipWhitespace(&parser
->input
, NULL
) == 0
771 || !_inputStreamPeekCharacter(&parser
->input
, &ch
)
772 || (ch
!= '\'' && ch
!= '\"')
773 || !parseSystemLiteral(parser
, extID
)) {
774 success
= alsoAcceptPublicID
;
775 if (alsoAcceptPublicID
) {
776 _inputStreamBackUpToMark(&parser
->input
);
781 if (alsoAcceptPublicID
) {
782 _inputStreamClearMark(&parser
->input
);
785 } else if (_inputStreamMatchString(&parser
->input
, systemString
, 6)) {
786 success
= _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0 && parseSystemLiteral(parser
, extID
);
794 [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
796 static Boolean
parseNotationDeclaration(CFXMLParserRef parser
) {
797 static UniChar notationString
[8] = {'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N'};
798 Boolean report
= *(parser
->top
) && !(parser
->options
& kCFXMLParserSkipMetaData
);
799 CFXMLNotationInfo notationData
= {{NULL
, NULL
}};
802 _inputStreamMatchString(&parser
->input
, notationString
, 8) &&
803 _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0 &&
804 _inputStreamScanXMLName(&parser
->input
, false, report
? &name
: NULL
) &&
805 _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0 &&
806 parseExternalID(parser
, true, report
? &(notationData
.externalID
) : NULL
);
810 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
811 success
= (_inputStreamGetCharacter(&parser
->input
, &ch
) && ch
== '>');
814 if (_inputStreamAtEOF(&parser
->input
)) {
815 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
817 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
820 CFStringRef tmp
= parser
->node
->dataString
;
821 parser
->node
->dataTypeID
= kCFXMLNodeTypeNotation
;
822 parser
->node
->dataString
= name
;
823 parser
->node
->additionalData
= ¬ationData
;
824 success
= reportNewLeaf(parser
);
825 parser
->node
->additionalData
= NULL
;
826 parser
->node
->dataString
= tmp
;
828 if (notationData
.externalID
.systemID
) CFRelease(notationData
.externalID
.systemID
);
829 if (notationData
.externalID
.publicID
) CFRelease(notationData
.externalID
.publicID
);
834 [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
835 [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
836 [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
838 static Boolean
parseChoiceOrSequence(CFXMLParserRef parser
, Boolean pastParen
) {
839 UniChar ch
, separator
;
841 if (!_inputStreamGetCharacter(&parser
->input
, &ch
) || ch
!= '(') return false;
842 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
844 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) return false;
846 /* Now scanning cp, production [48] */
848 if (!parseChoiceOrSequence(parser
, false)) return false;
850 if (!_inputStreamScanXMLName(&parser
->input
, false, NULL
)) return false;
852 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) return false;
853 if (ch
== '?' || ch
== '*' || ch
== '+') _inputStreamGetCharacter(&parser
->input
, &ch
);
856 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
857 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) return false;
858 if (ch
== ')') return true;
859 if (ch
!= '|' && ch
!= ',') return false;
861 while (ch
== separator
) {
862 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
863 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) return false;
865 if (!_inputStreamScanXMLName(&parser
->input
, false, NULL
)) return false;
866 } else if (!parseChoiceOrSequence(parser
, false)) {
869 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
870 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) return false;
876 [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')'
878 static Boolean
parseMixedElementContent(CFXMLParserRef parser
) {
879 static const UniChar pcdataString
[7] = {'#', 'P', 'C', 'D', 'A', 'T', 'A'};
881 if (!_inputStreamMatchString(&parser
->input
, pcdataString
, 7)) return false;
882 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
883 if (!_inputStreamGetCharacter(&parser
->input
, &ch
) && (ch
== ')' || ch
== '|')) return false;
884 if (ch
== ')') return true;
887 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
888 if (!_inputStreamScanXMLName(&parser
->input
, false, NULL
)) return false;
889 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
890 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) return false;
892 if (ch
!= ')') return false;
893 if (!_inputStreamGetCharacter(&parser
->input
, &ch
) || ch
!= '*') return false;
898 [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
899 [47] children ::= (choice | seq) ('?' | '*' | '+')?
901 static Boolean
parseElementContentSpec(CFXMLParserRef parser
) {
902 static const UniChar eltContentEmpty
[5] = {'E', 'M', 'P', 'T', 'Y'};
903 static const UniChar eltContentAny
[3] = {'A', 'N', 'Y'};
905 if (_inputStreamMatchString(&parser
->input
, eltContentEmpty
, 5) || _inputStreamMatchString(&parser
->input
, eltContentAny
, 3)) {
907 } else if (!_inputStreamPeekCharacter(&parser
->input
, &ch
) || ch
!= '(') {
910 // We want to know if we have a Mixed per production [51]. If we don't, we will need to back up and call the parseChoiceOrSequence function. So we set the mark now. -- REW, 2/10/2000
911 _inputStreamGetCharacter(&parser
->input
, &ch
);
912 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
913 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) return false;
916 return parseMixedElementContent(parser
);
918 if (parseChoiceOrSequence(parser
, true)) {
919 if (_inputStreamPeekCharacter(&parser
->input
, &ch
) && (ch
== '*' || ch
== '?' || ch
== '+')) {
920 _inputStreamGetCharacter(&parser
->input
, &ch
);
931 [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
933 static Boolean
parseElementDeclaration(CFXMLParserRef parser
) {
934 Boolean report
= *(parser
->top
) && !(parser
->options
& kCFXMLParserSkipMetaData
);
936 static const UniChar eltChars
[7] = {'E', 'L', 'E', 'M', 'E', 'N', 'T'};
938 CFMutableStringRef contentDesc
= NULL
;
940 success
= _inputStreamMatchString(&parser
->input
, eltChars
, 7)
941 && _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0
942 && _inputStreamScanXMLName(&parser
->input
, false, report
? &name
: NULL
)
943 && _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0;
945 if (report
) _inputStreamSetMark(&parser
->input
);
946 success
= parseElementContentSpec(parser
);
947 if (success
&& report
) {
948 contentDesc
= CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
));
949 _inputStreamGetCharactersFromMark(&parser
->input
, contentDesc
);
951 if (report
) _inputStreamClearMark(&parser
->input
);
952 if (success
) _inputStreamSkipWhitespace(&parser
->input
, NULL
);
953 success
= success
&& _inputStreamMatchString(&parser
->input
, &ch
, 1);
956 if (_inputStreamAtEOF(&parser
->input
)) {
957 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
959 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
962 CFXMLElementTypeDeclarationInfo eltData
;
963 CFStringRef tmp
= parser
->node
->dataString
;
964 parser
->node
->dataTypeID
= kCFXMLNodeTypeElementTypeDeclaration
;
965 parser
->node
->dataString
= name
;
966 eltData
.contentDescription
= contentDesc
;
967 parser
->node
->additionalData
= &eltData
;
968 success
= reportNewLeaf(parser
);
969 parser
->node
->additionalData
= NULL
;
970 parser
->node
->dataString
= tmp
;
972 if (contentDesc
) CFRelease(contentDesc
);
977 [70] EntityDecl ::= GEDecl | PEDecl
978 [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
979 [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
980 [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
981 [74] PEDef ::= EntityValue | ExternalID
982 [76] NDataDecl ::= S 'NDATA' S Name
983 [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | "'" ([^%&'] | PEReference | Reference)* "'"
985 static Boolean
parseEntityDeclaration(CFXMLParserRef parser
) {
986 const UniChar entityStr
[6] = {'E', 'N', 'T', 'I', 'T', 'Y'};
988 Boolean isPEDecl
= false;
989 CFXMLEntityInfo entityData
;
991 Boolean report
= *(parser
->top
) && !(parser
->options
& kCFXMLParserSkipMetaData
);
993 _inputStreamMatchString(&parser
->input
, entityStr
, 6) &&
994 (_inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0) &&
995 _inputStreamPeekCharacter(&parser
->input
, &ch
);
997 entityData
.replacementText
= NULL
;
998 entityData
.entityID
.publicID
= NULL
;
999 entityData
.entityID
.systemID
= NULL
;
1000 entityData
.notationName
= NULL
;
1001 // We will set entityType immediately before reporting
1003 if (success
&& ch
== '%') {
1004 _inputStreamGetCharacter(&parser
->input
, &ch
);
1005 success
= _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0;
1008 success
= success
&& _inputStreamScanXMLName(&parser
->input
, false, report
? &name
: NULL
) && (_inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0) && _inputStreamPeekCharacter(&parser
->input
, &ch
);
1009 if (success
&& (ch
== '\"' || ch
== '\'')) {
1011 // This is not quite correct - the string scanned cannot contain '%' or '&' unless it's as part of a valid entity reference -- REW, 2/2/2000
1013 entityData
.replacementText
= CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
));
1014 success
= _inputStreamScanQuotedString(&parser
->input
, (CFMutableStringRef
)entityData
.replacementText
);
1016 success
= _inputStreamScanQuotedString(&parser
->input
, NULL
);
1018 } else if (success
) {
1020 success
= parseExternalID(parser
, false, report
? &(entityData
.entityID
) : NULL
);
1021 if (success
&& !isPEDecl
&& _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0) {
1022 // There could be an option NDataDecl
1023 // Don't we need to set entityData.notationName? -- REW, 3/6/2000
1024 const UniChar nDataStr
[5] = {'N', 'D', 'A', 'T', 'A'};
1025 if (_inputStreamMatchString(&parser
->input
, nDataStr
, 5)) {
1026 success
= (_inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0) && _inputStreamScanXMLName(&parser
->input
, false, NULL
);
1031 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
1032 success
= _inputStreamGetCharacter(&parser
->input
, &ch
) && ch
== '>';
1035 if (_inputStreamAtEOF(&parser
->input
)) {
1036 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
1038 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
1041 CFStringRef tmp
= parser
->node
->dataString
;
1042 if (isPEDecl
) entityData
.entityType
= kCFXMLEntityTypeParameter
;
1043 else if (entityData
.replacementText
) entityData
.entityType
= kCFXMLEntityTypeParsedInternal
;
1044 else if (!entityData
.notationName
) entityData
.entityType
= kCFXMLEntityTypeParsedExternal
;
1045 else entityData
.entityType
= kCFXMLEntityTypeUnparsed
;
1046 parser
->node
->dataTypeID
= kCFXMLNodeTypeEntity
;
1047 parser
->node
->dataString
= name
;
1048 parser
->node
->additionalData
= &entityData
;
1049 success
= reportNewLeaf(parser
);
1050 parser
->node
->additionalData
= NULL
;
1051 parser
->node
->dataString
= tmp
;
1052 if (entityData
.replacementText
) CFRelease(entityData
.replacementText
);
1054 if (entityData
.entityID
.publicID
) CFRelease(entityData
.entityID
.publicID
);
1055 if (entityData
.entityID
.systemID
) CFRelease(entityData
.entityID
.systemID
);
1060 [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1061 [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
1063 // First character should be just past '['
1064 static Boolean
parseInlineDTD(CFXMLParserRef parser
) {
1065 Boolean success
= true;
1066 while (success
&& !_inputStreamAtEOF(&parser
->input
)) {
1069 parseWhitespace(parser
);
1070 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) break;
1073 success
= parsePhysicalEntityReference(parser
);
1074 } else if (ch
== '<') {
1076 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
1077 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
1081 // Processing Instruction
1082 success
= parseProcessingInstruction(parser
, true); // We can safely pass true here, because *parser->top will be NULL if kCFXMLParserSkipMetaData is true
1083 } else if (ch
== '!') {
1084 UniChar dashes
[2] = {'-', '-'};
1085 if (_inputStreamMatchString(&parser
->input
, dashes
, 2)) {
1087 success
= parseComment(parser
, true);
1089 // elementdecl | AttListDecl | EntityDecl | NotationDecl
1090 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) {
1091 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
1093 } else if (ch
== 'A') {
1095 success
= parseAttributeListDeclaration(parser
);
1096 } else if (ch
== 'N') {
1097 success
= parseNotationDeclaration(parser
);
1098 } else if (ch
== 'E') {
1099 // elementdecl | EntityDecl
1100 _inputStreamGetCharacter(&parser
->input
, &ch
);
1101 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) {
1102 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
1105 _inputStreamReturnCharacter(&parser
->input
, 'E');
1107 success
= parseElementDeclaration(parser
);
1108 } else if (ch
== 'N') {
1109 success
= parseEntityDeclaration(parser
);
1111 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
1115 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
1120 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
1123 } else if (ch
== ']') {
1126 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
1131 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
1137 [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1139 static Boolean
parseTagContent(CFXMLParserRef parser
) {
1140 while (!_inputStreamAtEOF(&parser
->input
)) {
1142 CFIndex numWhitespaceCharacters
;
1144 _inputStreamSetMark(&parser
->input
);
1145 numWhitespaceCharacters
= _inputStreamSkipWhitespace(&parser
->input
, NULL
);
1146 // Don't report the whitespace yet; if the first thing we see is character data, we put the whitespace back and report it as part of the character data.
1147 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) break; // break == report unexpected EOF
1149 if (ch
!= '<' && ch
!= '&') { // CharData
1150 // Back off the whitespace; we'll report it with the PCData
1151 _inputStreamBackUpToMark(&parser
->input
);
1152 _inputStreamClearMark(&parser
->input
);
1153 if (!parsePCData(parser
)) return false;
1154 if(_inputStreamComposingErrorOccurred(&parser
->input
)) {
1155 _CFReportError(parser
, kCFXMLErrorEncodingConversionFailure
, "Encountered string encoding error");
1161 // element | Reference | CDSect | PI | Comment
1162 // We can safely report any whitespace now
1163 if (!(parser
->options
& kCFXMLParserSkipWhitespace
) && numWhitespaceCharacters
!= 0 && *(parser
->top
)) {
1164 _inputStreamReturnCharacter(&parser
->input
, ch
);
1165 _inputStreamGetCharactersFromMark(&parser
->input
, (CFMutableStringRef
)(parser
->node
->dataString
));
1166 parser
->node
->dataTypeID
= kCFXMLNodeTypeWhitespace
;
1167 parser
->node
->additionalData
= NULL
;
1168 if (!reportNewLeaf(parser
)) return false;
1169 _inputStreamGetCharacter(&parser
->input
, &ch
);
1171 _inputStreamClearMark(&parser
->input
);
1174 // Reference; for the time being, we don't worry about processing these; just report them as Entity references
1175 if (!parseEntityReference(parser
, true)) return false;
1179 // ch == '<'; element | CDSect | PI | Comment
1180 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) break;
1181 if (ch
== '?') { // PI
1182 _inputStreamGetCharacter(&parser
->input
, &ch
);
1183 if (!parseProcessingInstruction(parser
, true))
1185 } else if (ch
== '/') { // end tag; we're passing outside of content's production
1186 _inputStreamReturnCharacter(&parser
->input
, '<'); // Back off to the '<'
1188 } else if (ch
!= '!') { // element
1189 if (!parseTag(parser
)) return false;
1192 UniChar dashes
[3] = {'!', '-', '-'};
1193 if (_inputStreamMatchString(&parser
->input
, dashes
, 3)) {
1195 if (!parseComment(parser
, true)) return false;
1197 // Should have a CDSect; back off the "<!" and call parseCDSect
1198 _inputStreamReturnCharacter(&parser
->input
, '<');
1199 if (!parseCDSect(parser
)) return false;
1204 if(_inputStreamComposingErrorOccurred(&parser
->input
)) {
1205 _CFReportError(parser
, kCFXMLErrorEncodingConversionFailure
, "Encountered string encoding error");
1208 // Only way to get here is if premature EOF was found
1209 //#warning CF:Include the tag name here
1210 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing tag content");
1214 static Boolean
parseCDSect(CFXMLParserRef parser
) {
1215 const UniChar _CDSectOpening
[9] = {'<', '!', '[', 'C', 'D', 'A', 'T', 'A', '['};
1216 const UniChar _CDSectClose
[3] = {']', ']', '>'};
1217 if (!_inputStreamMatchString(&parser
->input
, _CDSectOpening
, 9)) {
1218 _CFReportError(parser
, kCFXMLErrorMalformedCDSect
, "Encountered bad prefix to a presumed CDATA section");
1221 if (!_inputStreamScanToCharacters(&parser
->input
, _CDSectClose
, 3, (CFMutableStringRef
)(parser
->node
->dataString
))) {
1222 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing CDATA section");
1226 parser
->node
->dataTypeID
= kCFXMLNodeTypeCDATASection
;
1227 parser
->node
->additionalData
= NULL
;
1228 return reportNewLeaf(parser
);
1232 [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1234 static Boolean
validateCharacterReference(CFStringRef str
) {
1236 CFIndex idx
, len
= CFStringGetLength(str
);
1237 if (len
< 2) return false;
1238 if (CFStringGetCharacterAtIndex(str
, 0) != '#') return false;
1239 if (CFStringGetCharacterAtIndex(str
, 1) == 'x') {
1242 if (len
== 2) return false;
1250 ch
= CFStringGetCharacterAtIndex(str
, idx
);
1252 if (!(ch
<= '9' && ch
>= '0') &&
1253 !(isHex
&& ((ch
>= 'a' && ch
<= 'f') || (ch
>= 'A' && ch
<= 'F')))) {
1257 return (idx
== len
);
1261 [67] Reference ::= EntityRef | CharRef
1262 [68] EntityRef ::= '&' Name ';'
1264 static Boolean
parseEntityReference(CFXMLParserRef parser
, Boolean report
) {
1266 CFXMLEntityReferenceInfo entData
;
1267 CFStringRef name
= NULL
;
1268 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) {
1269 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing EntityReference");
1274 if (!_inputStreamScanToCharacters(&parser
->input
, &ch
, 1, (CFMutableStringRef
)parser
->node
->dataString
)) {
1275 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing EntityReference");
1277 } else if (!validateCharacterReference(parser
->node
->dataString
)) {
1278 _CFReportError(parser
, kCFXMLErrorMalformedCharacterReference
, "Encountered illegal character while parsing character reference");
1281 entData
.entityType
= kCFXMLEntityTypeCharacter
;
1282 name
= parser
->node
->dataString
;
1283 } else if (!_inputStreamScanXMLName(&parser
->input
, false, report
? &name
: NULL
) || !_inputStreamGetCharacter(&parser
->input
, &ch
) || ch
!= ';') {
1284 if (_inputStreamAtEOF(&parser
->input
)) {
1285 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing EntityReference");
1288 _CFReportError(parser
, kCFXMLErrorMalformedName
, "Encountered malformed name while parsing EntityReference");
1292 entData
.entityType
= kCFXMLEntityTypeParsedInternal
;
1295 CFStringRef tmp
= parser
->node
->dataString
;
1297 parser
->node
->dataTypeID
= kCFXMLNodeTypeEntityReference
;
1298 parser
->node
->dataString
= name
;
1299 parser
->node
->additionalData
= &entData
;
1300 success
= reportNewLeaf(parser
);
1301 parser
->node
->additionalData
= NULL
;
1302 parser
->node
->dataString
= tmp
;
1310 // Kept from old entity reference parsing....
1312 switch (*(parser
->curr
)) {
1314 if (len
>= 3 && *(parser
->curr
+1) == 't' && *(parser
->curr
+2) == ';') {
1319 parser
->errorString
= CFStringCreateWithFormat(parser
->allocator
, NULL
, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser
));
1322 if (len
>= 3 && *(parser
->curr
+1) == 't' && *(parser
->curr
+2) == ';') {
1327 parser
->errorString
= CFStringCreateWithFormat(parser
->allocator
, NULL
, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser
));
1329 case 'a': // "apos" or "amp"
1330 if (len
< 4) { // Not enough characters for either conversion
1331 parser
->errorString
= CFStringCreateWithCString(parser
->allocator
, "Encountered unexpected EOF", kCFStringEncodingASCII
);
1334 if (*(parser
->curr
+1) == 'm') {
1336 if (*(parser
->curr
+2) == 'p' && *(parser
->curr
+3) == ';') {
1341 } else if (*(parser
->curr
+1) == 'p') {
1343 if (len
> 4 && *(parser
->curr
+2) == 'o' && *(parser
->curr
+3) == 's' && *(parser
->curr
+4) == ';') {
1349 parser
->errorString
= CFStringCreateWithFormat(parser
->allocator
, NULL
, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser
));
1351 case 'q': // "quote"
1352 if (len
>= 6 && *(parser
->curr
+1) == 'u' && *(parser
->curr
+2) == 'o' && *(parser
->curr
+3) == 't' && *(parser
->curr
+4) == 'e' && *(parser
->curr
+5) == ';') {
1357 parser
->errorString
= CFStringCreateWithFormat(parser
->allocator
, NULL
, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser
));
1362 Boolean isHex
= false;
1363 if ( len
< 4) { // Not enough characters to make it all fit! Need at least "&#d;"
1364 parser
->errorString
= CFStringCreateWithCString(parser
->allocator
, "Encountered unexpected EOF", kCFStringEncodingASCII
);
1368 if (*(parser
->curr
) == 'x') {
1372 while (parser
->curr
< parser
->end
) {
1373 ch
= *(parser
->curr
);
1375 CFStringAppendCharacters(string
, &num
, 1);
1379 if (!isHex
) num
= num
*10;
1380 else num
= num
<< 4;
1381 if (ch
<= '9' && ch
>= '0') {
1383 } else if (!isHex
) {
1384 parser
->errorString
= CFStringCreateWithFormat(parser
->allocator
, NULL
, CFSTR("Encountered unexpected character %c at line %d"), ch
, lineNumber(parser
));
1386 } else if (ch
>= 'a' && ch
<= 'f') {
1387 num
+= 10 + (ch
- 'a');
1388 } else if (ch
>= 'A' && ch
<= 'F') {
1389 num
+= 10 + (ch
- 'A');
1391 parser
->errorString
= CFStringCreateWithFormat(parser
->allocator
, NULL
, CFSTR("Encountered unexpected character %c at line %d"), ch
, lineNumber(parser
));
1395 parser
->errorString
= CFStringCreateWithCString(parser
->allocator
, "Encountered unexpected EOF", kCFStringEncodingASCII
);
1399 parser
->errorString
= CFStringCreateWithFormat(parser
->allocator
, NULL
, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser
));
1402 CFStringAppendCharacters(string
, &ch
, 1);
1407 [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
1409 static Boolean
parsePCData(CFXMLParserRef parser
) {
1411 Boolean done
= false;
1412 _inputStreamSetMark(&parser
->input
);
1413 while (!done
&& _inputStreamGetCharacter(&parser
->input
, &ch
)) {
1417 _inputStreamReturnCharacter(&parser
->input
, ch
);
1422 const UniChar endSequence
[2] = {']', '>'};
1423 if (_inputStreamMatchString(&parser
->input
, endSequence
, 2)) {
1424 _CFReportError(parser
, kCFXMLErrorMalformedParsedCharacterData
, "Encountered \"]]>\" in parsed character data");
1425 _inputStreamClearMark(&parser
->input
);
1434 _inputStreamGetCharactersFromMark(&parser
->input
, (CFMutableStringRef
)(parser
->node
->dataString
));
1435 _inputStreamClearMark(&parser
->input
);
1436 parser
->node
->dataTypeID
= kCFXMLNodeTypeText
;
1437 parser
->node
->additionalData
= NULL
;
1438 return reportNewLeaf(parser
);
1442 [42] ETag ::= '</' Name S? '>'
1444 static Boolean
parseCloseTag(CFXMLParserRef parser
, CFStringRef tag
) {
1445 const UniChar beginEndTag
[2] = {'<', '/'};
1446 Boolean unexpectedEOF
= false, mismatch
= false;
1447 CFStringRef closeTag
;
1449 // We can get away with testing pointer equality between tag & closeTag because scanXMLName guarantees the strings it returns are unique.
1450 if (_inputStreamMatchString(&parser
->input
, beginEndTag
, 2) && _inputStreamScanXMLName(&parser
->input
, false, &closeTag
) && closeTag
== tag
) {
1453 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
1454 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
1455 unexpectedEOF
= true;
1456 } else if (ch
!= '>') {
1459 } else if (_inputStreamAtEOF(&parser
->input
)) {
1460 unexpectedEOF
= true;
1465 if (unexpectedEOF
|| mismatch
) {
1466 if (unexpectedEOF
) {
1467 parser
->errorString
= CFStringCreateWithFormat(CFGetAllocator(parser
), NULL
, CFSTR("Encountered unexpected EOF while parsing close tag for <%@>"), tag
);
1468 parser
->status
= kCFXMLErrorUnexpectedEOF
;
1469 if(parser
->callBacks
.handleError
) INVOKE_CALLBACK3(parser
->callBacks
.handleError
, parser
, kCFXMLErrorUnexpectedEOF
, parser
->context
.info
);
1471 parser
->errorString
= CFStringCreateWithFormat(CFGetAllocator(parser
), NULL
, CFSTR("Encountered malformed close tag for <%@>"), tag
);
1472 parser
->status
= kCFXMLErrorMalformedCloseTag
;
1473 if(parser
->callBacks
.handleError
) INVOKE_CALLBACK3(parser
->callBacks
.handleError
, parser
, kCFXMLErrorMalformedCloseTag
, parser
->context
.info
);
1481 [39] element ::= EmptyElementTag | STag content ETag
1482 [40] STag ::= '<' Name (S Attribute)* S? '>'
1483 [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
1485 static Boolean
parseTag(CFXMLParserRef parser
) {
1488 CFXMLElementInfo data
;
1489 Boolean success
= true;
1490 CFStringRef tagName
;
1492 if (!_inputStreamScanXMLName(&parser
->input
, false, &tagName
)) {
1493 _CFReportError(parser
, kCFXMLErrorMalformedStartTag
, "Encountered malformed start tag");
1497 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
1499 if (!parseAttributes(parser
)) return false; // parsed directly into parser->argDict ; parseAttributes consumes any trailing whitespace
1500 data
.attributes
= parser
->argDict
;
1501 data
.attributeOrder
= parser
->argArray
;
1502 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
1503 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF");
1507 data
.isEmpty
= true;
1508 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
1509 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF");
1513 data
.isEmpty
= false;
1516 _CFReportError(parser
, kCFXMLErrorMalformedStartTag
, "Encountered malformed start tag");
1520 if (*parser
->top
|| parser
->top
== parser
->stack
) {
1521 CFStringRef oldStr
= parser
->node
->dataString
;
1522 parser
->node
->dataTypeID
= kCFXMLNodeTypeElement
;
1523 parser
->node
->dataString
= tagName
;
1524 parser
->node
->additionalData
= &data
;
1525 tag
= (void *)INVOKE_CALLBACK3(parser
->callBacks
.createXMLStructure
, parser
, parser
->node
, parser
->context
.info
);
1526 if (tag
&& parser
->status
== kCFXMLStatusParseInProgress
) {
1527 INVOKE_CALLBACK4(parser
->callBacks
.addChild
, parser
, *parser
->top
, tag
, parser
->context
.info
);
1529 parser
->node
->additionalData
= NULL
;
1530 parser
->node
->dataString
= oldStr
;
1531 if (parser
->status
!= kCFXMLStatusParseInProgress
) {
1532 // callback called CFXMLParserAbort()
1533 _CFReportError(parser
, parser
->status
, NULL
);
1540 pushXMLNode(parser
, tag
);
1541 if (!data
.isEmpty
) {
1542 success
= parseTagContent(parser
);
1544 success
= parseCloseTag(parser
, tagName
);
1549 if (success
&& tag
) {
1550 INVOKE_CALLBACK3(parser
->callBacks
.endXMLStructure
, parser
, tag
, parser
->context
.info
);
1551 if (parser
->status
!= kCFXMLStatusParseInProgress
) {
1552 _CFReportError(parser
, parser
->status
, NULL
);
1560 [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
1561 [67] Reference ::= EntityRef | CharRef
1562 [68] EntityRef ::= '&' Name ';'
1564 // For the moment, we don't worry about references in the attribute values.
1565 static Boolean
parseAttributeValue(CFXMLParserRef parser
, CFMutableStringRef str
) {
1567 Boolean success
= _inputStreamGetCharacter(&parser
->input
, "e
);
1568 if (!success
|| (quote
!= '\'' && quote
!= '\"')) return false;
1569 if (str
) _inputStreamSetMark(&parser
->input
);
1570 while (_inputStreamGetCharacter(&parser
->input
, &ch
) && ch
!= quote
) {
1572 case '<': success
= false; break;
1574 if (!parseEntityReference(parser
, false)) {
1583 if (success
&& _inputStreamAtEOF(&parser
->input
)) {
1588 _inputStreamReturnCharacter(&parser
->input
, quote
);
1589 _inputStreamGetCharactersFromMark(&parser
->input
, str
);
1590 _inputStreamGetCharacter(&parser
->input
, &ch
);
1592 _inputStreamClearMark(&parser
->input
);
1598 [40] STag ::= '<' Name (S Attribute)* S? '>'
1599 [41] Attribute ::= Name Eq AttValue
1600 [25] Eq ::= S? '=' S?
1603 // Expects parser->curr to be at the first content character; will consume the trailing whitespace.
1604 Boolean
parseAttributes(CFXMLParserRef parser
) {
1606 CFMutableDictionaryRef dict
;
1607 CFMutableArrayRef array
;
1608 Boolean failure
= false;
1609 if (_inputStreamPeekCharacter(&parser
->input
, &ch
) == '>') {
1610 if (parser
->argDict
) {
1611 CFDictionaryRemoveAllValues(parser
->argDict
);
1612 CFArrayRemoveAllValues(parser
->argArray
);
1614 return true; // No attributes; let caller deal with it
1616 if (!parser
->argDict
) {
1617 parser
->argDict
= CFDictionaryCreateMutable(CFGetAllocator(parser
), 0, &kCFTypeDictionaryKeyCallBacks
, &kCFTypeDictionaryValueCallBacks
);
1618 parser
->argArray
= CFArrayCreateMutable(CFGetAllocator(parser
), 0, &kCFTypeArrayCallBacks
);
1620 CFDictionaryRemoveAllValues(parser
->argDict
);
1621 CFArrayRemoveAllValues(parser
->argArray
);
1623 dict
= parser
->argDict
;
1624 array
= parser
->argArray
;
1625 while (!failure
&& _inputStreamPeekCharacter(&parser
->input
, &ch
) && ch
!= '>' && ch
!= '/') {
1627 CFMutableStringRef value
;
1628 if (!_inputStreamScanXMLName(&parser
->input
, false, &key
)) {
1632 if (CFArrayGetFirstIndexOfValue(array
, CFRangeMake(0, CFArrayGetCount(array
)), key
) != kCFNotFound
) {
1633 _CFReportError(parser
, kCFXMLErrorMalformedStartTag
, "Found repeated attribute");
1636 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
1637 if (!_inputStreamGetCharacter(&parser
->input
, &ch
) || ch
!= '=') {
1641 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
1642 value
= CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
));
1643 if (!parseAttributeValue(parser
, value
)) {
1648 CFArrayAppendValue(array
, key
);
1649 CFDictionarySetValue(dict
, key
, value
);
1651 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
1654 //#warning CF:Include tag name in this error report
1655 _CFReportError(parser
, kCFXMLErrorMalformedStartTag
, "Found illegal character while parsing element tag");
1657 } else if (_inputStreamAtEOF(&parser
->input
)) {
1658 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing element attributes");
1666 [1] document ::= prolog element Misc*
1667 [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1668 [27] Misc ::= Comment | PI | S
1669 [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1671 We treat XMLDecl as a plain old PI, since PI is part of Misc. This changes the prolog and document productions to
1672 [22-1] prolog ::= Misc* (doctypedecl Misc*)?
1673 [1-1] document ::= Misc* (doctypedecl Misc*)? element Misc*
1675 NOTE: This function assumes parser->stack has a valid top. I.e. the document pointer has already been created!
1677 static Boolean
parseXML(CFXMLParserRef parser
) {
1678 Boolean success
= true, sawDTD
= false, sawElement
= false;
1680 while (success
&& _inputStreamPeekCharacter(&parser
->input
, &ch
)) {
1686 success
= parseWhitespace(parser
);
1689 _inputStreamGetCharacter(&parser
->input
, &ch
);
1690 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
1691 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing top-level document");
1696 UniChar dashes
[2] = {'-', '-'};
1697 if (_inputStreamMatchString(&parser
->input
, dashes
, 2)) {
1699 success
= parseComment(parser
, true);
1703 _CFReportError(parser
, kCFXMLErrorMalformedDocument
, "Encountered a second DTD");
1706 success
= parseDTD(parser
);
1707 if (success
) sawDTD
= true;
1709 } else if (ch
== '?') {
1710 // Processing instruction
1711 success
= parseProcessingInstruction(parser
, true);
1715 _CFReportError(parser
, kCFXMLErrorMalformedDocument
, "Encountered second top-level element");
1718 _inputStreamReturnCharacter(&parser
->input
, ch
);
1719 success
= parseTag(parser
);
1720 if (success
) sawElement
= true;
1724 parser
->status
= kCFXMLErrorMalformedDocument
;
1725 parser
->errorString
= ch
< 256 ?
1726 CFStringCreateWithFormat(CFGetAllocator(parser
), NULL
, CFSTR("Encountered unexpected character 0x%x (\'%c\') at top-level"), ch
, ch
) :
1727 CFStringCreateWithFormat(CFGetAllocator(parser
), NULL
, CFSTR("Encountered unexpected Unicode character 0x%x at top-level"), ch
);
1729 if (parser
->callBacks
.handleError
) {
1730 INVOKE_CALLBACK3(parser
->callBacks
.handleError
, parser
, parser
->status
, parser
->context
.info
);
1737 if (!success
) return false;
1739 _CFReportError(parser
, kCFXMLErrorElementlessDocument
, "No element found in document");
1745 static void _CFReportError(CFXMLParserRef parser
, CFXMLParserStatusCode errNum
, const char *str
) {
1747 parser
->status
= errNum
;
1748 parser
->errorString
= CFStringCreateWithCString(CFGetAllocator(parser
), str
, kCFStringEncodingASCII
);
1750 if (parser
->callBacks
.handleError
) {
1751 INVOKE_CALLBACK3(parser
->callBacks
.handleError
, parser
, errNum
, parser
->context
.info
);
1755 // Assumes parser->node has been set and is ready to go
1756 static Boolean
reportNewLeaf(CFXMLParserRef parser
) {
1758 if (*(parser
->top
) == NULL
) return true;
1760 xmlStruct
= (void *)INVOKE_CALLBACK3(parser
->callBacks
.createXMLStructure
, parser
, parser
->node
, parser
->context
.info
);
1761 if (xmlStruct
&& parser
->status
== kCFXMLStatusParseInProgress
) {
1762 INVOKE_CALLBACK4(parser
->callBacks
.addChild
, parser
, *(parser
->top
), xmlStruct
, parser
->context
.info
);
1763 if (parser
->status
== kCFXMLStatusParseInProgress
) INVOKE_CALLBACK3(parser
->callBacks
.endXMLStructure
, parser
, xmlStruct
, parser
->context
.info
);
1765 if (parser
->status
!= kCFXMLStatusParseInProgress
) {
1766 _CFReportError(parser
, parser
->status
, NULL
);
1772 static void pushXMLNode(CFXMLParserRef parser
, void *node
) {
1774 if ((unsigned)(parser
->top
- parser
->stack
) == parser
->capacity
) {
1775 parser
->stack
= (void **)CFAllocatorReallocate(CFGetAllocator(parser
), parser
->stack
, 2 * parser
->capacity
* sizeof(void *), 0);
1776 parser
->top
= parser
->stack
+ parser
->capacity
;
1777 parser
->capacity
= 2*parser
->capacity
;
1779 *(parser
->top
) = node
;
1782 /**************************/
1783 /* Parsing to a CFXMLTree */
1784 /**************************/
1786 static void *_XMLTreeCreateXMLStructure(CFXMLParserRef parser
, CFXMLNodeRef node
, void *context
) {
1787 CFXMLNodeRef myNode
= CFXMLNodeCreateCopy(CFGetAllocator(parser
), node
);
1788 CFXMLTreeRef tree
= CFXMLTreeCreateWithNode(CFGetAllocator(parser
), myNode
);
1790 return (void *)tree
;
1793 static void _XMLTreeAddChild(CFXMLParserRef parser
, void *parent
, void *child
, void *context
) {
1794 CFTreeAppendChild((CFTreeRef
)parent
, (CFTreeRef
)child
);
1797 static void _XMLTreeEndXMLStructure(CFXMLParserRef parser
, void *xmlType
, void *context
) {
1798 CFXMLTreeRef node
= (CFXMLTreeRef
)xmlType
;
1799 if (CFTreeGetParent(node
))
1800 CFRelease((CFXMLTreeRef
)xmlType
);
1803 CFXMLTreeRef
CFXMLTreeCreateWithDataFromURL(CFAllocatorRef allocator
, CFURLRef dataSource
, CFOptionFlags parseOptions
, CFIndex version
) {
1804 CFXMLParserRef parser
;
1805 CFXMLParserCallBacks callbacks
;
1806 CFXMLTreeRef result
;
1808 CFAssert1(dataSource
== NULL
|| CFGetTypeID(dataSource
) == CFURLGetTypeID(), __kCFLogAssertion
, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__
);
1810 callbacks
.createXMLStructure
= _XMLTreeCreateXMLStructure
;
1811 callbacks
.addChild
= _XMLTreeAddChild
;
1812 callbacks
.endXMLStructure
= _XMLTreeEndXMLStructure
;
1813 callbacks
.resolveExternalEntity
= NULL
;
1814 callbacks
.handleError
= NULL
;
1815 parser
= CFXMLParserCreateWithDataFromURL(allocator
, dataSource
, parseOptions
, version
, &callbacks
, NULL
);
1817 if (CFXMLParserParse(parser
)) {
1818 result
= (CFXMLTreeRef
)CFXMLParserGetDocument(parser
);
1820 result
= (CFXMLTreeRef
)CFXMLParserGetDocument(parser
);
1821 if (result
) CFRelease(result
);
1828 CFXMLTreeRef
CFXMLTreeCreateFromData(CFAllocatorRef allocator
, CFDataRef xmlData
, CFURLRef dataSource
, CFOptionFlags parseOptions
, CFIndex parserVersion
) {
1829 return CFXMLTreeCreateFromDataWithError(allocator
, xmlData
, dataSource
, parseOptions
, parserVersion
, NULL
);
1832 CONST_STRING_DECL(kCFXMLTreeErrorDescription
, "kCFXMLTreeErrorDescription");
1833 CONST_STRING_DECL(kCFXMLTreeErrorLineNumber
, "kCFXMLTreeErrorLineNumber");
1834 CONST_STRING_DECL(kCFXMLTreeErrorLocation
, "kCFXMLTreeErrorLocation");
1835 CONST_STRING_DECL(kCFXMLTreeErrorStatusCode
, "kCFXMLTreeErrorStatusCode");
1837 CFXMLTreeRef
CFXMLTreeCreateFromDataWithError(CFAllocatorRef allocator
, CFDataRef xmlData
, CFURLRef dataSource
, CFOptionFlags parseOptions
, CFIndex parserVersion
, CFDictionaryRef
*errorDict
) {
1838 CFXMLParserRef parser
;
1839 CFXMLParserCallBacks callbacks
;
1840 CFXMLTreeRef result
;
1842 __CFGenericValidateType(xmlData
, CFDataGetTypeID());
1843 CFAssert1(dataSource
== NULL
|| CFGetTypeID(dataSource
) == CFURLGetTypeID(), __kCFLogAssertion
, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__
);
1845 callbacks
.createXMLStructure
= _XMLTreeCreateXMLStructure
;
1846 callbacks
.addChild
= _XMLTreeAddChild
;
1847 callbacks
.endXMLStructure
= _XMLTreeEndXMLStructure
;
1848 callbacks
.resolveExternalEntity
= NULL
;
1849 callbacks
.handleError
= NULL
;
1850 parser
= CFXMLParserCreate(allocator
, xmlData
, dataSource
, parseOptions
, parserVersion
, &callbacks
, NULL
);
1852 if (CFXMLParserParse(parser
)) {
1853 result
= (CFXMLTreeRef
)CFXMLParserGetDocument(parser
);
1855 if (errorDict
) { // collect the error dictionary
1856 *errorDict
= CFDictionaryCreateMutable(allocator
, 4, &kCFTypeDictionaryKeyCallBacks
, &kCFTypeDictionaryValueCallBacks
);
1860 CFStringRef errstring
;
1862 rawnum
= CFXMLParserGetLocation(parser
);
1863 cfnum
= CFNumberCreate(allocator
, kCFNumberSInt32Type
, &rawnum
);
1865 CFDictionaryAddValue((CFMutableDictionaryRef
)*errorDict
, kCFXMLTreeErrorLocation
, cfnum
);
1869 rawnum
= CFXMLParserGetLineNumber(parser
);
1870 cfnum
= CFNumberCreate(allocator
, kCFNumberSInt32Type
, &rawnum
);
1872 CFDictionaryAddValue((CFMutableDictionaryRef
)*errorDict
, kCFXMLTreeErrorLineNumber
, cfnum
);
1876 rawnum
= CFXMLParserGetStatusCode(parser
);
1877 cfnum
= CFNumberCreate(allocator
, kCFNumberSInt32Type
, &rawnum
);
1879 CFDictionaryAddValue((CFMutableDictionaryRef
)*errorDict
, kCFXMLTreeErrorStatusCode
, cfnum
);
1883 errstring
= CFXMLParserCopyErrorDescription(parser
);
1885 CFDictionaryAddValue((CFMutableDictionaryRef
)*errorDict
, kCFXMLTreeErrorDescription
, errstring
);
1886 CFRelease(errstring
);
1890 result
= (CFXMLTreeRef
)CFXMLParserGetDocument(parser
);
1891 if (result
) CFRelease(result
);
1899 At the very least we need to do <, >, &, ", and '. In addition, we'll have to do everything else in the string.
1900 We should also be handling items that are up over certain values correctly.
1902 CFStringRef
CFXMLCreateStringByEscapingEntities(CFAllocatorRef allocator
, CFStringRef string
, CFDictionaryRef entitiesDictionary
) {
1903 CFAssert1(string
!= NULL
, __kCFLogAssertion
, "%s(): NULL string not permitted.", __PRETTY_FUNCTION__
);
1904 CFMutableStringRef newString
= CFStringCreateMutable(allocator
, 0); // unbounded mutable string
1905 CFMutableCharacterSetRef startChars
= CFCharacterSetCreateMutable(allocator
);
1907 CFStringInlineBuffer inlineBuf
;
1910 CFIndex stringLength
= CFStringGetLength(string
);
1913 CFCharacterSetAddCharactersInString(startChars
, CFSTR("&<>'\""));
1915 CFStringInitInlineBuffer(string
, &inlineBuf
, CFRangeMake(0, stringLength
));
1916 for(idx
= 0; idx
< stringLength
; idx
++) {
1917 uc
= CFStringGetCharacterFromInlineBuffer(&inlineBuf
, idx
);
1918 if(CFCharacterSetIsCharacterMember(startChars
, uc
)) {
1919 CFStringRef previousSubstring
= CFStringCreateWithSubstring(allocator
, string
, CFRangeMake(mark
, idx
- mark
));
1920 CFStringAppend(newString
, previousSubstring
);
1921 CFRelease(previousSubstring
);
1924 CFStringAppend(newString
, CFSTR("&"));
1927 CFStringAppend(newString
, CFSTR("<"));
1930 CFStringAppend(newString
, CFSTR(">"));
1933 CFStringAppend(newString
, CFSTR("'"));
1936 CFStringAppend(newString
, CFSTR("""));
1942 // Copy the remainder to the output string before returning.
1943 CFStringRef remainder
= CFStringCreateWithSubstring(allocator
, string
, CFRangeMake(mark
, idx
- mark
));
1944 if (NULL
!= remainder
) {
1945 CFStringAppend(newString
, remainder
);
1946 CFRelease(remainder
);
1949 CFRelease(startChars
);
1953 CFStringRef
CFXMLCreateStringByUnescapingEntities(CFAllocatorRef allocator
, CFStringRef string
, CFDictionaryRef entitiesDictionary
) {
1954 CFAssert1(string
!= NULL
, __kCFLogAssertion
, "%s(): NULL string not permitted.", __PRETTY_FUNCTION__
);
1956 CFStringInlineBuffer inlineBuf
; /* use this for fast traversal of the string in question */
1958 CFIndex lastChunkStart
, length
= CFStringGetLength(string
);
1959 CFIndex i
, entityStart
;
1963 CFMutableDictionaryRef fullReplDict
= entitiesDictionary
? CFDictionaryCreateMutableCopy(allocator
, 0, entitiesDictionary
) : CFDictionaryCreateMutable(allocator
, 0, &kCFTypeDictionaryKeyCallBacks
, &kCFTypeDictionaryValueCallBacks
);
1965 CFDictionaryAddValue(fullReplDict
, (const void *)CFSTR("amp"), (const void *)CFSTR("&"));
1966 CFDictionaryAddValue(fullReplDict
, (const void *)CFSTR("quot"), (const void *)CFSTR("\""));
1967 CFDictionaryAddValue(fullReplDict
, (const void *)CFSTR("lt"), (const void *)CFSTR("<"));
1968 CFDictionaryAddValue(fullReplDict
, (const void *)CFSTR("gt"), (const void *)CFSTR(">"));
1969 CFDictionaryAddValue(fullReplDict
, (const void *)CFSTR("apos"), (const void *)CFSTR("'"));
1971 CFStringInitInlineBuffer(string
, &inlineBuf
, CFRangeMake(0, length
- 1));
1972 CFMutableStringRef newString
= CFStringCreateMutable(allocator
, 0);
1975 // Scan through the string in its entirety
1976 for(i
= 0; i
< length
; ) {
1977 uc
= CFStringGetCharacterFromInlineBuffer(&inlineBuf
, i
); i
++; // grab the next character and move i.
1980 entityStart
= i
- 1;
1981 entity
= 0xFFFF; // set this to a not-Unicode character as sentinel
1982 // we've hit the beginning of an entity. Copy everything from lastChunkStart to this point.
1983 if(lastChunkStart
< i
- 1) {
1984 sub
= CFStringCreateWithSubstring(allocator
, string
, CFRangeMake(lastChunkStart
, (i
- 1) - lastChunkStart
));
1985 CFStringAppend(newString
, sub
);
1989 uc
= CFStringGetCharacterFromInlineBuffer(&inlineBuf
, i
); i
++; // grab the next character and move i.
1990 // Now we can process the entity reference itself
1991 if(uc
== '#') { // this is a numeric entity.
1994 uc
= CFStringGetCharacterFromInlineBuffer(&inlineBuf
, i
); i
++;
1996 if(uc
== 'x') { // only lowercase x allowed. Translating numeric entity as hexadecimal.
1998 uc
= CFStringGetCharacterFromInlineBuffer(&inlineBuf
, i
); i
++;
2001 // process the provided digits 'til we're finished
2003 if (uc
>= '0' && uc
<= '9')
2004 entity
= entity
* base
+ (uc
-'0');
2005 else if (uc
>= 'a' && uc
<= 'f' && base
== 16)
2006 entity
= entity
* base
+ (uc
-'a'+10);
2007 else if (uc
>= 'A' && uc
<= 'F' && base
== 16)
2008 entity
= entity
* base
+ (uc
-'A'+10);
2012 uc
= CFStringGetCharacterFromInlineBuffer(&inlineBuf
, i
); i
++;
2019 // Scan to the end of the entity
2020 while(uc
!= ';' && i
< length
) {
2021 uc
= CFStringGetCharacterFromInlineBuffer(&inlineBuf
, i
); i
++;
2024 if(0xFFFF != entity
) { // it was numeric, and translated.
2025 // Now, output the result fo the entity
2026 if(entity
>= 0x10000) {
2027 UniChar characters
[2] = { ((entity
- 0x10000) >> 10) + 0xD800, ((entity
- 0x10000) & 0x3ff) + 0xDC00 };
2028 CFStringAppendCharacters(newString
, characters
, 2);
2030 UniChar character
= entity
;
2031 CFStringAppendCharacters(newString
, &character
, 1);
2033 } else { // it wasn't numeric.
2034 sub
= CFStringCreateWithSubstring(allocator
, string
, CFRangeMake(entityStart
+ 1, (i
- entityStart
- 2))); // This trims off the & and ; from the string, so we can use it against the dictionary itself.
2035 CFStringRef replacementString
= (CFStringRef
)CFDictionaryGetValue(fullReplDict
, sub
);
2036 if(replacementString
) {
2037 CFStringAppend(newString
, replacementString
);
2039 CFRelease(sub
); // let the old substring go, since we didn't find it in the dictionary
2040 sub
= CFStringCreateWithSubstring(allocator
, string
, CFRangeMake(entityStart
, (i
- entityStart
))); // create a new one, including the & and ;
2041 CFStringAppend(newString
, sub
); // ...and append that.
2043 CFRelease(sub
); // in either case, release the most-recent "sub"
2046 // move the lastChunkStart to the beginning of the next chunk.
2050 if(lastChunkStart
< length
) { // we've come out of the loop, let's get the rest of the string and tack it on.
2051 sub
= CFStringCreateWithSubstring(allocator
, string
, CFRangeMake(lastChunkStart
, i
- lastChunkStart
));
2052 CFStringAppend(newString
, sub
);
2056 CFRelease(fullReplDict
);