2 * Copyright (c) 2013 Apple Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
21 * @APPLE_LICENSE_HEADER_END@
25 Copyright (c) 1999-2013, Apple Inc. All rights reserved.
26 Responsibility: David Smith
29 #include <CoreFoundation/CFXMLParser.h>
30 #include <CoreFoundation/CFNumber.h>
31 #include "CFXMLInputStream.h"
32 #include "CFUniChar.h"
33 #include "CFInternal.h"
35 #pragma GCC diagnostic push
36 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
// Instance layout of a CFXMLParser object: the CF runtime header followed by the parser's
// input stream, scratch node, client callbacks/context and error state.
// NOTE(review): code later in this file reads parser->stack, parser->top, parser->capacity and
// parser->options, but those fields (and the closing brace) do not appear in this listing --
// confirm against the pristine source.
38 struct __CFXMLParser
{
39 CFRuntimeBase _cfBase
;
// Source of characters being parsed; set up by _initializeInputStream() in __CFXMLParserInit.
41 _CFXMLInputStream input
;
47 struct __CFXMLNode
*node
; // Our private node; we use it to report back information
// Both are left NULL by __CFXMLParserInit and released (if non-NULL) in __CFXMLParserDeallocate.
48 CFMutableDictionaryRef argDict
;
49 CFMutableArrayRef argArray
;
// Client-supplied callbacks and context, copied by value in __CFXMLParserInit.
52 CFXMLParserCallBacks callBacks
;
53 CFXMLParserContext context
;
// Current parse status and the error description that goes with it (NULL until an error is recorded).
55 CFXMLParserStatusCode status
;
56 CFStringRef errorString
;
59 static CFStringRef
__CFXMLParserCopyDescription(CFTypeRef cf
) {
60 const struct __CFXMLParser
*parser
= (const struct __CFXMLParser
*)cf
;
61 return CFStringCreateWithFormat(CFGetAllocator(cf
), NULL
, CFSTR("<CFXMLParser %p>"), parser
);
64 static void __CFXMLParserDeallocate(CFTypeRef cf
) {
65 struct __CFXMLParser
*parser
= (struct __CFXMLParser
*)cf
;
66 CFAllocatorRef alloc
= CFGetAllocator(parser
);
67 _freeInputStream(&(parser
->input
));
68 if (parser
->argDict
) CFRelease(parser
->argDict
);
69 if (parser
->argArray
) CFRelease(parser
->argArray
);
70 if (parser
->errorString
) CFRelease(parser
->errorString
);
71 if (parser
->node
) CFRelease(parser
->node
);
72 CFAllocatorDeallocate(alloc
, parser
->stack
);
73 if (parser
->context
.info
&& parser
->context
.release
) {
74 parser
->context
.release(parser
->context
.info
);
// Cached CFTypeID for CFXMLParser. Starts out as _kCFRuntimeNotATypeID and is replaced by
// __CFXMLParserInitialize() the first time CFXMLParserGetTypeID() is called.
78 static CFTypeID __kCFXMLParserTypeID
= _kCFRuntimeNotATypeID
;
// Runtime class record passed to _CFRuntimeRegisterClass() by __CFXMLParserInitialize().
// Of the entries visible here, it wires up the finalizer (__CFXMLParserDeallocate) and the
// description callback (__CFXMLParserCopyDescription).
// NOTE(review): the remaining initializer entries and the closing "};" are not present in
// this listing -- confirm against the pristine source.
80 static const CFRuntimeClass __CFXMLParserClass
= {
85 __CFXMLParserDeallocate
,
89 __CFXMLParserCopyDescription
92 static void __CFXMLParserInitialize(void) {
93 __kCFXMLParserTypeID
= _CFRuntimeRegisterClass(&__CFXMLParserClass
);
96 CFTypeID
CFXMLParserGetTypeID(void) {
97 if (_kCFRuntimeNotATypeID
== __kCFXMLParserTypeID
) __CFXMLParserInitialize();
98 return __kCFXMLParserTypeID
;
// Copies the context stored in the parser into the caller-provided *context, one field at a time.
//   parser  - parser to query; asserted non-NULL and type-checked.
//   context - destination structure filled in by this call.
// NOTE(review): the listing skips one source line between the type check and the first copy;
// a NULL guard on `context` may live there -- confirm against the pristine source.
101 void CFXMLParserGetContext(CFXMLParserRef parser
, CFXMLParserContext
*context
) {
102 CFAssert1(parser
!= NULL
, __kCFLogAssertion
, "%s(): NULL parser not permitted", __PRETTY_FUNCTION__
);
103 __CFGenericValidateType(parser
, CFXMLParserGetTypeID());
// Field-by-field copy of the stored context.
105 context
->version
= parser
->context
.version
;
106 context
->info
= parser
->context
.info
;
107 context
->retain
= parser
->context
.retain
;
108 context
->release
= parser
->context
.release
;
109 context
->copyDescription
= parser
->context
.copyDescription
;
// UNFAULT_CALLBACK (macro defined outside this file section) is applied to each copied function pointer.
110 UNFAULT_CALLBACK(context
->retain
);
111 UNFAULT_CALLBACK(context
->release
);
112 UNFAULT_CALLBACK(context
->copyDescription
);
// Copies the callback table stored in the parser into the caller-provided *callBacks.
//   parser    - parser to query; type-checked.
//   callBacks - destination structure filled in by this call.
// NOTE(review): the listing skips one source line after the type check; a NULL guard on
// `callBacks` may live there -- confirm against the pristine source.
116 void CFXMLParserGetCallBacks(CFXMLParserRef parser
, CFXMLParserCallBacks
*callBacks
) {
117 __CFGenericValidateType(parser
, CFXMLParserGetTypeID());
// Field-by-field copy of the stored callback table.
119 callBacks
->version
= parser
->callBacks
.version
;
120 callBacks
->createXMLStructure
= parser
->callBacks
.createXMLStructure
;
121 callBacks
->addChild
= parser
->callBacks
.addChild
;
122 callBacks
->endXMLStructure
= parser
->callBacks
.endXMLStructure
;
123 callBacks
->resolveExternalEntity
= parser
->callBacks
.resolveExternalEntity
;
124 callBacks
->handleError
= parser
->callBacks
.handleError
;
// Mirrors the FAULT_CALLBACK calls made on the same five pointers in __CFXMLParserInit.
125 UNFAULT_CALLBACK(callBacks
->createXMLStructure
);
126 UNFAULT_CALLBACK(callBacks
->addChild
);
127 UNFAULT_CALLBACK(callBacks
->endXMLStructure
);
128 UNFAULT_CALLBACK(callBacks
->resolveExternalEntity
);
129 UNFAULT_CALLBACK(callBacks
->handleError
);
133 CFURLRef
CFXMLParserGetSourceURL(CFXMLParserRef parser
) {
134 __CFGenericValidateType(parser
, CFXMLParserGetTypeID());
135 return parser
->input
.url
;
138 /* Returns the character index or line number of the current parse location */
139 CFIndex
CFXMLParserGetLocation(CFXMLParserRef parser
) {
140 __CFGenericValidateType(parser
, CFXMLParserGetTypeID());
141 return _inputStreamCurrentLocation(&parser
->input
);
144 CFIndex
CFXMLParserGetLineNumber(CFXMLParserRef parser
) {
145 __CFGenericValidateType(parser
, CFXMLParserGetTypeID());
146 return _inputStreamCurrentLine(&parser
->input
);
149 /* Returns the top-most object returned by the createXMLStructure callback */
// Returns the bottom entry of the parser's node stack (stack[0]), which CFXMLParserParse()
// fills with the result of the createXMLStructure callback for the document node.
// The stack is only consulted when capacity > 0, i.e. after CFXMLParserParse() has allocated it.
// NOTE(review): the branch taken when capacity == 0 is not present in this listing -- confirm
// what is returned in that case against the pristine source.
150 void *CFXMLParserGetDocument(CFXMLParserRef parser
) {
151 __CFGenericValidateType(parser
, CFXMLParserGetTypeID());
152 if (parser
->capacity
> 0)
153 return parser
->stack
[0];
158 CFXMLParserStatusCode
CFXMLParserGetStatusCode(CFXMLParserRef parser
) {
159 __CFGenericValidateType(parser
, CFXMLParserGetTypeID());
160 return parser
->status
;
163 CFStringRef
CFXMLParserCopyErrorDescription(CFXMLParserRef parser
) {
164 __CFGenericValidateType(parser
, CFXMLParserGetTypeID());
165 return (CFStringRef
)CFRetain(parser
->errorString
);
168 void CFXMLParserAbort(CFXMLParserRef parser
, CFXMLParserStatusCode errorCode
, CFStringRef errorDescription
) {
169 __CFGenericValidateType(parser
, CFXMLParserGetTypeID());
170 CFAssert1(errorCode
> 0, __kCFLogAssertion
, "%s(): errorCode must be greater than zero", __PRETTY_FUNCTION__
);
171 CFAssert1(errorDescription
!= NULL
, __kCFLogAssertion
, "%s(): errorDescription may not be NULL", __PRETTY_FUNCTION__
);
172 __CFGenericValidateType(errorDescription
, CFStringGetTypeID());
174 parser
->status
= errorCode
;
175 if (parser
->errorString
) CFRelease(parser
->errorString
);
176 parser
->errorString
= (CFStringRef
)CFStringCreateCopy(kCFAllocatorSystemDefault
, errorDescription
);
180 static Boolean
parseXML(CFXMLParserRef parser
);
181 static Boolean
parseComment(CFXMLParserRef parser
, Boolean report
);
182 static Boolean
parseProcessingInstruction(CFXMLParserRef parser
, Boolean report
);
183 static Boolean
parseInlineDTD(CFXMLParserRef parser
);
184 static Boolean
parseDTD(CFXMLParserRef parser
);
185 static Boolean
parsePhysicalEntityReference(CFXMLParserRef parser
);
186 static Boolean
parseCDSect(CFXMLParserRef parser
);
187 static Boolean
parseEntityReference(CFXMLParserRef parser
, Boolean report
);
188 static Boolean
parsePCData(CFXMLParserRef parser
);
189 static Boolean
parseWhitespace(CFXMLParserRef parser
);
190 static Boolean
parseAttributeListDeclaration(CFXMLParserRef parser
);
191 static Boolean
parseNotationDeclaration(CFXMLParserRef parser
);
192 static Boolean
parseElementDeclaration(CFXMLParserRef parser
);
193 static Boolean
parseEntityDeclaration(CFXMLParserRef parser
);
194 static Boolean
parseExternalID(CFXMLParserRef parser
, Boolean alsoAcceptPublicID
, CFXMLExternalID
*extID
);
195 static Boolean
parseCloseTag(CFXMLParserRef parser
, CFStringRef tag
);
196 static Boolean
parseTagContent(CFXMLParserRef parser
);
197 static Boolean
parseTag(CFXMLParserRef parser
);
198 static Boolean
parseAttributes(CFXMLParserRef parser
);
199 static Boolean
parseAttributeValue(CFXMLParserRef parser
, CFMutableStringRef str
);
201 // Utilities; may need to make these accessible to the property list parser to avoid code duplication
202 static void _CFReportError(CFXMLParserRef parser
, CFXMLParserStatusCode errNum
, const char *str
);
203 static Boolean
reportNewLeaf(CFXMLParserRef parser
); // Assumes parser->node has been set and is ready to go
204 static void pushXMLNode(CFXMLParserRef parser
, void *node
);
// Shared constructor behind CFXMLParserCreate() and CFXMLParserCreateWithDataFromURL().
//   alloc      - allocator for the parser and its scratch node.
//   dataSource - URL of the document (may be NULL per the callers' assertions).
//   options    - parse option flags, stored in parser->options.
//   xmlData    - in-memory document, or NULL when the data is to come from dataSource.
//   version    - node version stored on the scratch node.
//   callBacks  - client callbacks, copied by value.
//   context    - client context, copied by value; info is retained through context->retain.
// NOTE(review): several source lines are absent from this listing (the declaration of `buf`,
// the assignment that stores `node` into parser->node, the branch structure around the context
// copy / context reset, the failure branch and the return) -- confirm against the pristine source.
206 static CFXMLParserRef
__CFXMLParserInit(CFAllocatorRef alloc
, CFURLRef dataSource
, CFOptionFlags options
, CFDataRef xmlData
, CFIndex version
, CFXMLParserCallBacks
*callBacks
, CFXMLParserContext
*context
) {
// Both objects are allocated up front; the rest of the setup only runs when both succeeded.
207 struct __CFXMLParser
*parser
= (struct __CFXMLParser
*)_CFRuntimeCreateInstance(alloc
, CFXMLParserGetTypeID(), sizeof(struct __CFXMLParser
) - sizeof(CFRuntimeBase
), NULL
);
208 struct __CFXMLNode
*node
= (struct __CFXMLNode
*)_CFRuntimeCreateInstance(alloc
, CFXMLNodeGetTypeID(), sizeof(struct __CFXMLNode
) - sizeof(CFRuntimeBase
), NULL
);
210 if (parser
&& node
) {
// From here on use the allocator the instance actually ended up with.
211 alloc
= CFGetAllocator(parser
);
212 _initializeInputStream(&(parser
->input
), alloc
, dataSource
, xmlData
);
// NOTE(review): `top` is copied from `stack` before `stack` is set to NULL on the next
// statement; this presumably relies on the freshly created instance being zero-filled -- confirm.
213 parser
->top
= parser
->stack
;
214 parser
->stack
= NULL
;
215 parser
->capacity
= 0;
// Initial 128-UniChar backing buffer for the scratch node's mutable dataString.
217 buf
= (UniChar
*)CFAllocatorAllocate(alloc
, 128*sizeof(UniChar
), 0);
219 parser
->node
->dataString
= CFStringCreateMutableWithExternalCharactersNoCopy(alloc
, buf
, 0, 128, alloc
);
220 parser
->node
->additionalData
= NULL
;
221 parser
->node
->version
= version
;
222 parser
->argDict
= NULL
; // don't create these until necessary
223 parser
->argArray
= NULL
;
225 parser
->options
= options
;
226 parser
->callBacks
= *callBacks
;
// FAULT_CALLBACK (macro defined outside this file section) is applied to each stored callback pointer.
228 FAULT_CALLBACK((void **)&(parser
->callBacks
.createXMLStructure
));
229 FAULT_CALLBACK((void **)&(parser
->callBacks
.addChild
));
230 FAULT_CALLBACK((void **)&(parser
->callBacks
.endXMLStructure
));
231 FAULT_CALLBACK((void **)&(parser
->callBacks
.resolveExternalEntity
));
232 FAULT_CALLBACK((void **)&(parser
->callBacks
.handleError
));
// Copy the client context and retain its info pointer through the client's own retain callback.
235 parser
->context
= *context
;
236 if (parser
->context
.info
&& parser
->context
.retain
) {
237 parser
->context
.retain(parser
->context
.info
);
// Empty-context reset. NOTE(review): presumably the alternative taken when no context was
// supplied; the line selecting between the copy above and this reset is not in the listing.
240 parser
->context
.version
= 0;
241 parser
->context
.info
= NULL
;
242 parser
->context
.retain
= NULL
;
243 parser
->context
.release
= NULL
;
244 parser
->context
.copyDescription
= NULL
;
246 parser
->status
= kCFXMLStatusParseNotBegun
;
247 parser
->errorString
= NULL
;
// Cleanup of whichever object was created. NOTE(review): presumably the failure path for the
// `parser && node` test above -- confirm.
249 if (parser
) CFRelease(parser
);
250 if (node
) CFRelease(node
);
256 CFXMLParserRef
CFXMLParserCreate(CFAllocatorRef allocator
, CFDataRef xmlData
, CFURLRef dataSource
, CFOptionFlags parseOptions
, CFIndex versionOfNodes
, CFXMLParserCallBacks
*callBacks
, CFXMLParserContext
*context
) {
257 CFAssert1(xmlData
!= NULL
, __kCFLogAssertion
, "%s(): NULL data not permitted", __PRETTY_FUNCTION__
);
258 __CFGenericValidateType(xmlData
, CFDataGetTypeID());
259 CFAssert1(dataSource
== NULL
|| CFGetTypeID(dataSource
) == CFURLGetTypeID(), __kCFLogAssertion
, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__
);
260 CFAssert1(callBacks
!= NULL
&& callBacks
->createXMLStructure
!= NULL
&& callBacks
->addChild
!= NULL
&& callBacks
->endXMLStructure
!= NULL
, __kCFLogAssertion
, "%s(): callbacks createXMLStructure, addChild, and endXMLStructure must all be non-NULL", __PRETTY_FUNCTION__
);
261 CFAssert2(versionOfNodes
<= 1, __kCFLogAssertion
, "%s(): version number %d is higher than supported by CFXMLParser", __PRETTY_FUNCTION__
, versionOfNodes
);
262 CFAssert1(versionOfNodes
!= 0, __kCFLogAssertion
, "%s(): version number 0 is no longer supported by CFXMLParser", __PRETTY_FUNCTION__
);
263 return __CFXMLParserInit(allocator
, dataSource
, parseOptions
, xmlData
, versionOfNodes
, callBacks
, context
);
266 CFXMLParserRef
CFXMLParserCreateWithDataFromURL(CFAllocatorRef allocator
, CFURLRef dataSource
, CFOptionFlags parseOptions
, CFIndex versionOfNodes
, CFXMLParserCallBacks
*callBacks
, CFXMLParserContext
*context
) {
267 CFAssert1(dataSource
== NULL
|| CFGetTypeID(dataSource
) == CFURLGetTypeID(), __kCFLogAssertion
, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__
);
268 CFAssert1(callBacks
!= NULL
&& callBacks
->createXMLStructure
!= NULL
&& callBacks
->addChild
!= NULL
&& callBacks
->endXMLStructure
!= NULL
, __kCFLogAssertion
, "%s(): callbacks createXMLStructure, addChild, and endXMLStructure must all be non-NULL", __PRETTY_FUNCTION__
);
269 CFAssert2(versionOfNodes
<= 1, __kCFLogAssertion
, "%s(): version number %d is higher than supported by CFXMLParser", __PRETTY_FUNCTION__
, versionOfNodes
);
270 CFAssert1(versionOfNodes
!= 0, __kCFLogAssertion
, "%s(): version number 0 is no longer supported by CFXMLParser", __PRETTY_FUNCTION__
);
272 return __CFXMLParserInit(allocator
, dataSource
, parseOptions
, NULL
, versionOfNodes
, callBacks
, context
);
// Runs the parse. Returns false immediately if the parser's status is anything other than
// kCFXMLStatusParseNotBegun; otherwise opens the input stream, creates the document node
// through the client's createXMLStructure callback, and hands off to parseXML().
// NOTE(review): the else lines, early returns and closing braces of the error paths are not
// present in this listing -- confirm the exact branch structure against the pristine source.
275 Boolean
CFXMLParserParse(CFXMLParserRef parser
) {
276 CFXMLDocumentInfo docData
;
277 __CFGenericValidateType(parser
, CFXMLParserGetTypeID());
278 if (parser
->status
!= kCFXMLStatusParseNotBegun
) return false;
279 parser
->status
= kCFXMLStatusParseInProgress
;
// Opening the stream can fail in two ways: no data at all, or data with an unrecognised encoding.
281 if (!_openInputStream(&parser
->input
)) {
282 if (!parser
->input
.data
) {
284 parser
->status
= kCFXMLErrorNoData
;
285 parser
->errorString
= CFStringCreateWithFormat(CFGetAllocator(parser
), NULL
, CFSTR("No data found at %@"), CFURLGetString(parser
->input
.url
));
287 // couldn't figure out the encoding
288 CFAssert(parser
->input
.encoding
== kCFStringEncodingInvalidId
, __kCFLogAssertion
, "CFXMLParser internal error: input stream could not be opened");
289 parser
->status
= kCFXMLErrorUnknownEncoding
;
290 parser
->errorString
= CFStringCreateWithCString(CFGetAllocator(parser
), "Encountered unknown encoding", kCFStringEncodingASCII
);
// Tell the client about the failure if it registered an error handler.
292 if (parser
->callBacks
.handleError
) {
293 INVOKE_CALLBACK3(parser
->callBacks
.handleError
, parser
, parser
->status
, parser
->context
.info
);
298 // Create the document
// The node stack starts with room for 16 entries; slot 0 holds the client's document structure.
299 parser
->stack
= (void **)CFAllocatorAllocate(CFGetAllocator(parser
), 16 * sizeof(void *), 0);
300 parser
->capacity
= 16;
301 parser
->node
->dataTypeID
= kCFXMLNodeTypeDocument
;
302 docData
.encoding
= _inputStreamGetEncoding(&parser
->input
);
303 docData
.sourceURL
= parser
->input
.url
;
// docData lives on this function's stack, so additionalData is cleared again right after the callback.
304 parser
->node
->additionalData
= &docData
;
305 parser
->stack
[0] = (void *)INVOKE_CALLBACK3(parser
->callBacks
.createXMLStructure
, parser
, parser
->node
, parser
->context
.info
);
306 parser
->top
= parser
->stack
;
307 parser
->node
->additionalData
= NULL
;
309 // Client may have called CFXMLParserAbort() during any callback, so we must always check to see if we have an error status after a callback
310 if (parser
->status
!= kCFXMLStatusParseInProgress
) {
311 _CFReportError(parser
, parser
->status
, NULL
);
314 return parseXML(parser
);
317 /* The next several functions are all intended to parse past a particular XML structure. They expect parser->curr to be set to the first content character of their structure (e.g. parseXMLComment expects parser->curr to be set just past "<!--"). They parse to the end of their structure, calling any necessary callbacks along the way, and advancing parser->curr as they go. They either return void (not possible for the parse to fail) or they return a Boolean (success/failure). The calling routines are expected to catch returned Booleans and fail immediately if false is returned. */
319 // [3] S ::= (#x20 | #x9 | #xD | #xA)+
// Consumes a run of whitespace from the input stream. Unless kCFXMLParserSkipWhitespace is set
// in the parser options, the skipped characters are collected into the scratch node's dataString
// so they can be reported to the client as a kCFXMLNodeTypeWhitespace leaf via reportNewLeaf().
// NOTE(review): the declaration of `len` and the condition guarding the report (plus the
// non-reporting return) are not present in this listing -- confirm against the pristine source.
320 static Boolean
parseWhitespace(CFXMLParserRef parser
) {
322 Boolean report
= !(parser
->options
& kCFXMLParserSkipWhitespace
);
// Passing NULL instead of the dataString tells the stream not to collect the characters.
323 len
= _inputStreamSkipWhitespace(&parser
->input
, report
? (CFMutableStringRef
)(parser
->node
->dataString
) : NULL
);
325 parser
->node
->dataTypeID
= kCFXMLNodeTypeWhitespace
;
326 parser
->node
->additionalData
= NULL
;
327 return reportNewLeaf(parser
);
333 // parser should be just past "<!--"
// Parses the remainder of a comment; the stream is expected to be positioned just past "<!--".
//   report - caller's request to report the comment; forced to false when
//            kCFXMLParserSkipMetaData is set in the parser options.
// Scans up to the next "--" (collecting the text into the scratch node's dataString when
// reporting), then requires the following character to be '>'.
// NOTE(review): the declaration of `ch`, the `return false` lines of the error branches and the
// condition guarding the final report are not present in this listing -- confirm.
334 static Boolean
parseComment(CFXMLParserRef parser
, Boolean report
) {
335 const UniChar dashes
[2] = {'-', '-'};
337 report
= report
&& (!(parser
->options
& kCFXMLParserSkipMetaData
));
// Either running out of input before "--" or before the character after it is an unexpected EOF.
338 if (!_inputStreamScanToCharacters(&parser
->input
, dashes
, 2, report
? (CFMutableStringRef
)(parser
->node
->dataString
) : NULL
) || !_inputStreamGetCharacter(&parser
->input
, &ch
)) {
339 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
,"Found unexpected EOF while parsing comment");
// "--" not immediately followed by '>' means the comment body itself contained "--".
341 } else if (ch
!= '>') {
342 _CFReportError(parser
, kCFXMLErrorMalformedComment
, "Found \"--\" within a comment");
345 parser
->node
->dataTypeID
= kCFXMLNodeTypeComment
;
346 parser
->node
->additionalData
= NULL
;
347 return reportNewLeaf(parser
);
354 [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
355 [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
357 // parser should be set to the first character after "<?"
// Parses a processing instruction; the stream is expected to be positioned just past "<?".
//   report - whether the caller wants the instruction reported to the client.
// Reads the target name, skips whitespace, then scans to "?>" collecting the instruction text
// into a freshly created mutable string when reporting. The result is reported as a
// kCFXMLNodeTypeProcessingInstruction leaf with the target name as the node's dataString.
// NOTE(review): the declarations of `name` and `result`, the early returns, the condition
// guarding the report and the final release/return are not present in this listing -- confirm.
358 static Boolean
parseProcessingInstruction(CFXMLParserRef parser
, Boolean report
) {
359 const UniChar piTermination
[2] = {'?', '>'};
360 CFMutableStringRef str
;
363 if (!_inputStreamScanXMLName(&parser
->input
, false, &name
)) {
364 _CFReportError(parser
, kCFXMLErrorMalformedProcessingInstruction
, "Found malformed processing instruction");
367 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
// Only allocate a string for the instruction text when it will actually be reported
// (caller asked for it and the current top-of-stack structure is non-NULL).
368 str
= (report
&& *parser
->top
) ? CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
)) : NULL
;
369 if (!_inputStreamScanToCharacters(&parser
->input
, piTermination
, 2, str
)) {
370 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing processing instruction");
371 if (str
) CFRelease(str
);
376 CFXMLProcessingInstructionInfo data
;
// Temporarily swap the scratch node's dataString for the target name; the original is restored below.
378 CFStringRef tmp
= parser
->node
->dataString
;
379 parser
->node
->dataTypeID
= kCFXMLNodeTypeProcessingInstruction
;
380 parser
->node
->dataString
= name
;
381 data
.dataString
= str
;
382 parser
->node
->additionalData
= &data
;
383 result
= reportNewLeaf(parser
);
384 parser
->node
->additionalData
= NULL
;
385 parser
->node
->dataString
= tmp
;
394 [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
// The seven characters of the keyword "DOCTYPE" as UniChars (no terminator); matched by parseDTD().
396 static const UniChar _DoctypeOpening
[7] = {'D', 'O', 'C', 'T', 'Y', 'P', 'E'};
397 // first character should be immediately after the "<!"
// Parses a document type declaration; the stream is expected to be positioned just past "<!".
// Matches "DOCTYPE", whitespace and the document type name, optionally an external ID, reports
// the DTD node to the client (unless kCFXMLParserSkipMetaData is set or the current top-of-stack
// structure is NULL), pushes it on the node stack, parses an inline "[ ... ]" subset if present,
// then requires the closing '>' and calls endXMLStructure for the DTD structure.
// NOTE(review): the declarations of `ch` and `name`, several if/else lines, early returns and
// the final return are not present in this listing -- confirm the exact branch structure.
398 static Boolean
parseDTD(CFXMLParserRef parser
) {
400 Boolean success
, hasExtID
= false;
401 CFXMLDocumentTypeInfo docData
= {{NULL
, NULL
}};
402 void *dtdStructure
= NULL
;
405 // First pass "DOCTYPE"
406 success
= _inputStreamMatchString(&parser
->input
, _DoctypeOpening
, 7);
407 success
= success
&& _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0;
408 success
= success
&& _inputStreamScanXMLName(&parser
->input
, false, &name
);
410 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
411 success
= _inputStreamPeekCharacter(&parser
->input
, &ch
);
413 // didn't make it past "DOCTYPE" successfully.
414 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found malformed DTD");
// Anything other than '[' (inline subset) or '>' (end of declaration) must be an external ID.
417 if (success
&& ch
!= '[' && ch
!= '>') {
420 success
= parseExternalID(parser
, false, &(docData
.externalID
));
422 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
423 success
= _inputStreamPeekCharacter(&parser
->input
, &ch
);
// Report the DTD node to the client, using the document type name as the node's dataString.
427 if (!(parser
->options
& kCFXMLParserSkipMetaData
) && *(parser
->top
)) {
428 CFStringRef tmp
= parser
->node
->dataString
;
429 parser
->node
->dataTypeID
= kCFXMLNodeTypeDocumentType
;
430 parser
->node
->dataString
= name
;
431 parser
->node
->additionalData
= &docData
;
432 dtdStructure
= (void *)INVOKE_CALLBACK3(parser
->callBacks
.createXMLStructure
, parser
, parser
->node
, parser
->context
.info
);
// Attach the new structure to its parent only if the client returned one and did not abort.
433 if (dtdStructure
&& parser
->status
== kCFXMLStatusParseInProgress
) {
434 INVOKE_CALLBACK4(parser
->callBacks
.addChild
, parser
, *parser
->top
, dtdStructure
, parser
->context
.info
);
436 parser
->node
->additionalData
= NULL
;
437 parser
->node
->dataString
= tmp
;
438 if (parser
->status
!= kCFXMLStatusParseInProgress
) {
439 // callback called CFXMLParserAbort()
440 _CFReportError(parser
, parser
->status
, NULL
);
// The external ID strings are no longer needed once the node has been reported.
446 if (docData
.externalID
.publicID
) CFRelease(docData
.externalID
.publicID
);
447 if (docData
.externalID
.systemID
) CFRelease(docData
.externalID
.systemID
);
448 pushXMLNode(parser
, dtdStructure
);
450 if (success
&& ch
== '[') {
// Consume the '[' that was only peeked at, then parse the inline subset.
452 _inputStreamGetCharacter(&parser
->input
, &ch
);
453 if (!parseInlineDTD(parser
)) return false;
454 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
455 success
= _inputStreamGetCharacter(&parser
->input
, &ch
) && ch
== '>';
456 } else if (success
&& ch
== '>') {
// Consume the '>' that was only peeked at.
458 _inputStreamGetCharacter(&parser
->input
, &ch
);
// Distinguish running out of input from a structurally bad declaration.
461 if (_inputStreamAtEOF(&parser
->input
)) {
462 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing DTD");
464 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found malformed DTD");
469 parser
->top
--; // Remove dtdStructure from the stack
471 if (success
&& dtdStructure
) {
472 INVOKE_CALLBACK3(parser
->callBacks
.endXMLStructure
, parser
, dtdStructure
, parser
->context
.info
);
473 if (parser
->status
!= kCFXMLStatusParseInProgress
) {
474 _CFReportError(parser
, parser
->status
, NULL
);
482 [69] PEReference ::= '%' Name ';'
// Parses the Name and terminating ';' of a parameter-entity reference ('%' Name ';').
// NOTE(review): presumably the '%' has already been consumed by the caller -- confirm.
// On success, and unless kCFXMLParserSkipMetaData is set or the current top-of-stack structure
// is NULL, the reference is reported as a kCFXMLNodeTypeEntityReference leaf whose entityType is
// kCFXMLEntityTypeParameter.
// NOTE(review): the declarations of `ch`, `name` and `result`, the return statements and the
// closing braces are not present in this listing -- confirm.
484 static Boolean
parsePhysicalEntityReference(CFXMLParserRef parser
) {
487 if (!_inputStreamScanXMLName(&parser
->input
, false, &name
)) {
488 _CFReportError(parser
, kCFXMLErrorMalformedName
, "Found malformed name while parsing physical entity reference");
490 } else if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
491 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing physical entity reference");
// The name must be terminated by ';'.
493 } else if (ch
!= ';') {
494 _CFReportError(parser
, kCFXMLErrorMalformedName
, "Found malformed name while parsing physical entity reference");
496 } else if (!(parser
->options
& kCFXMLParserSkipMetaData
) && *(parser
->top
)) {
497 CFXMLEntityReferenceInfo myData
;
// Temporarily swap the scratch node's dataString for the entity name; restored after reporting.
499 CFStringRef tmp
= parser
->node
->dataString
;
500 parser
->node
->dataTypeID
= kCFXMLNodeTypeEntityReference
;
501 parser
->node
->dataString
= name
;
502 myData
.entityType
= kCFXMLEntityTypeParameter
;
503 parser
->node
->additionalData
= &myData
;
504 result
= reportNewLeaf(parser
);
505 parser
->node
->additionalData
= NULL
;
506 parser
->node
->dataString
= tmp
;
514 [54] AttType ::= StringType | TokenizedType | EnumeratedType
515 [55] StringType ::= 'CDATA'
516 [56] TokenizedType ::= 'ID' | 'IDREF'| 'IDREFS'| 'ENTITY'| 'ENTITIES'| 'NMTOKEN'| 'NMTOKENS'
517 [57] EnumeratedType ::= NotationType | Enumeration
518 [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
519 [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
// Parses a parenthesised, '|'-separated list of names: '(' S? item (S? '|' S? item)* S? ')'.
//   useNMTokens - forwarded to _inputStreamScanXMLName() to select Nmtoken rather than Name
//                 scanning for each item.
// The scanned items themselves are discarded (NULL is passed as the output string).
// NOTE(review): the declaration of `ch`, the loop construct, the assignments to `done`, the
// return statements and the closing braces are not present in this listing -- confirm.
521 static Boolean
parseEnumeration(CFXMLParserRef parser
, Boolean useNMTokens
) {
523 Boolean done
= false;
// The list must open with '('.
524 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
525 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
527 } else if (ch
!= '(') {
528 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
// First item.
531 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
532 if (!_inputStreamScanXMLName(&parser
->input
, useNMTokens
, NULL
)) {
533 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
// After each item expect either ')' (end of list) or '|' followed by another item.
537 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
538 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
539 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
541 } else if (ch
== ')') {
543 } else if (ch
== '|') {
544 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
545 if (!_inputStreamScanXMLName(&parser
->input
, useNMTokens
, NULL
)) {
546 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
550 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
// Parses an attribute type in an ATTLIST declaration: one of the keyword types, a NOTATION
// type followed by an enumeration of names, or a bare enumeration of name tokens.
//   str - when non-NULL, the stream is marked on entry and the characters consumed since the
//         mark are fetched into str via _inputStreamGetCharactersFromMark(); the mark is
//         cleared afterwards.
// NOTE(review): the assignments to `success` in the keyword branch, the else lines, the
// conditions guarding the mark handling and the return are not present in this listing -- confirm.
557 static Boolean
parseAttributeType(CFXMLParserRef parser
, CFMutableStringRef str
) {
558 Boolean success
= false;
// Keyword table; shorter keywords are matched as prefixes of longer rows using the length
// argument (row 1 serves IDREFS/IDREF/ID, row 4 serves NMTOKENS/NMTOKEN).
559 static const UniChar attTypeStrings
[6][8] = {
560 {'C', 'D', 'A', 'T', 'A', '\0', '\0', '\0'},
561 {'I', 'D', 'R', 'E', 'F', 'S', '\0', '\0'},
562 {'E', 'N', 'T', 'I', 'T', 'Y', '\0', '\0'},
563 {'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S'},
564 {'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S'},
565 {'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N'} };
566 if (str
) _inputStreamSetMark(&parser
->input
);
// Longer spellings are tried before their prefixes so that e.g. "IDREFS" is not consumed as "ID".
567 if (_inputStreamMatchString(&parser
->input
, attTypeStrings
[0], 5) ||
568 _inputStreamMatchString(&parser
->input
, attTypeStrings
[1], 6) ||
569 _inputStreamMatchString(&parser
->input
, attTypeStrings
[1], 5) ||
570 _inputStreamMatchString(&parser
->input
, attTypeStrings
[1], 2) ||
571 _inputStreamMatchString(&parser
->input
, attTypeStrings
[2], 6) ||
572 _inputStreamMatchString(&parser
->input
, attTypeStrings
[3], 8) ||
573 _inputStreamMatchString(&parser
->input
, attTypeStrings
[4], 8) ||
574 _inputStreamMatchString(&parser
->input
, attTypeStrings
[4], 7)) {
// "NOTATION" must be followed by whitespace and an enumeration of Names.
576 } else if (_inputStreamMatchString(&parser
->input
, attTypeStrings
[5], 8)) {
578 if (_inputStreamSkipWhitespace(&parser
->input
, NULL
) == 0) {
579 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
582 success
= parseEnumeration(parser
, false);
// No keyword matched: treat the input as an enumeration of Nmtokens.
585 success
= parseEnumeration(parser
, true);
589 _inputStreamGetCharactersFromMark(&parser
->input
, str
);
591 _inputStreamClearMark(&parser
->input
);
596 /* [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) */
// Parses the default declaration of an attribute: "#REQUIRED", "#IMPLIED", or an attribute
// value optionally preceded by "#FIXED" and whitespace.
//   str - when non-NULL, the stream is marked on entry and the characters consumed since the
//         mark are fetched into str via _inputStreamGetCharactersFromMark(); the mark is
//         cleared afterwards.
// NOTE(review): the declarations of `success` and `ch`, several else lines, the conditions
// guarding the mark handling and the return are not present in this listing -- confirm.
597 static Boolean
parseAttributeDefaultDeclaration(CFXMLParserRef parser
, CFMutableStringRef str
) {
// Keywords that may follow '#'; the match length passed below excludes the '\0' padding.
598 const UniChar strings
[3][8] = {
599 {'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D'},
600 {'I', 'M', 'P', 'L', 'I', 'E', 'D', '\0'},
601 {'F', 'I', 'X', 'E', 'D', '\0', '\0', '\0'}};
604 if (str
) _inputStreamSetMark(&parser
->input
);
605 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
606 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
608 } else if (ch
== '#') {
609 if (_inputStreamMatchString(&parser
->input
, strings
[0], 8) ||
610 _inputStreamMatchString(&parser
->input
, strings
[1], 7)) {
// Neither REQUIRED nor IMPLIED: it must be "FIXED" followed by at least one whitespace character.
612 } else if (!_inputStreamMatchString(&parser
->input
, strings
[2], 5) || _inputStreamSkipWhitespace(&parser
->input
, NULL
) == 0) {
613 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
616 // we fall through if "#FIXED" was matched, and at least one whitespace character was stripped.
617 success
= parseAttributeValue(parser
, NULL
);
// Not a '#' keyword: push the character back and parse a plain attribute value.
620 _inputStreamReturnCharacter(&parser
->input
, ch
);
621 success
= parseAttributeValue(parser
, NULL
);
625 _inputStreamGetCharactersFromMark(&parser
->input
, str
);
627 _inputStreamClearMark(&parser
->input
);
633 [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
634 [53] AttDef ::= S Name S AttType S DefaultDecl
// Parses an attribute-list declaration: "ATTLIST" S Name followed by zero or more attribute
// definitions (S Name S AttType S DefaultDecl) and the closing '>'.
// Attribute definitions are accumulated in `attributes`, which starts out as the 8-entry stack
// array `attributeArray` and moves to the heap (doubling each time) when it fills up. On
// success the declaration is reported as a kCFXMLNodeTypeAttributeListDeclaration leaf, after
// which the per-attribute strings and any heap array are released.
// NOTE(review): the declarations of `ch`, `name` and `idx`, several else/break/return lines and
// closing braces are not present in this listing -- confirm the exact branch structure.
636 static Boolean
parseAttributeListDeclaration(CFXMLParserRef parser
) {
637 const UniChar attList
[7] = {'A', 'T', 'T', 'L', 'I', 'S', 'T'};
638 CFXMLAttributeListDeclarationInfo attListData
;
639 CFXMLAttributeDeclarationInfo attributeArray
[8], *attributes
=attributeArray
;
640 CFIndex capacity
= 8;
642 Boolean success
= true;
// Header: the keyword, mandatory whitespace, then the element name.
644 if (!_inputStreamMatchString(&parser
->input
, attList
, 7) ||
645 _inputStreamSkipWhitespace(&parser
->input
, NULL
) == 0 ||
646 !_inputStreamScanXMLName(&parser
->input
, false, &name
)) {
647 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
650 attListData
.numberOfAttributes
= 0;
651 if (!(*parser
->top
) || (parser
->options
& kCFXMLParserSkipMetaData
)) {
652 // Use this to mark that we don't need to collect attribute information to report to the client. Ultimately, we may want to collect this for our own use (for validation, for instance), but for now, the only reason we would create it would be for the client. -- REW, 2/9/2000
// One iteration per attribute definition; stops at '>' or when no separating whitespace is found.
655 while (_inputStreamPeekCharacter(&parser
->input
, &ch
) && ch
!= '>' && _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0) {
656 CFXMLAttributeDeclarationInfo
*attribute
= NULL
;
657 if (_inputStreamPeekCharacter(&parser
->input
, &ch
) && ch
== '>')
// Grow the attribute array when full: reallocate if already on the heap, otherwise make the
// first heap allocation.
// NOTE(review): the result of the reallocation overwrites `attributes` directly, and no copy
// of the stack array's contents into the first heap block is visible here -- confirm.
660 if (capacity
== attListData
.numberOfAttributes
) {
661 capacity
= 2*capacity
;
662 if (attributes
!= attributeArray
) {
663 attributes
= (CFXMLAttributeDeclarationInfo
*)CFAllocatorReallocate(CFGetAllocator(parser
), attributes
, capacity
* sizeof(CFXMLAttributeDeclarationInfo
), 0);
665 attributes
= (CFXMLAttributeDeclarationInfo
*)CFAllocatorAllocate(CFGetAllocator(parser
), capacity
* sizeof(CFXMLAttributeDeclarationInfo
), 0);
668 attribute
= &(attributes
[attListData
.numberOfAttributes
]);
669 // Much better if we can somehow create these strings immutable - then if the client (or we ourselves) has to copy them, they will end up multiply-retained, rather than having a new alloc and data copy performed. -- REW, 2/9/2000
670 attribute
->typeString
= CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
));
671 attribute
->defaultString
= CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
));
// Attribute name, followed by mandatory whitespace.
673 if (!_inputStreamScanXMLName(&parser
->input
, false, &(attribute
->attributeName
)) || (_inputStreamSkipWhitespace(&parser
->input
, NULL
) == 0)) {
674 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
// Attribute type; its text is captured into typeString when an attribute record is being kept.
678 if (!parseAttributeType(parser
, attribute
? (CFMutableStringRef
)attribute
->typeString
: NULL
)) {
682 if (_inputStreamSkipWhitespace(&parser
->input
, NULL
) == 0) {
683 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
// Default declaration; its text is captured into defaultString when an attribute record is being kept.
687 if (!parseAttributeDefaultDeclaration(parser
, attribute
? (CFMutableStringRef
)attribute
->defaultString
: NULL
)) {
691 attListData
.numberOfAttributes
++;
// The declaration must end with '>'.
694 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
695 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
697 } else if (ch
!= '>') {
698 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
700 } else if (attributes
) {
// Report the declaration with the element name temporarily installed as the node's dataString.
701 CFStringRef tmp
= parser
->node
->dataString
;
702 parser
->node
->dataTypeID
= kCFXMLNodeTypeAttributeListDeclaration
;
703 parser
->node
->dataString
= name
;
704 attListData
.attributes
= attributes
;
705 parser
->node
->additionalData
= (void *)&attListData
;
706 success
= reportNewLeaf(parser
);
707 parser
->node
->additionalData
= NULL
;
708 parser
->node
->dataString
= tmp
;
712 // Free up all that memory
714 for (idx
= 0; idx
< attListData
.numberOfAttributes
; idx
++) {
715 // Do not release attributeName here; it's a uniqued string from scanXMLName
716 CFRelease(attributes
[idx
].typeString
);
717 CFRelease(attributes
[idx
].defaultString
);
// Only the heap-allocated array needs freeing; the initial array lives on the stack.
719 if (attributes
!= attributeArray
) {
720 CFAllocatorDeallocate(CFGetAllocator(parser
), attributes
);
// Scans a quoted system literal from the input stream. In the branch that collects the text,
// the quoted string is turned into a URL (resolved relative to the parser's source URL) and
// stored in extID->systemID, or systemID is set to NULL; in the other branch the quoted string
// is scanned and discarded.
// NOTE(review): the declaration of `success`, the condition choosing between the collecting
// and discarding branches, the else lines, any release of urlStr and the return are not
// present in this listing -- confirm against the pristine source.
726 CF_INLINE Boolean
parseSystemLiteral(CFXMLParserRef parser
, CFXMLExternalID
*extID
) {
729 CFMutableStringRef urlStr
= CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
));
730 if (_inputStreamScanQuotedString(&parser
->input
, urlStr
)) {
732 extID
->systemID
= CFURLCreateWithString(CFGetAllocator(parser
), urlStr
, parser
->input
.url
);
734 extID
->systemID
= NULL
;
// Passing NULL scans past the quoted string without keeping its contents.
739 success
= _inputStreamScanQuotedString(&parser
->input
, NULL
);
745 [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
746 [83] PublicID ::= 'PUBLIC' S PubidLiteral
747 [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
748 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
749 [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
751 // This does NOT report errors itself; caller can check to see if parser->input is at EOF to determine whether the formatting failed or unexpected EOF occurred. -- REW, 2/2/2000
// Parses an external identifier: "PUBLIC" S PubidLiteral [S SystemLiteral] or
// "SYSTEM" S SystemLiteral.
//   alsoAcceptPublicID - when true, a PUBLIC id without a following system literal is accepted;
//                        the stream is marked before the optional part and rolled back to the
//                        mark if the system literal cannot be parsed.
//   extID              - output; both systemID and publicID are reset to NULL on entry.
// Per the comment preceding this function in the file, errors are not reported here; the caller
// inspects the stream (EOF or not) to decide which error applies.
// NOTE(review): the declarations of `success` and `ch`, the conditions choosing between the
// collecting and discarding scans of the public id, the else lines and the return are not
// present in this listing -- confirm against the pristine source.
752 static Boolean
parseExternalID(CFXMLParserRef parser
, Boolean alsoAcceptPublicID
, CFXMLExternalID
*extID
) {
753 const UniChar publicString
[6] = {'P', 'U', 'B', 'L', 'I', 'C'};
754 const UniChar systemString
[6] = {'S', 'Y', 'S', 'T', 'E', 'M'};
757 extID
->systemID
= NULL
;
758 extID
->publicID
= NULL
;
760 if (_inputStreamMatchString(&parser
->input
, publicString
, 6)) {
// "PUBLIC" must be followed by whitespace and a quoted public id.
761 success
= _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0;
763 extID
->publicID
= CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
));
764 success
= success
&& _inputStreamScanQuotedString(&parser
->input
, (CFMutableStringRef
)extID
->publicID
);
766 success
= success
&& _inputStreamScanQuotedString(&parser
->input
, NULL
);
770 if (alsoAcceptPublicID
) {
771 _inputStreamSetMark(&parser
->input
); // In case we need to roll back the parser
// The system literal needs leading whitespace and an opening quote; if any step fails the
// result is success only when a bare public id is acceptable.
773 if (_inputStreamSkipWhitespace(&parser
->input
, NULL
) == 0
774 || !_inputStreamPeekCharacter(&parser
->input
, &ch
)
775 || (ch
!= '\'' && ch
!= '\"')
776 || !parseSystemLiteral(parser
, extID
)) {
777 success
= alsoAcceptPublicID
;
778 if (alsoAcceptPublicID
) {
779 _inputStreamBackUpToMark(&parser
->input
);
784 if (alsoAcceptPublicID
) {
785 _inputStreamClearMark(&parser
->input
);
// "SYSTEM" must be followed by whitespace and a system literal.
788 } else if (_inputStreamMatchString(&parser
->input
, systemString
, 6)) {
789 success
= _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0 && parseSystemLiteral(parser
, extID
);
797 [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
799 static Boolean
parseNotationDeclaration(CFXMLParserRef parser
) {
800 static UniChar notationString
[8] = {'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N'};
801 Boolean report
= *(parser
->top
) && !(parser
->options
& kCFXMLParserSkipMetaData
);
802 CFXMLNotationInfo notationData
= {{NULL
, NULL
}};
805 _inputStreamMatchString(&parser
->input
, notationString
, 8) &&
806 _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0 &&
807 _inputStreamScanXMLName(&parser
->input
, false, report
? &name
: NULL
) &&
808 _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0 &&
809 parseExternalID(parser
, true, report
? &(notationData
.externalID
) : NULL
);
813 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
814 success
= (_inputStreamGetCharacter(&parser
->input
, &ch
) && ch
== '>');
817 if (_inputStreamAtEOF(&parser
->input
)) {
818 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
820 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
823 CFStringRef tmp
= parser
->node
->dataString
;
824 parser
->node
->dataTypeID
= kCFXMLNodeTypeNotation
;
825 parser
->node
->dataString
= name
;
826 parser
->node
->additionalData
= ¬ationData
;
827 success
= reportNewLeaf(parser
);
828 parser
->node
->additionalData
= NULL
;
829 parser
->node
->dataString
= tmp
;
831 if (notationData
.externalID
.systemID
) CFRelease(notationData
.externalID
.systemID
);
832 if (notationData
.externalID
.publicID
) CFRelease(notationData
.externalID
.publicID
);
837 [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
838 [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
839 [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
841 static Boolean
parseChoiceOrSequence(CFXMLParserRef parser
, Boolean pastParen
) {
842 UniChar ch
, separator
;
844 if (!_inputStreamGetCharacter(&parser
->input
, &ch
) || ch
!= '(') return false;
845 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
847 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) return false;
849 /* Now scanning cp, production [48] */
851 if (!parseChoiceOrSequence(parser
, false)) return false;
853 if (!_inputStreamScanXMLName(&parser
->input
, false, NULL
)) return false;
855 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) return false;
856 if (ch
== '?' || ch
== '*' || ch
== '+') _inputStreamGetCharacter(&parser
->input
, &ch
);
859 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
860 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) return false;
861 if (ch
== ')') return true;
862 if (ch
!= '|' && ch
!= ',') return false;
864 while (ch
== separator
) {
865 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
866 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) return false;
868 if (!_inputStreamScanXMLName(&parser
->input
, false, NULL
)) return false;
869 } else if (!parseChoiceOrSequence(parser
, false)) {
872 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
873 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) return false;
879 [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')'
881 static Boolean
parseMixedElementContent(CFXMLParserRef parser
) {
882 static const UniChar pcdataString
[7] = {'#', 'P', 'C', 'D', 'A', 'T', 'A'};
884 if (!_inputStreamMatchString(&parser
->input
, pcdataString
, 7)) return false;
885 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
886 if (!_inputStreamGetCharacter(&parser
->input
, &ch
) && (ch
== ')' || ch
== '|')) return false;
887 if (ch
== ')') return true;
890 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
891 if (!_inputStreamScanXMLName(&parser
->input
, false, NULL
)) return false;
892 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
893 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) return false;
895 if (ch
!= ')') return false;
896 if (!_inputStreamGetCharacter(&parser
->input
, &ch
) || ch
!= '*') return false;
901 [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
902 [47] children ::= (choice | seq) ('?' | '*' | '+')?
904 static Boolean
parseElementContentSpec(CFXMLParserRef parser
) {
905 static const UniChar eltContentEmpty
[5] = {'E', 'M', 'P', 'T', 'Y'};
906 static const UniChar eltContentAny
[3] = {'A', 'N', 'Y'};
908 if (_inputStreamMatchString(&parser
->input
, eltContentEmpty
, 5) || _inputStreamMatchString(&parser
->input
, eltContentAny
, 3)) {
910 } else if (!_inputStreamPeekCharacter(&parser
->input
, &ch
) || ch
!= '(') {
913 // We want to know if we have a Mixed per production [51]. If we don't, we will need to back up and call the parseChoiceOrSequence function. So we set the mark now. -- REW, 2/10/2000
914 _inputStreamGetCharacter(&parser
->input
, &ch
);
915 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
916 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) return false;
919 return parseMixedElementContent(parser
);
921 if (parseChoiceOrSequence(parser
, true)) {
922 if (_inputStreamPeekCharacter(&parser
->input
, &ch
) && (ch
== '*' || ch
== '?' || ch
== '+')) {
923 _inputStreamGetCharacter(&parser
->input
, &ch
);
934 [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
936 static Boolean
parseElementDeclaration(CFXMLParserRef parser
) {
937 Boolean report
= *(parser
->top
) && !(parser
->options
& kCFXMLParserSkipMetaData
);
939 static const UniChar eltChars
[7] = {'E', 'L', 'E', 'M', 'E', 'N', 'T'};
941 CFMutableStringRef contentDesc
= NULL
;
943 success
= _inputStreamMatchString(&parser
->input
, eltChars
, 7)
944 && _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0
945 && _inputStreamScanXMLName(&parser
->input
, false, report
? &name
: NULL
)
946 && _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0;
948 if (report
) _inputStreamSetMark(&parser
->input
);
949 success
= parseElementContentSpec(parser
);
950 if (success
&& report
) {
951 contentDesc
= CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
));
952 _inputStreamGetCharactersFromMark(&parser
->input
, contentDesc
);
954 if (report
) _inputStreamClearMark(&parser
->input
);
955 if (success
) _inputStreamSkipWhitespace(&parser
->input
, NULL
);
956 success
= success
&& _inputStreamMatchString(&parser
->input
, &ch
, 1);
959 if (_inputStreamAtEOF(&parser
->input
)) {
960 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
962 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
965 CFXMLElementTypeDeclarationInfo eltData
;
966 CFStringRef tmp
= parser
->node
->dataString
;
967 parser
->node
->dataTypeID
= kCFXMLNodeTypeElementTypeDeclaration
;
968 parser
->node
->dataString
= name
;
969 eltData
.contentDescription
= contentDesc
;
970 parser
->node
->additionalData
= &eltData
;
971 success
= reportNewLeaf(parser
);
972 parser
->node
->additionalData
= NULL
;
973 parser
->node
->dataString
= tmp
;
975 if (contentDesc
) CFRelease(contentDesc
);
980 [70] EntityDecl ::= GEDecl | PEDecl
981 [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
982 [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
983 [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
984 [74] PEDef ::= EntityValue | ExternalID
985 [76] NDataDecl ::= S 'NDATA' S Name
986 [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | "'" ([^%&'] | PEReference | Reference)* "'"
988 static Boolean
parseEntityDeclaration(CFXMLParserRef parser
) {
989 const UniChar entityStr
[6] = {'E', 'N', 'T', 'I', 'T', 'Y'};
991 Boolean isPEDecl
= false;
992 CFXMLEntityInfo entityData
;
994 Boolean report
= *(parser
->top
) && !(parser
->options
& kCFXMLParserSkipMetaData
);
996 _inputStreamMatchString(&parser
->input
, entityStr
, 6) &&
997 (_inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0) &&
998 _inputStreamPeekCharacter(&parser
->input
, &ch
);
1000 entityData
.replacementText
= NULL
;
1001 entityData
.entityID
.publicID
= NULL
;
1002 entityData
.entityID
.systemID
= NULL
;
1003 entityData
.notationName
= NULL
;
1004 // We will set entityType immediately before reporting
1006 if (success
&& ch
== '%') {
1007 _inputStreamGetCharacter(&parser
->input
, &ch
);
1008 success
= _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0;
1011 success
= success
&& _inputStreamScanXMLName(&parser
->input
, false, report
? &name
: NULL
) && (_inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0) && _inputStreamPeekCharacter(&parser
->input
, &ch
);
1012 if (success
&& (ch
== '\"' || ch
== '\'')) {
1014 // This is not quite correct - the string scanned cannot contain '%' or '&' unless it's as part of a valid entity reference -- REW, 2/2/2000
1016 entityData
.replacementText
= CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
));
1017 success
= _inputStreamScanQuotedString(&parser
->input
, (CFMutableStringRef
)entityData
.replacementText
);
1019 success
= _inputStreamScanQuotedString(&parser
->input
, NULL
);
1021 } else if (success
) {
1023 success
= parseExternalID(parser
, false, report
? &(entityData
.entityID
) : NULL
);
1024 if (success
&& !isPEDecl
&& _inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0) {
1025 // There could be an option NDataDecl
1026 // Don't we need to set entityData.notationName? -- REW, 3/6/2000
1027 const UniChar nDataStr
[5] = {'N', 'D', 'A', 'T', 'A'};
1028 if (_inputStreamMatchString(&parser
->input
, nDataStr
, 5)) {
1029 success
= (_inputStreamSkipWhitespace(&parser
->input
, NULL
) != 0) && _inputStreamScanXMLName(&parser
->input
, false, NULL
);
1034 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
1035 success
= _inputStreamGetCharacter(&parser
->input
, &ch
) && ch
== '>';
1038 if (_inputStreamAtEOF(&parser
->input
)) {
1039 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
1041 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
1044 CFStringRef tmp
= parser
->node
->dataString
;
1045 if (isPEDecl
) entityData
.entityType
= kCFXMLEntityTypeParameter
;
1046 else if (entityData
.replacementText
) entityData
.entityType
= kCFXMLEntityTypeParsedInternal
;
1047 else if (!entityData
.notationName
) entityData
.entityType
= kCFXMLEntityTypeParsedExternal
;
1048 else entityData
.entityType
= kCFXMLEntityTypeUnparsed
;
1049 parser
->node
->dataTypeID
= kCFXMLNodeTypeEntity
;
1050 parser
->node
->dataString
= name
;
1051 parser
->node
->additionalData
= &entityData
;
1052 success
= reportNewLeaf(parser
);
1053 parser
->node
->additionalData
= NULL
;
1054 parser
->node
->dataString
= tmp
;
1055 if (entityData
.replacementText
) CFRelease(entityData
.replacementText
);
1057 if (entityData
.entityID
.publicID
) CFRelease(entityData
.entityID
.publicID
);
1058 if (entityData
.entityID
.systemID
) CFRelease(entityData
.entityID
.systemID
);
1063 [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1064 [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
1066 // First character should be just past '['
1067 static Boolean
parseInlineDTD(CFXMLParserRef parser
) {
1068 Boolean success
= true;
1069 while (success
&& !_inputStreamAtEOF(&parser
->input
)) {
1072 parseWhitespace(parser
);
1073 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) break;
1076 success
= parsePhysicalEntityReference(parser
);
1077 } else if (ch
== '<') {
1079 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
1080 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
1084 // Processing Instruction
1085 success
= parseProcessingInstruction(parser
, true); // We can safely pass true here, because *parser->top will be NULL if kCFXMLParserSkipMetaData is true
1086 } else if (ch
== '!') {
1087 UniChar dashes
[2] = {'-', '-'};
1088 if (_inputStreamMatchString(&parser
->input
, dashes
, 2)) {
1090 success
= parseComment(parser
, true);
1092 // elementdecl | AttListDecl | EntityDecl | NotationDecl
1093 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) {
1094 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
1096 } else if (ch
== 'A') {
1098 success
= parseAttributeListDeclaration(parser
);
1099 } else if (ch
== 'N') {
1100 success
= parseNotationDeclaration(parser
);
1101 } else if (ch
== 'E') {
1102 // elementdecl | EntityDecl
1103 _inputStreamGetCharacter(&parser
->input
, &ch
);
1104 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) {
1105 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
1108 _inputStreamReturnCharacter(&parser
->input
, 'E');
1110 success
= parseElementDeclaration(parser
);
1111 } else if (ch
== 'N') {
1112 success
= parseEntityDeclaration(parser
);
1114 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
1118 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
1123 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
1126 } else if (ch
== ']') {
1129 _CFReportError(parser
, kCFXMLErrorMalformedDTD
, "Found unexpected character while parsing inline DTD");
1134 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Found unexpected EOF while parsing inline DTD");
1140 [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1142 static Boolean
parseTagContent(CFXMLParserRef parser
) {
1143 while (!_inputStreamAtEOF(&parser
->input
)) {
1145 CFIndex numWhitespaceCharacters
;
1147 _inputStreamSetMark(&parser
->input
);
1148 numWhitespaceCharacters
= _inputStreamSkipWhitespace(&parser
->input
, NULL
);
1149 // Don't report the whitespace yet; if the first thing we see is character data, we put the whitespace back and report it as part of the character data.
1150 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) break; // break == report unexpected EOF
1152 if (ch
!= '<' && ch
!= '&') { // CharData
1153 // Back off the whitespace; we'll report it with the PCData
1154 _inputStreamBackUpToMark(&parser
->input
);
1155 _inputStreamClearMark(&parser
->input
);
1156 if (!parsePCData(parser
)) return false;
1157 if(_inputStreamComposingErrorOccurred(&parser
->input
)) {
1158 _CFReportError(parser
, kCFXMLErrorEncodingConversionFailure
, "Encountered string encoding error");
1164 // element | Reference | CDSect | PI | Comment
1165 // We can safely report any whitespace now
1166 if (!(parser
->options
& kCFXMLParserSkipWhitespace
) && numWhitespaceCharacters
!= 0 && *(parser
->top
)) {
1167 _inputStreamReturnCharacter(&parser
->input
, ch
);
1168 _inputStreamGetCharactersFromMark(&parser
->input
, (CFMutableStringRef
)(parser
->node
->dataString
));
1169 parser
->node
->dataTypeID
= kCFXMLNodeTypeWhitespace
;
1170 parser
->node
->additionalData
= NULL
;
1171 if (!reportNewLeaf(parser
)) return false;
1172 _inputStreamGetCharacter(&parser
->input
, &ch
);
1174 _inputStreamClearMark(&parser
->input
);
1177 // Reference; for the time being, we don't worry about processing these; just report them as Entity references
1178 if (!parseEntityReference(parser
, true)) return false;
1182 // ch == '<'; element | CDSect | PI | Comment
1183 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) break;
1184 if (ch
== '?') { // PI
1185 _inputStreamGetCharacter(&parser
->input
, &ch
);
1186 if (!parseProcessingInstruction(parser
, true))
1188 } else if (ch
== '/') { // end tag; we're passing outside of content's production
1189 _inputStreamReturnCharacter(&parser
->input
, '<'); // Back off to the '<'
1191 } else if (ch
!= '!') { // element
1192 if (!parseTag(parser
)) return false;
1195 UniChar dashes
[3] = {'!', '-', '-'};
1196 if (_inputStreamMatchString(&parser
->input
, dashes
, 3)) {
1198 if (!parseComment(parser
, true)) return false;
1200 // Should have a CDSect; back off the "<!" and call parseCDSect
1201 _inputStreamReturnCharacter(&parser
->input
, '<');
1202 if (!parseCDSect(parser
)) return false;
1207 if(_inputStreamComposingErrorOccurred(&parser
->input
)) {
1208 _CFReportError(parser
, kCFXMLErrorEncodingConversionFailure
, "Encountered string encoding error");
1211 // Only way to get here is if premature EOF was found
1212 //#warning CF:Include the tag name here
1213 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing tag content");
1217 static Boolean
parseCDSect(CFXMLParserRef parser
) {
1218 const UniChar _CDSectOpening
[9] = {'<', '!', '[', 'C', 'D', 'A', 'T', 'A', '['};
1219 const UniChar _CDSectClose
[3] = {']', ']', '>'};
1220 if (!_inputStreamMatchString(&parser
->input
, _CDSectOpening
, 9)) {
1221 _CFReportError(parser
, kCFXMLErrorMalformedCDSect
, "Encountered bad prefix to a presumed CDATA section");
1224 if (!_inputStreamScanToCharacters(&parser
->input
, _CDSectClose
, 3, (CFMutableStringRef
)(parser
->node
->dataString
))) {
1225 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing CDATA section");
1229 parser
->node
->dataTypeID
= kCFXMLNodeTypeCDATASection
;
1230 parser
->node
->additionalData
= NULL
;
1231 return reportNewLeaf(parser
);
1235 [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1237 static Boolean
validateCharacterReference(CFStringRef str
) {
1239 CFIndex idx
, len
= CFStringGetLength(str
);
1240 if (len
< 2) return false;
1241 if (CFStringGetCharacterAtIndex(str
, 0) != '#') return false;
1242 if (CFStringGetCharacterAtIndex(str
, 1) == 'x') {
1245 if (len
== 2) return false;
1253 ch
= CFStringGetCharacterAtIndex(str
, idx
);
1255 if (!(ch
<= '9' && ch
>= '0') &&
1256 !(isHex
&& ((ch
>= 'a' && ch
<= 'f') || (ch
>= 'A' && ch
<= 'F')))) {
1260 return (idx
== len
);
1264 [67] Reference ::= EntityRef | CharRef
1265 [68] EntityRef ::= '&' Name ';'
1267 static Boolean
parseEntityReference(CFXMLParserRef parser
, Boolean report
) {
1269 CFXMLEntityReferenceInfo entData
;
1270 CFStringRef name
= NULL
;
1271 if (!_inputStreamPeekCharacter(&parser
->input
, &ch
)) {
1272 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing EntityReference");
1277 if (!_inputStreamScanToCharacters(&parser
->input
, &ch
, 1, (CFMutableStringRef
)parser
->node
->dataString
)) {
1278 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing EntityReference");
1280 } else if (!validateCharacterReference(parser
->node
->dataString
)) {
1281 _CFReportError(parser
, kCFXMLErrorMalformedCharacterReference
, "Encountered illegal character while parsing character reference");
1284 entData
.entityType
= kCFXMLEntityTypeCharacter
;
1285 name
= parser
->node
->dataString
;
1286 } else if (!_inputStreamScanXMLName(&parser
->input
, false, report
? &name
: NULL
) || !_inputStreamGetCharacter(&parser
->input
, &ch
) || ch
!= ';') {
1287 if (_inputStreamAtEOF(&parser
->input
)) {
1288 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing EntityReference");
1291 _CFReportError(parser
, kCFXMLErrorMalformedName
, "Encountered malformed name while parsing EntityReference");
1295 entData
.entityType
= kCFXMLEntityTypeParsedInternal
;
1298 CFStringRef tmp
= parser
->node
->dataString
;
1300 parser
->node
->dataTypeID
= kCFXMLNodeTypeEntityReference
;
1301 parser
->node
->dataString
= name
;
1302 parser
->node
->additionalData
= &entData
;
1303 success
= reportNewLeaf(parser
);
1304 parser
->node
->additionalData
= NULL
;
1305 parser
->node
->dataString
= tmp
;
1313 // Kept from old entity reference parsing....
1315 switch (*(parser
->curr
)) {
1317 if (len
>= 3 && *(parser
->curr
+1) == 't' && *(parser
->curr
+2) == ';') {
1322 parser
->errorString
= CFStringCreateWithFormat(parser
->allocator
, NULL
, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser
));
1325 if (len
>= 3 && *(parser
->curr
+1) == 't' && *(parser
->curr
+2) == ';') {
1330 parser
->errorString
= CFStringCreateWithFormat(parser
->allocator
, NULL
, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser
));
1332 case 'a': // "apos" or "amp"
1333 if (len
< 4) { // Not enough characters for either conversion
1334 parser
->errorString
= CFStringCreateWithCString(parser
->allocator
, "Encountered unexpected EOF", kCFStringEncodingASCII
);
1337 if (*(parser
->curr
+1) == 'm') {
1339 if (*(parser
->curr
+2) == 'p' && *(parser
->curr
+3) == ';') {
1344 } else if (*(parser
->curr
+1) == 'p') {
1346 if (len
> 4 && *(parser
->curr
+2) == 'o' && *(parser
->curr
+3) == 's' && *(parser
->curr
+4) == ';') {
1352 parser
->errorString
= CFStringCreateWithFormat(parser
->allocator
, NULL
, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser
));
1354 case 'q': // "quote"
1355 if (len
>= 6 && *(parser
->curr
+1) == 'u' && *(parser
->curr
+2) == 'o' && *(parser
->curr
+3) == 't' && *(parser
->curr
+4) == 'e' && *(parser
->curr
+5) == ';') {
1360 parser
->errorString
= CFStringCreateWithFormat(parser
->allocator
, NULL
, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser
));
1365 Boolean isHex
= false;
1366 if ( len
< 4) { // Not enough characters to make it all fit! Need at least "&#d;"
1367 parser
->errorString
= CFStringCreateWithCString(parser
->allocator
, "Encountered unexpected EOF", kCFStringEncodingASCII
);
1371 if (*(parser
->curr
) == 'x') {
1375 while (parser
->curr
< parser
->end
) {
1376 ch
= *(parser
->curr
);
1378 CFStringAppendCharacters(string
, &num
, 1);
1382 if (!isHex
) num
= num
*10;
1383 else num
= num
<< 4;
1384 if (ch
<= '9' && ch
>= '0') {
1386 } else if (!isHex
) {
1387 parser
->errorString
= CFStringCreateWithFormat(parser
->allocator
, NULL
, CFSTR("Encountered unexpected character %c at line %d"), ch
, lineNumber(parser
));
1389 } else if (ch
>= 'a' && ch
<= 'f') {
1390 num
+= 10 + (ch
- 'a');
1391 } else if (ch
>= 'A' && ch
<= 'F') {
1392 num
+= 10 + (ch
- 'A');
1394 parser
->errorString
= CFStringCreateWithFormat(parser
->allocator
, NULL
, CFSTR("Encountered unexpected character %c at line %d"), ch
, lineNumber(parser
));
1398 parser
->errorString
= CFStringCreateWithCString(parser
->allocator
, "Encountered unexpected EOF", kCFStringEncodingASCII
);
1402 parser
->errorString
= CFStringCreateWithFormat(parser
->allocator
, NULL
, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser
));
1405 CFStringAppendCharacters(string
, &ch
, 1);
1410 [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
1412 static Boolean
parsePCData(CFXMLParserRef parser
) {
1414 Boolean done
= false;
1415 _inputStreamSetMark(&parser
->input
);
1416 while (!done
&& _inputStreamGetCharacter(&parser
->input
, &ch
)) {
1420 _inputStreamReturnCharacter(&parser
->input
, ch
);
1425 const UniChar endSequence
[2] = {']', '>'};
1426 if (_inputStreamMatchString(&parser
->input
, endSequence
, 2)) {
1427 _CFReportError(parser
, kCFXMLErrorMalformedParsedCharacterData
, "Encountered \"]]>\" in parsed character data");
1428 _inputStreamClearMark(&parser
->input
);
1437 _inputStreamGetCharactersFromMark(&parser
->input
, (CFMutableStringRef
)(parser
->node
->dataString
));
1438 _inputStreamClearMark(&parser
->input
);
1439 parser
->node
->dataTypeID
= kCFXMLNodeTypeText
;
1440 parser
->node
->additionalData
= NULL
;
1441 return reportNewLeaf(parser
);
1445 [42] ETag ::= '</' Name S? '>'
1447 static Boolean
parseCloseTag(CFXMLParserRef parser
, CFStringRef tag
) {
1448 const UniChar beginEndTag
[2] = {'<', '/'};
1449 Boolean unexpectedEOF
= false, mismatch
= false;
1450 CFStringRef closeTag
;
1452 // We can get away with testing pointer equality between tag & closeTag because scanXMLName guarantees the strings it returns are unique.
1453 if (_inputStreamMatchString(&parser
->input
, beginEndTag
, 2) && _inputStreamScanXMLName(&parser
->input
, false, &closeTag
) && closeTag
== tag
) {
1456 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
1457 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
1458 unexpectedEOF
= true;
1459 } else if (ch
!= '>') {
1462 } else if (_inputStreamAtEOF(&parser
->input
)) {
1463 unexpectedEOF
= true;
1468 if (unexpectedEOF
|| mismatch
) {
1469 if (unexpectedEOF
) {
1470 parser
->errorString
= CFStringCreateWithFormat(CFGetAllocator(parser
), NULL
, CFSTR("Encountered unexpected EOF while parsing close tag for <%@>"), tag
);
1471 parser
->status
= kCFXMLErrorUnexpectedEOF
;
1472 if(parser
->callBacks
.handleError
) INVOKE_CALLBACK3(parser
->callBacks
.handleError
, parser
, kCFXMLErrorUnexpectedEOF
, parser
->context
.info
);
1474 parser
->errorString
= CFStringCreateWithFormat(CFGetAllocator(parser
), NULL
, CFSTR("Encountered malformed close tag for <%@>"), tag
);
1475 parser
->status
= kCFXMLErrorMalformedCloseTag
;
1476 if(parser
->callBacks
.handleError
) INVOKE_CALLBACK3(parser
->callBacks
.handleError
, parser
, kCFXMLErrorMalformedCloseTag
, parser
->context
.info
);
1484 [39] element ::= EmptyElementTag | STag content ETag
1485 [40] STag ::= '<' Name (S Attribute)* S? '>'
1486 [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
1488 static Boolean
parseTag(CFXMLParserRef parser
) {
1491 CFXMLElementInfo data
;
1492 Boolean success
= true;
1493 CFStringRef tagName
;
1495 if (!_inputStreamScanXMLName(&parser
->input
, false, &tagName
)) {
1496 _CFReportError(parser
, kCFXMLErrorMalformedStartTag
, "Encountered malformed start tag");
1500 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
1502 if (!parseAttributes(parser
)) return false; // parsed directly into parser->argDict ; parseAttributes consumes any trailing whitespace
1503 data
.attributes
= parser
->argDict
;
1504 data
.attributeOrder
= parser
->argArray
;
1505 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
1506 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF");
1510 data
.isEmpty
= true;
1511 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
1512 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF");
1516 data
.isEmpty
= false;
1519 _CFReportError(parser
, kCFXMLErrorMalformedStartTag
, "Encountered malformed start tag");
1523 if (*parser
->top
|| parser
->top
== parser
->stack
) {
1524 CFStringRef oldStr
= parser
->node
->dataString
;
1525 parser
->node
->dataTypeID
= kCFXMLNodeTypeElement
;
1526 parser
->node
->dataString
= tagName
;
1527 parser
->node
->additionalData
= &data
;
1528 tag
= (void *)INVOKE_CALLBACK3(parser
->callBacks
.createXMLStructure
, parser
, parser
->node
, parser
->context
.info
);
1529 if (tag
&& parser
->status
== kCFXMLStatusParseInProgress
) {
1530 INVOKE_CALLBACK4(parser
->callBacks
.addChild
, parser
, *parser
->top
, tag
, parser
->context
.info
);
1532 parser
->node
->additionalData
= NULL
;
1533 parser
->node
->dataString
= oldStr
;
1534 if (parser
->status
!= kCFXMLStatusParseInProgress
) {
1535 // callback called CFXMLParserAbort()
1536 _CFReportError(parser
, parser
->status
, NULL
);
1543 pushXMLNode(parser
, tag
);
1544 if (!data
.isEmpty
) {
1545 success
= parseTagContent(parser
);
1547 success
= parseCloseTag(parser
, tagName
);
1552 if (success
&& tag
) {
1553 INVOKE_CALLBACK3(parser
->callBacks
.endXMLStructure
, parser
, tag
, parser
->context
.info
);
1554 if (parser
->status
!= kCFXMLStatusParseInProgress
) {
1555 _CFReportError(parser
, parser
->status
, NULL
);
1563 [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
1564 [67] Reference ::= EntityRef | CharRef
1565 [68] EntityRef ::= '&' Name ';'
1567 // For the moment, we don't worry about references in the attribute values.
1568 static Boolean
parseAttributeValue(CFXMLParserRef parser
, CFMutableStringRef str
) {
1570 Boolean success
= _inputStreamGetCharacter(&parser
->input
, "e
);
1571 if (!success
|| (quote
!= '\'' && quote
!= '\"')) return false;
1572 if (str
) _inputStreamSetMark(&parser
->input
);
1573 while (_inputStreamGetCharacter(&parser
->input
, &ch
) && ch
!= quote
) {
1575 case '<': success
= false; break;
1577 if (!parseEntityReference(parser
, false)) {
1586 if (success
&& _inputStreamAtEOF(&parser
->input
)) {
1591 _inputStreamReturnCharacter(&parser
->input
, quote
);
1592 _inputStreamGetCharactersFromMark(&parser
->input
, str
);
1593 _inputStreamGetCharacter(&parser
->input
, &ch
);
1595 _inputStreamClearMark(&parser
->input
);
1601 [40] STag ::= '<' Name (S Attribute)* S? '>'
1602 [41] Attribute ::= Name Eq AttValue
1603 [25] Eq ::= S? '=' S?
1606 // Expects parser->curr to be at the first content character; will consume the trailing whitespace.
1607 Boolean
parseAttributes(CFXMLParserRef parser
) {
1609 CFMutableDictionaryRef dict
;
1610 CFMutableArrayRef array
;
1611 Boolean failure
= false;
1612 if (_inputStreamPeekCharacter(&parser
->input
, &ch
) == '>') {
1613 if (parser
->argDict
) {
1614 CFDictionaryRemoveAllValues(parser
->argDict
);
1615 CFArrayRemoveAllValues(parser
->argArray
);
1617 return true; // No attributes; let caller deal with it
1619 if (!parser
->argDict
) {
1620 parser
->argDict
= CFDictionaryCreateMutable(CFGetAllocator(parser
), 0, &kCFTypeDictionaryKeyCallBacks
, &kCFTypeDictionaryValueCallBacks
);
1621 parser
->argArray
= CFArrayCreateMutable(CFGetAllocator(parser
), 0, &kCFTypeArrayCallBacks
);
1623 CFDictionaryRemoveAllValues(parser
->argDict
);
1624 CFArrayRemoveAllValues(parser
->argArray
);
1626 dict
= parser
->argDict
;
1627 array
= parser
->argArray
;
1628 while (!failure
&& _inputStreamPeekCharacter(&parser
->input
, &ch
) && ch
!= '>' && ch
!= '/') {
1630 CFMutableStringRef value
;
1631 if (!_inputStreamScanXMLName(&parser
->input
, false, &key
)) {
1635 if (CFArrayGetFirstIndexOfValue(array
, CFRangeMake(0, CFArrayGetCount(array
)), key
) != kCFNotFound
) {
1636 _CFReportError(parser
, kCFXMLErrorMalformedStartTag
, "Found repeated attribute");
1639 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
1640 if (!_inputStreamGetCharacter(&parser
->input
, &ch
) || ch
!= '=') {
1644 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
1645 value
= CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser
), NULL
, 0, 0, CFGetAllocator(parser
));
1646 if (!parseAttributeValue(parser
, value
)) {
1651 CFArrayAppendValue(array
, key
);
1652 CFDictionarySetValue(dict
, key
, value
);
1654 _inputStreamSkipWhitespace(&parser
->input
, NULL
);
1657 //#warning CF:Include tag name in this error report
1658 _CFReportError(parser
, kCFXMLErrorMalformedStartTag
, "Found illegal character while parsing element tag");
1660 } else if (_inputStreamAtEOF(&parser
->input
)) {
1661 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing element attributes");
// [1]  document ::= prolog element Misc*
// [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
// [27] Misc ::= Comment | PI | S
// [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
//
// We treat XMLDecl as a plain old PI, since PI is part of Misc.  This changes the prolog and document productions to
// [22-1] prolog ::= Misc* (doctypedecl Misc*)?
// [1-1]  document ::= Misc* (doctypedecl Misc*)? element Misc*
//
// NOTE: This function assumes parser->stack has a valid top, i.e. the document pointer has already been created!
1680 static Boolean
parseXML(CFXMLParserRef parser
) {
1681 Boolean success
= true, sawDTD
= false, sawElement
= false;
1683 while (success
&& _inputStreamPeekCharacter(&parser
->input
, &ch
)) {
1689 success
= parseWhitespace(parser
);
1692 _inputStreamGetCharacter(&parser
->input
, &ch
);
1693 if (!_inputStreamGetCharacter(&parser
->input
, &ch
)) {
1694 _CFReportError(parser
, kCFXMLErrorUnexpectedEOF
, "Encountered unexpected EOF while parsing top-level document");
1699 UniChar dashes
[2] = {'-', '-'};
1700 if (_inputStreamMatchString(&parser
->input
, dashes
, 2)) {
1702 success
= parseComment(parser
, true);
1706 _CFReportError(parser
, kCFXMLErrorMalformedDocument
, "Encountered a second DTD");
1709 success
= parseDTD(parser
);
1710 if (success
) sawDTD
= true;
1712 } else if (ch
== '?') {
1713 // Processing instruction
1714 success
= parseProcessingInstruction(parser
, true);
1718 _CFReportError(parser
, kCFXMLErrorMalformedDocument
, "Encountered second top-level element");
1721 _inputStreamReturnCharacter(&parser
->input
, ch
);
1722 success
= parseTag(parser
);
1723 if (success
) sawElement
= true;
1727 parser
->status
= kCFXMLErrorMalformedDocument
;
1728 parser
->errorString
= ch
< 256 ?
1729 CFStringCreateWithFormat(CFGetAllocator(parser
), NULL
, CFSTR("Encountered unexpected character 0x%x (\'%c\') at top-level"), ch
, ch
) :
1730 CFStringCreateWithFormat(CFGetAllocator(parser
), NULL
, CFSTR("Encountered unexpected Unicode character 0x%x at top-level"), ch
);
1732 if (parser
->callBacks
.handleError
) {
1733 INVOKE_CALLBACK3(parser
->callBacks
.handleError
, parser
, parser
->status
, parser
->context
.info
);
1740 if (!success
) return false;
1742 _CFReportError(parser
, kCFXMLErrorElementlessDocument
, "No element found in document");
1748 static void _CFReportError(CFXMLParserRef parser
, CFXMLParserStatusCode errNum
, const char *str
) {
1750 parser
->status
= errNum
;
1751 parser
->errorString
= CFStringCreateWithCString(CFGetAllocator(parser
), str
, kCFStringEncodingASCII
);
1753 if (parser
->callBacks
.handleError
) {
1754 INVOKE_CALLBACK3(parser
->callBacks
.handleError
, parser
, errNum
, parser
->context
.info
);
1758 // Assumes parser->node has been set and is ready to go
1759 static Boolean
reportNewLeaf(CFXMLParserRef parser
) {
1761 if (*(parser
->top
) == NULL
) return true;
1763 xmlStruct
= (void *)INVOKE_CALLBACK3(parser
->callBacks
.createXMLStructure
, parser
, parser
->node
, parser
->context
.info
);
1764 if (xmlStruct
&& parser
->status
== kCFXMLStatusParseInProgress
) {
1765 INVOKE_CALLBACK4(parser
->callBacks
.addChild
, parser
, *(parser
->top
), xmlStruct
, parser
->context
.info
);
1766 if (parser
->status
== kCFXMLStatusParseInProgress
) INVOKE_CALLBACK3(parser
->callBacks
.endXMLStructure
, parser
, xmlStruct
, parser
->context
.info
);
1768 if (parser
->status
!= kCFXMLStatusParseInProgress
) {
1769 _CFReportError(parser
, parser
->status
, NULL
);
1775 static void pushXMLNode(CFXMLParserRef parser
, void *node
) {
1777 if ((unsigned)(parser
->top
- parser
->stack
) == parser
->capacity
) {
1778 parser
->stack
= (void **)CFAllocatorReallocate(CFGetAllocator(parser
), parser
->stack
, 2 * parser
->capacity
* sizeof(void *), 0);
1779 parser
->top
= parser
->stack
+ parser
->capacity
;
1780 parser
->capacity
= 2*parser
->capacity
;
1782 *(parser
->top
) = node
;
1785 /**************************/
1786 /* Parsing to a CFXMLTree */
1787 /**************************/
1789 static void *_XMLTreeCreateXMLStructure(CFXMLParserRef parser
, CFXMLNodeRef node
, void *context
) {
1790 CFXMLNodeRef myNode
= CFXMLNodeCreateCopy(CFGetAllocator(parser
), node
);
1791 CFXMLTreeRef tree
= CFXMLTreeCreateWithNode(CFGetAllocator(parser
), myNode
);
1793 return (void *)tree
;
1796 static void _XMLTreeAddChild(CFXMLParserRef parser
, void *parent
, void *child
, void *context
) {
1797 CFTreeAppendChild((CFTreeRef
)parent
, (CFTreeRef
)child
);
1800 static void _XMLTreeEndXMLStructure(CFXMLParserRef parser
, void *xmlType
, void *context
) {
1801 CFXMLTreeRef node
= (CFXMLTreeRef
)xmlType
;
1802 if (CFTreeGetParent(node
))
1803 CFRelease((CFXMLTreeRef
)xmlType
);
1806 CFXMLTreeRef
CFXMLTreeCreateWithDataFromURL(CFAllocatorRef allocator
, CFURLRef dataSource
, CFOptionFlags parseOptions
, CFIndex version
) {
1807 CFXMLParserRef parser
;
1808 CFXMLParserCallBacks callbacks
;
1809 CFXMLTreeRef result
;
1811 CFAssert1(dataSource
== NULL
|| CFGetTypeID(dataSource
) == CFURLGetTypeID(), __kCFLogAssertion
, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__
);
1813 callbacks
.createXMLStructure
= _XMLTreeCreateXMLStructure
;
1814 callbacks
.addChild
= _XMLTreeAddChild
;
1815 callbacks
.endXMLStructure
= _XMLTreeEndXMLStructure
;
1816 callbacks
.resolveExternalEntity
= NULL
;
1817 callbacks
.handleError
= NULL
;
1818 parser
= CFXMLParserCreateWithDataFromURL(allocator
, dataSource
, parseOptions
, version
, &callbacks
, NULL
);
1820 if (CFXMLParserParse(parser
)) {
1821 result
= (CFXMLTreeRef
)CFXMLParserGetDocument(parser
);
1823 result
= (CFXMLTreeRef
)CFXMLParserGetDocument(parser
);
1824 if (result
) CFRelease(result
);
1831 CFXMLTreeRef
CFXMLTreeCreateFromData(CFAllocatorRef allocator
, CFDataRef xmlData
, CFURLRef dataSource
, CFOptionFlags parseOptions
, CFIndex parserVersion
) {
1832 return CFXMLTreeCreateFromDataWithError(allocator
, xmlData
, dataSource
, parseOptions
, parserVersion
, NULL
);
1835 CONST_STRING_DECL(kCFXMLTreeErrorDescription
, "kCFXMLTreeErrorDescription");
1836 CONST_STRING_DECL(kCFXMLTreeErrorLineNumber
, "kCFXMLTreeErrorLineNumber");
1837 CONST_STRING_DECL(kCFXMLTreeErrorLocation
, "kCFXMLTreeErrorLocation");
1838 CONST_STRING_DECL(kCFXMLTreeErrorStatusCode
, "kCFXMLTreeErrorStatusCode");
1840 CFXMLTreeRef
CFXMLTreeCreateFromDataWithError(CFAllocatorRef allocator
, CFDataRef xmlData
, CFURLRef dataSource
, CFOptionFlags parseOptions
, CFIndex parserVersion
, CFDictionaryRef
*errorDict
) {
1841 CFXMLParserRef parser
;
1842 CFXMLParserCallBacks callbacks
;
1843 CFXMLTreeRef result
;
1845 __CFGenericValidateType(xmlData
, CFDataGetTypeID());
1846 CFAssert1(dataSource
== NULL
|| CFGetTypeID(dataSource
) == CFURLGetTypeID(), __kCFLogAssertion
, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__
);
1848 callbacks
.createXMLStructure
= _XMLTreeCreateXMLStructure
;
1849 callbacks
.addChild
= _XMLTreeAddChild
;
1850 callbacks
.endXMLStructure
= _XMLTreeEndXMLStructure
;
1851 callbacks
.resolveExternalEntity
= NULL
;
1852 callbacks
.handleError
= NULL
;
1853 parser
= CFXMLParserCreate(allocator
, xmlData
, dataSource
, parseOptions
, parserVersion
, &callbacks
, NULL
);
1855 if (CFXMLParserParse(parser
)) {
1856 result
= (CFXMLTreeRef
)CFXMLParserGetDocument(parser
);
1858 if (errorDict
) { // collect the error dictionary
1859 *errorDict
= CFDictionaryCreateMutable(allocator
, 4, &kCFTypeDictionaryKeyCallBacks
, &kCFTypeDictionaryValueCallBacks
);
1863 CFStringRef errstring
;
1865 rawnum
= CFXMLParserGetLocation(parser
);
1866 cfnum
= CFNumberCreate(allocator
, kCFNumberSInt32Type
, &rawnum
);
1868 CFDictionaryAddValue((CFMutableDictionaryRef
)*errorDict
, kCFXMLTreeErrorLocation
, cfnum
);
1872 rawnum
= CFXMLParserGetLineNumber(parser
);
1873 cfnum
= CFNumberCreate(allocator
, kCFNumberSInt32Type
, &rawnum
);
1875 CFDictionaryAddValue((CFMutableDictionaryRef
)*errorDict
, kCFXMLTreeErrorLineNumber
, cfnum
);
1879 rawnum
= CFXMLParserGetStatusCode(parser
);
1880 cfnum
= CFNumberCreate(allocator
, kCFNumberSInt32Type
, &rawnum
);
1882 CFDictionaryAddValue((CFMutableDictionaryRef
)*errorDict
, kCFXMLTreeErrorStatusCode
, cfnum
);
1886 errstring
= CFXMLParserCopyErrorDescription(parser
);
1888 CFDictionaryAddValue((CFMutableDictionaryRef
)*errorDict
, kCFXMLTreeErrorDescription
, errstring
);
1889 CFRelease(errstring
);
1893 result
= (CFXMLTreeRef
)CFXMLParserGetDocument(parser
);
1894 if (result
) CFRelease(result
);
// At the very least we need to do <, >, &, ", and '.  In addition, we'll have to do everything else in the string.
// We should also be handling items that are up over certain values correctly.
1905 CFStringRef
CFXMLCreateStringByEscapingEntities(CFAllocatorRef allocator
, CFStringRef string
, CFDictionaryRef entitiesDictionary
) {
1906 CFAssert1(string
!= NULL
, __kCFLogAssertion
, "%s(): NULL string not permitted.", __PRETTY_FUNCTION__
);
1907 CFMutableStringRef newString
= CFStringCreateMutable(allocator
, 0); // unbounded mutable string
1908 CFMutableCharacterSetRef startChars
= CFCharacterSetCreateMutable(allocator
);
1910 CFStringInlineBuffer inlineBuf
;
1913 CFIndex stringLength
= CFStringGetLength(string
);
1916 CFCharacterSetAddCharactersInString(startChars
, CFSTR("&<>'\""));
1918 CFStringInitInlineBuffer(string
, &inlineBuf
, CFRangeMake(0, stringLength
));
1919 for(idx
= 0; idx
< stringLength
; idx
++) {
1920 uc
= CFStringGetCharacterFromInlineBuffer(&inlineBuf
, idx
);
1921 if(CFCharacterSetIsCharacterMember(startChars
, uc
)) {
1922 CFStringRef previousSubstring
= CFStringCreateWithSubstring(allocator
, string
, CFRangeMake(mark
, idx
- mark
));
1923 CFStringAppend(newString
, previousSubstring
);
1924 CFRelease(previousSubstring
);
1927 CFStringAppend(newString
, CFSTR("&"));
1930 CFStringAppend(newString
, CFSTR("<"));
1933 CFStringAppend(newString
, CFSTR(">"));
1936 CFStringAppend(newString
, CFSTR("'"));
1939 CFStringAppend(newString
, CFSTR("""));
1945 // Copy the remainder to the output string before returning.
1946 CFStringRef remainder
= CFStringCreateWithSubstring(allocator
, string
, CFRangeMake(mark
, idx
- mark
));
1947 if (NULL
!= remainder
) {
1948 CFStringAppend(newString
, remainder
);
1949 CFRelease(remainder
);
1952 CFRelease(startChars
);
1956 CFStringRef
CFXMLCreateStringByUnescapingEntities(CFAllocatorRef allocator
, CFStringRef string
, CFDictionaryRef entitiesDictionary
) {
1957 CFAssert1(string
!= NULL
, __kCFLogAssertion
, "%s(): NULL string not permitted.", __PRETTY_FUNCTION__
);
1959 CFStringInlineBuffer inlineBuf
; /* use this for fast traversal of the string in question */
1961 CFIndex lastChunkStart
, length
= CFStringGetLength(string
);
1962 CFIndex i
, entityStart
;
1966 CFMutableDictionaryRef fullReplDict
= entitiesDictionary
? CFDictionaryCreateMutableCopy(allocator
, 0, entitiesDictionary
) : CFDictionaryCreateMutable(allocator
, 0, &kCFTypeDictionaryKeyCallBacks
, &kCFTypeDictionaryValueCallBacks
);
1968 CFDictionaryAddValue(fullReplDict
, (const void *)CFSTR("amp"), (const void *)CFSTR("&"));
1969 CFDictionaryAddValue(fullReplDict
, (const void *)CFSTR("quot"), (const void *)CFSTR("\""));
1970 CFDictionaryAddValue(fullReplDict
, (const void *)CFSTR("lt"), (const void *)CFSTR("<"));
1971 CFDictionaryAddValue(fullReplDict
, (const void *)CFSTR("gt"), (const void *)CFSTR(">"));
1972 CFDictionaryAddValue(fullReplDict
, (const void *)CFSTR("apos"), (const void *)CFSTR("'"));
1974 CFStringInitInlineBuffer(string
, &inlineBuf
, CFRangeMake(0, length
- 1));
1975 CFMutableStringRef newString
= CFStringCreateMutable(allocator
, 0);
1978 // Scan through the string in its entirety
1979 for(i
= 0; i
< length
; ) {
1980 uc
= CFStringGetCharacterFromInlineBuffer(&inlineBuf
, i
); i
++; // grab the next character and move i.
1983 entityStart
= i
- 1;
1984 entity
= 0xFFFF; // set this to a not-Unicode character as sentinel
1985 // we've hit the beginning of an entity. Copy everything from lastChunkStart to this point.
1986 if(lastChunkStart
< i
- 1) {
1987 sub
= CFStringCreateWithSubstring(allocator
, string
, CFRangeMake(lastChunkStart
, (i
- 1) - lastChunkStart
));
1988 CFStringAppend(newString
, sub
);
1992 uc
= CFStringGetCharacterFromInlineBuffer(&inlineBuf
, i
); i
++; // grab the next character and move i.
1993 // Now we can process the entity reference itself
1994 if(uc
== '#') { // this is a numeric entity.
1997 uc
= CFStringGetCharacterFromInlineBuffer(&inlineBuf
, i
); i
++;
1999 if(uc
== 'x') { // only lowercase x allowed. Translating numeric entity as hexadecimal.
2001 uc
= CFStringGetCharacterFromInlineBuffer(&inlineBuf
, i
); i
++;
2004 // process the provided digits 'til we're finished
2006 if (uc
>= '0' && uc
<= '9')
2007 entity
= entity
* base
+ (uc
-'0');
2008 else if (uc
>= 'a' && uc
<= 'f' && base
== 16)
2009 entity
= entity
* base
+ (uc
-'a'+10);
2010 else if (uc
>= 'A' && uc
<= 'F' && base
== 16)
2011 entity
= entity
* base
+ (uc
-'A'+10);
2015 uc
= CFStringGetCharacterFromInlineBuffer(&inlineBuf
, i
); i
++;
2022 // Scan to the end of the entity
2023 while(uc
!= ';' && i
< length
) {
2024 uc
= CFStringGetCharacterFromInlineBuffer(&inlineBuf
, i
); i
++;
2027 if(0xFFFF != entity
) { // it was numeric, and translated.
2028 // Now, output the result fo the entity
2029 if(entity
>= 0x10000) {
2030 UniChar characters
[2] = { ((entity
- 0x10000) >> 10) + 0xD800, ((entity
- 0x10000) & 0x3ff) + 0xDC00 };
2031 CFStringAppendCharacters(newString
, characters
, 2);
2033 UniChar character
= entity
;
2034 CFStringAppendCharacters(newString
, &character
, 1);
2036 } else { // it wasn't numeric.
2037 sub
= CFStringCreateWithSubstring(allocator
, string
, CFRangeMake(entityStart
+ 1, (i
- entityStart
- 2))); // This trims off the & and ; from the string, so we can use it against the dictionary itself.
2038 CFStringRef replacementString
= (CFStringRef
)CFDictionaryGetValue(fullReplDict
, sub
);
2039 if(replacementString
) {
2040 CFStringAppend(newString
, replacementString
);
2042 CFRelease(sub
); // let the old substring go, since we didn't find it in the dictionary
2043 sub
= CFStringCreateWithSubstring(allocator
, string
, CFRangeMake(entityStart
, (i
- entityStart
))); // create a new one, including the & and ;
2044 CFStringAppend(newString
, sub
); // ...and append that.
2046 CFRelease(sub
); // in either case, release the most-recent "sub"
2049 // move the lastChunkStart to the beginning of the next chunk.
2053 if(lastChunkStart
< length
) { // we've come out of the loop, let's get the rest of the string and tack it on.
2054 sub
= CFStringCreateWithSubstring(allocator
, string
, CFRangeMake(lastChunkStart
, i
- lastChunkStart
));
2055 CFStringAppend(newString
, sub
);
2059 CFRelease(fullReplDict
);
2064 #pragma GCC diagnostic pop