]> git.saurik.com Git - apple/cf.git/blob - CFXMLParser.c
a12834357cf2fa59c6d865ef4eb9da7ec9cc2af9
[apple/cf.git] / CFXMLParser.c
1 /*
2 * Copyright (c) 2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /* CFXMLParser.c
25 Copyright (c) 1999-2014, Apple Inc. All rights reserved.
26 Responsibility: David Smith
27 */
28
29 #include <CoreFoundation/CFXMLParser.h>
30 #include <CoreFoundation/CFNumber.h>
31 #include "CFXMLInputStream.h"
32 #include "CFUniChar.h"
33 #include "CFInternal.h"
34
35 #pragma GCC diagnostic push
36 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
37
38 struct __CFXMLParser {
39 CFRuntimeBase _cfBase;
40
41 _CFXMLInputStream input;
42
43 void **stack;
44 void **top;
45 UInt32 capacity;
46
47 struct __CFXMLNode *node; // Our private node; we use it to report back information
48 CFMutableDictionaryRef argDict;
49 CFMutableArrayRef argArray;
50
51 UInt32 options;
52 CFXMLParserCallBacks callBacks;
53 CFXMLParserContext context;
54
55 CFXMLParserStatusCode status;
56 CFStringRef errorString;
57 };
58
59 static CFStringRef __CFXMLParserCopyDescription(CFTypeRef cf) {
60 const struct __CFXMLParser *parser = (const struct __CFXMLParser *)cf;
61 return CFStringCreateWithFormat(CFGetAllocator(cf), NULL, CFSTR("<CFXMLParser %p>"), parser);
62 }
63
64 static void __CFXMLParserDeallocate(CFTypeRef cf) {
65 struct __CFXMLParser *parser = (struct __CFXMLParser *)cf;
66 CFAllocatorRef alloc = CFGetAllocator(parser);
67 _freeInputStream(&(parser->input));
68 if (parser->argDict) CFRelease(parser->argDict);
69 if (parser->argArray) CFRelease(parser->argArray);
70 if (parser->errorString) CFRelease(parser->errorString);
71 if (parser->node) CFRelease(parser->node);
72 CFAllocatorDeallocate(alloc, parser->stack);
73 if (parser->context.info && parser->context.release) {
74 parser->context.release(parser->context.info);
75 }
76 }
77
78 static CFTypeID __kCFXMLParserTypeID = _kCFRuntimeNotATypeID;
79
80 static const CFRuntimeClass __CFXMLParserClass = {
81 0,
82 "CFXMLParser",
83 NULL, // init
84 NULL, // copy
85 __CFXMLParserDeallocate,
86 NULL,
87 NULL,
88 NULL, //
89 __CFXMLParserCopyDescription
90 };
91
92 CFTypeID CFXMLParserGetTypeID(void) {
93 static dispatch_once_t initOnce;
94 dispatch_once(&initOnce, ^{ __kCFXMLParserTypeID = _CFRuntimeRegisterClass(&__CFXMLParserClass); });
95 return __kCFXMLParserTypeID;
96 }
97
98 void CFXMLParserGetContext(CFXMLParserRef parser, CFXMLParserContext *context) {
99 CFAssert1(parser != NULL, __kCFLogAssertion, "%s(): NULL parser not permitted", __PRETTY_FUNCTION__);
100 __CFGenericValidateType(parser, CFXMLParserGetTypeID());
101 if (context) {
102 context->version = parser->context.version;
103 context->info = parser->context.info;
104 context->retain = parser->context.retain;
105 context->release = parser->context.release;
106 context->copyDescription = parser->context.copyDescription;
107 UNFAULT_CALLBACK(context->retain);
108 UNFAULT_CALLBACK(context->release);
109 UNFAULT_CALLBACK(context->copyDescription);
110 }
111 }
112
113 void CFXMLParserGetCallBacks(CFXMLParserRef parser, CFXMLParserCallBacks *callBacks) {
114 __CFGenericValidateType(parser, CFXMLParserGetTypeID());
115 if (callBacks) {
116 callBacks->version = parser->callBacks.version;
117 callBacks->createXMLStructure = parser->callBacks.createXMLStructure;
118 callBacks->addChild = parser->callBacks.addChild;
119 callBacks->endXMLStructure = parser->callBacks.endXMLStructure;
120 callBacks->resolveExternalEntity = parser->callBacks.resolveExternalEntity;
121 callBacks->handleError = parser->callBacks.handleError;
122 UNFAULT_CALLBACK(callBacks->createXMLStructure);
123 UNFAULT_CALLBACK(callBacks->addChild);
124 UNFAULT_CALLBACK(callBacks->endXMLStructure);
125 UNFAULT_CALLBACK(callBacks->resolveExternalEntity);
126 UNFAULT_CALLBACK(callBacks->handleError);
127 }
128 }
129
130 CFURLRef CFXMLParserGetSourceURL(CFXMLParserRef parser) {
131 __CFGenericValidateType(parser, CFXMLParserGetTypeID());
132 return parser->input.url;
133 }
134
135 /* Returns the character index or line number of the current parse location */
136 CFIndex CFXMLParserGetLocation(CFXMLParserRef parser) {
137 __CFGenericValidateType(parser, CFXMLParserGetTypeID());
138 return _inputStreamCurrentLocation(&parser->input);
139 }
140
141 CFIndex CFXMLParserGetLineNumber(CFXMLParserRef parser) {
142 __CFGenericValidateType(parser, CFXMLParserGetTypeID());
143 return _inputStreamCurrentLine(&parser->input);
144 }
145
146 /* Returns the top-most object returned by the createXMLStructure callback */
147 void *CFXMLParserGetDocument(CFXMLParserRef parser) {
148 __CFGenericValidateType(parser, CFXMLParserGetTypeID());
149 if (parser->capacity > 0)
150 return parser->stack[0];
151 else
152 return NULL;
153 }
154
155 CFXMLParserStatusCode CFXMLParserGetStatusCode(CFXMLParserRef parser) {
156 __CFGenericValidateType(parser, CFXMLParserGetTypeID());
157 return parser->status;
158 }
159
160 CFStringRef CFXMLParserCopyErrorDescription(CFXMLParserRef parser) {
161 __CFGenericValidateType(parser, CFXMLParserGetTypeID());
162 return (CFStringRef)CFRetain(parser->errorString);
163 }
164
165 void CFXMLParserAbort(CFXMLParserRef parser, CFXMLParserStatusCode errorCode, CFStringRef errorDescription) {
166 __CFGenericValidateType(parser, CFXMLParserGetTypeID());
167 CFAssert1(errorCode > 0, __kCFLogAssertion, "%s(): errorCode must be greater than zero", __PRETTY_FUNCTION__);
168 CFAssert1(errorDescription != NULL, __kCFLogAssertion, "%s(): errorDescription may not be NULL", __PRETTY_FUNCTION__);
169 __CFGenericValidateType(errorDescription, CFStringGetTypeID());
170
171 parser->status = errorCode;
172 if (parser->errorString) CFRelease(parser->errorString);
173 parser->errorString = (CFStringRef)CFStringCreateCopy(kCFAllocatorSystemDefault, errorDescription);
174 }
175
176
177 static Boolean parseXML(CFXMLParserRef parser);
178 static Boolean parseComment(CFXMLParserRef parser, Boolean report);
179 static Boolean parseProcessingInstruction(CFXMLParserRef parser, Boolean report);
180 static Boolean parseInlineDTD(CFXMLParserRef parser);
181 static Boolean parseDTD(CFXMLParserRef parser);
182 static Boolean parsePhysicalEntityReference(CFXMLParserRef parser);
183 static Boolean parseCDSect(CFXMLParserRef parser);
184 static Boolean parseEntityReference(CFXMLParserRef parser, Boolean report);
185 static Boolean parsePCData(CFXMLParserRef parser);
186 static Boolean parseWhitespace(CFXMLParserRef parser);
187 static Boolean parseAttributeListDeclaration(CFXMLParserRef parser);
188 static Boolean parseNotationDeclaration(CFXMLParserRef parser);
189 static Boolean parseElementDeclaration(CFXMLParserRef parser);
190 static Boolean parseEntityDeclaration(CFXMLParserRef parser);
191 static Boolean parseExternalID(CFXMLParserRef parser, Boolean alsoAcceptPublicID, CFXMLExternalID *extID);
192 static Boolean parseCloseTag(CFXMLParserRef parser, CFStringRef tag);
193 static Boolean parseTagContent(CFXMLParserRef parser);
194 static Boolean parseTag(CFXMLParserRef parser);
195 static Boolean parseAttributes(CFXMLParserRef parser);
196 static Boolean parseAttributeValue(CFXMLParserRef parser, CFMutableStringRef str);
197
198 // Utilities; may need to make these accessible to the property list parser to avoid code duplication
199 static void _CFReportError(CFXMLParserRef parser, CFXMLParserStatusCode errNum, const char *str);
200 static Boolean reportNewLeaf(CFXMLParserRef parser); // Assumes parser->node has been set and is ready to go
201 static void pushXMLNode(CFXMLParserRef parser, void *node);
202
203 static CFXMLParserRef __CFXMLParserInit(CFAllocatorRef alloc, CFURLRef dataSource, CFOptionFlags options, CFDataRef xmlData, CFIndex version, CFXMLParserCallBacks *callBacks, CFXMLParserContext *context) {
204 struct __CFXMLParser *parser = (struct __CFXMLParser *)_CFRuntimeCreateInstance(alloc, CFXMLParserGetTypeID(), sizeof(struct __CFXMLParser) - sizeof(CFRuntimeBase), NULL);
205 struct __CFXMLNode *node = (struct __CFXMLNode *)_CFRuntimeCreateInstance(alloc, CFXMLNodeGetTypeID(), sizeof(struct __CFXMLNode) - sizeof(CFRuntimeBase), NULL);
206 UniChar *buf;
207 if (parser && node) {
208 alloc = CFGetAllocator(parser);
209 _initializeInputStream(&(parser->input), alloc, dataSource, xmlData);
210 parser->top = parser->stack;
211 parser->stack = NULL;
212 parser->capacity = 0;
213
214 buf = (UniChar *)CFAllocatorAllocate(alloc, 128*sizeof(UniChar), 0);
215 parser->node = node;
216 parser->node->dataString = CFStringCreateMutableWithExternalCharactersNoCopy(alloc, buf, 0, 128, alloc);
217 parser->node->additionalData = NULL;
218 parser->node->version = version;
219 parser->argDict = NULL; // don't create these until necessary
220 parser->argArray = NULL;
221
222 parser->options = options;
223 parser->callBacks = *callBacks;
224
225 FAULT_CALLBACK((void **)&(parser->callBacks.createXMLStructure));
226 FAULT_CALLBACK((void **)&(parser->callBacks.addChild));
227 FAULT_CALLBACK((void **)&(parser->callBacks.endXMLStructure));
228 FAULT_CALLBACK((void **)&(parser->callBacks.resolveExternalEntity));
229 FAULT_CALLBACK((void **)&(parser->callBacks.handleError));
230
231 if (context) {
232 parser->context = *context;
233 if (parser->context.info && parser->context.retain) {
234 parser->context.retain(parser->context.info);
235 }
236 } else {
237 parser->context.version = 0;
238 parser->context.info = NULL;
239 parser->context.retain = NULL;
240 parser->context.release = NULL;
241 parser->context.copyDescription = NULL;
242 }
243 parser->status = kCFXMLStatusParseNotBegun;
244 parser->errorString = NULL;
245 } else {
246 if (parser) CFRelease(parser);
247 if (node) CFRelease(node);
248 parser = NULL;
249 }
250 return parser;
251 }
252
253 CFXMLParserRef CFXMLParserCreate(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex versionOfNodes, CFXMLParserCallBacks *callBacks, CFXMLParserContext *context) {
254 CFAssert1(xmlData != NULL, __kCFLogAssertion, "%s(): NULL data not permitted", __PRETTY_FUNCTION__);
255 __CFGenericValidateType(xmlData, CFDataGetTypeID());
256 CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__);
257 CFAssert1(callBacks != NULL && callBacks->createXMLStructure != NULL && callBacks->addChild != NULL && callBacks->endXMLStructure != NULL, __kCFLogAssertion, "%s(): callbacks createXMLStructure, addChild, and endXMLStructure must all be non-NULL", __PRETTY_FUNCTION__);
258 CFAssert2(versionOfNodes <= 1, __kCFLogAssertion, "%s(): version number %d is higher than supported by CFXMLParser", __PRETTY_FUNCTION__, versionOfNodes);
259 CFAssert1(versionOfNodes != 0, __kCFLogAssertion, "%s(): version number 0 is no longer supported by CFXMLParser", __PRETTY_FUNCTION__);
260 return __CFXMLParserInit(allocator, dataSource, parseOptions, xmlData, versionOfNodes, callBacks, context);
261 }
262
263 CFXMLParserRef CFXMLParserCreateWithDataFromURL(CFAllocatorRef allocator, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex versionOfNodes, CFXMLParserCallBacks *callBacks, CFXMLParserContext *context) {
264 CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__);
265 CFAssert1(callBacks != NULL && callBacks->createXMLStructure != NULL && callBacks->addChild != NULL && callBacks->endXMLStructure != NULL, __kCFLogAssertion, "%s(): callbacks createXMLStructure, addChild, and endXMLStructure must all be non-NULL", __PRETTY_FUNCTION__);
266 CFAssert2(versionOfNodes <= 1, __kCFLogAssertion, "%s(): version number %d is higher than supported by CFXMLParser", __PRETTY_FUNCTION__, versionOfNodes);
267 CFAssert1(versionOfNodes != 0, __kCFLogAssertion, "%s(): version number 0 is no longer supported by CFXMLParser", __PRETTY_FUNCTION__);
268
269 return __CFXMLParserInit(allocator, dataSource, parseOptions, NULL, versionOfNodes, callBacks, context);
270 }
271
272 Boolean CFXMLParserParse(CFXMLParserRef parser) {
273 CFXMLDocumentInfo docData;
274 __CFGenericValidateType(parser, CFXMLParserGetTypeID());
275 if (parser->status != kCFXMLStatusParseNotBegun) return false;
276 parser->status = kCFXMLStatusParseInProgress;
277
278 if (!_openInputStream(&parser->input)) {
279 if (!parser->input.data) {
280 // couldn't load URL
281 parser->status = kCFXMLErrorNoData;
282 parser->errorString = CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("No data found at %@"), CFURLGetString(parser->input.url));
283 } else {
284 // couldn't figure out the encoding
285 CFAssert(parser->input.encoding == kCFStringEncodingInvalidId, __kCFLogAssertion, "CFXMLParser internal error: input stream could not be opened");
286 parser->status = kCFXMLErrorUnknownEncoding;
287 parser->errorString = CFStringCreateWithCString(CFGetAllocator(parser), "Encountered unknown encoding", kCFStringEncodingASCII);
288 }
289 if (parser->callBacks.handleError) {
290 INVOKE_CALLBACK3(parser->callBacks.handleError, parser, parser->status, parser->context.info);
291 }
292 return false;
293 }
294
295 // Create the document
296 parser->stack = (void **)CFAllocatorAllocate(CFGetAllocator(parser), 16 * sizeof(void *), 0);
297 parser->capacity = 16;
298 parser->node->dataTypeID = kCFXMLNodeTypeDocument;
299 docData.encoding = _inputStreamGetEncoding(&parser->input);
300 docData.sourceURL = parser->input.url;
301 parser->node->additionalData = &docData;
302 parser->stack[0] = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info);
303 parser->top = parser->stack;
304 parser->node->additionalData = NULL;
305
306 // Client may have called CFXMLParserAbort() during any callback, so we must always check to see if we have an error status after a callback
307 if (parser->status != kCFXMLStatusParseInProgress) {
308 _CFReportError(parser, parser->status, NULL);
309 return false;
310 }
311 return parseXML(parser);
312 }
313
/* The next several functions are all intended to parse past a particular XML structure. They expect the parser's input stream to be positioned at the first content character of their structure (e.g. parseComment expects the input stream to be positioned just past "<!--"). They parse to the end of their structure, calling any necessary callbacks along the way, and advancing the input stream as they go. They either return void (not possible for the parse to fail) or they return a Boolean (success/failure). The calling routines are expected to catch returned Booleans and fail immediately if false is returned. */
315
316 // [3] S ::= (#x20 | #x9 | #xD | #xA)+
317 static Boolean parseWhitespace(CFXMLParserRef parser) {
318 CFIndex len;
319 Boolean report = !(parser->options & kCFXMLParserSkipWhitespace);
320 len = _inputStreamSkipWhitespace(&parser->input, report ? (CFMutableStringRef)(parser->node->dataString) : NULL);
321 if (report && len) {
322 parser->node->dataTypeID = kCFXMLNodeTypeWhitespace;
323 parser->node->additionalData = NULL;
324 return reportNewLeaf(parser);
325 } else {
326 return true;
327 }
328 }
329
330 // parser should be just past "<!--"
331 static Boolean parseComment(CFXMLParserRef parser, Boolean report) {
332 const UniChar dashes[2] = {'-', '-'};
333 UniChar ch;
334 report = report && (!(parser->options & kCFXMLParserSkipMetaData));
335 if (!_inputStreamScanToCharacters(&parser->input, dashes, 2, report ? (CFMutableStringRef)(parser->node->dataString) : NULL) || !_inputStreamGetCharacter(&parser->input, &ch)) {
336 _CFReportError(parser, kCFXMLErrorUnexpectedEOF,"Found unexpected EOF while parsing comment");
337 return false;
338 } else if (ch != '>') {
339 _CFReportError(parser, kCFXMLErrorMalformedComment, "Found \"--\" within a comment");
340 return false;
341 } else if (report) {
342 parser->node->dataTypeID = kCFXMLNodeTypeComment;
343 parser->node->additionalData = NULL;
344 return reportNewLeaf(parser);
345 } else {
346 return true;
347 }
348 }
349
350 /*
351 [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
352 [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
353 */
354 // parser should be set to the first character after "<?"
355 static Boolean parseProcessingInstruction(CFXMLParserRef parser, Boolean report) {
356 const UniChar piTermination[2] = {'?', '>'};
357 CFMutableStringRef str;
358 CFStringRef name;
359
360 if (!_inputStreamScanXMLName(&parser->input, false, &name)) {
361 _CFReportError(parser, kCFXMLErrorMalformedProcessingInstruction, "Found malformed processing instruction");
362 return false;
363 }
364 _inputStreamSkipWhitespace(&parser->input, NULL);
365 str = (report && *parser->top) ? CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser)) : NULL;
366 if (!_inputStreamScanToCharacters(&parser->input, piTermination, 2, str)) {
367 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing processing instruction");
368 if (str) CFRelease(str);
369 return false;
370 }
371
372 if (str) {
373 CFXMLProcessingInstructionInfo data;
374 Boolean result;
375 CFStringRef tmp = parser->node->dataString;
376 parser->node->dataTypeID = kCFXMLNodeTypeProcessingInstruction;
377 parser->node->dataString = name;
378 data.dataString = str;
379 parser->node->additionalData = &data;
380 result = reportNewLeaf(parser);
381 parser->node->additionalData = NULL;
382 parser->node->dataString = tmp;
383 CFRelease(str);
384 return result;
385 } else {
386 return true;
387 }
388 }
389
390 /*
391 [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
392 */
393 static const UniChar _DoctypeOpening[7] = {'D', 'O', 'C', 'T', 'Y', 'P', 'E'};
394 // first character should be immediately after the "<!"
395 static Boolean parseDTD(CFXMLParserRef parser) {
396 UniChar ch;
397 Boolean success, hasExtID = false;
398 CFXMLDocumentTypeInfo docData = {{NULL, NULL}};
399 void *dtdStructure = NULL;
400 CFStringRef name;
401
402 // First pass "DOCTYPE"
403 success = _inputStreamMatchString(&parser->input, _DoctypeOpening, 7);
404 success = success && _inputStreamSkipWhitespace(&parser->input, NULL) != 0;
405 success = success && _inputStreamScanXMLName(&parser->input, false, &name);
406 if (success) {
407 _inputStreamSkipWhitespace(&parser->input, NULL);
408 success = _inputStreamPeekCharacter(&parser->input, &ch);
409 } else {
410 // didn't make it past "DOCTYPE" successfully.
411 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found malformed DTD");
412 return false;
413 }
414 if (success && ch != '[' && ch != '>') {
415 // ExternalID
416 hasExtID = true;
417 success = parseExternalID(parser, false, &(docData.externalID));
418 if (success) {
419 _inputStreamSkipWhitespace(&parser->input, NULL);
420 success = _inputStreamPeekCharacter(&parser->input, &ch);
421 }
422 }
423
424 if (!(parser->options & kCFXMLParserSkipMetaData) && *(parser->top)) {
425 CFStringRef tmp = parser->node->dataString;
426 parser->node->dataTypeID = kCFXMLNodeTypeDocumentType;
427 parser->node->dataString = name;
428 parser->node->additionalData = &docData;
429 dtdStructure = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info);
430 if (dtdStructure && parser->status == kCFXMLStatusParseInProgress) {
431 INVOKE_CALLBACK4(parser->callBacks.addChild, parser, *parser->top, dtdStructure, parser->context.info);
432 }
433 parser->node->additionalData = NULL;
434 parser->node->dataString = tmp;
435 if (parser->status != kCFXMLStatusParseInProgress) {
436 // callback called CFXMLParserAbort()
437 _CFReportError(parser, parser->status, NULL);
438 return false;
439 }
440 } else {
441 dtdStructure = NULL;
442 }
443 if (docData.externalID.publicID) CFRelease(docData.externalID.publicID);
444 if (docData.externalID.systemID) CFRelease(docData.externalID.systemID);
445 pushXMLNode(parser, dtdStructure);
446
447 if (success && ch == '[') {
448 // inline DTD
449 _inputStreamGetCharacter(&parser->input, &ch);
450 if (!parseInlineDTD(parser)) return false;
451 _inputStreamSkipWhitespace(&parser->input, NULL);
452 success = _inputStreamGetCharacter(&parser->input, &ch) && ch == '>';
453 } else if (success && ch == '>') {
454 // End of the DTD
455 _inputStreamGetCharacter(&parser->input, &ch);
456 }
457 if (!success) {
458 if (_inputStreamAtEOF(&parser->input)) {
459 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing DTD");
460 } else {
461 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found malformed DTD");
462 }
463 return false;
464 }
465
466 parser->top --; // Remove dtdStructure from the stack
467
468 if (success && dtdStructure) {
469 INVOKE_CALLBACK3(parser->callBacks.endXMLStructure, parser, dtdStructure, parser->context.info);
470 if (parser->status != kCFXMLStatusParseInProgress) {
471 _CFReportError(parser, parser->status, NULL);
472 return false;
473 }
474 }
475 return true;
476 }
477
478 /*
479 [69] PEReference ::= '%' Name ';'
480 */
481 static Boolean parsePhysicalEntityReference(CFXMLParserRef parser) {
482 UniChar ch;
483 CFStringRef name;
484 if (!_inputStreamScanXMLName(&parser->input, false, &name)) {
485 _CFReportError(parser, kCFXMLErrorMalformedName, "Found malformed name while parsing physical entity reference");
486 return false;
487 } else if (!_inputStreamGetCharacter(&parser->input, &ch)) {
488 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing physical entity reference");
489 return false;
490 } else if (ch != ';') {
491 _CFReportError(parser, kCFXMLErrorMalformedName, "Found malformed name while parsing physical entity reference");
492 return false;
493 } else if (!(parser->options & kCFXMLParserSkipMetaData) && *(parser->top)) {
494 CFXMLEntityReferenceInfo myData;
495 Boolean result;
496 CFStringRef tmp = parser->node->dataString;
497 parser->node->dataTypeID = kCFXMLNodeTypeEntityReference;
498 parser->node->dataString = name;
499 myData.entityType = kCFXMLEntityTypeParameter;
500 parser->node->additionalData = &myData;
501 result = reportNewLeaf(parser);
502 parser->node->additionalData = NULL;
503 parser->node->dataString = tmp;
504 return result;
505 } else {
506 return true;
507 }
508 }
509
510 /*
511 [54] AttType ::= StringType | TokenizedType | EnumeratedType
512 [55] StringType ::= 'CDATA'
513 [56] TokenizedType ::= 'ID' | 'IDREF'| 'IDREFS'| 'ENTITY'| 'ENTITIES'| 'NMTOKEN'| 'NMTOKENS'
514 [57] EnumeratedType ::= NotationType | Enumeration
515 [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
516 [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
517 */
518 static Boolean parseEnumeration(CFXMLParserRef parser, Boolean useNMTokens) {
519 UniChar ch;
520 Boolean done = false;
521 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
522 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
523 return false;
524 } else if (ch != '(') {
525 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
526 return false;
527 }
528 _inputStreamSkipWhitespace(&parser->input, NULL);
529 if (!_inputStreamScanXMLName(&parser->input, useNMTokens, NULL)) {
530 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
531 return false;
532 }
533 while (!done) {
534 _inputStreamSkipWhitespace(&parser->input, NULL);
535 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
536 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
537 return false;
538 } else if (ch == ')') {
539 done = true;
540 } else if (ch == '|') {
541 _inputStreamSkipWhitespace(&parser->input, NULL);
542 if (!_inputStreamScanXMLName(&parser->input, useNMTokens, NULL)) {
543 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
544 return false;
545 }
546 } else {
547 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
548 return false;
549 }
550 }
551 return true;
552 }
553
554 static Boolean parseAttributeType(CFXMLParserRef parser, CFMutableStringRef str) {
555 Boolean success = false;
556 static const UniChar attTypeStrings[6][8] = {
557 {'C', 'D', 'A', 'T', 'A', '\0', '\0', '\0'},
558 {'I', 'D', 'R', 'E', 'F', 'S', '\0', '\0'},
559 {'E', 'N', 'T', 'I', 'T', 'Y', '\0', '\0'},
560 {'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S'},
561 {'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S'},
562 {'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N'} };
563 if (str) _inputStreamSetMark(&parser->input);
564 if (_inputStreamMatchString(&parser->input, attTypeStrings[0], 5) ||
565 _inputStreamMatchString(&parser->input, attTypeStrings[1], 6) ||
566 _inputStreamMatchString(&parser->input, attTypeStrings[1], 5) ||
567 _inputStreamMatchString(&parser->input, attTypeStrings[1], 2) ||
568 _inputStreamMatchString(&parser->input, attTypeStrings[2], 6) ||
569 _inputStreamMatchString(&parser->input, attTypeStrings[3], 8) ||
570 _inputStreamMatchString(&parser->input, attTypeStrings[4], 8) ||
571 _inputStreamMatchString(&parser->input, attTypeStrings[4], 7)) {
572 success = true;
573 } else if (_inputStreamMatchString(&parser->input, attTypeStrings[5], 8)) {
574 // Notation
575 if (_inputStreamSkipWhitespace(&parser->input, NULL) == 0) {
576 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
577 success = false;
578 } else {
579 success = parseEnumeration(parser, false);
580 }
581 } else {
582 success = parseEnumeration(parser, true);
583 }
584 if (str) {
585 if (success) {
586 _inputStreamGetCharactersFromMark(&parser->input, str);
587 }
588 _inputStreamClearMark(&parser->input);
589 }
590 return success;
591 }
592
593 /* [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) */
594 static Boolean parseAttributeDefaultDeclaration(CFXMLParserRef parser, CFMutableStringRef str) {
595 const UniChar strings[3][8] = {
596 {'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D'},
597 {'I', 'M', 'P', 'L', 'I', 'E', 'D', '\0'},
598 {'F', 'I', 'X', 'E', 'D', '\0', '\0', '\0'}};
599 UniChar ch;
600 Boolean success;
601 if (str) _inputStreamSetMark(&parser->input);
602 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
603 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
604 success = false;
605 } else if (ch == '#') {
606 if (_inputStreamMatchString(&parser->input, strings[0], 8) ||
607 _inputStreamMatchString(&parser->input, strings[1], 7)) {
608 success = true;
609 } else if (!_inputStreamMatchString(&parser->input, strings[2], 5) || _inputStreamSkipWhitespace(&parser->input, NULL) == 0) {
610 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
611 success = false;
612 } else {
613 // we fall through if "#FIXED" was matched, and at least one whitespace character was stripped.
614 success = parseAttributeValue(parser, NULL);
615 }
616 } else {
617 _inputStreamReturnCharacter(&parser->input, ch);
618 success = parseAttributeValue(parser, NULL);
619 }
620 if (str) {
621 if (success) {
622 _inputStreamGetCharactersFromMark(&parser->input, str);
623 }
624 _inputStreamClearMark(&parser->input);
625 }
626 return success;
627 }
628
629 /*
630 [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
631 [53] AttDef ::= S Name S AttType S DefaultDecl
632 */
633 static Boolean parseAttributeListDeclaration(CFXMLParserRef parser) {
634 const UniChar attList[7] = {'A', 'T', 'T', 'L', 'I', 'S', 'T'};
635 CFXMLAttributeListDeclarationInfo attListData;
636 CFXMLAttributeDeclarationInfo attributeArray[8], *attributes=attributeArray;
637 CFIndex capacity = 8;
638 UniChar ch;
639 Boolean success = true;
640 CFStringRef name;
641 if (!_inputStreamMatchString(&parser->input, attList, 7) ||
642 _inputStreamSkipWhitespace(&parser->input, NULL) == 0 ||
643 !_inputStreamScanXMLName(&parser->input, false, &name)) {
644 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
645 return false;
646 }
647 attListData.numberOfAttributes = 0;
648 if (!(*parser->top) || (parser->options & kCFXMLParserSkipMetaData)) {
649 // Use this to mark that we don't need to collect attribute information to report to the client. Ultimately, we may want to collect this for our own use (for validation, for instance), but for now, the only reason we would create it would be for the client. -- REW, 2/9/2000
650 attributes = NULL;
651 }
652 while (_inputStreamPeekCharacter(&parser->input, &ch) && ch != '>' && _inputStreamSkipWhitespace(&parser->input, NULL) != 0) {
653 CFXMLAttributeDeclarationInfo *attribute = NULL;
654 if (_inputStreamPeekCharacter(&parser->input, &ch) && ch == '>')
655 break;
656 if (attributes) {
657 if (capacity == attListData.numberOfAttributes) {
658 capacity = 2*capacity;
659 if (attributes != attributeArray) {
660 attributes = (CFXMLAttributeDeclarationInfo *)CFAllocatorReallocate(CFGetAllocator(parser), attributes, capacity * sizeof(CFXMLAttributeDeclarationInfo), 0);
661 } else {
662 attributes = (CFXMLAttributeDeclarationInfo *)CFAllocatorAllocate(CFGetAllocator(parser), capacity * sizeof(CFXMLAttributeDeclarationInfo), 0);
663 }
664 }
665 attribute = &(attributes[attListData.numberOfAttributes]);
666 // Much better if we can somehow create these strings immutable - then if the client (or we ourselves) has to copy them, they will end up multiply-retained, rather than having a new alloc and data copy performed. -- REW, 2/9/2000
667 attribute->typeString = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
668 attribute->defaultString = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
669 }
670 if (!_inputStreamScanXMLName(&parser->input, false, &(attribute->attributeName)) || (_inputStreamSkipWhitespace(&parser->input, NULL) == 0)) {
671 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
672 success = false;
673 break;
674 }
675 if (!parseAttributeType(parser, attribute ? (CFMutableStringRef)attribute->typeString : NULL)) {
676 success = false;
677 break;
678 }
679 if (_inputStreamSkipWhitespace(&parser->input, NULL) == 0) {
680 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
681 success = false;
682 break;
683 }
684 if (!parseAttributeDefaultDeclaration(parser, attribute ? (CFMutableStringRef)attribute->defaultString : NULL)) {
685 success = false;
686 break;
687 }
688 attListData.numberOfAttributes ++;
689 }
690 if (success) {
691 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
692 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
693 success = false;
694 } else if (ch != '>') {
695 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
696 success = false;
697 } else if (attributes) {
698 CFStringRef tmp = parser->node->dataString;
699 parser->node->dataTypeID = kCFXMLNodeTypeAttributeListDeclaration;
700 parser->node->dataString = name;
701 attListData.attributes = attributes;
702 parser->node->additionalData = (void *)&attListData;
703 success = reportNewLeaf(parser);
704 parser->node->additionalData = NULL;
705 parser->node->dataString = tmp;
706 }
707 }
708 if (attributes) {
709 // Free up all that memory
710 CFIndex idx;
711 for (idx = 0; idx < attListData.numberOfAttributes; idx ++) {
712 // Do not release attributeName here; it's a uniqued string from scanXMLName
713 CFRelease(attributes[idx].typeString);
714 CFRelease(attributes[idx].defaultString);
715 }
716 if (attributes != attributeArray) {
717 CFAllocatorDeallocate(CFGetAllocator(parser), attributes);
718 }
719 }
720 return success;
721 }
722
723 CF_INLINE Boolean parseSystemLiteral(CFXMLParserRef parser, CFXMLExternalID *extID) {
724 Boolean success;
725 if (extID) {
726 CFMutableStringRef urlStr = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
727 if (_inputStreamScanQuotedString(&parser->input, urlStr)) {
728 success = true;
729 extID->systemID = CFURLCreateWithString(CFGetAllocator(parser), urlStr, parser->input.url);
730 } else {
731 extID->systemID = NULL;
732 success = false;
733 }
734 CFRelease(urlStr);
735 } else {
736 success = _inputStreamScanQuotedString(&parser->input, NULL);
737 }
738 return success;
739 }
740
741 /*
742 [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
743 [83] PublicID ::= 'PUBLIC' S PubidLiteral
744 [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
745 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
746 [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
747 */
748 // This does NOT report errors itself; caller can check to see if parser->input is at EOF to determine whether the formatting failed or unexpected EOF occurred. -- REW, 2/2/2000
749 static Boolean parseExternalID(CFXMLParserRef parser, Boolean alsoAcceptPublicID, CFXMLExternalID *extID) {
750 const UniChar publicString[6] = {'P', 'U', 'B', 'L', 'I', 'C'};
751 const UniChar systemString[6] = {'S', 'Y', 'S', 'T', 'E', 'M'};
752 Boolean success;
753 if (extID) {
754 extID->systemID = NULL;
755 extID->publicID = NULL;
756 }
757 if (_inputStreamMatchString(&parser->input, publicString, 6)) {
758 success = _inputStreamSkipWhitespace(&parser->input, NULL) != 0;
759 if (extID) {
760 extID->publicID = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
761 success = success && _inputStreamScanQuotedString(&parser->input, (CFMutableStringRef)extID->publicID);
762 } else {
763 success = success && _inputStreamScanQuotedString(&parser->input, NULL);
764 }
765 if (success) {
766 UniChar ch;
767 if (alsoAcceptPublicID) {
768 _inputStreamSetMark(&parser->input); // In case we need to roll back the parser
769 }
770 if (_inputStreamSkipWhitespace(&parser->input, NULL) == 0
771 || !_inputStreamPeekCharacter(&parser->input, &ch)
772 || (ch != '\'' && ch != '\"')
773 || !parseSystemLiteral(parser, extID)) {
774 success = alsoAcceptPublicID;
775 if (alsoAcceptPublicID) {
776 _inputStreamBackUpToMark(&parser->input);
777 }
778 } else {
779 success = true;
780 }
781 if (alsoAcceptPublicID) {
782 _inputStreamClearMark(&parser->input);
783 }
784 }
785 } else if (_inputStreamMatchString(&parser->input, systemString, 6)) {
786 success = _inputStreamSkipWhitespace(&parser->input, NULL) != 0 && parseSystemLiteral(parser, extID);
787 } else {
788 success = false;
789 }
790 return success;
791 }
792
793 /*
794 [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
795 */
796 static Boolean parseNotationDeclaration(CFXMLParserRef parser) {
797 static UniChar notationString[8] = {'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N'};
798 Boolean report = *(parser->top) && !(parser->options & kCFXMLParserSkipMetaData);
799 CFXMLNotationInfo notationData = {{NULL, NULL}};
800 CFStringRef name;
801 Boolean success =
802 _inputStreamMatchString(&parser->input, notationString, 8) &&
803 _inputStreamSkipWhitespace(&parser->input, NULL) != 0 &&
804 _inputStreamScanXMLName(&parser->input, false, report ? &name : NULL) &&
805 _inputStreamSkipWhitespace(&parser->input, NULL) != 0 &&
806 parseExternalID(parser, true, report ? &(notationData.externalID) : NULL);
807
808 if (success) {
809 UniChar ch;
810 _inputStreamSkipWhitespace(&parser->input, NULL);
811 success = (_inputStreamGetCharacter(&parser->input, &ch) && ch == '>');
812 }
813 if (!success) {
814 if (_inputStreamAtEOF(&parser->input)) {
815 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
816 } else {
817 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
818 }
819 } else if (report) {
820 CFStringRef tmp = parser->node->dataString;
821 parser->node->dataTypeID = kCFXMLNodeTypeNotation;
822 parser->node->dataString = name;
823 parser->node->additionalData = &notationData;
824 success = reportNewLeaf(parser);
825 parser->node->additionalData = NULL;
826 parser->node->dataString = tmp;
827 }
828 if (notationData.externalID.systemID) CFRelease(notationData.externalID.systemID);
829 if (notationData.externalID.publicID) CFRelease(notationData.externalID.publicID);
830 return success;
831 }
832
833 /*
834 [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
835 [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
836 [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
837 */
838 static Boolean parseChoiceOrSequence(CFXMLParserRef parser, Boolean pastParen) {
839 UniChar ch, separator;
840 if (!pastParen) {
841 if (!_inputStreamGetCharacter(&parser->input, &ch) || ch != '(') return false;
842 _inputStreamSkipWhitespace(&parser->input, NULL);
843 }
844 if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false;
845
846 /* Now scanning cp, production [48] */
847 if (ch == '(') {
848 if (!parseChoiceOrSequence(parser, false)) return false;
849 } else {
850 if (!_inputStreamScanXMLName(&parser->input, false, NULL)) return false;
851 }
852 if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false;
853 if (ch == '?' || ch == '*' || ch == '+') _inputStreamGetCharacter(&parser->input, &ch);
854
855 /* Now past cp */
856 _inputStreamSkipWhitespace(&parser->input, NULL);
857 if (!_inputStreamGetCharacter(&parser->input, &ch)) return false;
858 if (ch == ')') return true;
859 if (ch != '|' && ch != ',') return false;
860 separator = ch;
861 while (ch == separator) {
862 _inputStreamSkipWhitespace(&parser->input, NULL);
863 if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false;
864 if (ch != '(') {
865 if (!_inputStreamScanXMLName(&parser->input, false, NULL)) return false;
866 } else if (!parseChoiceOrSequence(parser, false)) {
867 return false;
868 }
869 _inputStreamSkipWhitespace(&parser->input, NULL);
870 if (!_inputStreamGetCharacter(&parser->input, &ch)) return false;
871 }
872 return ch == ')';
873 }
874
875 /*
876 [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')'
877 */
878 static Boolean parseMixedElementContent(CFXMLParserRef parser) {
879 static const UniChar pcdataString[7] = {'#', 'P', 'C', 'D', 'A', 'T', 'A'};
880 UniChar ch;
881 if (!_inputStreamMatchString(&parser->input, pcdataString, 7)) return false;
882 _inputStreamSkipWhitespace(&parser->input, NULL);
883 if (!_inputStreamGetCharacter(&parser->input, &ch) && (ch == ')' || ch == '|')) return false;
884 if (ch == ')') return true;
885
886 while (ch == '|') {
887 _inputStreamSkipWhitespace(&parser->input, NULL);
888 if (!_inputStreamScanXMLName(&parser->input, false, NULL)) return false;
889 _inputStreamSkipWhitespace(&parser->input, NULL);
890 if (!_inputStreamGetCharacter(&parser->input, &ch)) return false;
891 }
892 if (ch != ')') return false;
893 if (!_inputStreamGetCharacter(&parser->input, &ch) || ch != '*') return false;
894 return true;
895 }
896
897 /*
898 [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
899 [47] children ::= (choice | seq) ('?' | '*' | '+')?
900 */
901 static Boolean parseElementContentSpec(CFXMLParserRef parser) {
902 static const UniChar eltContentEmpty[5] = {'E', 'M', 'P', 'T', 'Y'};
903 static const UniChar eltContentAny[3] = {'A', 'N', 'Y'};
904 UniChar ch;
905 if (_inputStreamMatchString(&parser->input, eltContentEmpty, 5) || _inputStreamMatchString(&parser->input, eltContentAny, 3)) {
906 return true;
907 } else if (!_inputStreamPeekCharacter(&parser->input, &ch) || ch != '(') {
908 return false;
909 } else {
910 // We want to know if we have a Mixed per production [51]. If we don't, we will need to back up and call the parseChoiceOrSequence function. So we set the mark now. -- REW, 2/10/2000
911 _inputStreamGetCharacter(&parser->input, &ch);
912 _inputStreamSkipWhitespace(&parser->input, NULL);
913 if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false;
914 if (ch == '#') {
915 // Mixed
916 return parseMixedElementContent(parser);
917 } else {
918 if (parseChoiceOrSequence(parser, true)) {
919 if (_inputStreamPeekCharacter(&parser->input, &ch) && (ch == '*' || ch == '?' || ch == '+')) {
920 _inputStreamGetCharacter(&parser->input, &ch);
921 }
922 return true;
923 } else {
924 return false;
925 }
926 }
927 }
928 }
929
930 /*
931 [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
932 */
933 static Boolean parseElementDeclaration(CFXMLParserRef parser) {
934 Boolean report = *(parser->top) && !(parser->options & kCFXMLParserSkipMetaData);
935 Boolean success;
936 static const UniChar eltChars[7] = {'E', 'L', 'E', 'M', 'E', 'N', 'T'};
937 UniChar ch = '>';
938 CFMutableStringRef contentDesc = NULL;
939 CFStringRef name;
940 success = _inputStreamMatchString(&parser->input, eltChars, 7)
941 && _inputStreamSkipWhitespace(&parser->input, NULL) != 0
942 && _inputStreamScanXMLName(&parser->input, false, report ? &name : NULL)
943 && _inputStreamSkipWhitespace(&parser->input, NULL) != 0;
944 if (success) {
945 if (report) _inputStreamSetMark(&parser->input);
946 success = parseElementContentSpec(parser);
947 if (success && report) {
948 contentDesc = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
949 _inputStreamGetCharactersFromMark(&parser->input, contentDesc);
950 }
951 if (report) _inputStreamClearMark(&parser->input);
952 if (success) _inputStreamSkipWhitespace(&parser->input, NULL);
953 success = success && _inputStreamMatchString(&parser->input, &ch, 1);
954 }
955 if (!success) {
956 if (_inputStreamAtEOF(&parser->input)) {
957 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
958 } else {
959 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
960 }
961 } else if (report) {
962 CFXMLElementTypeDeclarationInfo eltData;
963 CFStringRef tmp = parser->node->dataString;
964 parser->node->dataTypeID = kCFXMLNodeTypeElementTypeDeclaration;
965 parser->node->dataString = name;
966 eltData.contentDescription = contentDesc;
967 parser->node->additionalData = &eltData;
968 success = reportNewLeaf(parser);
969 parser->node->additionalData = NULL;
970 parser->node->dataString = tmp;
971 }
972 if (contentDesc) CFRelease(contentDesc);
973 return success;
974 }
975
976 /*
977 [70] EntityDecl ::= GEDecl | PEDecl
978 [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
979 [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
980 [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
981 [74] PEDef ::= EntityValue | ExternalID
982 [76] NDataDecl ::= S 'NDATA' S Name
983 [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | "'" ([^%&'] | PEReference | Reference)* "'"
984 */
985 static Boolean parseEntityDeclaration(CFXMLParserRef parser) {
986 const UniChar entityStr[6] = {'E', 'N', 'T', 'I', 'T', 'Y'};
987 UniChar ch;
988 Boolean isPEDecl = false;
989 CFXMLEntityInfo entityData;
990 CFStringRef name;
991 Boolean report = *(parser->top) && !(parser->options & kCFXMLParserSkipMetaData);
992 Boolean success =
993 _inputStreamMatchString(&parser->input, entityStr, 6) &&
994 (_inputStreamSkipWhitespace(&parser->input, NULL) != 0) &&
995 _inputStreamPeekCharacter(&parser->input, &ch);
996
997 entityData.replacementText = NULL;
998 entityData.entityID.publicID = NULL;
999 entityData.entityID.systemID = NULL;
1000 entityData.notationName = NULL;
1001 // We will set entityType immediately before reporting
1002
1003 if (success && ch == '%') {
1004 _inputStreamGetCharacter(&parser->input, &ch);
1005 success = _inputStreamSkipWhitespace(&parser->input, NULL) != 0;
1006 isPEDecl = true;
1007 }
1008 success = success && _inputStreamScanXMLName(&parser->input, false, report ? &name : NULL) && (_inputStreamSkipWhitespace(&parser->input, NULL) != 0) && _inputStreamPeekCharacter(&parser->input, &ch);
1009 if (success && (ch == '\"' || ch == '\'')) {
1010 // EntityValue
1011 // This is not quite correct - the string scanned cannot contain '%' or '&' unless it's as part of a valid entity reference -- REW, 2/2/2000
1012 if (report) {
1013 entityData.replacementText = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
1014 success = _inputStreamScanQuotedString(&parser->input, (CFMutableStringRef)entityData.replacementText);
1015 } else {
1016 success = _inputStreamScanQuotedString(&parser->input, NULL);
1017 }
1018 } else if (success) {
1019 // ExternalID
1020 success = parseExternalID(parser, false, report ? &(entityData.entityID) : NULL);
1021 if (success && !isPEDecl && _inputStreamSkipWhitespace(&parser->input, NULL) != 0) {
1022 // There could be an option NDataDecl
1023 // Don't we need to set entityData.notationName? -- REW, 3/6/2000
1024 const UniChar nDataStr[5] = {'N', 'D', 'A', 'T', 'A'};
1025 if (_inputStreamMatchString(&parser->input, nDataStr, 5)) {
1026 success = (_inputStreamSkipWhitespace(&parser->input, NULL) != 0) && _inputStreamScanXMLName(&parser->input, false, NULL);
1027 }
1028 }
1029 }
1030 if (success) {
1031 _inputStreamSkipWhitespace(&parser->input, NULL);
1032 success = _inputStreamGetCharacter(&parser->input, &ch) && ch == '>';
1033 }
1034 if (!success) {
1035 if (_inputStreamAtEOF(&parser->input)) {
1036 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
1037 } else {
1038 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
1039 }
1040 } else {
1041 CFStringRef tmp = parser->node->dataString;
1042 if (isPEDecl) entityData.entityType = kCFXMLEntityTypeParameter;
1043 else if (entityData.replacementText) entityData.entityType = kCFXMLEntityTypeParsedInternal;
1044 else if (!entityData.notationName) entityData.entityType = kCFXMLEntityTypeParsedExternal;
1045 else entityData.entityType = kCFXMLEntityTypeUnparsed;
1046 parser->node->dataTypeID = kCFXMLNodeTypeEntity;
1047 parser->node->dataString = name;
1048 parser->node->additionalData = &entityData;
1049 success = reportNewLeaf(parser);
1050 parser->node->additionalData = NULL;
1051 parser->node->dataString = tmp;
1052 if (entityData.replacementText) CFRelease(entityData.replacementText);
1053 }
1054 if (entityData.entityID.publicID) CFRelease(entityData.entityID.publicID);
1055 if (entityData.entityID.systemID) CFRelease(entityData.entityID.systemID);
1056 return success;
1057 }
1058
1059 /*
1060 [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1061 [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
1062 */
1063 // First character should be just past '['
1064 static Boolean parseInlineDTD(CFXMLParserRef parser) {
1065 Boolean success = true;
1066 while (success && !_inputStreamAtEOF(&parser->input)) {
1067 UniChar ch;
1068
1069 parseWhitespace(parser);
1070 if (!_inputStreamGetCharacter(&parser->input, &ch)) break;
1071 if (ch == '%') {
1072 // PEReference
1073 success = parsePhysicalEntityReference(parser);
1074 } else if (ch == '<') {
1075 // markupdecl
1076 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
1077 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
1078 return false;
1079 }
1080 if (ch == '?') {
1081 // Processing Instruction
1082 success = parseProcessingInstruction(parser, true); // We can safely pass true here, because *parser->top will be NULL if kCFXMLParserSkipMetaData is true
1083 } else if (ch == '!') {
1084 UniChar dashes[2] = {'-', '-'};
1085 if (_inputStreamMatchString(&parser->input, dashes, 2)) {
1086 // Comment
1087 success = parseComment(parser, true);
1088 } else {
1089 // elementdecl | AttListDecl | EntityDecl | NotationDecl
1090 if (!_inputStreamPeekCharacter(&parser->input, &ch)) {
1091 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
1092 return false;
1093 } else if (ch == 'A') {
1094 // AttListDecl
1095 success = parseAttributeListDeclaration(parser);
1096 } else if (ch == 'N') {
1097 success = parseNotationDeclaration(parser);
1098 } else if (ch == 'E') {
1099 // elementdecl | EntityDecl
1100 _inputStreamGetCharacter(&parser->input, &ch);
1101 if (!_inputStreamPeekCharacter(&parser->input, &ch)) {
1102 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
1103 return false;
1104 }
1105 _inputStreamReturnCharacter(&parser->input, 'E');
1106 if (ch == 'L') {
1107 success = parseElementDeclaration(parser);
1108 } else if (ch == 'N') {
1109 success = parseEntityDeclaration(parser);
1110 } else {
1111 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
1112 return false;
1113 }
1114 } else {
1115 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
1116 return false;
1117 }
1118 }
1119 } else {
1120 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
1121 return false;
1122 }
1123 } else if (ch == ']') {
1124 return true;
1125 } else {
1126 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
1127 return false;
1128 }
1129 }
1130 if (success) {
1131 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
1132 }
1133 return false;
1134 }
1135
1136 /*
1137 [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1138 */
1139 static Boolean parseTagContent(CFXMLParserRef parser) {
1140 while (!_inputStreamAtEOF(&parser->input)) {
1141 UniChar ch;
1142 CFIndex numWhitespaceCharacters;
1143
1144 _inputStreamSetMark(&parser->input);
1145 numWhitespaceCharacters = _inputStreamSkipWhitespace(&parser->input, NULL);
1146 // Don't report the whitespace yet; if the first thing we see is character data, we put the whitespace back and report it as part of the character data.
1147 if (!_inputStreamGetCharacter(&parser->input, &ch)) break; // break == report unexpected EOF
1148
1149 if (ch != '<' && ch != '&') { // CharData
1150 // Back off the whitespace; we'll report it with the PCData
1151 _inputStreamBackUpToMark(&parser->input);
1152 _inputStreamClearMark(&parser->input);
1153 if (!parsePCData(parser)) return false;
1154 if(_inputStreamComposingErrorOccurred(&parser->input)) {
1155 _CFReportError(parser, kCFXMLErrorEncodingConversionFailure, "Encountered string encoding error");
1156 return false;
1157 }
1158 continue;
1159 }
1160
1161 // element | Reference | CDSect | PI | Comment
1162 // We can safely report any whitespace now
1163 if (!(parser->options & kCFXMLParserSkipWhitespace) && numWhitespaceCharacters != 0 && *(parser->top)) {
1164 _inputStreamReturnCharacter(&parser->input, ch);
1165 _inputStreamGetCharactersFromMark(&parser->input, (CFMutableStringRef)(parser->node->dataString));
1166 parser->node->dataTypeID = kCFXMLNodeTypeWhitespace;
1167 parser->node->additionalData = NULL;
1168 if (!reportNewLeaf(parser)) return false;
1169 _inputStreamGetCharacter(&parser->input, &ch);
1170 }
1171 _inputStreamClearMark(&parser->input);
1172
1173 if (ch == '&') {
1174 // Reference; for the time being, we don't worry about processing these; just report them as Entity references
1175 if (!parseEntityReference(parser, true)) return false;
1176 continue;
1177 }
1178
1179 // ch == '<'; element | CDSect | PI | Comment
1180 if (!_inputStreamPeekCharacter(&parser->input, &ch)) break;
1181 if (ch == '?') { // PI
1182 _inputStreamGetCharacter(&parser->input, &ch);
1183 if (!parseProcessingInstruction(parser, true))
1184 return false;
1185 } else if (ch == '/') { // end tag; we're passing outside of content's production
1186 _inputStreamReturnCharacter(&parser->input, '<'); // Back off to the '<'
1187 return true;
1188 } else if (ch != '!') { // element
1189 if (!parseTag(parser)) return false;
1190 } else {
1191 // Comment | CDSect
1192 UniChar dashes[3] = {'!', '-', '-'};
1193 if (_inputStreamMatchString(&parser->input, dashes, 3)) {
1194 // Comment
1195 if (!parseComment(parser, true)) return false;
1196 } else {
1197 // Should have a CDSect; back off the "<!" and call parseCDSect
1198 _inputStreamReturnCharacter(&parser->input, '<');
1199 if (!parseCDSect(parser)) return false;
1200 }
1201 }
1202 }
1203
1204 if(_inputStreamComposingErrorOccurred(&parser->input)) {
1205 _CFReportError(parser, kCFXMLErrorEncodingConversionFailure, "Encountered string encoding error");
1206 return false;
1207 }
1208 // Only way to get here is if premature EOF was found
1209 //#warning CF:Include the tag name here
1210 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing tag content");
1211 return false;
1212 }
1213
1214 static Boolean parseCDSect(CFXMLParserRef parser) {
1215 const UniChar _CDSectOpening[9] = {'<', '!', '[', 'C', 'D', 'A', 'T', 'A', '['};
1216 const UniChar _CDSectClose[3] = {']', ']', '>'};
1217 if (!_inputStreamMatchString(&parser->input, _CDSectOpening, 9)) {
1218 _CFReportError(parser, kCFXMLErrorMalformedCDSect, "Encountered bad prefix to a presumed CDATA section");
1219 return false;
1220 }
1221 if (!_inputStreamScanToCharacters(&parser->input, _CDSectClose, 3, (CFMutableStringRef)(parser->node->dataString))) {
1222 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing CDATA section");
1223 return false;
1224 }
1225
1226 parser->node->dataTypeID = kCFXMLNodeTypeCDATASection;
1227 parser->node->additionalData = NULL;
1228 return reportNewLeaf(parser);
1229 }
1230
1231 /*
1232 [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1233 */
1234 static Boolean validateCharacterReference(CFStringRef str) {
1235 Boolean isHex;
1236 CFIndex idx, len = CFStringGetLength(str);
1237 if (len < 2) return false;
1238 if (CFStringGetCharacterAtIndex(str, 0) != '#') return false;
1239 if (CFStringGetCharacterAtIndex(str, 1) == 'x') {
1240 isHex = true;
1241 idx = 2;
1242 if (len == 2) return false;
1243 } else {
1244 isHex = false;
1245 idx = 1;
1246 }
1247
1248 while (idx < len) {
1249 UniChar ch;
1250 ch = CFStringGetCharacterAtIndex(str, idx);
1251 idx ++;
1252 if (!(ch <= '9' && ch >= '0') &&
1253 !(isHex && ((ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F')))) {
1254 break;
1255 }
1256 }
1257 return (idx == len);
1258 }
1259
1260 /*
1261 [67] Reference ::= EntityRef | CharRef
1262 [68] EntityRef ::= '&' Name ';'
1263 */
1264 static Boolean parseEntityReference(CFXMLParserRef parser, Boolean report) {
1265 UniChar ch;
1266 CFXMLEntityReferenceInfo entData;
1267 CFStringRef name = NULL;
1268 if (!_inputStreamPeekCharacter(&parser->input, &ch)) {
1269 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing EntityReference");
1270 return false;
1271 }
1272 if (ch == '#') {
1273 ch = ';';
1274 if (!_inputStreamScanToCharacters(&parser->input, &ch, 1, (CFMutableStringRef)parser->node->dataString)) {
1275 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing EntityReference");
1276 return false;
1277 } else if (!validateCharacterReference(parser->node->dataString)) {
1278 _CFReportError(parser, kCFXMLErrorMalformedCharacterReference, "Encountered illegal character while parsing character reference");
1279 return false;
1280 }
1281 entData.entityType = kCFXMLEntityTypeCharacter;
1282 name = parser->node->dataString;
1283 } else if (!_inputStreamScanXMLName(&parser->input, false, report ? &name : NULL) || !_inputStreamGetCharacter(&parser->input, &ch) || ch != ';') {
1284 if (_inputStreamAtEOF(&parser->input)) {
1285 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing EntityReference");
1286 return false;
1287 } else {
1288 _CFReportError(parser, kCFXMLErrorMalformedName, "Encountered malformed name while parsing EntityReference");
1289 return false;
1290 }
1291 } else {
1292 entData.entityType = kCFXMLEntityTypeParsedInternal;
1293 }
1294 if (report) {
1295 CFStringRef tmp = parser->node->dataString;
1296 Boolean success;
1297 parser->node->dataTypeID = kCFXMLNodeTypeEntityReference;
1298 parser->node->dataString = name;
1299 parser->node->additionalData = &entData;
1300 success = reportNewLeaf(parser);
1301 parser->node->additionalData = NULL;
1302 parser->node->dataString = tmp;
1303 return success;
1304 } else {
1305 return true;
1306 }
1307 }
1308
1309 #if 0
1310 // Kept from old entity reference parsing....
1311 {
1312 switch (*(parser->curr)) {
1313 case 'l': // "lt"
1314 if (len >= 3 && *(parser->curr+1) == 't' && *(parser->curr+2) == ';') {
1315 ch = '<';
1316 parser->curr += 3;
1317 break;
1318 }
1319 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
1320 return;
1321 case 'g': // "gt"
1322 if (len >= 3 && *(parser->curr+1) == 't' && *(parser->curr+2) == ';') {
1323 ch = '>';
1324 parser->curr += 3;
1325 break;
1326 }
1327 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
1328 return;
1329 case 'a': // "apos" or "amp"
1330 if (len < 4) { // Not enough characters for either conversion
1331 parser->errorString = CFStringCreateWithCString(parser->allocator, "Encountered unexpected EOF", kCFStringEncodingASCII);
1332 return;
1333 }
1334 if (*(parser->curr+1) == 'm') {
1335 // "amp"
1336 if (*(parser->curr+2) == 'p' && *(parser->curr+3) == ';') {
1337 ch = '&';
1338 parser->curr += 4;
1339 break;
1340 }
1341 } else if (*(parser->curr+1) == 'p') {
1342 // "apos"
1343 if (len > 4 && *(parser->curr+2) == 'o' && *(parser->curr+3) == 's' && *(parser->curr+4) == ';') {
1344 ch = '\'';
1345 parser->curr += 5;
1346 break;
1347 }
1348 }
1349 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
1350 return;
1351 case 'q': // "quote"
1352 if (len >= 6 && *(parser->curr+1) == 'u' && *(parser->curr+2) == 'o' && *(parser->curr+3) == 't' && *(parser->curr+4) == 'e' && *(parser->curr+5) == ';') {
1353 ch = '\"';
1354 parser->curr += 6;
1355 break;
1356 }
1357 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
1358 return;
1359 case '#':
1360 {
1361 UniChar num = 0;
1362 Boolean isHex = false;
1363 if ( len < 4) { // Not enough characters to make it all fit! Need at least "&#d;"
1364 parser->errorString = CFStringCreateWithCString(parser->allocator, "Encountered unexpected EOF", kCFStringEncodingASCII);
1365 return;
1366 }
1367 parser->curr ++;
1368 if (*(parser->curr) == 'x') {
1369 isHex = true;
1370 parser->curr ++;
1371 }
1372 while (parser->curr < parser->end) {
1373 ch = *(parser->curr);
1374 if (ch == ';') {
1375 CFStringAppendCharacters(string, &num, 1);
1376 parser->curr ++;
1377 return;
1378 }
1379 if (!isHex) num = num*10;
1380 else num = num << 4;
1381 if (ch <= '9' && ch >= '0') {
1382 num += (ch - '0');
1383 } else if (!isHex) {
1384 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unexpected character %c at line %d"), ch, lineNumber(parser));
1385 return;
1386 } else if (ch >= 'a' && ch <= 'f') {
1387 num += 10 + (ch - 'a');
1388 } else if (ch >= 'A' && ch <= 'F') {
1389 num += 10 + (ch - 'A');
1390 } else {
1391 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unexpected character %c at line %d"), ch, lineNumber(parser));
1392 return;
1393 }
1394 }
1395 parser->errorString = CFStringCreateWithCString(parser->allocator, "Encountered unexpected EOF", kCFStringEncodingASCII);
1396 return;
1397 }
1398 default:
1399 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
1400 return;
1401 }
1402 CFStringAppendCharacters(string, &ch, 1);
1403 }
1404 #endif
1405
1406 /*
1407 [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
1408 */
1409 static Boolean parsePCData(CFXMLParserRef parser) {
1410 UniChar ch;
1411 Boolean done = false;
1412 _inputStreamSetMark(&parser->input);
1413 while (!done && _inputStreamGetCharacter(&parser->input, &ch)) {
1414 switch (ch) {
1415 case '<':
1416 case '&':
1417 _inputStreamReturnCharacter(&parser->input, ch);
1418 done = true;
1419 break;
1420 case ']':
1421 {
1422 const UniChar endSequence[2] = {']', '>'};
1423 if (_inputStreamMatchString(&parser->input, endSequence, 2)) {
1424 _CFReportError(parser, kCFXMLErrorMalformedParsedCharacterData, "Encountered \"]]>\" in parsed character data");
1425 _inputStreamClearMark(&parser->input);
1426 return false;
1427 }
1428 break;
1429 }
1430 default:
1431 ;
1432 }
1433 }
1434 _inputStreamGetCharactersFromMark(&parser->input, (CFMutableStringRef)(parser->node->dataString));
1435 _inputStreamClearMark(&parser->input);
1436 parser->node->dataTypeID = kCFXMLNodeTypeText;
1437 parser->node->additionalData = NULL;
1438 return reportNewLeaf(parser);
1439 }
1440
1441 /*
1442 [42] ETag ::= '</' Name S? '>'
1443 */
1444 static Boolean parseCloseTag(CFXMLParserRef parser, CFStringRef tag) {
1445 const UniChar beginEndTag[2] = {'<', '/'};
1446 Boolean unexpectedEOF = false, mismatch = false;
1447 CFStringRef closeTag;
1448
1449 // We can get away with testing pointer equality between tag & closeTag because scanXMLName guarantees the strings it returns are unique.
1450 if (_inputStreamMatchString(&parser->input, beginEndTag, 2) && _inputStreamScanXMLName(&parser->input, false, &closeTag) && closeTag == tag) {
1451
1452 UniChar ch;
1453 _inputStreamSkipWhitespace(&parser->input, NULL);
1454 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
1455 unexpectedEOF = true;
1456 } else if (ch != '>') {
1457 mismatch = true;
1458 }
1459 } else if (_inputStreamAtEOF(&parser->input)) {
1460 unexpectedEOF = true;
1461 } else {
1462 mismatch = true;
1463 }
1464
1465 if (unexpectedEOF || mismatch) {
1466 if (unexpectedEOF) {
1467 parser->errorString = CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered unexpected EOF while parsing close tag for <%@>"), tag);
1468 parser->status = kCFXMLErrorUnexpectedEOF;
1469 if(parser->callBacks.handleError) INVOKE_CALLBACK3(parser->callBacks.handleError, parser, kCFXMLErrorUnexpectedEOF, parser->context.info);
1470 } else {
1471 parser->errorString = CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered malformed close tag for <%@>"), tag);
1472 parser->status = kCFXMLErrorMalformedCloseTag;
1473 if(parser->callBacks.handleError) INVOKE_CALLBACK3(parser->callBacks.handleError, parser, kCFXMLErrorMalformedCloseTag, parser->context.info);
1474 }
1475 return false;
1476 }
1477 return true;
1478 }
1479
1480 /*
1481 [39] element ::= EmptyElementTag | STag content ETag
1482 [40] STag ::= '<' Name (S Attribute)* S? '>'
1483 [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
1484 */
1485 static Boolean parseTag(CFXMLParserRef parser) {
1486 UniChar ch;
1487 void *tag;
1488 CFXMLElementInfo data;
1489 Boolean success = true;
1490 CFStringRef tagName;
1491
1492 if (!_inputStreamScanXMLName(&parser->input, false, &tagName)) {
1493 _CFReportError(parser, kCFXMLErrorMalformedStartTag, "Encountered malformed start tag");
1494 return false;
1495 }
1496
1497 _inputStreamSkipWhitespace(&parser->input, NULL);
1498
1499 if (!parseAttributes(parser)) return false; // parsed directly into parser->argDict ; parseAttributes consumes any trailing whitespace
1500 data.attributes = parser->argDict;
1501 data.attributeOrder = parser->argArray;
1502 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
1503 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF");
1504 return false;
1505 }
1506 if (ch == '/') {
1507 data.isEmpty = true;
1508 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
1509 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF");
1510 return false;
1511 }
1512 } else {
1513 data.isEmpty = false;
1514 }
1515 if (ch != '>') {
1516 _CFReportError(parser, kCFXMLErrorMalformedStartTag, "Encountered malformed start tag");
1517 return false;
1518 }
1519
1520 if (*parser->top || parser->top == parser->stack) {
1521 CFStringRef oldStr = parser->node->dataString;
1522 parser->node->dataTypeID = kCFXMLNodeTypeElement;
1523 parser->node->dataString = tagName;
1524 parser->node->additionalData = &data;
1525 tag = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info);
1526 if (tag && parser->status == kCFXMLStatusParseInProgress) {
1527 INVOKE_CALLBACK4(parser->callBacks.addChild, parser, *parser->top, tag, parser->context.info);
1528 }
1529 parser->node->additionalData = NULL;
1530 parser->node->dataString = oldStr;
1531 if (parser->status != kCFXMLStatusParseInProgress) {
1532 // callback called CFXMLParserAbort()
1533 _CFReportError(parser, parser->status, NULL);
1534 return false;
1535 }
1536 } else {
1537 tag = NULL;
1538 }
1539
1540 pushXMLNode(parser, tag);
1541 if (!data.isEmpty) {
1542 success = parseTagContent(parser);
1543 if (success) {
1544 success = parseCloseTag(parser, tagName);
1545 }
1546 }
1547 parser->top --;
1548
1549 if (success && tag) {
1550 INVOKE_CALLBACK3(parser->callBacks.endXMLStructure, parser, tag, parser->context.info);
1551 if (parser->status != kCFXMLStatusParseInProgress) {
1552 _CFReportError(parser, parser->status, NULL);
1553 return false;
1554 }
1555 }
1556 return success;
1557 }
1558
1559 /*
1560 [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
1561 [67] Reference ::= EntityRef | CharRef
1562 [68] EntityRef ::= '&' Name ';'
1563 */
1564 // For the moment, we don't worry about references in the attribute values.
1565 static Boolean parseAttributeValue(CFXMLParserRef parser, CFMutableStringRef str) {
1566 UniChar quote, ch;
1567 Boolean success = _inputStreamGetCharacter(&parser->input, &quote);
1568 if (!success || (quote != '\'' && quote != '\"')) return false;
1569 if (str) _inputStreamSetMark(&parser->input);
1570 while (_inputStreamGetCharacter(&parser->input, &ch) && ch != quote) {
1571 switch (ch) {
1572 case '<': success = false; break;
1573 case '&':
1574 if (!parseEntityReference(parser, false)) {
1575 success = false;
1576 break;
1577 }
1578 default:
1579 ;
1580 }
1581 }
1582
1583 if (success && _inputStreamAtEOF(&parser->input)) {
1584 success = false;
1585 }
1586 if (str) {
1587 if (success) {
1588 _inputStreamReturnCharacter(&parser->input, quote);
1589 _inputStreamGetCharactersFromMark(&parser->input, str);
1590 _inputStreamGetCharacter(&parser->input, &ch);
1591 }
1592 _inputStreamClearMark(&parser->input);
1593 }
1594 return success;
1595 }
1596
1597 /*
1598 [40] STag ::= '<' Name (S Attribute)* S? '>'
1599 [41] Attribute ::= Name Eq AttValue
1600 [25] Eq ::= S? '=' S?
1601 */
1602
1603 // Expects parser->curr to be at the first content character; will consume the trailing whitespace.
1604 Boolean parseAttributes(CFXMLParserRef parser) {
1605 UniChar ch;
1606 CFMutableDictionaryRef dict;
1607 CFMutableArrayRef array;
1608 Boolean failure = false;
1609 if (_inputStreamPeekCharacter(&parser->input, &ch) == '>') {
1610 if (parser->argDict) {
1611 CFDictionaryRemoveAllValues(parser->argDict);
1612 CFArrayRemoveAllValues(parser->argArray);
1613 }
1614 return true; // No attributes; let caller deal with it
1615 }
1616 if (!parser->argDict) {
1617 parser->argDict = CFDictionaryCreateMutable(CFGetAllocator(parser), 0, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
1618 parser->argArray = CFArrayCreateMutable(CFGetAllocator(parser), 0, &kCFTypeArrayCallBacks);
1619 } else {
1620 CFDictionaryRemoveAllValues(parser->argDict);
1621 CFArrayRemoveAllValues(parser->argArray);
1622 }
1623 dict = parser->argDict;
1624 array = parser->argArray;
1625 while (!failure && _inputStreamPeekCharacter(&parser->input, &ch) && ch != '>' && ch != '/') {
1626 CFStringRef key;
1627 CFMutableStringRef value;
1628 if (!_inputStreamScanXMLName(&parser->input, false, &key)) {
1629 failure = true;
1630 break;
1631 }
1632 if (CFArrayGetFirstIndexOfValue(array, CFRangeMake(0, CFArrayGetCount(array)), key) != kCFNotFound) {
1633 _CFReportError(parser, kCFXMLErrorMalformedStartTag, "Found repeated attribute");
1634 return false;
1635 }
1636 _inputStreamSkipWhitespace(&parser->input, NULL);
1637 if (!_inputStreamGetCharacter(&parser->input, &ch) || ch != '=') {
1638 failure = true;
1639 break;
1640 }
1641 _inputStreamSkipWhitespace(&parser->input, NULL);
1642 value = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
1643 if (!parseAttributeValue(parser, value)) {
1644 CFRelease(value);
1645 failure = true;
1646 break;
1647 }
1648 CFArrayAppendValue(array, key);
1649 CFDictionarySetValue(dict, key, value);
1650 CFRelease(value);
1651 _inputStreamSkipWhitespace(&parser->input, NULL);
1652 }
1653 if (failure) {
1654 //#warning CF:Include tag name in this error report
1655 _CFReportError(parser, kCFXMLErrorMalformedStartTag, "Found illegal character while parsing element tag");
1656 return false;
1657 } else if (_inputStreamAtEOF(&parser->input)) {
1658 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing element attributes");
1659 return false;
1660 } else {
1661 return true;
1662 }
1663 }
1664
1665 /*
1666 [1] document ::= prolog element Misc*
1667 [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1668 [27] Misc ::= Comment | PI | S
1669 [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1670
1671 We treat XMLDecl as a plain old PI, since PI is part of Misc. This changes the prolog and document productions to
1672 [22-1] prolog ::= Misc* (doctypedecl Misc*)?
1673 [1-1] document ::= Misc* (doctypedecl Misc*)? element Misc*
1674
1675 NOTE: This function assumes parser->stack has a valid top. I.e. the document pointer has already been created!
1676 */
1677 static Boolean parseXML(CFXMLParserRef parser) {
1678 Boolean success = true, sawDTD = false, sawElement = false;
1679 UniChar ch;
1680 while (success && _inputStreamPeekCharacter(&parser->input, &ch)) {
1681 switch (ch) {
1682 case ' ':
1683 case '\n':
1684 case '\t':
1685 case '\r':
1686 success = parseWhitespace(parser);
1687 break;
1688 case '<':
1689 _inputStreamGetCharacter(&parser->input, &ch);
1690 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
1691 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing top-level document");
1692 return false;
1693 }
1694 if (ch == '!') {
1695 // Comment or DTD
1696 UniChar dashes[2] = {'-', '-'};
1697 if (_inputStreamMatchString(&parser->input, dashes, 2)) {
1698 // Comment
1699 success = parseComment(parser, true);
1700 } else {
1701 // Should be DTD
1702 if (sawDTD) {
1703 _CFReportError(parser, kCFXMLErrorMalformedDocument, "Encountered a second DTD");
1704 return false;
1705 }
1706 success = parseDTD(parser);
1707 if (success) sawDTD = true;
1708 }
1709 } else if (ch == '?') {
1710 // Processing instruction
1711 success = parseProcessingInstruction(parser, true);
1712 } else {
1713 // Tag or malformed
1714 if (sawElement) {
1715 _CFReportError(parser, kCFXMLErrorMalformedDocument, "Encountered second top-level element");
1716 return false;
1717 }
1718 _inputStreamReturnCharacter(&parser->input, ch);
1719 success = parseTag(parser);
1720 if (success) sawElement = true;
1721 }
1722 break;
1723 default: {
1724 parser->status = kCFXMLErrorMalformedDocument;
1725 parser->errorString = ch < 256 ?
1726 CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered unexpected character 0x%x (\'%c\') at top-level"), ch, ch) :
1727 CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered unexpected Unicode character 0x%x at top-level"), ch);
1728
1729 if (parser->callBacks.handleError) {
1730 INVOKE_CALLBACK3(parser->callBacks.handleError, parser, parser->status, parser->context.info);
1731 }
1732 return false;
1733 }
1734 }
1735 }
1736
1737 if (!success) return false;
1738 if (!sawElement) {
1739 _CFReportError(parser, kCFXMLErrorElementlessDocument, "No element found in document");
1740 return false;
1741 }
1742 return true;
1743 }
1744
1745 static void _CFReportError(CFXMLParserRef parser, CFXMLParserStatusCode errNum, const char *str) {
1746 if (str) {
1747 parser->status = errNum;
1748 parser->errorString = CFStringCreateWithCString(CFGetAllocator(parser), str, kCFStringEncodingASCII);
1749 }
1750 if (parser->callBacks.handleError) {
1751 INVOKE_CALLBACK3(parser->callBacks.handleError, parser, errNum, parser->context.info);
1752 }
1753 }
1754
1755 // Assumes parser->node has been set and is ready to go
1756 static Boolean reportNewLeaf(CFXMLParserRef parser) {
1757 void *xmlStruct;
1758 if (*(parser->top) == NULL) return true;
1759
1760 xmlStruct = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info);
1761 if (xmlStruct && parser->status == kCFXMLStatusParseInProgress) {
1762 INVOKE_CALLBACK4(parser->callBacks.addChild, parser, *(parser->top), xmlStruct, parser->context.info);
1763 if (parser->status == kCFXMLStatusParseInProgress) INVOKE_CALLBACK3(parser->callBacks.endXMLStructure, parser, xmlStruct, parser->context.info);
1764 }
1765 if (parser->status != kCFXMLStatusParseInProgress) {
1766 _CFReportError(parser, parser->status, NULL);
1767 return false;
1768 }
1769 return true;
1770 }
1771
1772 static void pushXMLNode(CFXMLParserRef parser, void *node) {
1773 parser->top ++;
1774 if ((unsigned)(parser->top - parser->stack) == parser->capacity) {
1775 parser->stack = (void **)CFAllocatorReallocate(CFGetAllocator(parser), parser->stack, 2 * parser->capacity * sizeof(void *), 0);
1776 parser->top = parser->stack + parser->capacity;
1777 parser->capacity = 2*parser->capacity;
1778 }
1779 *(parser->top) = node;
1780 }
1781
1782 /**************************/
1783 /* Parsing to a CFXMLTree */
1784 /**************************/
1785
1786 static void *_XMLTreeCreateXMLStructure(CFXMLParserRef parser, CFXMLNodeRef node, void *context) {
1787 CFXMLNodeRef myNode = CFXMLNodeCreateCopy(CFGetAllocator(parser), node);
1788 CFXMLTreeRef tree = CFXMLTreeCreateWithNode(CFGetAllocator(parser), myNode);
1789 CFRelease(myNode);
1790 return (void *)tree;
1791 }
1792
1793 static void _XMLTreeAddChild(CFXMLParserRef parser, void *parent, void *child, void *context) {
1794 CFTreeAppendChild((CFTreeRef)parent, (CFTreeRef)child);
1795 }
1796
1797 static void _XMLTreeEndXMLStructure(CFXMLParserRef parser, void *xmlType, void *context) {
1798 CFXMLTreeRef node = (CFXMLTreeRef)xmlType;
1799 if (CFTreeGetParent(node))
1800 CFRelease((CFXMLTreeRef)xmlType);
1801 }
1802
1803 CFXMLTreeRef CFXMLTreeCreateWithDataFromURL(CFAllocatorRef allocator, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex version) {
1804 CFXMLParserRef parser;
1805 CFXMLParserCallBacks callbacks;
1806 CFXMLTreeRef result;
1807
1808 CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__);
1809
1810 callbacks.createXMLStructure = _XMLTreeCreateXMLStructure;
1811 callbacks.addChild = _XMLTreeAddChild;
1812 callbacks.endXMLStructure = _XMLTreeEndXMLStructure;
1813 callbacks.resolveExternalEntity = NULL;
1814 callbacks.handleError = NULL;
1815 parser = CFXMLParserCreateWithDataFromURL(allocator, dataSource, parseOptions, version, &callbacks, NULL);
1816
1817 if (CFXMLParserParse(parser)) {
1818 result = (CFXMLTreeRef)CFXMLParserGetDocument(parser);
1819 } else {
1820 result = (CFXMLTreeRef)CFXMLParserGetDocument(parser);
1821 if (result) CFRelease(result);
1822 result = NULL;
1823 }
1824 CFRelease(parser);
1825 return result;
1826 }
1827
1828 CFXMLTreeRef CFXMLTreeCreateFromData(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex parserVersion) {
1829 return CFXMLTreeCreateFromDataWithError(allocator, xmlData, dataSource, parseOptions, parserVersion, NULL);
1830 }
1831
// Keys of the error dictionary that CFXMLTreeCreateFromDataWithError() fills in when parsing fails:
//   kCFXMLTreeErrorDescription - string from CFXMLParserCopyErrorDescription()
//   kCFXMLTreeErrorLineNumber  - number from CFXMLParserGetLineNumber()
//   kCFXMLTreeErrorLocation    - number from CFXMLParserGetLocation()
//   kCFXMLTreeErrorStatusCode  - number from CFXMLParserGetStatusCode()
1832 CONST_STRING_DECL(kCFXMLTreeErrorDescription, "kCFXMLTreeErrorDescription");
1833 CONST_STRING_DECL(kCFXMLTreeErrorLineNumber, "kCFXMLTreeErrorLineNumber");
1834 CONST_STRING_DECL(kCFXMLTreeErrorLocation, "kCFXMLTreeErrorLocation");
1835 CONST_STRING_DECL(kCFXMLTreeErrorStatusCode, "kCFXMLTreeErrorStatusCode");
1836
1837 CFXMLTreeRef CFXMLTreeCreateFromDataWithError(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex parserVersion, CFDictionaryRef *errorDict) {
1838 CFXMLParserRef parser;
1839 CFXMLParserCallBacks callbacks;
1840 CFXMLTreeRef result;
1841
1842 __CFGenericValidateType(xmlData, CFDataGetTypeID());
1843 CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__);
1844
1845 callbacks.createXMLStructure = _XMLTreeCreateXMLStructure;
1846 callbacks.addChild = _XMLTreeAddChild;
1847 callbacks.endXMLStructure = _XMLTreeEndXMLStructure;
1848 callbacks.resolveExternalEntity = NULL;
1849 callbacks.handleError = NULL;
1850 parser = CFXMLParserCreate(allocator, xmlData, dataSource, parseOptions, parserVersion, &callbacks, NULL);
1851
1852 if (CFXMLParserParse(parser)) {
1853 result = (CFXMLTreeRef)CFXMLParserGetDocument(parser);
1854 } else {
1855 if (errorDict) { // collect the error dictionary
1856 *errorDict = CFDictionaryCreateMutable(allocator, 4, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
1857 if (*errorDict) {
1858 CFIndex rawnum;
1859 CFNumberRef cfnum;
1860 CFStringRef errstring;
1861
1862 rawnum = CFXMLParserGetLocation(parser);
1863 cfnum = CFNumberCreate(allocator, kCFNumberSInt32Type, &rawnum);
1864 if(cfnum) {
1865 CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorLocation, cfnum);
1866 CFRelease(cfnum);
1867 }
1868
1869 rawnum = CFXMLParserGetLineNumber(parser);
1870 cfnum = CFNumberCreate(allocator, kCFNumberSInt32Type, &rawnum);
1871 if(cfnum) {
1872 CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorLineNumber, cfnum);
1873 CFRelease(cfnum);
1874 }
1875
1876 rawnum = CFXMLParserGetStatusCode(parser);
1877 cfnum = CFNumberCreate(allocator, kCFNumberSInt32Type, &rawnum);
1878 if(cfnum) {
1879 CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorStatusCode, cfnum);
1880 CFRelease(cfnum);
1881 }
1882
1883 errstring = CFXMLParserCopyErrorDescription(parser);
1884 if(errstring) {
1885 CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorDescription, errstring);
1886 CFRelease(errstring);
1887 }
1888 }
1889 }
1890 result = (CFXMLTreeRef)CFXMLParserGetDocument(parser);
1891 if (result) CFRelease(result);
1892 result = NULL;
1893 }
1894 CFRelease(parser);
1895 return result;
1896 }
1897
1898 /*
1899 At the very least we need to do <, >, &, ", and '. In addition, we'll have to do everything else in the string.
1900 We should also be handling items that are up over certain values correctly.
1901 */
1902 CFStringRef CFXMLCreateStringByEscapingEntities(CFAllocatorRef allocator, CFStringRef string, CFDictionaryRef entitiesDictionary) {
1903 CFAssert1(string != NULL, __kCFLogAssertion, "%s(): NULL string not permitted.", __PRETTY_FUNCTION__);
1904 CFMutableStringRef newString = CFStringCreateMutable(allocator, 0); // unbounded mutable string
1905 CFMutableCharacterSetRef startChars = CFCharacterSetCreateMutable(allocator);
1906
1907 CFStringInlineBuffer inlineBuf;
1908 CFIndex idx = 0;
1909 CFIndex mark = idx;
1910 CFIndex stringLength = CFStringGetLength(string);
1911 UniChar uc;
1912
1913 CFCharacterSetAddCharactersInString(startChars, CFSTR("&<>'\""));
1914
1915 CFStringInitInlineBuffer(string, &inlineBuf, CFRangeMake(0, stringLength));
1916 for(idx = 0; idx < stringLength; idx++) {
1917 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, idx);
1918 if(CFCharacterSetIsCharacterMember(startChars, uc)) {
1919 CFStringRef previousSubstring = CFStringCreateWithSubstring(allocator, string, CFRangeMake(mark, idx - mark));
1920 CFStringAppend(newString, previousSubstring);
1921 CFRelease(previousSubstring);
1922 switch(uc) {
1923 case '&':
1924 CFStringAppend(newString, CFSTR("&amp;"));
1925 break;
1926 case '<':
1927 CFStringAppend(newString, CFSTR("&lt;"));
1928 break;
1929 case '>':
1930 CFStringAppend(newString, CFSTR("&gt;"));
1931 break;
1932 case '\'':
1933 CFStringAppend(newString, CFSTR("&apos;"));
1934 break;
1935 case '"':
1936 CFStringAppend(newString, CFSTR("&quot;"));
1937 break;
1938 }
1939 mark = idx + 1;
1940 }
1941 }
1942 // Copy the remainder to the output string before returning.
1943 CFStringRef remainder = CFStringCreateWithSubstring(allocator, string, CFRangeMake(mark, idx - mark));
1944 if (NULL != remainder) {
1945 CFStringAppend(newString, remainder);
1946 CFRelease(remainder);
1947 }
1948
1949 CFRelease(startChars);
1950 return newString;
1951 }
1952
1953 CFStringRef CFXMLCreateStringByUnescapingEntities(CFAllocatorRef allocator, CFStringRef string, CFDictionaryRef entitiesDictionary) {
1954 CFAssert1(string != NULL, __kCFLogAssertion, "%s(): NULL string not permitted.", __PRETTY_FUNCTION__);
1955
1956 CFStringInlineBuffer inlineBuf; /* use this for fast traversal of the string in question */
1957 CFStringRef sub;
1958 CFIndex lastChunkStart, length = CFStringGetLength(string);
1959 CFIndex i, entityStart;
1960 UniChar uc;
1961 UInt32 entity;
1962 int base;
1963 CFMutableDictionaryRef fullReplDict = entitiesDictionary ? CFDictionaryCreateMutableCopy(allocator, 0, entitiesDictionary) : CFDictionaryCreateMutable(allocator, 0, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
1964
1965 CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("amp"), (const void *)CFSTR("&"));
1966 CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("quot"), (const void *)CFSTR("\""));
1967 CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("lt"), (const void *)CFSTR("<"));
1968 CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("gt"), (const void *)CFSTR(">"));
1969 CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("apos"), (const void *)CFSTR("'"));
1970
1971 CFStringInitInlineBuffer(string, &inlineBuf, CFRangeMake(0, length - 1));
1972 CFMutableStringRef newString = CFStringCreateMutable(allocator, 0);
1973
1974 lastChunkStart = 0;
1975 // Scan through the string in its entirety
1976 for(i = 0; i < length; ) {
1977 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++; // grab the next character and move i.
1978
1979 if(uc == '&') {
1980 entityStart = i - 1;
1981 entity = 0xFFFF; // set this to a not-Unicode character as sentinel
1982 // we've hit the beginning of an entity. Copy everything from lastChunkStart to this point.
1983 if(lastChunkStart < i - 1) {
1984 sub = CFStringCreateWithSubstring(allocator, string, CFRangeMake(lastChunkStart, (i - 1) - lastChunkStart));
1985 CFStringAppend(newString, sub);
1986 CFRelease(sub);
1987 }
1988
1989 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++; // grab the next character and move i.
1990 // Now we can process the entity reference itself
1991 if(uc == '#') { // this is a numeric entity.
1992 base = 10;
1993 entity = 0;
1994 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++;
1995
1996 if(uc == 'x') { // only lowercase x allowed. Translating numeric entity as hexadecimal.
1997 base = 16;
1998 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++;
1999 }
2000
2001 // process the provided digits 'til we're finished
2002 while(true) {
2003 if (uc >= '0' && uc <= '9')
2004 entity = entity * base + (uc-'0');
2005 else if (uc >= 'a' && uc <= 'f' && base == 16)
2006 entity = entity * base + (uc-'a'+10);
2007 else if (uc >= 'A' && uc <= 'F' && base == 16)
2008 entity = entity * base + (uc-'A'+10);
2009 else break;
2010
2011 if (i < length) {
2012 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++;
2013 }
2014 else
2015 break;
2016 }
2017 }
2018
2019 // Scan to the end of the entity
2020 while(uc != ';' && i < length) {
2021 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++;
2022 }
2023
2024 if(0xFFFF != entity) { // it was numeric, and translated.
2025 // Now, output the result fo the entity
2026 if(entity >= 0x10000) {
2027 UniChar characters[2] = { ((entity - 0x10000) >> 10) + 0xD800, ((entity - 0x10000) & 0x3ff) + 0xDC00 };
2028 CFStringAppendCharacters(newString, characters, 2);
2029 } else {
2030 UniChar character = entity;
2031 CFStringAppendCharacters(newString, &character, 1);
2032 }
2033 } else { // it wasn't numeric.
2034 sub = CFStringCreateWithSubstring(allocator, string, CFRangeMake(entityStart + 1, (i - entityStart - 2))); // This trims off the & and ; from the string, so we can use it against the dictionary itself.
2035 CFStringRef replacementString = (CFStringRef)CFDictionaryGetValue(fullReplDict, sub);
2036 if(replacementString) {
2037 CFStringAppend(newString, replacementString);
2038 } else {
2039 CFRelease(sub); // let the old substring go, since we didn't find it in the dictionary
2040 sub = CFStringCreateWithSubstring(allocator, string, CFRangeMake(entityStart, (i - entityStart))); // create a new one, including the & and ;
2041 CFStringAppend(newString, sub); // ...and append that.
2042 }
2043 CFRelease(sub); // in either case, release the most-recent "sub"
2044 }
2045
2046 // move the lastChunkStart to the beginning of the next chunk.
2047 lastChunkStart = i;
2048 }
2049 }
2050 if(lastChunkStart < length) { // we've come out of the loop, let's get the rest of the string and tack it on.
2051 sub = CFStringCreateWithSubstring(allocator, string, CFRangeMake(lastChunkStart, i - lastChunkStart));
2052 CFStringAppend(newString, sub);
2053 CFRelease(sub);
2054 }
2055
2056 CFRelease(fullReplDict);
2057
2058 return newString;
2059 }
2060
2061 #pragma GCC diagnostic pop