]> git.saurik.com Git - apple/cf.git/blob - CFXMLParser.c
CF-476.10.tar.gz
[apple/cf.git] / CFXMLParser.c
1 /*
2 * Copyright (c) 2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23 /* CFXMLParser.c
24 Copyright 1999-2002, Apple, Inc. All rights reserved.
25 Responsibility: Chris Parker
26 */
27
28 #include <CoreFoundation/CFXMLParser.h>
29 #include <CoreFoundation/CFNumber.h>
30 #include "CFXMLInputStream.h"
31 #include "CFUniChar.h"
32 #include "CFInternal.h"
33
34 struct __CFXMLParser {
35 CFRuntimeBase _cfBase;
36
37 _CFXMLInputStream input;
38
39 void **stack;
40 void **top;
41 UInt32 capacity;
42
43 struct __CFXMLNode *node; // Our private node; we use it to report back information
44 CFMutableDictionaryRef argDict;
45 CFMutableArrayRef argArray;
46
47 UInt32 options;
48 CFXMLParserCallBacks callBacks;
49 CFXMLParserContext context;
50
51 CFXMLParserStatusCode status;
52 CFStringRef errorString;
53 };
54
55 static CFStringRef __CFXMLParserCopyDescription(CFTypeRef cf) {
56 const struct __CFXMLParser *parser = (const struct __CFXMLParser *)cf;
57 return CFStringCreateWithFormat(CFGetAllocator(cf), NULL, CFSTR("<CFXMLParser %p>"), parser);
58 }
59
60 static void __CFXMLParserDeallocate(CFTypeRef cf) {
61 struct __CFXMLParser *parser = (struct __CFXMLParser *)cf;
62 CFAllocatorRef alloc = CFGetAllocator(parser);
63 _freeInputStream(&(parser->input));
64 if (parser->argDict) CFRelease(parser->argDict);
65 if (parser->argArray) CFRelease(parser->argArray);
66 if (parser->errorString) CFRelease(parser->errorString);
67 if (parser->node) CFRelease(parser->node);
68 CFAllocatorDeallocate(alloc, parser->stack);
69 if (parser->context.info && parser->context.release) {
70 parser->context.release(parser->context.info);
71 }
72 }
73
74 static CFTypeID __kCFXMLParserTypeID = _kCFRuntimeNotATypeID;
75
76 static const CFRuntimeClass __CFXMLParserClass = {
77 0,
78 "CFXMLParser",
79 NULL, // init
80 NULL, // copy
81 __CFXMLParserDeallocate,
82 NULL,
83 NULL,
84 NULL, //
85 __CFXMLParserCopyDescription
86 };
87
88 static void __CFXMLParserInitialize(void) {
89 __kCFXMLParserTypeID = _CFRuntimeRegisterClass(&__CFXMLParserClass);
90 }
91
92 CFTypeID CFXMLParserGetTypeID(void) {
93 if (_kCFRuntimeNotATypeID == __kCFXMLParserTypeID) __CFXMLParserInitialize();
94 return __kCFXMLParserTypeID;
95 }
96
97 void CFXMLParserGetContext(CFXMLParserRef parser, CFXMLParserContext *context) {
98 CFAssert1(parser != NULL, __kCFLogAssertion, "%s(): NULL parser not permitted", __PRETTY_FUNCTION__);
99 __CFGenericValidateType(parser, CFXMLParserGetTypeID());
100 if (context) {
101 context->version = parser->context.version;
102 context->info = parser->context.info;
103 context->retain = parser->context.retain;
104 context->release = parser->context.release;
105 context->copyDescription = parser->context.copyDescription;
106 UNFAULT_CALLBACK(context->retain);
107 UNFAULT_CALLBACK(context->release);
108 UNFAULT_CALLBACK(context->copyDescription);
109 }
110 }
111
112 void CFXMLParserGetCallBacks(CFXMLParserRef parser, CFXMLParserCallBacks *callBacks) {
113 __CFGenericValidateType(parser, CFXMLParserGetTypeID());
114 if (callBacks) {
115 callBacks->version = parser->callBacks.version;
116 callBacks->createXMLStructure = parser->callBacks.createXMLStructure;
117 callBacks->addChild = parser->callBacks.addChild;
118 callBacks->endXMLStructure = parser->callBacks.endXMLStructure;
119 callBacks->resolveExternalEntity = parser->callBacks.resolveExternalEntity;
120 callBacks->handleError = parser->callBacks.handleError;
121 UNFAULT_CALLBACK(callBacks->createXMLStructure);
122 UNFAULT_CALLBACK(callBacks->addChild);
123 UNFAULT_CALLBACK(callBacks->endXMLStructure);
124 UNFAULT_CALLBACK(callBacks->resolveExternalEntity);
125 UNFAULT_CALLBACK(callBacks->handleError);
126 }
127 }
128
129 CFURLRef CFXMLParserGetSourceURL(CFXMLParserRef parser) {
130 __CFGenericValidateType(parser, CFXMLParserGetTypeID());
131 return parser->input.url;
132 }
133
134 /* Returns the character index or line number of the current parse location */
135 CFIndex CFXMLParserGetLocation(CFXMLParserRef parser) {
136 __CFGenericValidateType(parser, CFXMLParserGetTypeID());
137 return _inputStreamCurrentLocation(&parser->input);
138 }
139
140 CFIndex CFXMLParserGetLineNumber(CFXMLParserRef parser) {
141 __CFGenericValidateType(parser, CFXMLParserGetTypeID());
142 return _inputStreamCurrentLine(&parser->input);
143 }
144
145 /* Returns the top-most object returned by the createXMLStructure callback */
146 void *CFXMLParserGetDocument(CFXMLParserRef parser) {
147 __CFGenericValidateType(parser, CFXMLParserGetTypeID());
148 if (parser->capacity > 0)
149 return parser->stack[0];
150 else
151 return NULL;
152 }
153
154 CFXMLParserStatusCode CFXMLParserGetStatusCode(CFXMLParserRef parser) {
155 __CFGenericValidateType(parser, CFXMLParserGetTypeID());
156 return parser->status;
157 }
158
159 CFStringRef CFXMLParserCopyErrorDescription(CFXMLParserRef parser) {
160 __CFGenericValidateType(parser, CFXMLParserGetTypeID());
161 return (CFStringRef)CFRetain(parser->errorString);
162 }
163
164 void CFXMLParserAbort(CFXMLParserRef parser, CFXMLParserStatusCode errorCode, CFStringRef errorDescription) {
165 __CFGenericValidateType(parser, CFXMLParserGetTypeID());
166 CFAssert1(errorCode > 0, __kCFLogAssertion, "%s(): errorCode must be greater than zero", __PRETTY_FUNCTION__);
167 CFAssert1(errorDescription != NULL, __kCFLogAssertion, "%s(): errorDescription may not be NULL", __PRETTY_FUNCTION__);
168 __CFGenericValidateType(errorDescription, CFStringGetTypeID());
169
170 parser->status = errorCode;
171 if (parser->errorString) CFRelease(parser->errorString);
172 parser->errorString = (CFStringRef)CFStringCreateCopy(kCFAllocatorSystemDefault, errorDescription);
173 }
174
175
176 static Boolean parseXML(CFXMLParserRef parser);
177 static Boolean parseComment(CFXMLParserRef parser, Boolean report);
178 static Boolean parseProcessingInstruction(CFXMLParserRef parser, Boolean report);
179 static Boolean parseInlineDTD(CFXMLParserRef parser);
180 static Boolean parseDTD(CFXMLParserRef parser);
181 static Boolean parsePhysicalEntityReference(CFXMLParserRef parser);
182 static Boolean parseCDSect(CFXMLParserRef parser);
183 static Boolean parseEntityReference(CFXMLParserRef parser, Boolean report);
184 static Boolean parsePCData(CFXMLParserRef parser);
185 static Boolean parseWhitespace(CFXMLParserRef parser);
186 static Boolean parseAttributeListDeclaration(CFXMLParserRef parser);
187 static Boolean parseNotationDeclaration(CFXMLParserRef parser);
188 static Boolean parseElementDeclaration(CFXMLParserRef parser);
189 static Boolean parseEntityDeclaration(CFXMLParserRef parser);
190 static Boolean parseExternalID(CFXMLParserRef parser, Boolean alsoAcceptPublicID, CFXMLExternalID *extID);
191 static Boolean parseCloseTag(CFXMLParserRef parser, CFStringRef tag);
192 static Boolean parseTagContent(CFXMLParserRef parser);
193 static Boolean parseTag(CFXMLParserRef parser);
194 static Boolean parseAttributes(CFXMLParserRef parser);
195 static Boolean parseAttributeValue(CFXMLParserRef parser, CFMutableStringRef str);
196
197 // Utilities; may need to make these accessible to the property list parser to avoid code duplication
198 static void _CFReportError(CFXMLParserRef parser, CFXMLParserStatusCode errNum, const char *str);
199 static Boolean reportNewLeaf(CFXMLParserRef parser); // Assumes parser->node has been set and is ready to go
200 static void pushXMLNode(CFXMLParserRef parser, void *node);
201
202 static CFXMLParserRef __CFXMLParserInit(CFAllocatorRef alloc, CFURLRef dataSource, CFOptionFlags options, CFDataRef xmlData, CFIndex version, CFXMLParserCallBacks *callBacks, CFXMLParserContext *context) {
203 struct __CFXMLParser *parser = (struct __CFXMLParser *)_CFRuntimeCreateInstance(alloc, CFXMLParserGetTypeID(), sizeof(struct __CFXMLParser) - sizeof(CFRuntimeBase), NULL);
204 struct __CFXMLNode *node = (struct __CFXMLNode *)_CFRuntimeCreateInstance(alloc, CFXMLNodeGetTypeID(), sizeof(struct __CFXMLNode) - sizeof(CFRuntimeBase), NULL);
205 UniChar *buf;
206 if (parser && node) {
207 alloc = CFGetAllocator(parser);
208 _initializeInputStream(&(parser->input), alloc, dataSource, xmlData);
209 parser->top = parser->stack;
210 parser->stack = NULL;
211 parser->capacity = 0;
212
213 buf = (UniChar *)CFAllocatorAllocate(alloc, 128*sizeof(UniChar), 0);
214 parser->node = node;
215 parser->node->dataString = CFStringCreateMutableWithExternalCharactersNoCopy(alloc, buf, 0, 128, alloc);
216 parser->node->additionalData = NULL;
217 parser->node->version = version;
218 parser->argDict = NULL; // don't create these until necessary
219 parser->argArray = NULL;
220
221 parser->options = options;
222 parser->callBacks = *callBacks;
223
224 FAULT_CALLBACK((void **)&(parser->callBacks.createXMLStructure));
225 FAULT_CALLBACK((void **)&(parser->callBacks.addChild));
226 FAULT_CALLBACK((void **)&(parser->callBacks.endXMLStructure));
227 FAULT_CALLBACK((void **)&(parser->callBacks.resolveExternalEntity));
228 FAULT_CALLBACK((void **)&(parser->callBacks.handleError));
229
230 if (context) {
231 parser->context = *context;
232 if (parser->context.info && parser->context.retain) {
233 parser->context.retain(parser->context.info);
234 }
235 } else {
236 parser->context.version = 0;
237 parser->context.info = NULL;
238 parser->context.retain = NULL;
239 parser->context.release = NULL;
240 parser->context.copyDescription = NULL;
241 }
242 parser->status = kCFXMLStatusParseNotBegun;
243 parser->errorString = NULL;
244 } else {
245 if (parser) CFRelease(parser);
246 if (node) CFRelease(node);
247 parser = NULL;
248 }
249 return parser;
250 }
251
252 CFXMLParserRef CFXMLParserCreate(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex versionOfNodes, CFXMLParserCallBacks *callBacks, CFXMLParserContext *context) {
253 CFAssert1(xmlData != NULL, __kCFLogAssertion, "%s(): NULL data not permitted", __PRETTY_FUNCTION__);
254 __CFGenericValidateType(xmlData, CFDataGetTypeID());
255 CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__);
256 CFAssert1(callBacks != NULL && callBacks->createXMLStructure != NULL && callBacks->addChild != NULL && callBacks->endXMLStructure != NULL, __kCFLogAssertion, "%s(): callbacks createXMLStructure, addChild, and endXMLStructure must all be non-NULL", __PRETTY_FUNCTION__);
257 CFAssert2(versionOfNodes <= 1, __kCFLogAssertion, "%s(): version number %d is higher than supported by CFXMLParser", __PRETTY_FUNCTION__, versionOfNodes);
258 CFAssert1(versionOfNodes != 0, __kCFLogAssertion, "%s(): version number 0 is no longer supported by CFXMLParser", __PRETTY_FUNCTION__);
259 return __CFXMLParserInit(allocator, dataSource, parseOptions, xmlData, versionOfNodes, callBacks, context);
260 }
261
262 CFXMLParserRef CFXMLParserCreateWithDataFromURL(CFAllocatorRef allocator, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex versionOfNodes, CFXMLParserCallBacks *callBacks, CFXMLParserContext *context) {
263 CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__);
264 CFAssert1(callBacks != NULL && callBacks->createXMLStructure != NULL && callBacks->addChild != NULL && callBacks->endXMLStructure != NULL, __kCFLogAssertion, "%s(): callbacks createXMLStructure, addChild, and endXMLStructure must all be non-NULL", __PRETTY_FUNCTION__);
265 CFAssert2(versionOfNodes <= 1, __kCFLogAssertion, "%s(): version number %d is higher than supported by CFXMLParser", __PRETTY_FUNCTION__, versionOfNodes);
266 CFAssert1(versionOfNodes != 0, __kCFLogAssertion, "%s(): version number 0 is no longer supported by CFXMLParser", __PRETTY_FUNCTION__);
267
268 return __CFXMLParserInit(allocator, dataSource, parseOptions, NULL, versionOfNodes, callBacks, context);
269 }
270
271 Boolean CFXMLParserParse(CFXMLParserRef parser) {
272 CFXMLDocumentInfo docData;
273 __CFGenericValidateType(parser, CFXMLParserGetTypeID());
274 if (parser->status != kCFXMLStatusParseNotBegun) return false;
275 parser->status = kCFXMLStatusParseInProgress;
276
277 if (!_openInputStream(&parser->input)) {
278 if (!parser->input.data) {
279 // couldn't load URL
280 parser->status = kCFXMLErrorNoData;
281 parser->errorString = CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("No data found at %@"), CFURLGetString(parser->input.url));
282 } else {
283 // couldn't figure out the encoding
284 CFAssert(parser->input.encoding == kCFStringEncodingInvalidId, __kCFLogAssertion, "CFXMLParser internal error: input stream could not be opened");
285 parser->status = kCFXMLErrorUnknownEncoding;
286 parser->errorString = CFStringCreateWithCString(CFGetAllocator(parser), "Encountered unknown encoding", kCFStringEncodingASCII);
287 }
288 if (parser->callBacks.handleError) {
289 INVOKE_CALLBACK3(parser->callBacks.handleError, parser, parser->status, parser->context.info);
290 }
291 return false;
292 }
293
294 // Create the document
295 parser->stack = (void **)CFAllocatorAllocate(CFGetAllocator(parser), 16 * sizeof(void *), 0);
296 parser->capacity = 16;
297 parser->node->dataTypeID = kCFXMLNodeTypeDocument;
298 docData.encoding = _inputStreamGetEncoding(&parser->input);
299 docData.sourceURL = parser->input.url;
300 parser->node->additionalData = &docData;
301 parser->stack[0] = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info);
302 parser->top = parser->stack;
303 parser->node->additionalData = NULL;
304
305 // Client may have called CFXMLParserAbort() during any callback, so we must always check to see if we have an error status after a callback
306 if (parser->status != kCFXMLStatusParseInProgress) {
307 _CFReportError(parser, parser->status, NULL);
308 return false;
309 }
310 return parseXML(parser);
311 }
312
313 /* The next several functions are all intended to parse past a particular XML structure. They expect parser->curr to be set to the first content character of their structure (e.g. parseXMLComment expects parser->curr to be set just past "<!--"). They parse to the end of their structure, calling any necessary callbacks along the way, and advancing parser->curr as they go. They either return void (not possible for the parse to fail) or they return a Boolean (success/failure). The calling routines are expected to catch returned Booleans and fail immediately if false is returned. */
314
315 // [3] S ::= (#x20 | #x9 | #xD | #xA)+
316 static Boolean parseWhitespace(CFXMLParserRef parser) {
317 CFIndex len;
318 Boolean report = !(parser->options & kCFXMLParserSkipWhitespace);
319 len = _inputStreamSkipWhitespace(&parser->input, report ? (CFMutableStringRef)(parser->node->dataString) : NULL);
320 if (report && len) {
321 parser->node->dataTypeID = kCFXMLNodeTypeWhitespace;
322 parser->node->additionalData = NULL;
323 return reportNewLeaf(parser);
324 } else {
325 return true;
326 }
327 }
328
329 // parser should be just past "<!--"
330 static Boolean parseComment(CFXMLParserRef parser, Boolean report) {
331 const UniChar dashes[2] = {'-', '-'};
332 UniChar ch;
333 report = report && (!(parser->options & kCFXMLParserSkipMetaData));
334 if (!_inputStreamScanToCharacters(&parser->input, dashes, 2, report ? (CFMutableStringRef)(parser->node->dataString) : NULL) || !_inputStreamGetCharacter(&parser->input, &ch)) {
335 _CFReportError(parser, kCFXMLErrorUnexpectedEOF,"Found unexpected EOF while parsing comment");
336 return false;
337 } else if (ch != '>') {
338 _CFReportError(parser, kCFXMLErrorMalformedComment, "Found \"--\" within a comment");
339 return false;
340 } else if (report) {
341 parser->node->dataTypeID = kCFXMLNodeTypeComment;
342 parser->node->additionalData = NULL;
343 return reportNewLeaf(parser);
344 } else {
345 return true;
346 }
347 }
348
349 /*
350 [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
351 [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
352 */
353 // parser should be set to the first character after "<?"
354 static Boolean parseProcessingInstruction(CFXMLParserRef parser, Boolean report) {
355 const UniChar piTermination[2] = {'?', '>'};
356 CFMutableStringRef str;
357 CFStringRef name;
358
359 if (!_inputStreamScanXMLName(&parser->input, false, &name)) {
360 _CFReportError(parser, kCFXMLErrorMalformedProcessingInstruction, "Found malformed processing instruction");
361 return false;
362 }
363 _inputStreamSkipWhitespace(&parser->input, NULL);
364 str = (report && *parser->top) ? CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser)) : NULL;
365 if (!_inputStreamScanToCharacters(&parser->input, piTermination, 2, str)) {
366 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing processing instruction");
367 if (str) CFRelease(str);
368 return false;
369 }
370
371 if (str) {
372 CFXMLProcessingInstructionInfo data;
373 Boolean result;
374 CFStringRef tmp = parser->node->dataString;
375 parser->node->dataTypeID = kCFXMLNodeTypeProcessingInstruction;
376 parser->node->dataString = name;
377 data.dataString = str;
378 parser->node->additionalData = &data;
379 result = reportNewLeaf(parser);
380 parser->node->additionalData = NULL;
381 parser->node->dataString = tmp;
382 CFRelease(str);
383 return result;
384 } else {
385 return true;
386 }
387 }
388
389 /*
390 [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
391 */
392 static const UniChar _DoctypeOpening[7] = {'D', 'O', 'C', 'T', 'Y', 'P', 'E'};
393 // first character should be immediately after the "<!"
394 static Boolean parseDTD(CFXMLParserRef parser) {
395 UniChar ch;
396 Boolean success, hasExtID = false;
397 CFXMLDocumentTypeInfo docData = {{NULL, NULL}};
398 void *dtdStructure = NULL;
399 CFStringRef name;
400
401 // First pass "DOCTYPE"
402 success = _inputStreamMatchString(&parser->input, _DoctypeOpening, 7);
403 success = success && _inputStreamSkipWhitespace(&parser->input, NULL) != 0;
404 success = success && _inputStreamScanXMLName(&parser->input, false, &name);
405 if (success) {
406 _inputStreamSkipWhitespace(&parser->input, NULL);
407 success = _inputStreamPeekCharacter(&parser->input, &ch);
408 } else {
409 // didn't make it past "DOCTYPE" successfully.
410 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found malformed DTD");
411 return false;
412 }
413 if (success && ch != '[' && ch != '>') {
414 // ExternalID
415 hasExtID = true;
416 success = parseExternalID(parser, false, &(docData.externalID));
417 if (success) {
418 _inputStreamSkipWhitespace(&parser->input, NULL);
419 success = _inputStreamPeekCharacter(&parser->input, &ch);
420 }
421 }
422
423 if (!(parser->options & kCFXMLParserSkipMetaData) && *(parser->top)) {
424 CFStringRef tmp = parser->node->dataString;
425 parser->node->dataTypeID = kCFXMLNodeTypeDocumentType;
426 parser->node->dataString = name;
427 parser->node->additionalData = &docData;
428 dtdStructure = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info);
429 if (dtdStructure && parser->status == kCFXMLStatusParseInProgress) {
430 INVOKE_CALLBACK4(parser->callBacks.addChild, parser, *parser->top, dtdStructure, parser->context.info);
431 }
432 parser->node->additionalData = NULL;
433 parser->node->dataString = tmp;
434 if (parser->status != kCFXMLStatusParseInProgress) {
435 // callback called CFXMLParserAbort()
436 _CFReportError(parser, parser->status, NULL);
437 return false;
438 }
439 } else {
440 dtdStructure = NULL;
441 }
442 if (docData.externalID.publicID) CFRelease(docData.externalID.publicID);
443 if (docData.externalID.systemID) CFRelease(docData.externalID.systemID);
444 pushXMLNode(parser, dtdStructure);
445
446 if (success && ch == '[') {
447 // inline DTD
448 _inputStreamGetCharacter(&parser->input, &ch);
449 if (!parseInlineDTD(parser)) return false;
450 _inputStreamSkipWhitespace(&parser->input, NULL);
451 success = _inputStreamGetCharacter(&parser->input, &ch) && ch == '>';
452 } else if (success && ch == '>') {
453 // End of the DTD
454 _inputStreamGetCharacter(&parser->input, &ch);
455 }
456 if (!success) {
457 if (_inputStreamAtEOF(&parser->input)) {
458 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing DTD");
459 } else {
460 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found malformed DTD");
461 }
462 return false;
463 }
464
465 parser->top --; // Remove dtdStructure from the stack
466
467 if (success && dtdStructure) {
468 INVOKE_CALLBACK3(parser->callBacks.endXMLStructure, parser, dtdStructure, parser->context.info);
469 if (parser->status != kCFXMLStatusParseInProgress) {
470 _CFReportError(parser, parser->status, NULL);
471 return false;
472 }
473 }
474 return true;
475 }
476
477 /*
478 [69] PEReference ::= '%' Name ';'
479 */
480 static Boolean parsePhysicalEntityReference(CFXMLParserRef parser) {
481 UniChar ch;
482 CFStringRef name;
483 if (!_inputStreamScanXMLName(&parser->input, false, &name)) {
484 _CFReportError(parser, kCFXMLErrorMalformedName, "Found malformed name while parsing physical entity reference");
485 return false;
486 } else if (!_inputStreamGetCharacter(&parser->input, &ch)) {
487 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing physical entity reference");
488 return false;
489 } else if (ch != ';') {
490 _CFReportError(parser, kCFXMLErrorMalformedName, "Found malformed name while parsing physical entity reference");
491 return false;
492 } else if (!(parser->options & kCFXMLParserSkipMetaData) && *(parser->top)) {
493 CFXMLEntityReferenceInfo myData;
494 Boolean result;
495 CFStringRef tmp = parser->node->dataString;
496 parser->node->dataTypeID = kCFXMLNodeTypeEntityReference;
497 parser->node->dataString = name;
498 myData.entityType = kCFXMLEntityTypeParameter;
499 parser->node->additionalData = &myData;
500 result = reportNewLeaf(parser);
501 parser->node->additionalData = NULL;
502 parser->node->dataString = tmp;
503 return result;
504 } else {
505 return true;
506 }
507 }
508
509 /*
510 [54] AttType ::= StringType | TokenizedType | EnumeratedType
511 [55] StringType ::= 'CDATA'
512 [56] TokenizedType ::= 'ID' | 'IDREF'| 'IDREFS'| 'ENTITY'| 'ENTITIES'| 'NMTOKEN'| 'NMTOKENS'
513 [57] EnumeratedType ::= NotationType | Enumeration
514 [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
515 [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
516 */
517 static Boolean parseEnumeration(CFXMLParserRef parser, Boolean useNMTokens) {
518 UniChar ch;
519 Boolean done = false;
520 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
521 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
522 return false;
523 } else if (ch != '(') {
524 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
525 return false;
526 }
527 _inputStreamSkipWhitespace(&parser->input, NULL);
528 if (!_inputStreamScanXMLName(&parser->input, useNMTokens, NULL)) {
529 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
530 return false;
531 }
532 while (!done) {
533 _inputStreamSkipWhitespace(&parser->input, NULL);
534 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
535 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
536 return false;
537 } else if (ch == ')') {
538 done = true;
539 } else if (ch == '|') {
540 _inputStreamSkipWhitespace(&parser->input, NULL);
541 if (!_inputStreamScanXMLName(&parser->input, useNMTokens, NULL)) {
542 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
543 return false;
544 }
545 } else {
546 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
547 return false;
548 }
549 }
550 return true;
551 }
552
553 static Boolean parseAttributeType(CFXMLParserRef parser, CFMutableStringRef str) {
554 Boolean success = false;
555 static const UniChar attTypeStrings[6][8] = {
556 {'C', 'D', 'A', 'T', 'A', '\0', '\0', '\0'},
557 {'I', 'D', 'R', 'E', 'F', 'S', '\0', '\0'},
558 {'E', 'N', 'T', 'I', 'T', 'Y', '\0', '\0'},
559 {'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S'},
560 {'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S'},
561 {'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N'} };
562 if (str) _inputStreamSetMark(&parser->input);
563 if (_inputStreamMatchString(&parser->input, attTypeStrings[0], 5) ||
564 _inputStreamMatchString(&parser->input, attTypeStrings[1], 6) ||
565 _inputStreamMatchString(&parser->input, attTypeStrings[1], 5) ||
566 _inputStreamMatchString(&parser->input, attTypeStrings[1], 2) ||
567 _inputStreamMatchString(&parser->input, attTypeStrings[2], 6) ||
568 _inputStreamMatchString(&parser->input, attTypeStrings[3], 8) ||
569 _inputStreamMatchString(&parser->input, attTypeStrings[4], 8) ||
570 _inputStreamMatchString(&parser->input, attTypeStrings[4], 7)) {
571 success = true;
572 } else if (_inputStreamMatchString(&parser->input, attTypeStrings[5], 8)) {
573 // Notation
574 if (_inputStreamSkipWhitespace(&parser->input, NULL) == 0) {
575 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
576 success = false;
577 } else {
578 success = parseEnumeration(parser, false);
579 }
580 } else {
581 success = parseEnumeration(parser, true);
582 }
583 if (str) {
584 if (success) {
585 _inputStreamGetCharactersFromMark(&parser->input, str);
586 }
587 _inputStreamClearMark(&parser->input);
588 }
589 return success;
590 }
591
592 /* [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) */
593 static Boolean parseAttributeDefaultDeclaration(CFXMLParserRef parser, CFMutableStringRef str) {
594 const UniChar strings[3][8] = {
595 {'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D'},
596 {'I', 'M', 'P', 'L', 'I', 'E', 'D', '\0'},
597 {'F', 'I', 'X', 'E', 'D', '\0', '\0', '\0'}};
598 UniChar ch;
599 Boolean success;
600 if (str) _inputStreamSetMark(&parser->input);
601 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
602 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
603 success = false;
604 } else if (ch == '#') {
605 if (_inputStreamMatchString(&parser->input, strings[0], 8) ||
606 _inputStreamMatchString(&parser->input, strings[1], 7)) {
607 success = true;
608 } else if (!_inputStreamMatchString(&parser->input, strings[2], 5) || _inputStreamSkipWhitespace(&parser->input, NULL) == 0) {
609 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
610 success = false;
611 } else {
612 // we fall through if "#FIXED" was matched, and at least one whitespace character was stripped.
613 success = parseAttributeValue(parser, NULL);
614 }
615 } else {
616 _inputStreamReturnCharacter(&parser->input, ch);
617 success = parseAttributeValue(parser, NULL);
618 }
619 if (str) {
620 if (success) {
621 _inputStreamGetCharactersFromMark(&parser->input, str);
622 }
623 _inputStreamClearMark(&parser->input);
624 }
625 return success;
626 }
627
628 /*
629 [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
630 [53] AttDef ::= S Name S AttType S DefaultDecl
631 */
632 static Boolean parseAttributeListDeclaration(CFXMLParserRef parser) {
633 const UniChar attList[7] = {'A', 'T', 'T', 'L', 'I', 'S', 'T'};
634 CFXMLAttributeListDeclarationInfo attListData;
635 CFXMLAttributeDeclarationInfo attributeArray[8], *attributes=attributeArray;
636 CFIndex capacity = 8;
637 UniChar ch;
638 Boolean success = true;
639 CFStringRef name;
640 if (!_inputStreamMatchString(&parser->input, attList, 7) ||
641 _inputStreamSkipWhitespace(&parser->input, NULL) == 0 ||
642 !_inputStreamScanXMLName(&parser->input, false, &name)) {
643 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
644 return false;
645 }
646 attListData.numberOfAttributes = 0;
647 if (!(*parser->top) || (parser->options & kCFXMLParserSkipMetaData)) {
648 // Use this to mark that we don't need to collect attribute information to report to the client. Ultimately, we may want to collect this for our own use (for validation, for instance), but for now, the only reason we would create it would be for the client. -- REW, 2/9/2000
649 attributes = NULL;
650 }
651 while (_inputStreamPeekCharacter(&parser->input, &ch) && ch != '>' && _inputStreamSkipWhitespace(&parser->input, NULL) != 0) {
652 CFXMLAttributeDeclarationInfo *attribute = NULL;
653 if (_inputStreamPeekCharacter(&parser->input, &ch) && ch == '>')
654 break;
655 if (attributes) {
656 if (capacity == attListData.numberOfAttributes) {
657 capacity = 2*capacity;
658 if (attributes != attributeArray) {
659 attributes = (CFXMLAttributeDeclarationInfo *)CFAllocatorReallocate(CFGetAllocator(parser), attributes, capacity * sizeof(CFXMLAttributeDeclarationInfo), 0);
660 } else {
661 attributes = (CFXMLAttributeDeclarationInfo *)CFAllocatorAllocate(CFGetAllocator(parser), capacity * sizeof(CFXMLAttributeDeclarationInfo), 0);
662 }
663 }
664 attribute = &(attributes[attListData.numberOfAttributes]);
665 // Much better if we can somehow create these strings immutable - then if the client (or we ourselves) has to copy them, they will end up multiply-retained, rather than having a new alloc and data copy performed. -- REW, 2/9/2000
666 attribute->typeString = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
667 attribute->defaultString = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
668 }
669 if (!_inputStreamScanXMLName(&parser->input, false, &(attribute->attributeName)) || (_inputStreamSkipWhitespace(&parser->input, NULL) == 0)) {
670 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
671 success = false;
672 break;
673 }
674 if (!parseAttributeType(parser, attribute ? (CFMutableStringRef)attribute->typeString : NULL)) {
675 success = false;
676 break;
677 }
678 if (_inputStreamSkipWhitespace(&parser->input, NULL) == 0) {
679 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
680 success = false;
681 break;
682 }
683 if (!parseAttributeDefaultDeclaration(parser, attribute ? (CFMutableStringRef)attribute->defaultString : NULL)) {
684 success = false;
685 break;
686 }
687 attListData.numberOfAttributes ++;
688 }
689 if (success) {
690 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
691 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
692 success = false;
693 } else if (ch != '>') {
694 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
695 success = false;
696 } else if (attributes) {
697 CFStringRef tmp = parser->node->dataString;
698 parser->node->dataTypeID = kCFXMLNodeTypeAttributeListDeclaration;
699 parser->node->dataString = name;
700 attListData.attributes = attributes;
701 parser->node->additionalData = (void *)&attListData;
702 success = reportNewLeaf(parser);
703 parser->node->additionalData = NULL;
704 parser->node->dataString = tmp;
705 }
706 }
707 if (attributes) {
708 // Free up all that memory
709 CFIndex idx;
710 for (idx = 0; idx < attListData.numberOfAttributes; idx ++) {
711 // Do not release attributeName here; it's a uniqued string from scanXMLName
712 CFRelease(attributes[idx].typeString);
713 CFRelease(attributes[idx].defaultString);
714 }
715 if (attributes != attributeArray) {
716 CFAllocatorDeallocate(CFGetAllocator(parser), attributes);
717 }
718 }
719 return success;
720 }
721
722 CF_INLINE Boolean parseSystemLiteral(CFXMLParserRef parser, CFXMLExternalID *extID) {
723 Boolean success;
724 if (extID) {
725 CFMutableStringRef urlStr = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
726 if (_inputStreamScanQuotedString(&parser->input, urlStr)) {
727 success = true;
728 extID->systemID = CFURLCreateWithString(CFGetAllocator(parser), urlStr, parser->input.url);
729 } else {
730 extID->systemID = NULL;
731 success = false;
732 }
733 CFRelease(urlStr);
734 } else {
735 success = _inputStreamScanQuotedString(&parser->input, NULL);
736 }
737 return success;
738 }
739
740 /*
741 [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
742 [83] PublicID ::= 'PUBLIC' S PubidLiteral
743 [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
744 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
745 [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
746 */
747 // This does NOT report errors itself; caller can check to see if parser->input is at EOF to determine whether the formatting failed or unexpected EOF occurred. -- REW, 2/2/2000
748 static Boolean parseExternalID(CFXMLParserRef parser, Boolean alsoAcceptPublicID, CFXMLExternalID *extID) {
749 const UniChar publicString[6] = {'P', 'U', 'B', 'L', 'I', 'C'};
750 const UniChar systemString[6] = {'S', 'Y', 'S', 'T', 'E', 'M'};
751 Boolean success;
752 if (extID) {
753 extID->systemID = NULL;
754 extID->publicID = NULL;
755 }
756 if (_inputStreamMatchString(&parser->input, publicString, 6)) {
757 success = _inputStreamSkipWhitespace(&parser->input, NULL) != 0;
758 if (extID) {
759 extID->publicID = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
760 success = success && _inputStreamScanQuotedString(&parser->input, (CFMutableStringRef)extID->publicID);
761 } else {
762 success = success && _inputStreamScanQuotedString(&parser->input, NULL);
763 }
764 if (success) {
765 UniChar ch;
766 if (alsoAcceptPublicID) {
767 _inputStreamSetMark(&parser->input); // In case we need to roll back the parser
768 }
769 if (_inputStreamSkipWhitespace(&parser->input, NULL) == 0
770 || !_inputStreamPeekCharacter(&parser->input, &ch)
771 || (ch != '\'' && ch != '\"')
772 || !parseSystemLiteral(parser, extID)) {
773 success = alsoAcceptPublicID;
774 if (alsoAcceptPublicID) {
775 _inputStreamBackUpToMark(&parser->input);
776 }
777 } else {
778 success = true;
779 }
780 if (alsoAcceptPublicID) {
781 _inputStreamClearMark(&parser->input);
782 }
783 }
784 } else if (_inputStreamMatchString(&parser->input, systemString, 6)) {
785 success = _inputStreamSkipWhitespace(&parser->input, NULL) != 0 && parseSystemLiteral(parser, extID);
786 } else {
787 success = false;
788 }
789 return success;
790 }
791
792 /*
793 [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
794 */
795 static Boolean parseNotationDeclaration(CFXMLParserRef parser) {
796 static UniChar notationString[8] = {'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N'};
797 Boolean report = *(parser->top) && !(parser->options & kCFXMLParserSkipMetaData);
798 CFXMLNotationInfo notationData = {{NULL, NULL}};
799 CFStringRef name;
800 Boolean success =
801 _inputStreamMatchString(&parser->input, notationString, 8) &&
802 _inputStreamSkipWhitespace(&parser->input, NULL) != 0 &&
803 _inputStreamScanXMLName(&parser->input, false, report ? &name : NULL) &&
804 _inputStreamSkipWhitespace(&parser->input, NULL) != 0 &&
805 parseExternalID(parser, true, report ? &(notationData.externalID) : NULL);
806
807 if (success) {
808 UniChar ch;
809 _inputStreamSkipWhitespace(&parser->input, NULL);
810 success = (_inputStreamGetCharacter(&parser->input, &ch) && ch == '>');
811 }
812 if (!success) {
813 if (_inputStreamAtEOF(&parser->input)) {
814 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
815 } else {
816 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
817 }
818 } else if (report) {
819 CFStringRef tmp = parser->node->dataString;
820 parser->node->dataTypeID = kCFXMLNodeTypeNotation;
821 parser->node->dataString = name;
822 parser->node->additionalData = &notationData;
823 success = reportNewLeaf(parser);
824 parser->node->additionalData = NULL;
825 parser->node->dataString = tmp;
826 }
827 if (notationData.externalID.systemID) CFRelease(notationData.externalID.systemID);
828 if (notationData.externalID.publicID) CFRelease(notationData.externalID.publicID);
829 return success;
830 }
831
832 /*
833 [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
834 [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
835 [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
836 */
837 static Boolean parseChoiceOrSequence(CFXMLParserRef parser, Boolean pastParen) {
838 UniChar ch, separator;
839 if (!pastParen) {
840 if (!_inputStreamGetCharacter(&parser->input, &ch) || ch != '(') return false;
841 _inputStreamSkipWhitespace(&parser->input, NULL);
842 }
843 if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false;
844
845 /* Now scanning cp, production [48] */
846 if (ch == '(') {
847 if (!parseChoiceOrSequence(parser, false)) return false;
848 } else {
849 if (!_inputStreamScanXMLName(&parser->input, false, NULL)) return false;
850 }
851 if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false;
852 if (ch == '?' || ch == '*' || ch == '+') _inputStreamGetCharacter(&parser->input, &ch);
853
854 /* Now past cp */
855 _inputStreamSkipWhitespace(&parser->input, NULL);
856 if (!_inputStreamGetCharacter(&parser->input, &ch)) return false;
857 if (ch == ')') return true;
858 if (ch != '|' && ch != ',') return false;
859 separator = ch;
860 while (ch == separator) {
861 _inputStreamSkipWhitespace(&parser->input, NULL);
862 if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false;
863 if (ch != '(') {
864 if (!_inputStreamScanXMLName(&parser->input, false, NULL)) return false;
865 } else if (!parseChoiceOrSequence(parser, false)) {
866 return false;
867 }
868 _inputStreamSkipWhitespace(&parser->input, NULL);
869 if (!_inputStreamGetCharacter(&parser->input, &ch)) return false;
870 }
871 return ch == ')';
872 }
873
874 /*
875 [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')'
876 */
877 static Boolean parseMixedElementContent(CFXMLParserRef parser) {
878 static const UniChar pcdataString[7] = {'#', 'P', 'C', 'D', 'A', 'T', 'A'};
879 UniChar ch;
880 if (!_inputStreamMatchString(&parser->input, pcdataString, 7)) return false;
881 _inputStreamSkipWhitespace(&parser->input, NULL);
882 if (!_inputStreamGetCharacter(&parser->input, &ch) && (ch == ')' || ch == '|')) return false;
883 if (ch == ')') return true;
884
885 while (ch == '|') {
886 _inputStreamSkipWhitespace(&parser->input, NULL);
887 if (!_inputStreamScanXMLName(&parser->input, false, NULL)) return false;
888 _inputStreamSkipWhitespace(&parser->input, NULL);
889 if (!_inputStreamGetCharacter(&parser->input, &ch)) return false;
890 }
891 if (ch != ')') return false;
892 if (!_inputStreamGetCharacter(&parser->input, &ch) || ch != '*') return false;
893 return true;
894 }
895
896 /*
897 [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
898 [47] children ::= (choice | seq) ('?' | '*' | '+')?
899 */
900 static Boolean parseElementContentSpec(CFXMLParserRef parser) {
901 static const UniChar eltContentEmpty[5] = {'E', 'M', 'P', 'T', 'Y'};
902 static const UniChar eltContentAny[3] = {'A', 'N', 'Y'};
903 UniChar ch;
904 if (_inputStreamMatchString(&parser->input, eltContentEmpty, 5) || _inputStreamMatchString(&parser->input, eltContentAny, 3)) {
905 return true;
906 } else if (!_inputStreamPeekCharacter(&parser->input, &ch) || ch != '(') {
907 return false;
908 } else {
909 // We want to know if we have a Mixed per production [51]. If we don't, we will need to back up and call the parseChoiceOrSequence function. So we set the mark now. -- REW, 2/10/2000
910 _inputStreamGetCharacter(&parser->input, &ch);
911 _inputStreamSkipWhitespace(&parser->input, NULL);
912 if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false;
913 if (ch == '#') {
914 // Mixed
915 return parseMixedElementContent(parser);
916 } else {
917 if (parseChoiceOrSequence(parser, true)) {
918 if (_inputStreamPeekCharacter(&parser->input, &ch) && (ch == '*' || ch == '?' || ch == '+')) {
919 _inputStreamGetCharacter(&parser->input, &ch);
920 }
921 return true;
922 } else {
923 return false;
924 }
925 }
926 }
927 }
928
929 /*
930 [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
931 */
932 static Boolean parseElementDeclaration(CFXMLParserRef parser) {
933 Boolean report = *(parser->top) && !(parser->options & kCFXMLParserSkipMetaData);
934 Boolean success;
935 static const UniChar eltChars[7] = {'E', 'L', 'E', 'M', 'E', 'N', 'T'};
936 UniChar ch = '>';
937 CFMutableStringRef contentDesc = NULL;
938 CFStringRef name;
939 success = _inputStreamMatchString(&parser->input, eltChars, 7)
940 && _inputStreamSkipWhitespace(&parser->input, NULL) != 0
941 && _inputStreamScanXMLName(&parser->input, false, report ? &name : NULL)
942 && _inputStreamSkipWhitespace(&parser->input, NULL) != 0;
943 if (success) {
944 if (report) _inputStreamSetMark(&parser->input);
945 success = parseElementContentSpec(parser);
946 if (success && report) {
947 contentDesc = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
948 _inputStreamGetCharactersFromMark(&parser->input, contentDesc);
949 }
950 if (report) _inputStreamClearMark(&parser->input);
951 if (success) _inputStreamSkipWhitespace(&parser->input, NULL);
952 success = success && _inputStreamMatchString(&parser->input, &ch, 1);
953 }
954 if (!success) {
955 if (_inputStreamAtEOF(&parser->input)) {
956 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
957 } else {
958 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
959 }
960 } else if (report) {
961 CFXMLElementTypeDeclarationInfo eltData;
962 CFStringRef tmp = parser->node->dataString;
963 parser->node->dataTypeID = kCFXMLNodeTypeElementTypeDeclaration;
964 parser->node->dataString = name;
965 eltData.contentDescription = contentDesc;
966 parser->node->additionalData = &eltData;
967 success = reportNewLeaf(parser);
968 parser->node->additionalData = NULL;
969 parser->node->dataString = tmp;
970 }
971 if (contentDesc) CFRelease(contentDesc);
972 return success;
973 }
974
975 /*
976 [70] EntityDecl ::= GEDecl | PEDecl
977 [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
978 [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
979 [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
980 [74] PEDef ::= EntityValue | ExternalID
981 [76] NDataDecl ::= S 'NDATA' S Name
982 [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | "'" ([^%&'] | PEReference | Reference)* "'"
983 */
984 static Boolean parseEntityDeclaration(CFXMLParserRef parser) {
985 const UniChar entityStr[6] = {'E', 'N', 'T', 'I', 'T', 'Y'};
986 UniChar ch;
987 Boolean isPEDecl = false;
988 CFXMLEntityInfo entityData;
989 CFStringRef name;
990 Boolean report = *(parser->top) && !(parser->options & kCFXMLParserSkipMetaData);
991 Boolean success =
992 _inputStreamMatchString(&parser->input, entityStr, 6) &&
993 (_inputStreamSkipWhitespace(&parser->input, NULL) != 0) &&
994 _inputStreamPeekCharacter(&parser->input, &ch);
995
996 entityData.replacementText = NULL;
997 entityData.entityID.publicID = NULL;
998 entityData.entityID.systemID = NULL;
999 entityData.notationName = NULL;
1000 // We will set entityType immediately before reporting
1001
1002 if (success && ch == '%') {
1003 _inputStreamGetCharacter(&parser->input, &ch);
1004 success = _inputStreamSkipWhitespace(&parser->input, NULL) != 0;
1005 isPEDecl = true;
1006 }
1007 success = success && _inputStreamScanXMLName(&parser->input, false, report ? &name : NULL) && (_inputStreamSkipWhitespace(&parser->input, NULL) != 0) && _inputStreamPeekCharacter(&parser->input, &ch);
1008 if (success && (ch == '\"' || ch == '\'')) {
1009 // EntityValue
1010 // This is not quite correct - the string scanned cannot contain '%' or '&' unless it's as part of a valid entity reference -- REW, 2/2/2000
1011 if (report) {
1012 entityData.replacementText = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
1013 success = _inputStreamScanQuotedString(&parser->input, (CFMutableStringRef)entityData.replacementText);
1014 } else {
1015 success = _inputStreamScanQuotedString(&parser->input, NULL);
1016 }
1017 } else if (success) {
1018 // ExternalID
1019 success = parseExternalID(parser, false, report ? &(entityData.entityID) : NULL);
1020 if (success && !isPEDecl && _inputStreamSkipWhitespace(&parser->input, NULL) != 0) {
1021 // There could be an option NDataDecl
1022 // Don't we need to set entityData.notationName? -- REW, 3/6/2000
1023 const UniChar nDataStr[5] = {'N', 'D', 'A', 'T', 'A'};
1024 if (_inputStreamMatchString(&parser->input, nDataStr, 5)) {
1025 success = (_inputStreamSkipWhitespace(&parser->input, NULL) != 0) && _inputStreamScanXMLName(&parser->input, false, NULL);
1026 }
1027 }
1028 }
1029 if (success) {
1030 _inputStreamSkipWhitespace(&parser->input, NULL);
1031 success = _inputStreamGetCharacter(&parser->input, &ch) && ch == '>';
1032 }
1033 if (!success) {
1034 if (_inputStreamAtEOF(&parser->input)) {
1035 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
1036 } else {
1037 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
1038 }
1039 } else {
1040 CFStringRef tmp = parser->node->dataString;
1041 if (isPEDecl) entityData.entityType = kCFXMLEntityTypeParameter;
1042 else if (entityData.replacementText) entityData.entityType = kCFXMLEntityTypeParsedInternal;
1043 else if (!entityData.notationName) entityData.entityType = kCFXMLEntityTypeParsedExternal;
1044 else entityData.entityType = kCFXMLEntityTypeUnparsed;
1045 parser->node->dataTypeID = kCFXMLNodeTypeEntity;
1046 parser->node->dataString = name;
1047 parser->node->additionalData = &entityData;
1048 success = reportNewLeaf(parser);
1049 parser->node->additionalData = NULL;
1050 parser->node->dataString = tmp;
1051 if (entityData.replacementText) CFRelease(entityData.replacementText);
1052 }
1053 if (entityData.entityID.publicID) CFRelease(entityData.entityID.publicID);
1054 if (entityData.entityID.systemID) CFRelease(entityData.entityID.systemID);
1055 return success;
1056 }
1057
1058 /*
1059 [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1060 [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
1061 */
1062 // First character should be just past '['
1063 static Boolean parseInlineDTD(CFXMLParserRef parser) {
1064 Boolean success = true;
1065 while (success && !_inputStreamAtEOF(&parser->input)) {
1066 UniChar ch;
1067
1068 parseWhitespace(parser);
1069 if (!_inputStreamGetCharacter(&parser->input, &ch)) break;
1070 if (ch == '%') {
1071 // PEReference
1072 success = parsePhysicalEntityReference(parser);
1073 } else if (ch == '<') {
1074 // markupdecl
1075 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
1076 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
1077 return false;
1078 }
1079 if (ch == '?') {
1080 // Processing Instruction
1081 success = parseProcessingInstruction(parser, true); // We can safely pass true here, because *parser->top will be NULL if kCFXMLParserSkipMetaData is true
1082 } else if (ch == '!') {
1083 UniChar dashes[2] = {'-', '-'};
1084 if (_inputStreamMatchString(&parser->input, dashes, 2)) {
1085 // Comment
1086 success = parseComment(parser, true);
1087 } else {
1088 // elementdecl | AttListDecl | EntityDecl | NotationDecl
1089 if (!_inputStreamPeekCharacter(&parser->input, &ch)) {
1090 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
1091 return false;
1092 } else if (ch == 'A') {
1093 // AttListDecl
1094 success = parseAttributeListDeclaration(parser);
1095 } else if (ch == 'N') {
1096 success = parseNotationDeclaration(parser);
1097 } else if (ch == 'E') {
1098 // elementdecl | EntityDecl
1099 _inputStreamGetCharacter(&parser->input, &ch);
1100 if (!_inputStreamPeekCharacter(&parser->input, &ch)) {
1101 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
1102 return false;
1103 }
1104 _inputStreamReturnCharacter(&parser->input, 'E');
1105 if (ch == 'L') {
1106 success = parseElementDeclaration(parser);
1107 } else if (ch == 'N') {
1108 success = parseEntityDeclaration(parser);
1109 } else {
1110 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
1111 return false;
1112 }
1113 } else {
1114 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
1115 return false;
1116 }
1117 }
1118 } else {
1119 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
1120 return false;
1121 }
1122 } else if (ch == ']') {
1123 return true;
1124 } else {
1125 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
1126 return false;
1127 }
1128 }
1129 if (success) {
1130 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
1131 }
1132 return false;
1133 }
1134
1135 /*
1136 [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1137 */
1138 static Boolean parseTagContent(CFXMLParserRef parser) {
1139 while (!_inputStreamAtEOF(&parser->input)) {
1140 UniChar ch;
1141 CFIndex numWhitespaceCharacters;
1142
1143 _inputStreamSetMark(&parser->input);
1144 numWhitespaceCharacters = _inputStreamSkipWhitespace(&parser->input, NULL);
1145 // Don't report the whitespace yet; if the first thing we see is character data, we put the whitespace back and report it as part of the character data.
1146 if (!_inputStreamGetCharacter(&parser->input, &ch)) break; // break == report unexpected EOF
1147
1148 if (ch != '<' && ch != '&') { // CharData
1149 // Back off the whitespace; we'll report it with the PCData
1150 _inputStreamBackUpToMark(&parser->input);
1151 _inputStreamClearMark(&parser->input);
1152 if (!parsePCData(parser)) return false;
1153 if(_inputStreamComposingErrorOccurred(&parser->input)) {
1154 _CFReportError(parser, kCFXMLErrorEncodingConversionFailure, "Encountered string encoding error");
1155 return false;
1156 }
1157 continue;
1158 }
1159
1160 // element | Reference | CDSect | PI | Comment
1161 // We can safely report any whitespace now
1162 if (!(parser->options & kCFXMLParserSkipWhitespace) && numWhitespaceCharacters != 0 && *(parser->top)) {
1163 _inputStreamReturnCharacter(&parser->input, ch);
1164 _inputStreamGetCharactersFromMark(&parser->input, (CFMutableStringRef)(parser->node->dataString));
1165 parser->node->dataTypeID = kCFXMLNodeTypeWhitespace;
1166 parser->node->additionalData = NULL;
1167 if (!reportNewLeaf(parser)) return false;
1168 _inputStreamGetCharacter(&parser->input, &ch);
1169 }
1170 _inputStreamClearMark(&parser->input);
1171
1172 if (ch == '&') {
1173 // Reference; for the time being, we don't worry about processing these; just report them as Entity references
1174 if (!parseEntityReference(parser, true)) return false;
1175 continue;
1176 }
1177
1178 // ch == '<'; element | CDSect | PI | Comment
1179 if (!_inputStreamPeekCharacter(&parser->input, &ch)) break;
1180 if (ch == '?') { // PI
1181 _inputStreamGetCharacter(&parser->input, &ch);
1182 if (!parseProcessingInstruction(parser, true))
1183 return false;
1184 } else if (ch == '/') { // end tag; we're passing outside of content's production
1185 _inputStreamReturnCharacter(&parser->input, '<'); // Back off to the '<'
1186 return true;
1187 } else if (ch != '!') { // element
1188 if (!parseTag(parser)) return false;
1189 } else {
1190 // Comment | CDSect
1191 UniChar dashes[3] = {'!', '-', '-'};
1192 if (_inputStreamMatchString(&parser->input, dashes, 3)) {
1193 // Comment
1194 if (!parseComment(parser, true)) return false;
1195 } else {
1196 // Should have a CDSect; back off the "<!" and call parseCDSect
1197 _inputStreamReturnCharacter(&parser->input, '<');
1198 if (!parseCDSect(parser)) return false;
1199 }
1200 }
1201 }
1202
1203 if(_inputStreamComposingErrorOccurred(&parser->input)) {
1204 _CFReportError(parser, kCFXMLErrorEncodingConversionFailure, "Encountered string encoding error");
1205 return false;
1206 }
1207 // Only way to get here is if premature EOF was found
1208 //#warning CF:Include the tag name here
1209 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing tag content");
1210 return false;
1211 }
1212
1213 static Boolean parseCDSect(CFXMLParserRef parser) {
1214 const UniChar _CDSectOpening[9] = {'<', '!', '[', 'C', 'D', 'A', 'T', 'A', '['};
1215 const UniChar _CDSectClose[3] = {']', ']', '>'};
1216 if (!_inputStreamMatchString(&parser->input, _CDSectOpening, 9)) {
1217 _CFReportError(parser, kCFXMLErrorMalformedCDSect, "Encountered bad prefix to a presumed CDATA section");
1218 return false;
1219 }
1220 if (!_inputStreamScanToCharacters(&parser->input, _CDSectClose, 3, (CFMutableStringRef)(parser->node->dataString))) {
1221 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing CDATA section");
1222 return false;
1223 }
1224
1225 parser->node->dataTypeID = kCFXMLNodeTypeCDATASection;
1226 parser->node->additionalData = NULL;
1227 return reportNewLeaf(parser);
1228 }
1229
1230 /*
1231 [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1232 */
1233 static Boolean validateCharacterReference(CFStringRef str) {
1234 Boolean isHex;
1235 CFIndex idx, len = CFStringGetLength(str);
1236 if (len < 2) return false;
1237 if (CFStringGetCharacterAtIndex(str, 0) != '#') return false;
1238 if (CFStringGetCharacterAtIndex(str, 1) == 'x') {
1239 isHex = true;
1240 idx = 2;
1241 if (len == 2) return false;
1242 } else {
1243 isHex = false;
1244 idx = 1;
1245 }
1246
1247 while (idx < len) {
1248 UniChar ch;
1249 ch = CFStringGetCharacterAtIndex(str, idx);
1250 idx ++;
1251 if (!(ch <= '9' && ch >= '0') &&
1252 !(isHex && ((ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F')))) {
1253 break;
1254 }
1255 }
1256 return (idx == len);
1257 }
1258
1259 /*
1260 [67] Reference ::= EntityRef | CharRef
1261 [68] EntityRef ::= '&' Name ';'
1262 */
1263 static Boolean parseEntityReference(CFXMLParserRef parser, Boolean report) {
1264 UniChar ch;
1265 CFXMLEntityReferenceInfo entData;
1266 CFStringRef name = NULL;
1267 if (!_inputStreamPeekCharacter(&parser->input, &ch)) {
1268 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing EntityReference");
1269 return false;
1270 }
1271 if (ch == '#') {
1272 ch = ';';
1273 if (!_inputStreamScanToCharacters(&parser->input, &ch, 1, (CFMutableStringRef)parser->node->dataString)) {
1274 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing EntityReference");
1275 return false;
1276 } else if (!validateCharacterReference(parser->node->dataString)) {
1277 _CFReportError(parser, kCFXMLErrorMalformedCharacterReference, "Encountered illegal character while parsing character reference");
1278 return false;
1279 }
1280 entData.entityType = kCFXMLEntityTypeCharacter;
1281 name = parser->node->dataString;
1282 } else if (!_inputStreamScanXMLName(&parser->input, false, report ? &name : NULL) || !_inputStreamGetCharacter(&parser->input, &ch) || ch != ';') {
1283 if (_inputStreamAtEOF(&parser->input)) {
1284 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing EntityReference");
1285 return false;
1286 } else {
1287 _CFReportError(parser, kCFXMLErrorMalformedName, "Encountered malformed name while parsing EntityReference");
1288 return false;
1289 }
1290 } else {
1291 entData.entityType = kCFXMLEntityTypeParsedInternal;
1292 }
1293 if (report) {
1294 CFStringRef tmp = parser->node->dataString;
1295 Boolean success;
1296 parser->node->dataTypeID = kCFXMLNodeTypeEntityReference;
1297 parser->node->dataString = name;
1298 parser->node->additionalData = &entData;
1299 success = reportNewLeaf(parser);
1300 parser->node->additionalData = NULL;
1301 parser->node->dataString = tmp;
1302 return success;
1303 } else {
1304 return true;
1305 }
1306 }
1307
1308 #if 0
1309 // Kept from old entity reference parsing....
1310 {
1311 switch (*(parser->curr)) {
1312 case 'l': // "lt"
1313 if (len >= 3 && *(parser->curr+1) == 't' && *(parser->curr+2) == ';') {
1314 ch = '<';
1315 parser->curr += 3;
1316 break;
1317 }
1318 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
1319 return;
1320 case 'g': // "gt"
1321 if (len >= 3 && *(parser->curr+1) == 't' && *(parser->curr+2) == ';') {
1322 ch = '>';
1323 parser->curr += 3;
1324 break;
1325 }
1326 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
1327 return;
1328 case 'a': // "apos" or "amp"
1329 if (len < 4) { // Not enough characters for either conversion
1330 parser->errorString = CFStringCreateWithCString(parser->allocator, "Encountered unexpected EOF", kCFStringEncodingASCII);
1331 return;
1332 }
1333 if (*(parser->curr+1) == 'm') {
1334 // "amp"
1335 if (*(parser->curr+2) == 'p' && *(parser->curr+3) == ';') {
1336 ch = '&';
1337 parser->curr += 4;
1338 break;
1339 }
1340 } else if (*(parser->curr+1) == 'p') {
1341 // "apos"
1342 if (len > 4 && *(parser->curr+2) == 'o' && *(parser->curr+3) == 's' && *(parser->curr+4) == ';') {
1343 ch = '\'';
1344 parser->curr += 5;
1345 break;
1346 }
1347 }
1348 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
1349 return;
1350 case 'q': // "quote"
1351 if (len >= 6 && *(parser->curr+1) == 'u' && *(parser->curr+2) == 'o' && *(parser->curr+3) == 't' && *(parser->curr+4) == 'e' && *(parser->curr+5) == ';') {
1352 ch = '\"';
1353 parser->curr += 6;
1354 break;
1355 }
1356 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
1357 return;
1358 case '#':
1359 {
1360 UniChar num = 0;
1361 Boolean isHex = false;
1362 if ( len < 4) { // Not enough characters to make it all fit! Need at least "&#d;"
1363 parser->errorString = CFStringCreateWithCString(parser->allocator, "Encountered unexpected EOF", kCFStringEncodingASCII);
1364 return;
1365 }
1366 parser->curr ++;
1367 if (*(parser->curr) == 'x') {
1368 isHex = true;
1369 parser->curr ++;
1370 }
1371 while (parser->curr < parser->end) {
1372 ch = *(parser->curr);
1373 if (ch == ';') {
1374 CFStringAppendCharacters(string, &num, 1);
1375 parser->curr ++;
1376 return;
1377 }
1378 if (!isHex) num = num*10;
1379 else num = num << 4;
1380 if (ch <= '9' && ch >= '0') {
1381 num += (ch - '0');
1382 } else if (!isHex) {
1383 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unexpected character %c at line %d"), ch, lineNumber(parser));
1384 return;
1385 } else if (ch >= 'a' && ch <= 'f') {
1386 num += 10 + (ch - 'a');
1387 } else if (ch >= 'A' && ch <= 'F') {
1388 num += 10 + (ch - 'A');
1389 } else {
1390 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unexpected character %c at line %d"), ch, lineNumber(parser));
1391 return;
1392 }
1393 }
1394 parser->errorString = CFStringCreateWithCString(parser->allocator, "Encountered unexpected EOF", kCFStringEncodingASCII);
1395 return;
1396 }
1397 default:
1398 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
1399 return;
1400 }
1401 CFStringAppendCharacters(string, &ch, 1);
1402 }
1403 #endif
1404
1405 /*
1406 [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
1407 */
1408 static Boolean parsePCData(CFXMLParserRef parser) {
1409 UniChar ch;
1410 Boolean done = false;
1411 _inputStreamSetMark(&parser->input);
1412 while (!done && _inputStreamGetCharacter(&parser->input, &ch)) {
1413 switch (ch) {
1414 case '<':
1415 case '&':
1416 _inputStreamReturnCharacter(&parser->input, ch);
1417 done = true;
1418 break;
1419 case ']':
1420 {
1421 const UniChar endSequence[2] = {']', '>'};
1422 if (_inputStreamMatchString(&parser->input, endSequence, 2)) {
1423 _CFReportError(parser, kCFXMLErrorMalformedParsedCharacterData, "Encountered \"]]>\" in parsed character data");
1424 _inputStreamClearMark(&parser->input);
1425 return false;
1426 }
1427 break;
1428 }
1429 default:
1430 ;
1431 }
1432 }
1433 _inputStreamGetCharactersFromMark(&parser->input, (CFMutableStringRef)(parser->node->dataString));
1434 _inputStreamClearMark(&parser->input);
1435 parser->node->dataTypeID = kCFXMLNodeTypeText;
1436 parser->node->additionalData = NULL;
1437 return reportNewLeaf(parser);
1438 }
1439
1440 /*
1441 [42] ETag ::= '</' Name S? '>'
1442 */
1443 static Boolean parseCloseTag(CFXMLParserRef parser, CFStringRef tag) {
1444 const UniChar beginEndTag[2] = {'<', '/'};
1445 Boolean unexpectedEOF = false, mismatch = false;
1446 CFStringRef closeTag;
1447
1448 // We can get away with testing pointer equality between tag & closeTag because scanXMLName guarantees the strings it returns are unique.
1449 if (_inputStreamMatchString(&parser->input, beginEndTag, 2) && _inputStreamScanXMLName(&parser->input, false, &closeTag) && closeTag == tag) {
1450
1451 UniChar ch;
1452 _inputStreamSkipWhitespace(&parser->input, NULL);
1453 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
1454 unexpectedEOF = true;
1455 } else if (ch != '>') {
1456 mismatch = true;
1457 }
1458 } else if (_inputStreamAtEOF(&parser->input)) {
1459 unexpectedEOF = true;
1460 } else {
1461 mismatch = true;
1462 }
1463
1464 if (unexpectedEOF || mismatch) {
1465 if (unexpectedEOF) {
1466 parser->errorString = CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered unexpected EOF while parsing close tag for <%@>"), tag);
1467 parser->status = kCFXMLErrorUnexpectedEOF;
1468 if(parser->callBacks.handleError) INVOKE_CALLBACK3(parser->callBacks.handleError, parser, kCFXMLErrorUnexpectedEOF, parser->context.info);
1469 } else {
1470 parser->errorString = CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered malformed close tag for <%@>"), tag);
1471 parser->status = kCFXMLErrorMalformedCloseTag;
1472 if(parser->callBacks.handleError) INVOKE_CALLBACK3(parser->callBacks.handleError, parser, kCFXMLErrorMalformedCloseTag, parser->context.info);
1473 }
1474 return false;
1475 }
1476 return true;
1477 }
1478
1479 /*
1480 [39] element ::= EmptyElementTag | STag content ETag
1481 [40] STag ::= '<' Name (S Attribute)* S? '>'
1482 [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
1483 */
1484 static Boolean parseTag(CFXMLParserRef parser) {
1485 UniChar ch;
1486 void *tag;
1487 CFXMLElementInfo data;
1488 Boolean success = true;
1489 CFStringRef tagName;
1490
1491 if (!_inputStreamScanXMLName(&parser->input, false, &tagName)) {
1492 _CFReportError(parser, kCFXMLErrorMalformedStartTag, "Encountered malformed start tag");
1493 return false;
1494 }
1495
1496 _inputStreamSkipWhitespace(&parser->input, NULL);
1497
1498 if (!parseAttributes(parser)) return false; // parsed directly into parser->argDict ; parseAttributes consumes any trailing whitespace
1499 data.attributes = parser->argDict;
1500 data.attributeOrder = parser->argArray;
1501 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
1502 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF");
1503 return false;
1504 }
1505 if (ch == '/') {
1506 data.isEmpty = true;
1507 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
1508 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF");
1509 return false;
1510 }
1511 } else {
1512 data.isEmpty = false;
1513 }
1514 if (ch != '>') {
1515 _CFReportError(parser, kCFXMLErrorMalformedStartTag, "Encountered malformed start tag");
1516 return false;
1517 }
1518
1519 if (*parser->top || parser->top == parser->stack) {
1520 CFStringRef oldStr = parser->node->dataString;
1521 parser->node->dataTypeID = kCFXMLNodeTypeElement;
1522 parser->node->dataString = tagName;
1523 parser->node->additionalData = &data;
1524 tag = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info);
1525 if (tag && parser->status == kCFXMLStatusParseInProgress) {
1526 INVOKE_CALLBACK4(parser->callBacks.addChild, parser, *parser->top, tag, parser->context.info);
1527 }
1528 parser->node->additionalData = NULL;
1529 parser->node->dataString = oldStr;
1530 if (parser->status != kCFXMLStatusParseInProgress) {
1531 // callback called CFXMLParserAbort()
1532 _CFReportError(parser, parser->status, NULL);
1533 return false;
1534 }
1535 } else {
1536 tag = NULL;
1537 }
1538
1539 pushXMLNode(parser, tag);
1540 if (!data.isEmpty) {
1541 success = parseTagContent(parser);
1542 if (success) {
1543 success = parseCloseTag(parser, tagName);
1544 }
1545 }
1546 parser->top --;
1547
1548 if (success && tag) {
1549 INVOKE_CALLBACK3(parser->callBacks.endXMLStructure, parser, tag, parser->context.info);
1550 if (parser->status != kCFXMLStatusParseInProgress) {
1551 _CFReportError(parser, parser->status, NULL);
1552 return false;
1553 }
1554 }
1555 return success;
1556 }
1557
1558 /*
1559 [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
1560 [67] Reference ::= EntityRef | CharRef
1561 [68] EntityRef ::= '&' Name ';'
1562 */
1563 // For the moment, we don't worry about references in the attribute values.
1564 static Boolean parseAttributeValue(CFXMLParserRef parser, CFMutableStringRef str) {
1565 UniChar quote, ch;
1566 Boolean success = _inputStreamGetCharacter(&parser->input, &quote);
1567 if (!success || (quote != '\'' && quote != '\"')) return false;
1568 if (str) _inputStreamSetMark(&parser->input);
1569 while (_inputStreamGetCharacter(&parser->input, &ch) && ch != quote) {
1570 switch (ch) {
1571 case '<': success = false; break;
1572 case '&':
1573 if (!parseEntityReference(parser, false)) {
1574 success = false;
1575 break;
1576 }
1577 default:
1578 ;
1579 }
1580 }
1581
1582 if (success && _inputStreamAtEOF(&parser->input)) {
1583 success = false;
1584 }
1585 if (str) {
1586 if (success) {
1587 _inputStreamReturnCharacter(&parser->input, quote);
1588 _inputStreamGetCharactersFromMark(&parser->input, str);
1589 _inputStreamGetCharacter(&parser->input, &ch);
1590 }
1591 _inputStreamClearMark(&parser->input);
1592 }
1593 return success;
1594 }
1595
1596 /*
1597 [40] STag ::= '<' Name (S Attribute)* S? '>'
1598 [41] Attribute ::= Name Eq AttValue
1599 [25] Eq ::= S? '=' S?
1600 */
1601
1602 // Expects the input stream to be positioned at the first attribute name (or at the '>' or '/' that ends the tag); will consume the trailing whitespace.
1603 Boolean parseAttributes(CFXMLParserRef parser) {
1604 UniChar ch;
1605 CFMutableDictionaryRef dict;
1606 CFMutableArrayRef array;
1607 Boolean failure = false;
1608 if (_inputStreamPeekCharacter(&parser->input, &ch) == '>') {
1609 if (parser->argDict) {
1610 CFDictionaryRemoveAllValues(parser->argDict);
1611 CFArrayRemoveAllValues(parser->argArray);
1612 }
1613 return true; // No attributes; let caller deal with it
1614 }
1615 if (!parser->argDict) {
1616 parser->argDict = CFDictionaryCreateMutable(CFGetAllocator(parser), 0, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
1617 parser->argArray = CFArrayCreateMutable(CFGetAllocator(parser), 0, &kCFTypeArrayCallBacks);
1618 } else {
1619 CFDictionaryRemoveAllValues(parser->argDict);
1620 CFArrayRemoveAllValues(parser->argArray);
1621 }
1622 dict = parser->argDict;
1623 array = parser->argArray;
1624 while (!failure && _inputStreamPeekCharacter(&parser->input, &ch) && ch != '>' && ch != '/') {
1625 CFStringRef key;
1626 CFMutableStringRef value;
1627 if (!_inputStreamScanXMLName(&parser->input, false, &key)) {
1628 failure = true;
1629 break;
1630 }
1631 if (CFArrayGetFirstIndexOfValue(array, CFRangeMake(0, CFArrayGetCount(array)), key) != kCFNotFound) {
1632 _CFReportError(parser, kCFXMLErrorMalformedStartTag, "Found repeated attribute");
1633 return false;
1634 }
1635 _inputStreamSkipWhitespace(&parser->input, NULL);
1636 if (!_inputStreamGetCharacter(&parser->input, &ch) || ch != '=') {
1637 failure = true;
1638 break;
1639 }
1640 _inputStreamSkipWhitespace(&parser->input, NULL);
1641 value = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
1642 if (!parseAttributeValue(parser, value)) {
1643 CFRelease(value);
1644 failure = true;
1645 break;
1646 }
1647 CFArrayAppendValue(array, key);
1648 CFDictionarySetValue(dict, key, value);
1649 CFRelease(value);
1650 _inputStreamSkipWhitespace(&parser->input, NULL);
1651 }
1652 if (failure) {
1653 //#warning CF:Include tag name in this error report
1654 _CFReportError(parser, kCFXMLErrorMalformedStartTag, "Found illegal character while parsing element tag");
1655 return false;
1656 } else if (_inputStreamAtEOF(&parser->input)) {
1657 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing element attributes");
1658 return false;
1659 } else {
1660 return true;
1661 }
1662 }
1663
1664 /*
1665 [1] document ::= prolog element Misc*
1666 [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1667 [27] Misc ::= Comment | PI | S
1668 [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1669
1670 We treat XMLDecl as a plain old PI, since PI is part of Misc. This changes the prolog and document productions to
1671 [22-1] prolog ::= Misc* (doctypedecl Misc*)?
1672 [1-1] document ::= Misc* (doctypedecl Misc*)? element Misc*
1673
1674 NOTE: This function assumes parser->stack has a valid top. I.e. the document pointer has already been created!
1675 */
1676 static Boolean parseXML(CFXMLParserRef parser) {
1677 Boolean success = true, sawDTD = false, sawElement = false;
1678 UniChar ch;
1679 while (success && _inputStreamPeekCharacter(&parser->input, &ch)) {
1680 switch (ch) {
1681 case ' ':
1682 case '\n':
1683 case '\t':
1684 case '\r':
1685 success = parseWhitespace(parser);
1686 break;
1687 case '<':
1688 _inputStreamGetCharacter(&parser->input, &ch);
1689 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
1690 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing top-level document");
1691 return false;
1692 }
1693 if (ch == '!') {
1694 // Comment or DTD
1695 UniChar dashes[2] = {'-', '-'};
1696 if (_inputStreamMatchString(&parser->input, dashes, 2)) {
1697 // Comment
1698 success = parseComment(parser, true);
1699 } else {
1700 // Should be DTD
1701 if (sawDTD) {
1702 _CFReportError(parser, kCFXMLErrorMalformedDocument, "Encountered a second DTD");
1703 return false;
1704 }
1705 success = parseDTD(parser);
1706 if (success) sawDTD = true;
1707 }
1708 } else if (ch == '?') {
1709 // Processing instruction
1710 success = parseProcessingInstruction(parser, true);
1711 } else {
1712 // Tag or malformed
1713 if (sawElement) {
1714 _CFReportError(parser, kCFXMLErrorMalformedDocument, "Encountered second top-level element");
1715 return false;
1716 }
1717 _inputStreamReturnCharacter(&parser->input, ch);
1718 success = parseTag(parser);
1719 if (success) sawElement = true;
1720 }
1721 break;
1722 default: {
1723 parser->status = kCFXMLErrorMalformedDocument;
1724 parser->errorString = ch < 256 ?
1725 CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered unexpected character 0x%x (\'%c\') at top-level"), ch, ch) :
1726 CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered unexpected Unicode character 0x%x at top-level"), ch);
1727
1728 if (parser->callBacks.handleError) {
1729 INVOKE_CALLBACK3(parser->callBacks.handleError, parser, parser->status, parser->context.info);
1730 }
1731 return false;
1732 }
1733 }
1734 }
1735
1736 if (!success) return false;
1737 if (!sawElement) {
1738 _CFReportError(parser, kCFXMLErrorElementlessDocument, "No element found in document");
1739 return false;
1740 }
1741 return true;
1742 }
1743
1744 static void _CFReportError(CFXMLParserRef parser, CFXMLParserStatusCode errNum, const char *str) {
1745 if (str) {
1746 parser->status = errNum;
1747 parser->errorString = CFStringCreateWithCString(CFGetAllocator(parser), str, kCFStringEncodingASCII);
1748 }
1749 if (parser->callBacks.handleError) {
1750 INVOKE_CALLBACK3(parser->callBacks.handleError, parser, errNum, parser->context.info);
1751 }
1752 }
1753
1754 // Assumes parser->node has been set and is ready to go
1755 static Boolean reportNewLeaf(CFXMLParserRef parser) {
1756 void *xmlStruct;
1757 if (*(parser->top) == NULL) return true;
1758
1759 xmlStruct = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info);
1760 if (xmlStruct && parser->status == kCFXMLStatusParseInProgress) {
1761 INVOKE_CALLBACK4(parser->callBacks.addChild, parser, *(parser->top), xmlStruct, parser->context.info);
1762 if (parser->status == kCFXMLStatusParseInProgress) INVOKE_CALLBACK3(parser->callBacks.endXMLStructure, parser, xmlStruct, parser->context.info);
1763 }
1764 if (parser->status != kCFXMLStatusParseInProgress) {
1765 _CFReportError(parser, parser->status, NULL);
1766 return false;
1767 }
1768 return true;
1769 }
1770
1771 static void pushXMLNode(CFXMLParserRef parser, void *node) {
1772 parser->top ++;
1773 if ((unsigned)(parser->top - parser->stack) == parser->capacity) {
1774 parser->stack = (void **)CFAllocatorReallocate(CFGetAllocator(parser), parser->stack, 2 * parser->capacity * sizeof(void *), 0);
1775 parser->top = parser->stack + parser->capacity;
1776 parser->capacity = 2*parser->capacity;
1777 }
1778 *(parser->top) = node;
1779 }
1780
1781 /**************************/
1782 /* Parsing to a CFXMLTree */
1783 /**************************/
1784
1785 static void *_XMLTreeCreateXMLStructure(CFXMLParserRef parser, CFXMLNodeRef node, void *context) {
1786 CFXMLNodeRef myNode = CFXMLNodeCreateCopy(CFGetAllocator(parser), node);
1787 CFXMLTreeRef tree = CFXMLTreeCreateWithNode(CFGetAllocator(parser), myNode);
1788 CFRelease(myNode);
1789 return (void *)tree;
1790 }
1791
1792 static void _XMLTreeAddChild(CFXMLParserRef parser, void *parent, void *child, void *context) {
1793 CFTreeAppendChild((CFTreeRef)parent, (CFTreeRef)child);
1794 }
1795
1796 static void _XMLTreeEndXMLStructure(CFXMLParserRef parser, void *xmlType, void *context) {
1797 CFXMLTreeRef node = (CFXMLTreeRef)xmlType;
1798 if (CFTreeGetParent(node))
1799 CFRelease((CFXMLTreeRef)xmlType);
1800 }
1801
1802 CFXMLTreeRef CFXMLTreeCreateWithDataFromURL(CFAllocatorRef allocator, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex version) {
1803 CFXMLParserRef parser;
1804 CFXMLParserCallBacks callbacks;
1805 CFXMLTreeRef result;
1806
1807 CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__);
1808
1809 callbacks.createXMLStructure = _XMLTreeCreateXMLStructure;
1810 callbacks.addChild = _XMLTreeAddChild;
1811 callbacks.endXMLStructure = _XMLTreeEndXMLStructure;
1812 callbacks.resolveExternalEntity = NULL;
1813 callbacks.handleError = NULL;
1814 parser = CFXMLParserCreateWithDataFromURL(allocator, dataSource, parseOptions, version, &callbacks, NULL);
1815
1816 if (CFXMLParserParse(parser)) {
1817 result = (CFXMLTreeRef)CFXMLParserGetDocument(parser);
1818 } else {
1819 result = (CFXMLTreeRef)CFXMLParserGetDocument(parser);
1820 if (result) CFRelease(result);
1821 result = NULL;
1822 }
1823 CFRelease(parser);
1824 return result;
1825 }
1826
1827 CFXMLTreeRef CFXMLTreeCreateFromData(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex parserVersion) {
1828 return CFXMLTreeCreateFromDataWithError(allocator, xmlData, dataSource, parseOptions, parserVersion, NULL);
1829 }
1830
/* Keys of the error dictionary that CFXMLTreeCreateFromDataWithError() returns through its errorDict
   argument when parsing fails. Description maps to the parser's error description string; LineNumber,
   Location and StatusCode map to CFNumbers holding the parser's line number, location and status code. */
1831 CONST_STRING_DECL(kCFXMLTreeErrorDescription, "kCFXMLTreeErrorDescription");
1832 CONST_STRING_DECL(kCFXMLTreeErrorLineNumber, "kCFXMLTreeErrorLineNumber");
1833 CONST_STRING_DECL(kCFXMLTreeErrorLocation, "kCFXMLTreeErrorLocation");
1834 CONST_STRING_DECL(kCFXMLTreeErrorStatusCode, "kCFXMLTreeErrorStatusCode");
1835
1836 CFXMLTreeRef CFXMLTreeCreateFromDataWithError(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex parserVersion, CFDictionaryRef *errorDict) {
1837 CFXMLParserRef parser;
1838 CFXMLParserCallBacks callbacks;
1839 CFXMLTreeRef result;
1840
1841 __CFGenericValidateType(xmlData, CFDataGetTypeID());
1842 CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__);
1843
1844 callbacks.createXMLStructure = _XMLTreeCreateXMLStructure;
1845 callbacks.addChild = _XMLTreeAddChild;
1846 callbacks.endXMLStructure = _XMLTreeEndXMLStructure;
1847 callbacks.resolveExternalEntity = NULL;
1848 callbacks.handleError = NULL;
1849 parser = CFXMLParserCreate(allocator, xmlData, dataSource, parseOptions, parserVersion, &callbacks, NULL);
1850
1851 if (CFXMLParserParse(parser)) {
1852 result = (CFXMLTreeRef)CFXMLParserGetDocument(parser);
1853 } else {
1854 if (errorDict) { // collect the error dictionary
1855 *errorDict = CFDictionaryCreateMutable(allocator, 4, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
1856 if (*errorDict) {
1857 CFIndex rawnum;
1858 CFNumberRef cfnum;
1859 CFStringRef errstring;
1860
1861 rawnum = CFXMLParserGetLocation(parser);
1862 cfnum = CFNumberCreate(allocator, kCFNumberSInt32Type, &rawnum);
1863 if(cfnum) {
1864 CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorLocation, cfnum);
1865 CFRelease(cfnum);
1866 }
1867
1868 rawnum = CFXMLParserGetLineNumber(parser);
1869 cfnum = CFNumberCreate(allocator, kCFNumberSInt32Type, &rawnum);
1870 if(cfnum) {
1871 CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorLineNumber, cfnum);
1872 CFRelease(cfnum);
1873 }
1874
1875 rawnum = CFXMLParserGetStatusCode(parser);
1876 cfnum = CFNumberCreate(allocator, kCFNumberSInt32Type, &rawnum);
1877 if(cfnum) {
1878 CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorStatusCode, cfnum);
1879 CFRelease(cfnum);
1880 }
1881
1882 errstring = CFXMLParserCopyErrorDescription(parser);
1883 if(errstring) {
1884 CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorDescription, errstring);
1885 CFRelease(errstring);
1886 }
1887 }
1888 }
1889 result = (CFXMLTreeRef)CFXMLParserGetDocument(parser);
1890 if (result) CFRelease(result);
1891 result = NULL;
1892 }
1893 CFRelease(parser);
1894 return result;
1895 }
1896
1897 /*
1898 At the very least we need to do <, >, &, ", and '. In addition, we'll have to do everything else in the string.
1899 We should also be handling items that are up over certain values correctly.
1900 */
1901 CFStringRef CFXMLCreateStringByEscapingEntities(CFAllocatorRef allocator, CFStringRef string, CFDictionaryRef entitiesDictionary) {
1902 CFAssert1(string != NULL, __kCFLogAssertion, "%s(): NULL string not permitted.", __PRETTY_FUNCTION__);
1903 CFMutableStringRef newString = CFStringCreateMutable(allocator, 0); // unbounded mutable string
1904 CFMutableCharacterSetRef startChars = CFCharacterSetCreateMutable(allocator);
1905
1906 CFStringInlineBuffer inlineBuf;
1907 CFIndex idx = 0;
1908 CFIndex mark = idx;
1909 CFIndex stringLength = CFStringGetLength(string);
1910 UniChar uc;
1911
1912 CFCharacterSetAddCharactersInString(startChars, CFSTR("&<>'\""));
1913
1914 CFStringInitInlineBuffer(string, &inlineBuf, CFRangeMake(0, stringLength));
1915 for(idx = 0; idx < stringLength; idx++) {
1916 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, idx);
1917 if(CFCharacterSetIsCharacterMember(startChars, uc)) {
1918 CFStringRef previousSubstring = CFStringCreateWithSubstring(allocator, string, CFRangeMake(mark, idx - mark));
1919 CFStringAppend(newString, previousSubstring);
1920 CFRelease(previousSubstring);
1921 switch(uc) {
1922 case '&':
1923 CFStringAppend(newString, CFSTR("&amp;"));
1924 break;
1925 case '<':
1926 CFStringAppend(newString, CFSTR("&lt;"));
1927 break;
1928 case '>':
1929 CFStringAppend(newString, CFSTR("&gt;"));
1930 break;
1931 case '\'':
1932 CFStringAppend(newString, CFSTR("&apos;"));
1933 break;
1934 case '"':
1935 CFStringAppend(newString, CFSTR("&quot;"));
1936 break;
1937 }
1938 mark = idx + 1;
1939 }
1940 }
1941 // Copy the remainder to the output string before returning.
1942 CFStringRef remainder = CFStringCreateWithSubstring(allocator, string, CFRangeMake(mark, idx - mark));
1943 if (NULL != remainder) {
1944 CFStringAppend(newString, remainder);
1945 CFRelease(remainder);
1946 }
1947
1948 CFRelease(startChars);
1949 return newString;
1950 }
1951
1952 CFStringRef CFXMLCreateStringByUnescapingEntities(CFAllocatorRef allocator, CFStringRef string, CFDictionaryRef entitiesDictionary) {
1953 CFAssert1(string != NULL, __kCFLogAssertion, "%s(): NULL string not permitted.", __PRETTY_FUNCTION__);
1954
1955 CFStringInlineBuffer inlineBuf; /* use this for fast traversal of the string in question */
1956 CFStringRef sub;
1957 CFIndex lastChunkStart, length = CFStringGetLength(string);
1958 CFIndex i, entityStart;
1959 UniChar uc;
1960 UInt32 entity;
1961 int base;
1962 CFMutableDictionaryRef fullReplDict = entitiesDictionary ? CFDictionaryCreateMutableCopy(allocator, 0, entitiesDictionary) : CFDictionaryCreateMutable(allocator, 0, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
1963
1964 CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("amp"), (const void *)CFSTR("&"));
1965 CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("quot"), (const void *)CFSTR("\""));
1966 CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("lt"), (const void *)CFSTR("<"));
1967 CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("gt"), (const void *)CFSTR(">"));
1968 CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("apos"), (const void *)CFSTR("'"));
1969
1970 CFStringInitInlineBuffer(string, &inlineBuf, CFRangeMake(0, length - 1));
1971 CFMutableStringRef newString = CFStringCreateMutable(allocator, 0);
1972
1973 lastChunkStart = 0;
1974 // Scan through the string in its entirety
1975 for(i = 0; i < length; ) {
1976 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++; // grab the next character and move i.
1977
1978 if(uc == '&') {
1979 entityStart = i - 1;
1980 entity = 0xFFFF; // set this to a not-Unicode character as sentinel
1981 // we've hit the beginning of an entity. Copy everything from lastChunkStart to this point.
1982 if(lastChunkStart < i - 1) {
1983 sub = CFStringCreateWithSubstring(allocator, string, CFRangeMake(lastChunkStart, (i - 1) - lastChunkStart));
1984 CFStringAppend(newString, sub);
1985 CFRelease(sub);
1986 }
1987
1988 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++; // grab the next character and move i.
1989 // Now we can process the entity reference itself
1990 if(uc == '#') { // this is a numeric entity.
1991 base = 10;
1992 entity = 0;
1993 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++;
1994
1995 if(uc == 'x') { // only lowercase x allowed. Translating numeric entity as hexadecimal.
1996 base = 16;
1997 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++;
1998 }
1999
2000 // process the provided digits 'til we're finished
2001 while(true) {
2002 if (uc >= '0' && uc <= '9')
2003 entity = entity * base + (uc-'0');
2004 else if (uc >= 'a' && uc <= 'f' && base == 16)
2005 entity = entity * base + (uc-'a'+10);
2006 else if (uc >= 'A' && uc <= 'F' && base == 16)
2007 entity = entity * base + (uc-'A'+10);
2008 else break;
2009
2010 if (i < length) {
2011 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++;
2012 }
2013 else
2014 break;
2015 }
2016 }
2017
2018 // Scan to the end of the entity
2019 while(uc != ';' && i < length) {
2020 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++;
2021 }
2022
2023 if(0xFFFF != entity) { // it was numeric, and translated.
2024 // Now, output the result fo the entity
2025 if(entity >= 0x10000) {
2026 UniChar characters[2] = { ((entity - 0x10000) >> 10) + 0xD800, ((entity - 0x10000) & 0x3ff) + 0xDC00 };
2027 CFStringAppendCharacters(newString, characters, 2);
2028 } else {
2029 UniChar character = entity;
2030 CFStringAppendCharacters(newString, &character, 1);
2031 }
2032 } else { // it wasn't numeric.
2033 sub = CFStringCreateWithSubstring(allocator, string, CFRangeMake(entityStart + 1, (i - entityStart - 2))); // This trims off the & and ; from the string, so we can use it against the dictionary itself.
2034 CFStringRef replacementString = (CFStringRef)CFDictionaryGetValue(fullReplDict, sub);
2035 if(replacementString) {
2036 CFStringAppend(newString, replacementString);
2037 } else {
2038 CFRelease(sub); // let the old substring go, since we didn't find it in the dictionary
2039 sub = CFStringCreateWithSubstring(allocator, string, CFRangeMake(entityStart, (i - entityStart))); // create a new one, including the & and ;
2040 CFStringAppend(newString, sub); // ...and append that.
2041 }
2042 CFRelease(sub); // in either case, release the most-recent "sub"
2043 }
2044
2045 // move the lastChunkStart to the beginning of the next chunk.
2046 lastChunkStart = i;
2047 }
2048 }
2049 if(lastChunkStart < length) { // we've come out of the loop, let's get the rest of the string and tack it on.
2050 sub = CFStringCreateWithSubstring(allocator, string, CFRangeMake(lastChunkStart, i - lastChunkStart));
2051 CFStringAppend(newString, sub);
2052 CFRelease(sub);
2053 }
2054
2055 CFRelease(fullReplDict);
2056
2057 return newString;
2058 }
2059
2060