]> git.saurik.com Git - apple/cf.git/blob - Parsing.subproj/CFXMLParser.c
CF-368.tar.gz
[apple/cf.git] / Parsing.subproj / CFXMLParser.c
1 /*
2 * Copyright (c) 2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23 /* CFXMLParser.c
24 Copyright 1999-2002, Apple, Inc. All rights reserved.
25 Responsibility: Chris Parker
26 */
27
28 #include <CoreFoundation/CFXMLParser.h>
29 #include "CFXMLInputStream.h"
30 #include <CoreFoundation/CFNumber.h>
31 #include "CFUniChar.h"
32 #include "CFInternal.h"
33
/* Internal state of a CFXMLParser instance. */
struct __CFXMLParser {
    CFRuntimeBase _cfBase; // CF runtime base

    _CFXMLInputStream input; // character source; also carries the source URL, the raw data and the detected encoding

    void **stack; // base of the stack of client-created structures; slot 0 holds the document structure
    void **top; // current top-of-stack slot
    UInt32 capacity; // number of slots allocated in stack; 0 until CFXMLParserParse() allocates it

    struct __CFXMLNode *node; // Our private node; we use it to report back information
    CFMutableDictionaryRef argDict; // created lazily; NULL until needed
    CFMutableArrayRef argArray; // created lazily; NULL until needed

    UInt32 options; // parse option flags supplied at creation (e.g. kCFXMLParserSkipWhitespace, kCFXMLParserSkipMetaData)
    CFXMLParserCallBacks callBacks; // client callbacks, copied at creation
    CFXMLParserContext context; // client context, copied at creation; info is retained if a retain callback is supplied

    CFXMLParserStatusCode status; // kCFXMLStatusParseNotBegun, kCFXMLStatusParseInProgress, or an error code
    CFStringRef errorString; // description of the current error; NULL until an error is recorded
};
54
55 static CFStringRef __CFXMLParserCopyDescription(CFTypeRef cf) {
56 const struct __CFXMLParser *parser = cf;
57 return CFStringCreateWithFormat(CFGetAllocator(cf), NULL, CFSTR("<CFXMLParser 0x%x>"), parser);
58 }
59
60 static void __CFXMLParserDeallocate(CFTypeRef cf) {
61 struct __CFXMLParser *parser = (struct __CFXMLParser *)cf;
62 CFAllocatorRef alloc = CFGetAllocator(parser);
63 _freeInputStream(&(parser->input));
64 if (parser->argDict) CFRelease(parser->argDict);
65 if (parser->argArray) CFRelease(parser->argArray);
66 if (parser->errorString) CFRelease(parser->errorString);
67 if (parser->node) CFRelease(parser->node);
68 CFAllocatorDeallocate(alloc, parser->stack);
69 if (parser->context.info && parser->context.release) {
70 parser->context.release(parser->context.info);
71 }
72 }
73
// Type ID of the CFXMLParser class; remains _kCFRuntimeNotATypeID until __CFXMLParserInitialize() registers the class.
static CFTypeID __kCFXMLParserTypeID = _kCFRuntimeNotATypeID;

// Runtime class record for CFXMLParser. Only the deallocation and description callbacks are supplied; every other slot is NULL.
static const CFRuntimeClass __CFXMLParserClass = {
    0,
    "CFXMLParser",
    NULL, // init
    NULL, // copy
    __CFXMLParserDeallocate,
    NULL,
    NULL,
    NULL, //
    __CFXMLParserCopyDescription
};
87
88 __private_extern__ void __CFXMLParserInitialize(void) {
89 __kCFXMLParserTypeID = _CFRuntimeRegisterClass(&__CFXMLParserClass);
90 }
91
92 CFTypeID CFXMLParserGetTypeID(void) {
93 return __kCFXMLParserTypeID;
94 }
95
// Mask applied to stored function pointers before they are handed back to clients:
// on ppc the two low bits are cleared; on every other architecture the value passes through unchanged.
// NOTE(review): presumably the low bits can be used as tags on stored callbacks on ppc (see FAULT_CALLBACK) -- confirm against CFInternal.h
#if defined(__ppc__)
#define __mask ~0x3
#else
#define __mask ~0x0
#endif
101
102 void CFXMLParserGetContext(CFXMLParserRef parser, CFXMLParserContext *context) {
103 CFAssert1(parser != NULL, __kCFLogAssertion, "%s(): NULL parser not permitted", __PRETTY_FUNCTION__);
104 __CFGenericValidateType(parser, __kCFXMLParserTypeID);
105 if (context) {
106 context->version = parser->context.version;
107 context->info = parser->context.info;
108 context->retain = (void *)((uintptr_t)parser->context.retain & __mask);
109 context->release = (void *)((uintptr_t)parser->context.release & __mask);
110 context->copyDescription = (void *)((uintptr_t)parser->context.copyDescription & __mask);
111 }
112 }
113
114 void CFXMLParserGetCallBacks(CFXMLParserRef parser, CFXMLParserCallBacks *callBacks) {
115 __CFGenericValidateType(parser, __kCFXMLParserTypeID);
116 if (callBacks) {
117 callBacks->version = parser->callBacks.version;
118 callBacks->createXMLStructure = (void *)((uintptr_t)parser->callBacks.createXMLStructure & __mask);
119 callBacks->addChild = (void *)((uintptr_t)parser->callBacks.addChild & __mask);
120 callBacks->endXMLStructure = (void *)((uintptr_t)parser->callBacks.endXMLStructure & __mask);
121 callBacks->resolveExternalEntity = (void *)((uintptr_t)parser->callBacks.resolveExternalEntity & __mask);
122 callBacks->handleError = (void *)((uintptr_t)parser->callBacks.handleError & __mask);
123 }
124 }
125
// The pointer mask is only needed by the two accessors above.
#undef __mask
127
128 CFURLRef CFXMLParserGetSourceURL(CFXMLParserRef parser) {
129 __CFGenericValidateType(parser, __kCFXMLParserTypeID);
130 return parser->input.url;
131 }
132
133 /* Returns the character index or line number of the current parse location */
134 CFIndex CFXMLParserGetLocation(CFXMLParserRef parser) {
135 __CFGenericValidateType(parser, __kCFXMLParserTypeID);
136 return _inputStreamCurrentLocation(&parser->input);
137 }
138
139 CFIndex CFXMLParserGetLineNumber(CFXMLParserRef parser) {
140 __CFGenericValidateType(parser, __kCFXMLParserTypeID);
141 return _inputStreamCurrentLine(&parser->input);
142 }
143
144 /* Returns the top-most object returned by the createXMLStructure callback */
145 void *CFXMLParserGetDocument(CFXMLParserRef parser) {
146 __CFGenericValidateType(parser, __kCFXMLParserTypeID);
147 if (parser->capacity > 0)
148 return parser->stack[0];
149 else
150 return NULL;
151 }
152
153 CFXMLParserStatusCode CFXMLParserGetStatusCode(CFXMLParserRef parser) {
154 __CFGenericValidateType(parser, __kCFXMLParserTypeID);
155 return parser->status;
156 }
157
158 CFStringRef CFXMLParserCopyErrorDescription(CFXMLParserRef parser) {
159 __CFGenericValidateType(parser, __kCFXMLParserTypeID);
160 return CFRetain(parser->errorString);
161 }
162
163 void CFXMLParserAbort(CFXMLParserRef parser, CFXMLParserStatusCode errorCode, CFStringRef errorDescription) {
164 __CFGenericValidateType(parser, __kCFXMLParserTypeID);
165 CFAssert1(errorCode > 0, __kCFLogAssertion, "%s(): errorCode must be greater than zero", __PRETTY_FUNCTION__);
166 CFAssert1(errorDescription != NULL, __kCFLogAssertion, "%s(): errorDescription may not be NULL", __PRETTY_FUNCTION__);
167 __CFGenericValidateType(errorDescription, CFStringGetTypeID());
168
169 parser->status = errorCode;
170 if (parser->errorString) CFRelease(parser->errorString);
171 parser->errorString = CFStringCreateCopy(NULL, errorDescription);
172 }
173
174
// Forward declarations of the parsing routines. Each returns a Boolean: true on success, false on failure.
static Boolean parseXML(CFXMLParserRef parser);
static Boolean parseComment(CFXMLParserRef parser, Boolean report); // report: whether the comment should be reported to the client
static Boolean parseProcessingInstruction(CFXMLParserRef parser, Boolean report); // report: whether the instruction should be reported to the client
static Boolean parseInlineDTD(CFXMLParserRef parser);
static Boolean parseDTD(CFXMLParserRef parser);
static Boolean parsePhysicalEntityReference(CFXMLParserRef parser);
static Boolean parseCDSect(CFXMLParserRef parser);
static Boolean parseEntityReference(CFXMLParserRef parser, Boolean report);
static Boolean parsePCData(CFXMLParserRef parser);
static Boolean parseWhitespace(CFXMLParserRef parser);
static Boolean parseAttributeListDeclaration(CFXMLParserRef parser);
static Boolean parseNotationDeclaration(CFXMLParserRef parser);
static Boolean parseElementDeclaration(CFXMLParserRef parser);
static Boolean parseEntityDeclaration(CFXMLParserRef parser);
static Boolean parseExternalID(CFXMLParserRef parser, Boolean alsoAcceptPublicID, CFXMLExternalID *extID);
static Boolean parseCloseTag(CFXMLParserRef parser, CFStringRef tag);
static Boolean parseTagContent(CFXMLParserRef parser);
static Boolean parseTag(CFXMLParserRef parser);
static Boolean parseAttributes(CFXMLParserRef parser);
static Boolean parseAttributeValue(CFXMLParserRef parser, CFMutableStringRef str); // str may be NULL when the value does not need to be collected

// Utilities; may need to make these accessible to the property list parser to avoid code duplication
static void _CFReportError(CFXMLParserRef parser, CFXMLParserStatusCode errNum, const char *str); // str may be NULL when the parser already holds the error (e.g. after CFXMLParserAbort())
static Boolean reportNewLeaf(CFXMLParserRef parser); // Assumes parser->node has been set and is ready to go
static void pushXMLNode(CFXMLParserRef parser, void *node);
200
201 static CFXMLParserRef __CFXMLParserInit(CFAllocatorRef alloc, CFURLRef dataSource, CFOptionFlags options, CFDataRef xmlData, CFIndex version, CFXMLParserCallBacks *callBacks, CFXMLParserContext *context) {
202 struct __CFXMLParser *parser = (struct __CFXMLParser *)_CFRuntimeCreateInstance(alloc, __kCFXMLParserTypeID, sizeof(struct __CFXMLParser) - sizeof(CFRuntimeBase), NULL);
203 struct __CFXMLNode *node = (struct __CFXMLNode *)_CFRuntimeCreateInstance(alloc, CFXMLNodeGetTypeID(), sizeof(struct __CFXMLNode) - sizeof(CFRuntimeBase), NULL);
204 UniChar *buf;
205 if (parser && node) {
206 alloc = CFGetAllocator(parser);
207 _initializeInputStream(&(parser->input), alloc, dataSource, xmlData);
208 parser->top = parser->stack;
209 parser->stack = NULL;
210 parser->capacity = 0;
211
212 buf = CFAllocatorAllocate(alloc, 128*sizeof(UniChar), 0);
213 parser->node = node;
214 parser->node->dataString = CFStringCreateMutableWithExternalCharactersNoCopy(alloc, buf, 0, 128, alloc);
215 parser->node->additionalData = NULL;
216 parser->node->version = version;
217 parser->argDict = NULL; // don't create these until necessary
218 parser->argArray = NULL;
219
220 parser->options = options;
221 parser->callBacks = *callBacks;
222
223 FAULT_CALLBACK((void **)&(parser->callBacks.createXMLStructure));
224 FAULT_CALLBACK((void **)&(parser->callBacks.addChild));
225 FAULT_CALLBACK((void **)&(parser->callBacks.endXMLStructure));
226 FAULT_CALLBACK((void **)&(parser->callBacks.resolveExternalEntity));
227 FAULT_CALLBACK((void **)&(parser->callBacks.handleError));
228
229 if (context) {
230 parser->context = *context;
231 if (parser->context.info && parser->context.retain) {
232 parser->context.retain(parser->context.info);
233 }
234 } else {
235 parser->context.version = 0;
236 parser->context.info = NULL;
237 parser->context.retain = NULL;
238 parser->context.release = NULL;
239 parser->context.copyDescription = NULL;
240 }
241 parser->status = kCFXMLStatusParseNotBegun;
242 parser->errorString = NULL;
243 } else {
244 if (parser) CFRelease(parser);
245 if (node) CFRelease(node);
246 parser = NULL;
247 }
248 return parser;
249 }
250
251 CFXMLParserRef CFXMLParserCreate(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex versionOfNodes, CFXMLParserCallBacks *callBacks, CFXMLParserContext *context) {
252 CFAssert1(xmlData != NULL, __kCFLogAssertion, "%s(): NULL data not permitted", __PRETTY_FUNCTION__);
253 __CFGenericValidateType(xmlData, CFDataGetTypeID());
254 CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__);
255 CFAssert1(callBacks != NULL && callBacks->createXMLStructure != NULL && callBacks->addChild != NULL && callBacks->endXMLStructure != NULL, __kCFLogAssertion, "%s(): callbacks createXMLStructure, addChild, and endXMLStructure must all be non-NULL", __PRETTY_FUNCTION__);
256 CFAssert2(versionOfNodes <= 1, __kCFLogAssertion, "%s(): version number %d is higher than supported by CFXMLParser", __PRETTY_FUNCTION__, versionOfNodes);
257 CFAssert1(versionOfNodes != 0, __kCFLogAssertion, "%s(): version number 0 is no longer supported by CFXMLParser", __PRETTY_FUNCTION__);
258 return __CFXMLParserInit(allocator, dataSource, parseOptions, xmlData, versionOfNodes, callBacks, context);
259 }
260
261 CFXMLParserRef CFXMLParserCreateWithDataFromURL(CFAllocatorRef allocator, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex versionOfNodes, CFXMLParserCallBacks *callBacks, CFXMLParserContext *context) {
262 CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__);
263 CFAssert1(callBacks != NULL && callBacks->createXMLStructure != NULL && callBacks->addChild != NULL && callBacks->endXMLStructure != NULL, __kCFLogAssertion, "%s(): callbacks createXMLStructure, addChild, and endXMLStructure must all be non-NULL", __PRETTY_FUNCTION__);
264 CFAssert2(versionOfNodes <= 1, __kCFLogAssertion, "%s(): version number %d is higher than supported by CFXMLParser", __PRETTY_FUNCTION__, versionOfNodes);
265 CFAssert1(versionOfNodes != 0, __kCFLogAssertion, "%s(): version number 0 is no longer supported by CFXMLParser", __PRETTY_FUNCTION__);
266
267 return __CFXMLParserInit(allocator, dataSource, parseOptions, NULL, versionOfNodes, callBacks, context);
268 }
269
270 Boolean CFXMLParserParse(CFXMLParserRef parser) {
271 CFXMLDocumentInfo docData;
272 __CFGenericValidateType(parser, __kCFXMLParserTypeID);
273 if (parser->status != kCFXMLStatusParseNotBegun) return false;
274 parser->status = kCFXMLStatusParseInProgress;
275
276 if (!_openInputStream(&parser->input)) {
277 if (!parser->input.data) {
278 // couldn't load URL
279 parser->status = kCFXMLErrorNoData;
280 parser->errorString = CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("No data found at %@"), CFURLGetString(parser->input.url));
281 } else {
282 // couldn't figure out the encoding
283 CFAssert(parser->input.encoding == kCFStringEncodingInvalidId, __kCFLogAssertion, "CFXMLParser internal error: input stream could not be opened");
284 parser->status = kCFXMLErrorUnknownEncoding;
285 parser->errorString = CFStringCreateWithCString(CFGetAllocator(parser), "Encountered unknown encoding", kCFStringEncodingASCII);
286 }
287 if (parser->callBacks.handleError) {
288 INVOKE_CALLBACK3(parser->callBacks.handleError, parser, parser->status, parser->context.info);
289 }
290 return false;
291 }
292
293 // Create the document
294 parser->stack = CFAllocatorAllocate(CFGetAllocator(parser), 16 * sizeof(void *), 0);
295 parser->capacity = 16;
296 parser->node->dataTypeID = kCFXMLNodeTypeDocument;
297 docData.encoding = _inputStreamGetEncoding(&parser->input);
298 docData.sourceURL = parser->input.url;
299 parser->node->additionalData = &docData;
300 parser->stack[0] = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info);
301 parser->top = parser->stack;
302 parser->node->additionalData = NULL;
303
304 // Client may have called CFXMLParserAbort() during any callback, so we must always check to see if we have an error status after a callback
305 if (parser->status != kCFXMLStatusParseInProgress) {
306 _CFReportError(parser, parser->status, NULL);
307 return false;
308 }
309 return parseXML(parser);
310 }
311
/* The next several functions are all intended to parse past a particular XML structure. They expect the parser's input stream to be positioned at the first content character of their structure (e.g. parseComment expects the input stream to be positioned just past "<!--"). They parse to the end of their structure, calling any necessary callbacks along the way, and advancing the input stream as they go. They either return void (not possible for the parse to fail) or they return a Boolean (success/failure). The calling routines are expected to catch returned Booleans and fail immediately if false is returned. */
313
314 // [3] S ::= (#x20 | #x9 | #xD | #xA)+
315 static Boolean parseWhitespace(CFXMLParserRef parser) {
316 CFIndex len;
317 Boolean report = !(parser->options & kCFXMLParserSkipWhitespace);
318 len = _inputStreamSkipWhitespace(&parser->input, report ? (CFMutableStringRef)(parser->node->dataString) : NULL);
319 if (report && len) {
320 parser->node->dataTypeID = kCFXMLNodeTypeWhitespace;
321 parser->node->additionalData = NULL;
322 return reportNewLeaf(parser);
323 } else {
324 return true;
325 }
326 }
327
328 // parser should be just past "<!--"
329 static Boolean parseComment(CFXMLParserRef parser, Boolean report) {
330 const UniChar dashes[2] = {'-', '-'};
331 UniChar ch;
332 report = report && (!(parser->options & kCFXMLParserSkipMetaData));
333 if (!_inputStreamScanToCharacters(&parser->input, dashes, 2, report ? (CFMutableStringRef)(parser->node->dataString) : NULL) || !_inputStreamGetCharacter(&parser->input, &ch)) {
334 _CFReportError(parser, kCFXMLErrorUnexpectedEOF,"Found unexpected EOF while parsing comment");
335 return false;
336 } else if (ch != '>') {
337 _CFReportError(parser, kCFXMLErrorMalformedComment, "Found \"--\" within a comment");
338 return false;
339 } else if (report) {
340 parser->node->dataTypeID = kCFXMLNodeTypeComment;
341 parser->node->additionalData = NULL;
342 return reportNewLeaf(parser);
343 } else {
344 return true;
345 }
346 }
347
348 /*
349 [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
350 [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
351 */
352 // parser should be set to the first character after "<?"
353 static Boolean parseProcessingInstruction(CFXMLParserRef parser, Boolean report) {
354 const UniChar piTermination[2] = {'?', '>'};
355 CFMutableStringRef str;
356 CFStringRef name;
357
358 if (!_inputStreamScanXMLName(&parser->input, false, &name)) {
359 _CFReportError(parser, kCFXMLErrorMalformedProcessingInstruction, "Found malformed processing instruction");
360 return false;
361 }
362 _inputStreamSkipWhitespace(&parser->input, NULL);
363 str = (report && *parser->top) ? CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser)) : NULL;
364 if (!_inputStreamScanToCharacters(&parser->input, piTermination, 2, str)) {
365 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing processing instruction");
366 if (str) CFRelease(str);
367 return false;
368 }
369
370 if (str) {
371 CFXMLProcessingInstructionInfo data;
372 Boolean result;
373 CFStringRef tmp = parser->node->dataString;
374 parser->node->dataTypeID = kCFXMLNodeTypeProcessingInstruction;
375 parser->node->dataString = name;
376 data.dataString = str;
377 parser->node->additionalData = &data;
378 result = reportNewLeaf(parser);
379 parser->node->additionalData = NULL;
380 parser->node->dataString = tmp;
381 CFRelease(str);
382 return result;
383 } else {
384 return true;
385 }
386 }
387
388 /*
389 [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
390 */
391 static const UniChar _DoctypeOpening[7] = {'D', 'O', 'C', 'T', 'Y', 'P', 'E'};
392 // first character should be immediately after the "<!"
393 static Boolean parseDTD(CFXMLParserRef parser) {
394 UniChar ch;
395 Boolean success, hasExtID = false;
396 CFXMLDocumentTypeInfo docData = {{NULL, NULL}};
397 void *dtdStructure = NULL;
398 CFStringRef name;
399
400 // First pass "DOCTYPE"
401 success = _inputStreamMatchString(&parser->input, _DoctypeOpening, 7);
402 success = success && _inputStreamSkipWhitespace(&parser->input, NULL) != 0;
403 success = success && _inputStreamScanXMLName(&parser->input, false, &name);
404 if (success) {
405 _inputStreamSkipWhitespace(&parser->input, NULL);
406 success = _inputStreamPeekCharacter(&parser->input, &ch);
407 } else {
408 // didn't make it past "DOCTYPE" successfully.
409 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found malformed DTD");
410 return false;
411 }
412 if (success && ch != '[' && ch != '>') {
413 // ExternalID
414 hasExtID = true;
415 success = parseExternalID(parser, false, &(docData.externalID));
416 if (success) {
417 _inputStreamSkipWhitespace(&parser->input, NULL);
418 success = _inputStreamPeekCharacter(&parser->input, &ch);
419 }
420 }
421
422 if (!(parser->options & kCFXMLParserSkipMetaData) && *(parser->top)) {
423 CFStringRef tmp = parser->node->dataString;
424 parser->node->dataTypeID = kCFXMLNodeTypeDocumentType;
425 parser->node->dataString = name;
426 parser->node->additionalData = &docData;
427 dtdStructure = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info);
428 if (dtdStructure && parser->status == kCFXMLStatusParseInProgress) {
429 INVOKE_CALLBACK4(parser->callBacks.addChild, parser, *parser->top, dtdStructure, parser->context.info);
430 }
431 parser->node->additionalData = NULL;
432 parser->node->dataString = tmp;
433 if (parser->status != kCFXMLStatusParseInProgress) {
434 // callback called CFXMLParserAbort()
435 _CFReportError(parser, parser->status, NULL);
436 return false;
437 }
438 } else {
439 dtdStructure = NULL;
440 }
441 if (docData.externalID.publicID) CFRelease(docData.externalID.publicID);
442 if (docData.externalID.systemID) CFRelease(docData.externalID.systemID);
443 pushXMLNode(parser, dtdStructure);
444
445 if (success && ch == '[') {
446 // inline DTD
447 _inputStreamGetCharacter(&parser->input, &ch);
448 if (!parseInlineDTD(parser)) return false;
449 _inputStreamSkipWhitespace(&parser->input, NULL);
450 success = _inputStreamGetCharacter(&parser->input, &ch) && ch == '>';
451 } else if (success && ch == '>') {
452 // End of the DTD
453 _inputStreamGetCharacter(&parser->input, &ch);
454 }
455 if (!success) {
456 if (_inputStreamAtEOF(&parser->input)) {
457 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing DTD");
458 } else {
459 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found malformed DTD");
460 }
461 return false;
462 }
463
464 parser->top --; // Remove dtdStructure from the stack
465
466 if (success && dtdStructure) {
467 INVOKE_CALLBACK3(parser->callBacks.endXMLStructure, parser, dtdStructure, parser->context.info);
468 if (parser->status != kCFXMLStatusParseInProgress) {
469 _CFReportError(parser, parser->status, NULL);
470 return false;
471 }
472 }
473 return true;
474 }
475
476 /*
477 [69] PEReference ::= '%' Name ';'
478 */
479 static Boolean parsePhysicalEntityReference(CFXMLParserRef parser) {
480 UniChar ch;
481 CFStringRef name;
482 if (!_inputStreamScanXMLName(&parser->input, false, &name)) {
483 _CFReportError(parser, kCFXMLErrorMalformedName, "Found malformed name while parsing physical entity reference");
484 return false;
485 } else if (!_inputStreamGetCharacter(&parser->input, &ch)) {
486 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing physical entity reference");
487 return false;
488 } else if (ch != ';') {
489 _CFReportError(parser, kCFXMLErrorMalformedName, "Found malformed name while parsing physical entity reference");
490 return false;
491 } else if (!(parser->options & kCFXMLParserSkipMetaData) && *(parser->top)) {
492 CFXMLEntityReferenceInfo myData;
493 Boolean result;
494 CFStringRef tmp = parser->node->dataString;
495 parser->node->dataTypeID = kCFXMLNodeTypeEntityReference;
496 parser->node->dataString = name;
497 myData.entityType = kCFXMLEntityTypeParameter;
498 parser->node->additionalData = &myData;
499 result = reportNewLeaf(parser);
500 parser->node->additionalData = NULL;
501 parser->node->dataString = tmp;
502 return result;
503 } else {
504 return true;
505 }
506 }
507
508 /*
509 [54] AttType ::= StringType | TokenizedType | EnumeratedType
510 [55] StringType ::= 'CDATA'
511 [56] TokenizedType ::= 'ID' | 'IDREF'| 'IDREFS'| 'ENTITY'| 'ENTITIES'| 'NMTOKEN'| 'NMTOKENS'
512 [57] EnumeratedType ::= NotationType | Enumeration
513 [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
514 [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
515 */
516 static Boolean parseEnumeration(CFXMLParserRef parser, Boolean useNMTokens) {
517 UniChar ch;
518 Boolean done = false;
519 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
520 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
521 return false;
522 } else if (ch != '(') {
523 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
524 return false;
525 }
526 _inputStreamSkipWhitespace(&parser->input, NULL);
527 if (!_inputStreamScanXMLName(&parser->input, useNMTokens, NULL)) {
528 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
529 return false;
530 }
531 while (!done) {
532 _inputStreamSkipWhitespace(&parser->input, NULL);
533 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
534 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
535 return false;
536 } else if (ch == ')') {
537 done = true;
538 } else if (ch == '|') {
539 _inputStreamSkipWhitespace(&parser->input, NULL);
540 if (!_inputStreamScanXMLName(&parser->input, useNMTokens, NULL)) {
541 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
542 return false;
543 }
544 } else {
545 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
546 return false;
547 }
548 }
549 return true;
550 }
551
552 static Boolean parseAttributeType(CFXMLParserRef parser, CFMutableStringRef str) {
553 Boolean success = false;
554 static const UniChar attTypeStrings[6][8] = {
555 {'C', 'D', 'A', 'T', 'A', '\0', '\0', '\0'},
556 {'I', 'D', 'R', 'E', 'F', 'S', '\0', '\0'},
557 {'E', 'N', 'T', 'I', 'T', 'Y', '\0', '\0'},
558 {'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S'},
559 {'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S'},
560 {'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N'} };
561 if (str) _inputStreamSetMark(&parser->input);
562 if (_inputStreamMatchString(&parser->input, attTypeStrings[0], 5) ||
563 _inputStreamMatchString(&parser->input, attTypeStrings[1], 6) ||
564 _inputStreamMatchString(&parser->input, attTypeStrings[1], 5) ||
565 _inputStreamMatchString(&parser->input, attTypeStrings[1], 2) ||
566 _inputStreamMatchString(&parser->input, attTypeStrings[2], 6) ||
567 _inputStreamMatchString(&parser->input, attTypeStrings[3], 8) ||
568 _inputStreamMatchString(&parser->input, attTypeStrings[4], 8) ||
569 _inputStreamMatchString(&parser->input, attTypeStrings[4], 7)) {
570 success = true;
571 } else if (_inputStreamMatchString(&parser->input, attTypeStrings[5], 8)) {
572 // Notation
573 if (_inputStreamSkipWhitespace(&parser->input, NULL) == 0) {
574 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
575 success = false;
576 } else {
577 success = parseEnumeration(parser, false);
578 }
579 } else {
580 success = parseEnumeration(parser, true);
581 }
582 if (str) {
583 if (success) {
584 _inputStreamGetCharactersFromMark(&parser->input, str);
585 }
586 _inputStreamClearMark(&parser->input);
587 }
588 return success;
589 }
590
591 /* [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) */
592 static Boolean parseAttributeDefaultDeclaration(CFXMLParserRef parser, CFMutableStringRef str) {
593 const UniChar strings[3][8] = {
594 {'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D'},
595 {'I', 'M', 'P', 'L', 'I', 'E', 'D', '\0'},
596 {'F', 'I', 'X', 'E', 'D', '\0', '\0', '\0'}};
597 UniChar ch;
598 Boolean success;
599 if (str) _inputStreamSetMark(&parser->input);
600 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
601 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
602 success = false;
603 } else if (ch == '#') {
604 if (_inputStreamMatchString(&parser->input, strings[0], 8) ||
605 _inputStreamMatchString(&parser->input, strings[1], 7)) {
606 success = true;
607 } else if (!_inputStreamMatchString(&parser->input, strings[2], 5) || _inputStreamSkipWhitespace(&parser->input, NULL) == 0) {
608 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
609 success = false;
610 } else {
611 // we fall through if "#FIXED" was matched, and at least one whitespace character was stripped.
612 success = parseAttributeValue(parser, NULL);
613 }
614 } else {
615 _inputStreamReturnCharacter(&parser->input, ch);
616 success = parseAttributeValue(parser, NULL);
617 }
618 if (str) {
619 if (success) {
620 _inputStreamGetCharactersFromMark(&parser->input, str);
621 }
622 _inputStreamClearMark(&parser->input);
623 }
624 return success;
625 }
626
627 /*
628 [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
629 [53] AttDef ::= S Name S AttType S DefaultDecl
630 */
631 static Boolean parseAttributeListDeclaration(CFXMLParserRef parser) {
632 const UniChar attList[7] = {'A', 'T', 'T', 'L', 'I', 'S', 'T'};
633 CFXMLAttributeListDeclarationInfo attListData;
634 CFXMLAttributeDeclarationInfo attributeArray[8], *attributes=attributeArray;
635 CFIndex capacity = 8;
636 UniChar ch;
637 Boolean success = true;
638 CFStringRef name;
639 if (!_inputStreamMatchString(&parser->input, attList, 7) ||
640 _inputStreamSkipWhitespace(&parser->input, NULL) == 0 ||
641 !_inputStreamScanXMLName(&parser->input, false, &name)) {
642 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
643 return false;
644 }
645 attListData.numberOfAttributes = 0;
646 if (!(*parser->top) || (parser->options & kCFXMLParserSkipMetaData)) {
647 // Use this to mark that we don't need to collect attribute information to report to the client. Ultimately, we may want to collect this for our own use (for validation, for instance), but for now, the only reason we would create it would be for the client. -- REW, 2/9/2000
648 attributes = NULL;
649 }
650 while (_inputStreamPeekCharacter(&parser->input, &ch) && ch != '>' && _inputStreamSkipWhitespace(&parser->input, NULL) != 0) {
651 CFXMLAttributeDeclarationInfo *attribute = NULL;
652 if (_inputStreamPeekCharacter(&parser->input, &ch) && ch == '>')
653 break;
654 if (attributes) {
655 if (capacity == attListData.numberOfAttributes) {
656 capacity = 2*capacity;
657 if (attributes != attributeArray) {
658 attributes = CFAllocatorReallocate(CFGetAllocator(parser), attributes, capacity * sizeof(CFXMLAttributeDeclarationInfo), 0);
659 } else {
660 attributes = CFAllocatorAllocate(CFGetAllocator(parser), capacity * sizeof(CFXMLAttributeDeclarationInfo), 0);
661 }
662 }
663 attribute = &(attributes[attListData.numberOfAttributes]);
664 // Much better if we can somehow create these strings immutable - then if the client (or we ourselves) has to copy them, they will end up multiply-retained, rather than having a new alloc and data copy performed. -- REW, 2/9/2000
665 attribute->typeString = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
666 attribute->defaultString = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
667 }
668 if (!_inputStreamScanXMLName(&parser->input, false, &(attribute->attributeName)) || (_inputStreamSkipWhitespace(&parser->input, NULL) == 0)) {
669 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
670 success = false;
671 break;
672 }
673 if (!parseAttributeType(parser, attribute ? (CFMutableStringRef)attribute->typeString : NULL)) {
674 success = false;
675 break;
676 }
677 if (_inputStreamSkipWhitespace(&parser->input, NULL) == 0) {
678 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
679 success = false;
680 break;
681 }
682 if (!parseAttributeDefaultDeclaration(parser, attribute ? (CFMutableStringRef)attribute->defaultString : NULL)) {
683 success = false;
684 break;
685 }
686 attListData.numberOfAttributes ++;
687 }
688 if (success) {
689 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
690 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
691 success = false;
692 } else if (ch != '>') {
693 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
694 success = false;
695 } else if (attributes) {
696 CFStringRef tmp = parser->node->dataString;
697 parser->node->dataTypeID = kCFXMLNodeTypeAttributeListDeclaration;
698 parser->node->dataString = name;
699 attListData.attributes = attributes;
700 parser->node->additionalData = (void *)&attListData;
701 success = reportNewLeaf(parser);
702 parser->node->additionalData = NULL;
703 parser->node->dataString = tmp;
704 }
705 }
706 if (attributes) {
707 // Free up all that memory
708 CFIndex idx;
709 for (idx = 0; idx < attListData.numberOfAttributes; idx ++) {
710 // Do not release attributeName here; it's a uniqued string from scanXMLName
711 CFRelease(attributes[idx].typeString);
712 CFRelease(attributes[idx].defaultString);
713 }
714 if (attributes != attributeArray) {
715 CFAllocatorDeallocate(CFGetAllocator(parser), attributes);
716 }
717 }
718 return success;
719 }
720
721 CF_INLINE Boolean parseSystemLiteral(CFXMLParserRef parser, CFXMLExternalID *extID) {
722 Boolean success;
723 if (extID) {
724 CFMutableStringRef urlStr = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
725 if (_inputStreamScanQuotedString(&parser->input, urlStr)) {
726 success = true;
727 extID->systemID = CFURLCreateWithString(CFGetAllocator(parser), urlStr, parser->input.url);
728 } else {
729 extID->systemID = NULL;
730 success = false;
731 }
732 CFRelease(urlStr);
733 } else {
734 success = _inputStreamScanQuotedString(&parser->input, NULL);
735 }
736 return success;
737 }
738
739 /*
740 [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
741 [83] PublicID ::= 'PUBLIC' S PubidLiteral
742 [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
743 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
744 [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
745 */
746 // This does NOT report errors itself; caller can check to see if parser->input is at EOF to determine whether the formatting failed or unexpected EOF occurred. -- REW, 2/2/2000
747 static Boolean parseExternalID(CFXMLParserRef parser, Boolean alsoAcceptPublicID, CFXMLExternalID *extID) {
748 const UniChar publicString[6] = {'P', 'U', 'B', 'L', 'I', 'C'};
749 const UniChar systemString[6] = {'S', 'Y', 'S', 'T', 'E', 'M'};
750 Boolean success;
751 if (extID) {
752 extID->systemID = NULL;
753 extID->publicID = NULL;
754 }
755 if (_inputStreamMatchString(&parser->input, publicString, 6)) {
756 success = _inputStreamSkipWhitespace(&parser->input, NULL) != 0;
757 if (extID) {
758 extID->publicID = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
759 success = success && _inputStreamScanQuotedString(&parser->input, (CFMutableStringRef)extID->publicID);
760 } else {
761 success = success && _inputStreamScanQuotedString(&parser->input, NULL);
762 }
763 if (success) {
764 UniChar ch;
765 if (alsoAcceptPublicID) {
766 _inputStreamSetMark(&parser->input); // In case we need to roll back the parser
767 }
768 if (_inputStreamSkipWhitespace(&parser->input, NULL) == 0
769 || !_inputStreamPeekCharacter(&parser->input, &ch)
770 || (ch != '\'' && ch != '\"')
771 || !parseSystemLiteral(parser, extID)) {
772 success = alsoAcceptPublicID;
773 if (alsoAcceptPublicID) {
774 _inputStreamBackUpToMark(&parser->input);
775 }
776 } else {
777 success = true;
778 }
779 if (alsoAcceptPublicID) {
780 _inputStreamClearMark(&parser->input);
781 }
782 }
783 } else if (_inputStreamMatchString(&parser->input, systemString, 6)) {
784 success = _inputStreamSkipWhitespace(&parser->input, NULL) != 0 && parseSystemLiteral(parser, extID);
785 } else {
786 success = false;
787 }
788 return success;
789 }
790
791 /*
792 [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
793 */
794 static Boolean parseNotationDeclaration(CFXMLParserRef parser) {
795 static UniChar notationString[8] = {'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N'};
796 Boolean report = *(parser->top) && !(parser->options & kCFXMLParserSkipMetaData);
797 CFXMLNotationInfo notationData = {{NULL, NULL}};
798 CFStringRef name;
799 Boolean success =
800 _inputStreamMatchString(&parser->input, notationString, 8) &&
801 _inputStreamSkipWhitespace(&parser->input, NULL) != 0 &&
802 _inputStreamScanXMLName(&parser->input, false, report ? &name : NULL) &&
803 _inputStreamSkipWhitespace(&parser->input, NULL) != 0 &&
804 parseExternalID(parser, true, report ? &(notationData.externalID) : NULL);
805
806 if (success) {
807 UniChar ch;
808 _inputStreamSkipWhitespace(&parser->input, NULL);
809 success = (_inputStreamGetCharacter(&parser->input, &ch) && ch == '>');
810 }
811 if (!success) {
812 if (_inputStreamAtEOF(&parser->input)) {
813 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
814 } else {
815 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
816 }
817 } else if (report) {
818 CFStringRef tmp = parser->node->dataString;
819 parser->node->dataTypeID = kCFXMLNodeTypeNotation;
820 parser->node->dataString = name;
821 parser->node->additionalData = &notationData;
822 success = reportNewLeaf(parser);
823 parser->node->additionalData = NULL;
824 parser->node->dataString = tmp;
825 }
826 if (notationData.externalID.systemID) CFRelease(notationData.externalID.systemID);
827 if (notationData.externalID.publicID) CFRelease(notationData.externalID.publicID);
828 return success;
829 }
830
831 /*
832 [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
833 [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
834 [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
835 */
836 static Boolean parseChoiceOrSequence(CFXMLParserRef parser, Boolean pastParen) {
837 UniChar ch, separator;
838 if (!pastParen) {
839 if (!_inputStreamGetCharacter(&parser->input, &ch) || ch != '(') return false;
840 _inputStreamSkipWhitespace(&parser->input, NULL);
841 }
842 if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false;
843
844 /* Now scanning cp, production [48] */
845 if (ch == '(') {
846 if (!parseChoiceOrSequence(parser, false)) return false;
847 } else {
848 if (!_inputStreamScanXMLName(&parser->input, false, NULL)) return false;
849 }
850 if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false;
851 if (ch == '?' || ch == '*' || ch == '+') _inputStreamGetCharacter(&parser->input, &ch);
852
853 /* Now past cp */
854 _inputStreamSkipWhitespace(&parser->input, NULL);
855 if (!_inputStreamGetCharacter(&parser->input, &ch)) return false;
856 if (ch == ')') return true;
857 if (ch != '|' && ch != ',') return false;
858 separator = ch;
859 while (ch == separator) {
860 _inputStreamSkipWhitespace(&parser->input, NULL);
861 if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false;
862 if (ch != '(') {
863 if (!_inputStreamScanXMLName(&parser->input, false, NULL)) return false;
864 } else if (!parseChoiceOrSequence(parser, false)) {
865 return false;
866 }
867 _inputStreamSkipWhitespace(&parser->input, NULL);
868 if (!_inputStreamGetCharacter(&parser->input, &ch)) return false;
869 }
870 return ch == ')';
871 }
872
873 /*
874 [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')'
875 */
876 static Boolean parseMixedElementContent(CFXMLParserRef parser) {
877 static const UniChar pcdataString[7] = {'#', 'P', 'C', 'D', 'A', 'T', 'A'};
878 UniChar ch;
879 if (!_inputStreamMatchString(&parser->input, pcdataString, 7)) return false;
880 _inputStreamSkipWhitespace(&parser->input, NULL);
881 if (!_inputStreamGetCharacter(&parser->input, &ch) && (ch == ')' || ch == '|')) return false;
882 if (ch == ')') return true;
883
884 while (ch == '|') {
885 _inputStreamSkipWhitespace(&parser->input, NULL);
886 if (!_inputStreamScanXMLName(&parser->input, false, NULL)) return false;
887 _inputStreamSkipWhitespace(&parser->input, NULL);
888 if (!_inputStreamGetCharacter(&parser->input, &ch)) return false;
889 }
890 if (ch != ')') return false;
891 if (!_inputStreamGetCharacter(&parser->input, &ch) || ch != '*') return false;
892 return true;
893 }
894
895 /*
896 [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
897 [47] children ::= (choice | seq) ('?' | '*' | '+')?
898 */
899 static Boolean parseElementContentSpec(CFXMLParserRef parser) {
900 static const UniChar eltContentEmpty[5] = {'E', 'M', 'P', 'T', 'Y'};
901 static const UniChar eltContentAny[3] = {'A', 'N', 'Y'};
902 UniChar ch;
903 if (_inputStreamMatchString(&parser->input, eltContentEmpty, 5) || _inputStreamMatchString(&parser->input, eltContentAny, 3)) {
904 return true;
905 } else if (!_inputStreamPeekCharacter(&parser->input, &ch) || ch != '(') {
906 return false;
907 } else {
908 // We want to know if we have a Mixed per production [51]. If we don't, we will need to back up and call the parseChoiceOrSequence function. So we set the mark now. -- REW, 2/10/2000
909 _inputStreamGetCharacter(&parser->input, &ch);
910 _inputStreamSkipWhitespace(&parser->input, NULL);
911 if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false;
912 if (ch == '#') {
913 // Mixed
914 return parseMixedElementContent(parser);
915 } else {
916 if (parseChoiceOrSequence(parser, true)) {
917 if (_inputStreamPeekCharacter(&parser->input, &ch) && (ch == '*' || ch == '?' || ch == '+')) {
918 _inputStreamGetCharacter(&parser->input, &ch);
919 }
920 return true;
921 } else {
922 return false;
923 }
924 }
925 }
926 }
927
928 /*
929 [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
930 */
931 static Boolean parseElementDeclaration(CFXMLParserRef parser) {
932 Boolean report = *(parser->top) && !(parser->options & kCFXMLParserSkipMetaData);
933 Boolean success;
934 static const UniChar eltChars[7] = {'E', 'L', 'E', 'M', 'E', 'N', 'T'};
935 UniChar ch = '>';
936 CFMutableStringRef contentDesc = NULL;
937 CFStringRef name;
938 success = _inputStreamMatchString(&parser->input, eltChars, 7)
939 && _inputStreamSkipWhitespace(&parser->input, NULL) != 0
940 && _inputStreamScanXMLName(&parser->input, false, report ? &name : NULL)
941 && _inputStreamSkipWhitespace(&parser->input, NULL) != 0;
942 if (success) {
943 if (report) _inputStreamSetMark(&parser->input);
944 success = parseElementContentSpec(parser);
945 if (success && report) {
946 contentDesc = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
947 _inputStreamGetCharactersFromMark(&parser->input, contentDesc);
948 }
949 if (report) _inputStreamClearMark(&parser->input);
950 if (success) _inputStreamSkipWhitespace(&parser->input, NULL);
951 success = success && _inputStreamMatchString(&parser->input, &ch, 1);
952 }
953 if (!success) {
954 if (_inputStreamAtEOF(&parser->input)) {
955 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
956 } else {
957 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
958 }
959 } else if (report) {
960 CFXMLElementTypeDeclarationInfo eltData;
961 CFStringRef tmp = parser->node->dataString;
962 parser->node->dataTypeID = kCFXMLNodeTypeElementTypeDeclaration;
963 parser->node->dataString = name;
964 eltData.contentDescription = contentDesc;
965 parser->node->additionalData = &eltData;
966 success = reportNewLeaf(parser);
967 parser->node->additionalData = NULL;
968 parser->node->dataString = tmp;
969 }
970 if (contentDesc) CFRelease(contentDesc);
971 return success;
972 }
973
974 /*
975 [70] EntityDecl ::= GEDecl | PEDecl
976 [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
977 [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
978 [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
979 [74] PEDef ::= EntityValue | ExternalID
980 [76] NDataDecl ::= S 'NDATA' S Name
981 [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | "'" ([^%&'] | PEReference | Reference)* "'"
982 */
983 static Boolean parseEntityDeclaration(CFXMLParserRef parser) {
984 const UniChar entityStr[6] = {'E', 'N', 'T', 'I', 'T', 'Y'};
985 UniChar ch;
986 Boolean isPEDecl = false;
987 CFXMLEntityInfo entityData;
988 CFStringRef name;
989 Boolean report = *(parser->top) && !(parser->options & kCFXMLParserSkipMetaData);
990 Boolean success =
991 _inputStreamMatchString(&parser->input, entityStr, 6) &&
992 (_inputStreamSkipWhitespace(&parser->input, NULL) != 0) &&
993 _inputStreamPeekCharacter(&parser->input, &ch);
994
995 entityData.replacementText = NULL;
996 entityData.entityID.publicID = NULL;
997 entityData.entityID.systemID = NULL;
998 entityData.notationName = NULL;
999 // We will set entityType immediately before reporting
1000
1001 if (success && ch == '%') {
1002 _inputStreamGetCharacter(&parser->input, &ch);
1003 success = _inputStreamSkipWhitespace(&parser->input, NULL) != 0;
1004 isPEDecl = true;
1005 }
1006 success = success && _inputStreamScanXMLName(&parser->input, false, report ? &name : NULL) && (_inputStreamSkipWhitespace(&parser->input, NULL) != 0) && _inputStreamPeekCharacter(&parser->input, &ch);
1007 if (success && (ch == '\"' || ch == '\'')) {
1008 // EntityValue
1009 // This is not quite correct - the string scanned cannot contain '%' or '&' unless it's as part of a valid entity reference -- REW, 2/2/2000
1010 if (report) {
1011 entityData.replacementText = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
1012 success = _inputStreamScanQuotedString(&parser->input, (CFMutableStringRef)entityData.replacementText);
1013 } else {
1014 success = _inputStreamScanQuotedString(&parser->input, NULL);
1015 }
1016 } else if (success) {
1017 // ExternalID
1018 success = parseExternalID(parser, false, report ? &(entityData.entityID) : NULL);
1019 if (success && !isPEDecl && _inputStreamSkipWhitespace(&parser->input, NULL) != 0) {
1020 // There could be an option NDataDecl
1021 // Don't we need to set entityData.notationName? -- REW, 3/6/2000
1022 const UniChar nDataStr[5] = {'N', 'D', 'A', 'T', 'A'};
1023 if (_inputStreamMatchString(&parser->input, nDataStr, 5)) {
1024 success = (_inputStreamSkipWhitespace(&parser->input, NULL) != 0) && _inputStreamScanXMLName(&parser->input, false, NULL);
1025 }
1026 }
1027 }
1028 if (success) {
1029 _inputStreamSkipWhitespace(&parser->input, NULL);
1030 success = _inputStreamGetCharacter(&parser->input, &ch) && ch == '>';
1031 }
1032 if (!success) {
1033 if (_inputStreamAtEOF(&parser->input)) {
1034 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
1035 } else {
1036 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
1037 }
1038 } else {
1039 CFStringRef tmp = parser->node->dataString;
1040 if (isPEDecl) entityData.entityType = kCFXMLEntityTypeParameter;
1041 else if (entityData.replacementText) entityData.entityType = kCFXMLEntityTypeParsedInternal;
1042 else if (!entityData.notationName) entityData.entityType = kCFXMLEntityTypeParsedExternal;
1043 else entityData.entityType = kCFXMLEntityTypeUnparsed;
1044 parser->node->dataTypeID = kCFXMLNodeTypeEntity;
1045 parser->node->dataString = name;
1046 parser->node->additionalData = &entityData;
1047 success = reportNewLeaf(parser);
1048 parser->node->additionalData = NULL;
1049 parser->node->dataString = tmp;
1050 if (entityData.replacementText) CFRelease(entityData.replacementText);
1051 }
1052 if (entityData.entityID.publicID) CFRelease(entityData.entityID.publicID);
1053 if (entityData.entityID.systemID) CFRelease(entityData.entityID.systemID);
1054 return success;
1055 }
1056
1057 /*
1058 [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1059 [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
1060 */
1061 // First character should be just past '['
1062 static Boolean parseInlineDTD(CFXMLParserRef parser) {
1063 Boolean success = true;
1064 while (success && !_inputStreamAtEOF(&parser->input)) {
1065 UniChar ch;
1066
1067 parseWhitespace(parser);
1068 if (!_inputStreamGetCharacter(&parser->input, &ch)) break;
1069 if (ch == '%') {
1070 // PEReference
1071 success = parsePhysicalEntityReference(parser);
1072 } else if (ch == '<') {
1073 // markupdecl
1074 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
1075 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
1076 return false;
1077 }
1078 if (ch == '?') {
1079 // Processing Instruction
1080 success = parseProcessingInstruction(parser, true); // We can safely pass true here, because *parser->top will be NULL if kCFXMLParserSkipMetaData is true
1081 } else if (ch == '!') {
1082 UniChar dashes[2] = {'-', '-'};
1083 if (_inputStreamMatchString(&parser->input, dashes, 2)) {
1084 // Comment
1085 success = parseComment(parser, true);
1086 } else {
1087 // elementdecl | AttListDecl | EntityDecl | NotationDecl
1088 if (!_inputStreamPeekCharacter(&parser->input, &ch)) {
1089 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
1090 return false;
1091 } else if (ch == 'A') {
1092 // AttListDecl
1093 success = parseAttributeListDeclaration(parser);
1094 } else if (ch == 'N') {
1095 success = parseNotationDeclaration(parser);
1096 } else if (ch == 'E') {
1097 // elementdecl | EntityDecl
1098 _inputStreamGetCharacter(&parser->input, &ch);
1099 if (!_inputStreamPeekCharacter(&parser->input, &ch)) {
1100 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
1101 return false;
1102 }
1103 _inputStreamReturnCharacter(&parser->input, 'E');
1104 if (ch == 'L') {
1105 success = parseElementDeclaration(parser);
1106 } else if (ch == 'N') {
1107 success = parseEntityDeclaration(parser);
1108 } else {
1109 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
1110 return false;
1111 }
1112 } else {
1113 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
1114 return false;
1115 }
1116 }
1117 } else {
1118 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
1119 return false;
1120 }
1121 } else if (ch == ']') {
1122 return true;
1123 } else {
1124 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
1125 return false;
1126 }
1127 }
1128 if (success) {
1129 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
1130 }
1131 return false;
1132 }
1133
1134 /*
1135 [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1136 */
1137 static Boolean parseTagContent(CFXMLParserRef parser) {
1138 while (!_inputStreamAtEOF(&parser->input)) {
1139 UniChar ch;
1140 CFIndex numWhitespaceCharacters;
1141
1142 _inputStreamSetMark(&parser->input);
1143 numWhitespaceCharacters = _inputStreamSkipWhitespace(&parser->input, NULL);
1144 // Don't report the whitespace yet; if the first thing we see is character data, we put the whitespace back and report it as part of the character data.
1145 if (!_inputStreamGetCharacter(&parser->input, &ch)) break; // break == report unexpected EOF
1146
1147 if (ch != '<' && ch != '&') { // CharData
1148 // Back off the whitespace; we'll report it with the PCData
1149 _inputStreamBackUpToMark(&parser->input);
1150 _inputStreamClearMark(&parser->input);
1151 if (!parsePCData(parser)) return false;
1152 if(_inputStreamComposingErrorOccurred(&parser->input)) {
1153 _CFReportError(parser, kCFXMLErrorEncodingConversionFailure, "Encountered string encoding error");
1154 return false;
1155 }
1156 continue;
1157 }
1158
1159 // element | Reference | CDSect | PI | Comment
1160 // We can safely report any whitespace now
1161 if (!(parser->options & kCFXMLParserSkipWhitespace) && numWhitespaceCharacters != 0 && *(parser->top)) {
1162 _inputStreamReturnCharacter(&parser->input, ch);
1163 _inputStreamGetCharactersFromMark(&parser->input, (CFMutableStringRef)(parser->node->dataString));
1164 parser->node->dataTypeID = kCFXMLNodeTypeWhitespace;
1165 parser->node->additionalData = NULL;
1166 if (!reportNewLeaf(parser)) return false;
1167 _inputStreamGetCharacter(&parser->input, &ch);
1168 }
1169 _inputStreamClearMark(&parser->input);
1170
1171 if (ch == '&') {
1172 // Reference; for the time being, we don't worry about processing these; just report them as Entity references
1173 if (!parseEntityReference(parser, true)) return false;
1174 continue;
1175 }
1176
1177 // ch == '<'; element | CDSect | PI | Comment
1178 if (!_inputStreamPeekCharacter(&parser->input, &ch)) break;
1179 if (ch == '?') { // PI
1180 _inputStreamGetCharacter(&parser->input, &ch);
1181 if (!parseProcessingInstruction(parser, true))
1182 return false;
1183 } else if (ch == '/') { // end tag; we're passing outside of content's production
1184 _inputStreamReturnCharacter(&parser->input, '<'); // Back off to the '<'
1185 return true;
1186 } else if (ch != '!') { // element
1187 if (!parseTag(parser)) return false;
1188 } else {
1189 // Comment | CDSect
1190 UniChar dashes[3] = {'!', '-', '-'};
1191 if (_inputStreamMatchString(&parser->input, dashes, 3)) {
1192 // Comment
1193 if (!parseComment(parser, true)) return false;
1194 } else {
1195 // Should have a CDSect; back off the "<!" and call parseCDSect
1196 _inputStreamReturnCharacter(&parser->input, '<');
1197 if (!parseCDSect(parser)) return false;
1198 }
1199 }
1200 }
1201
1202 if(_inputStreamComposingErrorOccurred(&parser->input)) {
1203 _CFReportError(parser, kCFXMLErrorEncodingConversionFailure, "Encountered string encoding error");
1204 return false;
1205 }
1206 // Only way to get here is if premature EOF was found
1207 //#warning CF:Include the tag name here
1208 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing tag content");
1209 return false;
1210 }
1211
1212 static Boolean parseCDSect(CFXMLParserRef parser) {
1213 const UniChar _CDSectOpening[9] = {'<', '!', '[', 'C', 'D', 'A', 'T', 'A', '['};
1214 const UniChar _CDSectClose[3] = {']', ']', '>'};
1215 if (!_inputStreamMatchString(&parser->input, _CDSectOpening, 9)) {
1216 _CFReportError(parser, kCFXMLErrorMalformedCDSect, "Encountered bad prefix to a presumed CDATA section");
1217 return false;
1218 }
1219 if (!_inputStreamScanToCharacters(&parser->input, _CDSectClose, 3, (CFMutableStringRef)(parser->node->dataString))) {
1220 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing CDATA section");
1221 return false;
1222 }
1223
1224 parser->node->dataTypeID = kCFXMLNodeTypeCDATASection;
1225 parser->node->additionalData = NULL;
1226 return reportNewLeaf(parser);
1227 }
1228
1229 /*
1230 [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1231 */
1232 static Boolean validateCharacterReference(CFStringRef str) {
1233 Boolean isHex;
1234 CFIndex idx, len = CFStringGetLength(str);
1235 if (len < 2) return false;
1236 if (CFStringGetCharacterAtIndex(str, 0) != '#') return false;
1237 if (CFStringGetCharacterAtIndex(str, 1) == 'x') {
1238 isHex = true;
1239 idx = 2;
1240 if (len == 2) return false;
1241 } else {
1242 isHex = false;
1243 idx = 1;
1244 }
1245
1246 while (idx < len) {
1247 UniChar ch;
1248 ch = CFStringGetCharacterAtIndex(str, idx);
1249 idx ++;
1250 if (!(ch <= '9' && ch >= '0') &&
1251 !(isHex && ((ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F')))) {
1252 break;
1253 }
1254 }
1255 return (idx == len);
1256 }
1257
1258 /*
1259 [67] Reference ::= EntityRef | CharRef
1260 [68] EntityRef ::= '&' Name ';'
1261 */
1262 static Boolean parseEntityReference(CFXMLParserRef parser, Boolean report) {
1263 UniChar ch;
1264 CFXMLEntityReferenceInfo entData;
1265 CFStringRef name = NULL;
1266 if (!_inputStreamPeekCharacter(&parser->input, &ch)) {
1267 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing EntityReference");
1268 return false;
1269 }
1270 if (ch == '#') {
1271 ch = ';';
1272 if (!_inputStreamScanToCharacters(&parser->input, &ch, 1, (CFMutableStringRef)parser->node->dataString)) {
1273 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing EntityReference");
1274 return false;
1275 } else if (!validateCharacterReference(parser->node->dataString)) {
1276 _CFReportError(parser, kCFXMLErrorMalformedCharacterReference, "Encountered illegal character while parsing character reference");
1277 return false;
1278 }
1279 entData.entityType = kCFXMLEntityTypeCharacter;
1280 name = parser->node->dataString;
1281 } else if (!_inputStreamScanXMLName(&parser->input, false, report ? &name : NULL) || !_inputStreamGetCharacter(&parser->input, &ch) || ch != ';') {
1282 if (_inputStreamAtEOF(&parser->input)) {
1283 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing EntityReference");
1284 return false;
1285 } else {
1286 _CFReportError(parser, kCFXMLErrorMalformedName, "Encountered malformed name while parsing EntityReference");
1287 return false;
1288 }
1289 } else {
1290 entData.entityType = kCFXMLEntityTypeParsedInternal;
1291 }
1292 if (report) {
1293 CFStringRef tmp = parser->node->dataString;
1294 Boolean success;
1295 parser->node->dataTypeID = kCFXMLNodeTypeEntityReference;
1296 parser->node->dataString = name;
1297 parser->node->additionalData = &entData;
1298 success = reportNewLeaf(parser);
1299 parser->node->additionalData = NULL;
1300 parser->node->dataString = tmp;
1301 return success;
1302 } else {
1303 return true;
1304 }
1305 }
1306
1307 #if 0
1308 // Kept from old entity reference parsing....
1309 {
1310 switch (*(parser->curr)) {
1311 case 'l': // "lt"
1312 if (len >= 3 && *(parser->curr+1) == 't' && *(parser->curr+2) == ';') {
1313 ch = '<';
1314 parser->curr += 3;
1315 break;
1316 }
1317 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
1318 return;
1319 case 'g': // "gt"
1320 if (len >= 3 && *(parser->curr+1) == 't' && *(parser->curr+2) == ';') {
1321 ch = '>';
1322 parser->curr += 3;
1323 break;
1324 }
1325 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
1326 return;
1327 case 'a': // "apos" or "amp"
1328 if (len < 4) { // Not enough characters for either conversion
1329 parser->errorString = CFStringCreateWithCString(parser->allocator, "Encountered unexpected EOF", kCFStringEncodingASCII);
1330 return;
1331 }
1332 if (*(parser->curr+1) == 'm') {
1333 // "amp"
1334 if (*(parser->curr+2) == 'p' && *(parser->curr+3) == ';') {
1335 ch = '&';
1336 parser->curr += 4;
1337 break;
1338 }
1339 } else if (*(parser->curr+1) == 'p') {
1340 // "apos"
1341 if (len > 4 && *(parser->curr+2) == 'o' && *(parser->curr+3) == 's' && *(parser->curr+4) == ';') {
1342 ch = '\'';
1343 parser->curr += 5;
1344 break;
1345 }
1346 }
1347 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
1348 return;
1349 case 'q': // "quote"
1350 if (len >= 6 && *(parser->curr+1) == 'u' && *(parser->curr+2) == 'o' && *(parser->curr+3) == 't' && *(parser->curr+4) == 'e' && *(parser->curr+5) == ';') {
1351 ch = '\"';
1352 parser->curr += 6;
1353 break;
1354 }
1355 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
1356 return;
1357 case '#':
1358 {
1359 UniChar num = 0;
1360 Boolean isHex = false;
1361 if ( len < 4) { // Not enough characters to make it all fit! Need at least "&#d;"
1362 parser->errorString = CFStringCreateWithCString(parser->allocator, "Encountered unexpected EOF", kCFStringEncodingASCII);
1363 return;
1364 }
1365 parser->curr ++;
1366 if (*(parser->curr) == 'x') {
1367 isHex = true;
1368 parser->curr ++;
1369 }
1370 while (parser->curr < parser->end) {
1371 ch = *(parser->curr);
1372 if (ch == ';') {
1373 CFStringAppendCharacters(string, &num, 1);
1374 parser->curr ++;
1375 return;
1376 }
1377 if (!isHex) num = num*10;
1378 else num = num << 4;
1379 if (ch <= '9' && ch >= '0') {
1380 num += (ch - '0');
1381 } else if (!isHex) {
1382 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unexpected character %c at line %d"), ch, lineNumber(parser));
1383 return;
1384 } else if (ch >= 'a' && ch <= 'f') {
1385 num += 10 + (ch - 'a');
1386 } else if (ch >= 'A' && ch <= 'F') {
1387 num += 10 + (ch - 'A');
1388 } else {
1389 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unexpected character %c at line %d"), ch, lineNumber(parser));
1390 return;
1391 }
1392 }
1393 parser->errorString = CFStringCreateWithCString(parser->allocator, "Encountered unexpected EOF", kCFStringEncodingASCII);
1394 return;
1395 }
1396 default:
1397 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
1398 return;
1399 }
1400 CFStringAppendCharacters(string, &ch, 1);
1401 }
1402 #endif
1403
1404 /*
1405 [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
1406 */
1407 static Boolean parsePCData(CFXMLParserRef parser) {
1408 UniChar ch;
1409 Boolean done = false;
1410 _inputStreamSetMark(&parser->input);
1411 while (!done && _inputStreamGetCharacter(&parser->input, &ch)) {
1412 switch (ch) {
1413 case '<':
1414 case '&':
1415 _inputStreamReturnCharacter(&parser->input, ch);
1416 done = true;
1417 break;
1418 case ']':
1419 {
1420 const UniChar endSequence[2] = {']', '>'};
1421 if (_inputStreamMatchString(&parser->input, endSequence, 2)) {
1422 _CFReportError(parser, kCFXMLErrorMalformedParsedCharacterData, "Encountered \"]]>\" in parsed character data");
1423 _inputStreamClearMark(&parser->input);
1424 return false;
1425 }
1426 break;
1427 }
1428 default:
1429 ;
1430 }
1431 }
1432 _inputStreamGetCharactersFromMark(&parser->input, (CFMutableStringRef)(parser->node->dataString));
1433 _inputStreamClearMark(&parser->input);
1434 parser->node->dataTypeID = kCFXMLNodeTypeText;
1435 parser->node->additionalData = NULL;
1436 return reportNewLeaf(parser);
1437 }
1438
1439 /*
1440 [42] ETag ::= '</' Name S? '>'
1441 */
1442 static Boolean parseCloseTag(CFXMLParserRef parser, CFStringRef tag) {
1443 const UniChar beginEndTag[2] = {'<', '/'};
1444 Boolean unexpectedEOF = false, mismatch = false;
1445 CFStringRef closeTag;
1446
1447 // We can get away with testing pointer equality between tag & closeTag because scanXMLName guarantees the strings it returns are unique.
1448 if (_inputStreamMatchString(&parser->input, beginEndTag, 2) && _inputStreamScanXMLName(&parser->input, false, &closeTag) && closeTag == tag) {
1449
1450 UniChar ch;
1451 _inputStreamSkipWhitespace(&parser->input, NULL);
1452 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
1453 unexpectedEOF = true;
1454 } else if (ch != '>') {
1455 mismatch = true;
1456 }
1457 } else if (_inputStreamAtEOF(&parser->input)) {
1458 unexpectedEOF = true;
1459 } else {
1460 mismatch = true;
1461 }
1462
1463 if (unexpectedEOF || mismatch) {
1464 if (unexpectedEOF) {
1465 parser->errorString = CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered unexpected EOF while parsing close tag for <%@>"), tag);
1466 parser->status = kCFXMLErrorUnexpectedEOF;
1467 if(parser->callBacks.handleError) INVOKE_CALLBACK3(parser->callBacks.handleError, parser, kCFXMLErrorUnexpectedEOF, parser->context.info);
1468 } else {
1469 parser->errorString = CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered malformed close tag for <%@>"), tag);
1470 parser->status = kCFXMLErrorMalformedCloseTag;
1471 if(parser->callBacks.handleError) INVOKE_CALLBACK3(parser->callBacks.handleError, parser, kCFXMLErrorMalformedCloseTag, parser->context.info);
1472 }
1473 return false;
1474 }
1475 return true;
1476 }
1477
1478 /*
1479 [39] element ::= EmptyElementTag | STag content ETag
1480 [40] STag ::= '<' Name (S Attribute)* S? '>'
1481 [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
1482 */
1483 static Boolean parseTag(CFXMLParserRef parser) {
1484 UniChar ch;
1485 void *tag;
1486 CFXMLElementInfo data;
1487 Boolean success = true;
1488 CFStringRef tagName;
1489
1490 if (!_inputStreamScanXMLName(&parser->input, false, &tagName)) {
1491 _CFReportError(parser, kCFXMLErrorMalformedStartTag, "Encountered malformed start tag");
1492 return false;
1493 }
1494
1495 _inputStreamSkipWhitespace(&parser->input, NULL);
1496
1497 if (!parseAttributes(parser)) return false; // parsed directly into parser->argDict ; parseAttributes consumes any trailing whitespace
1498 data.attributes = parser->argDict;
1499 data.attributeOrder = parser->argArray;
1500 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
1501 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF");
1502 return false;
1503 }
1504 if (ch == '/') {
1505 data.isEmpty = true;
1506 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
1507 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF");
1508 return false;
1509 }
1510 } else {
1511 data.isEmpty = false;
1512 }
1513 if (ch != '>') {
1514 _CFReportError(parser, kCFXMLErrorMalformedStartTag, "Encountered malformed start tag");
1515 return false;
1516 }
1517
1518 if (*parser->top || parser->top == parser->stack) {
1519 CFStringRef oldStr = parser->node->dataString;
1520 parser->node->dataTypeID = kCFXMLNodeTypeElement;
1521 parser->node->dataString = tagName;
1522 parser->node->additionalData = &data;
1523 tag = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info);
1524 if (tag && parser->status == kCFXMLStatusParseInProgress) {
1525 INVOKE_CALLBACK4(parser->callBacks.addChild, parser, *parser->top, tag, parser->context.info);
1526 }
1527 parser->node->additionalData = NULL;
1528 parser->node->dataString = oldStr;
1529 if (parser->status != kCFXMLStatusParseInProgress) {
1530 // callback called CFXMLParserAbort()
1531 _CFReportError(parser, parser->status, NULL);
1532 return false;
1533 }
1534 } else {
1535 tag = NULL;
1536 }
1537
1538 pushXMLNode(parser, tag);
1539 if (!data.isEmpty) {
1540 success = parseTagContent(parser);
1541 if (success) {
1542 success = parseCloseTag(parser, tagName);
1543 }
1544 }
1545 parser->top --;
1546
1547 if (success && tag) {
1548 INVOKE_CALLBACK3(parser->callBacks.endXMLStructure, parser, tag, parser->context.info);
1549 if (parser->status != kCFXMLStatusParseInProgress) {
1550 _CFReportError(parser, parser->status, NULL);
1551 return false;
1552 }
1553 }
1554 return success;
1555 }
1556
1557 /*
1558 [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
1559 [67] Reference ::= EntityRef | CharRef
1560 [68] EntityRef ::= '&' Name ';'
1561 */
1562 // For the moment, we don't worry about references in the attribute values.
1563 static Boolean parseAttributeValue(CFXMLParserRef parser, CFMutableStringRef str) {
1564 UniChar quote, ch;
1565 Boolean success = _inputStreamGetCharacter(&parser->input, &quote);
1566 if (!success || (quote != '\'' && quote != '\"')) return false;
1567 if (str) _inputStreamSetMark(&parser->input);
1568 while (_inputStreamGetCharacter(&parser->input, &ch) && ch != quote) {
1569 switch (ch) {
1570 case '<': success = false; break;
1571 case '&':
1572 if (!parseEntityReference(parser, false)) {
1573 success = false;
1574 break;
1575 }
1576 default:
1577 ;
1578 }
1579 }
1580
1581 if (success && _inputStreamAtEOF(&parser->input)) {
1582 success = false;
1583 }
1584 if (str) {
1585 if (success) {
1586 _inputStreamReturnCharacter(&parser->input, quote);
1587 _inputStreamGetCharactersFromMark(&parser->input, str);
1588 _inputStreamGetCharacter(&parser->input, &ch);
1589 }
1590 _inputStreamClearMark(&parser->input);
1591 }
1592 return success;
1593 }
1594
1595 /*
1596 [40] STag ::= '<' Name (S Attribute)* S? '>'
1597 [41] Attribute ::= Name Eq AttValue
1598 [25] Eq ::= S? '=' S?
1599 */
1600
// Expects the parser's input stream to be positioned at the first attribute name (or at the '>' or '/' that ends the tag); will consume the trailing whitespace.
1602 Boolean parseAttributes(CFXMLParserRef parser) {
1603 UniChar ch;
1604 CFMutableDictionaryRef dict;
1605 CFMutableArrayRef array;
1606 Boolean failure = false;
1607 if (_inputStreamPeekCharacter(&parser->input, &ch) == '>') {
1608 if (parser->argDict) {
1609 CFDictionaryRemoveAllValues(parser->argDict);
1610 CFArrayRemoveAllValues(parser->argArray);
1611 }
1612 return true; // No attributes; let caller deal with it
1613 }
1614 if (!parser->argDict) {
1615 parser->argDict = CFDictionaryCreateMutable(CFGetAllocator(parser), 0, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
1616 parser->argArray = CFArrayCreateMutable(CFGetAllocator(parser), 0, &kCFTypeArrayCallBacks);
1617 } else {
1618 CFDictionaryRemoveAllValues(parser->argDict);
1619 CFArrayRemoveAllValues(parser->argArray);
1620 }
1621 dict = parser->argDict;
1622 array = parser->argArray;
1623 while (!failure && _inputStreamPeekCharacter(&parser->input, &ch) && ch != '>' && ch != '/') {
1624 CFStringRef key;
1625 CFMutableStringRef value;
1626 if (!_inputStreamScanXMLName(&parser->input, false, &key)) {
1627 failure = true;
1628 break;
1629 }
1630 if (CFArrayGetFirstIndexOfValue(array, CFRangeMake(0, CFArrayGetCount(array)), key) != kCFNotFound) {
1631 _CFReportError(parser, kCFXMLErrorMalformedStartTag, "Found repeated attribute");
1632 return false;
1633 }
1634 _inputStreamSkipWhitespace(&parser->input, NULL);
1635 if (!_inputStreamGetCharacter(&parser->input, &ch) || ch != '=') {
1636 failure = true;
1637 break;
1638 }
1639 _inputStreamSkipWhitespace(&parser->input, NULL);
1640 value = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
1641 if (!parseAttributeValue(parser, value)) {
1642 CFRelease(value);
1643 failure = true;
1644 break;
1645 }
1646 CFArrayAppendValue(array, key);
1647 CFDictionarySetValue(dict, key, value);
1648 CFRelease(value);
1649 _inputStreamSkipWhitespace(&parser->input, NULL);
1650 }
1651 if (failure) {
1652 //#warning CF:Include tag name in this error report
1653 _CFReportError(parser, kCFXMLErrorMalformedStartTag, "Found illegal character while parsing element tag");
1654 return false;
1655 } else if (_inputStreamAtEOF(&parser->input)) {
1656 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing element attributes");
1657 return false;
1658 } else {
1659 return true;
1660 }
1661 }
1662
1663 /*
1664 [1] document ::= prolog element Misc*
1665 [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1666 [27] Misc ::= Comment | PI | S
1667 [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1668
1669 We treat XMLDecl as a plain old PI, since PI is part of Misc. This changes the prolog and document productions to
1670 [22-1] prolog ::= Misc* (doctypedecl Misc*)?
1671 [1-1] document ::= Misc* (doctypedecl Misc*)? element Misc*
1672
1673 NOTE: This function assumes parser->stack has a valid top. I.e. the document pointer has already been created!
1674 */
1675 static Boolean parseXML(CFXMLParserRef parser) {
1676 Boolean success = true, sawDTD = false, sawElement = false;
1677 UniChar ch;
1678 while (success && _inputStreamPeekCharacter(&parser->input, &ch)) {
1679 switch (ch) {
1680 case ' ':
1681 case '\n':
1682 case '\t':
1683 case '\r':
1684 success = parseWhitespace(parser);
1685 break;
1686 case '<':
1687 _inputStreamGetCharacter(&parser->input, &ch);
1688 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
1689 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing top-level document");
1690 return false;
1691 }
1692 if (ch == '!') {
1693 // Comment or DTD
1694 UniChar dashes[2] = {'-', '-'};
1695 if (_inputStreamMatchString(&parser->input, dashes, 2)) {
1696 // Comment
1697 success = parseComment(parser, true);
1698 } else {
1699 // Should be DTD
1700 if (sawDTD) {
1701 _CFReportError(parser, kCFXMLErrorMalformedDocument, "Encountered a second DTD");
1702 return false;
1703 }
1704 success = parseDTD(parser);
1705 if (success) sawDTD = true;
1706 }
1707 } else if (ch == '?') {
1708 // Processing instruction
1709 success = parseProcessingInstruction(parser, true);
1710 } else {
1711 // Tag or malformed
1712 if (sawElement) {
1713 _CFReportError(parser, kCFXMLErrorMalformedDocument, "Encountered second top-level element");
1714 return false;
1715 }
1716 _inputStreamReturnCharacter(&parser->input, ch);
1717 success = parseTag(parser);
1718 if (success) sawElement = true;
1719 }
1720 break;
1721 default: {
1722 parser->status = kCFXMLErrorMalformedDocument;
1723 parser->errorString = ch < 256 ?
1724 CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered unexpected character 0x%x (\'%c\') at top-level"), ch, ch) :
1725 CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered unexpected Unicode character 0x%x at top-level"), ch);
1726
1727 if (parser->callBacks.handleError) {
1728 INVOKE_CALLBACK3(parser->callBacks.handleError, parser, parser->status, parser->context.info);
1729 }
1730 return false;
1731 }
1732 }
1733 }
1734
1735 if (!success) return false;
1736 if (!sawElement) {
1737 _CFReportError(parser, kCFXMLErrorElementlessDocument, "No element found in document");
1738 return false;
1739 }
1740 return true;
1741 }
1742
1743 static void _CFReportError(CFXMLParserRef parser, CFXMLParserStatusCode errNum, const char *str) {
1744 if (str) {
1745 parser->status = errNum;
1746 parser->errorString = CFStringCreateWithCString(CFGetAllocator(parser), str, kCFStringEncodingASCII);
1747 }
1748 if (parser->callBacks.handleError) {
1749 INVOKE_CALLBACK3(parser->callBacks.handleError, parser, errNum, parser->context.info);
1750 }
1751 }
1752
1753 // Assumes parser->node has been set and is ready to go
1754 static Boolean reportNewLeaf(CFXMLParserRef parser) {
1755 void *xmlStruct;
1756 if (*(parser->top) == NULL) return true;
1757
1758 xmlStruct = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info);
1759 if (xmlStruct && parser->status == kCFXMLStatusParseInProgress) {
1760 INVOKE_CALLBACK4(parser->callBacks.addChild, parser, *(parser->top), xmlStruct, parser->context.info);
1761 if (parser->status == kCFXMLStatusParseInProgress) INVOKE_CALLBACK3(parser->callBacks.endXMLStructure, parser, xmlStruct, parser->context.info);
1762 }
1763 if (parser->status != kCFXMLStatusParseInProgress) {
1764 _CFReportError(parser, parser->status, NULL);
1765 return false;
1766 }
1767 return true;
1768 }
1769
1770 static void pushXMLNode(CFXMLParserRef parser, void *node) {
1771 parser->top ++;
1772 if ((unsigned)(parser->top - parser->stack) == parser->capacity) {
1773 parser->stack = CFAllocatorReallocate(CFGetAllocator(parser), parser->stack, 2 * parser->capacity * sizeof(void *), 0);
1774 parser->top = parser->stack + parser->capacity;
1775 parser->capacity = 2*parser->capacity;
1776 }
1777 *(parser->top) = node;
1778 }
1779
1780 /**************************/
1781 /* Parsing to a CFXMLTree */
1782 /**************************/
1783
1784 static void *_XMLTreeCreateXMLStructure(CFXMLParserRef parser, CFXMLNodeRef node, void *context) {
1785 CFXMLNodeRef myNode = CFXMLNodeCreateCopy(CFGetAllocator(parser), node);
1786 CFXMLTreeRef tree = CFXMLTreeCreateWithNode(CFGetAllocator(parser), myNode);
1787 CFRelease(myNode);
1788 return (void *)tree;
1789 }
1790
1791 static void _XMLTreeAddChild(CFXMLParserRef parser, void *parent, void *child, void *context) {
1792 CFTreeAppendChild((CFTreeRef)parent, (CFTreeRef)child);
1793 }
1794
1795 static void _XMLTreeEndXMLStructure(CFXMLParserRef parser, void *xmlType, void *context) {
1796 CFXMLTreeRef node = (CFXMLTreeRef)xmlType;
1797 if (CFTreeGetParent(node))
1798 CFRelease((CFXMLTreeRef)xmlType);
1799 }
1800
1801 CFXMLTreeRef CFXMLTreeCreateWithDataFromURL(CFAllocatorRef allocator, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex version) {
1802 CFXMLParserRef parser;
1803 CFXMLParserCallBacks callbacks;
1804 CFXMLTreeRef result;
1805
1806 CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__);
1807
1808 callbacks.createXMLStructure = _XMLTreeCreateXMLStructure;
1809 callbacks.addChild = _XMLTreeAddChild;
1810 callbacks.endXMLStructure = _XMLTreeEndXMLStructure;
1811 callbacks.resolveExternalEntity = NULL;
1812 callbacks.handleError = NULL;
1813 parser = CFXMLParserCreateWithDataFromURL(allocator, dataSource, parseOptions, version, &callbacks, NULL);
1814
1815 if (CFXMLParserParse(parser)) {
1816 result = (CFXMLTreeRef)CFXMLParserGetDocument(parser);
1817 } else {
1818 result = (CFXMLTreeRef)CFXMLParserGetDocument(parser);
1819 if (result) CFRelease(result);
1820 result = NULL;
1821 }
1822 CFRelease(parser);
1823 return result;
1824 }
1825
1826 CFXMLTreeRef CFXMLTreeCreateFromData(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex parserVersion) {
1827 return CFXMLTreeCreateFromDataWithError(allocator, xmlData, dataSource, parseOptions, parserVersion, NULL);
1828 }
1829
1830 #if MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_3
1831
1832 CONST_STRING_DECL(kCFXMLTreeErrorDescription, "kCFXMLTreeErrorDescription");
1833 CONST_STRING_DECL(kCFXMLTreeErrorLineNumber, "kCFXMLTreeErrorLineNumber");
1834 CONST_STRING_DECL(kCFXMLTreeErrorLocation, "kCFXMLTreeErrorLocation");
1835 CONST_STRING_DECL(kCFXMLTreeErrorStatusCode, "kCFXMLTreeErrorStatusCode");
1836
1837 CFXMLTreeRef CFXMLTreeCreateFromDataWithError(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex parserVersion, CFDictionaryRef *errorDict) {
1838 CFXMLParserRef parser;
1839 CFXMLParserCallBacks callbacks;
1840 CFXMLTreeRef result;
1841
1842 __CFGenericValidateType(xmlData, CFDataGetTypeID());
1843 CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__);
1844
1845 callbacks.createXMLStructure = _XMLTreeCreateXMLStructure;
1846 callbacks.addChild = _XMLTreeAddChild;
1847 callbacks.endXMLStructure = _XMLTreeEndXMLStructure;
1848 callbacks.resolveExternalEntity = NULL;
1849 callbacks.handleError = NULL;
1850 parser = CFXMLParserCreate(allocator, xmlData, dataSource, parseOptions, parserVersion, &callbacks, NULL);
1851
1852 if (CFXMLParserParse(parser)) {
1853 result = (CFXMLTreeRef)CFXMLParserGetDocument(parser);
1854 } else {
1855 if (errorDict) { // collect the error dictionary
1856 *errorDict = CFDictionaryCreateMutable(allocator, 4, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
1857 if (*errorDict) {
1858 CFIndex rawnum;
1859 CFNumberRef cfnum;
1860 CFStringRef errstring;
1861
1862 rawnum = CFXMLParserGetLocation(parser);
1863 cfnum = CFNumberCreate(allocator, kCFNumberSInt32Type, &rawnum);
1864 if(cfnum) {
1865 CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorLocation, cfnum);
1866 CFRelease(cfnum);
1867 }
1868
1869 rawnum = CFXMLParserGetLineNumber(parser);
1870 cfnum = CFNumberCreate(allocator, kCFNumberSInt32Type, &rawnum);
1871 if(cfnum) {
1872 CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorLineNumber, cfnum);
1873 CFRelease(cfnum);
1874 }
1875
1876 rawnum = CFXMLParserGetStatusCode(parser);
1877 cfnum = CFNumberCreate(allocator, kCFNumberSInt32Type, &rawnum);
1878 if(cfnum) {
1879 CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorStatusCode, cfnum);
1880 CFRelease(cfnum);
1881 }
1882
1883 errstring = CFXMLParserCopyErrorDescription(parser);
1884 if(errstring) {
1885 CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorDescription, errstring);
1886 CFRelease(errstring);
1887 }
1888 }
1889 }
1890 result = (CFXMLTreeRef)CFXMLParserGetDocument(parser);
1891 if (result) CFRelease(result);
1892 result = NULL;
1893 }
1894 CFRelease(parser);
1895 return result;
1896 }
1897
1898 /*
1899 At the very least we need to do <, >, &, ", and '. In addition, we'll have to do everything else in the string.
1900 We should also be handling items that are up over certain values correctly.
1901 */
1902 CFStringRef CFXMLCreateStringByEscapingEntities(CFAllocatorRef allocator, CFStringRef string, CFDictionaryRef entitiesDictionary) {
1903 CFAssert1(string != NULL, __kCFLogAssertion, "%s(): NULL string not permitted.", __PRETTY_FUNCTION__);
1904 CFMutableStringRef newString = CFStringCreateMutable(allocator, 0); // unbounded mutable string
1905 CFMutableCharacterSetRef startChars = CFCharacterSetCreateMutable(allocator);
1906
1907 CFStringInlineBuffer inlineBuf;
1908 CFIndex idx = 0;
1909 CFIndex mark = idx;
1910 CFIndex stringLength = CFStringGetLength(string);
1911 UniChar uc;
1912
1913 CFCharacterSetAddCharactersInString(startChars, CFSTR("&<>'\""));
1914
1915 CFStringInitInlineBuffer(string, &inlineBuf, CFRangeMake(0, stringLength));
1916 for(idx = 0; idx < stringLength; idx++) {
1917 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, idx);
1918 if(CFCharacterSetIsCharacterMember(startChars, uc)) {
1919 CFStringRef previousSubstring = CFStringCreateWithSubstring(allocator, string, CFRangeMake(mark, idx - mark));
1920 CFStringAppend(newString, previousSubstring);
1921 CFRelease(previousSubstring);
1922 switch(uc) {
1923 case '&':
1924 CFStringAppend(newString, CFSTR("&amp;"));
1925 break;
1926 case '<':
1927 CFStringAppend(newString, CFSTR("&lt;"));
1928 break;
1929 case '>':
1930 CFStringAppend(newString, CFSTR("&gt;"));
1931 break;
1932 case '\'':
1933 CFStringAppend(newString, CFSTR("&apos;"));
1934 break;
1935 case '"':
1936 CFStringAppend(newString, CFSTR("&quot;"));
1937 break;
1938 }
1939 mark = idx + 1;
1940 }
1941 }
1942 // Copy the remainder to the output string before returning.
1943 CFStringRef remainder = CFStringCreateWithSubstring(allocator, string, CFRangeMake(mark, idx - mark));
1944 if (NULL != remainder) {
1945 CFStringAppend(newString, remainder);
1946 CFRelease(remainder);
1947 }
1948
1949 CFRelease(startChars);
1950 return newString;
1951 }
1952
1953 CFStringRef CFXMLCreateStringByUnescapingEntities(CFAllocatorRef allocator, CFStringRef string, CFDictionaryRef entitiesDictionary) {
1954 CFAssert1(string != NULL, __kCFLogAssertion, "%s(): NULL string not permitted.", __PRETTY_FUNCTION__);
1955
1956 CFStringInlineBuffer inlineBuf; /* use this for fast traversal of the string in question */
1957 CFStringRef sub;
1958 CFIndex lastChunkStart, length = CFStringGetLength(string);
1959 CFIndex i, entityStart;
1960 UniChar uc;
1961 UInt32 entity;
1962 int base;
1963 CFMutableDictionaryRef fullReplDict = entitiesDictionary ? CFDictionaryCreateMutableCopy(allocator, 0, entitiesDictionary) : CFDictionaryCreateMutable(allocator, 0, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
1964
1965 CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("amp"), (const void *)CFSTR("&"));
1966 CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("quot"), (const void *)CFSTR("\""));
1967 CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("lt"), (const void *)CFSTR("<"));
1968 CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("gt"), (const void *)CFSTR(">"));
1969 CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("apos"), (const void *)CFSTR("'"));
1970
1971 CFStringInitInlineBuffer(string, &inlineBuf, CFRangeMake(0, length - 1));
1972 CFMutableStringRef newString = CFStringCreateMutable(allocator, 0);
1973
1974 lastChunkStart = 0;
1975 // Scan through the string in its entirety
1976 for(i = 0; i < length; ) {
1977 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++; // grab the next character and move i.
1978
1979 if(uc == '&') {
1980 entityStart = i - 1;
1981 entity = 0xFFFF; // set this to a not-Unicode character as sentinel
1982 // we've hit the beginning of an entity. Copy everything from lastChunkStart to this point.
1983 if(lastChunkStart < i - 1) {
1984 sub = CFStringCreateWithSubstring(allocator, string, CFRangeMake(lastChunkStart, (i - 1) - lastChunkStart));
1985 CFStringAppend(newString, sub);
1986 CFRelease(sub);
1987 }
1988
1989 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++; // grab the next character and move i.
1990 // Now we can process the entity reference itself
1991 if(uc == '#') { // this is a numeric entity.
1992 base = 10;
1993 entity = 0;
1994 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++;
1995
1996 if(uc == 'x') { // only lowercase x allowed. Translating numeric entity as hexadecimal.
1997 base = 16;
1998 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++;
1999 }
2000
2001 // process the provided digits 'til we're finished
2002 while(true) {
2003 if (uc >= '0' && uc <= '9')
2004 entity = entity * base + (uc-'0');
2005 else if (uc >= 'a' && uc <= 'f' && base == 16)
2006 entity = entity * base + (uc-'a'+10);
2007 else if (uc >= 'A' && uc <= 'F' && base == 16)
2008 entity = entity * base + (uc-'A'+10);
2009 else break;
2010
2011 if (i < length) {
2012 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++;
2013 }
2014 else
2015 break;
2016 }
2017 }
2018
2019 // Scan to the end of the entity
2020 while(uc != ';' && i < length) {
2021 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++;
2022 }
2023
2024 if(0xFFFF != entity) { // it was numeric, and translated.
2025 // Now, output the result fo the entity
2026 if(entity >= 0x10000) {
2027 UniChar characters[2] = { ((entity - 0x10000) >> 10) + 0xD800, ((entity - 0x10000) & 0x3ff) + 0xDC00 };
2028 CFStringAppendCharacters(newString, characters, 2);
2029 } else {
2030 UniChar character = entity;
2031 CFStringAppendCharacters(newString, &character, 1);
2032 }
2033 } else { // it wasn't numeric.
2034 sub = CFStringCreateWithSubstring(allocator, string, CFRangeMake(entityStart + 1, (i - entityStart - 2))); // This trims off the & and ; from the string, so we can use it against the dictionary itself.
2035 CFStringRef replacementString = CFDictionaryGetValue(fullReplDict, sub);
2036 if(replacementString) {
2037 CFStringAppend(newString, replacementString);
2038 } else {
2039 CFRelease(sub); // let the old substring go, since we didn't find it in the dictionary
2040 sub = CFStringCreateWithSubstring(allocator, string, CFRangeMake(entityStart, (i - entityStart))); // create a new one, including the & and ;
2041 CFStringAppend(newString, sub); // ...and append that.
2042 }
2043 CFRelease(sub); // in either case, release the most-recent "sub"
2044 }
2045
2046 // move the lastChunkStart to the beginning of the next chunk.
2047 lastChunkStart = i;
2048 }
2049 }
2050 if(lastChunkStart < length) { // we've come out of the loop, let's get the rest of the string and tack it on.
2051 sub = CFStringCreateWithSubstring(allocator, string, CFRangeMake(lastChunkStart, i - lastChunkStart));
2052 CFStringAppend(newString, sub);
2053 CFRelease(sub);
2054 }
2055
2056 CFRelease(fullReplDict);
2057
2058 return newString;
2059 }
2060
2061 #endif
2062