]> git.saurik.com Git - apple/cf.git/blob - Parsing.subproj/CFXMLParser.c
CF-299.tar.gz
[apple/cf.git] / Parsing.subproj / CFXMLParser.c
1 /*
2 * Copyright (c) 2003 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25 /* CFXMLParser.c
26 Copyright 1999-2002, Apple, Inc. All rights reserved.
27 Responsibility: Chris Parker
28 */
29
30 #include <CoreFoundation/CFXMLParser.h>
31 #include <CoreFoundation/CFNumber.h>
32 #include "CFXMLInputStream.h"
33 #include "CFUniChar.h"
34 #include "CFInternal.h"
35
36 struct __CFXMLParser {
37 CFRuntimeBase _cfBase;
38
39 _CFXMLInputStream input;
40
41 void **stack;
42 void **top;
43 UInt32 capacity;
44
45 struct __CFXMLNode *node; // Our private node; we use it to report back information
46 CFMutableDictionaryRef argDict;
47 CFMutableArrayRef argArray;
48
49 UInt32 options;
50 CFXMLParserCallBacks callBacks;
51 CFXMLParserContext context;
52
53 CFXMLParserStatusCode status;
54 CFStringRef errorString;
55 };
56
57 static CFStringRef __CFXMLParserCopyDescription(CFTypeRef cf) {
58 const struct __CFXMLParser *parser = cf;
59 return CFStringCreateWithFormat(CFGetAllocator(cf), NULL, CFSTR("<CFXMLParser 0x%x>"), parser);
60 }
61
62 static void __CFXMLParserDeallocate(CFTypeRef cf) {
63 struct __CFXMLParser *parser = (struct __CFXMLParser *)cf;
64 CFAllocatorRef alloc = CFGetAllocator(parser);
65 _freeInputStream(&(parser->input));
66 if (parser->argDict) CFRelease(parser->argDict);
67 if (parser->argArray) CFRelease(parser->argArray);
68 if (parser->errorString) CFRelease(parser->errorString);
69 if (parser->node) CFRelease(parser->node);
70 CFAllocatorDeallocate(alloc, parser->stack);
71 if (parser->context.info && parser->context.release) {
72 parser->context.release(parser->context.info);
73 }
74 }
75
76 static CFTypeID __kCFXMLParserTypeID = _kCFRuntimeNotATypeID;
77
78 static const CFRuntimeClass __CFXMLParserClass = {
79 0,
80 "CFXMLParser",
81 NULL, // init
82 NULL, // copy
83 __CFXMLParserDeallocate,
84 NULL,
85 NULL,
86 NULL, //
87 __CFXMLParserCopyDescription
88 };
89
90 __private_extern__ void __CFXMLParserInitialize(void) {
91 __kCFXMLParserTypeID = _CFRuntimeRegisterClass(&__CFXMLParserClass);
92 }
93
94 CFTypeID CFXMLParserGetTypeID(void) {
95 return __kCFXMLParserTypeID;
96 }
97
98 #if defined(__ppc__)
99 #define __mask ~0x3
100 #else
101 #define __mask ~0x0
102 #endif
103
104 void CFXMLParserGetContext(CFXMLParserRef parser, CFXMLParserContext *context) {
105 CFAssert1(parser != NULL, __kCFLogAssertion, "%s(): NULL parser not permitted", __PRETTY_FUNCTION__);
106 __CFGenericValidateType(parser, __kCFXMLParserTypeID);
107 if (context) {
108 context->version = parser->context.version;
109 context->info = parser->context.info;
110 context->retain = (void *)((uintptr_t)parser->context.retain & __mask);
111 context->release = (void *)((uintptr_t)parser->context.release & __mask);
112 context->copyDescription = (void *)((uintptr_t)parser->context.copyDescription & __mask);
113 }
114 }
115
116 void CFXMLParserGetCallBacks(CFXMLParserRef parser, CFXMLParserCallBacks *callBacks) {
117 __CFGenericValidateType(parser, __kCFXMLParserTypeID);
118 if (callBacks) {
119 callBacks->version = parser->callBacks.version;
120 callBacks->createXMLStructure = (void *)((uintptr_t)parser->callBacks.createXMLStructure & __mask);
121 callBacks->addChild = (void *)((uintptr_t)parser->callBacks.addChild & __mask);
122 callBacks->endXMLStructure = (void *)((uintptr_t)parser->callBacks.endXMLStructure & __mask);
123 callBacks->resolveExternalEntity = (void *)((uintptr_t)parser->callBacks.resolveExternalEntity & __mask);
124 callBacks->handleError = (void *)((uintptr_t)parser->callBacks.handleError & __mask);
125 }
126 }
127
128 #undef __mask
129
130 CFURLRef CFXMLParserGetSourceURL(CFXMLParserRef parser) {
131 __CFGenericValidateType(parser, __kCFXMLParserTypeID);
132 return parser->input.url;
133 }
134
135 /* Returns the character index or line number of the current parse location */
136 CFIndex CFXMLParserGetLocation(CFXMLParserRef parser) {
137 __CFGenericValidateType(parser, __kCFXMLParserTypeID);
138 return _inputStreamCurrentLocation(&parser->input);
139 }
140
141 CFIndex CFXMLParserGetLineNumber(CFXMLParserRef parser) {
142 __CFGenericValidateType(parser, __kCFXMLParserTypeID);
143 return _inputStreamCurrentLine(&parser->input);
144 }
145
146 /* Returns the top-most object returned by the createXMLStructure callback */
147 void *CFXMLParserGetDocument(CFXMLParserRef parser) {
148 __CFGenericValidateType(parser, __kCFXMLParserTypeID);
149 if (parser->capacity > 0)
150 return parser->stack[0];
151 else
152 return NULL;
153 }
154
155 CFXMLParserStatusCode CFXMLParserGetStatusCode(CFXMLParserRef parser) {
156 __CFGenericValidateType(parser, __kCFXMLParserTypeID);
157 return parser->status;
158 }
159
160 CFStringRef CFXMLParserCopyErrorDescription(CFXMLParserRef parser) {
161 __CFGenericValidateType(parser, __kCFXMLParserTypeID);
162 return CFRetain(parser->errorString);
163 }
164
165 void CFXMLParserAbort(CFXMLParserRef parser, CFXMLParserStatusCode errorCode, CFStringRef errorDescription) {
166 __CFGenericValidateType(parser, __kCFXMLParserTypeID);
167 CFAssert1(errorCode > 0, __kCFLogAssertion, "%s(): errorCode must be greater than zero", __PRETTY_FUNCTION__);
168 CFAssert1(errorDescription != NULL, __kCFLogAssertion, "%s(): errorDescription may not be NULL", __PRETTY_FUNCTION__);
169 __CFGenericValidateType(errorDescription, CFStringGetTypeID());
170
171 parser->status = errorCode;
172 if (parser->errorString) CFRelease(parser->errorString);
173 parser->errorString = CFStringCreateCopy(NULL, errorDescription);
174 }
175
176
177 static Boolean parseXML(CFXMLParserRef parser);
178 static Boolean parseComment(CFXMLParserRef parser, Boolean report);
179 static Boolean parseProcessingInstruction(CFXMLParserRef parser, Boolean report);
180 static Boolean parseInlineDTD(CFXMLParserRef parser);
181 static Boolean parseDTD(CFXMLParserRef parser);
182 static Boolean parsePhysicalEntityReference(CFXMLParserRef parser);
183 static Boolean parseCDSect(CFXMLParserRef parser);
184 static Boolean parseEntityReference(CFXMLParserRef parser, Boolean report);
185 static Boolean parsePCData(CFXMLParserRef parser);
186 static Boolean parseWhitespace(CFXMLParserRef parser);
187 static Boolean parseAttributeListDeclaration(CFXMLParserRef parser);
188 static Boolean parseNotationDeclaration(CFXMLParserRef parser);
189 static Boolean parseElementDeclaration(CFXMLParserRef parser);
190 static Boolean parseEntityDeclaration(CFXMLParserRef parser);
191 static Boolean parseExternalID(CFXMLParserRef parser, Boolean alsoAcceptPublicID, CFXMLExternalID *extID);
192 static Boolean parseCloseTag(CFXMLParserRef parser, CFStringRef tag);
193 static Boolean parseTagContent(CFXMLParserRef parser);
194 static Boolean parseTag(CFXMLParserRef parser);
195 static Boolean parseAttributes(CFXMLParserRef parser);
196 static Boolean parseAttributeValue(CFXMLParserRef parser, CFMutableStringRef str);
197
198 // Utilities; may need to make these accessible to the property list parser to avoid code duplication
199 static void _CFReportError(CFXMLParserRef parser, CFXMLParserStatusCode errNum, const char *str);
200 static Boolean reportNewLeaf(CFXMLParserRef parser); // Assumes parser->node has been set and is ready to go
201 static void pushXMLNode(CFXMLParserRef parser, void *node);
202
203 static CFXMLParserRef __CFXMLParserInit(CFAllocatorRef alloc, CFURLRef dataSource, CFOptionFlags options, CFDataRef xmlData, CFIndex version, CFXMLParserCallBacks *callBacks, CFXMLParserContext *context) {
204 struct __CFXMLParser *parser = (struct __CFXMLParser *)_CFRuntimeCreateInstance(alloc, __kCFXMLParserTypeID, sizeof(struct __CFXMLParser) - sizeof(CFRuntimeBase), NULL);
205 struct __CFXMLNode *node = (struct __CFXMLNode *)_CFRuntimeCreateInstance(alloc, CFXMLNodeGetTypeID(), sizeof(struct __CFXMLNode) - sizeof(CFRuntimeBase), NULL);
206 UniChar *buf;
207 if (parser && node) {
208 alloc = CFGetAllocator(parser);
209 _initializeInputStream(&(parser->input), alloc, dataSource, xmlData);
210 parser->top = parser->stack;
211 parser->stack = NULL;
212 parser->capacity = 0;
213
214 buf = CFAllocatorAllocate(alloc, 128*sizeof(UniChar), 0);
215 parser->node = node;
216 parser->node->dataString = CFStringCreateMutableWithExternalCharactersNoCopy(alloc, buf, 0, 128, alloc);
217 parser->node->additionalData = NULL;
218 parser->node->version = version;
219 parser->argDict = NULL; // don't create these until necessary
220 parser->argArray = NULL;
221
222 parser->options = options;
223 parser->callBacks = *callBacks;
224
225 FAULT_CALLBACK((void **)&(parser->callBacks.createXMLStructure));
226 FAULT_CALLBACK((void **)&(parser->callBacks.addChild));
227 FAULT_CALLBACK((void **)&(parser->callBacks.endXMLStructure));
228 FAULT_CALLBACK((void **)&(parser->callBacks.resolveExternalEntity));
229 FAULT_CALLBACK((void **)&(parser->callBacks.handleError));
230
231 if (context) {
232 parser->context = *context;
233 if (parser->context.info && parser->context.retain) {
234 parser->context.retain(parser->context.info);
235 }
236 } else {
237 parser->context.version = 0;
238 parser->context.info = NULL;
239 parser->context.retain = NULL;
240 parser->context.release = NULL;
241 parser->context.copyDescription = NULL;
242 }
243 parser->status = kCFXMLStatusParseNotBegun;
244 parser->errorString = NULL;
245 } else {
246 if (parser) CFRelease(parser);
247 if (node) CFRelease(node);
248 parser = NULL;
249 }
250 return parser;
251 }
252
253 CFXMLParserRef CFXMLParserCreate(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex versionOfNodes, CFXMLParserCallBacks *callBacks, CFXMLParserContext *context) {
254 CFAssert1(xmlData != NULL, __kCFLogAssertion, "%s(): NULL data not permitted", __PRETTY_FUNCTION__);
255 __CFGenericValidateType(xmlData, CFDataGetTypeID());
256 CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__);
257 CFAssert1(callBacks != NULL && callBacks->createXMLStructure != NULL && callBacks->addChild != NULL && callBacks->endXMLStructure != NULL, __kCFLogAssertion, "%s(): callbacks createXMLStructure, addChild, and endXMLStructure must all be non-NULL", __PRETTY_FUNCTION__);
258 CFAssert2(versionOfNodes <= 1, __kCFLogAssertion, "%s(): version number %d is higher than supported by CFXMLParser", __PRETTY_FUNCTION__, versionOfNodes);
259 CFAssert1(versionOfNodes != 0, __kCFLogAssertion, "%s(): version number 0 is no longer supported by CFXMLParser", __PRETTY_FUNCTION__);
260 return __CFXMLParserInit(allocator, dataSource, parseOptions, xmlData, versionOfNodes, callBacks, context);
261 }
262
263
264 Boolean CFXMLParserParse(CFXMLParserRef parser) {
265 CFXMLDocumentInfo docData;
266 __CFGenericValidateType(parser, __kCFXMLParserTypeID);
267 if (parser->status != kCFXMLStatusParseNotBegun) return false;
268 parser->status = kCFXMLStatusParseInProgress;
269
270 if (!_openInputStream(&parser->input)) {
271 if (!parser->input.data) {
272 // couldn't load URL
273 parser->status = kCFXMLErrorNoData;
274 parser->errorString = CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("No data found at %@"), CFURLGetString(parser->input.url));
275 } else {
276 // couldn't figure out the encoding
277 CFAssert(parser->input.encoding == kCFStringEncodingInvalidId, __kCFLogAssertion, "CFXMLParser internal error: input stream could not be opened");
278 parser->status = kCFXMLErrorUnknownEncoding;
279 parser->errorString = CFStringCreateWithCString(CFGetAllocator(parser), "Encountered unknown encoding", kCFStringEncodingASCII);
280 }
281 if (parser->callBacks.handleError) {
282 INVOKE_CALLBACK3(parser->callBacks.handleError, parser, parser->status, parser->context.info);
283 }
284 return false;
285 }
286
287 // Create the document
288 parser->stack = CFAllocatorAllocate(CFGetAllocator(parser), 16 * sizeof(void *), 0);
289 parser->capacity = 16;
290 parser->node->dataTypeID = kCFXMLNodeTypeDocument;
291 docData.encoding = _inputStreamGetEncoding(&parser->input);
292 docData.sourceURL = parser->input.url;
293 parser->node->additionalData = &docData;
294 parser->stack[0] = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info);
295 parser->top = parser->stack;
296 parser->node->additionalData = NULL;
297
298 // Client may have called CFXMLParserAbort() during any callback, so we must always check to see if we have an error status after a callback
299 if (parser->status != kCFXMLStatusParseInProgress) {
300 _CFReportError(parser, parser->status, NULL);
301 return false;
302 }
303 return parseXML(parser);
304 }
305
/* The next several functions are each intended to parse past one particular XML structure. Each expects the parser's input to be positioned at the first content character of its structure (e.g. parseComment expects the input to be positioned just past "<!--"). Each parses to the end of its structure, calling any necessary callbacks along the way and advancing the input as it goes. They either return void (not possible for the parse to fail) or they return a Boolean (success/failure). The calling routines are expected to catch returned Booleans and fail immediately if false is returned. */
307
308 // [3] S ::= (#x20 | #x9 | #xD | #xA)+
309 static Boolean parseWhitespace(CFXMLParserRef parser) {
310 CFIndex len;
311 Boolean report = !(parser->options & kCFXMLParserSkipWhitespace);
312 len = _inputStreamSkipWhitespace(&parser->input, report ? (CFMutableStringRef)(parser->node->dataString) : NULL);
313 if (report && len) {
314 parser->node->dataTypeID = kCFXMLNodeTypeWhitespace;
315 parser->node->additionalData = NULL;
316 return reportNewLeaf(parser);
317 } else {
318 return true;
319 }
320 }
321
322 // parser should be just past "<!--"
323 static Boolean parseComment(CFXMLParserRef parser, Boolean report) {
324 const UniChar dashes[2] = {'-', '-'};
325 UniChar ch;
326 report = report && (!(parser->options & kCFXMLParserSkipMetaData));
327 if (!_inputStreamScanToCharacters(&parser->input, dashes, 2, report ? (CFMutableStringRef)(parser->node->dataString) : NULL) || !_inputStreamGetCharacter(&parser->input, &ch)) {
328 _CFReportError(parser, kCFXMLErrorUnexpectedEOF,"Found unexpected EOF while parsing comment");
329 return false;
330 } else if (ch != '>') {
331 _CFReportError(parser, kCFXMLErrorMalformedComment, "Found \"--\" within a comment");
332 return false;
333 } else if (report) {
334 parser->node->dataTypeID = kCFXMLNodeTypeComment;
335 parser->node->additionalData = NULL;
336 return reportNewLeaf(parser);
337 } else {
338 return true;
339 }
340 }
341
342 /*
343 [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
344 [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
345 */
346 // parser should be set to the first character after "<?"
347 static Boolean parseProcessingInstruction(CFXMLParserRef parser, Boolean report) {
348 const UniChar piTermination[2] = {'?', '>'};
349 CFMutableStringRef str;
350 CFStringRef name;
351
352 if (!_inputStreamScanXMLName(&parser->input, false, &name)) {
353 _CFReportError(parser, kCFXMLErrorMalformedProcessingInstruction, "Found malformed processing instruction");
354 return false;
355 }
356 _inputStreamSkipWhitespace(&parser->input, NULL);
357 str = (report && *parser->top) ? CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser)) : NULL;
358 if (!_inputStreamScanToCharacters(&parser->input, piTermination, 2, str)) {
359 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing processing instruction");
360 if (str) CFRelease(str);
361 return false;
362 }
363
364 if (str) {
365 CFXMLProcessingInstructionInfo data;
366 Boolean result;
367 CFStringRef tmp = parser->node->dataString;
368 parser->node->dataTypeID = kCFXMLNodeTypeProcessingInstruction;
369 parser->node->dataString = name;
370 data.dataString = str;
371 parser->node->additionalData = &data;
372 result = reportNewLeaf(parser);
373 parser->node->additionalData = NULL;
374 parser->node->dataString = tmp;
375 CFRelease(str);
376 return result;
377 } else {
378 return true;
379 }
380 }
381
382 /*
383 [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
384 */
385 static const UniChar _DoctypeOpening[7] = {'D', 'O', 'C', 'T', 'Y', 'P', 'E'};
386 // first character should be immediately after the "<!"
387 static Boolean parseDTD(CFXMLParserRef parser) {
388 UniChar ch;
389 Boolean success, hasExtID = false;
390 CFXMLDocumentTypeInfo docData = {{NULL, NULL}};
391 void *dtdStructure = NULL;
392 CFStringRef name;
393
394 // First pass "DOCTYPE"
395 success = _inputStreamMatchString(&parser->input, _DoctypeOpening, 7);
396 success = success && _inputStreamSkipWhitespace(&parser->input, NULL) != 0;
397 success = success && _inputStreamScanXMLName(&parser->input, false, &name);
398 if (success) {
399 _inputStreamSkipWhitespace(&parser->input, NULL);
400 success = _inputStreamPeekCharacter(&parser->input, &ch);
401 } else {
402 // didn't make it past "DOCTYPE" successfully.
403 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found malformed DTD");
404 return false;
405 }
406 if (success && ch != '[' && ch != '>') {
407 // ExternalID
408 hasExtID = true;
409 success = parseExternalID(parser, false, &(docData.externalID));
410 if (success) {
411 _inputStreamSkipWhitespace(&parser->input, NULL);
412 success = _inputStreamPeekCharacter(&parser->input, &ch);
413 }
414 }
415
416 if (!(parser->options & kCFXMLParserSkipMetaData) && *(parser->top)) {
417 CFStringRef tmp = parser->node->dataString;
418 parser->node->dataTypeID = kCFXMLNodeTypeDocumentType;
419 parser->node->dataString = name;
420 parser->node->additionalData = &docData;
421 dtdStructure = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info);
422 if (dtdStructure && parser->status == kCFXMLStatusParseInProgress) {
423 INVOKE_CALLBACK4(parser->callBacks.addChild, parser, *parser->top, dtdStructure, parser->context.info);
424 }
425 parser->node->additionalData = NULL;
426 parser->node->dataString = tmp;
427 if (parser->status != kCFXMLStatusParseInProgress) {
428 // callback called CFXMLParserAbort()
429 _CFReportError(parser, parser->status, NULL);
430 return false;
431 }
432 } else {
433 dtdStructure = NULL;
434 }
435 if (docData.externalID.publicID) CFRelease(docData.externalID.publicID);
436 if (docData.externalID.systemID) CFRelease(docData.externalID.systemID);
437 pushXMLNode(parser, dtdStructure);
438
439 if (success && ch == '[') {
440 // inline DTD
441 _inputStreamGetCharacter(&parser->input, &ch);
442 if (!parseInlineDTD(parser)) return false;
443 _inputStreamSkipWhitespace(&parser->input, NULL);
444 success = _inputStreamGetCharacter(&parser->input, &ch) && ch == '>';
445 } else if (success && ch == '>') {
446 // End of the DTD
447 _inputStreamGetCharacter(&parser->input, &ch);
448 }
449 if (!success) {
450 if (_inputStreamAtEOF(&parser->input)) {
451 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing DTD");
452 } else {
453 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found malformed DTD");
454 }
455 return false;
456 }
457
458 parser->top --; // Remove dtdStructure from the stack
459
460 if (success && dtdStructure) {
461 INVOKE_CALLBACK3(parser->callBacks.endXMLStructure, parser, dtdStructure, parser->context.info);
462 if (parser->status != kCFXMLStatusParseInProgress) {
463 _CFReportError(parser, parser->status, NULL);
464 return false;
465 }
466 }
467 return true;
468 }
469
470 /*
471 [69] PEReference ::= '%' Name ';'
472 */
473 static Boolean parsePhysicalEntityReference(CFXMLParserRef parser) {
474 UniChar ch;
475 CFStringRef name;
476 if (!_inputStreamScanXMLName(&parser->input, false, &name)) {
477 _CFReportError(parser, kCFXMLErrorMalformedName, "Found malformed name while parsing physical entity reference");
478 return false;
479 } else if (!_inputStreamGetCharacter(&parser->input, &ch)) {
480 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing physical entity reference");
481 return false;
482 } else if (ch != ';') {
483 _CFReportError(parser, kCFXMLErrorMalformedName, "Found malformed name while parsing physical entity reference");
484 return false;
485 } else if (!(parser->options & kCFXMLParserSkipMetaData) && *(parser->top)) {
486 CFXMLEntityReferenceInfo myData;
487 Boolean result;
488 CFStringRef tmp = parser->node->dataString;
489 parser->node->dataTypeID = kCFXMLNodeTypeEntityReference;
490 parser->node->dataString = name;
491 myData.entityType = kCFXMLEntityTypeParameter;
492 parser->node->additionalData = &myData;
493 result = reportNewLeaf(parser);
494 parser->node->additionalData = NULL;
495 parser->node->dataString = tmp;
496 return result;
497 } else {
498 return true;
499 }
500 }
501
502 /*
503 [54] AttType ::= StringType | TokenizedType | EnumeratedType
504 [55] StringType ::= 'CDATA'
505 [56] TokenizedType ::= 'ID' | 'IDREF'| 'IDREFS'| 'ENTITY'| 'ENTITIES'| 'NMTOKEN'| 'NMTOKENS'
506 [57] EnumeratedType ::= NotationType | Enumeration
507 [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
508 [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
509 */
510 static Boolean parseEnumeration(CFXMLParserRef parser, Boolean useNMTokens) {
511 UniChar ch;
512 Boolean done = false;
513 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
514 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
515 return false;
516 } else if (ch != '(') {
517 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
518 return false;
519 }
520 _inputStreamSkipWhitespace(&parser->input, NULL);
521 if (!_inputStreamScanXMLName(&parser->input, useNMTokens, NULL)) {
522 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
523 return false;
524 }
525 while (!done) {
526 _inputStreamSkipWhitespace(&parser->input, NULL);
527 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
528 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
529 return false;
530 } else if (ch == ')') {
531 done = true;
532 } else if (ch == '|') {
533 _inputStreamSkipWhitespace(&parser->input, NULL);
534 if (!_inputStreamScanXMLName(&parser->input, useNMTokens, NULL)) {
535 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
536 return false;
537 }
538 } else {
539 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
540 return false;
541 }
542 }
543 return true;
544 }
545
546 static Boolean parseAttributeType(CFXMLParserRef parser, CFMutableStringRef str) {
547 Boolean success = false;
548 static const UniChar attTypeStrings[6][8] = {
549 {'C', 'D', 'A', 'T', 'A', '\0', '\0', '\0'},
550 {'I', 'D', 'R', 'E', 'F', 'S', '\0', '\0'},
551 {'E', 'N', 'T', 'I', 'T', 'Y', '\0', '\0'},
552 {'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S'},
553 {'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S'},
554 {'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N'} };
555 if (str) _inputStreamSetMark(&parser->input);
556 if (_inputStreamMatchString(&parser->input, attTypeStrings[0], 5) ||
557 _inputStreamMatchString(&parser->input, attTypeStrings[1], 6) ||
558 _inputStreamMatchString(&parser->input, attTypeStrings[1], 5) ||
559 _inputStreamMatchString(&parser->input, attTypeStrings[1], 2) ||
560 _inputStreamMatchString(&parser->input, attTypeStrings[2], 6) ||
561 _inputStreamMatchString(&parser->input, attTypeStrings[3], 8) ||
562 _inputStreamMatchString(&parser->input, attTypeStrings[4], 8) ||
563 _inputStreamMatchString(&parser->input, attTypeStrings[4], 7)) {
564 success = true;
565 } else if (_inputStreamMatchString(&parser->input, attTypeStrings[5], 8)) {
566 // Notation
567 if (_inputStreamSkipWhitespace(&parser->input, NULL) == 0) {
568 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
569 success = false;
570 } else {
571 success = parseEnumeration(parser, false);
572 }
573 } else {
574 success = parseEnumeration(parser, true);
575 }
576 if (str) {
577 if (success) {
578 _inputStreamGetCharactersFromMark(&parser->input, str);
579 }
580 _inputStreamClearMark(&parser->input);
581 }
582 return success;
583 }
584
585 /* [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) */
586 static Boolean parseAttributeDefaultDeclaration(CFXMLParserRef parser, CFMutableStringRef str) {
587 const UniChar strings[3][8] = {
588 {'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D'},
589 {'I', 'M', 'P', 'L', 'I', 'E', 'D', '\0'},
590 {'F', 'I', 'X', 'E', 'D', '\0', '\0', '\0'}};
591 UniChar ch;
592 Boolean success;
593 if (str) _inputStreamSetMark(&parser->input);
594 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
595 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
596 success = false;
597 } else if (ch == '#') {
598 if (_inputStreamMatchString(&parser->input, strings[0], 8) ||
599 _inputStreamMatchString(&parser->input, strings[1], 7)) {
600 success = true;
601 } else if (!_inputStreamMatchString(&parser->input, strings[2], 5) || _inputStreamSkipWhitespace(&parser->input, NULL) == 0) {
602 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
603 success = false;
604 } else {
605 // we fall through if "#FIXED" was matched, and at least one whitespace character was stripped.
606 success = parseAttributeValue(parser, NULL);
607 }
608 } else {
609 _inputStreamReturnCharacter(&parser->input, ch);
610 success = parseAttributeValue(parser, NULL);
611 }
612 if (str) {
613 if (success) {
614 _inputStreamGetCharactersFromMark(&parser->input, str);
615 }
616 _inputStreamClearMark(&parser->input);
617 }
618 return success;
619 }
620
621 /*
622 [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
623 [53] AttDef ::= S Name S AttType S DefaultDecl
624 */
625 static Boolean parseAttributeListDeclaration(CFXMLParserRef parser) {
626 const UniChar attList[7] = {'A', 'T', 'T', 'L', 'I', 'S', 'T'};
627 CFXMLAttributeListDeclarationInfo attListData;
628 CFXMLAttributeDeclarationInfo attributeArray[8], *attributes=attributeArray;
629 CFIndex capacity = 8;
630 UniChar ch;
631 Boolean success = true;
632 CFStringRef name;
633 if (!_inputStreamMatchString(&parser->input, attList, 7) ||
634 _inputStreamSkipWhitespace(&parser->input, NULL) == 0 ||
635 !_inputStreamScanXMLName(&parser->input, false, &name)) {
636 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
637 return false;
638 }
639 attListData.numberOfAttributes = 0;
640 if (!(*parser->top) || (parser->options & kCFXMLParserSkipMetaData)) {
641 // Use this to mark that we don't need to collect attribute information to report to the client. Ultimately, we may want to collect this for our own use (for validation, for instance), but for now, the only reason we would create it would be for the client. -- REW, 2/9/2000
642 attributes = NULL;
643 }
644 while (_inputStreamPeekCharacter(&parser->input, &ch) && ch != '>' && _inputStreamSkipWhitespace(&parser->input, NULL) != 0) {
645 CFXMLAttributeDeclarationInfo *attribute = NULL;
646 if (_inputStreamPeekCharacter(&parser->input, &ch) && ch == '>')
647 break;
648 if (attributes) {
649 if (capacity == attListData.numberOfAttributes) {
650 capacity = 2*capacity;
651 if (attributes != attributeArray) {
652 attributes = CFAllocatorReallocate(CFGetAllocator(parser), attributes, capacity * sizeof(CFXMLAttributeDeclarationInfo), 0);
653 } else {
654 attributes = CFAllocatorAllocate(CFGetAllocator(parser), capacity * sizeof(CFXMLAttributeDeclarationInfo), 0);
655 }
656 }
657 attribute = &(attributes[attListData.numberOfAttributes]);
658 // Much better if we can somehow create these strings immutable - then if the client (or we ourselves) has to copy them, they will end up multiply-retained, rather than having a new alloc and data copy performed. -- REW, 2/9/2000
659 attribute->typeString = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
660 attribute->defaultString = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
661 }
662 if (!_inputStreamScanXMLName(&parser->input, false, &(attribute->attributeName)) || (_inputStreamSkipWhitespace(&parser->input, NULL) == 0)) {
663 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
664 success = false;
665 break;
666 }
667 if (!parseAttributeType(parser, attribute ? (CFMutableStringRef)attribute->typeString : NULL)) {
668 success = false;
669 break;
670 }
671 if (_inputStreamSkipWhitespace(&parser->input, NULL) == 0) {
672 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
673 success = false;
674 break;
675 }
676 if (!parseAttributeDefaultDeclaration(parser, attribute ? (CFMutableStringRef)attribute->defaultString : NULL)) {
677 success = false;
678 break;
679 }
680 attListData.numberOfAttributes ++;
681 }
682 if (success) {
683 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
684 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
685 success = false;
686 } else if (ch != '>') {
687 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
688 success = false;
689 } else if (attributes) {
690 CFStringRef tmp = parser->node->dataString;
691 parser->node->dataTypeID = kCFXMLNodeTypeAttributeListDeclaration;
692 parser->node->dataString = name;
693 attListData.attributes = attributes;
694 parser->node->additionalData = (void *)&attListData;
695 success = reportNewLeaf(parser);
696 parser->node->additionalData = NULL;
697 parser->node->dataString = tmp;
698 }
699 }
700 if (attributes) {
701 // Free up all that memory
702 CFIndex idx;
703 for (idx = 0; idx < attListData.numberOfAttributes; idx ++) {
704 // Do not release attributeName here; it's a uniqued string from scanXMLName
705 CFRelease(attributes[idx].typeString);
706 CFRelease(attributes[idx].defaultString);
707 }
708 if (attributes != attributeArray) {
709 CFAllocatorDeallocate(CFGetAllocator(parser), attributes);
710 }
711 }
712 return success;
713 }
714
715 CF_INLINE Boolean parseSystemLiteral(CFXMLParserRef parser, CFXMLExternalID *extID) {
716 Boolean success;
717 if (extID) {
718 CFMutableStringRef urlStr = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
719 if (_inputStreamScanQuotedString(&parser->input, urlStr)) {
720 success = true;
721 extID->systemID = CFURLCreateWithString(CFGetAllocator(parser), urlStr, parser->input.url);
722 } else {
723 extID->systemID = NULL;
724 success = false;
725 }
726 CFRelease(urlStr);
727 } else {
728 success = _inputStreamScanQuotedString(&parser->input, NULL);
729 }
730 return success;
731 }
732
733 /*
734 [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
735 [83] PublicID ::= 'PUBLIC' S PubidLiteral
736 [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
737 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
738 [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
739 */
740 // This does NOT report errors itself; caller can check to see if parser->input is at EOF to determine whether the formatting failed or unexpected EOF occurred. -- REW, 2/2/2000
741 static Boolean parseExternalID(CFXMLParserRef parser, Boolean alsoAcceptPublicID, CFXMLExternalID *extID) {
742 const UniChar publicString[6] = {'P', 'U', 'B', 'L', 'I', 'C'};
743 const UniChar systemString[6] = {'S', 'Y', 'S', 'T', 'E', 'M'};
744 Boolean success;
745 if (extID) {
746 extID->systemID = NULL;
747 extID->publicID = NULL;
748 }
749 if (_inputStreamMatchString(&parser->input, publicString, 6)) {
750 success = _inputStreamSkipWhitespace(&parser->input, NULL) != 0;
751 if (extID) {
752 extID->publicID = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
753 success = success && _inputStreamScanQuotedString(&parser->input, (CFMutableStringRef)extID->publicID);
754 } else {
755 success = success && _inputStreamScanQuotedString(&parser->input, NULL);
756 }
757 if (success) {
758 UniChar ch;
759 if (alsoAcceptPublicID) {
760 _inputStreamSetMark(&parser->input); // In case we need to roll back the parser
761 }
762 if (_inputStreamSkipWhitespace(&parser->input, NULL) == 0
763 || !_inputStreamPeekCharacter(&parser->input, &ch)
764 || (ch != '\'' && ch != '\"')
765 || !parseSystemLiteral(parser, extID)) {
766 success = alsoAcceptPublicID;
767 if (alsoAcceptPublicID) {
768 _inputStreamBackUpToMark(&parser->input);
769 }
770 } else {
771 success = true;
772 }
773 if (alsoAcceptPublicID) {
774 _inputStreamClearMark(&parser->input);
775 }
776 }
777 } else if (_inputStreamMatchString(&parser->input, systemString, 6)) {
778 success = _inputStreamSkipWhitespace(&parser->input, NULL) != 0 && parseSystemLiteral(parser, extID);
779 } else {
780 success = false;
781 }
782 return success;
783 }
784
785 /*
786 [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
787 */
788 static Boolean parseNotationDeclaration(CFXMLParserRef parser) {
789 static UniChar notationString[8] = {'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N'};
790 Boolean report = *(parser->top) && !(parser->options & kCFXMLParserSkipMetaData);
791 CFXMLNotationInfo notationData = {{NULL, NULL}};
792 CFStringRef name;
793 Boolean success =
794 _inputStreamMatchString(&parser->input, notationString, 8) &&
795 _inputStreamSkipWhitespace(&parser->input, NULL) != 0 &&
796 _inputStreamScanXMLName(&parser->input, false, report ? &name : NULL) &&
797 _inputStreamSkipWhitespace(&parser->input, NULL) != 0 &&
798 parseExternalID(parser, true, report ? &(notationData.externalID) : NULL);
799
800 if (success) {
801 UniChar ch;
802 _inputStreamSkipWhitespace(&parser->input, NULL);
803 success = (_inputStreamGetCharacter(&parser->input, &ch) && ch == '>');
804 }
805 if (!success) {
806 if (_inputStreamAtEOF(&parser->input)) {
807 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
808 } else {
809 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
810 }
811 } else if (report) {
812 CFStringRef tmp = parser->node->dataString;
813 parser->node->dataTypeID = kCFXMLNodeTypeNotation;
814 parser->node->dataString = name;
815 parser->node->additionalData = &notationData;
816 success = reportNewLeaf(parser);
817 parser->node->additionalData = NULL;
818 parser->node->dataString = tmp;
819 }
820 if (notationData.externalID.systemID) CFRelease(notationData.externalID.systemID);
821 if (notationData.externalID.publicID) CFRelease(notationData.externalID.publicID);
822 return success;
823 }
824
825 /*
826 [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
827 [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
828 [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
829 */
830 static Boolean parseChoiceOrSequence(CFXMLParserRef parser, Boolean pastParen) {
831 UniChar ch, separator;
832 if (!pastParen) {
833 if (!_inputStreamGetCharacter(&parser->input, &ch) || ch != '(') return false;
834 _inputStreamSkipWhitespace(&parser->input, NULL);
835 }
836 if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false;
837
838 /* Now scanning cp, production [48] */
839 if (ch == '(') {
840 if (!parseChoiceOrSequence(parser, false)) return false;
841 } else {
842 if (!_inputStreamScanXMLName(&parser->input, false, NULL)) return false;
843 }
844 if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false;
845 if (ch == '?' || ch == '*' || ch == '+') _inputStreamGetCharacter(&parser->input, &ch);
846
847 /* Now past cp */
848 _inputStreamSkipWhitespace(&parser->input, NULL);
849 if (!_inputStreamGetCharacter(&parser->input, &ch)) return false;
850 if (ch == ')') return true;
851 if (ch != '|' && ch != ',') return false;
852 separator = ch;
853 while (ch == separator) {
854 _inputStreamSkipWhitespace(&parser->input, NULL);
855 if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false;
856 if (ch != '(') {
857 if (!_inputStreamScanXMLName(&parser->input, false, NULL)) return false;
858 } else if (!parseChoiceOrSequence(parser, false)) {
859 return false;
860 }
861 _inputStreamSkipWhitespace(&parser->input, NULL);
862 if (!_inputStreamGetCharacter(&parser->input, &ch)) return false;
863 }
864 return ch == ')';
865 }
866
867 /*
868 [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')'
869 */
870 static Boolean parseMixedElementContent(CFXMLParserRef parser) {
871 static const UniChar pcdataString[7] = {'#', 'P', 'C', 'D', 'A', 'T', 'A'};
872 UniChar ch;
873 if (!_inputStreamMatchString(&parser->input, pcdataString, 7)) return false;
874 _inputStreamSkipWhitespace(&parser->input, NULL);
875 if (!_inputStreamGetCharacter(&parser->input, &ch) && (ch == ')' || ch == '|')) return false;
876 if (ch == ')') return true;
877
878 while (ch == '|') {
879 _inputStreamSkipWhitespace(&parser->input, NULL);
880 if (!_inputStreamScanXMLName(&parser->input, false, NULL)) return false;
881 _inputStreamSkipWhitespace(&parser->input, NULL);
882 if (!_inputStreamGetCharacter(&parser->input, &ch)) return false;
883 }
884 if (ch != ')') return false;
885 if (!_inputStreamGetCharacter(&parser->input, &ch) || ch != '*') return false;
886 return true;
887 }
888
889 /*
890 [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
891 [47] children ::= (choice | seq) ('?' | '*' | '+')?
892 */
893 static Boolean parseElementContentSpec(CFXMLParserRef parser) {
894 static const UniChar eltContentEmpty[5] = {'E', 'M', 'P', 'T', 'Y'};
895 static const UniChar eltContentAny[3] = {'A', 'N', 'Y'};
896 UniChar ch;
897 if (_inputStreamMatchString(&parser->input, eltContentEmpty, 5) || _inputStreamMatchString(&parser->input, eltContentAny, 3)) {
898 return true;
899 } else if (!_inputStreamPeekCharacter(&parser->input, &ch) || ch != '(') {
900 return false;
901 } else {
902 // We want to know if we have a Mixed per production [51]. If we don't, we will need to back up and call the parseChoiceOrSequence function. So we set the mark now. -- REW, 2/10/2000
903 _inputStreamGetCharacter(&parser->input, &ch);
904 _inputStreamSkipWhitespace(&parser->input, NULL);
905 if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false;
906 if (ch == '#') {
907 // Mixed
908 return parseMixedElementContent(parser);
909 } else {
910 if (parseChoiceOrSequence(parser, true)) {
911 if (_inputStreamPeekCharacter(&parser->input, &ch) && (ch == '*' || ch == '?' || ch == '+')) {
912 _inputStreamGetCharacter(&parser->input, &ch);
913 }
914 return true;
915 } else {
916 return false;
917 }
918 }
919 }
920 }
921
922 /*
923 [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
924 */
925 static Boolean parseElementDeclaration(CFXMLParserRef parser) {
926 Boolean report = *(parser->top) && !(parser->options & kCFXMLParserSkipMetaData);
927 Boolean success;
928 static const UniChar eltChars[7] = {'E', 'L', 'E', 'M', 'E', 'N', 'T'};
929 UniChar ch = '>';
930 CFMutableStringRef contentDesc = NULL;
931 CFStringRef name;
932 success = _inputStreamMatchString(&parser->input, eltChars, 7)
933 && _inputStreamSkipWhitespace(&parser->input, NULL) != 0
934 && _inputStreamScanXMLName(&parser->input, false, report ? &name : NULL)
935 && _inputStreamSkipWhitespace(&parser->input, NULL) != 0;
936 if (success) {
937 if (report) _inputStreamSetMark(&parser->input);
938 success = parseElementContentSpec(parser);
939 if (success && report) {
940 contentDesc = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
941 _inputStreamGetCharactersFromMark(&parser->input, contentDesc);
942 }
943 if (report) _inputStreamClearMark(&parser->input);
944 if (success) _inputStreamSkipWhitespace(&parser->input, NULL);
945 success = success && _inputStreamMatchString(&parser->input, &ch, 1);
946 }
947 if (!success) {
948 if (_inputStreamAtEOF(&parser->input)) {
949 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
950 } else {
951 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
952 }
953 } else if (report) {
954 CFXMLElementTypeDeclarationInfo eltData;
955 CFStringRef tmp = parser->node->dataString;
956 parser->node->dataTypeID = kCFXMLNodeTypeElementTypeDeclaration;
957 parser->node->dataString = name;
958 eltData.contentDescription = contentDesc;
959 parser->node->additionalData = &eltData;
960 success = reportNewLeaf(parser);
961 parser->node->additionalData = NULL;
962 parser->node->dataString = tmp;
963 }
964 if (contentDesc) CFRelease(contentDesc);
965 return success;
966 }
967
968 /*
969 [70] EntityDecl ::= GEDecl | PEDecl
970 [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
971 [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
972 [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
973 [74] PEDef ::= EntityValue | ExternalID
974 [76] NDataDecl ::= S 'NDATA' S Name
975 [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | "'" ([^%&'] | PEReference | Reference)* "'"
976 */
977 static Boolean parseEntityDeclaration(CFXMLParserRef parser) {
978 const UniChar entityStr[6] = {'E', 'N', 'T', 'I', 'T', 'Y'};
979 UniChar ch;
980 Boolean isPEDecl = false;
981 CFXMLEntityInfo entityData;
982 CFStringRef name;
983 Boolean report = *(parser->top) && !(parser->options & kCFXMLParserSkipMetaData);
984 Boolean success =
985 _inputStreamMatchString(&parser->input, entityStr, 6) &&
986 (_inputStreamSkipWhitespace(&parser->input, NULL) != 0) &&
987 _inputStreamPeekCharacter(&parser->input, &ch);
988
989 entityData.replacementText = NULL;
990 entityData.entityID.publicID = NULL;
991 entityData.entityID.systemID = NULL;
992 entityData.notationName = NULL;
993 // We will set entityType immediately before reporting
994
995 if (success && ch == '%') {
996 _inputStreamGetCharacter(&parser->input, &ch);
997 success = _inputStreamSkipWhitespace(&parser->input, NULL) != 0;
998 isPEDecl = true;
999 }
1000 success = success && _inputStreamScanXMLName(&parser->input, false, report ? &name : NULL) && (_inputStreamSkipWhitespace(&parser->input, NULL) != 0) && _inputStreamPeekCharacter(&parser->input, &ch);
1001 if (success && (ch == '\"' || ch == '\'')) {
1002 // EntityValue
1003 // This is not quite correct - the string scanned cannot contain '%' or '&' unless it's as part of a valid entity reference -- REW, 2/2/2000
1004 if (report) {
1005 entityData.replacementText = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
1006 success = _inputStreamScanQuotedString(&parser->input, (CFMutableStringRef)entityData.replacementText);
1007 } else {
1008 success = _inputStreamScanQuotedString(&parser->input, NULL);
1009 }
1010 } else if (success) {
1011 // ExternalID
1012 success = parseExternalID(parser, false, report ? &(entityData.entityID) : NULL);
1013 if (success && !isPEDecl && _inputStreamSkipWhitespace(&parser->input, NULL) != 0) {
1014 // There could be an option NDataDecl
1015 // Don't we need to set entityData.notationName? -- REW, 3/6/2000
1016 const UniChar nDataStr[5] = {'N', 'D', 'A', 'T', 'A'};
1017 if (_inputStreamMatchString(&parser->input, nDataStr, 5)) {
1018 success = (_inputStreamSkipWhitespace(&parser->input, NULL) != 0) && _inputStreamScanXMLName(&parser->input, false, NULL);
1019 }
1020 }
1021 }
1022 if (success) {
1023 _inputStreamSkipWhitespace(&parser->input, NULL);
1024 success = _inputStreamGetCharacter(&parser->input, &ch) && ch == '>';
1025 }
1026 if (!success) {
1027 if (_inputStreamAtEOF(&parser->input)) {
1028 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
1029 } else {
1030 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
1031 }
1032 } else {
1033 CFStringRef tmp = parser->node->dataString;
1034 if (isPEDecl) entityData.entityType = kCFXMLEntityTypeParameter;
1035 else if (entityData.replacementText) entityData.entityType = kCFXMLEntityTypeParsedInternal;
1036 else if (!entityData.notationName) entityData.entityType = kCFXMLEntityTypeParsedExternal;
1037 else entityData.entityType = kCFXMLEntityTypeUnparsed;
1038 parser->node->dataTypeID = kCFXMLNodeTypeEntity;
1039 parser->node->dataString = name;
1040 parser->node->additionalData = &entityData;
1041 success = reportNewLeaf(parser);
1042 parser->node->additionalData = NULL;
1043 parser->node->dataString = tmp;
1044 if (entityData.replacementText) CFRelease(entityData.replacementText);
1045 }
1046 if (entityData.entityID.publicID) CFRelease(entityData.entityID.publicID);
1047 if (entityData.entityID.systemID) CFRelease(entityData.entityID.systemID);
1048 return success;
1049 }
1050
1051 /*
1052 [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1053 [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
1054 */
1055 // First character should be just past '['
1056 static Boolean parseInlineDTD(CFXMLParserRef parser) {
1057 Boolean success = true;
1058 while (success && !_inputStreamAtEOF(&parser->input)) {
1059 UniChar ch;
1060
1061 parseWhitespace(parser);
1062 if (!_inputStreamGetCharacter(&parser->input, &ch)) break;
1063 if (ch == '%') {
1064 // PEReference
1065 success = parsePhysicalEntityReference(parser);
1066 } else if (ch == '<') {
1067 // markupdecl
1068 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
1069 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
1070 return false;
1071 }
1072 if (ch == '?') {
1073 // Processing Instruction
1074 success = parseProcessingInstruction(parser, true); // We can safely pass true here, because *parser->top will be NULL if kCFXMLParserSkipMetaData is true
1075 } else if (ch == '!') {
1076 UniChar dashes[2] = {'-', '-'};
1077 if (_inputStreamMatchString(&parser->input, dashes, 2)) {
1078 // Comment
1079 success = parseComment(parser, true);
1080 } else {
1081 // elementdecl | AttListDecl | EntityDecl | NotationDecl
1082 if (!_inputStreamPeekCharacter(&parser->input, &ch)) {
1083 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
1084 return false;
1085 } else if (ch == 'A') {
1086 // AttListDecl
1087 success = parseAttributeListDeclaration(parser);
1088 } else if (ch == 'N') {
1089 success = parseNotationDeclaration(parser);
1090 } else if (ch == 'E') {
1091 // elementdecl | EntityDecl
1092 _inputStreamGetCharacter(&parser->input, &ch);
1093 if (!_inputStreamPeekCharacter(&parser->input, &ch)) {
1094 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
1095 return false;
1096 }
1097 _inputStreamReturnCharacter(&parser->input, 'E');
1098 if (ch == 'L') {
1099 success = parseElementDeclaration(parser);
1100 } else if (ch == 'N') {
1101 success = parseEntityDeclaration(parser);
1102 } else {
1103 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
1104 return false;
1105 }
1106 } else {
1107 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
1108 return false;
1109 }
1110 }
1111 } else {
1112 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
1113 return false;
1114 }
1115 } else if (ch == ']') {
1116 return true;
1117 } else {
1118 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
1119 return false;
1120 }
1121 }
1122 if (success) {
1123 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
1124 }
1125 return false;
1126 }
1127
1128 /*
1129 [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1130 */
1131 static Boolean parseTagContent(CFXMLParserRef parser) {
1132 while (!_inputStreamAtEOF(&parser->input)) {
1133 UniChar ch;
1134 CFIndex numWhitespaceCharacters;
1135
1136 _inputStreamSetMark(&parser->input);
1137 numWhitespaceCharacters = _inputStreamSkipWhitespace(&parser->input, NULL);
1138 // Don't report the whitespace yet; if the first thing we see is character data, we put the whitespace back and report it as part of the character data.
1139 if (!_inputStreamGetCharacter(&parser->input, &ch)) break; // break == report unexpected EOF
1140
1141 if (ch != '<' && ch != '&') { // CharData
1142 // Back off the whitespace; we'll report it with the PCData
1143 _inputStreamBackUpToMark(&parser->input);
1144 _inputStreamClearMark(&parser->input);
1145 if (!parsePCData(parser)) return false;
1146 if(_inputStreamComposingErrorOccurred(&parser->input)) {
1147 _CFReportError(parser, kCFXMLErrorEncodingConversionFailure, "Encountered string encoding error");
1148 return false;
1149 }
1150 continue;
1151 }
1152
1153 // element | Reference | CDSect | PI | Comment
1154 // We can safely report any whitespace now
1155 if (!(parser->options & kCFXMLParserSkipWhitespace) && numWhitespaceCharacters != 0 && *(parser->top)) {
1156 _inputStreamReturnCharacter(&parser->input, ch);
1157 _inputStreamGetCharactersFromMark(&parser->input, (CFMutableStringRef)(parser->node->dataString));
1158 parser->node->dataTypeID = kCFXMLNodeTypeWhitespace;
1159 parser->node->additionalData = NULL;
1160 if (!reportNewLeaf(parser)) return false;
1161 _inputStreamGetCharacter(&parser->input, &ch);
1162 }
1163 _inputStreamClearMark(&parser->input);
1164
1165 if (ch == '&') {
1166 // Reference; for the time being, we don't worry about processing these; just report them as Entity references
1167 if (!parseEntityReference(parser, true)) return false;
1168 continue;
1169 }
1170
1171 // ch == '<'; element | CDSect | PI | Comment
1172 if (!_inputStreamPeekCharacter(&parser->input, &ch)) break;
1173 if (ch == '?') { // PI
1174 _inputStreamGetCharacter(&parser->input, &ch);
1175 if (!parseProcessingInstruction(parser, true))
1176 return false;
1177 } else if (ch == '/') { // end tag; we're passing outside of content's production
1178 _inputStreamReturnCharacter(&parser->input, '<'); // Back off to the '<'
1179 return true;
1180 } else if (ch != '!') { // element
1181 if (!parseTag(parser)) return false;
1182 } else {
1183 // Comment | CDSect
1184 UniChar dashes[3] = {'!', '-', '-'};
1185 if (_inputStreamMatchString(&parser->input, dashes, 3)) {
1186 // Comment
1187 if (!parseComment(parser, true)) return false;
1188 } else {
1189 // Should have a CDSect; back off the "<!" and call parseCDSect
1190 _inputStreamReturnCharacter(&parser->input, '<');
1191 if (!parseCDSect(parser)) return false;
1192 }
1193 }
1194 }
1195
1196 if(_inputStreamComposingErrorOccurred(&parser->input)) {
1197 _CFReportError(parser, kCFXMLErrorEncodingConversionFailure, "Encountered string encoding error");
1198 return false;
1199 }
1200 // Only way to get here is if premature EOF was found
1201 //#warning CF:Include the tag name here
1202 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing tag content");
1203 return false;
1204 }
1205
1206 static Boolean parseCDSect(CFXMLParserRef parser) {
1207 const UniChar _CDSectOpening[9] = {'<', '!', '[', 'C', 'D', 'A', 'T', 'A', '['};
1208 const UniChar _CDSectClose[3] = {']', ']', '>'};
1209 if (!_inputStreamMatchString(&parser->input, _CDSectOpening, 9)) {
1210 _CFReportError(parser, kCFXMLErrorMalformedCDSect, "Encountered bad prefix to a presumed CDATA section");
1211 return false;
1212 }
1213 if (!_inputStreamScanToCharacters(&parser->input, _CDSectClose, 3, (CFMutableStringRef)(parser->node->dataString))) {
1214 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing CDATA section");
1215 return false;
1216 }
1217
1218 parser->node->dataTypeID = kCFXMLNodeTypeCDATASection;
1219 parser->node->additionalData = NULL;
1220 return reportNewLeaf(parser);
1221 }
1222
1223 /*
1224 [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1225 */
1226 static Boolean validateCharacterReference(CFStringRef str) {
1227 Boolean isHex;
1228 CFIndex idx, len = CFStringGetLength(str);
1229 if (len < 2) return false;
1230 if (CFStringGetCharacterAtIndex(str, 0) != '#') return false;
1231 if (CFStringGetCharacterAtIndex(str, 1) == 'x') {
1232 isHex = true;
1233 idx = 2;
1234 if (len == 2) return false;
1235 } else {
1236 isHex = false;
1237 idx = 1;
1238 }
1239
1240 while (idx < len) {
1241 UniChar ch;
1242 ch = CFStringGetCharacterAtIndex(str, idx);
1243 idx ++;
1244 if (!(ch <= '9' && ch >= '0') &&
1245 !(isHex && ((ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F')))) {
1246 break;
1247 }
1248 }
1249 return (idx == len);
1250 }
1251
1252 /*
1253 [67] Reference ::= EntityRef | CharRef
1254 [68] EntityRef ::= '&' Name ';'
1255 */
1256 static Boolean parseEntityReference(CFXMLParserRef parser, Boolean report) {
1257 UniChar ch;
1258 CFXMLEntityReferenceInfo entData;
1259 CFStringRef name = NULL;
1260 if (!_inputStreamPeekCharacter(&parser->input, &ch)) {
1261 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing EntityReference");
1262 return false;
1263 }
1264 if (ch == '#') {
1265 ch = ';';
1266 if (!_inputStreamScanToCharacters(&parser->input, &ch, 1, (CFMutableStringRef)parser->node->dataString)) {
1267 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing EntityReference");
1268 return false;
1269 } else if (!validateCharacterReference(parser->node->dataString)) {
1270 _CFReportError(parser, kCFXMLErrorMalformedCharacterReference, "Encountered illegal character while parsing character reference");
1271 return false;
1272 }
1273 entData.entityType = kCFXMLEntityTypeCharacter;
1274 name = parser->node->dataString;
1275 } else if (!_inputStreamScanXMLName(&parser->input, false, report ? &name : NULL) || !_inputStreamGetCharacter(&parser->input, &ch) || ch != ';') {
1276 if (_inputStreamAtEOF(&parser->input)) {
1277 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing EntityReference");
1278 return false;
1279 } else {
1280 _CFReportError(parser, kCFXMLErrorMalformedName, "Encountered malformed name while parsing EntityReference");
1281 return false;
1282 }
1283 } else {
1284 entData.entityType = kCFXMLEntityTypeParsedInternal;
1285 }
1286 if (report) {
1287 CFStringRef tmp = parser->node->dataString;
1288 Boolean success;
1289 parser->node->dataTypeID = kCFXMLNodeTypeEntityReference;
1290 parser->node->dataString = name;
1291 parser->node->additionalData = &entData;
1292 success = reportNewLeaf(parser);
1293 parser->node->additionalData = NULL;
1294 parser->node->dataString = tmp;
1295 return success;
1296 } else {
1297 return true;
1298 }
1299 }
1300
1301 #if 0
1302 // Kept from old entity reference parsing....
1303 {
1304 switch (*(parser->curr)) {
1305 case 'l': // "lt"
1306 if (len >= 3 && *(parser->curr+1) == 't' && *(parser->curr+2) == ';') {
1307 ch = '<';
1308 parser->curr += 3;
1309 break;
1310 }
1311 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
1312 return;
1313 case 'g': // "gt"
1314 if (len >= 3 && *(parser->curr+1) == 't' && *(parser->curr+2) == ';') {
1315 ch = '>';
1316 parser->curr += 3;
1317 break;
1318 }
1319 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
1320 return;
1321 case 'a': // "apos" or "amp"
1322 if (len < 4) { // Not enough characters for either conversion
1323 parser->errorString = CFStringCreateWithCString(parser->allocator, "Encountered unexpected EOF", kCFStringEncodingASCII);
1324 return;
1325 }
1326 if (*(parser->curr+1) == 'm') {
1327 // "amp"
1328 if (*(parser->curr+2) == 'p' && *(parser->curr+3) == ';') {
1329 ch = '&';
1330 parser->curr += 4;
1331 break;
1332 }
1333 } else if (*(parser->curr+1) == 'p') {
1334 // "apos"
1335 if (len > 4 && *(parser->curr+2) == 'o' && *(parser->curr+3) == 's' && *(parser->curr+4) == ';') {
1336 ch = '\'';
1337 parser->curr += 5;
1338 break;
1339 }
1340 }
1341 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
1342 return;
1343 case 'q': // "quote"
1344 if (len >= 6 && *(parser->curr+1) == 'u' && *(parser->curr+2) == 'o' && *(parser->curr+3) == 't' && *(parser->curr+4) == 'e' && *(parser->curr+5) == ';') {
1345 ch = '\"';
1346 parser->curr += 6;
1347 break;
1348 }
1349 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
1350 return;
1351 case '#':
1352 {
1353 UniChar num = 0;
1354 Boolean isHex = false;
1355 if ( len < 4) { // Not enough characters to make it all fit! Need at least "&#d;"
1356 parser->errorString = CFStringCreateWithCString(parser->allocator, "Encountered unexpected EOF", kCFStringEncodingASCII);
1357 return;
1358 }
1359 parser->curr ++;
1360 if (*(parser->curr) == 'x') {
1361 isHex = true;
1362 parser->curr ++;
1363 }
1364 while (parser->curr < parser->end) {
1365 ch = *(parser->curr);
1366 if (ch == ';') {
1367 CFStringAppendCharacters(string, &num, 1);
1368 parser->curr ++;
1369 return;
1370 }
1371 if (!isHex) num = num*10;
1372 else num = num << 4;
1373 if (ch <= '9' && ch >= '0') {
1374 num += (ch - '0');
1375 } else if (!isHex) {
1376 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unexpected character %c at line %d"), ch, lineNumber(parser));
1377 return;
1378 } else if (ch >= 'a' && ch <= 'f') {
1379 num += 10 + (ch - 'a');
1380 } else if (ch >= 'A' && ch <= 'F') {
1381 num += 10 + (ch - 'A');
1382 } else {
1383 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unexpected character %c at line %d"), ch, lineNumber(parser));
1384 return;
1385 }
1386 }
1387 parser->errorString = CFStringCreateWithCString(parser->allocator, "Encountered unexpected EOF", kCFStringEncodingASCII);
1388 return;
1389 }
1390 default:
1391 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
1392 return;
1393 }
1394 CFStringAppendCharacters(string, &ch, 1);
1395 }
1396 #endif
1397
1398 /*
1399 [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
1400 */
1401 static Boolean parsePCData(CFXMLParserRef parser) {
1402 UniChar ch;
1403 Boolean done = false;
1404 _inputStreamSetMark(&parser->input);
1405 while (!done && _inputStreamGetCharacter(&parser->input, &ch)) {
1406 switch (ch) {
1407 case '<':
1408 case '&':
1409 _inputStreamReturnCharacter(&parser->input, ch);
1410 done = true;
1411 break;
1412 case ']':
1413 {
1414 const UniChar endSequence[2] = {']', '>'};
1415 if (_inputStreamMatchString(&parser->input, endSequence, 2)) {
1416 _CFReportError(parser, kCFXMLErrorMalformedParsedCharacterData, "Encountered \"]]>\" in parsed character data");
1417 _inputStreamClearMark(&parser->input);
1418 return false;
1419 }
1420 break;
1421 }
1422 default:
1423 ;
1424 }
1425 }
1426 _inputStreamGetCharactersFromMark(&parser->input, (CFMutableStringRef)(parser->node->dataString));
1427 _inputStreamClearMark(&parser->input);
1428 parser->node->dataTypeID = kCFXMLNodeTypeText;
1429 parser->node->additionalData = NULL;
1430 return reportNewLeaf(parser);
1431 }
1432
1433 /*
1434 [42] ETag ::= '</' Name S? '>'
1435 */
1436 static Boolean parseCloseTag(CFXMLParserRef parser, CFStringRef tag) {
1437 const UniChar beginEndTag[2] = {'<', '/'};
1438 Boolean unexpectedEOF = false, mismatch = false;
1439 CFStringRef closeTag;
1440
1441 // We can get away with testing pointer equality between tag & closeTag because scanXMLName guarantees the strings it returns are unique.
1442 if (_inputStreamMatchString(&parser->input, beginEndTag, 2) && _inputStreamScanXMLName(&parser->input, false, &closeTag) && closeTag == tag) {
1443
1444 UniChar ch;
1445 _inputStreamSkipWhitespace(&parser->input, NULL);
1446 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
1447 unexpectedEOF = true;
1448 } else if (ch != '>') {
1449 mismatch = true;
1450 }
1451 } else if (_inputStreamAtEOF(&parser->input)) {
1452 unexpectedEOF = true;
1453 } else {
1454 mismatch = true;
1455 }
1456
1457 if (unexpectedEOF || mismatch) {
1458 if (unexpectedEOF) {
1459 parser->errorString = CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered unexpected EOF while parsing close tag for <%@>"), tag);
1460 parser->status = kCFXMLErrorUnexpectedEOF;
1461 if(parser->callBacks.handleError) INVOKE_CALLBACK3(parser->callBacks.handleError, parser, kCFXMLErrorUnexpectedEOF, parser->context.info);
1462 } else {
1463 parser->errorString = CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered malformed close tag for <%@>"), tag);
1464 parser->status = kCFXMLErrorMalformedCloseTag;
1465 if(parser->callBacks.handleError) INVOKE_CALLBACK3(parser->callBacks.handleError, parser, kCFXMLErrorMalformedCloseTag, parser->context.info);
1466 }
1467 return false;
1468 }
1469 return true;
1470 }
1471
1472 /*
1473 [39] element ::= EmptyElementTag | STag content ETag
1474 [40] STag ::= '<' Name (S Attribute)* S? '>'
1475 [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
1476 */
1477 static Boolean parseTag(CFXMLParserRef parser) {
1478 UniChar ch;
1479 void *tag;
1480 CFXMLElementInfo data;
1481 Boolean success = true;
1482 CFStringRef tagName;
1483
1484 if (!_inputStreamScanXMLName(&parser->input, false, &tagName)) {
1485 _CFReportError(parser, kCFXMLErrorMalformedStartTag, "Encountered malformed start tag");
1486 return false;
1487 }
1488
1489 _inputStreamSkipWhitespace(&parser->input, NULL);
1490
1491 if (!parseAttributes(parser)) return false; // parsed directly into parser->argDict ; parseAttributes consumes any trailing whitespace
1492 data.attributes = parser->argDict;
1493 data.attributeOrder = parser->argArray;
1494 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
1495 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF");
1496 return false;
1497 }
1498 if (ch == '/') {
1499 data.isEmpty = true;
1500 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
1501 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF");
1502 return false;
1503 }
1504 } else {
1505 data.isEmpty = false;
1506 }
1507 if (ch != '>') {
1508 _CFReportError(parser, kCFXMLErrorMalformedStartTag, "Encountered malformed start tag");
1509 return false;
1510 }
1511
1512 if (*parser->top || parser->top == parser->stack) {
1513 CFStringRef oldStr = parser->node->dataString;
1514 parser->node->dataTypeID = kCFXMLNodeTypeElement;
1515 parser->node->dataString = tagName;
1516 parser->node->additionalData = &data;
1517 tag = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info);
1518 if (tag && parser->status == kCFXMLStatusParseInProgress) {
1519 INVOKE_CALLBACK4(parser->callBacks.addChild, parser, *parser->top, tag, parser->context.info);
1520 }
1521 parser->node->additionalData = NULL;
1522 parser->node->dataString = oldStr;
1523 if (parser->status != kCFXMLStatusParseInProgress) {
1524 // callback called CFXMLParserAbort()
1525 _CFReportError(parser, parser->status, NULL);
1526 return false;
1527 }
1528 } else {
1529 tag = NULL;
1530 }
1531
1532 pushXMLNode(parser, tag);
1533 if (!data.isEmpty) {
1534 success = parseTagContent(parser);
1535 if (success) {
1536 success = parseCloseTag(parser, tagName);
1537 }
1538 }
1539 parser->top --;
1540
1541 if (success && tag) {
1542 INVOKE_CALLBACK3(parser->callBacks.endXMLStructure, parser, tag, parser->context.info);
1543 if (parser->status != kCFXMLStatusParseInProgress) {
1544 _CFReportError(parser, parser->status, NULL);
1545 return false;
1546 }
1547 }
1548 return success;
1549 }
1550
1551 /*
1552 [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
1553 [67] Reference ::= EntityRef | CharRef
1554 [68] EntityRef ::= '&' Name ';'
1555 */
1556 // For the moment, we don't worry about references in the attribute values.
1557 static Boolean parseAttributeValue(CFXMLParserRef parser, CFMutableStringRef str) {
1558 UniChar quote, ch;
1559 Boolean success = _inputStreamGetCharacter(&parser->input, &quote);
1560 if (!success || (quote != '\'' && quote != '\"')) return false;
1561 if (str) _inputStreamSetMark(&parser->input);
1562 while (_inputStreamGetCharacter(&parser->input, &ch) && ch != quote) {
1563 switch (ch) {
1564 case '<': success = false; break;
1565 case '&':
1566 if (!parseEntityReference(parser, false)) {
1567 success = false;
1568 break;
1569 }
1570 default:
1571 ;
1572 }
1573 }
1574
1575 if (success && _inputStreamAtEOF(&parser->input)) {
1576 success = false;
1577 }
1578 if (str) {
1579 if (success) {
1580 _inputStreamReturnCharacter(&parser->input, quote);
1581 _inputStreamGetCharactersFromMark(&parser->input, str);
1582 _inputStreamGetCharacter(&parser->input, &ch);
1583 }
1584 _inputStreamClearMark(&parser->input);
1585 }
1586 return success;
1587 }
1588
1589 /*
1590 [40] STag ::= '<' Name (S Attribute)* S? '>'
1591 [41] Attribute ::= Name Eq AttValue
1592 [25] Eq ::= S? '=' S?
1593 */
1594
1595 // Expects parser->curr to be at the first content character; will consume the trailing whitespace.
1596 Boolean parseAttributes(CFXMLParserRef parser) {
1597 UniChar ch;
1598 CFMutableDictionaryRef dict;
1599 CFMutableArrayRef array;
1600 Boolean failure = false;
1601 if (_inputStreamPeekCharacter(&parser->input, &ch) == '>') {
1602 if (parser->argDict) {
1603 CFDictionaryRemoveAllValues(parser->argDict);
1604 CFArrayRemoveAllValues(parser->argArray);
1605 }
1606 return true; // No attributes; let caller deal with it
1607 }
1608 if (!parser->argDict) {
1609 parser->argDict = CFDictionaryCreateMutable(CFGetAllocator(parser), 0, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
1610 parser->argArray = CFArrayCreateMutable(CFGetAllocator(parser), 0, &kCFTypeArrayCallBacks);
1611 } else {
1612 CFDictionaryRemoveAllValues(parser->argDict);
1613 CFArrayRemoveAllValues(parser->argArray);
1614 }
1615 dict = parser->argDict;
1616 array = parser->argArray;
1617 while (!failure && _inputStreamPeekCharacter(&parser->input, &ch) && ch != '>' && ch != '/') {
1618 CFStringRef key;
1619 CFMutableStringRef value;
1620 if (!_inputStreamScanXMLName(&parser->input, false, &key)) {
1621 failure = true;
1622 break;
1623 }
1624 if (CFArrayGetFirstIndexOfValue(array, CFRangeMake(0, CFArrayGetCount(array)), key) != kCFNotFound) {
1625 _CFReportError(parser, kCFXMLErrorMalformedStartTag, "Found repeated attribute");
1626 return false;
1627 }
1628 _inputStreamSkipWhitespace(&parser->input, NULL);
1629 if (!_inputStreamGetCharacter(&parser->input, &ch) || ch != '=') {
1630 failure = true;
1631 break;
1632 }
1633 _inputStreamSkipWhitespace(&parser->input, NULL);
1634 value = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
1635 if (!parseAttributeValue(parser, value)) {
1636 CFRelease(value);
1637 failure = true;
1638 break;
1639 }
1640 CFArrayAppendValue(array, key);
1641 CFDictionarySetValue(dict, key, value);
1642 CFRelease(value);
1643 _inputStreamSkipWhitespace(&parser->input, NULL);
1644 }
1645 if (failure) {
1646 //#warning CF:Include tag name in this error report
1647 _CFReportError(parser, kCFXMLErrorMalformedStartTag, "Found illegal character while parsing element tag");
1648 return false;
1649 } else if (_inputStreamAtEOF(&parser->input)) {
1650 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing element attributes");
1651 return false;
1652 } else {
1653 return true;
1654 }
1655 }
1656
1657 /*
1658 [1] document ::= prolog element Misc*
1659 [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1660 [27] Misc ::= Comment | PI | S
1661 [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1662
1663 We treat XMLDecl as a plain old PI, since PI is part of Misc. This changes the prolog and document productions to
1664 [22-1] prolog ::= Misc* (doctypedecl Misc*)?
1665 [1-1] document ::= Misc* (doctypedecl Misc*)? element Misc*
1666
1667 NOTE: This function assumes parser->stack has a valid top. I.e. the document pointer has already been created!
1668 */
1669 static Boolean parseXML(CFXMLParserRef parser) {
1670 Boolean success = true, sawDTD = false, sawElement = false;
1671 UniChar ch;
1672 while (success && _inputStreamPeekCharacter(&parser->input, &ch)) {
1673 switch (ch) {
1674 case ' ':
1675 case '\n':
1676 case '\t':
1677 case '\r':
1678 success = parseWhitespace(parser);
1679 break;
1680 case '<':
1681 _inputStreamGetCharacter(&parser->input, &ch);
1682 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
1683 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing top-level document");
1684 return false;
1685 }
1686 if (ch == '!') {
1687 // Comment or DTD
1688 UniChar dashes[2] = {'-', '-'};
1689 if (_inputStreamMatchString(&parser->input, dashes, 2)) {
1690 // Comment
1691 success = parseComment(parser, true);
1692 } else {
1693 // Should be DTD
1694 if (sawDTD) {
1695 _CFReportError(parser, kCFXMLErrorMalformedDocument, "Encountered a second DTD");
1696 return false;
1697 }
1698 success = parseDTD(parser);
1699 if (success) sawDTD = true;
1700 }
1701 } else if (ch == '?') {
1702 // Processing instruction
1703 success = parseProcessingInstruction(parser, true);
1704 } else {
1705 // Tag or malformed
1706 if (sawElement) {
1707 _CFReportError(parser, kCFXMLErrorMalformedDocument, "Encountered second top-level element");
1708 return false;
1709 }
1710 _inputStreamReturnCharacter(&parser->input, ch);
1711 success = parseTag(parser);
1712 if (success) sawElement = true;
1713 }
1714 break;
1715 default: {
1716 parser->status = kCFXMLErrorMalformedDocument;
1717 parser->errorString = ch < 256 ?
1718 CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered unexpected character 0x%x (\'%c\') at top-level"), ch, ch) :
1719 CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered unexpected Unicode character 0x%x at top-level"), ch);
1720
1721 if (parser->callBacks.handleError) {
1722 INVOKE_CALLBACK3(parser->callBacks.handleError, parser, parser->status, parser->context.info);
1723 }
1724 return false;
1725 }
1726 }
1727 }
1728
1729 if (!success) return false;
1730 if (!sawElement) {
1731 _CFReportError(parser, kCFXMLErrorElementlessDocument, "No element found in document");
1732 return false;
1733 }
1734 return true;
1735 }
1736
1737 static void _CFReportError(CFXMLParserRef parser, CFXMLParserStatusCode errNum, const char *str) {
1738 if (str) {
1739 parser->status = errNum;
1740 parser->errorString = CFStringCreateWithCString(CFGetAllocator(parser), str, kCFStringEncodingASCII);
1741 }
1742 if (parser->callBacks.handleError) {
1743 INVOKE_CALLBACK3(parser->callBacks.handleError, parser, errNum, parser->context.info);
1744 }
1745 }
1746
1747 // Assumes parser->node has been set and is ready to go
1748 static Boolean reportNewLeaf(CFXMLParserRef parser) {
1749 void *xmlStruct;
1750 if (*(parser->top) == NULL) return true;
1751
1752 xmlStruct = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info);
1753 if (xmlStruct && parser->status == kCFXMLStatusParseInProgress) {
1754 INVOKE_CALLBACK4(parser->callBacks.addChild, parser, *(parser->top), xmlStruct, parser->context.info);
1755 if (parser->status == kCFXMLStatusParseInProgress) INVOKE_CALLBACK3(parser->callBacks.endXMLStructure, parser, xmlStruct, parser->context.info);
1756 }
1757 if (parser->status != kCFXMLStatusParseInProgress) {
1758 _CFReportError(parser, parser->status, NULL);
1759 return false;
1760 }
1761 return true;
1762 }
1763
1764 static void pushXMLNode(CFXMLParserRef parser, void *node) {
1765 parser->top ++;
1766 if ((unsigned)(parser->top - parser->stack) == parser->capacity) {
1767 parser->stack = CFAllocatorReallocate(CFGetAllocator(parser), parser->stack, 2 * parser->capacity * sizeof(void *), 0);
1768 parser->top = parser->stack + parser->capacity;
1769 parser->capacity = 2*parser->capacity;
1770 }
1771 *(parser->top) = node;
1772 }
1773
1774 /**************************/
1775 /* Parsing to a CFXMLTree */
1776 /**************************/
1777
1778 static void *_XMLTreeCreateXMLStructure(CFXMLParserRef parser, CFXMLNodeRef node, void *context) {
1779 CFXMLNodeRef myNode = CFXMLNodeCreateCopy(CFGetAllocator(parser), node);
1780 CFXMLTreeRef tree = CFXMLTreeCreateWithNode(CFGetAllocator(parser), myNode);
1781 CFRelease(myNode);
1782 return (void *)tree;
1783 }
1784
1785 static void _XMLTreeAddChild(CFXMLParserRef parser, void *parent, void *child, void *context) {
1786 CFTreeAppendChild((CFTreeRef)parent, (CFTreeRef)child);
1787 }
1788
1789 static void _XMLTreeEndXMLStructure(CFXMLParserRef parser, void *xmlType, void *context) {
1790 CFXMLTreeRef node = (CFXMLTreeRef)xmlType;
1791 if (CFTreeGetParent(node))
1792 CFRelease((CFXMLTreeRef)xmlType);
1793 }
1794
1795
1796 CFXMLTreeRef CFXMLTreeCreateFromData(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, UInt32 parseOptions, CFIndex parserVersion) {
1797 return CFXMLTreeCreateFromDataWithError(allocator, xmlData, dataSource, parseOptions, parserVersion, NULL);
1798 }
1799
1800 #if MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_3
1801
// Keys of the error dictionary that CFXMLTreeCreateFromDataWithError() fills in when a
// parse fails: the error description string, and CFNumbers holding the line number,
// the location and the parser status code.
1802 CONST_STRING_DECL(kCFXMLTreeErrorDescription, "kCFXMLTreeErrorDescription");
1803 CONST_STRING_DECL(kCFXMLTreeErrorLineNumber, "kCFXMLTreeErrorLineNumber");
1804 CONST_STRING_DECL(kCFXMLTreeErrorLocation, "kCFXMLTreeErrorLocation");
1805 CONST_STRING_DECL(kCFXMLTreeErrorStatusCode, "kCFXMLTreeErrorStatusCode");
1806
1807 CFXMLTreeRef CFXMLTreeCreateFromDataWithError(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex parserVersion, CFDictionaryRef *errorDict) {
1808 CFXMLParserRef parser;
1809 CFXMLParserCallBacks callbacks;
1810 CFXMLTreeRef result;
1811
1812 __CFGenericValidateType(xmlData, CFDataGetTypeID());
1813 CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__);
1814
1815 callbacks.createXMLStructure = _XMLTreeCreateXMLStructure;
1816 callbacks.addChild = _XMLTreeAddChild;
1817 callbacks.endXMLStructure = _XMLTreeEndXMLStructure;
1818 callbacks.resolveExternalEntity = NULL;
1819 callbacks.handleError = NULL;
1820 parser = CFXMLParserCreate(allocator, xmlData, dataSource, parseOptions, parserVersion, &callbacks, NULL);
1821
1822 if (CFXMLParserParse(parser)) {
1823 result = (CFXMLTreeRef)CFXMLParserGetDocument(parser);
1824 } else {
1825 if (errorDict) { // collect the error dictionary
1826 *errorDict = CFDictionaryCreateMutable(allocator, 4, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
1827 if (*errorDict) {
1828 CFIndex rawnum;
1829 CFNumberRef cfnum;
1830 CFStringRef errstring;
1831
1832 rawnum = CFXMLParserGetLocation(parser);
1833 cfnum = CFNumberCreate(allocator, kCFNumberSInt32Type, &rawnum);
1834 if(cfnum) {
1835 CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorLocation, cfnum);
1836 CFRelease(cfnum);
1837 }
1838
1839 rawnum = CFXMLParserGetLineNumber(parser);
1840 cfnum = CFNumberCreate(allocator, kCFNumberSInt32Type, &rawnum);
1841 if(cfnum) {
1842 CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorLineNumber, cfnum);
1843 CFRelease(cfnum);
1844 }
1845
1846 rawnum = CFXMLParserGetStatusCode(parser);
1847 cfnum = CFNumberCreate(allocator, kCFNumberSInt32Type, &rawnum);
1848 if(cfnum) {
1849 CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorStatusCode, cfnum);
1850 CFRelease(cfnum);
1851 }
1852
1853 errstring = CFXMLParserCopyErrorDescription(parser);
1854 if(errstring) {
1855 CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorDescription, errstring);
1856 CFRelease(errstring);
1857 }
1858 }
1859 }
1860 result = (CFXMLTreeRef)CFXMLParserGetDocument(parser);
1861 if (result) CFRelease(result);
1862 result = NULL;
1863 }
1864 CFRelease(parser);
1865 return result;
1866 }
1867
1868 /*
1869 At the very least we need to do <, >, &, ", and '. In addition, we'll have to do everything else in the string.
1870 We should also be handling items that are up over certain values correctly.
1871 */
1872 CFStringRef CFXMLCreateStringByEscapingEntities(CFAllocatorRef allocator, CFStringRef string, CFDictionaryRef entitiesDictionary) {
1873 CFAssert1(string != NULL, __kCFLogAssertion, "%s(): NULL string not permitted.", __PRETTY_FUNCTION__);
1874 CFMutableStringRef newString = CFStringCreateMutable(allocator, 0); // unbounded mutable string
1875 CFMutableCharacterSetRef startChars = CFCharacterSetCreateMutable(allocator);
1876
1877 CFStringInlineBuffer inlineBuf;
1878 CFIndex idx = 0;
1879 CFIndex mark = idx;
1880 CFIndex stringLength = CFStringGetLength(string);
1881 UniChar uc;
1882
1883 CFCharacterSetAddCharactersInString(startChars, CFSTR("&<>'\""));
1884
1885 CFStringInitInlineBuffer(string, &inlineBuf, CFRangeMake(0, stringLength));
1886 for(idx = 0; idx < stringLength; idx++) {
1887 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, idx);
1888 if(CFCharacterSetIsCharacterMember(startChars, uc)) {
1889 CFStringRef previousSubstring = CFStringCreateWithSubstring(allocator, string, CFRangeMake(mark, idx - mark));
1890 CFStringAppend(newString, previousSubstring);
1891 CFRelease(previousSubstring);
1892 switch(uc) {
1893 case '&':
1894 CFStringAppend(newString, CFSTR("&amp;"));
1895 break;
1896 case '<':
1897 CFStringAppend(newString, CFSTR("&lt;"));
1898 break;
1899 case '>':
1900 CFStringAppend(newString, CFSTR("&gt;"));
1901 break;
1902 case '\'':
1903 CFStringAppend(newString, CFSTR("&apos;"));
1904 break;
1905 case '"':
1906 CFStringAppend(newString, CFSTR("&quot;"));
1907 break;
1908 }
1909 mark = idx + 1;
1910 }
1911 }
1912 CFRelease(startChars);
1913 return newString;
1914 }
1915
1916 CFStringRef CFXMLCreateStringByUnescapingEntities(CFAllocatorRef allocator, CFStringRef string, CFDictionaryRef entitiesDictionary) {
1917 CFAssert1(string != NULL, __kCFLogAssertion, "%s(): NULL string not permitted.", __PRETTY_FUNCTION__);
1918
1919 CFStringInlineBuffer inlineBuf; /* use this for fast traversal of the string in question */
1920 CFStringRef sub;
1921 CFIndex lastChunkStart, length = CFStringGetLength(string);
1922 CFIndex i, entityStart;
1923 UniChar uc;
1924 UInt32 entity;
1925 int base;
1926 CFMutableDictionaryRef fullReplDict = entitiesDictionary ? CFDictionaryCreateMutableCopy(allocator, 0, entitiesDictionary) : CFDictionaryCreateMutable(allocator, 0, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
1927
1928 CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("amp"), (const void *)CFSTR("&"));
1929 CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("quot"), (const void *)CFSTR("\""));
1930 CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("lt"), (const void *)CFSTR("<"));
1931 CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("gt"), (const void *)CFSTR(">"));
1932 CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("apos"), (const void *)CFSTR("'"));
1933
1934 CFStringInitInlineBuffer(string, &inlineBuf, CFRangeMake(0, length - 1));
1935 CFMutableStringRef newString = CFStringCreateMutable(allocator, 0);
1936
1937 lastChunkStart = 0;
1938 // Scan through the string in its entirety
1939 for(i = 0; i < length; ) {
1940 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++; // grab the next character and move i.
1941
1942 if(uc == '&') {
1943 entityStart = i - 1;
1944 entity = 0xFFFF; // set this to a not-Unicode character as sentinel
1945 // we've hit the beginning of an entity. Copy everything from lastChunkStart to this point.
1946 if(lastChunkStart < i - 1) {
1947 sub = CFStringCreateWithSubstring(allocator, string, CFRangeMake(lastChunkStart, (i - 1) - lastChunkStart));
1948 CFStringAppend(newString, sub);
1949 CFRelease(sub);
1950 }
1951
1952 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++; // grab the next character and move i.
1953 // Now we can process the entity reference itself
1954 if(uc == '#') { // this is a numeric entity.
1955 base = 10;
1956 entity = 0;
1957 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++;
1958
1959 if(uc == 'x') { // only lowercase x allowed. Translating numeric entity as hexadecimal.
1960 base = 16;
1961 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++;
1962 }
1963
1964 // process the provided digits 'til we're finished
1965 while(true) {
1966 if (uc >= '0' && uc <= '9')
1967 entity = entity * base + (uc-'0');
1968 else if (uc >= 'a' && uc <= 'f' && base == 16)
1969 entity = entity * base + (uc-'a'+10);
1970 else if (uc >= 'A' && uc <= 'F' && base == 16)
1971 entity = entity * base + (uc-'A'+10);
1972 else break;
1973
1974 if (i < length) {
1975 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++;
1976 }
1977 else
1978 break;
1979 }
1980 }
1981
1982 // Scan to the end of the entity
1983 while(uc != ';' && i < length) {
1984 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++;
1985 }
1986
1987 if(0xFFFF != entity) { // it was numeric, and translated.
1988 // Now, output the result fo the entity
1989 if(entity >= 0x10000) {
1990 UniChar characters[2] = { ((entity - 0x10000) >> 10) + 0xD800, ((entity - 0x10000) & 0x3ff) + 0xDC00 };
1991 CFStringAppendCharacters(newString, characters, 2);
1992 } else {
1993 UniChar character = entity;
1994 CFStringAppendCharacters(newString, &character, 1);
1995 }
1996 } else { // it wasn't numeric.
1997 sub = CFStringCreateWithSubstring(allocator, string, CFRangeMake(entityStart + 1, (i - entityStart - 2))); // This trims off the & and ; from the string, so we can use it against the dictionary itself.
1998 CFStringRef replacementString = CFDictionaryGetValue(fullReplDict, sub);
1999 if(replacementString) {
2000 CFStringAppend(newString, replacementString);
2001 } else {
2002 CFRelease(sub); // let the old substring go, since we didn't find it in the dictionary
2003 sub = CFStringCreateWithSubstring(allocator, string, CFRangeMake(entityStart, (i - entityStart))); // create a new one, including the & and ;
2004 CFStringAppend(newString, sub); // ...and append that.
2005 }
2006 CFRelease(sub); // in either case, release the most-recent "sub"
2007 }
2008
2009 // move the lastChunkStart to the beginning of the next chunk.
2010 lastChunkStart = i;
2011 }
2012 }
2013 if(lastChunkStart < length) { // we've come out of the loop, let's get the rest of the string and tack it on.
2014 sub = CFStringCreateWithSubstring(allocator, string, CFRangeMake(lastChunkStart, i - lastChunkStart));
2015 CFStringAppend(newString, sub);
2016 CFRelease(sub);
2017 }
2018
2019 CFRelease(fullReplDict);
2020
2021 return newString;
2022 }
2023
2024 #endif
2025