]> git.saurik.com Git - apple/cf.git/blob - CFXMLParser.c
CF-855.11.tar.gz
[apple/cf.git] / CFXMLParser.c
1 /*
2 * Copyright (c) 2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24 /* CFXMLParser.c
25 Copyright (c) 1999-2013, Apple Inc. All rights reserved.
26 Responsibility: David Smith
27 */
28
29 #include <CoreFoundation/CFXMLParser.h>
30 #include <CoreFoundation/CFNumber.h>
31 #include "CFXMLInputStream.h"
32 #include "CFUniChar.h"
33 #include "CFInternal.h"
34
35 #pragma GCC diagnostic push
36 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
37
38 struct __CFXMLParser {
39 CFRuntimeBase _cfBase;
40
41 _CFXMLInputStream input;
42
43 void **stack;
44 void **top;
45 UInt32 capacity;
46
47 struct __CFXMLNode *node; // Our private node; we use it to report back information
48 CFMutableDictionaryRef argDict;
49 CFMutableArrayRef argArray;
50
51 UInt32 options;
52 CFXMLParserCallBacks callBacks;
53 CFXMLParserContext context;
54
55 CFXMLParserStatusCode status;
56 CFStringRef errorString;
57 };
58
59 static CFStringRef __CFXMLParserCopyDescription(CFTypeRef cf) {
60 const struct __CFXMLParser *parser = (const struct __CFXMLParser *)cf;
61 return CFStringCreateWithFormat(CFGetAllocator(cf), NULL, CFSTR("<CFXMLParser %p>"), parser);
62 }
63
64 static void __CFXMLParserDeallocate(CFTypeRef cf) {
65 struct __CFXMLParser *parser = (struct __CFXMLParser *)cf;
66 CFAllocatorRef alloc = CFGetAllocator(parser);
67 _freeInputStream(&(parser->input));
68 if (parser->argDict) CFRelease(parser->argDict);
69 if (parser->argArray) CFRelease(parser->argArray);
70 if (parser->errorString) CFRelease(parser->errorString);
71 if (parser->node) CFRelease(parser->node);
72 CFAllocatorDeallocate(alloc, parser->stack);
73 if (parser->context.info && parser->context.release) {
74 parser->context.release(parser->context.info);
75 }
76 }
77
78 static CFTypeID __kCFXMLParserTypeID = _kCFRuntimeNotATypeID;
79
80 static const CFRuntimeClass __CFXMLParserClass = {
81 0,
82 "CFXMLParser",
83 NULL, // init
84 NULL, // copy
85 __CFXMLParserDeallocate,
86 NULL,
87 NULL,
88 NULL, //
89 __CFXMLParserCopyDescription
90 };
91
92 static void __CFXMLParserInitialize(void) {
93 __kCFXMLParserTypeID = _CFRuntimeRegisterClass(&__CFXMLParserClass);
94 }
95
96 CFTypeID CFXMLParserGetTypeID(void) {
97 if (_kCFRuntimeNotATypeID == __kCFXMLParserTypeID) __CFXMLParserInitialize();
98 return __kCFXMLParserTypeID;
99 }
100
101 void CFXMLParserGetContext(CFXMLParserRef parser, CFXMLParserContext *context) {
102 CFAssert1(parser != NULL, __kCFLogAssertion, "%s(): NULL parser not permitted", __PRETTY_FUNCTION__);
103 __CFGenericValidateType(parser, CFXMLParserGetTypeID());
104 if (context) {
105 context->version = parser->context.version;
106 context->info = parser->context.info;
107 context->retain = parser->context.retain;
108 context->release = parser->context.release;
109 context->copyDescription = parser->context.copyDescription;
110 UNFAULT_CALLBACK(context->retain);
111 UNFAULT_CALLBACK(context->release);
112 UNFAULT_CALLBACK(context->copyDescription);
113 }
114 }
115
116 void CFXMLParserGetCallBacks(CFXMLParserRef parser, CFXMLParserCallBacks *callBacks) {
117 __CFGenericValidateType(parser, CFXMLParserGetTypeID());
118 if (callBacks) {
119 callBacks->version = parser->callBacks.version;
120 callBacks->createXMLStructure = parser->callBacks.createXMLStructure;
121 callBacks->addChild = parser->callBacks.addChild;
122 callBacks->endXMLStructure = parser->callBacks.endXMLStructure;
123 callBacks->resolveExternalEntity = parser->callBacks.resolveExternalEntity;
124 callBacks->handleError = parser->callBacks.handleError;
125 UNFAULT_CALLBACK(callBacks->createXMLStructure);
126 UNFAULT_CALLBACK(callBacks->addChild);
127 UNFAULT_CALLBACK(callBacks->endXMLStructure);
128 UNFAULT_CALLBACK(callBacks->resolveExternalEntity);
129 UNFAULT_CALLBACK(callBacks->handleError);
130 }
131 }
132
133 CFURLRef CFXMLParserGetSourceURL(CFXMLParserRef parser) {
134 __CFGenericValidateType(parser, CFXMLParserGetTypeID());
135 return parser->input.url;
136 }
137
138 /* Returns the character index or line number of the current parse location */
139 CFIndex CFXMLParserGetLocation(CFXMLParserRef parser) {
140 __CFGenericValidateType(parser, CFXMLParserGetTypeID());
141 return _inputStreamCurrentLocation(&parser->input);
142 }
143
144 CFIndex CFXMLParserGetLineNumber(CFXMLParserRef parser) {
145 __CFGenericValidateType(parser, CFXMLParserGetTypeID());
146 return _inputStreamCurrentLine(&parser->input);
147 }
148
149 /* Returns the top-most object returned by the createXMLStructure callback */
150 void *CFXMLParserGetDocument(CFXMLParserRef parser) {
151 __CFGenericValidateType(parser, CFXMLParserGetTypeID());
152 if (parser->capacity > 0)
153 return parser->stack[0];
154 else
155 return NULL;
156 }
157
158 CFXMLParserStatusCode CFXMLParserGetStatusCode(CFXMLParserRef parser) {
159 __CFGenericValidateType(parser, CFXMLParserGetTypeID());
160 return parser->status;
161 }
162
163 CFStringRef CFXMLParserCopyErrorDescription(CFXMLParserRef parser) {
164 __CFGenericValidateType(parser, CFXMLParserGetTypeID());
165 return (CFStringRef)CFRetain(parser->errorString);
166 }
167
168 void CFXMLParserAbort(CFXMLParserRef parser, CFXMLParserStatusCode errorCode, CFStringRef errorDescription) {
169 __CFGenericValidateType(parser, CFXMLParserGetTypeID());
170 CFAssert1(errorCode > 0, __kCFLogAssertion, "%s(): errorCode must be greater than zero", __PRETTY_FUNCTION__);
171 CFAssert1(errorDescription != NULL, __kCFLogAssertion, "%s(): errorDescription may not be NULL", __PRETTY_FUNCTION__);
172 __CFGenericValidateType(errorDescription, CFStringGetTypeID());
173
174 parser->status = errorCode;
175 if (parser->errorString) CFRelease(parser->errorString);
176 parser->errorString = (CFStringRef)CFStringCreateCopy(kCFAllocatorSystemDefault, errorDescription);
177 }
178
179
180 static Boolean parseXML(CFXMLParserRef parser);
181 static Boolean parseComment(CFXMLParserRef parser, Boolean report);
182 static Boolean parseProcessingInstruction(CFXMLParserRef parser, Boolean report);
183 static Boolean parseInlineDTD(CFXMLParserRef parser);
184 static Boolean parseDTD(CFXMLParserRef parser);
185 static Boolean parsePhysicalEntityReference(CFXMLParserRef parser);
186 static Boolean parseCDSect(CFXMLParserRef parser);
187 static Boolean parseEntityReference(CFXMLParserRef parser, Boolean report);
188 static Boolean parsePCData(CFXMLParserRef parser);
189 static Boolean parseWhitespace(CFXMLParserRef parser);
190 static Boolean parseAttributeListDeclaration(CFXMLParserRef parser);
191 static Boolean parseNotationDeclaration(CFXMLParserRef parser);
192 static Boolean parseElementDeclaration(CFXMLParserRef parser);
193 static Boolean parseEntityDeclaration(CFXMLParserRef parser);
194 static Boolean parseExternalID(CFXMLParserRef parser, Boolean alsoAcceptPublicID, CFXMLExternalID *extID);
195 static Boolean parseCloseTag(CFXMLParserRef parser, CFStringRef tag);
196 static Boolean parseTagContent(CFXMLParserRef parser);
197 static Boolean parseTag(CFXMLParserRef parser);
198 static Boolean parseAttributes(CFXMLParserRef parser);
199 static Boolean parseAttributeValue(CFXMLParserRef parser, CFMutableStringRef str);
200
201 // Utilities; may need to make these accessible to the property list parser to avoid code duplication
202 static void _CFReportError(CFXMLParserRef parser, CFXMLParserStatusCode errNum, const char *str);
203 static Boolean reportNewLeaf(CFXMLParserRef parser); // Assumes parser->node has been set and is ready to go
204 static void pushXMLNode(CFXMLParserRef parser, void *node);
205
206 static CFXMLParserRef __CFXMLParserInit(CFAllocatorRef alloc, CFURLRef dataSource, CFOptionFlags options, CFDataRef xmlData, CFIndex version, CFXMLParserCallBacks *callBacks, CFXMLParserContext *context) {
207 struct __CFXMLParser *parser = (struct __CFXMLParser *)_CFRuntimeCreateInstance(alloc, CFXMLParserGetTypeID(), sizeof(struct __CFXMLParser) - sizeof(CFRuntimeBase), NULL);
208 struct __CFXMLNode *node = (struct __CFXMLNode *)_CFRuntimeCreateInstance(alloc, CFXMLNodeGetTypeID(), sizeof(struct __CFXMLNode) - sizeof(CFRuntimeBase), NULL);
209 UniChar *buf;
210 if (parser && node) {
211 alloc = CFGetAllocator(parser);
212 _initializeInputStream(&(parser->input), alloc, dataSource, xmlData);
213 parser->top = parser->stack;
214 parser->stack = NULL;
215 parser->capacity = 0;
216
217 buf = (UniChar *)CFAllocatorAllocate(alloc, 128*sizeof(UniChar), 0);
218 parser->node = node;
219 parser->node->dataString = CFStringCreateMutableWithExternalCharactersNoCopy(alloc, buf, 0, 128, alloc);
220 parser->node->additionalData = NULL;
221 parser->node->version = version;
222 parser->argDict = NULL; // don't create these until necessary
223 parser->argArray = NULL;
224
225 parser->options = options;
226 parser->callBacks = *callBacks;
227
228 FAULT_CALLBACK((void **)&(parser->callBacks.createXMLStructure));
229 FAULT_CALLBACK((void **)&(parser->callBacks.addChild));
230 FAULT_CALLBACK((void **)&(parser->callBacks.endXMLStructure));
231 FAULT_CALLBACK((void **)&(parser->callBacks.resolveExternalEntity));
232 FAULT_CALLBACK((void **)&(parser->callBacks.handleError));
233
234 if (context) {
235 parser->context = *context;
236 if (parser->context.info && parser->context.retain) {
237 parser->context.retain(parser->context.info);
238 }
239 } else {
240 parser->context.version = 0;
241 parser->context.info = NULL;
242 parser->context.retain = NULL;
243 parser->context.release = NULL;
244 parser->context.copyDescription = NULL;
245 }
246 parser->status = kCFXMLStatusParseNotBegun;
247 parser->errorString = NULL;
248 } else {
249 if (parser) CFRelease(parser);
250 if (node) CFRelease(node);
251 parser = NULL;
252 }
253 return parser;
254 }
255
256 CFXMLParserRef CFXMLParserCreate(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex versionOfNodes, CFXMLParserCallBacks *callBacks, CFXMLParserContext *context) {
257 CFAssert1(xmlData != NULL, __kCFLogAssertion, "%s(): NULL data not permitted", __PRETTY_FUNCTION__);
258 __CFGenericValidateType(xmlData, CFDataGetTypeID());
259 CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__);
260 CFAssert1(callBacks != NULL && callBacks->createXMLStructure != NULL && callBacks->addChild != NULL && callBacks->endXMLStructure != NULL, __kCFLogAssertion, "%s(): callbacks createXMLStructure, addChild, and endXMLStructure must all be non-NULL", __PRETTY_FUNCTION__);
261 CFAssert2(versionOfNodes <= 1, __kCFLogAssertion, "%s(): version number %d is higher than supported by CFXMLParser", __PRETTY_FUNCTION__, versionOfNodes);
262 CFAssert1(versionOfNodes != 0, __kCFLogAssertion, "%s(): version number 0 is no longer supported by CFXMLParser", __PRETTY_FUNCTION__);
263 return __CFXMLParserInit(allocator, dataSource, parseOptions, xmlData, versionOfNodes, callBacks, context);
264 }
265
266 CFXMLParserRef CFXMLParserCreateWithDataFromURL(CFAllocatorRef allocator, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex versionOfNodes, CFXMLParserCallBacks *callBacks, CFXMLParserContext *context) {
267 CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__);
268 CFAssert1(callBacks != NULL && callBacks->createXMLStructure != NULL && callBacks->addChild != NULL && callBacks->endXMLStructure != NULL, __kCFLogAssertion, "%s(): callbacks createXMLStructure, addChild, and endXMLStructure must all be non-NULL", __PRETTY_FUNCTION__);
269 CFAssert2(versionOfNodes <= 1, __kCFLogAssertion, "%s(): version number %d is higher than supported by CFXMLParser", __PRETTY_FUNCTION__, versionOfNodes);
270 CFAssert1(versionOfNodes != 0, __kCFLogAssertion, "%s(): version number 0 is no longer supported by CFXMLParser", __PRETTY_FUNCTION__);
271
272 return __CFXMLParserInit(allocator, dataSource, parseOptions, NULL, versionOfNodes, callBacks, context);
273 }
274
275 Boolean CFXMLParserParse(CFXMLParserRef parser) {
276 CFXMLDocumentInfo docData;
277 __CFGenericValidateType(parser, CFXMLParserGetTypeID());
278 if (parser->status != kCFXMLStatusParseNotBegun) return false;
279 parser->status = kCFXMLStatusParseInProgress;
280
281 if (!_openInputStream(&parser->input)) {
282 if (!parser->input.data) {
283 // couldn't load URL
284 parser->status = kCFXMLErrorNoData;
285 parser->errorString = CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("No data found at %@"), CFURLGetString(parser->input.url));
286 } else {
287 // couldn't figure out the encoding
288 CFAssert(parser->input.encoding == kCFStringEncodingInvalidId, __kCFLogAssertion, "CFXMLParser internal error: input stream could not be opened");
289 parser->status = kCFXMLErrorUnknownEncoding;
290 parser->errorString = CFStringCreateWithCString(CFGetAllocator(parser), "Encountered unknown encoding", kCFStringEncodingASCII);
291 }
292 if (parser->callBacks.handleError) {
293 INVOKE_CALLBACK3(parser->callBacks.handleError, parser, parser->status, parser->context.info);
294 }
295 return false;
296 }
297
298 // Create the document
299 parser->stack = (void **)CFAllocatorAllocate(CFGetAllocator(parser), 16 * sizeof(void *), 0);
300 parser->capacity = 16;
301 parser->node->dataTypeID = kCFXMLNodeTypeDocument;
302 docData.encoding = _inputStreamGetEncoding(&parser->input);
303 docData.sourceURL = parser->input.url;
304 parser->node->additionalData = &docData;
305 parser->stack[0] = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info);
306 parser->top = parser->stack;
307 parser->node->additionalData = NULL;
308
309 // Client may have called CFXMLParserAbort() during any callback, so we must always check to see if we have an error status after a callback
310 if (parser->status != kCFXMLStatusParseInProgress) {
311 _CFReportError(parser, parser->status, NULL);
312 return false;
313 }
314 return parseXML(parser);
315 }
316
/* The next several functions are all intended to parse past a particular XML structure. They expect the parser's input stream to be positioned at the first content character of their structure (e.g. parseComment expects the input stream to be positioned just past "<!--"). They parse to the end of their structure, calling any necessary callbacks along the way, and advancing the input stream as they go. They either return void (not possible for the parse to fail) or they return a Boolean (success/failure). The calling routines are expected to catch returned Booleans and fail immediately if false is returned. */
318
319 // [3] S ::= (#x20 | #x9 | #xD | #xA)+
320 static Boolean parseWhitespace(CFXMLParserRef parser) {
321 CFIndex len;
322 Boolean report = !(parser->options & kCFXMLParserSkipWhitespace);
323 len = _inputStreamSkipWhitespace(&parser->input, report ? (CFMutableStringRef)(parser->node->dataString) : NULL);
324 if (report && len) {
325 parser->node->dataTypeID = kCFXMLNodeTypeWhitespace;
326 parser->node->additionalData = NULL;
327 return reportNewLeaf(parser);
328 } else {
329 return true;
330 }
331 }
332
333 // parser should be just past "<!--"
334 static Boolean parseComment(CFXMLParserRef parser, Boolean report) {
335 const UniChar dashes[2] = {'-', '-'};
336 UniChar ch;
337 report = report && (!(parser->options & kCFXMLParserSkipMetaData));
338 if (!_inputStreamScanToCharacters(&parser->input, dashes, 2, report ? (CFMutableStringRef)(parser->node->dataString) : NULL) || !_inputStreamGetCharacter(&parser->input, &ch)) {
339 _CFReportError(parser, kCFXMLErrorUnexpectedEOF,"Found unexpected EOF while parsing comment");
340 return false;
341 } else if (ch != '>') {
342 _CFReportError(parser, kCFXMLErrorMalformedComment, "Found \"--\" within a comment");
343 return false;
344 } else if (report) {
345 parser->node->dataTypeID = kCFXMLNodeTypeComment;
346 parser->node->additionalData = NULL;
347 return reportNewLeaf(parser);
348 } else {
349 return true;
350 }
351 }
352
353 /*
354 [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
355 [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
356 */
357 // parser should be set to the first character after "<?"
358 static Boolean parseProcessingInstruction(CFXMLParserRef parser, Boolean report) {
359 const UniChar piTermination[2] = {'?', '>'};
360 CFMutableStringRef str;
361 CFStringRef name;
362
363 if (!_inputStreamScanXMLName(&parser->input, false, &name)) {
364 _CFReportError(parser, kCFXMLErrorMalformedProcessingInstruction, "Found malformed processing instruction");
365 return false;
366 }
367 _inputStreamSkipWhitespace(&parser->input, NULL);
368 str = (report && *parser->top) ? CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser)) : NULL;
369 if (!_inputStreamScanToCharacters(&parser->input, piTermination, 2, str)) {
370 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing processing instruction");
371 if (str) CFRelease(str);
372 return false;
373 }
374
375 if (str) {
376 CFXMLProcessingInstructionInfo data;
377 Boolean result;
378 CFStringRef tmp = parser->node->dataString;
379 parser->node->dataTypeID = kCFXMLNodeTypeProcessingInstruction;
380 parser->node->dataString = name;
381 data.dataString = str;
382 parser->node->additionalData = &data;
383 result = reportNewLeaf(parser);
384 parser->node->additionalData = NULL;
385 parser->node->dataString = tmp;
386 CFRelease(str);
387 return result;
388 } else {
389 return true;
390 }
391 }
392
393 /*
394 [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
395 */
396 static const UniChar _DoctypeOpening[7] = {'D', 'O', 'C', 'T', 'Y', 'P', 'E'};
397 // first character should be immediately after the "<!"
398 static Boolean parseDTD(CFXMLParserRef parser) {
399 UniChar ch;
400 Boolean success, hasExtID = false;
401 CFXMLDocumentTypeInfo docData = {{NULL, NULL}};
402 void *dtdStructure = NULL;
403 CFStringRef name;
404
405 // First pass "DOCTYPE"
406 success = _inputStreamMatchString(&parser->input, _DoctypeOpening, 7);
407 success = success && _inputStreamSkipWhitespace(&parser->input, NULL) != 0;
408 success = success && _inputStreamScanXMLName(&parser->input, false, &name);
409 if (success) {
410 _inputStreamSkipWhitespace(&parser->input, NULL);
411 success = _inputStreamPeekCharacter(&parser->input, &ch);
412 } else {
413 // didn't make it past "DOCTYPE" successfully.
414 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found malformed DTD");
415 return false;
416 }
417 if (success && ch != '[' && ch != '>') {
418 // ExternalID
419 hasExtID = true;
420 success = parseExternalID(parser, false, &(docData.externalID));
421 if (success) {
422 _inputStreamSkipWhitespace(&parser->input, NULL);
423 success = _inputStreamPeekCharacter(&parser->input, &ch);
424 }
425 }
426
427 if (!(parser->options & kCFXMLParserSkipMetaData) && *(parser->top)) {
428 CFStringRef tmp = parser->node->dataString;
429 parser->node->dataTypeID = kCFXMLNodeTypeDocumentType;
430 parser->node->dataString = name;
431 parser->node->additionalData = &docData;
432 dtdStructure = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info);
433 if (dtdStructure && parser->status == kCFXMLStatusParseInProgress) {
434 INVOKE_CALLBACK4(parser->callBacks.addChild, parser, *parser->top, dtdStructure, parser->context.info);
435 }
436 parser->node->additionalData = NULL;
437 parser->node->dataString = tmp;
438 if (parser->status != kCFXMLStatusParseInProgress) {
439 // callback called CFXMLParserAbort()
440 _CFReportError(parser, parser->status, NULL);
441 return false;
442 }
443 } else {
444 dtdStructure = NULL;
445 }
446 if (docData.externalID.publicID) CFRelease(docData.externalID.publicID);
447 if (docData.externalID.systemID) CFRelease(docData.externalID.systemID);
448 pushXMLNode(parser, dtdStructure);
449
450 if (success && ch == '[') {
451 // inline DTD
452 _inputStreamGetCharacter(&parser->input, &ch);
453 if (!parseInlineDTD(parser)) return false;
454 _inputStreamSkipWhitespace(&parser->input, NULL);
455 success = _inputStreamGetCharacter(&parser->input, &ch) && ch == '>';
456 } else if (success && ch == '>') {
457 // End of the DTD
458 _inputStreamGetCharacter(&parser->input, &ch);
459 }
460 if (!success) {
461 if (_inputStreamAtEOF(&parser->input)) {
462 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing DTD");
463 } else {
464 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found malformed DTD");
465 }
466 return false;
467 }
468
469 parser->top --; // Remove dtdStructure from the stack
470
471 if (success && dtdStructure) {
472 INVOKE_CALLBACK3(parser->callBacks.endXMLStructure, parser, dtdStructure, parser->context.info);
473 if (parser->status != kCFXMLStatusParseInProgress) {
474 _CFReportError(parser, parser->status, NULL);
475 return false;
476 }
477 }
478 return true;
479 }
480
481 /*
482 [69] PEReference ::= '%' Name ';'
483 */
484 static Boolean parsePhysicalEntityReference(CFXMLParserRef parser) {
485 UniChar ch;
486 CFStringRef name;
487 if (!_inputStreamScanXMLName(&parser->input, false, &name)) {
488 _CFReportError(parser, kCFXMLErrorMalformedName, "Found malformed name while parsing physical entity reference");
489 return false;
490 } else if (!_inputStreamGetCharacter(&parser->input, &ch)) {
491 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing physical entity reference");
492 return false;
493 } else if (ch != ';') {
494 _CFReportError(parser, kCFXMLErrorMalformedName, "Found malformed name while parsing physical entity reference");
495 return false;
496 } else if (!(parser->options & kCFXMLParserSkipMetaData) && *(parser->top)) {
497 CFXMLEntityReferenceInfo myData;
498 Boolean result;
499 CFStringRef tmp = parser->node->dataString;
500 parser->node->dataTypeID = kCFXMLNodeTypeEntityReference;
501 parser->node->dataString = name;
502 myData.entityType = kCFXMLEntityTypeParameter;
503 parser->node->additionalData = &myData;
504 result = reportNewLeaf(parser);
505 parser->node->additionalData = NULL;
506 parser->node->dataString = tmp;
507 return result;
508 } else {
509 return true;
510 }
511 }
512
513 /*
514 [54] AttType ::= StringType | TokenizedType | EnumeratedType
515 [55] StringType ::= 'CDATA'
516 [56] TokenizedType ::= 'ID' | 'IDREF'| 'IDREFS'| 'ENTITY'| 'ENTITIES'| 'NMTOKEN'| 'NMTOKENS'
517 [57] EnumeratedType ::= NotationType | Enumeration
518 [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
519 [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
520 */
521 static Boolean parseEnumeration(CFXMLParserRef parser, Boolean useNMTokens) {
522 UniChar ch;
523 Boolean done = false;
524 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
525 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
526 return false;
527 } else if (ch != '(') {
528 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
529 return false;
530 }
531 _inputStreamSkipWhitespace(&parser->input, NULL);
532 if (!_inputStreamScanXMLName(&parser->input, useNMTokens, NULL)) {
533 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
534 return false;
535 }
536 while (!done) {
537 _inputStreamSkipWhitespace(&parser->input, NULL);
538 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
539 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
540 return false;
541 } else if (ch == ')') {
542 done = true;
543 } else if (ch == '|') {
544 _inputStreamSkipWhitespace(&parser->input, NULL);
545 if (!_inputStreamScanXMLName(&parser->input, useNMTokens, NULL)) {
546 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
547 return false;
548 }
549 } else {
550 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
551 return false;
552 }
553 }
554 return true;
555 }
556
557 static Boolean parseAttributeType(CFXMLParserRef parser, CFMutableStringRef str) {
558 Boolean success = false;
559 static const UniChar attTypeStrings[6][8] = {
560 {'C', 'D', 'A', 'T', 'A', '\0', '\0', '\0'},
561 {'I', 'D', 'R', 'E', 'F', 'S', '\0', '\0'},
562 {'E', 'N', 'T', 'I', 'T', 'Y', '\0', '\0'},
563 {'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S'},
564 {'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S'},
565 {'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N'} };
566 if (str) _inputStreamSetMark(&parser->input);
567 if (_inputStreamMatchString(&parser->input, attTypeStrings[0], 5) ||
568 _inputStreamMatchString(&parser->input, attTypeStrings[1], 6) ||
569 _inputStreamMatchString(&parser->input, attTypeStrings[1], 5) ||
570 _inputStreamMatchString(&parser->input, attTypeStrings[1], 2) ||
571 _inputStreamMatchString(&parser->input, attTypeStrings[2], 6) ||
572 _inputStreamMatchString(&parser->input, attTypeStrings[3], 8) ||
573 _inputStreamMatchString(&parser->input, attTypeStrings[4], 8) ||
574 _inputStreamMatchString(&parser->input, attTypeStrings[4], 7)) {
575 success = true;
576 } else if (_inputStreamMatchString(&parser->input, attTypeStrings[5], 8)) {
577 // Notation
578 if (_inputStreamSkipWhitespace(&parser->input, NULL) == 0) {
579 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
580 success = false;
581 } else {
582 success = parseEnumeration(parser, false);
583 }
584 } else {
585 success = parseEnumeration(parser, true);
586 }
587 if (str) {
588 if (success) {
589 _inputStreamGetCharactersFromMark(&parser->input, str);
590 }
591 _inputStreamClearMark(&parser->input);
592 }
593 return success;
594 }
595
596 /* [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) */
597 static Boolean parseAttributeDefaultDeclaration(CFXMLParserRef parser, CFMutableStringRef str) {
598 const UniChar strings[3][8] = {
599 {'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D'},
600 {'I', 'M', 'P', 'L', 'I', 'E', 'D', '\0'},
601 {'F', 'I', 'X', 'E', 'D', '\0', '\0', '\0'}};
602 UniChar ch;
603 Boolean success;
604 if (str) _inputStreamSetMark(&parser->input);
605 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
606 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
607 success = false;
608 } else if (ch == '#') {
609 if (_inputStreamMatchString(&parser->input, strings[0], 8) ||
610 _inputStreamMatchString(&parser->input, strings[1], 7)) {
611 success = true;
612 } else if (!_inputStreamMatchString(&parser->input, strings[2], 5) || _inputStreamSkipWhitespace(&parser->input, NULL) == 0) {
613 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
614 success = false;
615 } else {
616 // we fall through if "#FIXED" was matched, and at least one whitespace character was stripped.
617 success = parseAttributeValue(parser, NULL);
618 }
619 } else {
620 _inputStreamReturnCharacter(&parser->input, ch);
621 success = parseAttributeValue(parser, NULL);
622 }
623 if (str) {
624 if (success) {
625 _inputStreamGetCharactersFromMark(&parser->input, str);
626 }
627 _inputStreamClearMark(&parser->input);
628 }
629 return success;
630 }
631
632 /*
633 [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
634 [53] AttDef ::= S Name S AttType S DefaultDecl
635 */
636 static Boolean parseAttributeListDeclaration(CFXMLParserRef parser) {
637 const UniChar attList[7] = {'A', 'T', 'T', 'L', 'I', 'S', 'T'};
638 CFXMLAttributeListDeclarationInfo attListData;
639 CFXMLAttributeDeclarationInfo attributeArray[8], *attributes=attributeArray;
640 CFIndex capacity = 8;
641 UniChar ch;
642 Boolean success = true;
643 CFStringRef name;
644 if (!_inputStreamMatchString(&parser->input, attList, 7) ||
645 _inputStreamSkipWhitespace(&parser->input, NULL) == 0 ||
646 !_inputStreamScanXMLName(&parser->input, false, &name)) {
647 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
648 return false;
649 }
650 attListData.numberOfAttributes = 0;
651 if (!(*parser->top) || (parser->options & kCFXMLParserSkipMetaData)) {
652 // Use this to mark that we don't need to collect attribute information to report to the client. Ultimately, we may want to collect this for our own use (for validation, for instance), but for now, the only reason we would create it would be for the client. -- REW, 2/9/2000
653 attributes = NULL;
654 }
655 while (_inputStreamPeekCharacter(&parser->input, &ch) && ch != '>' && _inputStreamSkipWhitespace(&parser->input, NULL) != 0) {
656 CFXMLAttributeDeclarationInfo *attribute = NULL;
657 if (_inputStreamPeekCharacter(&parser->input, &ch) && ch == '>')
658 break;
659 if (attributes) {
660 if (capacity == attListData.numberOfAttributes) {
661 capacity = 2*capacity;
662 if (attributes != attributeArray) {
663 attributes = (CFXMLAttributeDeclarationInfo *)CFAllocatorReallocate(CFGetAllocator(parser), attributes, capacity * sizeof(CFXMLAttributeDeclarationInfo), 0);
664 } else {
665 attributes = (CFXMLAttributeDeclarationInfo *)CFAllocatorAllocate(CFGetAllocator(parser), capacity * sizeof(CFXMLAttributeDeclarationInfo), 0);
666 }
667 }
668 attribute = &(attributes[attListData.numberOfAttributes]);
669 // Much better if we can somehow create these strings immutable - then if the client (or we ourselves) has to copy them, they will end up multiply-retained, rather than having a new alloc and data copy performed. -- REW, 2/9/2000
670 attribute->typeString = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
671 attribute->defaultString = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
672 }
673 if (!_inputStreamScanXMLName(&parser->input, false, &(attribute->attributeName)) || (_inputStreamSkipWhitespace(&parser->input, NULL) == 0)) {
674 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
675 success = false;
676 break;
677 }
678 if (!parseAttributeType(parser, attribute ? (CFMutableStringRef)attribute->typeString : NULL)) {
679 success = false;
680 break;
681 }
682 if (_inputStreamSkipWhitespace(&parser->input, NULL) == 0) {
683 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
684 success = false;
685 break;
686 }
687 if (!parseAttributeDefaultDeclaration(parser, attribute ? (CFMutableStringRef)attribute->defaultString : NULL)) {
688 success = false;
689 break;
690 }
691 attListData.numberOfAttributes ++;
692 }
693 if (success) {
694 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
695 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
696 success = false;
697 } else if (ch != '>') {
698 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
699 success = false;
700 } else if (attributes) {
701 CFStringRef tmp = parser->node->dataString;
702 parser->node->dataTypeID = kCFXMLNodeTypeAttributeListDeclaration;
703 parser->node->dataString = name;
704 attListData.attributes = attributes;
705 parser->node->additionalData = (void *)&attListData;
706 success = reportNewLeaf(parser);
707 parser->node->additionalData = NULL;
708 parser->node->dataString = tmp;
709 }
710 }
711 if (attributes) {
712 // Free up all that memory
713 CFIndex idx;
714 for (idx = 0; idx < attListData.numberOfAttributes; idx ++) {
715 // Do not release attributeName here; it's a uniqued string from scanXMLName
716 CFRelease(attributes[idx].typeString);
717 CFRelease(attributes[idx].defaultString);
718 }
719 if (attributes != attributeArray) {
720 CFAllocatorDeallocate(CFGetAllocator(parser), attributes);
721 }
722 }
723 return success;
724 }
725
726 CF_INLINE Boolean parseSystemLiteral(CFXMLParserRef parser, CFXMLExternalID *extID) {
727 Boolean success;
728 if (extID) {
729 CFMutableStringRef urlStr = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
730 if (_inputStreamScanQuotedString(&parser->input, urlStr)) {
731 success = true;
732 extID->systemID = CFURLCreateWithString(CFGetAllocator(parser), urlStr, parser->input.url);
733 } else {
734 extID->systemID = NULL;
735 success = false;
736 }
737 CFRelease(urlStr);
738 } else {
739 success = _inputStreamScanQuotedString(&parser->input, NULL);
740 }
741 return success;
742 }
743
744 /*
745 [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
746 [83] PublicID ::= 'PUBLIC' S PubidLiteral
747 [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
748 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
749 [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
750 */
751 // This does NOT report errors itself; caller can check to see if parser->input is at EOF to determine whether the formatting failed or unexpected EOF occurred. -- REW, 2/2/2000
752 static Boolean parseExternalID(CFXMLParserRef parser, Boolean alsoAcceptPublicID, CFXMLExternalID *extID) {
753 const UniChar publicString[6] = {'P', 'U', 'B', 'L', 'I', 'C'};
754 const UniChar systemString[6] = {'S', 'Y', 'S', 'T', 'E', 'M'};
755 Boolean success;
756 if (extID) {
757 extID->systemID = NULL;
758 extID->publicID = NULL;
759 }
760 if (_inputStreamMatchString(&parser->input, publicString, 6)) {
761 success = _inputStreamSkipWhitespace(&parser->input, NULL) != 0;
762 if (extID) {
763 extID->publicID = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
764 success = success && _inputStreamScanQuotedString(&parser->input, (CFMutableStringRef)extID->publicID);
765 } else {
766 success = success && _inputStreamScanQuotedString(&parser->input, NULL);
767 }
768 if (success) {
769 UniChar ch;
770 if (alsoAcceptPublicID) {
771 _inputStreamSetMark(&parser->input); // In case we need to roll back the parser
772 }
773 if (_inputStreamSkipWhitespace(&parser->input, NULL) == 0
774 || !_inputStreamPeekCharacter(&parser->input, &ch)
775 || (ch != '\'' && ch != '\"')
776 || !parseSystemLiteral(parser, extID)) {
777 success = alsoAcceptPublicID;
778 if (alsoAcceptPublicID) {
779 _inputStreamBackUpToMark(&parser->input);
780 }
781 } else {
782 success = true;
783 }
784 if (alsoAcceptPublicID) {
785 _inputStreamClearMark(&parser->input);
786 }
787 }
788 } else if (_inputStreamMatchString(&parser->input, systemString, 6)) {
789 success = _inputStreamSkipWhitespace(&parser->input, NULL) != 0 && parseSystemLiteral(parser, extID);
790 } else {
791 success = false;
792 }
793 return success;
794 }
795
796 /*
797 [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
798 */
799 static Boolean parseNotationDeclaration(CFXMLParserRef parser) {
800 static UniChar notationString[8] = {'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N'};
801 Boolean report = *(parser->top) && !(parser->options & kCFXMLParserSkipMetaData);
802 CFXMLNotationInfo notationData = {{NULL, NULL}};
803 CFStringRef name;
804 Boolean success =
805 _inputStreamMatchString(&parser->input, notationString, 8) &&
806 _inputStreamSkipWhitespace(&parser->input, NULL) != 0 &&
807 _inputStreamScanXMLName(&parser->input, false, report ? &name : NULL) &&
808 _inputStreamSkipWhitespace(&parser->input, NULL) != 0 &&
809 parseExternalID(parser, true, report ? &(notationData.externalID) : NULL);
810
811 if (success) {
812 UniChar ch;
813 _inputStreamSkipWhitespace(&parser->input, NULL);
814 success = (_inputStreamGetCharacter(&parser->input, &ch) && ch == '>');
815 }
816 if (!success) {
817 if (_inputStreamAtEOF(&parser->input)) {
818 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
819 } else {
820 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
821 }
822 } else if (report) {
823 CFStringRef tmp = parser->node->dataString;
824 parser->node->dataTypeID = kCFXMLNodeTypeNotation;
825 parser->node->dataString = name;
826 parser->node->additionalData = &notationData;
827 success = reportNewLeaf(parser);
828 parser->node->additionalData = NULL;
829 parser->node->dataString = tmp;
830 }
831 if (notationData.externalID.systemID) CFRelease(notationData.externalID.systemID);
832 if (notationData.externalID.publicID) CFRelease(notationData.externalID.publicID);
833 return success;
834 }
835
836 /*
837 [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
838 [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
839 [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
840 */
841 static Boolean parseChoiceOrSequence(CFXMLParserRef parser, Boolean pastParen) {
842 UniChar ch, separator;
843 if (!pastParen) {
844 if (!_inputStreamGetCharacter(&parser->input, &ch) || ch != '(') return false;
845 _inputStreamSkipWhitespace(&parser->input, NULL);
846 }
847 if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false;
848
849 /* Now scanning cp, production [48] */
850 if (ch == '(') {
851 if (!parseChoiceOrSequence(parser, false)) return false;
852 } else {
853 if (!_inputStreamScanXMLName(&parser->input, false, NULL)) return false;
854 }
855 if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false;
856 if (ch == '?' || ch == '*' || ch == '+') _inputStreamGetCharacter(&parser->input, &ch);
857
858 /* Now past cp */
859 _inputStreamSkipWhitespace(&parser->input, NULL);
860 if (!_inputStreamGetCharacter(&parser->input, &ch)) return false;
861 if (ch == ')') return true;
862 if (ch != '|' && ch != ',') return false;
863 separator = ch;
864 while (ch == separator) {
865 _inputStreamSkipWhitespace(&parser->input, NULL);
866 if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false;
867 if (ch != '(') {
868 if (!_inputStreamScanXMLName(&parser->input, false, NULL)) return false;
869 } else if (!parseChoiceOrSequence(parser, false)) {
870 return false;
871 }
872 _inputStreamSkipWhitespace(&parser->input, NULL);
873 if (!_inputStreamGetCharacter(&parser->input, &ch)) return false;
874 }
875 return ch == ')';
876 }
877
878 /*
879 [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')'
880 */
881 static Boolean parseMixedElementContent(CFXMLParserRef parser) {
882 static const UniChar pcdataString[7] = {'#', 'P', 'C', 'D', 'A', 'T', 'A'};
883 UniChar ch;
884 if (!_inputStreamMatchString(&parser->input, pcdataString, 7)) return false;
885 _inputStreamSkipWhitespace(&parser->input, NULL);
886 if (!_inputStreamGetCharacter(&parser->input, &ch) && (ch == ')' || ch == '|')) return false;
887 if (ch == ')') return true;
888
889 while (ch == '|') {
890 _inputStreamSkipWhitespace(&parser->input, NULL);
891 if (!_inputStreamScanXMLName(&parser->input, false, NULL)) return false;
892 _inputStreamSkipWhitespace(&parser->input, NULL);
893 if (!_inputStreamGetCharacter(&parser->input, &ch)) return false;
894 }
895 if (ch != ')') return false;
896 if (!_inputStreamGetCharacter(&parser->input, &ch) || ch != '*') return false;
897 return true;
898 }
899
900 /*
901 [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
902 [47] children ::= (choice | seq) ('?' | '*' | '+')?
903 */
904 static Boolean parseElementContentSpec(CFXMLParserRef parser) {
905 static const UniChar eltContentEmpty[5] = {'E', 'M', 'P', 'T', 'Y'};
906 static const UniChar eltContentAny[3] = {'A', 'N', 'Y'};
907 UniChar ch;
908 if (_inputStreamMatchString(&parser->input, eltContentEmpty, 5) || _inputStreamMatchString(&parser->input, eltContentAny, 3)) {
909 return true;
910 } else if (!_inputStreamPeekCharacter(&parser->input, &ch) || ch != '(') {
911 return false;
912 } else {
913 // We want to know if we have a Mixed per production [51]. If we don't, we will need to back up and call the parseChoiceOrSequence function. So we set the mark now. -- REW, 2/10/2000
914 _inputStreamGetCharacter(&parser->input, &ch);
915 _inputStreamSkipWhitespace(&parser->input, NULL);
916 if (!_inputStreamPeekCharacter(&parser->input, &ch)) return false;
917 if (ch == '#') {
918 // Mixed
919 return parseMixedElementContent(parser);
920 } else {
921 if (parseChoiceOrSequence(parser, true)) {
922 if (_inputStreamPeekCharacter(&parser->input, &ch) && (ch == '*' || ch == '?' || ch == '+')) {
923 _inputStreamGetCharacter(&parser->input, &ch);
924 }
925 return true;
926 } else {
927 return false;
928 }
929 }
930 }
931 }
932
933 /*
934 [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
935 */
936 static Boolean parseElementDeclaration(CFXMLParserRef parser) {
937 Boolean report = *(parser->top) && !(parser->options & kCFXMLParserSkipMetaData);
938 Boolean success;
939 static const UniChar eltChars[7] = {'E', 'L', 'E', 'M', 'E', 'N', 'T'};
940 UniChar ch = '>';
941 CFMutableStringRef contentDesc = NULL;
942 CFStringRef name;
943 success = _inputStreamMatchString(&parser->input, eltChars, 7)
944 && _inputStreamSkipWhitespace(&parser->input, NULL) != 0
945 && _inputStreamScanXMLName(&parser->input, false, report ? &name : NULL)
946 && _inputStreamSkipWhitespace(&parser->input, NULL) != 0;
947 if (success) {
948 if (report) _inputStreamSetMark(&parser->input);
949 success = parseElementContentSpec(parser);
950 if (success && report) {
951 contentDesc = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
952 _inputStreamGetCharactersFromMark(&parser->input, contentDesc);
953 }
954 if (report) _inputStreamClearMark(&parser->input);
955 if (success) _inputStreamSkipWhitespace(&parser->input, NULL);
956 success = success && _inputStreamMatchString(&parser->input, &ch, 1);
957 }
958 if (!success) {
959 if (_inputStreamAtEOF(&parser->input)) {
960 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
961 } else {
962 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
963 }
964 } else if (report) {
965 CFXMLElementTypeDeclarationInfo eltData;
966 CFStringRef tmp = parser->node->dataString;
967 parser->node->dataTypeID = kCFXMLNodeTypeElementTypeDeclaration;
968 parser->node->dataString = name;
969 eltData.contentDescription = contentDesc;
970 parser->node->additionalData = &eltData;
971 success = reportNewLeaf(parser);
972 parser->node->additionalData = NULL;
973 parser->node->dataString = tmp;
974 }
975 if (contentDesc) CFRelease(contentDesc);
976 return success;
977 }
978
979 /*
980 [70] EntityDecl ::= GEDecl | PEDecl
981 [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
982 [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
983 [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
984 [74] PEDef ::= EntityValue | ExternalID
985 [76] NDataDecl ::= S 'NDATA' S Name
986 [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | "'" ([^%&'] | PEReference | Reference)* "'"
987 */
988 static Boolean parseEntityDeclaration(CFXMLParserRef parser) {
989 const UniChar entityStr[6] = {'E', 'N', 'T', 'I', 'T', 'Y'};
990 UniChar ch;
991 Boolean isPEDecl = false;
992 CFXMLEntityInfo entityData;
993 CFStringRef name;
994 Boolean report = *(parser->top) && !(parser->options & kCFXMLParserSkipMetaData);
995 Boolean success =
996 _inputStreamMatchString(&parser->input, entityStr, 6) &&
997 (_inputStreamSkipWhitespace(&parser->input, NULL) != 0) &&
998 _inputStreamPeekCharacter(&parser->input, &ch);
999
1000 entityData.replacementText = NULL;
1001 entityData.entityID.publicID = NULL;
1002 entityData.entityID.systemID = NULL;
1003 entityData.notationName = NULL;
1004 // We will set entityType immediately before reporting
1005
1006 if (success && ch == '%') {
1007 _inputStreamGetCharacter(&parser->input, &ch);
1008 success = _inputStreamSkipWhitespace(&parser->input, NULL) != 0;
1009 isPEDecl = true;
1010 }
1011 success = success && _inputStreamScanXMLName(&parser->input, false, report ? &name : NULL) && (_inputStreamSkipWhitespace(&parser->input, NULL) != 0) && _inputStreamPeekCharacter(&parser->input, &ch);
1012 if (success && (ch == '\"' || ch == '\'')) {
1013 // EntityValue
1014 // This is not quite correct - the string scanned cannot contain '%' or '&' unless it's as part of a valid entity reference -- REW, 2/2/2000
1015 if (report) {
1016 entityData.replacementText = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
1017 success = _inputStreamScanQuotedString(&parser->input, (CFMutableStringRef)entityData.replacementText);
1018 } else {
1019 success = _inputStreamScanQuotedString(&parser->input, NULL);
1020 }
1021 } else if (success) {
1022 // ExternalID
1023 success = parseExternalID(parser, false, report ? &(entityData.entityID) : NULL);
1024 if (success && !isPEDecl && _inputStreamSkipWhitespace(&parser->input, NULL) != 0) {
1025 // There could be an option NDataDecl
1026 // Don't we need to set entityData.notationName? -- REW, 3/6/2000
1027 const UniChar nDataStr[5] = {'N', 'D', 'A', 'T', 'A'};
1028 if (_inputStreamMatchString(&parser->input, nDataStr, 5)) {
1029 success = (_inputStreamSkipWhitespace(&parser->input, NULL) != 0) && _inputStreamScanXMLName(&parser->input, false, NULL);
1030 }
1031 }
1032 }
1033 if (success) {
1034 _inputStreamSkipWhitespace(&parser->input, NULL);
1035 success = _inputStreamGetCharacter(&parser->input, &ch) && ch == '>';
1036 }
1037 if (!success) {
1038 if (_inputStreamAtEOF(&parser->input)) {
1039 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
1040 } else {
1041 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
1042 }
1043 } else {
1044 CFStringRef tmp = parser->node->dataString;
1045 if (isPEDecl) entityData.entityType = kCFXMLEntityTypeParameter;
1046 else if (entityData.replacementText) entityData.entityType = kCFXMLEntityTypeParsedInternal;
1047 else if (!entityData.notationName) entityData.entityType = kCFXMLEntityTypeParsedExternal;
1048 else entityData.entityType = kCFXMLEntityTypeUnparsed;
1049 parser->node->dataTypeID = kCFXMLNodeTypeEntity;
1050 parser->node->dataString = name;
1051 parser->node->additionalData = &entityData;
1052 success = reportNewLeaf(parser);
1053 parser->node->additionalData = NULL;
1054 parser->node->dataString = tmp;
1055 if (entityData.replacementText) CFRelease(entityData.replacementText);
1056 }
1057 if (entityData.entityID.publicID) CFRelease(entityData.entityID.publicID);
1058 if (entityData.entityID.systemID) CFRelease(entityData.entityID.systemID);
1059 return success;
1060 }
1061
1062 /*
1063 [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
1064 [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
1065 */
1066 // First character should be just past '['
1067 static Boolean parseInlineDTD(CFXMLParserRef parser) {
1068 Boolean success = true;
1069 while (success && !_inputStreamAtEOF(&parser->input)) {
1070 UniChar ch;
1071
1072 parseWhitespace(parser);
1073 if (!_inputStreamGetCharacter(&parser->input, &ch)) break;
1074 if (ch == '%') {
1075 // PEReference
1076 success = parsePhysicalEntityReference(parser);
1077 } else if (ch == '<') {
1078 // markupdecl
1079 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
1080 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
1081 return false;
1082 }
1083 if (ch == '?') {
1084 // Processing Instruction
1085 success = parseProcessingInstruction(parser, true); // We can safely pass true here, because *parser->top will be NULL if kCFXMLParserSkipMetaData is true
1086 } else if (ch == '!') {
1087 UniChar dashes[2] = {'-', '-'};
1088 if (_inputStreamMatchString(&parser->input, dashes, 2)) {
1089 // Comment
1090 success = parseComment(parser, true);
1091 } else {
1092 // elementdecl | AttListDecl | EntityDecl | NotationDecl
1093 if (!_inputStreamPeekCharacter(&parser->input, &ch)) {
1094 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
1095 return false;
1096 } else if (ch == 'A') {
1097 // AttListDecl
1098 success = parseAttributeListDeclaration(parser);
1099 } else if (ch == 'N') {
1100 success = parseNotationDeclaration(parser);
1101 } else if (ch == 'E') {
1102 // elementdecl | EntityDecl
1103 _inputStreamGetCharacter(&parser->input, &ch);
1104 if (!_inputStreamPeekCharacter(&parser->input, &ch)) {
1105 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
1106 return false;
1107 }
1108 _inputStreamReturnCharacter(&parser->input, 'E');
1109 if (ch == 'L') {
1110 success = parseElementDeclaration(parser);
1111 } else if (ch == 'N') {
1112 success = parseEntityDeclaration(parser);
1113 } else {
1114 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
1115 return false;
1116 }
1117 } else {
1118 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
1119 return false;
1120 }
1121 }
1122 } else {
1123 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
1124 return false;
1125 }
1126 } else if (ch == ']') {
1127 return true;
1128 } else {
1129 _CFReportError(parser, kCFXMLErrorMalformedDTD, "Found unexpected character while parsing inline DTD");
1130 return false;
1131 }
1132 }
1133 if (success) {
1134 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Found unexpected EOF while parsing inline DTD");
1135 }
1136 return false;
1137 }
1138
1139 /*
1140 [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
1141 */
1142 static Boolean parseTagContent(CFXMLParserRef parser) {
1143 while (!_inputStreamAtEOF(&parser->input)) {
1144 UniChar ch;
1145 CFIndex numWhitespaceCharacters;
1146
1147 _inputStreamSetMark(&parser->input);
1148 numWhitespaceCharacters = _inputStreamSkipWhitespace(&parser->input, NULL);
1149 // Don't report the whitespace yet; if the first thing we see is character data, we put the whitespace back and report it as part of the character data.
1150 if (!_inputStreamGetCharacter(&parser->input, &ch)) break; // break == report unexpected EOF
1151
1152 if (ch != '<' && ch != '&') { // CharData
1153 // Back off the whitespace; we'll report it with the PCData
1154 _inputStreamBackUpToMark(&parser->input);
1155 _inputStreamClearMark(&parser->input);
1156 if (!parsePCData(parser)) return false;
1157 if(_inputStreamComposingErrorOccurred(&parser->input)) {
1158 _CFReportError(parser, kCFXMLErrorEncodingConversionFailure, "Encountered string encoding error");
1159 return false;
1160 }
1161 continue;
1162 }
1163
1164 // element | Reference | CDSect | PI | Comment
1165 // We can safely report any whitespace now
1166 if (!(parser->options & kCFXMLParserSkipWhitespace) && numWhitespaceCharacters != 0 && *(parser->top)) {
1167 _inputStreamReturnCharacter(&parser->input, ch);
1168 _inputStreamGetCharactersFromMark(&parser->input, (CFMutableStringRef)(parser->node->dataString));
1169 parser->node->dataTypeID = kCFXMLNodeTypeWhitespace;
1170 parser->node->additionalData = NULL;
1171 if (!reportNewLeaf(parser)) return false;
1172 _inputStreamGetCharacter(&parser->input, &ch);
1173 }
1174 _inputStreamClearMark(&parser->input);
1175
1176 if (ch == '&') {
1177 // Reference; for the time being, we don't worry about processing these; just report them as Entity references
1178 if (!parseEntityReference(parser, true)) return false;
1179 continue;
1180 }
1181
1182 // ch == '<'; element | CDSect | PI | Comment
1183 if (!_inputStreamPeekCharacter(&parser->input, &ch)) break;
1184 if (ch == '?') { // PI
1185 _inputStreamGetCharacter(&parser->input, &ch);
1186 if (!parseProcessingInstruction(parser, true))
1187 return false;
1188 } else if (ch == '/') { // end tag; we're passing outside of content's production
1189 _inputStreamReturnCharacter(&parser->input, '<'); // Back off to the '<'
1190 return true;
1191 } else if (ch != '!') { // element
1192 if (!parseTag(parser)) return false;
1193 } else {
1194 // Comment | CDSect
1195 UniChar dashes[3] = {'!', '-', '-'};
1196 if (_inputStreamMatchString(&parser->input, dashes, 3)) {
1197 // Comment
1198 if (!parseComment(parser, true)) return false;
1199 } else {
1200 // Should have a CDSect; back off the "<!" and call parseCDSect
1201 _inputStreamReturnCharacter(&parser->input, '<');
1202 if (!parseCDSect(parser)) return false;
1203 }
1204 }
1205 }
1206
1207 if(_inputStreamComposingErrorOccurred(&parser->input)) {
1208 _CFReportError(parser, kCFXMLErrorEncodingConversionFailure, "Encountered string encoding error");
1209 return false;
1210 }
1211 // Only way to get here is if premature EOF was found
1212 //#warning CF:Include the tag name here
1213 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing tag content");
1214 return false;
1215 }
1216
1217 static Boolean parseCDSect(CFXMLParserRef parser) {
1218 const UniChar _CDSectOpening[9] = {'<', '!', '[', 'C', 'D', 'A', 'T', 'A', '['};
1219 const UniChar _CDSectClose[3] = {']', ']', '>'};
1220 if (!_inputStreamMatchString(&parser->input, _CDSectOpening, 9)) {
1221 _CFReportError(parser, kCFXMLErrorMalformedCDSect, "Encountered bad prefix to a presumed CDATA section");
1222 return false;
1223 }
1224 if (!_inputStreamScanToCharacters(&parser->input, _CDSectClose, 3, (CFMutableStringRef)(parser->node->dataString))) {
1225 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing CDATA section");
1226 return false;
1227 }
1228
1229 parser->node->dataTypeID = kCFXMLNodeTypeCDATASection;
1230 parser->node->additionalData = NULL;
1231 return reportNewLeaf(parser);
1232 }
1233
1234 /*
1235 [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1236 */
1237 static Boolean validateCharacterReference(CFStringRef str) {
1238 Boolean isHex;
1239 CFIndex idx, len = CFStringGetLength(str);
1240 if (len < 2) return false;
1241 if (CFStringGetCharacterAtIndex(str, 0) != '#') return false;
1242 if (CFStringGetCharacterAtIndex(str, 1) == 'x') {
1243 isHex = true;
1244 idx = 2;
1245 if (len == 2) return false;
1246 } else {
1247 isHex = false;
1248 idx = 1;
1249 }
1250
1251 while (idx < len) {
1252 UniChar ch;
1253 ch = CFStringGetCharacterAtIndex(str, idx);
1254 idx ++;
1255 if (!(ch <= '9' && ch >= '0') &&
1256 !(isHex && ((ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F')))) {
1257 break;
1258 }
1259 }
1260 return (idx == len);
1261 }
1262
1263 /*
1264 [67] Reference ::= EntityRef | CharRef
1265 [68] EntityRef ::= '&' Name ';'
1266 */
1267 static Boolean parseEntityReference(CFXMLParserRef parser, Boolean report) {
1268 UniChar ch;
1269 CFXMLEntityReferenceInfo entData;
1270 CFStringRef name = NULL;
1271 if (!_inputStreamPeekCharacter(&parser->input, &ch)) {
1272 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing EntityReference");
1273 return false;
1274 }
1275 if (ch == '#') {
1276 ch = ';';
1277 if (!_inputStreamScanToCharacters(&parser->input, &ch, 1, (CFMutableStringRef)parser->node->dataString)) {
1278 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing EntityReference");
1279 return false;
1280 } else if (!validateCharacterReference(parser->node->dataString)) {
1281 _CFReportError(parser, kCFXMLErrorMalformedCharacterReference, "Encountered illegal character while parsing character reference");
1282 return false;
1283 }
1284 entData.entityType = kCFXMLEntityTypeCharacter;
1285 name = parser->node->dataString;
1286 } else if (!_inputStreamScanXMLName(&parser->input, false, report ? &name : NULL) || !_inputStreamGetCharacter(&parser->input, &ch) || ch != ';') {
1287 if (_inputStreamAtEOF(&parser->input)) {
1288 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing EntityReference");
1289 return false;
1290 } else {
1291 _CFReportError(parser, kCFXMLErrorMalformedName, "Encountered malformed name while parsing EntityReference");
1292 return false;
1293 }
1294 } else {
1295 entData.entityType = kCFXMLEntityTypeParsedInternal;
1296 }
1297 if (report) {
1298 CFStringRef tmp = parser->node->dataString;
1299 Boolean success;
1300 parser->node->dataTypeID = kCFXMLNodeTypeEntityReference;
1301 parser->node->dataString = name;
1302 parser->node->additionalData = &entData;
1303 success = reportNewLeaf(parser);
1304 parser->node->additionalData = NULL;
1305 parser->node->dataString = tmp;
1306 return success;
1307 } else {
1308 return true;
1309 }
1310 }
1311
1312 #if 0
1313 // Kept from old entity reference parsing....
1314 {
1315 switch (*(parser->curr)) {
1316 case 'l': // "lt"
1317 if (len >= 3 && *(parser->curr+1) == 't' && *(parser->curr+2) == ';') {
1318 ch = '<';
1319 parser->curr += 3;
1320 break;
1321 }
1322 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
1323 return;
1324 case 'g': // "gt"
1325 if (len >= 3 && *(parser->curr+1) == 't' && *(parser->curr+2) == ';') {
1326 ch = '>';
1327 parser->curr += 3;
1328 break;
1329 }
1330 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
1331 return;
1332 case 'a': // "apos" or "amp"
1333 if (len < 4) { // Not enough characters for either conversion
1334 parser->errorString = CFStringCreateWithCString(parser->allocator, "Encountered unexpected EOF", kCFStringEncodingASCII);
1335 return;
1336 }
1337 if (*(parser->curr+1) == 'm') {
1338 // "amp"
1339 if (*(parser->curr+2) == 'p' && *(parser->curr+3) == ';') {
1340 ch = '&';
1341 parser->curr += 4;
1342 break;
1343 }
1344 } else if (*(parser->curr+1) == 'p') {
1345 // "apos"
1346 if (len > 4 && *(parser->curr+2) == 'o' && *(parser->curr+3) == 's' && *(parser->curr+4) == ';') {
1347 ch = '\'';
1348 parser->curr += 5;
1349 break;
1350 }
1351 }
1352 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
1353 return;
1354 case 'q': // "quote"
1355 if (len >= 6 && *(parser->curr+1) == 'u' && *(parser->curr+2) == 'o' && *(parser->curr+3) == 't' && *(parser->curr+4) == 'e' && *(parser->curr+5) == ';') {
1356 ch = '\"';
1357 parser->curr += 6;
1358 break;
1359 }
1360 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
1361 return;
1362 case '#':
1363 {
1364 UniChar num = 0;
1365 Boolean isHex = false;
1366 if ( len < 4) { // Not enough characters to make it all fit! Need at least "&#d;"
1367 parser->errorString = CFStringCreateWithCString(parser->allocator, "Encountered unexpected EOF", kCFStringEncodingASCII);
1368 return;
1369 }
1370 parser->curr ++;
1371 if (*(parser->curr) == 'x') {
1372 isHex = true;
1373 parser->curr ++;
1374 }
1375 while (parser->curr < parser->end) {
1376 ch = *(parser->curr);
1377 if (ch == ';') {
1378 CFStringAppendCharacters(string, &num, 1);
1379 parser->curr ++;
1380 return;
1381 }
1382 if (!isHex) num = num*10;
1383 else num = num << 4;
1384 if (ch <= '9' && ch >= '0') {
1385 num += (ch - '0');
1386 } else if (!isHex) {
1387 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unexpected character %c at line %d"), ch, lineNumber(parser));
1388 return;
1389 } else if (ch >= 'a' && ch <= 'f') {
1390 num += 10 + (ch - 'a');
1391 } else if (ch >= 'A' && ch <= 'F') {
1392 num += 10 + (ch - 'A');
1393 } else {
1394 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unexpected character %c at line %d"), ch, lineNumber(parser));
1395 return;
1396 }
1397 }
1398 parser->errorString = CFStringCreateWithCString(parser->allocator, "Encountered unexpected EOF", kCFStringEncodingASCII);
1399 return;
1400 }
1401 default:
1402 parser->errorString = CFStringCreateWithFormat(parser->allocator, NULL, CFSTR("Encountered unknown ampersand-escape sequence at line %d"), lineNumber(parser));
1403 return;
1404 }
1405 CFStringAppendCharacters(string, &ch, 1);
1406 }
1407 #endif
1408
1409 /*
1410 [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
1411 */
1412 static Boolean parsePCData(CFXMLParserRef parser) {
1413 UniChar ch;
1414 Boolean done = false;
1415 _inputStreamSetMark(&parser->input);
1416 while (!done && _inputStreamGetCharacter(&parser->input, &ch)) {
1417 switch (ch) {
1418 case '<':
1419 case '&':
1420 _inputStreamReturnCharacter(&parser->input, ch);
1421 done = true;
1422 break;
1423 case ']':
1424 {
1425 const UniChar endSequence[2] = {']', '>'};
1426 if (_inputStreamMatchString(&parser->input, endSequence, 2)) {
1427 _CFReportError(parser, kCFXMLErrorMalformedParsedCharacterData, "Encountered \"]]>\" in parsed character data");
1428 _inputStreamClearMark(&parser->input);
1429 return false;
1430 }
1431 break;
1432 }
1433 default:
1434 ;
1435 }
1436 }
1437 _inputStreamGetCharactersFromMark(&parser->input, (CFMutableStringRef)(parser->node->dataString));
1438 _inputStreamClearMark(&parser->input);
1439 parser->node->dataTypeID = kCFXMLNodeTypeText;
1440 parser->node->additionalData = NULL;
1441 return reportNewLeaf(parser);
1442 }
1443
1444 /*
1445 [42] ETag ::= '</' Name S? '>'
1446 */
1447 static Boolean parseCloseTag(CFXMLParserRef parser, CFStringRef tag) {
1448 const UniChar beginEndTag[2] = {'<', '/'};
1449 Boolean unexpectedEOF = false, mismatch = false;
1450 CFStringRef closeTag;
1451
1452 // We can get away with testing pointer equality between tag & closeTag because scanXMLName guarantees the strings it returns are unique.
1453 if (_inputStreamMatchString(&parser->input, beginEndTag, 2) && _inputStreamScanXMLName(&parser->input, false, &closeTag) && closeTag == tag) {
1454
1455 UniChar ch;
1456 _inputStreamSkipWhitespace(&parser->input, NULL);
1457 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
1458 unexpectedEOF = true;
1459 } else if (ch != '>') {
1460 mismatch = true;
1461 }
1462 } else if (_inputStreamAtEOF(&parser->input)) {
1463 unexpectedEOF = true;
1464 } else {
1465 mismatch = true;
1466 }
1467
1468 if (unexpectedEOF || mismatch) {
1469 if (unexpectedEOF) {
1470 parser->errorString = CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered unexpected EOF while parsing close tag for <%@>"), tag);
1471 parser->status = kCFXMLErrorUnexpectedEOF;
1472 if(parser->callBacks.handleError) INVOKE_CALLBACK3(parser->callBacks.handleError, parser, kCFXMLErrorUnexpectedEOF, parser->context.info);
1473 } else {
1474 parser->errorString = CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered malformed close tag for <%@>"), tag);
1475 parser->status = kCFXMLErrorMalformedCloseTag;
1476 if(parser->callBacks.handleError) INVOKE_CALLBACK3(parser->callBacks.handleError, parser, kCFXMLErrorMalformedCloseTag, parser->context.info);
1477 }
1478 return false;
1479 }
1480 return true;
1481 }
1482
1483 /*
1484 [39] element ::= EmptyElementTag | STag content ETag
1485 [40] STag ::= '<' Name (S Attribute)* S? '>'
1486 [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
1487 */
1488 static Boolean parseTag(CFXMLParserRef parser) {
1489 UniChar ch;
1490 void *tag;
1491 CFXMLElementInfo data;
1492 Boolean success = true;
1493 CFStringRef tagName;
1494
1495 if (!_inputStreamScanXMLName(&parser->input, false, &tagName)) {
1496 _CFReportError(parser, kCFXMLErrorMalformedStartTag, "Encountered malformed start tag");
1497 return false;
1498 }
1499
1500 _inputStreamSkipWhitespace(&parser->input, NULL);
1501
1502 if (!parseAttributes(parser)) return false; // parsed directly into parser->argDict ; parseAttributes consumes any trailing whitespace
1503 data.attributes = parser->argDict;
1504 data.attributeOrder = parser->argArray;
1505 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
1506 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF");
1507 return false;
1508 }
1509 if (ch == '/') {
1510 data.isEmpty = true;
1511 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
1512 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF");
1513 return false;
1514 }
1515 } else {
1516 data.isEmpty = false;
1517 }
1518 if (ch != '>') {
1519 _CFReportError(parser, kCFXMLErrorMalformedStartTag, "Encountered malformed start tag");
1520 return false;
1521 }
1522
1523 if (*parser->top || parser->top == parser->stack) {
1524 CFStringRef oldStr = parser->node->dataString;
1525 parser->node->dataTypeID = kCFXMLNodeTypeElement;
1526 parser->node->dataString = tagName;
1527 parser->node->additionalData = &data;
1528 tag = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info);
1529 if (tag && parser->status == kCFXMLStatusParseInProgress) {
1530 INVOKE_CALLBACK4(parser->callBacks.addChild, parser, *parser->top, tag, parser->context.info);
1531 }
1532 parser->node->additionalData = NULL;
1533 parser->node->dataString = oldStr;
1534 if (parser->status != kCFXMLStatusParseInProgress) {
1535 // callback called CFXMLParserAbort()
1536 _CFReportError(parser, parser->status, NULL);
1537 return false;
1538 }
1539 } else {
1540 tag = NULL;
1541 }
1542
1543 pushXMLNode(parser, tag);
1544 if (!data.isEmpty) {
1545 success = parseTagContent(parser);
1546 if (success) {
1547 success = parseCloseTag(parser, tagName);
1548 }
1549 }
1550 parser->top --;
1551
1552 if (success && tag) {
1553 INVOKE_CALLBACK3(parser->callBacks.endXMLStructure, parser, tag, parser->context.info);
1554 if (parser->status != kCFXMLStatusParseInProgress) {
1555 _CFReportError(parser, parser->status, NULL);
1556 return false;
1557 }
1558 }
1559 return success;
1560 }
1561
1562 /*
1563 [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
1564 [67] Reference ::= EntityRef | CharRef
1565 [68] EntityRef ::= '&' Name ';'
1566 */
1567 // For the moment, we don't worry about references in the attribute values.
1568 static Boolean parseAttributeValue(CFXMLParserRef parser, CFMutableStringRef str) {
1569 UniChar quote, ch;
1570 Boolean success = _inputStreamGetCharacter(&parser->input, &quote);
1571 if (!success || (quote != '\'' && quote != '\"')) return false;
1572 if (str) _inputStreamSetMark(&parser->input);
1573 while (_inputStreamGetCharacter(&parser->input, &ch) && ch != quote) {
1574 switch (ch) {
1575 case '<': success = false; break;
1576 case '&':
1577 if (!parseEntityReference(parser, false)) {
1578 success = false;
1579 break;
1580 }
1581 default:
1582 ;
1583 }
1584 }
1585
1586 if (success && _inputStreamAtEOF(&parser->input)) {
1587 success = false;
1588 }
1589 if (str) {
1590 if (success) {
1591 _inputStreamReturnCharacter(&parser->input, quote);
1592 _inputStreamGetCharactersFromMark(&parser->input, str);
1593 _inputStreamGetCharacter(&parser->input, &ch);
1594 }
1595 _inputStreamClearMark(&parser->input);
1596 }
1597 return success;
1598 }
1599
1600 /*
1601 [40] STag ::= '<' Name (S Attribute)* S? '>'
1602 [41] Attribute ::= Name Eq AttValue
1603 [25] Eq ::= S? '=' S?
1604 */
1605
1606 // Expects parser->curr to be at the first content character; will consume the trailing whitespace.
1607 Boolean parseAttributes(CFXMLParserRef parser) {
1608 UniChar ch;
1609 CFMutableDictionaryRef dict;
1610 CFMutableArrayRef array;
1611 Boolean failure = false;
1612 if (_inputStreamPeekCharacter(&parser->input, &ch) == '>') {
1613 if (parser->argDict) {
1614 CFDictionaryRemoveAllValues(parser->argDict);
1615 CFArrayRemoveAllValues(parser->argArray);
1616 }
1617 return true; // No attributes; let caller deal with it
1618 }
1619 if (!parser->argDict) {
1620 parser->argDict = CFDictionaryCreateMutable(CFGetAllocator(parser), 0, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
1621 parser->argArray = CFArrayCreateMutable(CFGetAllocator(parser), 0, &kCFTypeArrayCallBacks);
1622 } else {
1623 CFDictionaryRemoveAllValues(parser->argDict);
1624 CFArrayRemoveAllValues(parser->argArray);
1625 }
1626 dict = parser->argDict;
1627 array = parser->argArray;
1628 while (!failure && _inputStreamPeekCharacter(&parser->input, &ch) && ch != '>' && ch != '/') {
1629 CFStringRef key;
1630 CFMutableStringRef value;
1631 if (!_inputStreamScanXMLName(&parser->input, false, &key)) {
1632 failure = true;
1633 break;
1634 }
1635 if (CFArrayGetFirstIndexOfValue(array, CFRangeMake(0, CFArrayGetCount(array)), key) != kCFNotFound) {
1636 _CFReportError(parser, kCFXMLErrorMalformedStartTag, "Found repeated attribute");
1637 return false;
1638 }
1639 _inputStreamSkipWhitespace(&parser->input, NULL);
1640 if (!_inputStreamGetCharacter(&parser->input, &ch) || ch != '=') {
1641 failure = true;
1642 break;
1643 }
1644 _inputStreamSkipWhitespace(&parser->input, NULL);
1645 value = CFStringCreateMutableWithExternalCharactersNoCopy(CFGetAllocator(parser), NULL, 0, 0, CFGetAllocator(parser));
1646 if (!parseAttributeValue(parser, value)) {
1647 CFRelease(value);
1648 failure = true;
1649 break;
1650 }
1651 CFArrayAppendValue(array, key);
1652 CFDictionarySetValue(dict, key, value);
1653 CFRelease(value);
1654 _inputStreamSkipWhitespace(&parser->input, NULL);
1655 }
1656 if (failure) {
1657 //#warning CF:Include tag name in this error report
1658 _CFReportError(parser, kCFXMLErrorMalformedStartTag, "Found illegal character while parsing element tag");
1659 return false;
1660 } else if (_inputStreamAtEOF(&parser->input)) {
1661 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing element attributes");
1662 return false;
1663 } else {
1664 return true;
1665 }
1666 }
1667
1668 /*
1669 [1] document ::= prolog element Misc*
1670 [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
1671 [27] Misc ::= Comment | PI | S
1672 [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
1673
1674 We treat XMLDecl as a plain old PI, since PI is part of Misc. This changes the prolog and document productions to
1675 [22-1] prolog ::= Misc* (doctypedecl Misc*)?
1676 [1-1] document ::= Misc* (doctypedecl Misc*)? element Misc*
1677
1678 NOTE: This function assumes parser->stack has a valid top. I.e. the document pointer has already been created!
1679 */
1680 static Boolean parseXML(CFXMLParserRef parser) {
1681 Boolean success = true, sawDTD = false, sawElement = false;
1682 UniChar ch;
1683 while (success && _inputStreamPeekCharacter(&parser->input, &ch)) {
1684 switch (ch) {
1685 case ' ':
1686 case '\n':
1687 case '\t':
1688 case '\r':
1689 success = parseWhitespace(parser);
1690 break;
1691 case '<':
1692 _inputStreamGetCharacter(&parser->input, &ch);
1693 if (!_inputStreamGetCharacter(&parser->input, &ch)) {
1694 _CFReportError(parser, kCFXMLErrorUnexpectedEOF, "Encountered unexpected EOF while parsing top-level document");
1695 return false;
1696 }
1697 if (ch == '!') {
1698 // Comment or DTD
1699 UniChar dashes[2] = {'-', '-'};
1700 if (_inputStreamMatchString(&parser->input, dashes, 2)) {
1701 // Comment
1702 success = parseComment(parser, true);
1703 } else {
1704 // Should be DTD
1705 if (sawDTD) {
1706 _CFReportError(parser, kCFXMLErrorMalformedDocument, "Encountered a second DTD");
1707 return false;
1708 }
1709 success = parseDTD(parser);
1710 if (success) sawDTD = true;
1711 }
1712 } else if (ch == '?') {
1713 // Processing instruction
1714 success = parseProcessingInstruction(parser, true);
1715 } else {
1716 // Tag or malformed
1717 if (sawElement) {
1718 _CFReportError(parser, kCFXMLErrorMalformedDocument, "Encountered second top-level element");
1719 return false;
1720 }
1721 _inputStreamReturnCharacter(&parser->input, ch);
1722 success = parseTag(parser);
1723 if (success) sawElement = true;
1724 }
1725 break;
1726 default: {
1727 parser->status = kCFXMLErrorMalformedDocument;
1728 parser->errorString = ch < 256 ?
1729 CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered unexpected character 0x%x (\'%c\') at top-level"), ch, ch) :
1730 CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("Encountered unexpected Unicode character 0x%x at top-level"), ch);
1731
1732 if (parser->callBacks.handleError) {
1733 INVOKE_CALLBACK3(parser->callBacks.handleError, parser, parser->status, parser->context.info);
1734 }
1735 return false;
1736 }
1737 }
1738 }
1739
1740 if (!success) return false;
1741 if (!sawElement) {
1742 _CFReportError(parser, kCFXMLErrorElementlessDocument, "No element found in document");
1743 return false;
1744 }
1745 return true;
1746 }
1747
1748 static void _CFReportError(CFXMLParserRef parser, CFXMLParserStatusCode errNum, const char *str) {
1749 if (str) {
1750 parser->status = errNum;
1751 parser->errorString = CFStringCreateWithCString(CFGetAllocator(parser), str, kCFStringEncodingASCII);
1752 }
1753 if (parser->callBacks.handleError) {
1754 INVOKE_CALLBACK3(parser->callBacks.handleError, parser, errNum, parser->context.info);
1755 }
1756 }
1757
1758 // Assumes parser->node has been set and is ready to go
1759 static Boolean reportNewLeaf(CFXMLParserRef parser) {
1760 void *xmlStruct;
1761 if (*(parser->top) == NULL) return true;
1762
1763 xmlStruct = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info);
1764 if (xmlStruct && parser->status == kCFXMLStatusParseInProgress) {
1765 INVOKE_CALLBACK4(parser->callBacks.addChild, parser, *(parser->top), xmlStruct, parser->context.info);
1766 if (parser->status == kCFXMLStatusParseInProgress) INVOKE_CALLBACK3(parser->callBacks.endXMLStructure, parser, xmlStruct, parser->context.info);
1767 }
1768 if (parser->status != kCFXMLStatusParseInProgress) {
1769 _CFReportError(parser, parser->status, NULL);
1770 return false;
1771 }
1772 return true;
1773 }
1774
1775 static void pushXMLNode(CFXMLParserRef parser, void *node) {
1776 parser->top ++;
1777 if ((unsigned)(parser->top - parser->stack) == parser->capacity) {
1778 parser->stack = (void **)CFAllocatorReallocate(CFGetAllocator(parser), parser->stack, 2 * parser->capacity * sizeof(void *), 0);
1779 parser->top = parser->stack + parser->capacity;
1780 parser->capacity = 2*parser->capacity;
1781 }
1782 *(parser->top) = node;
1783 }
1784
1785 /**************************/
1786 /* Parsing to a CFXMLTree */
1787 /**************************/
1788
1789 static void *_XMLTreeCreateXMLStructure(CFXMLParserRef parser, CFXMLNodeRef node, void *context) {
1790 CFXMLNodeRef myNode = CFXMLNodeCreateCopy(CFGetAllocator(parser), node);
1791 CFXMLTreeRef tree = CFXMLTreeCreateWithNode(CFGetAllocator(parser), myNode);
1792 CFRelease(myNode);
1793 return (void *)tree;
1794 }
1795
1796 static void _XMLTreeAddChild(CFXMLParserRef parser, void *parent, void *child, void *context) {
1797 CFTreeAppendChild((CFTreeRef)parent, (CFTreeRef)child);
1798 }
1799
1800 static void _XMLTreeEndXMLStructure(CFXMLParserRef parser, void *xmlType, void *context) {
1801 CFXMLTreeRef node = (CFXMLTreeRef)xmlType;
1802 if (CFTreeGetParent(node))
1803 CFRelease((CFXMLTreeRef)xmlType);
1804 }
1805
1806 CFXMLTreeRef CFXMLTreeCreateWithDataFromURL(CFAllocatorRef allocator, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex version) {
1807 CFXMLParserRef parser;
1808 CFXMLParserCallBacks callbacks;
1809 CFXMLTreeRef result;
1810
1811 CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__);
1812
1813 callbacks.createXMLStructure = _XMLTreeCreateXMLStructure;
1814 callbacks.addChild = _XMLTreeAddChild;
1815 callbacks.endXMLStructure = _XMLTreeEndXMLStructure;
1816 callbacks.resolveExternalEntity = NULL;
1817 callbacks.handleError = NULL;
1818 parser = CFXMLParserCreateWithDataFromURL(allocator, dataSource, parseOptions, version, &callbacks, NULL);
1819
1820 if (CFXMLParserParse(parser)) {
1821 result = (CFXMLTreeRef)CFXMLParserGetDocument(parser);
1822 } else {
1823 result = (CFXMLTreeRef)CFXMLParserGetDocument(parser);
1824 if (result) CFRelease(result);
1825 result = NULL;
1826 }
1827 CFRelease(parser);
1828 return result;
1829 }
1830
1831 CFXMLTreeRef CFXMLTreeCreateFromData(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex parserVersion) {
1832 return CFXMLTreeCreateFromDataWithError(allocator, xmlData, dataSource, parseOptions, parserVersion, NULL);
1833 }
1834
/* Keys of the error dictionary that CFXMLTreeCreateFromDataWithError() fills in when parsing fails: the error description string, and the line number, location and parser status code as numbers. */
1835 CONST_STRING_DECL(kCFXMLTreeErrorDescription, "kCFXMLTreeErrorDescription");
1836 CONST_STRING_DECL(kCFXMLTreeErrorLineNumber, "kCFXMLTreeErrorLineNumber");
1837 CONST_STRING_DECL(kCFXMLTreeErrorLocation, "kCFXMLTreeErrorLocation");
1838 CONST_STRING_DECL(kCFXMLTreeErrorStatusCode, "kCFXMLTreeErrorStatusCode");
1839
1840 CFXMLTreeRef CFXMLTreeCreateFromDataWithError(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex parserVersion, CFDictionaryRef *errorDict) {
1841 CFXMLParserRef parser;
1842 CFXMLParserCallBacks callbacks;
1843 CFXMLTreeRef result;
1844
1845 __CFGenericValidateType(xmlData, CFDataGetTypeID());
1846 CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__);
1847
1848 callbacks.createXMLStructure = _XMLTreeCreateXMLStructure;
1849 callbacks.addChild = _XMLTreeAddChild;
1850 callbacks.endXMLStructure = _XMLTreeEndXMLStructure;
1851 callbacks.resolveExternalEntity = NULL;
1852 callbacks.handleError = NULL;
1853 parser = CFXMLParserCreate(allocator, xmlData, dataSource, parseOptions, parserVersion, &callbacks, NULL);
1854
1855 if (CFXMLParserParse(parser)) {
1856 result = (CFXMLTreeRef)CFXMLParserGetDocument(parser);
1857 } else {
1858 if (errorDict) { // collect the error dictionary
1859 *errorDict = CFDictionaryCreateMutable(allocator, 4, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
1860 if (*errorDict) {
1861 CFIndex rawnum;
1862 CFNumberRef cfnum;
1863 CFStringRef errstring;
1864
1865 rawnum = CFXMLParserGetLocation(parser);
1866 cfnum = CFNumberCreate(allocator, kCFNumberSInt32Type, &rawnum);
1867 if(cfnum) {
1868 CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorLocation, cfnum);
1869 CFRelease(cfnum);
1870 }
1871
1872 rawnum = CFXMLParserGetLineNumber(parser);
1873 cfnum = CFNumberCreate(allocator, kCFNumberSInt32Type, &rawnum);
1874 if(cfnum) {
1875 CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorLineNumber, cfnum);
1876 CFRelease(cfnum);
1877 }
1878
1879 rawnum = CFXMLParserGetStatusCode(parser);
1880 cfnum = CFNumberCreate(allocator, kCFNumberSInt32Type, &rawnum);
1881 if(cfnum) {
1882 CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorStatusCode, cfnum);
1883 CFRelease(cfnum);
1884 }
1885
1886 errstring = CFXMLParserCopyErrorDescription(parser);
1887 if(errstring) {
1888 CFDictionaryAddValue((CFMutableDictionaryRef)*errorDict, kCFXMLTreeErrorDescription, errstring);
1889 CFRelease(errstring);
1890 }
1891 }
1892 }
1893 result = (CFXMLTreeRef)CFXMLParserGetDocument(parser);
1894 if (result) CFRelease(result);
1895 result = NULL;
1896 }
1897 CFRelease(parser);
1898 return result;
1899 }
1900
1901 /*
1902 At the very least we need to do <, >, &, ", and '. In addition, we'll have to do everything else in the string.
1903 We should also be handling items that are up over certain values correctly.
1904 */
1905 CFStringRef CFXMLCreateStringByEscapingEntities(CFAllocatorRef allocator, CFStringRef string, CFDictionaryRef entitiesDictionary) {
1906 CFAssert1(string != NULL, __kCFLogAssertion, "%s(): NULL string not permitted.", __PRETTY_FUNCTION__);
1907 CFMutableStringRef newString = CFStringCreateMutable(allocator, 0); // unbounded mutable string
1908 CFMutableCharacterSetRef startChars = CFCharacterSetCreateMutable(allocator);
1909
1910 CFStringInlineBuffer inlineBuf;
1911 CFIndex idx = 0;
1912 CFIndex mark = idx;
1913 CFIndex stringLength = CFStringGetLength(string);
1914 UniChar uc;
1915
1916 CFCharacterSetAddCharactersInString(startChars, CFSTR("&<>'\""));
1917
1918 CFStringInitInlineBuffer(string, &inlineBuf, CFRangeMake(0, stringLength));
1919 for(idx = 0; idx < stringLength; idx++) {
1920 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, idx);
1921 if(CFCharacterSetIsCharacterMember(startChars, uc)) {
1922 CFStringRef previousSubstring = CFStringCreateWithSubstring(allocator, string, CFRangeMake(mark, idx - mark));
1923 CFStringAppend(newString, previousSubstring);
1924 CFRelease(previousSubstring);
1925 switch(uc) {
1926 case '&':
1927 CFStringAppend(newString, CFSTR("&amp;"));
1928 break;
1929 case '<':
1930 CFStringAppend(newString, CFSTR("&lt;"));
1931 break;
1932 case '>':
1933 CFStringAppend(newString, CFSTR("&gt;"));
1934 break;
1935 case '\'':
1936 CFStringAppend(newString, CFSTR("&apos;"));
1937 break;
1938 case '"':
1939 CFStringAppend(newString, CFSTR("&quot;"));
1940 break;
1941 }
1942 mark = idx + 1;
1943 }
1944 }
1945 // Copy the remainder to the output string before returning.
1946 CFStringRef remainder = CFStringCreateWithSubstring(allocator, string, CFRangeMake(mark, idx - mark));
1947 if (NULL != remainder) {
1948 CFStringAppend(newString, remainder);
1949 CFRelease(remainder);
1950 }
1951
1952 CFRelease(startChars);
1953 return newString;
1954 }
1955
1956 CFStringRef CFXMLCreateStringByUnescapingEntities(CFAllocatorRef allocator, CFStringRef string, CFDictionaryRef entitiesDictionary) {
1957 CFAssert1(string != NULL, __kCFLogAssertion, "%s(): NULL string not permitted.", __PRETTY_FUNCTION__);
1958
1959 CFStringInlineBuffer inlineBuf; /* use this for fast traversal of the string in question */
1960 CFStringRef sub;
1961 CFIndex lastChunkStart, length = CFStringGetLength(string);
1962 CFIndex i, entityStart;
1963 UniChar uc;
1964 UInt32 entity;
1965 int base;
1966 CFMutableDictionaryRef fullReplDict = entitiesDictionary ? CFDictionaryCreateMutableCopy(allocator, 0, entitiesDictionary) : CFDictionaryCreateMutable(allocator, 0, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
1967
1968 CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("amp"), (const void *)CFSTR("&"));
1969 CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("quot"), (const void *)CFSTR("\""));
1970 CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("lt"), (const void *)CFSTR("<"));
1971 CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("gt"), (const void *)CFSTR(">"));
1972 CFDictionaryAddValue(fullReplDict, (const void *)CFSTR("apos"), (const void *)CFSTR("'"));
1973
1974 CFStringInitInlineBuffer(string, &inlineBuf, CFRangeMake(0, length - 1));
1975 CFMutableStringRef newString = CFStringCreateMutable(allocator, 0);
1976
1977 lastChunkStart = 0;
1978 // Scan through the string in its entirety
1979 for(i = 0; i < length; ) {
1980 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++; // grab the next character and move i.
1981
1982 if(uc == '&') {
1983 entityStart = i - 1;
1984 entity = 0xFFFF; // set this to a not-Unicode character as sentinel
1985 // we've hit the beginning of an entity. Copy everything from lastChunkStart to this point.
1986 if(lastChunkStart < i - 1) {
1987 sub = CFStringCreateWithSubstring(allocator, string, CFRangeMake(lastChunkStart, (i - 1) - lastChunkStart));
1988 CFStringAppend(newString, sub);
1989 CFRelease(sub);
1990 }
1991
1992 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++; // grab the next character and move i.
1993 // Now we can process the entity reference itself
1994 if(uc == '#') { // this is a numeric entity.
1995 base = 10;
1996 entity = 0;
1997 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++;
1998
1999 if(uc == 'x') { // only lowercase x allowed. Translating numeric entity as hexadecimal.
2000 base = 16;
2001 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++;
2002 }
2003
2004 // process the provided digits 'til we're finished
2005 while(true) {
2006 if (uc >= '0' && uc <= '9')
2007 entity = entity * base + (uc-'0');
2008 else if (uc >= 'a' && uc <= 'f' && base == 16)
2009 entity = entity * base + (uc-'a'+10);
2010 else if (uc >= 'A' && uc <= 'F' && base == 16)
2011 entity = entity * base + (uc-'A'+10);
2012 else break;
2013
2014 if (i < length) {
2015 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++;
2016 }
2017 else
2018 break;
2019 }
2020 }
2021
2022 // Scan to the end of the entity
2023 while(uc != ';' && i < length) {
2024 uc = CFStringGetCharacterFromInlineBuffer(&inlineBuf, i); i++;
2025 }
2026
2027 if(0xFFFF != entity) { // it was numeric, and translated.
2028 // Now, output the result fo the entity
2029 if(entity >= 0x10000) {
2030 UniChar characters[2] = { ((entity - 0x10000) >> 10) + 0xD800, ((entity - 0x10000) & 0x3ff) + 0xDC00 };
2031 CFStringAppendCharacters(newString, characters, 2);
2032 } else {
2033 UniChar character = entity;
2034 CFStringAppendCharacters(newString, &character, 1);
2035 }
2036 } else { // it wasn't numeric.
2037 sub = CFStringCreateWithSubstring(allocator, string, CFRangeMake(entityStart + 1, (i - entityStart - 2))); // This trims off the & and ; from the string, so we can use it against the dictionary itself.
2038 CFStringRef replacementString = (CFStringRef)CFDictionaryGetValue(fullReplDict, sub);
2039 if(replacementString) {
2040 CFStringAppend(newString, replacementString);
2041 } else {
2042 CFRelease(sub); // let the old substring go, since we didn't find it in the dictionary
2043 sub = CFStringCreateWithSubstring(allocator, string, CFRangeMake(entityStart, (i - entityStart))); // create a new one, including the & and ;
2044 CFStringAppend(newString, sub); // ...and append that.
2045 }
2046 CFRelease(sub); // in either case, release the most-recent "sub"
2047 }
2048
2049 // move the lastChunkStart to the beginning of the next chunk.
2050 lastChunkStart = i;
2051 }
2052 }
2053 if(lastChunkStart < length) { // we've come out of the loop, let's get the rest of the string and tack it on.
2054 sub = CFStringCreateWithSubstring(allocator, string, CFRangeMake(lastChunkStart, i - lastChunkStart));
2055 CFStringAppend(newString, sub);
2056 CFRelease(sub);
2057 }
2058
2059 CFRelease(fullReplDict);
2060
2061 return newString;
2062 }
2063
2064 #pragma GCC diagnostic pop