2 * Copyright (c) 1999-2009 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
32 * OSUnserializeXML.y created by rsulack on Tue Oct 12 1999
35 // parser for unserializing OSContainer objects serialized to XML
38 // bison -p OSUnserializeXML OSUnserializeXML.y
39 // head -50 OSUnserializeXML.y > OSUnserializeXML.cpp
40 // sed -e "s/#include <stdio.h>//" < OSUnserializeXML.tab.c >> OSUnserializeXML.cpp
42 // when changing code check in both OSUnserializeXML.y and OSUnserializeXML.cpp
48 // DO NOT EDIT OSUnserializeXML.cpp!
63 #include <libkern/c++/OSMetaClass.h>
64 #include <libkern/c++/OSContainers.h>
65 #include <libkern/c++/OSLib.h>
// Parser configuration macros: every semantic value is a pointer to object_t,
// and the identifier `state` is named as both the parse parameter and the
// lex parameter (the latter cast to parser_state_t *).
67 #define YYSTYPE object_t *
68 #define YYPARSE_PARAM state
69 #define YYLEX_PARAM (parser_state_t *)state
71 // this is the internal struct used to hold objects on parser stack
72 // it represents objects both before and after they have been created
73 typedef struct object {
// head of the child list; buildDictionary()/buildArray() walk and relink it
76 struct object *elements;
78 OSString *key; // for dictionary
80 void *data; // for data
81 char *string; // for string & symbol
82 long long number; // for number
86 // this code is reentrant, this structure contains all
87 // state information for the parsing of a single buffer
// (OSUnserializeXML() allocates and initialises one instance per call)
88 typedef struct parser_state {
89 const char *parseBuffer; // start of text to be parsed
90 int parseBufferIndex; // current index into text
91 int lineNumber; // current line number
92 object_t *objects; // internal objects in use
93 object_t *freeObjects; // internal objects that are free
94 OSDictionary *tags; // used to remember "ID" tags
95 OSString **errorString; // parse error with line
96 OSObject *parsedObject; // resultant object of parsed text
// STATE casts whatever `state` names in the current scope to parser_state_t *.
99 #define STATE ((parser_state_t *)state)
// yyerror(s) forwards the message to OSUnserializeerror() together with STATE.
102 #define yyerror(s) OSUnserializeerror(STATE, (s))
103 static int OSUnserializeerror(parser_state_t *state, const char *s);
// Lexer entry point: receives the slot for the token's semantic value and the
// parser state.
105 static int yylex(YYSTYPE *lvalp, parser_state_t *state);
// Bookkeeping for the internal object_t nodes and for the "ID"/"IDREF" table.
107 static object_t *newObject(parser_state_t *state);
108 static void freeObject(parser_state_t *state, object_t *o);
109 static void rememberObject(parser_state_t *state, int tag, OSObject *o);
110 static object_t *retrieveObject(parser_state_t *state, int tag);
111 static void cleanupObjects(parser_state_t *state);
// Builders invoked from the grammar actions; each takes the parsed object_t
// and returns an object_t.
113 static object_t *buildDictionary(parser_state_t *state, object_t *o);
114 static object_t *buildArray(parser_state_t *state, object_t *o);
115 static object_t *buildSet(parser_state_t *state, object_t *o);
116 static object_t *buildString(parser_state_t *state, object_t *o);
117 static object_t *buildData(parser_state_t *state, object_t *o);
118 static object_t *buildNumber(parser_state_t *state, object_t *o);
119 static object_t *buildBoolean(parser_state_t *state, object_t *o);
// Allocation routines declared here and defined elsewhere.
122 extern void *kern_os_malloc(size_t size);
123 extern void *kern_os_realloc(void * addr, size_t size);
124 extern void kern_os_free(void * addr);
// From here on, malloc/realloc/free in this file expand to the kern_os_*
// routines above; free() additionally casts its argument to void *.
128 #define malloc(s) kern_os_malloc(s)
129 #define realloc(a, s) kern_os_realloc(a, s)
130 #define free(a) kern_os_free((void *)a)
143 %% /* Grammar rules and actions follow */
// input: an empty buffer and a SYNTAX_ERROR token both report through
// yyerror(); a successfully parsed object is published in
// STATE->parsedObject and its wrapper node is handed back to freeObject().
145 input: /* empty */ { yyerror("unexpected end of buffer");
148 | object { STATE->parsedObject = $1->object;
150 freeObject(STATE, $1);
153 | SYNTAX_ERROR { yyerror("syntax error");
// object: each alternative hands the parsed node to the matching build*()
// helper.
158 object: dict { $$ = buildDictionary(STATE, $1); }
159 | array { $$ = buildArray(STATE, $1); }
160 | set { $$ = buildSet(STATE, $1); }
161 | string { $$ = buildString(STATE, $1); }
162 | data { $$ = buildData(STATE, $1); }
163 | number { $$ = buildNumber(STATE, $1); }
164 | boolean { $$ = buildBoolean(STATE, $1); }
// idref: look up a previously remembered object by its numeric id. The
// found object is retained; the "forward reference detected" error is
// presumably the not-found path (the branch condition is not shown here).
165 | idref { $$ = retrieveObject(STATE, $1->idref);
167 $$->object->retain();
169 yyerror("forward reference detected");
172 freeObject(STATE, $1);
176 //------------------------------------------------------------------------------
// dict: '{' and '}' are the tokens yylex() returns for the "dict" tag.
178 dict: '{' '}' { $$ = $1;
181 | '{' pairs '}' { $$ = $1;
188 | pairs pair { $$ = $2;
// pair: the key node is reused as the pair node. Its built OSString moves
// into ->key, the value's object moves into ->object, and the value's
// wrapper node is freed.
193 pair: key object { $$ = $1;
194 $$->key = (OSString *)$$->object;
195 $$->object = $2->object;
198 freeObject(STATE, $2);
202 key: KEY { $$ = buildString(STATE, $1); }
205 //------------------------------------------------------------------------------
// array: '(' and ')' are the tokens yylex() returns for the "array" tag.
207 array: '(' ')' { $$ = $1;
210 | '(' elements ')' { $$ = $1;
// set: bracket tokens delimit a set the same way parentheses delimit an array.
216 set: '[' ']' { $$ = $1;
219 | '[' elements ']' { $$ = $1;
// elements: for the recursive alternative the newest object ($2) becomes the
// rule's value.
225 elements: object { $$ = $1;
228 | elements object { $$ = $2;
233 //------------------------------------------------------------------------------
// Error reporter behind the yyerror() macro.
// If the caller supplied an errorString slot, formats
// "OSUnserializeXML: <s> near line <lineNumber>\n" into a 128-byte stack
// buffer (snprintf truncates longer messages) and stores an OSString created
// from that text in the slot. Nothing is formatted when errorString is NULL.
253 OSUnserializeerror(parser_state_t * state, const char *s) /* Called by yyparse on errors */
255 if (state->errorString) {
256 char tempString[128];
257 snprintf(tempString, 128, "OSUnserializeXML: %s near line %d\n", s, state->lineNumber);
258 *(state->errorString) = OSString::withCString(tempString);
// Buffer size for a tag name, attribute name or attribute value (including
// the terminating NUL), and the number of attribute slots; getTag() and
// yylex() size their arrays with these.
264 #define TAG_MAX_LENGTH 32
265 #define TAG_MAX_ATTRIBUTES 32
// Cursor helpers over state->parseBuffer; they require a variable named
// `state` in scope. nextChar() advances the index before reading.
272 #define currentChar() (state->parseBuffer[state->parseBufferIndex])
273 #define nextChar() (state->parseBuffer[++state->parseBufferIndex])
274 #define prevChar() (state->parseBuffer[state->parseBufferIndex - 1])
// Character classes. isSpace() covers only blank and tab (not newline).
276 #define isSpace(c) ((c) == ' ' || (c) == '\t')
277 #define isAlpha(c) (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z'))
278 #define isDigit(c) ((c) >= '0' && (c) <= '9')
// Despite its name, isAlphaDigit() matches only lowercase 'a'..'f', so
// isHexDigit() rejects uppercase hex digits.
279 #define isAlphaDigit(c) ((c) >= 'a' && (c) <= 'f')
280 #define isHexDigit(c) (isDigit(c) || isAlphaDigit(c))
// Characters allowed in tag and attribute names: letters, digits and '-'.
281 #define isAlphaNumeric(c) (isAlpha(c) || isDigit(c) || ((c) == '-'))
// Scans one markup tag starting at the current buffer position.
//   state          - parser state; its buffer index and line number advance.
//   tag            - receives the tag name (at most TAG_MAX_LENGTH - 1 chars).
//   attributeCount - the slot index used while filling attributes/values.
//   attributes     - receives attribute names, NUL-terminated.
//   values         - receives the matching attribute values, NUL-terminated.
// Returns a TAG_* code; TAG_BAD is returned for any malformed input detected
// below (missing '<', name not starting with a letter, over-long name or
// value, missing '=' or opening quote, too many attributes, missing '>').
284 getTag(parser_state_t *state,
285 char tag[TAG_MAX_LENGTH],
287 char attributes[TAG_MAX_ATTRIBUTES][TAG_MAX_LENGTH],
288 char values[TAG_MAX_ATTRIBUTES][TAG_MAX_LENGTH] )
291 int c = currentChar();
292 int tagType = TAG_START;
296 if (c != '<') return TAG_BAD;
297 c = nextChar(); // skip '<'
300 // <!TAG declarations >
// A comment is recognised by two consecutive '-' characters at this point.
304 bool isComment = (c == '-') && ((c = nextChar()) != 0) && (c == '-');
305 if (!isComment && !isAlpha(c)) return TAG_BAD; // <!1, <!-A, <!eos
// Skip forward, counting newlines so error messages keep the right line.
307 while (c && (c = nextChar()) != 0) {
308 if (c == '\n') state->lineNumber++;
310 if (c != '-') continue;
312 if (c != '-') continue;
319 if (isComment) break;
326 // <? Processing Instructions ?>
// Skip forward until a '?' is seen, still counting newlines.
328 while ((c = nextChar()) != 0) {
329 if (c == '\n') state->lineNumber++;
330 if (c != '?') continue;
344 c = nextChar(); // skip '/'
347 if (!isAlpha(c)) return TAG_BAD;
349 /* find end of tag while copying it */
350 while (isAlphaNumeric(c)) {
// leave room for the terminating NUL
353 if (length >= (TAG_MAX_LENGTH - 1)) return TAG_BAD;
358 // printf("tag %s, type %d\n", tag, tagType);
360 // look for attributes of the form attribute = "value" ...
361 while ((c != '>') && (c != '/')) {
362 while (isSpace(c)) c = nextChar();
// copy the attribute name
365 while (isAlphaNumeric(c)) {
366 attributes[*attributeCount][length++] = c;
367 if (length >= (TAG_MAX_LENGTH - 1)) return TAG_BAD;
370 attributes[*attributeCount][length] = 0;
372 while (isSpace(c)) c = nextChar();
374 if (c != '=') return TAG_BAD;
377 while (isSpace(c)) c = nextChar();
379 if (c != '"') return TAG_BAD;
// copy the attribute value
383 values[*attributeCount][length++] = c;
384 if (length >= (TAG_MAX_LENGTH - 1)) return TAG_BAD;
387 values[*attributeCount][length] = 0;
389 c = nextChar(); // skip closing quote
391 // printf(" attribute '%s' = '%s', nextchar = '%c'\n",
392 // attributes[*attributeCount], values[*attributeCount], c);
395 if (*attributeCount >= TAG_MAX_ATTRIBUTES) return TAG_BAD;
399 c = nextChar(); // skip '/'
402 if (c != '>') return TAG_BAD;
403 c = nextChar(); // skip '>'
// Reads character data starting at the current buffer position and copies it,
// with entity references decoded, into a freshly malloc'ed buffer.
// Returns 0 when the text is not followed by '<'. The error path frees the
// temporary buffer.
409 getString(parser_state_t *state)
411 int c = currentChar();
412 int start, length, i, j;
415 start = state->parseBufferIndex;
416 /* find end of string */
419 if (c == '\n') state->lineNumber++;
// the text must be terminated by the start of a tag
426 if (c != '<') return 0;
428 length = state->parseBufferIndex - start;
430 /* copy to null terminated buffer */
431 tempString = (char *)malloc(length + 1);
432 if (tempString == 0) {
433 printf("OSUnserializeXML: can't alloc temp memory\n");
437 // copy out string in tempString
438 // "&amp;" -> '&', "&lt;" -> '<', "&gt;" -> '>'
442 c = state->parseBuffer[start + i++];
// an entity needs at least three more characters inside the string
446 if ((i+3) > length) goto error;
447 c = state->parseBuffer[start + i++];
// remainder of "&lt;"
449 if (state->parseBuffer[start + i++] != 't') goto error;
450 if (state->parseBuffer[start + i++] != ';') goto error;
451 tempString[j++] = '<';
// remainder of "&gt;"
455 if (state->parseBuffer[start + i++] != 't') goto error;
456 if (state->parseBuffer[start + i++] != ';') goto error;
457 tempString[j++] = '>';
460 if ((i+3) > length) goto error;
// remainder of "&amp;"
462 if (state->parseBuffer[start + i++] != 'm') goto error;
463 if (state->parseBuffer[start + i++] != 'p') goto error;
464 if (state->parseBuffer[start + i++] != ';') goto error;
465 tempString[j++] = '&';
473 // printf("string %s\n", tempString);
478 if (tempString) free(tempString);
// Parses an integer literal at the current buffer position, accumulating it
// in an unsigned 64-bit value.
// Digits are folded in as n * base + digit. The hex loop accepts only the
// characters matched by isHexDigit(), i.e. '0'-'9' and lowercase 'a'-'f'.
// No overflow check is visible; unsigned arithmetic wraps.
483 getNumber(parser_state_t *state)
485 unsigned long long n = 0;
488 int c = currentChar();
503 n = (n * base + c - '0');
// negate via signed multiplication, then store back as unsigned
507 n = (unsigned long long)((long long)n * (long long)-1);
510 while(isHexDigit(c)) {
512 n = (n * base + c - '0');
514 n = (n * base + 0xa + c - 'a');
519 // printf("number 0x%x\n", (unsigned long)n);
523 // taken from CFXMLParsing/CFPropertyList.c
// Base64 decode table indexed by 7-bit ASCII code: 'A'-'Z' -> 0..25,
// 'a'-'z' -> 26..51, '0'-'9' -> 52..61, '+' -> 62, '/' -> 63.
// The padding character '=' maps to 0; every other character maps to -1.
// The table has exactly 128 entries, so an index must lie in 0..127.
525 static const signed char __CFPLDataDecodeTable[128] = {
526 /* 000 */ -1, -1, -1, -1, -1, -1, -1, -1,
527 /* 010 */ -1, -1, -1, -1, -1, -1, -1, -1,
528 /* 020 */ -1, -1, -1, -1, -1, -1, -1, -1,
529 /* 030 */ -1, -1, -1, -1, -1, -1, -1, -1,
530 /* ' ' */ -1, -1, -1, -1, -1, -1, -1, -1,
531 /* '(' */ -1, -1, -1, 62, -1, -1, -1, 63,
532 /* '0' */ 52, 53, 54, 55, 56, 57, 58, 59,
533 /* '8' */ 60, 61, -1, -1, -1, 0, -1, -1,
534 /* '@' */ -1, 0, 1, 2, 3, 4, 5, 6,
535 /* 'H' */ 7, 8, 9, 10, 11, 12, 13, 14,
536 /* 'P' */ 15, 16, 17, 18, 19, 20, 21, 22,
537 /* 'X' */ 23, 24, 25, -1, -1, -1, -1, -1,
538 /* '`' */ -1, 26, 27, 28, 29, 30, 31, 32,
539 /* 'h' */ 33, 34, 35, 36, 37, 38, 39, 40,
540 /* 'p' */ 41, 42, 43, 44, 45, 46, 47, 48,
541 /* 'x' */ 49, 50, 51, -1, -1, -1, -1, -1
// Initial size and growth increment, in bytes, of the data decode buffers.
544 #define DATA_ALLOC_SIZE 4096
// Decodes base64 text at the current buffer position into a malloc'ed byte
// buffer, using __CFPLDataDecodeTable. `size` is presumably where the decoded
// length is reported; the store is not among the lines shown here.
547 getCFEncodedData(parser_state_t *state, unsigned int *size)
549 int numeq = 0, acc = 0, cntr = 0;
// NOTE(review): tmpbuflen starts at 0 although DATA_ALLOC_SIZE bytes are
// allocated up front, so the first capacity check below triggers a realloc.
550 int tmpbufpos = 0, tmpbuflen = 0;
551 unsigned char *tmpbuf = (unsigned char *)malloc(DATA_ALLOC_SIZE);
553 int c = currentChar();
// numeq counts consecutive '=' padding characters
562 if (c == '=') numeq++; else numeq = 0;
563 if (c == '\n') state->lineNumber++;
// NOTE(review): the table has 128 entries and is indexed with c directly;
// confirm c is limited to 0..127 before this lookup (a byte with the high bit
// set would index outside the table).
564 if (__CFPLDataDecodeTable[c] < 0) {
570 acc += __CFPLDataDecodeTable[c];
// every fourth input character completes a group of up to three output bytes
571 if (0 == (cntr & 0x3)) {
572 if (tmpbuflen <= tmpbufpos + 2) {
573 tmpbuflen += DATA_ALLOC_SIZE;
// NOTE(review): the realloc result is assigned straight to tmpbuf; no NULL
// check is visible before the stores below.
574 tmpbuf = (unsigned char *)realloc(tmpbuf, tmpbuflen);
576 tmpbuf[tmpbufpos++] = (acc >> 16) & 0xff;
578 tmpbuf[tmpbufpos++] = (acc >> 8) & 0xff;
580 tmpbuf[tmpbufpos++] = acc & 0xff;
// Decodes hexadecimal text at the current buffer position into a malloc'ed
// byte buffer, two hex characters per output byte. Letter digits are
// recognised through isAlphaDigit(), i.e. lowercase 'a'-'f' only.
593 getHexData(parser_state_t *state, unsigned int *size)
// start: base of the buffer; d: write cursor; lastStart: where the most
// recently added DATA_ALLOC_SIZE chunk begins.
596 unsigned char *d, *start, *lastStart;
598 start = lastStart = d = (unsigned char *)malloc(DATA_ALLOC_SIZE);
// skip blanks and tabs between digits
603 if (isSpace(c)) while ((c = nextChar()) != 0 && isSpace(c)) {};
// high nibble
613 } else if (isAlphaDigit(c)) {
614 *d = (0xa + (c - 'a')) << 4;
// low nibble
623 } else if (isAlphaDigit(c)) {
624 *d |= 0xa + (c - 'a');
// the current chunk is full: grow by another DATA_ALLOC_SIZE bytes
630 if ((d - lastStart) >= DATA_ALLOC_SIZE) {
631 int oldsize = d - start;
// NOTE(review): the realloc result overwrites start with no visible NULL check.
632 start = (unsigned char *)realloc(start, oldsize + DATA_ALLOC_SIZE);
633 d = lastStart = start + oldsize;
// Lexer: reads the next tag through getTag(), allocates an object_t for it
// (stored through lvalp) and returns the token for the tag name.
// Returns SYNTAX_ERROR for a bad tag; tags classified TAG_IGNORE are skipped.
649 yylex(YYSTYPE *lvalp, parser_state_t *state)
653 char tag[TAG_MAX_LENGTH];
655 char attributes[TAG_MAX_ATTRIBUTES][TAG_MAX_LENGTH];
656 char values[TAG_MAX_ATTRIBUTES][TAG_MAX_LENGTH];
662 /* skip white space */
663 if (isSpace(c)) while ((c = nextChar()) != 0 && isSpace(c)) {};
665 /* keep track of line number, don't return \n's */
672 // end of the buffer?
675 tagType = getTag(STATE, tag, &attributeCount, attributes, values);
676 if (tagType == TAG_BAD) return SYNTAX_ERROR;
677 if (tagType == TAG_IGNORE) goto top;
679 // handle allocation and check for "ID" and "IDREF" tags up front
680 *lvalp = object = newObject(STATE);
682 for (i=0; i < attributeCount; i++) {
683 if (attributes[i][0] == 'I' && attributes[i][1] == 'D') {
684 // check for idref's, note: we ignore the tag, for
685 // this to work correctly, all idrefs must be unique
686 // across the whole serialization
687 if (attributes[i][2] == 'R' && attributes[i][3] == 'E' &&
688 attributes[i][4] == 'F' && !attributes[i][5]) {
// an IDREF is only accepted on an empty tag
689 if (tagType != TAG_EMPTY) return SYNTAX_ERROR;
// base 0: strtol accepts decimal, 0x-prefixed hex and 0-prefixed octal
690 object->idref = strtol(values[i], NULL, 0);
// attribute name is exactly "ID"
694 if (!attributes[i][2]) {
695 object->idref = strtol(values[i], NULL, 0);
// "array" maps to the '(' / ')' tokens
704 if (!strcmp(tag, "array")) {
705 if (tagType == TAG_EMPTY) {
706 object->elements = NULL;
709 return (tagType == TAG_START) ? '(' : ')';
// "dict" maps to the '{' / '}' tokens
713 if (!strcmp(tag, "dict")) {
714 if (tagType == TAG_EMPTY) {
715 object->elements = NULL;
718 return (tagType == TAG_START) ? '{' : '}';
// "data": payload is hex when format="hex" is present, CF encoded otherwise
720 if (!strcmp(tag, "data")) {
722 if (tagType == TAG_EMPTY) {
728 bool isHexFormat = false;
729 for (i=0; i < attributeCount; i++) {
730 if (!strcmp(attributes[i], "format") && !strcmp(values[i], "hex")) {
735 // CF encoded is the default form
737 object->data = getHexData(STATE, &size);
739 object->data = getCFEncodedData(STATE, &size);
// the payload must be followed by the matching end tag
742 if ((getTag(STATE, tag, &attributeCount, attributes, values) != TAG_END) || strcmp(tag, "data")) {
749 if (!strcmp(tag, "false")) {
750 if (tagType == TAG_EMPTY) {
// "integer": bit width defaults to 64 unless a size attribute overrides it
757 if (!strcmp(tag, "integer")) {
758 object->size = 64; // default
759 for (i=0; i < attributeCount; i++) {
760 if (!strcmp(attributes[i], "size")) {
761 object->size = strtoul(values[i], NULL, 0);
764 if (tagType == TAG_EMPTY) {
768 object->number = getNumber(STATE);
769 if ((getTag(STATE, tag, &attributeCount, attributes, values) != TAG_END) || strcmp(tag, "integer")) {
// "key": an empty key tag is a syntax error
776 if (!strcmp(tag, "key")) {
777 if (tagType == TAG_EMPTY) return SYNTAX_ERROR;
778 object->string = getString(STATE);
779 if (!object->string) {
782 if ((getTag(STATE, tag, &attributeCount, attributes, values) != TAG_END)
783 || strcmp(tag, "key")) {
// "plist": the node allocated above is handed back to the free list
790 if (!strcmp(tag, "plist")) {
791 freeObject(STATE, object);
796 if (!strcmp(tag, "string")) {
797 if (tagType == TAG_EMPTY) {
// an empty string tag yields a one-byte buffer holding only the NUL
// NOTE(review): the malloc(1) result is written without a NULL check.
798 object->string = (char *)malloc(1);
799 object->string[0] = 0;
802 object->string = getString(STATE);
803 if (!object->string) {
806 if ((getTag(STATE, tag, &attributeCount, attributes, values) != TAG_END)
807 || strcmp(tag, "string")) {
812 if (!strcmp(tag, "set")) {
813 if (tagType == TAG_EMPTY) {
814 object->elements = NULL;
817 if (tagType == TAG_START) {
825 if (!strcmp(tag, "true")) {
826 if (tagType == TAG_EMPTY) {
837 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
838 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
839 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
841 // "java" like allocation, if this code hits a syntax error in
842 // the middle of the parsed string we just bail with pointers hanging
843 // all over the place, this code helps keep it all together
845 //static int object_count = 0;
// Hands out an object_t node: reuses the head of state->freeObjects when that
// list is non-empty, otherwise allocates one with malloc. The bzero() and the
// ->free link to state->objects are shown here; which branch they belong to
// and the NULL handling for malloc are not.
848 newObject(parser_state_t *state)
852 if (state->freeObjects) {
// pop the head of the free list
853 o = state->freeObjects;
854 state->freeObjects = state->freeObjects->next;
856 o = (object_t *)malloc(sizeof(object_t));
858 bzero(o, sizeof(object_t));
// chain through ->free onto the list headed by state->objects
859 o->free = state->objects;
// Returns a node to the parser's free list by pushing it onto the head of
// state->freeObjects through its ->next link. The node's memory is not
// released here.
867 freeObject(parser_state_t * state, object_t *o)
869 o->next = state->freeObjects;
870 state->freeObjects = o;
// Walks the list headed by state->objects and drops whatever each node still
// holds. The release of o->object is shown here; the remaining trace comments
// indicate data, key and string are handled in the same way.
874 cleanupObjects(parser_state_t *state)
876 object_t *t, *o = state->objects;
880 // printf("OSUnserializeXML: releasing object o=%x object=%x\n", (int)o, (int)o->object);
881 o->object->release();
884 // printf("OSUnserializeXML: freeing object o=%x data=%x\n", (int)o, (int)o->data);
888 // printf("OSUnserializeXML: releasing object o=%x key=%x\n", (int)o, (int)o->key);
892 // printf("OSUnserializeXML: freeing object o=%x string=%x\n", (int)o, (int)o->string);
901 // printf("object_count = %d\n", object_count);
904 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
905 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
906 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
// Records object `o` in state->tags under the decimal text form of `tag`, so
// that a later IDREF can find it through retrieveObject().
909 rememberObject(parser_state_t *state, int tag, OSObject *o)
// NOTE(review): tag is an int but is formatted with "%u".
912 snprintf(key, 16, "%u", tag);
914 // printf("remember key %s\n", key);
916 state->tags->setObject(key, o);
// Looks up the object previously stored for `tag` by rememberObject(), using
// the same decimal-text key, and wraps it in a new object_t node.
// NOTE(review): the grammar's idref action reports "forward reference
// detected", which suggests a not-found result is possible; confirm how a
// missing entry is signalled.
920 retrieveObject(parser_state_t *state, int tag)
925 snprintf(key, 16, "%u", tag);
927 // printf("retrieve key '%s'\n", key);
929 ref = state->tags->getObject(key);
932 o = newObject(state);
937 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
938 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
939 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
// Turns the pair list hanging off header->elements into an OSDictionary.
// The list is first re-linked (which also counts it), the dictionary is
// created with that capacity and registered under header->idref when the id
// is non-negative. Each pair is then inserted, its object reference is
// released, and its node is returned to the free list.
942 buildDictionary(parser_state_t *state, object_t * header)
948 // get count and reverse order
949 o = header->elements;
950 header->elements = 0;
// push each node onto the front of the rebuilt list
956 t->next = header->elements;
957 header->elements = t;
960 dict = OSDictionary::withCapacity(count);
961 if (header->idref >= 0) rememberObject(state, header->idref, dict);
963 o = header->elements;
965 dict->setObject(o->key, o->object);
// drop the parser's reference to the value
968 o->object->release();
974 freeObject(state, t);
// Turns the element list hanging off header->elements into an OSArray.
// The list is first re-linked (which also counts it), the array is created
// with that capacity and registered under header->idref when the id is
// non-negative. Each element is then appended, its reference is released,
// and its node is returned to the free list.
982 buildArray(parser_state_t *state, object_t * header)
988 // get count and reverse order
989 o = header->elements;
990 header->elements = 0;
// push each node onto the front of the rebuilt list
996 t->next = header->elements;
997 header->elements = t;
1000 array = OSArray::withCapacity(count);
1001 if (header->idref >= 0) rememberObject(state, header->idref, array);
1003 o = header->elements;
1005 array->setObject(o->object);
// drop the parser's reference to the element
1007 o->object->release();
1012 freeObject(state, t);
// Builds an OSSet by first building an OSArray from the same header and then
// constructing the set from that array, sized to the array's capacity.
1020 buildSet(parser_state_t *state, object_t *header)
1022 object_t *o = buildArray(state, header);
1024 OSArray *array = (OSArray *)o->object;
1025 OSSet *set = OSSet::withArray(array, array->getCapacity());
1027 // write over the reference created in buildArray
1028 if (header->idref >= 0) rememberObject(state, header->idref, set);
// Creates an OSString from the C string held in o->string and registers it
// under o->idref when the id is non-negative.
1036 buildString(parser_state_t *state, object_t *o)
1040 string = OSString::withCString(o->string);
1041 if (o->idref >= 0) rememberObject(state, o->idref, string);
// Creates an OSData from the decoded bytes in o->data (o->size bytes), or an
// empty OSData otherwise, and registers it under o->idref when the id is
// non-negative. The raw buffer is freed afterwards, but only when o->size is
// non-zero.
1051 buildData(parser_state_t *state, object_t *o)
1056 data = OSData::withBytes(o->data, o->size);
1058 data = OSData::withCapacity(0);
1060 if (o->idref >= 0) rememberObject(state, o->idref, data);
1062 if (o->size) free(o->data);
// Creates an OSNumber from o->number with a bit width of o->size and
// registers it under o->idref when the id is non-negative.
1069 buildNumber(parser_state_t *state, object_t *o)
1071 OSNumber *number = OSNumber::withNumber(o->number, o->size);
1073 if (o->idref >= 0) rememberObject(state, o->idref, number);
// Maps o->number to the shared boolean objects: 0 selects kOSBooleanFalse,
// anything else kOSBooleanTrue. The chosen object is retained. `state` is
// unused.
1080 buildBoolean(parser_state_t *state __unused, object_t *o)
1082 o->object = ((o->number == 0) ? kOSBooleanFalse : kOSBooleanTrue);
1083 o->object->retain();
// Public entry point: parses the NUL-terminated text in `buffer` and yields
// the resulting object taken from state->parsedObject.
//   buffer      - text to parse; a NULL buffer makes the function return 0.
//   errorString - optional; cleared to NULL up front and handed to the parser
//                 state so OSUnserializeerror() can store a message in it.
// A fresh parser_state_t is allocated for every call, and the tag dictionary
// and leftover parser objects are cleaned up after yyparse() returns.
1088 OSUnserializeXML(const char *buffer, OSString **errorString)
1091 parser_state_t *state = (parser_state_t *)malloc(sizeof(parser_state_t));
// NOTE(review): when state was allocated but buffer is NULL, this returns
// without freeing state.
1093 if ((!state) || (!buffer)) return 0;
1096 if (errorString) *errorString = NULL;
1098 state->parseBuffer = buffer;
1099 state->parseBufferIndex = 0;
1100 state->lineNumber = 1;
1102 state->freeObjects = 0;
// NOTE(review): the withCapacity() result is not checked on this line; it is
// dereferenced later by rememberObject()/retrieveObject() and released below.
1103 state->tags = OSDictionary::withCapacity(128);
1104 state->errorString = errorString;
1105 state->parsedObject = 0;
// the parse result is read back from state, so yyparse's return value is ignored
1107 (void)yyparse((void *)state);
1109 object = state->parsedObject;
1111 cleanupObjects(state);
1112 state->tags->release();
1124 // DO NOT EDIT OSUnserializeXML.cpp!