2 * Copyright (c) 1999-2002 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the
10 * License may not be used to create, or enable the creation or
11 * redistribution of, unlawful or unlicensed copies of an Apple operating
12 * system, or to circumvent, violate, or enable the circumvention or
13 * violation of, any terms of an Apple operating system software license
16 * Please obtain a copy of the License at
17 * http://www.opensource.apple.com/apsl/ and read it before using this
20 * The Original Code and all software distributed under the License are
21 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
22 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
23 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
25 * Please see the License for the specific language governing rights and
26 * limitations under the License.
28 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
34 * OSUnserializeXML.y created by rsulack on Tue Oct 12 1999
37 // parser for unserializing OSContainer objects serialized to XML
40 // bison -p OSUnserializeXML OSUnserializeXML.y
41 // head -50 OSUnserializeXML.y > OSUnserializeXML.cpp
42 // sed -e "s/#include <stdio.h>//" < OSUnserializeXML.tab.c >> OSUnserializeXML.cpp
44 // when changing code check in both OSUnserializeXML.y and OSUnserializeXML.cpp
50 // DO NOT EDIT OSUnserializeXML.cpp!
65 #include <libkern/c++/OSMetaClass.h>
66 #include <libkern/c++/OSContainers.h>
67 #include <libkern/c++/OSLib.h>
69 #define YYSTYPE object_t *
70 #define YYPARSE_PARAM state
71 #define YYLEX_PARAM (parser_state_t *)state
73 // this is the internal struct used to hold objects on parser stack
74 // it represents objects both before and after they have been created
75 typedef struct object {
78 struct object *elements;
80 OSString *key; // for dictionary
82 void *data; // for data
83 char *string; // for string & symbol
84 long long number; // for number
88 // this code is reentrant: this structure contains all
89 // the state information for parsing a single buffer
90 typedef struct parser_state {
91 const char *parseBuffer; // start of text to be parsed
92 int parseBufferIndex; // current index into text
93 int lineNumber; // current line number
94 object_t *objects; // internal objects in use
95 object_t *freeObjects; // internal objects that are free
96 OSDictionary *tags; // used to remember "ID" tags
97 OSString **errorString; // parse error with line
98 OSObject *parsedObject; // resultant object of parsed text
101 #define STATE ((parser_state_t *)state)
104 #define yyerror(s) OSUnserializeerror(STATE, (s))
105 static int OSUnserializeerror(parser_state_t *state, char *s);
107 static int yylex(YYSTYPE *lvalp, parser_state_t *state);
108 static int yyparse(void * state);
110 static object_t *newObject(parser_state_t *state);
111 static void freeObject(parser_state_t *state, object_t *o);
112 static void rememberObject(parser_state_t *state, int tag, OSObject *o);
113 static object_t *retrieveObject(parser_state_t *state, int tag);
114 static void cleanupObjects(parser_state_t *state);
116 static object_t *buildDictionary(parser_state_t *state, object_t *o);
117 static object_t *buildArray(parser_state_t *state, object_t *o);
118 static object_t *buildSet(parser_state_t *state, object_t *o);
119 static object_t *buildString(parser_state_t *state, object_t *o);
120 static object_t *buildData(parser_state_t *state, object_t *o);
121 static object_t *buildNumber(parser_state_t *state, object_t *o);
122 static object_t *buildBoolean(parser_state_t *state, object_t *o);
125 extern void *kern_os_malloc(size_t size);
126 extern void *kern_os_realloc(void * addr, size_t size);
127 extern void kern_os_free(void * addr);
129 //XXX shouldn't have to define these
130 extern long strtol(const char *, char **, int);
131 extern unsigned long strtoul(const char *, char **, int);
135 #define malloc(s) kern_os_malloc(s)
136 #define realloc(a, s) kern_os_realloc(a, s)
137 #define free(a) kern_os_free((void *)a)
150 %% /* Grammar rules and actions follow */
152 input: /* empty */ { yyerror("unexpected end of buffer");
155 | object { STATE->parsedObject = $1->object;
157 freeObject(STATE, $1);
160 | SYNTAX_ERROR { yyerror("syntax error");
165 object: dict { $$ = buildDictionary(STATE, $1); }
166 | array { $$ = buildArray(STATE, $1); }
167 | set { $$ = buildSet(STATE, $1); }
168 | string { $$ = buildString(STATE, $1); }
169 | data { $$ = buildData(STATE, $1); }
170 | number { $$ = buildNumber(STATE, $1); }
171 | boolean { $$ = buildBoolean(STATE, $1); }
172 | idref { $$ = retrieveObject(STATE, $1->idref);
174 $$->object->retain();
176 yyerror("forward reference detected");
179 freeObject(STATE, $1);
183 //------------------------------------------------------------------------------
185 dict: '{' '}' { $$ = $1;
188 | '{' pairs '}' { $$ = $1;
195 | pairs pair { $$ = $2;
200 pair: key object { $$ = $1;
201 $$->key = (OSString *)$$->object;
202 $$->object = $2->object;
205 freeObject(STATE, $2);
209 key: KEY { $$ = buildString(STATE, $1); }
212 //------------------------------------------------------------------------------
214 array: '(' ')' { $$ = $1;
217 | '(' elements ')' { $$ = $1;
223 set: '[' ']' { $$ = $1;
226 | '[' elements ']' { $$ = $1;
232 elements: object { $$ = $1;
235 | elements object { $$ = $2;
240 //------------------------------------------------------------------------------
260 OSUnserializeerror(parser_state_t * state, char *s) /* Called by yyparse on errors */
262 char tempString[128];
264 if (state->errorString) {
265 snprintf(tempString, 128, "OSUnserializeXML: %s near line %d\n", s, state->lineNumber);
266 *(state->errorString) = OSString::withCString(tempString);
272 #define TAG_MAX_LENGTH 32
273 #define TAG_MAX_ATTRIBUTES 32
278 #define TAG_COMMENT 4
280 #define currentChar() (state->parseBuffer[state->parseBufferIndex])
281 #define nextChar() (state->parseBuffer[++state->parseBufferIndex])
282 #define prevChar() (state->parseBuffer[state->parseBufferIndex - 1])
284 #define isSpace(c) ((c) == ' ' || (c) == '\t')
285 #define isAlpha(c) (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z'))
286 #define isDigit(c) ((c) >= '0' && (c) <= '9')
287 #define isAlphaDigit(c) ((c) >= 'a' && (c) <= 'f')
288 #define isHexDigit(c) (isDigit(c) || isAlphaDigit(c))
289 #define isAlphaNumeric(c) (isAlpha(c) || isDigit(c) || ((c) == '-'))
292 getTag(parser_state_t *state,
293 char tag[TAG_MAX_LENGTH],
295 char attributes[TAG_MAX_ATTRIBUTES][TAG_MAX_LENGTH],
296 char values[TAG_MAX_ATTRIBUTES][TAG_MAX_LENGTH] )
299 int c = currentChar();
300 int tagType = TAG_START;
304 if (c != '<') return TAG_BAD;
305 c = nextChar(); // skip '<'
307 if (c == '?' || c == '!') {
308 while ((c = nextChar()) != 0) {
309 if (c == '\n') state->lineNumber++;
318 c = nextChar(); // skip '/'
321 if (!isAlpha(c)) return TAG_BAD;
323 /* find end of tag while copying it */
324 while (isAlphaNumeric(c)) {
327 if (length >= (TAG_MAX_LENGTH - 1)) return TAG_BAD;
332 // printf("tag %s, type %d\n", tag, tagType);
334 // look for attributes of the form attribute = "value" ...
335 while ((c != '>') && (c != '/')) {
336 while (isSpace(c)) c = nextChar();
339 while (isAlphaNumeric(c)) {
340 attributes[*attributeCount][length++] = c;
341 if (length >= (TAG_MAX_LENGTH - 1)) return TAG_BAD;
344 attributes[*attributeCount][length] = 0;
346 while (isSpace(c)) c = nextChar();
348 if (c != '=') return TAG_BAD;
351 while (isSpace(c)) c = nextChar();
353 if (c != '"') return TAG_BAD;
357 values[*attributeCount][length++] = c;
358 if (length >= (TAG_MAX_LENGTH - 1)) return TAG_BAD;
361 values[*attributeCount][length] = 0;
363 c = nextChar(); // skip closing quote
365 // printf(" attribute '%s' = '%s', nextchar = '%c'\n",
366 // attributes[*attributeCount], values[*attributeCount], c);
369 if (*attributeCount >= TAG_MAX_ATTRIBUTES) return TAG_BAD;
373 c = nextChar(); // skip '/'
376 if (c != '>') return TAG_BAD;
377 c = nextChar(); // skip '>'
383 getString(parser_state_t *state)
385 int c = currentChar();
386 int start, length, i, j;
389 start = state->parseBufferIndex;
390 /* find end of string */
393 if (c == '\n') state->lineNumber++;
400 if (c != '<') return 0;
402 length = state->parseBufferIndex - start;
404 /* copy to null terminated buffer */
405 tempString = (char *)malloc(length + 1);
406 if (tempString == 0) {
407 printf("OSUnserializeXML: can't alloc temp memory\n");
411 // copy out string in tempString
412 // "&amp;" -> '&', "&lt;" -> '<', "&gt;" -> '>'
416 c = state->parseBuffer[start + i++];
420 if ((i+3) > length) goto error;
421 c = state->parseBuffer[start + i++];
423 if (state->parseBuffer[start + i++] != 't') goto error;
424 if (state->parseBuffer[start + i++] != ';') goto error;
425 tempString[j++] = '<';
429 if (state->parseBuffer[start + i++] != 't') goto error;
430 if (state->parseBuffer[start + i++] != ';') goto error;
431 tempString[j++] = '>';
434 if ((i+3) > length) goto error;
436 if (state->parseBuffer[start + i++] != 'm') goto error;
437 if (state->parseBuffer[start + i++] != 'p') goto error;
438 if (state->parseBuffer[start + i++] != ';') goto error;
439 tempString[j++] = '&';
447 // printf("string %s\n", tempString);
452 if (tempString) free(tempString);
457 getNumber(parser_state_t *state)
459 unsigned long long n = 0;
462 int c = currentChar();
477 n = (n * base + c - '0');
481 n = (unsigned long long)((long long)n * (long long)-1);
484 while(isHexDigit(c)) {
486 n = (n * base + c - '0');
488 n = (n * base + 0xa + c - 'a');
493 // printf("number 0x%x\n", (unsigned long)n);
497 // taken from CFXMLParsing/CFPropertyList.c
499 static const signed char __CFPLDataDecodeTable[128] = {
500 /* 000 */ -1, -1, -1, -1, -1, -1, -1, -1,
501 /* 010 */ -1, -1, -1, -1, -1, -1, -1, -1,
502 /* 020 */ -1, -1, -1, -1, -1, -1, -1, -1,
503 /* 030 */ -1, -1, -1, -1, -1, -1, -1, -1,
504 /* ' ' */ -1, -1, -1, -1, -1, -1, -1, -1,
505 /* '(' */ -1, -1, -1, 62, -1, -1, -1, 63,
506 /* '0' */ 52, 53, 54, 55, 56, 57, 58, 59,
507 /* '8' */ 60, 61, -1, -1, -1, 0, -1, -1,
508 /* '@' */ -1, 0, 1, 2, 3, 4, 5, 6,
509 /* 'H' */ 7, 8, 9, 10, 11, 12, 13, 14,
510 /* 'P' */ 15, 16, 17, 18, 19, 20, 21, 22,
511 /* 'X' */ 23, 24, 25, -1, -1, -1, -1, -1,
512 /* '`' */ -1, 26, 27, 28, 29, 30, 31, 32,
513 /* 'h' */ 33, 34, 35, 36, 37, 38, 39, 40,
514 /* 'p' */ 41, 42, 43, 44, 45, 46, 47, 48,
515 /* 'x' */ 49, 50, 51, -1, -1, -1, -1, -1
518 #define DATA_ALLOC_SIZE 4096
521 getCFEncodedData(parser_state_t *state, unsigned int *size)
523 int numeq = 0, acc = 0, cntr = 0;
524 int tmpbufpos = 0, tmpbuflen = 0;
525 unsigned char *tmpbuf = (unsigned char *)malloc(DATA_ALLOC_SIZE);
527 int c = currentChar();
536 if (c == '=') numeq++; else numeq = 0;
537 if (c == '\n') state->lineNumber++;
538 if (__CFPLDataDecodeTable[c] < 0) {
544 acc += __CFPLDataDecodeTable[c];
545 if (0 == (cntr & 0x3)) {
546 if (tmpbuflen <= tmpbufpos + 2) {
547 tmpbuflen += DATA_ALLOC_SIZE;
548 tmpbuf = (unsigned char *)realloc(tmpbuf, tmpbuflen);
550 tmpbuf[tmpbufpos++] = (acc >> 16) & 0xff;
552 tmpbuf[tmpbufpos++] = (acc >> 8) & 0xff;
554 tmpbuf[tmpbufpos++] = acc & 0xff;
567 getHexData(parser_state_t *state, unsigned int *size)
570 unsigned char *d, *start, *lastStart;
572 start = lastStart = d = (unsigned char *)malloc(DATA_ALLOC_SIZE);
577 if (isSpace(c)) while ((c = nextChar()) != 0 && isSpace(c)) {};
587 } else if (isAlphaDigit(c)) {
588 *d = (0xa + (c - 'a')) << 4;
597 } else if (isAlphaDigit(c)) {
598 *d |= 0xa + (c - 'a');
604 if ((d - lastStart) >= DATA_ALLOC_SIZE) {
605 int oldsize = d - start;
606 start = (unsigned char *)realloc(start, oldsize + DATA_ALLOC_SIZE);
607 d = lastStart = start + oldsize;
623 yylex(YYSTYPE *lvalp, parser_state_t *state)
627 char tag[TAG_MAX_LENGTH];
629 char attributes[TAG_MAX_ATTRIBUTES][TAG_MAX_LENGTH];
630 char values[TAG_MAX_ATTRIBUTES][TAG_MAX_LENGTH];
636 /* skip white space */
637 if (isSpace(c)) while ((c = nextChar()) != 0 && isSpace(c)) {};
639 /* keep track of line number, don't return \n's */
646 // end of the buffer?
649 tagType = getTag(STATE, tag, &attributeCount, attributes, values);
650 if (tagType == TAG_BAD) return SYNTAX_ERROR;
651 if (tagType == TAG_COMMENT) goto top;
653 // handle allocation and check for "ID" and "IDREF" tags up front
654 *lvalp = object = newObject(STATE);
656 for (i=0; i < attributeCount; i++) {
657 if (attributes[i][0] == 'I' && attributes[i][1] == 'D') {
658 // check for IDREFs. Note: we ignore the tag, so for
659 // this to work correctly all IDREFs must be unique
660 // across the whole serialization
661 if (attributes[i][2] == 'R' && attributes[i][3] == 'E' &&
662 attributes[i][4] == 'F' && !attributes[i][5]) {
663 if (tagType != TAG_EMPTY) return SYNTAX_ERROR;
664 object->idref = strtol(values[i], NULL, 0);
668 if (!attributes[i][2]) {
669 object->idref = strtol(values[i], NULL, 0);
678 if (!strcmp(tag, "array")) {
679 if (tagType == TAG_EMPTY) {
680 object->elements = NULL;
683 return (tagType == TAG_START) ? '(' : ')';
687 if (!strcmp(tag, "dict")) {
688 if (tagType == TAG_EMPTY) {
689 object->elements = NULL;
692 return (tagType == TAG_START) ? '{' : '}';
694 if (!strcmp(tag, "data")) {
696 if (tagType == TAG_EMPTY) {
702 bool isHexFormat = false;
703 for (int i=0; i < attributeCount; i++) {
704 if (!strcmp(attributes[i], "format") && !strcmp(values[i], "hex")) {
709 // CF encoded is the default form
711 object->data = getHexData(STATE, &size);
713 object->data = getCFEncodedData(STATE, &size);
716 if ((getTag(STATE, tag, &attributeCount, attributes, values) != TAG_END) || strcmp(tag, "data")) {
723 if (!strcmp(tag, "false")) {
724 if (tagType == TAG_EMPTY) {
731 if (!strcmp(tag, "integer")) {
732 object->size = 64; // default
733 for (i=0; i < attributeCount; i++) {
734 if (!strcmp(attributes[i], "size")) {
735 object->size = strtoul(values[i], NULL, 0);
738 if (tagType == TAG_EMPTY) {
742 object->number = getNumber(STATE);
743 if ((getTag(STATE, tag, &attributeCount, attributes, values) != TAG_END) || strcmp(tag, "integer")) {
750 if (!strcmp(tag, "key")) {
751 if (tagType == TAG_EMPTY) return SYNTAX_ERROR;
752 object->string = getString(STATE);
753 if (!object->string) {
756 if ((getTag(STATE, tag, &attributeCount, attributes, values) != TAG_END)
757 || strcmp(tag, "key")) {
764 if (!strcmp(tag, "plist")) {
765 freeObject(STATE, object);
770 if (!strcmp(tag, "string")) {
771 if (tagType == TAG_EMPTY) {
772 object->string = (char *)malloc(1);
773 object->string[0] = 0;
776 object->string = getString(STATE);
777 if (!object->string) {
780 if ((getTag(STATE, tag, &attributeCount, attributes, values) != TAG_END)
781 || strcmp(tag, "string")) {
786 if (!strcmp(tag, "set")) {
787 if (tagType == TAG_EMPTY) {
788 object->elements = NULL;
791 if (tagType == TAG_START) {
799 if (!strcmp(tag, "true")) {
800 if (tagType == TAG_EMPTY) {
811 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
812 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
813 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
815 // "java"-like allocation: if this code hits a syntax error in
816 // the middle of the parsed string we just bail with pointers hanging
817 // all over the place; this code helps keep it all together
819 //static int object_count = 0;
822 newObject(parser_state_t *state)
826 if (state->freeObjects) {
827 o = state->freeObjects;
828 state->freeObjects = state->freeObjects->next;
830 o = (object_t *)malloc(sizeof(object_t));
832 bzero(o, sizeof(object_t));
833 o->free = state->objects;
841 freeObject(parser_state_t * state, object_t *o)
843 o->next = state->freeObjects;
844 state->freeObjects = o;
848 cleanupObjects(parser_state_t *state)
850 object_t *t, *o = state->objects;
854 // printf("OSUnserializeXML: releasing object o=%x object=%x\n", (int)o, (int)o->object);
855 o->object->release();
858 // printf("OSUnserializeXML: freeing object o=%x data=%x\n", (int)o, (int)o->data);
862 // printf("OSUnserializeXML: releasing object o=%x key=%x\n", (int)o, (int)o->key);
866 // printf("OSUnserializeXML: freeing object o=%x string=%x\n", (int)o, (int)o->string);
875 // printf("object_count = %d\n", object_count);
878 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
879 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
880 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
883 rememberObject(parser_state_t *state, int tag, OSObject *o)
886 snprintf(key, 16, "%u", tag);
888 // printf("remember key %s\n", key);
890 state->tags->setObject(key, o);
894 retrieveObject(parser_state_t *state, int tag)
899 snprintf(key, 16, "%u", tag);
901 // printf("retrieve key '%s'\n", key);
903 ref = state->tags->getObject(key);
906 o = newObject(state);
911 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
912 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
913 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
916 buildDictionary(parser_state_t *state, object_t * header)
922 // get count and reverse order
923 o = header->elements;
924 header->elements = 0;
930 t->next = header->elements;
931 header->elements = t;
934 dict = OSDictionary::withCapacity(count);
935 if (header->idref >= 0) rememberObject(state, header->idref, dict);
937 o = header->elements;
939 dict->setObject(o->key, o->object);
942 o->object->release();
948 freeObject(state, t);
956 buildArray(parser_state_t *state, object_t * header)
962 // get count and reverse order
963 o = header->elements;
964 header->elements = 0;
970 t->next = header->elements;
971 header->elements = t;
974 array = OSArray::withCapacity(count);
975 if (header->idref >= 0) rememberObject(state, header->idref, array);
977 o = header->elements;
979 array->setObject(o->object);
981 o->object->release();
986 freeObject(state, t);
994 buildSet(parser_state_t *state, object_t *header)
996 object_t *o = buildArray(state, header);
998 OSArray *array = (OSArray *)o->object;
999 OSSet *set = OSSet::withArray(array, array->getCapacity());
1001 // write over the reference created in buildArray
1002 if (header->idref >= 0) rememberObject(state, header->idref, set);
1010 buildString(parser_state_t *state, object_t *o)
1014 string = OSString::withCString(o->string);
1015 if (o->idref >= 0) rememberObject(state, o->idref, string);
1025 buildData(parser_state_t *state, object_t *o)
1030 data = OSData::withBytes(o->data, o->size);
1032 data = OSData::withCapacity(0);
1034 if (o->idref >= 0) rememberObject(state, o->idref, data);
1036 if (o->size) free(o->data);
1043 buildNumber(parser_state_t *state, object_t *o)
1045 OSNumber *number = OSNumber::withNumber(o->number, o->size);
1047 if (o->idref >= 0) rememberObject(state, o->idref, number);
1054 buildBoolean(parser_state_t *state, object_t *o)
1056 o->object = ((o->number == 0) ? kOSBooleanFalse : kOSBooleanTrue);
1057 o->object->retain();
1062 OSUnserializeXML(const char *buffer, OSString **errorString)
1065 parser_state_t *state = (parser_state_t *)malloc(sizeof(parser_state_t));
1067 if ((!state) || (!buffer)) return 0;
1070 if (errorString) *errorString = NULL;
1072 state->parseBuffer = buffer;
1073 state->parseBufferIndex = 0;
1074 state->lineNumber = 1;
1076 state->freeObjects = 0;
1077 state->tags = OSDictionary::withCapacity(128);
1078 state->errorString = errorString;
1079 state->parsedObject = 0;
1081 (void)yyparse((void *)state);
1083 object = state->parsedObject;
1085 cleanupObjects(state);
1086 state->tags->release();
1098 // DO NOT EDIT OSUnserializeXML.cpp!