2 * Copyright (c) 1999-2002 Apple Computer, Inc. All rights reserved.
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
32 * OSUnserializeXML.y created by rsulack on Tue Oct 12 1999
35 // parser for unserializing OSContainer objects serialized to XML
38 // bison -p OSUnserializeXML OSUnserializeXML.y
39 // head -50 OSUnserializeXML.y > OSUnserializeXML.cpp
40 // sed -e "s/#include <stdio.h>//" < OSUnserializeXML.tab.c >> OSUnserializeXML.cpp
42 // when changing code check in both OSUnserializeXML.y and OSUnserializeXML.cpp
48 // DO NOT EDIT OSUnserializeXML.cpp!
63 #include <libkern/c++/OSMetaClass.h>
64 #include <libkern/c++/OSContainers.h>
65 #include <libkern/c++/OSLib.h>
67 #define YYSTYPE object_t * // every parser stack value is a pointer to an internal object_t
68 #define YYPARSE_PARAM state // NOTE(review): presumably names yyparse's argument; matches the `yyparse(void * state)` prototype in this file -- confirm against the bison version in use
69 #define YYLEX_PARAM (parser_state_t *)state // NOTE(review): presumably the extra argument handed to yylex; matches yylex's `parser_state_t *state` parameter -- confirm
71 // this is the internal struct used to hold objects on the parser stack;
72 // it represents objects both before and after they have been created
73 typedef struct object {
76 struct object *elements;
78 OSString *key; // for dictionary
80 void *data; // for data
81 char *string; // for string & symbol
82 long long number; // for number
86 // this code is reentrant; this structure contains all
87 // state information for the parsing of a single buffer
88 typedef struct parser_state {
89 const char *parseBuffer; // start of text to be parsed
90 int parseBufferIndex; // current index into text
91 int lineNumber; // current line number
92 object_t *objects; // internal objects in use
93 object_t *freeObjects; // internal objects that are free
94 OSDictionary *tags; // used to remember "ID" tags
95 OSString **errorString; // parse error with line
96 OSObject *parsedObject; // resultant object of parsed text
99 #define STATE ((parser_state_t *)state)
102 #define yyerror(s) OSUnserializeerror(STATE, (s))
103 static int OSUnserializeerror(parser_state_t *state, char *s);
105 static int yylex(YYSTYPE *lvalp, parser_state_t *state);
106 static int yyparse(void * state);
108 static object_t *newObject(parser_state_t *state);
109 static void freeObject(parser_state_t *state, object_t *o);
110 static void rememberObject(parser_state_t *state, int tag, OSObject *o);
111 static object_t *retrieveObject(parser_state_t *state, int tag);
112 static void cleanupObjects(parser_state_t *state);
114 static object_t *buildDictionary(parser_state_t *state, object_t *o);
115 static object_t *buildArray(parser_state_t *state, object_t *o);
116 static object_t *buildSet(parser_state_t *state, object_t *o);
117 static object_t *buildString(parser_state_t *state, object_t *o);
118 static object_t *buildData(parser_state_t *state, object_t *o);
119 static object_t *buildNumber(parser_state_t *state, object_t *o);
120 static object_t *buildBoolean(parser_state_t *state, object_t *o);
123 extern void *kern_os_malloc(size_t size);
124 extern void *kern_os_realloc(void * addr, size_t size);
125 extern void kern_os_free(void * addr);
127 //XXX shouldn't have to define these
128 extern long strtol(const char *, char **, int);
129 extern unsigned long strtoul(const char *, char **, int);
133 #define malloc(s) kern_os_malloc(s) // route malloc() calls in this file to the kern_os_malloc declared above
134 #define realloc(a, s) kern_os_realloc(a, s) // route realloc() calls to kern_os_realloc
135 #define free(a) kern_os_free((void *)a) // route free() calls to kern_os_free; the argument is cast to void * so any pointer type can be passed
148 %% /* Grammar rules and actions follow */
150 input: /* empty */ { yyerror("unexpected end of buffer");
153 | object { STATE->parsedObject = $1->object;
155 freeObject(STATE, $1);
158 | SYNTAX_ERROR { yyerror("syntax error");
163 object: dict { $$ = buildDictionary(STATE, $1); }
164 | array { $$ = buildArray(STATE, $1); }
165 | set { $$ = buildSet(STATE, $1); }
166 | string { $$ = buildString(STATE, $1); }
167 | data { $$ = buildData(STATE, $1); }
168 | number { $$ = buildNumber(STATE, $1); }
169 | boolean { $$ = buildBoolean(STATE, $1); }
170 | idref { $$ = retrieveObject(STATE, $1->idref);
172 $$->object->retain();
174 yyerror("forward reference detected");
177 freeObject(STATE, $1);
181 //------------------------------------------------------------------------------
183 dict: '{' '}' { $$ = $1;
186 | '{' pairs '}' { $$ = $1;
193 | pairs pair { $$ = $2;
198 pair: key object { $$ = $1;
199 $$->key = (OSString *)$$->object;
200 $$->object = $2->object;
203 freeObject(STATE, $2);
207 key: KEY { $$ = buildString(STATE, $1); }
210 //------------------------------------------------------------------------------
212 array: '(' ')' { $$ = $1;
215 | '(' elements ')' { $$ = $1;
221 set: '[' ']' { $$ = $1;
224 | '[' elements ']' { $$ = $1;
230 elements: object { $$ = $1;
233 | elements object { $$ = $2;
238 //------------------------------------------------------------------------------
258 OSUnserializeerror(parser_state_t * state, char *s) /* Called by yyparse on errors */
260 char tempString[128];
262 if (state->errorString) {
263 snprintf(tempString, 128, "OSUnserializeXML: %s near line %d\n", s, state->lineNumber);
264 *(state->errorString) = OSString::withCString(tempString);
270 #define TAG_MAX_LENGTH 32 // size of the tag-name and per-attribute buffers; getTag returns TAG_BAD once a name or value reaches TAG_MAX_LENGTH - 1 characters
271 #define TAG_MAX_ATTRIBUTES 32 // number of attribute slots per tag; getTag returns TAG_BAD when the attribute count reaches this
276 #define TAG_COMMENT 4 // tag type for which yylex discards the tag and restarts scanning (goto top)
278 #define currentChar() (state->parseBuffer[state->parseBufferIndex]) // character at the current parse index; relies on a variable named `state` being in scope at the point of use
279 #define nextChar() (state->parseBuffer[++state->parseBufferIndex]) // side effect: advances the parse index first, then yields the character at the new index
280 #define prevChar() (state->parseBuffer[state->parseBufferIndex - 1]) // character one position before the current index; the macro itself does no bounds check
282 #define isSpace(c) ((c) == ' ' || (c) == '\t') // space or tab only; '\n' is not matched here (the scanners test for it separately to bump lineNumber)
283 #define isAlpha(c) (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z')) // ASCII letter, either case
284 #define isDigit(c) ((c) >= '0' && (c) <= '9') // decimal digit
285 #define isAlphaDigit(c) ((c) >= 'a' && (c) <= 'f') // despite the name, matches only the lowercase hex letters 'a'..'f'
286 #define isHexDigit(c) (isDigit(c) || isAlphaDigit(c)) // decimal digit or lowercase 'a'..'f'; uppercase 'A'..'F' is not accepted
287 #define isAlphaNumeric(c) (isAlpha(c) || isDigit(c) || ((c) == '-')) // letter, digit or '-'; all of these macros evaluate (c) more than once, so pass an expression without side effects
290 getTag(parser_state_t *state,
291 char tag[TAG_MAX_LENGTH],
293 char attributes[TAG_MAX_ATTRIBUTES][TAG_MAX_LENGTH],
297 int c = currentChar();
298 int tagType = TAG_START;
302 if (c != '<') return TAG_BAD;
303 c = nextChar(); // skip '<'
305 if (c == '?' || c == '!') {
306 while ((c = nextChar()) != 0) {
307 if (c == '\n') state->lineNumber++;
316 c = nextChar(); // skip '/'
319 if (!isAlpha(c)) return TAG_BAD;
321 /* find end of tag while copying it */
322 while (isAlphaNumeric(c)) {
325 if (length >= (TAG_MAX_LENGTH - 1)) return TAG_BAD;
330 // printf("tag %s, type %d\n", tag, tagType);
332 // look for attributes of the form attribute = "value" ...
333 while ((c != '>') && (c != '/')) {
334 while (isSpace(c)) c = nextChar();
337 while (isAlphaNumeric(c)) {
338 attributes[*attributeCount][length++] = c;
339 if (length >= (TAG_MAX_LENGTH - 1)) return TAG_BAD;
342 attributes[*attributeCount][length] = 0;
344 while (isSpace(c)) c = nextChar();
346 if (c != '=') return TAG_BAD;
349 while (isSpace(c)) c = nextChar();
351 if (c != '"') return TAG_BAD;
355 values[*attributeCount][length++] = c;
356 if (length >= (TAG_MAX_LENGTH - 1)) return TAG_BAD;
359 values[*attributeCount][length] = 0;
361 c = nextChar(); // skip closing quote
363 // printf(" attribute '%s' = '%s', nextchar = '%c'\n",
364 // attributes[*attributeCount], values[*attributeCount], c);
367 if (*attributeCount >= TAG_MAX_ATTRIBUTES) return TAG_BAD;
371 c = nextChar(); // skip '/'
374 if (c != '>') return TAG_BAD;
375 c = nextChar(); // skip '>'
381 getString(parser_state_t *state)
383 int c = currentChar();
384 int start, length, i, j;
387 start = state->parseBufferIndex;
388 /* find end of string */
391 if (c == '\n') state->lineNumber++;
398 if (c != '<') return 0;
400 length = state->parseBufferIndex - start;
402 /* copy to null terminated buffer */
403 tempString = (char *)malloc(length + 1);
404 if (tempString == 0) {
405 printf("OSUnserializeXML: can't alloc temp memory\n");
409 // copy out string in tempString
410 // "&amp;" -> '&', "&lt;" -> '<', "&gt;" -> '>'
414 c = state->parseBuffer[start + i++];
418 if ((i+3) > length) goto error;
419 c = state->parseBuffer[start + i++];
421 if (state->parseBuffer[start + i++] != 't') goto error;
422 if (state->parseBuffer[start + i++] != ';') goto error;
423 tempString[j++] = '<';
427 if (state->parseBuffer[start + i++] != 't') goto error;
428 if (state->parseBuffer[start + i++] != ';') goto error;
429 tempString[j++] = '>';
432 if ((i+3) > length) goto error;
434 if (state->parseBuffer[start + i++] != 'm') goto error;
435 if (state->parseBuffer[start + i++] != 'p') goto error;
436 if (state->parseBuffer[start + i++] != ';') goto error;
437 tempString[j++] = '&';
445 // printf("string %s\n", tempString);
450 if (tempString) free(tempString);
455 getNumber(parser_state_t *state)
457 unsigned long long n = 0;
460 int c = currentChar();
475 n = (n * base + c - '0');
479 n = (unsigned long long)((long long)n * (long long)-1);
482 while(isHexDigit(c)) {
484 n = (n * base + c - '0');
486 n = (n * base + 0xa + c - 'a');
491 // printf("number 0x%x\n", (unsigned long)n);
495 // taken from CFXMLParsing/CFPropertyList.c
497 static const signed char __CFPLDataDecodeTable[128] = {
498 /* 000 */ -1, -1, -1, -1, -1, -1, -1, -1,
499 /* 010 */ -1, -1, -1, -1, -1, -1, -1, -1,
500 /* 020 */ -1, -1, -1, -1, -1, -1, -1, -1,
501 /* 030 */ -1, -1, -1, -1, -1, -1, -1, -1,
502 /* ' ' */ -1, -1, -1, -1, -1, -1, -1, -1,
503 /* '(' */ -1, -1, -1, 62, -1, -1, -1, 63,
504 /* '0' */ 52, 53, 54, 55, 56, 57, 58, 59,
505 /* '8' */ 60, 61, -1, -1, -1, 0, -1, -1,
506 /* '@' */ -1, 0, 1, 2, 3, 4, 5, 6,
507 /* 'H' */ 7, 8, 9, 10, 11, 12, 13, 14,
508 /* 'P' */ 15, 16, 17, 18, 19, 20, 21, 22,
509 /* 'X' */ 23, 24, 25, -1, -1, -1, -1, -1,
510 /* '`' */ -1, 26, 27, 28, 29, 30, 31, 32,
511 /* 'h' */ 33, 34, 35, 36, 37, 38, 39, 40,
512 /* 'p' */ 41, 42, 43, 44, 45, 46, 47, 48,
513 /* 'x' */ 49, 50, 51, -1, -1, -1, -1, -1
516 #define DATA_ALLOC_SIZE 4096 // byte count used for the initial malloc and for each growth step of the data buffers in getCFEncodedData and getHexData
519 getCFEncodedData(parser_state_t *state, unsigned int *size)
521 int numeq = 0, acc = 0, cntr = 0;
522 int tmpbufpos = 0, tmpbuflen = 0;
523 unsigned char *tmpbuf = (unsigned char *)malloc(DATA_ALLOC_SIZE);
525 int c = currentChar();
534 if (c == '=') numeq++; else numeq = 0;
535 if (c == '\n') state->lineNumber++;
536 if (__CFPLDataDecodeTable[c] < 0) {
542 acc += __CFPLDataDecodeTable[c];
543 if (0 == (cntr & 0x3)) {
544 if (tmpbuflen <= tmpbufpos + 2) {
545 tmpbuflen += DATA_ALLOC_SIZE;
546 tmpbuf = (unsigned char *)realloc(tmpbuf, tmpbuflen);
548 tmpbuf[tmpbufpos++] = (acc >> 16) & 0xff;
550 tmpbuf[tmpbufpos++] = (acc >> 8) & 0xff;
552 tmpbuf[tmpbufpos++] = acc & 0xff;
565 getHexData(parser_state_t *state, unsigned int *size)
568 unsigned char *d, *start, *lastStart;
570 start = lastStart = d = (unsigned char *)malloc(DATA_ALLOC_SIZE);
575 if (isSpace(c)) while ((c = nextChar()) != 0 && isSpace(c)) {};
585 } else if (isAlphaDigit(c)) {
586 *d = (0xa + (c - 'a')) << 4;
595 } else if (isAlphaDigit(c)) {
596 *d |= 0xa + (c - 'a');
602 if ((d - lastStart) >= DATA_ALLOC_SIZE) {
603 int oldsize = d - start;
604 start = (unsigned char *)realloc(start, oldsize + DATA_ALLOC_SIZE);
605 d = lastStart = start + oldsize;
621 yylex(YYSTYPE *lvalp, parser_state_t *state)
625 char tag[TAG_MAX_LENGTH];
627 char attributes[TAG_MAX_ATTRIBUTES][TAG_MAX_LENGTH];
634 /* skip white space */
635 if (isSpace(c)) while ((c = nextChar()) != 0 && isSpace(c)) {};
637 /* keep track of line number, don't return \n's */
644 // end of the buffer?
647 tagType = getTag(STATE, tag, &attributeCount, attributes, values);
648 if (tagType == TAG_BAD) return SYNTAX_ERROR;
649 if (tagType == TAG_COMMENT) goto top;
651 // handle allocation and check for "ID" and "IDREF" tags up front
652 *lvalp = object = newObject(STATE);
654 for (i=0; i < attributeCount; i++) {
655 if (attributes[i][0] == 'I' && attributes[i][1] == 'D') {
656 // check for idref's, note: we ignore the tag, for
657 // this to work correctly, all idrefs must be unique
658 // across the whole serialization
659 if (attributes[i][2] == 'R' && attributes[i][3] == 'E' &&
660 attributes[i][4] == 'F' && !attributes[i][5]) {
661 if (tagType != TAG_EMPTY) return SYNTAX_ERROR;
662 object->idref = strtol(values[i], NULL, 0);
666 if (!attributes[i][2]) {
667 object->idref = strtol(values[i], NULL, 0);
676 if (!strcmp(tag, "array")) {
677 if (tagType == TAG_EMPTY) {
678 object->elements = NULL;
681 return (tagType == TAG_START) ? '(' : ')';
685 if (!strcmp(tag, "dict")) {
686 if (tagType == TAG_EMPTY) {
687 object->elements = NULL;
690 return (tagType == TAG_START) ? '{' : '}';
692 if (!strcmp(tag, "data")) {
694 if (tagType == TAG_EMPTY) {
700 bool isHexFormat = false;
701 for (int i=0; i < attributeCount; i++) {
702 if (!strcmp(attributes[i], "format") && !strcmp(values[i], "hex")) {
707 // CF encoded is the default form
709 object->data = getHexData(STATE, &size);
711 object->data = getCFEncodedData(STATE, &size);
714 if ((getTag(STATE, tag, &attributeCount, attributes, values) != TAG_END) || strcmp(tag, "data")) {
721 if (!strcmp(tag, "false")) {
722 if (tagType == TAG_EMPTY) {
729 if (!strcmp(tag, "integer")) {
730 object->size = 64; // default
731 for (i=0; i < attributeCount; i++) {
732 if (!strcmp(attributes[i], "size")) {
733 object->size = strtoul(values[i], NULL, 0);
736 if (tagType == TAG_EMPTY) {
740 object->number = getNumber(STATE);
741 if ((getTag(STATE, tag, &attributeCount, attributes, values) != TAG_END) || strcmp(tag, "integer")) {
748 if (!strcmp(tag, "key")) {
749 if (tagType == TAG_EMPTY) return SYNTAX_ERROR;
750 object->string = getString(STATE);
751 if (!object->string) {
754 if ((getTag(STATE, tag, &attributeCount, attributes, values) != TAG_END)
755 || strcmp(tag, "key")) {
762 if (!strcmp(tag, "plist")) {
763 freeObject(STATE, object);
768 if (!strcmp(tag, "string")) {
769 if (tagType == TAG_EMPTY) {
770 object->string = (char *)malloc(1);
771 object->string[0] = 0;
774 object->string = getString(STATE);
775 if (!object->string) {
778 if ((getTag(STATE, tag, &attributeCount, attributes, values) != TAG_END)
779 || strcmp(tag, "string")) {
784 if (!strcmp(tag, "set")) {
785 if (tagType == TAG_EMPTY) {
786 object->elements = NULL;
789 if (tagType == TAG_START) {
797 if (!strcmp(tag, "true")) {
798 if (tagType == TAG_EMPTY) {
809 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
810 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
811 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
813 // "java" like allocation, if this code hits a syntax error in
814 // the middle of the parsed string we just bail with pointers hanging
815 // all over the place, this code helps keep it all together
817 //static int object_count = 0;
820 newObject(parser_state_t *state)
824 if (state->freeObjects) {
825 o = state->freeObjects;
826 state->freeObjects = state->freeObjects->next;
828 o = (object_t *)malloc(sizeof(object_t));
830 bzero(o, sizeof(object_t));
831 o->free = state->objects;
839 freeObject(parser_state_t * state, object_t *o)
841 o->next = state->freeObjects;
842 state->freeObjects = o;
846 cleanupObjects(parser_state_t *state)
848 object_t *t, *o = state->objects;
852 // printf("OSUnserializeXML: releasing object o=%x object=%x\n", (int)o, (int)o->object);
853 o->object->release();
856 // printf("OSUnserializeXML: freeing object o=%x data=%x\n", (int)o, (int)o->data);
860 // printf("OSUnserializeXML: releasing object o=%x key=%x\n", (int)o, (int)o->key);
864 // printf("OSUnserializeXML: freeing object o=%x string=%x\n", (int)o, (int)o->string);
873 // printf("object_count = %d\n", object_count);
876 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
877 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
878 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
881 rememberObject(parser_state_t *state, int tag, OSObject *o)
884 snprintf(key, 16, "%u", tag);
886 // printf("remember key %s\n", key);
888 state->tags->setObject(key, o);
892 retrieveObject(parser_state_t *state, int tag)
897 snprintf(key, 16, "%u", tag);
899 // printf("retrieve key '%s'\n", key);
901 ref = state->tags->getObject(key);
904 o = newObject(state);
909 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
910 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
911 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
914 buildDictionary(parser_state_t *state, object_t * header)
920 // get count and reverse order
921 o = header->elements;
922 header->elements = 0;
928 t->next = header->elements;
929 header->elements = t;
932 dict = OSDictionary::withCapacity(count);
933 if (header->idref >= 0) rememberObject(state, header->idref, dict);
935 o = header->elements;
937 dict->setObject(o->key, o->object);
940 o->object->release();
946 freeObject(state, t);
954 buildArray(parser_state_t *state, object_t * header)
960 // get count and reverse order
961 o = header->elements;
962 header->elements = 0;
968 t->next = header->elements;
969 header->elements = t;
972 array = OSArray::withCapacity(count);
973 if (header->idref >= 0) rememberObject(state, header->idref, array);
975 o = header->elements;
977 array->setObject(o->object);
979 o->object->release();
984 freeObject(state, t);
992 buildSet(parser_state_t *state, object_t *header)
994 object_t *o = buildArray(state, header);
996 OSArray *array = (OSArray *)o->object;
997 OSSet *set = OSSet::withArray(array, array->getCapacity());
999 // write over the reference created in buildArray
1000 if (header->idref >= 0) rememberObject(state, header->idref, set);
1008 buildString(parser_state_t *state, object_t *o)
1012 string = OSString::withCString(o->string);
1013 if (o->idref >= 0) rememberObject(state, o->idref, string);
1023 buildData(parser_state_t *state, object_t *o)
1028 data = OSData::withBytes(o->data, o->size);
1030 data = OSData::withCapacity(0);
1032 if (o->idref >= 0) rememberObject(state, o->idref, data);
1034 if (o->size) free(o->data);
1041 buildNumber(parser_state_t *state, object_t *o)
1043 OSNumber *number = OSNumber::withNumber(o->number, o->size);
1045 if (o->idref >= 0) rememberObject(state, o->idref, number);
1052 buildBoolean(parser_state_t *state, object_t *o)
1054 o->object = ((o->number == 0) ? kOSBooleanFalse : kOSBooleanTrue);
1055 o->object->retain();
1060 OSUnserializeXML(const char *buffer, OSString **errorString)
1063 parser_state_t *state = (parser_state_t *)malloc(sizeof(parser_state_t));
1065 if ((!state) || (!buffer)) return 0;
1068 if (errorString) *errorString = NULL;
1070 state->parseBuffer = buffer;
1071 state->parseBufferIndex = 0;
1072 state->lineNumber = 1;
1074 state->freeObjects = 0;
1075 state->tags = OSDictionary::withCapacity(128);
1076 state->errorString = errorString;
1077 state->parsedObject = 0;
1079 (void)yyparse((void *)state);
1081 object = state->parsedObject;
1083 cleanupObjects(state);
1084 state->tags->release();
1096 // DO NOT EDIT OSUnserializeXML.cpp!