2 * Copyright (c) 1999-2002 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
20 * @APPLE_LICENSE_HEADER_END@
26 * OSUnserializeXML.y created by rsulack on Tue Oct 12 1999
29 // parser for unserializing OSContainer objects serialized to XML
32 // bison -p OSUnserializeXML OSUnserializeXML.y
33 // head -50 OSUnserializeXML.y > OSUnserializeXML.cpp
34 // sed -e "s/#include <stdio.h>//" < OSUnserializeXML.tab.c >> OSUnserializeXML.cpp
36 // when changing code check in both OSUnserializeXML.y and OSUnserializeXML.cpp
42 // DO NOT EDIT OSUnserializeXML.cpp!
57 #include <libkern/c++/OSMetaClass.h>
58 #include <libkern/c++/OSContainers.h>
59 #include <libkern/c++/OSLib.h>
61 #define YYSTYPE object_t *
62 #define YYPARSE_PARAM state
63 #define YYLEX_PARAM state
65 // this is the internal struct used to hold objects on parser stack
66 // it represents objects both before and after they have been created
67 typedef struct object {
70 struct object *elements;
72 OSString *key; // for dictionary
74 void *data; // for data
75 char *string; // for string & symbol
76 long long number; // for number
80 // this code is reentrant, this structure contains all
81 // state information for the parsing of a single buffer
82 typedef struct parser_state {
83 const char *parseBuffer; // start of text to be parsed
84 int parseBufferIndex; // current index into text
85 int lineNumber; // current line number
86 object_t *objects; // internal objects in use
87 object_t *freeObjects; // internal objects that are free
88 OSDictionary *tags; // used to remember "ID" tags
89 OSString **errorString; // parse error with line
90 OSObject *parsedObject; // resultant object of parsed text
93 #define STATE ((parser_state_t *)state)
96 #define yyerror(s) OSUnserializeerror(STATE, (s))
97 static int OSUnserializeerror(parser_state_t *state, char *s);
99 static int yylex(YYSTYPE *lvalp, parser_state_t *state);
100 static int yyparse(void * state);
102 static object_t *newObject(parser_state_t *state);
103 static void freeObject(parser_state_t *state, object_t *o);
104 static void rememberObject(parser_state_t *state, int tag, OSObject *o);
105 static object_t *retrieveObject(parser_state_t *state, int tag);
106 static void cleanupObjects(parser_state_t *state);
108 static object_t *buildDictionary(parser_state_t *state, object_t *o);
109 static object_t *buildArray(parser_state_t *state, object_t *o);
110 static object_t *buildSet(parser_state_t *state, object_t *o);
111 static object_t *buildString(parser_state_t *state, object_t *o);
112 static object_t *buildData(parser_state_t *state, object_t *o);
113 static object_t *buildNumber(parser_state_t *state, object_t *o);
114 static object_t *buildBoolean(parser_state_t *state, object_t *o);
117 extern void *kern_os_malloc(size_t size);
118 extern void *kern_os_realloc(void * addr, size_t size);
119 extern void kern_os_free(void * addr);
121 //XXX shouldn't have to define these
122 extern long strtol(const char *, char **, int);
123 extern unsigned long strtoul(const char *, char **, int);
127 #define malloc(s) kern_os_malloc(s)
128 #define realloc(a, s) kern_os_realloc(a, s)
129 #define free(a) kern_os_free(a)
142 %% /* Grammar rules and actions follow */
144 input: /* empty */ { yyerror("unexpected end of buffer");
147 | object { STATE->parsedObject = $1->object;
149 freeObject(STATE, $1);
152 | SYNTAX_ERROR { yyerror("syntax error");
157 object: dict { $$ = buildDictionary(STATE, $1); }
158 | array { $$ = buildArray(STATE, $1); }
159 | set { $$ = buildSet(STATE, $1); }
160 | string { $$ = buildString(STATE, $1); }
161 | data { $$ = buildData(STATE, $1); }
162 | number { $$ = buildNumber(STATE, $1); }
163 | boolean { $$ = buildBoolean(STATE, $1); }
164 | idref { $$ = retrieveObject(STATE, $1->idref);
166 $$->object->retain();
168 yyerror("forward reference detected");
171 freeObject(STATE, $1);
175 //------------------------------------------------------------------------------
177 dict: '{' '}' { $$ = $1;
180 | '{' pairs '}' { $$ = $1;
187 | pairs pair { $$ = $2;
192 pair: key object { $$ = $1;
193 $$->key = $$->object;
194 $$->object = $2->object;
197 freeObject(STATE, $2);
201 key: KEY { $$ = buildString(STATE, $1); }
204 //------------------------------------------------------------------------------
206 array: '(' ')' { $$ = $1;
209 | '(' elements ')' { $$ = $1;
215 set: '[' ']' { $$ = $1;
218 | '[' elements ']' { $$ = $1;
224 elements: object { $$ = $1;
227 | elements object { $$ = $2;
232 //------------------------------------------------------------------------------
252 OSUnserializeerror(parser_state_t * state, char *s) /* Called by yyparse on errors */
254 char tempString[128];
256 if (state->errorString) {
257 snprintf(tempString, 128, "OSUnserializeXML: %s near line %d\n", s, state->lineNumber);
258 *(state->errorString) = OSString::withCString(tempString);
264 #define TAG_MAX_LENGTH 32
265 #define TAG_MAX_ATTRIBUTES 32
270 #define TAG_COMMENT 4
272 #define currentChar() (state->parseBuffer[state->parseBufferIndex])
273 #define nextChar() (state->parseBuffer[++state->parseBufferIndex])
274 #define prevChar() (state->parseBuffer[state->parseBufferIndex - 1])
276 #define isSpace(c) ((c) == ' ' || (c) == '\t')
277 #define isAlpha(c) (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z'))
278 #define isDigit(c) ((c) >= '0' && (c) <= '9')
279 #define isAlphaDigit(c) ((c) >= 'a' && (c) <= 'f')
280 #define isHexDigit(c) (isDigit(c) || isAlphaDigit(c))
281 #define isAlphaNumeric(c) (isAlpha(c) || isDigit(c) || ((c) == '-'))
284 getTag(parser_state_t *state,
285 char tag[TAG_MAX_LENGTH],
287 char attributes[TAG_MAX_ATTRIBUTES][TAG_MAX_LENGTH],
288 char values[TAG_MAX_ATTRIBUTES][TAG_MAX_LENGTH] )
291 int c = currentChar();
292 int tagType = TAG_START;
296 if (c != '<') return TAG_BAD;
297 c = nextChar(); // skip '<'
299 if (c == '?' || c == '!') {
300 while ((c = nextChar()) != 0) {
301 if (c == '\n') state->lineNumber++;
310 c = nextChar(); // skip '/'
313 if (!isAlpha(c)) return TAG_BAD;
315 /* find end of tag while copying it */
316 while (isAlphaNumeric(c)) {
319 if (length >= (TAG_MAX_LENGTH - 1)) return TAG_BAD;
324 // printf("tag %s, type %d\n", tag, tagType);
326 // look for attributes of the form attribute = "value" ...
327 while ((c != '>') && (c != '/')) {
328 while (isSpace(c)) c = nextChar();
331 while (isAlphaNumeric(c)) {
332 attributes[*attributeCount][length++] = c;
333 if (length >= (TAG_MAX_LENGTH - 1)) return TAG_BAD;
336 attributes[*attributeCount][length] = 0;
338 while (isSpace(c)) c = nextChar();
340 if (c != '=') return TAG_BAD;
343 while (isSpace(c)) c = nextChar();
345 if (c != '"') return TAG_BAD;
349 values[*attributeCount][length++] = c;
350 if (length >= (TAG_MAX_LENGTH - 1)) return TAG_BAD;
353 values[*attributeCount][length] = 0;
355 c = nextChar(); // skip closing quote
357 // printf(" attribute '%s' = '%s', nextchar = '%c'\n",
358 // attributes[*attributeCount], values[*attributeCount], c);
361 if (*attributeCount >= TAG_MAX_ATTRIBUTES) return TAG_BAD;
365 c = nextChar(); // skip '/'
368 if (c != '>') return TAG_BAD;
369 c = nextChar(); // skip '>'
375 getString(parser_state_t *state)
377 int c = currentChar();
378 int start, length, i, j;
381 start = state->parseBufferIndex;
382 /* find end of string */
385 if (c == '\n') state->lineNumber++;
392 if (c != '<') return 0;
394 length = state->parseBufferIndex - start;
396 /* copy to null terminated buffer */
397 tempString = (char *)malloc(length + 1);
398 if (tempString == 0) {
399 printf("OSUnserializeXML: can't alloc temp memory\n");
403 // copy out string in tempString
404 // "&amp;" -> '&', "&lt;" -> '<', "&gt;" -> '>'
408 c = state->parseBuffer[start + i++];
412 if ((i+3) > length) goto error;
413 c = state->parseBuffer[start + i++];
415 if (state->parseBuffer[start + i++] != 't') goto error;
416 if (state->parseBuffer[start + i++] != ';') goto error;
417 tempString[j++] = '<';
421 if (state->parseBuffer[start + i++] != 't') goto error;
422 if (state->parseBuffer[start + i++] != ';') goto error;
423 tempString[j++] = '>';
426 if ((i+3) > length) goto error;
428 if (state->parseBuffer[start + i++] != 'm') goto error;
429 if (state->parseBuffer[start + i++] != 'p') goto error;
430 if (state->parseBuffer[start + i++] != ';') goto error;
431 tempString[j++] = '&';
439 // printf("string %s\n", tempString);
444 if (tempString) free(tempString);
449 getNumber(parser_state_t *state)
451 unsigned long long n = 0;
454 int c = currentChar();
469 n = (n * base + c - '0');
473 n = (unsigned long long)((long long)n * (long long)-1);
476 while(isHexDigit(c)) {
478 n = (n * base + c - '0');
480 n = (n * base + 0xa + c - 'a');
485 // printf("number 0x%x\n", (unsigned long)n);
489 // taken from CFXMLParsing/CFPropertyList.c
491 static const signed char __CFPLDataDecodeTable[128] = {
492 /* 000 */ -1, -1, -1, -1, -1, -1, -1, -1,
493 /* 010 */ -1, -1, -1, -1, -1, -1, -1, -1,
494 /* 020 */ -1, -1, -1, -1, -1, -1, -1, -1,
495 /* 030 */ -1, -1, -1, -1, -1, -1, -1, -1,
496 /* ' ' */ -1, -1, -1, -1, -1, -1, -1, -1,
497 /* '(' */ -1, -1, -1, 62, -1, -1, -1, 63,
498 /* '0' */ 52, 53, 54, 55, 56, 57, 58, 59,
499 /* '8' */ 60, 61, -1, -1, -1, 0, -1, -1,
500 /* '@' */ -1, 0, 1, 2, 3, 4, 5, 6,
501 /* 'H' */ 7, 8, 9, 10, 11, 12, 13, 14,
502 /* 'P' */ 15, 16, 17, 18, 19, 20, 21, 22,
503 /* 'X' */ 23, 24, 25, -1, -1, -1, -1, -1,
504 /* '`' */ -1, 26, 27, 28, 29, 30, 31, 32,
505 /* 'h' */ 33, 34, 35, 36, 37, 38, 39, 40,
506 /* 'p' */ 41, 42, 43, 44, 45, 46, 47, 48,
507 /* 'x' */ 49, 50, 51, -1, -1, -1, -1, -1
510 #define DATA_ALLOC_SIZE 4096
513 getCFEncodedData(parser_state_t *state, unsigned int *size)
515 int numeq = 0, acc = 0, cntr = 0;
516 int tmpbufpos = 0, tmpbuflen = 0;
517 unsigned char *tmpbuf = (unsigned char *)malloc(DATA_ALLOC_SIZE);
519 int c = currentChar();
528 if (c == '=') numeq++; else numeq = 0;
529 if (c == '\n') state->lineNumber++;
530 if (__CFPLDataDecodeTable[c] < 0) {
536 acc += __CFPLDataDecodeTable[c];
537 if (0 == (cntr & 0x3)) {
538 if (tmpbuflen <= tmpbufpos + 2) {
539 tmpbuflen += DATA_ALLOC_SIZE;
540 tmpbuf = (unsigned char *)realloc(tmpbuf, tmpbuflen);
542 tmpbuf[tmpbufpos++] = (acc >> 16) & 0xff;
544 tmpbuf[tmpbufpos++] = (acc >> 8) & 0xff;
546 tmpbuf[tmpbufpos++] = acc & 0xff;
559 getHexData(parser_state_t *state, unsigned int *size)
562 unsigned char *d, *start, *lastStart;
564 start = lastStart = d = (unsigned char *)malloc(DATA_ALLOC_SIZE);
569 if (isSpace(c)) while ((c = nextChar()) != 0 && isSpace(c)) {};
579 } else if (isAlphaDigit(c)) {
580 *d = (0xa + (c - 'a')) << 4;
589 } else if (isAlphaDigit(c)) {
590 *d |= 0xa + (c - 'a');
596 if ((d - lastStart) >= DATA_ALLOC_SIZE) {
597 int oldsize = d - start;
598 start = (unsigned char *)realloc(start, oldsize + DATA_ALLOC_SIZE);
599 d = lastStart = start + oldsize;
615 yylex(YYSTYPE *lvalp, parser_state_t *state)
619 char tag[TAG_MAX_LENGTH];
621 char attributes[TAG_MAX_ATTRIBUTES][TAG_MAX_LENGTH];
622 char values[TAG_MAX_ATTRIBUTES][TAG_MAX_LENGTH];
628 /* skip white space */
629 if (isSpace(c)) while ((c = nextChar()) != 0 && isSpace(c)) {};
631 /* keep track of line number, don't return \n's */
638 // end of the buffer?
641 tagType = getTag(STATE, tag, &attributeCount, attributes, values);
642 if (tagType == TAG_BAD) return SYNTAX_ERROR;
643 if (tagType == TAG_COMMENT) goto top;
645 // handle allocation and check for "ID" and "IDREF" tags up front
646 *lvalp = object = newObject(STATE);
648 for (i=0; i < attributeCount; i++) {
649 if (attributes[i][0] == 'I' && attributes[i][1] == 'D') {
650 // check for idref's, note: we ignore the tag, for
651 // this to work correctly, all idrefs must be unique
652 // across the whole serialization
653 if (attributes[i][2] == 'R' && attributes[i][3] == 'E' &&
654 attributes[i][4] == 'F' && !attributes[i][5]) {
655 if (tagType != TAG_EMPTY) return SYNTAX_ERROR;
656 object->idref = strtol(values[i], NULL, 0);
660 if (!attributes[i][2]) {
661 object->idref = strtol(values[i], NULL, 0);
670 if (!strcmp(tag, "array")) {
671 if (tagType == TAG_EMPTY) {
672 object->elements = NULL;
675 return (tagType == TAG_START) ? '(' : ')';
679 if (!strcmp(tag, "dict")) {
680 if (tagType == TAG_EMPTY) {
681 object->elements = NULL;
684 return (tagType == TAG_START) ? '{' : '}';
686 if (!strcmp(tag, "data")) {
688 if (tagType == TAG_EMPTY) {
694 bool isHexFormat = false;
695 for (int i=0; i < attributeCount; i++) {
696 if (!strcmp(attributes[i], "format") && !strcmp(values[i], "hex")) {
701 // CF encoded is the default form
703 object->data = getHexData(STATE, &size);
705 object->data = getCFEncodedData(STATE, &size);
708 if ((getTag(STATE, tag, &attributeCount, attributes, values) != TAG_END) || strcmp(tag, "data")) {
715 if (!strcmp(tag, "false")) {
716 if (tagType == TAG_EMPTY) {
723 if (!strcmp(tag, "integer")) {
724 object->size = 64; // default
725 for (i=0; i < attributeCount; i++) {
726 if (!strcmp(attributes[i], "size")) {
727 object->size = strtoul(values[i], NULL, 0);
730 if (tagType == TAG_EMPTY) {
734 object->number = getNumber(STATE);
735 if ((getTag(STATE, tag, &attributeCount, attributes, values) != TAG_END) || strcmp(tag, "integer")) {
742 if (!strcmp(tag, "key")) {
743 if (tagType == TAG_EMPTY) return SYNTAX_ERROR;
744 object->string = getString(STATE);
745 if (!object->string) {
748 if ((getTag(STATE, tag, &attributeCount, attributes, values) != TAG_END)
749 || strcmp(tag, "key")) {
756 if (!strcmp(tag, "plist")) {
757 freeObject(STATE, object);
762 if (!strcmp(tag, "string")) {
763 if (tagType == TAG_EMPTY) {
764 object->string = (char *)malloc(1);
765 object->string[0] = 0;
768 object->string = getString(STATE);
769 if (!object->string) {
772 if ((getTag(STATE, tag, &attributeCount, attributes, values) != TAG_END)
773 || strcmp(tag, "string")) {
778 if (!strcmp(tag, "set")) {
779 if (tagType == TAG_EMPTY) {
780 object->elements = NULL;
783 if (tagType == TAG_START) {
791 if (!strcmp(tag, "true")) {
792 if (tagType == TAG_EMPTY) {
803 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
804 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
805 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
807 // "java" like allocation: if this code hits a syntax error in
808 // the middle of the parsed string we just bail with pointers hanging
809 // all over the place; this code helps keep it all together
811 //static int object_count = 0;
814 newObject(parser_state_t *state)
818 if (state->freeObjects) {
819 o = state->freeObjects;
820 state->freeObjects = state->freeObjects->next;
822 o = (object_t *)malloc(sizeof(object_t));
824 bzero(o, sizeof(object_t));
825 o->free = state->objects;
833 freeObject(parser_state_t * state, object_t *o)
835 o->next = state->freeObjects;
836 state->freeObjects = o;
840 cleanupObjects(parser_state_t *state)
842 object_t *t, *o = state->objects;
846 // printf("OSUnserializeXML: releasing object o=%x object=%x\n", (int)o, (int)o->object);
847 o->object->release();
850 // printf("OSUnserializeXML: freeing object o=%x data=%x\n", (int)o, (int)o->data);
854 // printf("OSUnserializeXML: releasing object o=%x key=%x\n", (int)o, (int)o->key);
858 // printf("OSUnserializeXML: freeing object o=%x string=%x\n", (int)o, (int)o->string);
867 // printf("object_count = %d\n", object_count);
870 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
871 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
872 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
875 rememberObject(parser_state_t *state, int tag, OSObject *o)
878 snprintf(key, 16, "%u", tag);
880 // printf("remember key %s\n", key);
882 state->tags->setObject(key, o);
886 retrieveObject(parser_state_t *state, int tag)
891 snprintf(key, 16, "%u", tag);
893 // printf("retrieve key '%s'\n", key);
895 ref = state->tags->getObject(key);
898 o = newObject(state);
903 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
904 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
905 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
908 buildDictionary(parser_state_t *state, object_t * header)
914 // get count and reverse order
915 o = header->elements;
916 header->elements = 0;
922 t->next = header->elements;
923 header->elements = t;
926 dict = OSDictionary::withCapacity(count);
927 if (header->idref >= 0) rememberObject(state, header->idref, dict);
929 o = header->elements;
931 dict->setObject(o->key, o->object);
934 o->object->release();
940 freeObject(state, t);
948 buildArray(parser_state_t *state, object_t * header)
954 // get count and reverse order
955 o = header->elements;
956 header->elements = 0;
962 t->next = header->elements;
963 header->elements = t;
966 array = OSArray::withCapacity(count);
967 if (header->idref >= 0) rememberObject(state, header->idref, array);
969 o = header->elements;
971 array->setObject(o->object);
973 o->object->release();
978 freeObject(state, t);
986 buildSet(parser_state_t *state, object_t *header)
988 object_t *o = buildArray(state, header);
990 OSArray *array = (OSArray *)o->object;
991 OSSet *set = OSSet::withArray(array, array->getCapacity());
993 // write over the reference created in buildArray
994 if (header->idref >= 0) rememberObject(state, header->idref, set);
1002 buildString(parser_state_t *state, object_t *o)
1006 string = OSString::withCString(o->string);
1007 if (o->idref >= 0) rememberObject(state, o->idref, string);
1017 buildData(parser_state_t *state, object_t *o)
1022 data = OSData::withBytes(o->data, o->size);
1024 data = OSData::withCapacity(0);
1026 if (o->idref >= 0) rememberObject(state, o->idref, data);
1028 if (o->size) free(o->data);
1035 buildNumber(parser_state_t *state, object_t *o)
1037 OSNumber *number = OSNumber::withNumber(o->number, o->size);
1039 if (o->idref >= 0) rememberObject(state, o->idref, number);
1046 buildBoolean(parser_state_t *state, object_t *o)
1048 o->object = ((o->number == 0) ? kOSBooleanFalse : kOSBooleanTrue);
1049 o->object->retain();
1054 OSUnserializeXML(const char *buffer, OSString **errorString)
1057 parser_state_t *state = (parser_state_t *)malloc(sizeof(parser_state_t));
1059 if ((!state) || (!buffer)) return 0;
1062 if (errorString) *errorString = NULL;
1064 state->parseBuffer = buffer;
1065 state->parseBufferIndex = 0;
1066 state->lineNumber = 1;
1068 state->freeObjects = 0;
1069 state->tags = OSDictionary::withCapacity(128);
1070 state->errorString = errorString;
1071 state->parsedObject = 0;
1073 (void)yyparse((void *)state);
1075 object = state->parsedObject;
1077 cleanupObjects(state);
1078 state->tags->release();
1090 // DO NOT EDIT OSUnserializeXML.cpp!