2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
20 * @APPLE_LICENSE_HEADER_END@
23 /* OSUnserializeXML.y created by rsulack on Tue Oct 12 1999 */
25 // XML parser for unserializing OSContainer objects
28 // bison -p OSUnserializeXML OSUnserializeXML.y
29 // head -50 OSUnserializeXML.y > OSUnserializeXML.cpp
30 // sed -e "s/stdio.h/stddef.h/" < OSUnserializeXML.tab.c >> OSUnserializeXML.cpp
32 // when changing code check in both OSUnserializeXML.y and OSUnserializeXML.cpp
40 // DO NOT EDIT OSUnserializeXML.cpp!
54 #include <libkern/c++/OSMetaClass.h>
55 #include <libkern/c++/OSContainers.h>
56 #include <libkern/c++/OSLib.h>
58 typedef struct object {
61 struct object *elements;
63 const OSSymbol *key; // for dictionary
65 void *data; // for data
66 char *string; // for string & symbol
67 long long number; // for number
72 static int yyerror(char *s);
75 static object_t * newObject();
76 static void freeObject(object_t *o);
78 static object_t *buildOSDictionary(object_t *);
79 static object_t *buildOSArray(object_t *);
80 static object_t *buildOSSet(object_t *);
81 static object_t *buildOSString(object_t *);
82 static object_t *buildKey(object_t *);
83 static object_t *buildOSData(object_t *);
84 static object_t *buildOSNumber(object_t *);
85 static object_t *buildOSBoolean(object_t *o);
87 static void rememberObject(int, OSObject *);
88 static object_t *retrieveObject(int);
90 // resultant object of parsed text
91 static OSObject *parsedObject;
93 #define YYSTYPE object_t *
96 extern void *kern_os_malloc(size_t size);
97 extern void *kern_os_realloc(void * addr, size_t size);
98 extern void kern_os_free(void * addr);
100 //XXX shouldn't have to define these
101 extern long strtol(const char *, char **, int);
102 extern unsigned long strtoul(const char *, char **, int);
// Redirect the standard allocator names to the kern_os_* routines declared
// above, so every malloc/realloc/free spelled out after this point in the
// file resolves to kern_os_malloc/kern_os_realloc/kern_os_free.
// NOTE(review): presumably this is also meant to capture allocator calls in
// the bison-generated parser code that gets appended -- confirm.
106 #define malloc(s) kern_os_malloc(s)
107 #define realloc(a, s) kern_os_realloc(a, s)
108 #define free(a) kern_os_free(a)
121 %% /* Grammar rules and actions follow */
123 input: /* empty */ { parsedObject = (OSObject *)NULL; YYACCEPT; }
124 | object { parsedObject = $1->object;
130 yyerror("syntax error");
135 object: dict { $$ = buildOSDictionary($1); }
136 | array { $$ = buildOSArray($1); }
137 | set { $$ = buildOSSet($1); }
138 | string { $$ = buildOSString($1); }
139 | data { $$ = buildOSData($1); }
140 | number { $$ = buildOSNumber($1); }
141 | boolean { $$ = buildOSBoolean($1); }
142 | idref { $$ = retrieveObject($1->idref);
144 $$->object->retain();
146 yyerror("forward reference detected");
153 //------------------------------------------------------------------------------
155 dict: '{' '}' { $$ = $1;
158 | '{' pairs '}' { $$ = $1;
165 | pairs pair { $$ = $2;
170 pair: key object { $$ = $1;
172 $$->object = $2->object;
178 key: KEY { $$ = buildKey($1); }
181 //------------------------------------------------------------------------------
183 array: '(' ')' { $$ = $1;
186 | '(' elements ')' { $$ = $1;
192 set: '[' ']' { $$ = $1;
195 | '[' elements ']' { $$ = $1;
201 elements: object { $$ = $1;
204 | elements object { $$ = $2;
209 //------------------------------------------------------------------------------
// Scanner state shared by the hand-written lexer routines in this file.
// lineNumber is bumped whenever a scanner consumes '\n' and is reported by
// yyerror(); parseBuffer is the text being scanned and parseBufferIndex is
// the offset of the current character within it.
228 static int lineNumber = 0;
229 static const char *parseBuffer;
230 static int parseBufferIndex;
// Cursor helpers. nextChar() pre-increments, so it advances the cursor and
// yields the new current character; prevChar() peeks one position back
// without moving the cursor (it would read parseBuffer[-1] at index 0).
232 #define currentChar() (parseBuffer[parseBufferIndex])
233 #define nextChar() (parseBuffer[++parseBufferIndex])
234 #define prevChar() (parseBuffer[parseBufferIndex - 1])
// Character classes. Each macro may evaluate its argument more than once,
// so pass a plain variable rather than an expression with side effects.
// isSpace() covers only space and tab, not newline.
236 #define isSpace(c) ((c) == ' ' || (c) == '\t')
237 #define isAlpha(c) (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z'))
238 #define isDigit(c) ((c) >= '0' && (c) <= '9')
// NOTE(review): isAlphaDigit() accepts lowercase 'a'..'f' only, so
// isHexDigit() rejects the uppercase hex digits 'A'..'F'.
239 #define isAlphaDigit(c) ((c) >= 'a' && (c) <= 'f')
240 #define isHexDigit(c) (isDigit(c) || isAlphaDigit(c))
// isAlphaNumeric() additionally accepts '-'.
241 #define isAlphaNumeric(c) (isAlpha(c) || isDigit(c) || ((c) == '-'))
// Holds the most recently formatted parse error; OSUnserializeXML() copies
// it into an OSString for the caller.
243 static char yyerror_message[128];
// Formats the message s together with the current lineNumber into
// yyerror_message.
246 yyerror(char *s) /* Called by yyparse on error */
// NOTE(review): sprintf() is unbounded; the fixed prefix plus s plus the line
// number must fit in the 128-byte buffer. A bounded formatter would be safer
// if one is available in this environment.
248 sprintf(yyerror_message, "OSUnserializeXML: %s near line %d\n", s, lineNumber);
252 #define TAG_MAX_LENGTH 32
253 #define TAG_MAX_ATTRIBUTES 32
258 #define TAG_COMMENT 4
261 getTag(char tag[TAG_MAX_LENGTH],
263 char attributes[TAG_MAX_ATTRIBUTES][TAG_MAX_LENGTH],
264 char values[TAG_MAX_ATTRIBUTES][TAG_MAX_LENGTH] )
267 int c = currentChar();
268 int tagType = TAG_START;
272 if (c != '<') return TAG_BAD;
273 c = nextChar(); // skip '<'
275 if (c == '?' || c == '!') {
276 while ((c = nextChar()) != 0) {
277 if (c == '\n') lineNumber++;
286 c = nextChar(); // skip '/'
289 if (!isAlpha(c)) return TAG_BAD;
291 /* find end of tag while copying it */
292 while (isAlphaNumeric(c)) {
295 if (length >= (TAG_MAX_LENGTH - 1)) return TAG_BAD;
300 //printf("tag %s, type %d\n", tag, tagType);
302 // look for attributes of the form attribute = "value" ...
303 while ((c != '>') && (c != '/')) {
304 while (isSpace(c)) c = nextChar();
307 while (isAlphaNumeric(c)) {
308 attributes[*attributeCount][length++] = c;
309 if (length >= (TAG_MAX_LENGTH - 1)) return TAG_BAD;
312 attributes[*attributeCount][length] = 0;
314 while (isSpace(c)) c = nextChar();
316 if (c != '=') return TAG_BAD;
319 while (isSpace(c)) c = nextChar();
321 if (c != '"') return TAG_BAD;
325 values[*attributeCount][length++] = c;
326 if (length >= (TAG_MAX_LENGTH - 1)) return TAG_BAD;
329 values[*attributeCount][length] = 0;
331 c = nextChar(); // skip closing quote
333 //printf(" attribute '%s' = '%s', nextchar = '%c'\n", attributes[*attributeCount], values[*attributeCount], c);
336 if (*attributeCount >= TAG_MAX_ATTRIBUTES) return TAG_BAD;
340 c = nextChar(); // skip '/'
343 if (c != '>') return TAG_BAD;
344 c = nextChar(); // skip '>'
352 int c = currentChar();
354 int start, length, i, j;;
357 start = parseBufferIndex;
358 /* find end of string */
361 if (c == '\n') lineNumber++;
368 if (c != '<') return 0;
370 length = parseBufferIndex - start;
372 /* copy to null terminated buffer */
373 tempString = (char *)malloc(length + 1);
374 if (tempString == 0) {
375 printf("OSUnserializeXML: can't alloc temp memory\n");
379 // copy out string in tempString
380 // "&" -> '&', "<" -> '<', ">" -> '>'
384 c = parseBuffer[start + i++];
388 if ((i+3) > length) goto error;
389 c = parseBuffer[start + i++];
391 if (parseBuffer[start + i++] != 't') goto error;
392 if (parseBuffer[start + i++] != ';') goto error;
393 tempString[j++] = '<';
397 if (parseBuffer[start + i++] != 't') goto error;
398 if (parseBuffer[start + i++] != ';') goto error;
399 tempString[j++] = '>';
402 if ((i+3) > length) goto error;
404 if (parseBuffer[start + i++] != 'm') goto error;
405 if (parseBuffer[start + i++] != 'p') goto error;
406 if (parseBuffer[start + i++] != ';') goto error;
407 tempString[j++] = '&';
415 //printf("string %s\n", tempString);
420 if (tempString) free(tempString);
427 unsigned long long n = 0;
429 int c = currentChar();
431 if (!isDigit (c)) return 0;
442 n = (n * base + c - '0');
446 while(isHexDigit(c)) {
448 n = (n * base + c - '0');
450 n = (n * base + 0xa + c - 'a');
455 //printf("number 0x%x\n", (unsigned long)n);
459 // taken from CFXMLParsing/CFPropertyList.c
// Decode table for the encoded <data> payload, indexed by 7-bit character
// code. 'A'-'Z' map to 0-25, 'a'-'z' to 26-51, '0'-'9' to 52-61, '+' to 62
// and '/' to 63 (the standard base64 alphabet); '=' maps to 0 and every
// other character maps to -1.
// NOTE(review): the table has exactly 128 entries. getCFEncodedData() indexes
// it with the current character and no range check is visible in this
// listing, so a byte outside 0..127 would read out of bounds -- confirm
// against the full source.
461 static const signed char __CFPLDataDecodeTable[128] = {
462 /* 000 */ -1, -1, -1, -1, -1, -1, -1, -1,
463 /* 010 */ -1, -1, -1, -1, -1, -1, -1, -1,
464 /* 020 */ -1, -1, -1, -1, -1, -1, -1, -1,
465 /* 030 */ -1, -1, -1, -1, -1, -1, -1, -1,
466 /* ' ' */ -1, -1, -1, -1, -1, -1, -1, -1,
467 /* '(' */ -1, -1, -1, 62, -1, -1, -1, 63,
468 /* '0' */ 52, 53, 54, 55, 56, 57, 58, 59,
469 /* '8' */ 60, 61, -1, -1, -1, 0, -1, -1,
470 /* '@' */ -1, 0, 1, 2, 3, 4, 5, 6,
471 /* 'H' */ 7, 8, 9, 10, 11, 12, 13, 14,
472 /* 'P' */ 15, 16, 17, 18, 19, 20, 21, 22,
473 /* 'X' */ 23, 24, 25, -1, -1, -1, -1, -1,
474 /* '`' */ -1, 26, 27, 28, 29, 30, 31, 32,
475 /* 'h' */ 33, 34, 35, 36, 37, 38, 39, 40,
476 /* 'p' */ 41, 42, 43, 44, 45, 46, 47, 48,
477 /* 'x' */ 49, 50, 51, -1, -1, -1, -1, -1
// Size in bytes of the initial output buffer allocated by getCFEncodedData()
// and getHexData(), and also the step by which each of them grows its buffer
// through realloc().
480 #define OSDATA_ALLOC_SIZE 4096
483 getCFEncodedData(unsigned int *size)
485 int numeq = 0, acc = 0, cntr = 0;
486 int tmpbufpos = 0, tmpbuflen = 0;
487 unsigned char *tmpbuf = (unsigned char *)malloc(OSDATA_ALLOC_SIZE);
489 int c = currentChar();
498 if (c == '=') numeq++; else numeq = 0;
499 if (c == '\n') lineNumber++;
500 if (__CFPLDataDecodeTable[c] < 0) {
506 acc += __CFPLDataDecodeTable[c];
507 if (0 == (cntr & 0x3)) {
508 if (tmpbuflen <= tmpbufpos + 2) {
509 tmpbuflen += OSDATA_ALLOC_SIZE;
510 tmpbuf = (unsigned char *)realloc(tmpbuf, tmpbuflen);
512 tmpbuf[tmpbufpos++] = (acc >> 16) & 0xff;
514 tmpbuf[tmpbufpos++] = (acc >> 8) & 0xff;
516 tmpbuf[tmpbufpos++] = acc & 0xff;
525 getHexData(unsigned int *size)
528 unsigned char *d, *start, *lastStart;
530 start = lastStart = d = (unsigned char *)malloc(OSDATA_ALLOC_SIZE);
535 if (isSpace(c)) while ((c = nextChar()) != 0 && isSpace(c)) {};
545 } else if (isAlphaDigit(c)) {
546 *d = (0xa + (c - 'a')) << 4;
555 } else if (isAlphaDigit(c)) {
556 *d |= 0xa + (c - 'a');
562 if ((d - lastStart) >= OSDATA_ALLOC_SIZE) {
563 int oldsize = d - start;
564 start = (unsigned char *)realloc(start, oldsize + OSDATA_ALLOC_SIZE);
565 d = lastStart = start + oldsize;
585 char tag[TAG_MAX_LENGTH];
587 char attributes[TAG_MAX_ATTRIBUTES][TAG_MAX_LENGTH];
588 char values[TAG_MAX_ATTRIBUTES][TAG_MAX_LENGTH];
590 if (parseBufferIndex == 0) lineNumber = 1;
595 /* skip white space */
596 if (isSpace(c)) while ((c = nextChar()) != 0 && isSpace(c)) {};
598 /* keep track of line number, don't return \n's */
607 tagType = getTag(tag, &attributeCount, attributes, values);
608 if (tagType == TAG_BAD) return SYNTAX_ERROR;
609 if (tagType == TAG_COMMENT) goto top;
611 // handle allocation and check for "ID" and "IDREF" tags up front
612 yylval = newObject();
614 for (int i=0; i < attributeCount; i++) {
615 if (attributes[i][0] == 'I' && attributes[i][1] == 'D') {
616 // check for idref's, note: we ignore the tag, for
617 // this to work correctly, all idrefs must be unique
618 // across the whole serialization
619 if (attributes[i][2] == 'R' && attributes[i][3] == 'E' &&
620 attributes[i][4] == 'F' && !attributes[i][5]) {
621 if (tagType != TAG_EMPTY) return SYNTAX_ERROR;
622 yylval->idref = strtol(values[i], NULL, 0);
626 if (!attributes[i][2]) {
627 yylval->idref = strtol(values[i], NULL, 0);
636 if (!strcmp(tag, "array")) {
637 if (tagType == TAG_EMPTY) {
638 yylval->elements = NULL;
641 return (tagType == TAG_START) ? '(' : ')';
645 if (!strcmp(tag, "dict")) {
646 if (tagType == TAG_EMPTY) {
647 yylval->elements = NULL;
650 return (tagType == TAG_START) ? '{' : '}';
652 if (!strcmp(tag, "data")) {
655 if (tagType == TAG_EMPTY) {
660 for (int i=0; i < attributeCount; i++) {
661 if (!strcmp(attributes[i], "format") && !strcmp(values[i], "hex")) {
666 // CF encoded is the default form
668 yylval->data = getHexData(&size);
670 yylval->data = getCFEncodedData(&size);
673 if ((getTag(tag, &attributeCount, attributes, values) != TAG_END) || strcmp(tag, "data")) {
680 if (!strcmp(tag, "false")) {
681 if (tagType == TAG_EMPTY) {
688 if (!strcmp(tag, "integer")) {
689 yylval->size = 64; // default
690 for (int i=0; i < attributeCount; i++) {
691 if (!strcmp(attributes[i], "size")) {
692 yylval->size = strtoul(values[i], NULL, 0);
695 if (tagType == TAG_EMPTY) {
699 yylval->number = getNumber();
700 if ((getTag(tag, &attributeCount, attributes, values) != TAG_END) || strcmp(tag, "integer")) {
707 if (!strcmp(tag, "key")) {
708 if (tagType == TAG_EMPTY) return SYNTAX_ERROR;
709 yylval->string = getString();
710 if (!yylval->string) {
713 if ((getTag(tag, &attributeCount, attributes, values) != TAG_END)
714 || strcmp(tag, "key")) {
721 if (!strcmp(tag, "plist")) {
727 if (!strcmp(tag, "string")) {
728 if (tagType == TAG_EMPTY) {
729 yylval->string = (char *)malloc(1);
733 yylval->string = getString();
734 if (!yylval->string) {
737 if ((getTag(tag, &attributeCount, attributes, values) != TAG_END)
738 || strcmp(tag, "string")) {
743 if (!strcmp(tag, "set")) {
744 if (tagType == TAG_EMPTY) {
745 yylval->elements = NULL;
748 if (tagType == TAG_START) {
756 if (!strcmp(tag, "true")) {
757 if (tagType == TAG_EMPTY) {
765 // XXX should we ignore invalid tags?
773 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
774 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
775 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
777 // "Java"-like allocation: if this code hits a syntax error in the
778 // middle of the parsed string we just bail with pointers hanging
779 // all over the place, so this code helps keep them all together
// objects: starting point of the list walked by the cleanup code below, which
// releases/frees whatever each node still holds.
781 static object_t *objects = 0;
// freeObjects: head of the recycle list. freeObject() links a node in front
// of it through ->next, and the allocation code below takes nodes from its
// head (it appears to fall back to malloc() otherwise -- the branch itself is
// not visible in this listing).
782 static object_t *freeObjects = 0;
791 freeObjects = freeObjects->next;
793 o = (object_t *)malloc(sizeof(object_t));
794 bzero(o, sizeof(object_t));
803 freeObject(object_t *o)
805 o->next = freeObjects;
812 object_t *t, *o = objects;
816 printf("OSUnserializeXML: releasing object o=%x object=%x\n", (int)o, (int)o->object);
817 o->object->release();
820 printf("OSUnserializeXML: freeing object o=%x data=%x\n", (int)o, (int)o->data);
824 printf("OSUnserializeXML: releasing object o=%x key=%x\n", (int)o, (int)o->key);
828 printf("OSUnserializeXML: freeing object o=%x string=%x\n", (int)o, (int)o->string);
838 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
839 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
840 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
// Dictionary of objects that carried an ID attribute, keyed by the ID
// rendered as a decimal string. It is created in OSUnserializeXML() before
// each parse.
842 static OSDictionary *tags;
// Records object o under the numeric ID tag so that a later IDREF can find it.
845 rememberObject(int tag, OSObject *o)
// The key is the ID formatted as an unsigned decimal string. tag is an int
// formatted with "%u"; the build* callers visible in this file only call this
// when idref >= 0.
848 sprintf(key, "%u", tag);
850 //printf("remember key %s\n", key);
852 tags->setObject(key, o);
// Looks up the object previously stored by rememberObject() under the numeric
// ID tag and hands it back wrapped in a parse node.
// NOTE(review): the handling of a missing entry is not visible in this
// listing; the grammar's idref rule reports "forward reference detected",
// presumably when this lookup fails -- confirm against the full source.
856 retrieveObject(int tag)
// Must use the same key format as rememberObject().
859 sprintf(key, "%u", tag);
861 //printf("retrieve key '%s'\n", key);
863 OSObject *ref = tags->getObject(key);
// A fresh parse node is allocated to carry the result.
866 object_t *o = newObject();
871 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
872 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
873 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
876 buildOSDictionary(object_t * header)
881 // get count and reverse order
882 o = header->elements;
883 header->elements = 0;
889 t->next = header->elements;
890 header->elements = t;
893 OSDictionary *d = OSDictionary::withCapacity(count);
895 if (header->idref >= 0) rememberObject(header->idref, d);
897 o = header->elements;
899 d->setObject(o->key, o->object);
900 o->object->release();
914 buildOSArray(object_t * header)
919 // get count and reverse order
920 o = header->elements;
921 header->elements = 0;
927 t->next = header->elements;
928 header->elements = t;
931 OSArray *a = OSArray::withCapacity(count);
933 if (header->idref >= 0) rememberObject(header->idref, a);
935 o = header->elements;
937 a->setObject(o->object);
938 o->object->release();
950 buildOSSet(object_t *o)
953 OSArray *a = (OSArray *)o->object;
955 OSSet *s = OSSet::withArray(a, a->getCapacity());
957 //write over reference created in array
958 if (o->idref >= 0) rememberObject(o->idref, s);
966 buildOSString(object_t *o)
968 OSString *s = OSString::withCString(o->string);
970 if (o->idref >= 0) rememberObject(o->idref, s);
980 buildKey(object_t *o)
982 const OSSymbol *s = OSSymbol::withCString(o->string);
992 buildOSData(object_t *o)
997 d = OSData::withBytes(o->data, o->size);
1000 d = OSData::withCapacity(0);
1002 if (o->idref >= 0) rememberObject(o->idref, d);
1010 buildOSNumber(object_t *o)
1012 OSNumber *n = OSNumber::withNumber(o->number, o->size);
1014 if (o->idref >= 0) rememberObject(o->idref, n);
1021 buildOSBoolean(object_t *o)
1023 OSBoolean *b = OSBoolean::withBoolean(o->number != 0);
1029 #include <kern/lock.h>
// NOTE(review): the scanner and parser keep their state in file-scope statics
// (parseBuffer, parseBufferIndex, tags, parsedObject, yyerror_message), so
// this mutex presumably serializes callers; the lock/unlock calls are not
// visible in this listing -- confirm.
1032 static mutex_t *lock = 0;
// Entry point: parses the text in buffer and yields the resulting object.
// buffer      - text to parse; the scanner reads it through parseBuffer.
// errorString - optional out-parameter; set to 0 when the parse succeeds, and
//               otherwise receives an OSString built from yyerror_message.
1035 OSUnserializeXML(const char *buffer, OSString **errorString)
1040 lock = mutex_alloc(ETAP_IO_AHA);
// Reset all per-parse state before running the generated parser.
1049 yyerror_message[0] = 0; //just in case
1050 parseBuffer = buffer;
1051 parseBufferIndex = 0;
// Fresh ID -> object table for this parse (see rememberObject/retrieveObject).
1052 tags = OSDictionary::withCapacity(128);
// yyparse() returning 0 means the grammar accepted the input; the result was
// left in parsedObject by the "input" rule.
1053 if (yyparse() == 0) {
1054 object = parsedObject;
1055 if (errorString) *errorString = 0;
// Failure path: expose the message recorded by yyerror().
// NOTE(review): the guard on errorString for this store is not visible here.
1059 *errorString = OSString::withCString(yyerror_message);
1075 // DO NOT EDIT OSUnserializeXML.cpp!