libkern/c++/OSUnserializeXML.y

   1 /*
   2  * Copyright (c) 1999-2019 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28
  29 /*
  30  * HISTORY
  31  *
  32  * OSUnserializeXML.y created by rsulack on Tue Oct 12 1999
  33  */
  34
  35 // parser for unserializing OSContainer objects serialized to XML
  36 //
  37 // to build :
  38 //      bison -p OSUnserializeXML OSUnserializeXML.y
  39 //      head -50 OSUnserializeXML.y > OSUnserializeXML.cpp
  40 //      sed -e "s/#include <stdio.h>//" < OSUnserializeXML.tab.c >> OSUnserializeXML.cpp
  41 //
  42 //      when changing code check in both OSUnserializeXML.y and OSUnserializeXML.cpp
  43 //
  44 //
  45 //
  46 //
  47 //
  48 //               DO NOT EDIT OSUnserializeXML.cpp!
  49 //
  50 //                      this means you!
  51 //
  52 //
  53 //
  54 //
  55 //
  56 //
  57
  58
  59 %pure_parser
  60
  61 %{
  62 #include <string.h>
  63 #include <libkern/c++/OSMetaClass.h>
  64 #include <libkern/c++/OSContainers.h>
  65 #include <libkern/c++/OSLib.h>
  66
      // Cap on STATE->parsedObjectCount; the grammar actions abort the parse
      // with "maximum object count" once the counter exceeds it.
   67 #define MAX_OBJECTS              131071
      // Cap on STATE->retrievedObjectCount (objects fetched through an IDREF);
      // exceeding it aborts with "maximum object reference count".
   68 #define MAX_REFED_OBJECTS        65535
   69
      // Semantic value type carried on the bison parser stack.
   70 #define YYSTYPE object_t *
      // Bison hooks: name of the extra yyparse() argument and how it is
      // forwarded to yylex().
   71 #define YYPARSE_PARAM   state
   72 #define YYLEX_PARAM     (parser_state_t *)state
  73
  74 // this is the internal struct used to hold objects on parser stack
  75 // it represents objects both before and after they have been created
   76 typedef struct object {
   77         struct object   *next;                  // sibling link; the grammar chains each new pair/element in front of the earlier ones
   78         struct object   *free;                  // NOTE(review): presumably links recycled nodes (see parser_state_t::freeObjects) -- confirm in newObject/freeObject
   79         struct object   *elements;              // head of the child list for a dict/array/set, or NULL when empty
   80         OSObject        *object;                // the built OSObject; grammar actions move it by copying the pointer and zeroing the source
   81         OSSymbol        *key;                   // for dictionary
   82         int             size;                   // NOTE(review): presumably the byte count that goes with data/string -- confirm in yylex
   83         void            *data;                  // for data
   84         char            *string;                // for string & symbol
   85         long long       number;                 // for number
   86         int             idref;                  // tag handed to retrieveObject() by the idref rule
   87 } object_t;
  88
  89 // this code is reentrant, this structure contains all
  90 // state information for the parsing of a single buffer
   91 typedef struct parser_state {
   92         const char      *parseBuffer;           // start of text to be parsed
   93         int             parseBufferIndex;       // current index into text
   94         int             lineNumber;             // current line number
   95         object_t        *objects;               // internal objects in use
   96         object_t        *freeObjects;           // internal objects that are free
   97         OSDictionary    *tags;                  // used to remember "ID" tags
   98         OSString        **errorString;          // parse error with line
   99         OSObject        *parsedObject;          // resultant object of parsed text
  100         int             parsedObjectCount;      // objects reduced so far; checked against MAX_OBJECTS
  101         int             retrievedObjectCount;   // IDREF look-ups so far; checked against MAX_REFED_OBJECTS
  102 } parser_state_t;
 103
      // The parse parameter `state` viewed as the parser_state_t it really is.
  104 #define STATE           ((parser_state_t *)state)
  105
      // Route bison's yyerror() to OSUnserializeerror() so the message can be
      // stored in the per-parse state.
  106 #undef yyerror
  107 #define yyerror(s)      OSUnserializeerror(STATE, (s))
  108 static int              OSUnserializeerror(parser_state_t *state, const char *s);
  109
  110 static int              yylex(YYSTYPE *lvalp, parser_state_t *state);
  111
      // Bookkeeping for the internal object_t nodes.  The grammar treats a NULL
      // result from retrieveObject() as a forward reference error.
  112 static object_t         *newObject(parser_state_t *state);
  113 static void             freeObject(parser_state_t *state, object_t *o);
  114 static void             rememberObject(parser_state_t *state, int tag, OSObject *o);
  115 static object_t         *retrieveObject(parser_state_t *state, int tag);
  116 static void             cleanupObjects(parser_state_t *state);
  117
      // Builders invoked by the grammar actions; the `object` rule checks the
      // returned node's ->object for NULL to detect failure.
  118 static object_t         *buildDictionary(parser_state_t *state, object_t *o);
  119 static object_t         *buildArray(parser_state_t *state, object_t *o);
  120 static object_t         *buildSet(parser_state_t *state, object_t *o);
  121 static object_t         *buildString(parser_state_t *state, object_t *o);
  122 static object_t         *buildSymbol(parser_state_t *state, object_t *o);
  123 static object_t         *buildData(parser_state_t *state, object_t *o);
  124 static object_t         *buildNumber(parser_state_t *state, object_t *o);
  125 static object_t         *buildBoolean(parser_state_t *state, object_t *o);
 126
 127 __BEGIN_DECLS
 128 #include <kern/kalloc.h>
 129 __END_DECLS
 130
 131 #define malloc(size) malloc_impl(size)
 132 static inline void *
 133 malloc_impl(size_t size)
 134 {
 135         if (size == 0) {
 136                 return NULL;
 137         }
 138         return kheap_alloc_tag_bt(KHEAP_DEFAULT, size,
 139                    (zalloc_flags_t) (Z_WAITOK | Z_ZERO),
 140                    VM_KERN_MEMORY_LIBKERN);
 141 }
 142
      // free() as seen by the generated parser: returns `addr` to the default
      // kernel heap through kheap_free_addr(), which takes no size argument.
  143 #define free(addr) free_impl(addr)
  144 static inline void
  145 free_impl(void *addr)
  146 {
  147         kheap_free_addr(KHEAP_DEFAULT, addr);
  148 }
 149 static inline void
 150 safe_free(void *addr, size_t size)
 151 {
 152   if(addr) {
 153     assert(size != 0);
 154     kheap_free(KHEAP_DEFAULT, addr, size);
 155   }
 156 }
 157
 158 #define realloc(addr, osize, nsize) realloc_impl(addr, osize, nsize)
 159 static inline void *
 160 realloc_impl(void *addr, size_t osize, size_t nsize)
 161 {
 162         if (!addr) {
 163                 return malloc(nsize);
 164         }
 165         if (nsize == osize) {
 166                 return addr;
 167         }
 168         void *nmem = malloc(nsize);
 169         if (!nmem) {
 170                 safe_free(addr, osize);
 171                 return NULL;
 172         }
 173         (void)memcpy(nmem, addr, (nsize > osize) ? osize : nsize);
 174         safe_free(addr, osize);
 175
 176         return nmem;
 177 }
 178
 179 %}
 180 %token ARRAY
 181 %token BOOLEAN
 182 %token DATA
 183 %token DICTIONARY
 184 %token IDREF
 185 %token KEY
 186 %token NUMBER
 187 %token SET
 188 %token STRING
 189 %token SYNTAX_ERROR
 190 %% /* Grammar rules and actions follow */
 191
 192 input:    /* empty */           { yyerror("unexpected end of buffer");
 193                                   YYERROR;
 194                                 }
 195         | object                { STATE->parsedObject = $1->object;
 196                                   $1->object = 0;
 197                                   freeObject(STATE, $1);
 198                                   YYACCEPT;
 199                                 }
 200         | SYNTAX_ERROR          { yyerror("syntax error");
 201                                   YYERROR;
 202                                 }
 203         ;
 204
 205 object:   dict                  { $$ = buildDictionary(STATE, $1);
 206
 207                                   if (!yyval->object) {
 208                                     yyerror("buildDictionary");
 209                                     YYERROR;
 210                                   }
 211                                   STATE->parsedObjectCount++;
 212                                   if (STATE->parsedObjectCount > MAX_OBJECTS) {
 213                                     yyerror("maximum object count");
 214                                     YYERROR;
 215                                   }
 216                                 }
 217         | array                 { $$ = buildArray(STATE, $1);
 218
 219                                   if (!yyval->object) {
 220                                     yyerror("buildArray");
 221                                     YYERROR;
 222                                   }
 223                                   STATE->parsedObjectCount++;
 224                                   if (STATE->parsedObjectCount > MAX_OBJECTS) {
 225                                     yyerror("maximum object count");
 226                                     YYERROR;
 227                                   }
 228                                 }
 229         | set                   { $$ = buildSet(STATE, $1);
 230
 231                                   if (!yyval->object) {
 232                                     yyerror("buildSet");
 233                                     YYERROR;
 234                                   }
 235                                   STATE->parsedObjectCount++;
 236                                   if (STATE->parsedObjectCount > MAX_OBJECTS) {
 237                                     yyerror("maximum object count");
 238                                     YYERROR;
 239                                   }
 240                                 }
 241         | string                { $$ = buildString(STATE, $1);
 242
 243                                   if (!yyval->object) {
 244                                     yyerror("buildString");
 245                                     YYERROR;
 246                                   }
 247                                   STATE->parsedObjectCount++;
 248                                   if (STATE->parsedObjectCount > MAX_OBJECTS) {
 249                                     yyerror("maximum object count");
 250                                     YYERROR;
 251                                   }
 252                                 }
 253         | data                  { $$ = buildData(STATE, $1);
 254
 255                                   if (!yyval->object) {
 256                                     yyerror("buildData");
 257                                     YYERROR;
 258                                   }
 259                                   STATE->parsedObjectCount++;
 260                                   if (STATE->parsedObjectCount > MAX_OBJECTS) {
 261                                     yyerror("maximum object count");
 262                                     YYERROR;
 263                                   }
 264                                 }
 265         | number                { $$ = buildNumber(STATE, $1);
 266
 267                                   if (!yyval->object) {
 268                                     yyerror("buildNumber");
 269                                     YYERROR;
 270                                   }
 271                                   STATE->parsedObjectCount++;
 272                                   if (STATE->parsedObjectCount > MAX_OBJECTS) {
 273                                     yyerror("maximum object count");
 274                                     YYERROR;
 275                                   }
 276                                 }
 277         | boolean               { $$ = buildBoolean(STATE, $1);
 278
 279                                   if (!yyval->object) {
 280                                     yyerror("buildBoolean");
 281                                     YYERROR;
 282                                   }
 283                                   STATE->parsedObjectCount++;
 284                                   if (STATE->parsedObjectCount > MAX_OBJECTS) {
 285                                     yyerror("maximum object count");
 286                                     YYERROR;
 287                                   }
 288                                 }
 289         | idref                 { $$ = retrieveObject(STATE, $1->idref);
 290                                   if ($$) {
 291                                     STATE->retrievedObjectCount++;
 292                                     $$->object->retain();
 293                                     if (STATE->retrievedObjectCount > MAX_REFED_OBJECTS) {
 294                                       yyerror("maximum object reference count");
 295                                       YYERROR;
 296                                     }
 297                                   } else {
 298                                     yyerror("forward reference detected");
 299                                     YYERROR;
 300                                   }
 301                                   freeObject(STATE, $1);
 302
 303                                   STATE->parsedObjectCount++;
 304                                   if (STATE->parsedObjectCount > MAX_OBJECTS) {
 305                                     yyerror("maximum object count");
 306                                     YYERROR;
 307                                   }
 308                                 }
 309         ;
 310
 311 //------------------------------------------------------------------------------
 312
 313 dict:     '{' '}'               { $$ = $1;
 314                                   $$->elements = NULL;
 315                                 }
 316         | '{' pairs '}'         { $$ = $1;
 317                                   $$->elements = $2;
 318                                 }
 319         | DICTIONARY
 320         ;
 321
 322 pairs:    pair
 323         | pairs pair            { $$ = $2;
 324                                   $$->next = $1;
 325
 326                                   object_t *o;
 327                                   o = $$->next;
 328                                   while (o) {
 329                                     if (o->key == $$->key) {
 330                                       yyerror("duplicate dictionary key");
 331                                       YYERROR;
 332                                     }
 333                                     o = o->next;
 334                                   }
 335                                 }
 336         ;
 337
 338 pair:     key object            { $$ = $1;
 339                                   $$->key = (OSSymbol *)$$->object;
 340                                   $$->object = $2->object;
 341                                   $$->next = NULL;
 342                                   $2->object = 0;
 343                                   freeObject(STATE, $2);
 344                                 }
 345         ;
 346
 347 key:      KEY                   { $$ = buildSymbol(STATE, $1);
 348
 349 //                                STATE->parsedObjectCount++;
 350 //                                if (STATE->parsedObjectCount > MAX_OBJECTS) {
 351 //                                  yyerror("maximum object count");
 352 //                                  YYERROR;
 353 //                                }
 354                                 }
 355         ;
 356
 357 //------------------------------------------------------------------------------
 358
 359 array:    '(' ')'               { $$ = $1;
 360                                   $$->elements = NULL;
 361                                 }
 362         | '(' elements ')'      { $$ = $1;
 363                                   $$->elements = $2;
 364                                 }
 365         | ARRAY
 366         ;
 367
 368 set:      '[' ']'               { $$ = $1;
 369                                   $$->elements = NULL;
 370                                 }
 371         | '[' elements ']'      { $$ = $1;
 372                                   $$->elements = $2;
 373                                 }
 374         | SET
 375         ;
 376
 377 elements: object                { $$ = $1;
 378                                   $$->next = NULL;
 379                                 }
 380         | elements object       { $$ = $2;
 381                                   $$->next = $1;
 382                                 }
 383         ;
 384
 385 //------------------------------------------------------------------------------
 386
      /*
       * Leaf rules.  Each one simply renames a lexer token; with no explicit
       * action the token's node is passed through as the rule's value.
       */
  387 boolean:  BOOLEAN
  388         ;
  389
  390 data:     DATA
  391         ;
  392
  393 idref:    IDREF
  394         ;
  395
  396 number:   NUMBER
  397         ;
  398
  399 string:   STRING
  400         ;
 401
 402 %%
 403
 404 int
 405 OSUnserializeerror(parser_state_t * state, const char *s)  /* Called by yyparse on errors */
 406 {
 407         if (state->errorString) {
 408                 char tempString[128];
 409                 snprintf(tempString, 128, "OSUnserializeXML: %s near line %d\n", s, state->lineNumber);
 410                 *(state->errorString) = OSString::withCString(tempString);
 411         }
 412
 413         return 0;
 414 }
 415
 416 #define TAG_MAX_LENGTH          32
 417 #define TAG_MAX_ATTRIBUTES      32
 418 #define TAG_BAD                 0
 419 #define TAG_START               1
 420 #define TAG_END                 2
 421 #define TAG_EMPTY               3
 422 #define TAG_IGNORE              4
 423
 424 #define currentChar()   (state->parseBuffer[state->parseBufferIndex])
 425 #define nextChar()      (state->parseBuffer[++state->parseBufferIndex])
 426 #define prevChar()      (state->parseBuffer[state->parseBufferIndex - 1])
 427
 428 #define isSpace(c)      ((c) == ' ' || (c) == '\t')
 429 #define isAlpha(c)      (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z'))
 430 #define isDigit(c)      ((c) >= '0' && (c) <= '9')
 431 #define isAlphaDigit(c) ((c) >= 'a' && (c) <= 'f')
 432 #define isHexDigit(c)   (isDigit(c) || isAlphaDigit(c))
 433 #define isAlphaNumeric(c) (isAlpha(c) || isDigit(c) || ((c) == '-'))
 434
 435 static int
 436 getTag(parser_state_t *state,
 437     char tag[TAG_MAX_LENGTH],
 438     int *attributeCount,
 439     char attributes[TAG_MAX_ATTRIBUTES][TAG_MAX_LENGTH],
 440     char values[TAG_MAX_ATTRIBUTES][TAG_MAX_LENGTH] )
 441 {
 442         int length = 0;
 443         int c = currentChar();
 444         int tagType = TAG_START;
 445
 446         *attributeCount = 0;
 447
 448         if (c != '<') {
 449                 return TAG_BAD;
 450         }
 451         c = nextChar();         // skip '<'
 452
 453
 454         // <!TAG   declarations     >
 455         // <!--     comments      -->
 456         if (c == '!') {
 457                 c = nextChar();
 458                 bool isComment = (c == '-') && ((c = nextChar()) != 0) && (c == '-');
 459                 if (!isComment && !isAlpha(c)) {
 460                         return TAG_BAD;                      // <!1, <!-A, <!eos
 461                 }
 462                 while (c && (c = nextChar()) != 0) {
 463                         if (c == '\n') {
 464                                 state->lineNumber++;
 465                         }
 466                         if (isComment) {
 467                                 if (c != '-') {
 468                                         continue;
 469                                 }
 470                                 c = nextChar();
 471                                 if (c != '-') {
 472                                         continue;
 473                                 }
 474                                 c = nextChar();
 475                         }
 476                         if (c == '>') {
 477                                 (void)nextChar();
 478                                 return TAG_IGNORE;
 479                         }
 480                         if (isComment) {
 481                                 break;
 482                         }
 483                 }
 484                 return TAG_BAD;
 485         } else
 486         // <? Processing Instructions  ?>
 487         if (c == '?') {
 488                 while ((c = nextChar()) != 0) {
 489                         if (c == '\n') {
 490                                 state->lineNumber++;
 491                         }
 492                         if (c != '?') {
 493                                 continue;
 494                         }
 495                         c = nextChar();
 496                         if (!c) {
 497                                 return TAG_IGNORE;
 498                         }
 499                         if (c == '>') {
 500                                 (void)nextChar();
 501                                 return TAG_IGNORE;
 502                         }
 503                 }
 504                 return TAG_BAD;
 505         } else
 506         // </ end tag >
 507         if (c == '/') {
 508                 c = nextChar();         // skip '/'
 509                 tagType = TAG_END;
 510         }
 511         if (!isAlpha(c)) {
 512                 return TAG_BAD;
 513         }
 514
 515         /* find end of tag while copying it */
 516         while (isAlphaNumeric(c)) {
 517                 tag[length++] = c;
 518                 c = nextChar();
 519                 if (length >= (TAG_MAX_LENGTH - 1)) {
 520                         return TAG_BAD;
 521                 }
 522         }
 523
 524         tag[length] = 0;
 525
 526 //      printf("tag %s, type %d\n", tag, tagType);
 527
 528         // look for attributes of the form attribute = "value" ...
 529         while ((c != '>') && (c != '/')) {
 530                 while (isSpace(c)) {
 531                         c = nextChar();
 532                 }
 533
 534                 length = 0;
 535                 while (isAlphaNumeric(c)) {
 536                         attributes[*attributeCount][length++] = c;
 537                         if (length >= (TAG_MAX_LENGTH - 1)) {
 538                                 return TAG_BAD;
 539                         }
 540                         c = nextChar();
 541                 }
 542                 attributes[*attributeCount][length] = 0;
 543
 544                 while (isSpace(c)) {
 545                         c = nextChar();
 546                 }
 547
 548                 if (c != '=') {
 549                         return TAG_BAD;
 550                 }
 551                 c = nextChar();
 552
 553                 while (isSpace(c)) {
 554                         c = nextChar();
 555                 }
 556
 557                 if (c != '"') {
 558                         return TAG_BAD;
 559                 }
 560                 c = nextChar();
 561                 length = 0;
 562                 while (c != '"') {
 563                         values[*attributeCount][length++] = c;
 564                         if (length >= (TAG_MAX_LENGTH - 1)) {
 565                                 return TAG_BAD;
 566                         }
 567                         c = nextChar();
 568                         if (!c) {
 569                                 return TAG_BAD;
 570                         }
 571                 }
 572                 values[*attributeCount][length] = 0;
 573
 574                 c = nextChar(); // skip closing quote
 575
 576 //              printf("        attribute '%s' = '%s', nextchar = '%c'\n",
 577 //                     attributes[*attributeCount], values[*attributeCount], c);
 578
 579                 (*attributeCount)++;
 580                 if (*attributeCount >= TAG_MAX_ATTRIBUTES) {
 581                         return TAG_BAD;
 582                 }
 583         }
 584
 585         if (c == '/') {
 586                 c = nextChar();         // skip '/'
 587                 tagType = TAG_EMPTY;
 588         }
 589         if (c != '>') {
 590                 return TAG_BAD;
 591         }
 592         c = nextChar();         // skip '>'
 593
 594         return tagType;
 595 }
 596
 597 static char *
 598 getString(parser_state_t *state, int *alloc_lengthp)
 599 {
 600         int c = currentChar();
 601         int start, length, i, j;
 602         char * tempString;
 603
 604         start = state->parseBufferIndex;
 605         /* find end of string */
 606
 607         while (c != 0) {
 608                 if (c == '\n') {
 609                         state->lineNumber++;
 610                 }
 611                 if (c == '<') {
 612                         break;
 613                 }
 614                 c = nextChar();
 615         }
 616
 617         if (c != '<') {
 618                 return 0;
 619         }
 620
 621         length = state->parseBufferIndex - start;
 622
 623         /* copy to null terminated buffer */
 624         tempString = (char *)malloc(length + 1);
 625         if (tempString == NULL) {
 626                 printf("OSUnserializeXML: can't alloc temp memory\n");
 627                 goto error;
 628         }
 629         if (alloc_lengthp != NULL) {
 630                 *alloc_lengthp = length + 1;
 631         }
 632
 633         // copy out string in tempString
 634         // "&amp;" -> '&', "&lt;" -> '<', "&gt;" -> '>'
 635
 636         i = j = 0;
 637         while (i < length) {
 638                 c = state->parseBuffer[start + i++];
 639                 if (c != '&') {
 640                         tempString[j++] = c;
 641                 } else {
 642                         if ((i + 3) > length) {
 643                                 goto error;
 644                         }
 645                         c = state->parseBuffer[start + i++];
 646                         if (c == 'l') {
 647                                 if (state->parseBuffer[start + i++] != 't') {
 648                                         goto error;
 649                                 }
 650                                 if (state->parseBuffer[start + i++] != ';') {
 651                                         goto error;
 652                                 }
 653                                 tempString[j++] = '<';
 654                                 continue;
 655                         }
 656                         if (c == 'g') {
 657                                 if (state->parseBuffer[start + i++] != 't') {
 658                                         goto error;
 659                                 }
 660                                 if (state->parseBuffer[start + i++] != ';') {
 661                                         goto error;
 662                                 }
 663                                 tempString[j++] = '>';
 664                                 continue;
 665                         }
 666                         if ((i + 3) > length) {
 667                                 goto error;
 668                         }
 669                         if (c == 'a') {
 670                                 if (state->parseBuffer[start + i++] != 'm') {
 671                                         goto error;
 672                                 }
 673                                 if (state->parseBuffer[start + i++] != 'p') {
 674                                         goto error;
 675                                 }
 676                                 if (state->parseBuffer[start + i++] != ';') {
 677                                         goto error;
 678                                 }
 679                                 tempString[j++] = '&';
 680                                 continue;
 681                         }
 682                         goto error;
 683                 }
 684         }
 685         tempString[j] = 0;
 686
 687 //      printf("string %s\n", tempString);
 688
 689         return tempString;
 690
 691 error:
 692         if (tempString) {
 693                 safe_free(tempString, length + 1);
 694                 if (alloc_lengthp != NULL) {
 695                         *alloc_lengthp = 0;
 696                 }
 697         }
 698         return 0;
 699 }
 700
 701 static long long
 702 getNumber(parser_state_t *state)
 703 {
 704         unsigned long long n = 0;
 705         int base = 10;
 706         bool negate = false;
 707         int c = currentChar();
 708
 709         if (c == '0') {
 710                 c = nextChar();
 711                 if (c == 'x') {
 712                         base = 16;
 713                         c = nextChar();
 714                 }
 715         }
 716         if (base == 10) {
 717                 if (c == '-') {
 718                         negate = true;
 719                         c = nextChar();
 720                 }
 721                 while (isDigit(c)) {
 722                         n = (n * base + c - '0');
 723                         c = nextChar();
 724                 }
 725                 if (negate) {
 726                         n = (unsigned long long)((long long)n * (long long)-1);
 727                 }
 728         } else {
 729                 while (isHexDigit(c)) {
 730                         if (isDigit(c)) {
 731                                 n = (n * base + c - '0');
 732                         } else {
 733                                 n = (n * base + 0xa + c - 'a');
 734                         }
 735                         c = nextChar();
 736                 }
 737         }
 738 //      printf("number 0x%x\n", (unsigned long)n);
 739         return n;
 740 }
 741
 742 // taken from CFXMLParsing/CFPropertyList.c
 743
      // Base64 decode table indexed by a 7-bit character code: the 6-bit value
      // of each alphabet character ('A'-'Z', 'a'-'z', '0'-'9', '+', '/'), or -1
      // for characters the decoder skips.  The padding character '=' maps to 0
      // so it still flows through the accumulator.
  744 static const signed char __CFPLDataDecodeTable[128] = {
  745         /* 000 */ -1, -1, -1, -1, -1, -1, -1, -1,
  746         /* 010 */ -1, -1, -1, -1, -1, -1, -1, -1,
  747         /* 020 */ -1, -1, -1, -1, -1, -1, -1, -1,
  748         /* 030 */ -1, -1, -1, -1, -1, -1, -1, -1,
  749         /* ' ' */ -1, -1, -1, -1, -1, -1, -1, -1,
  750         /* '(' */ -1, -1, -1, 62, -1, -1, -1, 63,
  751         /* '0' */ 52, 53, 54, 55, 56, 57, 58, 59,
  752         /* '8' */ 60, 61, -1, -1, -1, 0, -1, -1,
  753         /* '@' */ -1, 0, 1, 2, 3, 4, 5, 6,
  754         /* 'H' */ 7, 8, 9, 10, 11, 12, 13, 14,
  755         /* 'P' */ 15, 16, 17, 18, 19, 20, 21, 22,
  756         /* 'X' */ 23, 24, 25, -1, -1, -1, -1, -1,
  757         /* '`' */ -1, 26, 27, 28, 29, 30, 31, 32,
  758         /* 'h' */ 33, 34, 35, 36, 37, 38, 39, 40,
  759         /* 'p' */ 41, 42, 43, 44, 45, 46, 47, 48,
  760         /* 'x' */ 49, 50, 51, -1, -1, -1, -1, -1
  761 };
 762
 763 #define DATA_ALLOC_SIZE 4096
 764
 765 static void *
 766 getCFEncodedData(parser_state_t *state, unsigned int *size)
 767 {
 768         int numeq = 0, cntr = 0;
 769         unsigned int acc = 0;
 770         int tmpbufpos = 0;
 771         size_t tmpbuflen = DATA_ALLOC_SIZE;
 772         unsigned char *tmpbuf = (unsigned char *)malloc(tmpbuflen);
 773
 774         int c = currentChar();
 775         *size = 0;
 776
 777         while (c != '<') {
 778                 c &= 0x7f;
 779                 if (c == 0) {
 780                         safe_free(tmpbuf, tmpbuflen);
 781                         return 0;
 782                 }
 783                 if (c == '=') {
 784                         numeq++;
 785                 } else {
 786                         numeq = 0;
 787                 }
 788                 if (c == '\n') {
 789                         state->lineNumber++;
 790                 }
 791                 if (__CFPLDataDecodeTable[c] < 0) {
 792                         c = nextChar();
 793                         continue;
 794                 }
 795                 cntr++;
 796                 acc <<= 6;
 797                 acc += __CFPLDataDecodeTable[c];
 798                 if (0 == (cntr & 0x3)) {
 799                         if (tmpbuflen <= tmpbufpos + 2) {
 800                                 size_t oldsize = tmpbuflen;
 801                                 tmpbuflen += DATA_ALLOC_SIZE;
 802                                 tmpbuf = (unsigned char *)realloc(tmpbuf, oldsize, tmpbuflen);
 803                         }
 804                         tmpbuf[tmpbufpos++] = (acc >> 16) & 0xff;
 805                         if (numeq < 2) {
 806                                 tmpbuf[tmpbufpos++] = (acc >> 8) & 0xff;
 807                         }
 808                         if (numeq < 1) {
 809                                 tmpbuf[tmpbufpos++] = acc & 0xff;
 810                         }
 811                 }
 812                 c = nextChar();
 813         }
 814         *size = tmpbufpos;
 815         if (*size == 0) {
 816                 safe_free(tmpbuf, tmpbuflen);
 817                 return 0;
 818         }
 819         return tmpbuf;
 820 }
 821
 822 static void *
 823 getHexData(parser_state_t *state, unsigned int *size)
 824 {
 825         int c;
 826         unsigned char *d, *start, *lastStart;
 827
 828         size_t buflen = DATA_ALLOC_SIZE;
 829         start = lastStart = d = (unsigned char *)malloc(buflen);
 830         c = currentChar();
 831
 832         while (c != '<') {
 833                 if (isSpace(c)) {
 834                         while ((c = nextChar()) != 0 && isSpace(c)) {
 835                         }
 836                 }
 837                 ;
 838                 if (c == '\n') {
 839                         state->lineNumber++;
 840                         c = nextChar();
 841                         continue;
 842                 }
 843
 844                 // get high nibble
 845                 if (isDigit(c)) {
 846                         *d = (c - '0') << 4;
 847                 } else if (isAlphaDigit(c)) {
 848                         *d =  (0xa + (c - 'a')) << 4;
 849                 } else {
 850                         goto error;
 851                 }
 852
 853                 // get low nibble
 854                 c = nextChar();
 855                 if (isDigit(c)) {
 856                         *d |= c - '0';
 857                 } else if (isAlphaDigit(c)) {
 858                         *d |= 0xa + (c - 'a');
 859                 } else {
 860                         goto error;
 861                 }
 862
 863                 d++;
 864                 if ((d - lastStart) >= DATA_ALLOC_SIZE) {
 865                         int oldsize = d - start;
 866                         assert(oldsize == buflen);
 867                         buflen += DATA_ALLOC_SIZE;
 868                         start = (unsigned char *)realloc(start, oldsize, buflen);
 869                         d = lastStart = start + oldsize;
 870                 }
 871                 c = nextChar();
 872         }
 873
 874         *size = d - start;
 875         return start;
 876
 877 error:
 878
 879         *size = 0;
 880         safe_free(start, buflen);
 881         return 0;
 882 }
 883
 884 static int
 885 yylex(YYSTYPE *lvalp, parser_state_t *state)
 886 {
 887         int c, i;
 888         int tagType;
 889         char tag[TAG_MAX_LENGTH];
 890         int attributeCount;
 891         char attributes[TAG_MAX_ATTRIBUTES][TAG_MAX_LENGTH];
 892         char values[TAG_MAX_ATTRIBUTES][TAG_MAX_LENGTH];
 893         object_t *object;
 894         int alloc_length;
 895
 896 top:
 897         c = currentChar();
 898
 899         /* skip white space  */
 900         if (isSpace(c)) {
 901                 while ((c = nextChar()) != 0 && isSpace(c)) {
 902                 }
 903         }
 904         ;
 905
 906         /* keep track of line number, don't return \n's */
 907         if (c == '\n') {
 908                 STATE->lineNumber++;
 909                 (void)nextChar();
 910                 goto top;
 911         }
 912
 913         // end of the buffer?
 914         if (!c) {
 915                 return 0;
 916         }
 917
 918         tagType = getTag(STATE, tag, &attributeCount, attributes, values);
 919         if (tagType == TAG_BAD) {
 920                 return SYNTAX_ERROR;
 921         }
 922         if (tagType == TAG_IGNORE) {
 923                 goto top;
 924         }
 925
 926         // handle allocation and check for "ID" and "IDREF" tags up front
 927         *lvalp = object = newObject(STATE);
 928         object->idref = -1;
 929         for (i = 0; i < attributeCount; i++) {
 930                 if (attributes[i][0] == 'I' && attributes[i][1] == 'D') {
 931                         // check for idref's, note: we ignore the tag, for
 932                         // this to work correctly, all idrefs must be unique
 933                         // across the whole serialization
 934                         if (attributes[i][2] == 'R' && attributes[i][3] == 'E' &&
 935                             attributes[i][4] == 'F' && !attributes[i][5]) {
 936                                 if (tagType != TAG_EMPTY) {
 937                                         return SYNTAX_ERROR;
 938                                 }
 939                                 object->idref = strtol(values[i], NULL, 0);
 940                                 return IDREF;
 941                         }
 942                         // check for id's
 943                         if (!attributes[i][2]) {
 944                                 object->idref = strtol(values[i], NULL, 0);
 945                         } else {
 946                                 return SYNTAX_ERROR;
 947                         }
 948                 }
 949         }
 950
 951         switch (*tag) {
 952         case 'a':
 953                 if (!strcmp(tag, "array")) {
 954                         if (tagType == TAG_EMPTY) {
 955                                 object->elements = NULL;
 956                                 return ARRAY;
 957                         }
 958                         return (tagType == TAG_START) ? '(' : ')';
 959                 }
 960                 break;
 961         case 'd':
 962                 if (!strcmp(tag, "dict")) {
 963                         if (tagType == TAG_EMPTY) {
 964                                 object->elements = NULL;
 965                                 return DICTIONARY;
 966                         }
 967                         return (tagType == TAG_START) ? '{' : '}';
 968                 }
 969                 if (!strcmp(tag, "data")) {
 970                         unsigned int size;
 971                         if (tagType == TAG_EMPTY) {
 972                                 object->data = NULL;
 973                                 object->size = 0;
 974                                 return DATA;
 975                         }
 976
 977                         bool isHexFormat = false;
 978                         for (i = 0; i < attributeCount; i++) {
 979                                 if (!strcmp(attributes[i], "format") && !strcmp(values[i], "hex")) {
 980                                         isHexFormat = true;
 981                                         break;
 982                                 }
 983                         }
 984                         // CF encoded is the default form
 985                         if (isHexFormat) {
 986                                 object->data = getHexData(STATE, &size);
 987                         } else {
 988                                 object->data = getCFEncodedData(STATE, &size);
 989                         }
 990                         object->size = size;
 991                         if ((getTag(STATE, tag, &attributeCount, attributes, values) != TAG_END) || strcmp(tag, "data")) {
 992                                 return SYNTAX_ERROR;
 993                         }
 994                         return DATA;
 995                 }
 996                 break;
 997         case 'f':
 998                 if (!strcmp(tag, "false")) {
 999                         if (tagType == TAG_EMPTY) {
1000                                 object->number = 0;
1001                                 return BOOLEAN;
1002                         }
1003                 }
1004                 break;
1005         case 'i':
1006                 if (!strcmp(tag, "integer")) {
1007                         object->size = 64;      // default
1008                         for (i = 0; i < attributeCount; i++) {
1009                                 if (!strcmp(attributes[i], "size")) {
1010                                         object->size = strtoul(values[i], NULL, 0);
1011                                 }
1012                         }
1013                         if (tagType == TAG_EMPTY) {
1014                                 object->number = 0;
1015                                 return NUMBER;
1016                         }
1017                         object->number = getNumber(STATE);
1018                         if ((getTag(STATE, tag, &attributeCount, attributes, values) != TAG_END) || strcmp(tag, "integer")) {
1019                                 return SYNTAX_ERROR;
1020                         }
1021                         return NUMBER;
1022                 }
1023                 break;
1024         case 'k':
1025                 if (!strcmp(tag, "key")) {
1026                         if (tagType == TAG_EMPTY) {
1027                                 return SYNTAX_ERROR;
1028                         }
1029                         object->string = getString(STATE, &alloc_length);
1030                         if (!object->string) {
1031                                 return SYNTAX_ERROR;
1032                         }
1033                         object->string_alloc_length = alloc_length;
1034                         if ((getTag(STATE, tag, &attributeCount, attributes, values) != TAG_END)
1035                             || strcmp(tag, "key")) {
1036                                 return SYNTAX_ERROR;
1037                         }
1038                         return KEY;
1039                 }
1040                 break;
1041         case 'p':
1042                 if (!strcmp(tag, "plist")) {
1043                         freeObject(STATE, object);
1044                         goto top;
1045                 }
1046                 break;
1047         case 's':
1048                 if (!strcmp(tag, "string")) {
1049                         if (tagType == TAG_EMPTY) {
1050                                 object->string = (char *)malloc(1);
1051                                 object->string[0] = 0;
1052                                 object->string_alloc_length = 1;
1053                                 return STRING;
1054                         }
1055                         object->string = getString(STATE, &alloc_length);
1056                         if (!object->string) {
1057                                 return SYNTAX_ERROR;
1058                         }
1059                         object->string_alloc_length = alloc_length;
1060                         if ((getTag(STATE, tag, &attributeCount, attributes, values) != TAG_END)
1061                             || strcmp(tag, "string")) {
1062                                 return SYNTAX_ERROR;
1063                         }
1064                         return STRING;
1065                 }
1066                 if (!strcmp(tag, "set")) {
1067                         if (tagType == TAG_EMPTY) {
1068                                 object->elements = NULL;
1069                                 return SET;;
1070                         }
1071                         if (tagType == TAG_START) {
1072                                 return '[';
1073                         } else {
1074                                 return ']';
1075                         }
1076                 }
1077                 break;
1078         case 't':
1079                 if (!strcmp(tag, "true")) {
1080                         if (tagType == TAG_EMPTY) {
1081                                 object->number = 1;
1082                                 return BOOLEAN;
1083                         }
1084                 }
1085                 break;
1086         }
1087
1088         return SYNTAX_ERROR;
1089 }
1090
1091 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
1092 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
1093 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
1094
// "java" like allocation, if this code hits a syntax error in
// the middle of the parsed string we just bail with pointers hanging
// all over the place, this code helps keep it all together
1098
1099 //static int object_count = 0;
1100
1101 object_t *
1102 newObject(parser_state_t *state)
1103 {
1104         object_t *o;
1105
1106         if (state->freeObjects) {
1107                 o = state->freeObjects;
1108                 state->freeObjects = state->freeObjects->next;
1109         } else {
1110                 o = (object_t *)malloc(sizeof(object_t));
1111 //              object_count++;
1112                 o->free = state->objects;
1113                 state->objects = o;
1114         }
1115
1116         return o;
1117 }
1118
1119 void
1120 freeObject(parser_state_t * state, object_t *o)
1121 {
1122         o->next = state->freeObjects;
1123         state->freeObjects = o;
1124 }
1125
1126 void
1127 cleanupObjects(parser_state_t *state)
1128 {
1129         object_t *t, *o = state->objects;
1130
1131         while (o) {
1132                 if (o->object) {
1133 //                      printf("OSUnserializeXML: releasing object o=%x object=%x\n", (int)o, (int)o->object);
1134                         o->object->release();
1135                 }
1136                 if (o->data) {
1137 //                      printf("OSUnserializeXML: freeing   object o=%x data=%x\n", (int)o, (int)o->data);
1138                         free(o->data);
1139                 }
1140                 if (o->key) {
1141 //                      printf("OSUnserializeXML: releasing object o=%x key=%x\n", (int)o, (int)o->key);
1142                         o->key->release();
1143                 }
1144                 if (o->string) {
1145 //                      printf("OSUnserializeXML: freeing   object o=%x string=%x\n", (int)o, (int)o->string);
1146                         free(o->string);
1147                 }
1148
1149                 t = o;
1150                 o = o->free;
1151                 safe_free(t, sizeof(object_t));
1152 //              object_count--;
1153         }
1154 //      printf("object_count = %d\n", object_count);
1155 }
1156
1157 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
1158 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
1159 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
1160
1161 static void
1162 rememberObject(parser_state_t *state, int tag, OSObject *o)
1163 {
1164         char key[16];
1165         snprintf(key, 16, "%u", tag);
1166
1167 //      printf("remember key %s\n", key);
1168
1169         state->tags->setObject(key, o);
1170 }
1171
1172 static object_t *
1173 retrieveObject(parser_state_t *state, int tag)
1174 {
1175         OSObject *ref;
1176         object_t *o;
1177         char key[16];
1178         snprintf(key, 16, "%u", tag);
1179
1180 //      printf("retrieve key '%s'\n", key);
1181
1182         ref = state->tags->getObject(key);
1183         if (!ref) {
1184                 return 0;
1185         }
1186
1187         o = newObject(state);
1188         o->object = ref;
1189         return o;
1190 }
1191
1192 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
1193 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
1194 // !@$&)(^Q$&*^!$(*!@$_(^%_(*Q#$(_*&!$_(*&!$_(*&!#$(*!@&^!@#%!_!#
1195
1196 object_t *
1197 buildDictionary(parser_state_t *state, object_t * header)
1198 {
1199         object_t *o, *t;
1200         int count = 0;
1201         OSDictionary *dict;
1202
1203         // get count and reverse order
1204         o = header->elements;
1205         header->elements = 0;
1206         while (o) {
1207                 count++;
1208                 t = o;
1209                 o = o->next;
1210
1211                 t->next = header->elements;
1212                 header->elements = t;
1213         }
1214
1215         dict = OSDictionary::withCapacity(count);
1216         if (header->idref >= 0) {
1217                 rememberObject(state, header->idref, dict);
1218         }
1219
1220         o = header->elements;
1221         while (o) {
1222                 dict->setObject(o->key, o->object);
1223
1224                 o->key->release();
1225                 o->object->release();
1226                 o->key = 0;
1227                 o->object = 0;
1228
1229                 t = o;
1230                 o = o->next;
1231                 freeObject(state, t);
1232         }
1233         o = header;
1234         o->object = dict;
1235         return o;
1236 };
1237
1238 object_t *
1239 buildArray(parser_state_t *state, object_t * header)
1240 {
1241         object_t *o, *t;
1242         int count = 0;
1243         OSArray *array;
1244
1245         // get count and reverse order
1246         o = header->elements;
1247         header->elements = 0;
1248         while (o) {
1249                 count++;
1250                 t = o;
1251                 o = o->next;
1252
1253                 t->next = header->elements;
1254                 header->elements = t;
1255         }
1256
1257         array = OSArray::withCapacity(count);
1258         if (header->idref >= 0) {
1259                 rememberObject(state, header->idref, array);
1260         }
1261
1262         o = header->elements;
1263         while (o) {
1264                 array->setObject(o->object);
1265
1266                 o->object->release();
1267                 o->object = 0;
1268
1269                 t = o;
1270                 o = o->next;
1271                 freeObject(state, t);
1272         }
1273         o = header;
1274         o->object = array;
1275         return o;
1276 };
1277
1278 object_t *
1279 buildSet(parser_state_t *state, object_t *header)
1280 {
1281         object_t *o = buildArray(state, header);
1282
1283         OSArray *array = (OSArray *)o->object;
1284         OSSet *set = OSSet::withArray(array, array->getCapacity());
1285
1286         // write over the reference created in buildArray
1287         if (header->idref >= 0) {
1288                 rememberObject(state, header->idref, set);
1289         }
1290
1291         array->release();
1292         o->object = set;
1293         return o;
1294 };
1295
1296 object_t *
1297 buildString(parser_state_t *state, object_t *o)
1298 {
1299         OSString *string;
1300
1301         string = OSString::withCString(o->string);
1302         if (o->idref >= 0) {
1303                 rememberObject(state, o->idref, string);
1304         }
1305
1306         free(o->string);
1307         o->string = 0;
1308         o->object = string;
1309
1310         return o;
1311 };
1312
1313 object_t *
1314 buildSymbol(parser_state_t *state, object_t *o)
1315 {
1316         OSSymbol *symbol;
1317
1318         symbol = const_cast < OSSymbol * > (OSSymbol::withCString(o->string));
1319         if (o->idref >= 0) {
1320                 rememberObject(state, o->idref, symbol);
1321         }
1322
1323         safe_free(o->string, strlen(o->string) + 1);
1324         o->string = 0;
1325         o->object = symbol;
1326
1327         return o;
1328 };
1329
1330 object_t *
1331 buildData(parser_state_t *state, object_t *o)
1332 {
1333         OSData *data;
1334
1335         if (o->size) {
1336                 data = OSData::withBytes(o->data, o->size);
1337         } else {
1338                 data = OSData::withCapacity(0);
1339         }
1340         if (o->idref >= 0) {
1341                 rememberObject(state, o->idref, data);
1342         }
1343
1344         if (o->size) {
1345                 free(o->data);
1346         }
1347         o->data = 0;
1348         o->object = data;
1349         return o;
1350 };
1351
1352 object_t *
1353 buildNumber(parser_state_t *state, object_t *o)
1354 {
1355         OSNumber *number = OSNumber::withNumber(o->number, o->size);
1356
1357         if (o->idref >= 0) {
1358                 rememberObject(state, o->idref, number);
1359         }
1360
1361         o->object = number;
1362         return o;
1363 };
1364
1365 object_t *
1366 buildBoolean(parser_state_t *state __unused, object_t *o)
1367 {
1368         o->object = ((o->number == 0) ? kOSBooleanFalse : kOSBooleanTrue);
1369         o->object->retain();
1370         return o;
1371 };
1372
1373 OSObject*
1374 OSUnserializeXML(const char *buffer, OSString **errorString)
1375 {
1376         OSObject *object;
1377
1378         if (!buffer) {
1379                 return 0;
1380         }
1381         parser_state_t *state = (parser_state_t *)malloc(sizeof(parser_state_t));
1382         if (!state) {
1383                 return 0;
1384         }
1385
1386         // just in case
1387         if (errorString) {
1388                 *errorString = NULL;
1389         }
1390
1391         state->parseBuffer = buffer;
1392         state->parseBufferIndex = 0;
1393         state->lineNumber = 1;
1394         state->objects = 0;
1395         state->freeObjects = 0;
1396         state->tags = OSDictionary::withCapacity(128);
1397         state->errorString = errorString;
1398         state->parsedObject = 0;
1399         state->parsedObjectCount = 0;
1400         state->retrievedObjectCount = 0;
1401
1402         (void)yyparse((void *)state);
1403
1404         object = state->parsedObject;
1405
1406         cleanupObjects(state);
1407         state->tags->release();
1408         safe_free(state, sizeof(parser_state_t));
1409
1410         return object;
1411 }
1412
1413 #include <libkern/OSSerializeBinary.h>
1414
1415 OSObject*
1416 OSUnserializeXML(const char *buffer, size_t bufferSize, OSString **errorString)
1417 {
1418         if (!buffer) {
1419                 return 0;
1420         }
1421         if (bufferSize < sizeof(kOSSerializeBinarySignature)) {
1422                 return 0;
1423         }
1424
1425         if (!strcmp(kOSSerializeBinarySignature, buffer)
1426             || (kOSSerializeIndexedBinarySignature == (uint8_t)buffer[0])) {
1427                 return OSUnserializeBinary(buffer, bufferSize, errorString);
1428         }
1429
1430         // XML must be null terminated
1431         if (buffer[bufferSize - 1]) {
1432                 return 0;
1433         }
1434
1435         return OSUnserializeXML(buffer, errorString);
1436 }
1437
1438
1439 //
1440 //
1441 //
1442 //
1443 //
1444 //               DO NOT EDIT OSUnserializeXML.cpp!
1445 //
1446 //                      this means you!
1447 //
1448 //
1449 //
1450 //
1451 //