src/reader.c

   1 /* Input parser for bison
   2    Copyright 1984, 1986, 1989, 1992, 1998, 2000, 2001
   3    Free Software Foundation, Inc.
   4
   5    This file is part of Bison, the GNU Compiler Compiler.
   6
   7    Bison is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 2, or (at your option)
  10    any later version.
  11
  12    Bison is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with Bison; see the file COPYING.  If not, write to
  19    the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20    Boston, MA 02111-1307, USA.  */
  21
  22
#include "system.h"
#include <limits.h>
#include "obstack.h"
#include "quotearg.h"
#include "quote.h"
#include "getargs.h"
#include "files.h"
#include "symtab.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
  36
  37 typedef struct symbol_list
  38 {
  39   struct symbol_list *next;
  40   bucket *sym;
  41   int line;
  42   bucket *ruleprec;
  43 }
  44 symbol_list;
  45
  46 int lineno;
  47 char **tags;
  48 short *user_toknums;
  49 static symbol_list *grammar;
  50 static int start_flag;
  51 static bucket *startval;
  52
  53 /* Nonzero if components of semantic values are used, implying
  54    they must be unions.  */
  55 static int value_components_used;
  56
  57 /* Nonzero if %union has been seen.  */
  58 static int typed;
  59
  60 /* Incremented for each %left, %right or %nonassoc seen */
  61 static int lastprec;
  62
  63 static bucket *errtoken;
  64 static bucket *undeftoken;
  65
  66
  67 static symbol_list *
  68 symbol_list_new (bucket *sym)
  69 {
  70   symbol_list *res = XMALLOC (symbol_list, 1);
  71   res->next = NULL;
  72   res->sym = sym;
  73   res->line = lineno;
  74   res->ruleprec = NULL;
  75   return res;
  76 }
  77
  78 \f
  79
  80 /*===================\
  81 | Low level lexing.  |
  82 \===================*/
  83
  84 static void
  85 skip_to_char (int target)
  86 {
  87   int c;
  88   if (target == '\n')
  89     complain (_("   Skipping to next \\n"));
  90   else
  91     complain (_("   Skipping to next %c"), target);
  92
  93   do
  94     c = skip_white_space ();
  95   while (c != target && c != EOF);
  96   if (c != EOF)
  97     ungetc (c, finput);
  98 }
  99
 100
 101 /*---------------------------------------------------------.
 102 | Read a signed integer from STREAM and return its value.  |
 103 `---------------------------------------------------------*/
 104
 105 static inline int
 106 read_signed_integer (FILE *stream)
 107 {
 108   int c = getc (stream);
 109   int sign = 1;
 110   int n = 0;
 111
 112   if (c == '-')
 113     {
 114       c = getc (stream);
 115       sign = -1;
 116     }
 117
 118   while (isdigit (c))
 119     {
 120       n = 10 * n + (c - '0');
 121       c = getc (stream);
 122     }
 123
 124   ungetc (c, stream);
 125
 126   return sign * n;
 127 }
 128 \f
 129 /*--------------------------------------------------------------.
 130 | Get the data type (alternative in the union) of the value for |
 131 | symbol N in rule RULE.                                        |
 132 `--------------------------------------------------------------*/
 133
 134 static char *
 135 get_type_name (int n, symbol_list *rule)
 136 {
 137   int i;
 138   symbol_list *rp;
 139
 140   if (n < 0)
 141     {
 142       complain (_("invalid $ value"));
 143       return NULL;
 144     }
 145
 146   rp = rule;
 147   i = 0;
 148
 149   while (i < n)
 150     {
 151       rp = rp->next;
 152       if (rp == NULL || rp->sym == NULL)
 153         {
 154           complain (_("invalid $ value"));
 155           return NULL;
 156         }
 157       i++;
 158     }
 159
 160   return rp->sym->type_name;
 161 }
 162 \f
 163 /*------------------------------------------------------------.
 164 | Dump the string from FIN to OOUT if non null.  MATCH is the |
 165 | delimiter of the string (either ' or ").                    |
 166 `------------------------------------------------------------*/
 167
 168 static inline void
 169 copy_string (FILE *fin, struct obstack *oout, int match)
 170 {
 171   int c;
 172
 173   obstack_1grow (oout, match);
 174
 175   c = getc (fin);
 176
 177   while (c != match)
 178     {
 179       if (c == EOF)
 180         fatal (_("unterminated string at end of file"));
 181       if (c == '\n')
 182         {
 183           complain (_("unterminated string"));
 184           ungetc (c, fin);
 185           c = match;            /* invent terminator */
 186           continue;
 187         }
 188
 189       obstack_1grow (oout, c);
 190
 191       if (c == '\\')
 192         {
 193           c = getc (fin);
 194           if (c == EOF)
 195             fatal (_("unterminated string at end of file"));
 196           obstack_1grow (oout, c);
 197
 198           if (c == '\n')
 199             lineno++;
 200         }
 201
 202       c = getc (fin);
 203     }
 204
 205   obstack_1grow (oout, c);
 206 }
 207
 208
 209 /*-----------------------------------------------------------------.
 210 | Dump the wannabee comment from IN to OUT1 and OUT2 (which can be |
 211 | NULL).  In fact we just saw a `/', which might or might not be a |
 212 | comment.  In any case, copy what we saw.                         |
 213 |                                                                  |
 214 | OUT2 might be NULL.                                              |
 215 `-----------------------------------------------------------------*/
 216
 217 static inline void
 218 copy_comment2 (FILE *fin, struct obstack *oout1, struct obstack *oout2)
 219 {
 220   int cplus_comment;
 221   int ended;
 222   int c;
 223
 224   /* We read a `/', output it. */
 225   obstack_1grow (oout1, '/');
 226   if (oout2)
 227     obstack_1grow (oout2, '/');
 228
 229   switch ((c = getc (fin)))
 230     {
 231     case '/':
 232       cplus_comment = 1;
 233       break;
 234     case '*':
 235       cplus_comment = 0;
 236       break;
 237     default:
 238       ungetc (c, fin);
 239       return;
 240     }
 241
 242   obstack_1grow (oout1, c);
 243   if (oout2)
 244     obstack_1grow (oout2, c);
 245   c = getc (fin);
 246
 247   ended = 0;
 248   while (!ended)
 249     {
 250       if (!cplus_comment && c == '*')
 251         {
 252           while (c == '*')
 253             {
 254               obstack_1grow (oout1, c);
 255               if (oout2)
 256                 obstack_1grow (oout2, c);
 257               c = getc (fin);
 258             }
 259
 260           if (c == '/')
 261             {
 262               obstack_1grow (oout1, c);
 263               if (oout2)
 264                 obstack_1grow (oout2, c);
 265               ended = 1;
 266             }
 267         }
 268       else if (c == '\n')
 269         {
 270           lineno++;
 271           obstack_1grow (oout1, c);
 272           if (oout2)
 273             obstack_1grow (oout2, c);
 274           if (cplus_comment)
 275             ended = 1;
 276           else
 277             c = getc (fin);
 278         }
 279       else if (c == EOF)
 280         fatal (_("unterminated comment"));
 281       else
 282         {
 283           obstack_1grow (oout1, c);
 284           if (oout2)
 285             obstack_1grow (oout2, c);
 286           c = getc (fin);
 287         }
 288     }
 289 }
 290
 291
 292 /*-------------------------------------------------------------------.
 293 | Dump the comment (actually the current string starting with a `/') |
 294 | from FIN to OOUT.                                                  |
 295 `-------------------------------------------------------------------*/
 296
 297 static inline void
 298 copy_comment (FILE *fin, struct obstack *oout)
 299 {
 300   copy_comment2 (fin, oout, NULL);
 301 }
 302
 303
 304 /*-----------------------------------------------------------------.
 305 | FIN is pointing to a location (i.e., a `@').  Output to OOUT a   |
 306 | reference to this location. STACK_OFFSET is the number of values |
 307 | in the current rule so far, which says where to find `$0' with   |
 308 | respect to the top of the stack.                                 |
 309 `-----------------------------------------------------------------*/
 310
 311 static inline void
 312 copy_at (FILE *fin, struct obstack *oout, int stack_offset)
 313 {
 314   int c;
 315
 316   c = getc (fin);
 317   if (c == '$')
 318     {
 319       obstack_sgrow (oout, "yyloc");
 320       locations_flag = 1;
 321     }
 322   else if (isdigit (c) || c == '-')
 323     {
 324       int n;
 325
 326       ungetc (c, fin);
 327       n = read_signed_integer (fin);
 328
 329       obstack_fgrow1 (oout, "yylsp[%d]", n - stack_offset);
 330       locations_flag = 1;
 331     }
 332   else
 333     {
 334       char buf[] = "@c";
 335       buf[1] = c;
 336       complain (_("%s is invalid"), quote (buf));
 337     }
 338 }
 339
 340
 341 /*-------------------------------------------------------------------.
 342 | FIN is pointing to a wannabee semantic value (i.e., a `$').        |
 343 |                                                                    |
 344 | Possible inputs: $[<TYPENAME>]($|integer)                          |
 345 |                                                                    |
 346 | Output to OOUT a reference to this semantic value. STACK_OFFSET is |
 347 | the number of values in the current rule so far, which says where  |
 348 | to find `$0' with respect to the top of the stack.                 |
 349 `-------------------------------------------------------------------*/
 350
 351 static inline void
 352 copy_dollar (FILE *fin, struct obstack *oout,
 353              symbol_list *rule, int stack_offset)
 354 {
 355   int c = getc (fin);
 356   const char *type_name = NULL;
 357
 358   /* Get the type name if explicit. */
 359   if (c == '<')
 360     {
 361       read_type_name (fin);
 362       type_name = token_buffer;
 363       value_components_used = 1;
 364       c = getc (fin);
 365     }
 366
 367   if (c == '$')
 368     {
 369       obstack_sgrow (oout, "yyval");
 370
 371       if (!type_name)
 372         type_name = get_type_name (0, rule);
 373       if (type_name)
 374         obstack_fgrow1 (oout, ".%s", type_name);
 375       if (!type_name && typed)
 376         complain (_("$$ of `%s' has no declared type"),
 377                   rule->sym->tag);
 378     }
 379   else if (isdigit (c) || c == '-')
 380     {
 381       int n;
 382       ungetc (c, fin);
 383       n = read_signed_integer (fin);
 384
 385       if (!type_name && n > 0)
 386         type_name = get_type_name (n, rule);
 387
 388       obstack_fgrow1 (oout, "yyvsp[%d]", n - stack_offset);
 389
 390       if (type_name)
 391         obstack_fgrow1 (oout, ".%s", type_name);
 392       if (!type_name && typed)
 393         complain (_("$%d of `%s' has no declared type"),
 394                   n, rule->sym->tag);
 395     }
 396   else
 397     {
 398       char buf[] = "$c";
 399       buf[1] = c;
 400       complain (_("%s is invalid"), quote (buf));
 401     }
 402 }
 403 \f
 404 /*-------------------------------------------------------------------.
 405 | Copy the contents of a `%{ ... %}' into the definitions file.  The |
 406 | `%{' has already been read.  Return after reading the `%}'.        |
 407 `-------------------------------------------------------------------*/
 408
 409 static void
 410 copy_definition (void)
 411 {
 412   int c;
 413   /* -1 while reading a character if prev char was %. */
 414   int after_percent;
 415
 416   if (!no_lines_flag)
 417     obstack_fgrow2 (&attrs_obstack, "#line %d %s\n",
 418                     lineno, quotearg_style (c_quoting_style, infile));
 419
 420   after_percent = 0;
 421
 422   c = getc (finput);
 423
 424   for (;;)
 425     {
 426       switch (c)
 427         {
 428         case '\n':
 429           obstack_1grow (&attrs_obstack, c);
 430           lineno++;
 431           break;
 432
 433         case '%':
 434           after_percent = -1;
 435           break;
 436
 437         case '\'':
 438         case '"':
 439           copy_string (finput, &attrs_obstack, c);
 440           break;
 441
 442         case '/':
 443           copy_comment (finput, &attrs_obstack);
 444           break;
 445
 446         case EOF:
 447           fatal ("%s", _("unterminated `%{' definition"));
 448
 449         default:
 450           obstack_1grow (&attrs_obstack, c);
 451         }
 452
 453       c = getc (finput);
 454
 455       if (after_percent)
 456         {
 457           if (c == '}')
 458             return;
 459           obstack_1grow (&attrs_obstack, '%');
 460         }
 461       after_percent = 0;
 462     }
 463 }
 464
 465
 466 /*-------------------------------------------------------------------.
 467 | Parse what comes after %token or %nterm.  For %token, WHAT_IS is   |
 468 | token_sym and WHAT_IS_NOT is nterm_sym.  For %nterm, the arguments |
 469 | are reversed.                                                      |
 470 `-------------------------------------------------------------------*/
 471
 472 static void
 473 parse_token_decl (symbol_class what_is, symbol_class what_is_not)
 474 {
 475   token_t token = tok_undef;
 476   char *typename = NULL;
 477
 478   /* The symbol being defined.  */
 479   struct bucket *symbol = NULL;
 480
 481   /* After `%token' and `%nterm', any number of symbols maybe be
 482      defined.  */
 483   for (;;)
 484     {
 485       int tmp_char = ungetc (skip_white_space (), finput);
 486
 487       /* `%' (for instance from `%token', or from `%%' etc.) is the
 488          only valid means to end this declaration.  */
 489       if (tmp_char == '%')
 490         return;
 491       if (tmp_char == EOF)
 492         fatal (_("Premature EOF after %s"), token_buffer);
 493
 494       token = lex ();
 495       if (token == tok_comma)
 496         {
 497           symbol = NULL;
 498           continue;
 499         }
 500       if (token == tok_typename)
 501         {
 502           typename = xstrdup (token_buffer);
 503           value_components_used = 1;
 504           symbol = NULL;
 505         }
 506       else if (token == tok_identifier && *symval->tag == '\"' && symbol)
 507         {
 508           if (symval->alias)
 509             warn (_("symbol `%s' used more than once as a literal string"),
 510                   symval->tag);
 511           else if (symbol->alias)
 512             warn (_("symbol `%s' given more than one literal string"),
 513                   symbol->tag);
 514           else
 515             {
 516               symval->class = token_sym;
 517               symval->type_name = typename;
 518               symval->user_token_number = symbol->user_token_number;
 519               symbol->user_token_number = SALIAS;
 520               symval->alias = symbol;
 521               symbol->alias = symval;
 522               /* symbol and symval combined are only one symbol */
 523               nsyms--;
 524             }
 525           symbol = NULL;
 526         }
 527       else if (token == tok_identifier)
 528         {
 529           int oldclass = symval->class;
 530           symbol = symval;
 531
 532           if (symbol->class == what_is_not)
 533             complain (_("symbol %s redefined"), symbol->tag);
 534           symbol->class = what_is;
 535           if (what_is == nterm_sym && oldclass != nterm_sym)
 536             symbol->value = nvars++;
 537
 538           if (typename)
 539             {
 540               if (symbol->type_name == NULL)
 541                 symbol->type_name = typename;
 542               else if (strcmp (typename, symbol->type_name) != 0)
 543                 complain (_("type redeclaration for %s"), symbol->tag);
 544             }
 545         }
 546       else if (symbol && token == tok_number)
 547         {
 548           symbol->user_token_number = numval;
 549         }
 550       else
 551         {
 552           complain (_("`%s' is invalid in %s"),
 553                     token_buffer,
 554                     (what_is == token_sym) ? "%token" : "%nterm");
 555           skip_to_char ('%');
 556         }
 557     }
 558
 559 }
 560
 561
 562 /*------------------------------.
 563 | Parse what comes after %start |
 564 `------------------------------*/
 565
 566 static void
 567 parse_start_decl (void)
 568 {
 569   if (start_flag)
 570     complain (_("multiple %s declarations"), "%start");
 571   if (lex () != tok_identifier)
 572     complain (_("invalid %s declaration"), "%start");
 573   else
 574     {
 575       start_flag = 1;
 576       startval = symval;
 577     }
 578 }
 579
 580 /*-----------------------------------------------------------.
 581 | read in a %type declaration and record its information for |
 582 | get_type_name to access                                    |
 583 `-----------------------------------------------------------*/
 584
 585 static void
 586 parse_type_decl (void)
 587 {
 588   char *name;
 589
 590   if (lex () != tok_typename)
 591     {
 592       complain ("%s", _("%type declaration has no <typename>"));
 593       skip_to_char ('%');
 594       return;
 595     }
 596
 597   name = xstrdup (token_buffer);
 598
 599   for (;;)
 600     {
 601       token_t t;
 602       int tmp_char = ungetc (skip_white_space (), finput);
 603
 604       if (tmp_char == '%')
 605         return;
 606       if (tmp_char == EOF)
 607         fatal (_("Premature EOF after %s"), token_buffer);
 608
 609       t = lex ();
 610
 611       switch (t)
 612         {
 613
 614         case tok_comma:
 615         case tok_semicolon:
 616           break;
 617
 618         case tok_identifier:
 619           if (symval->type_name == NULL)
 620             symval->type_name = name;
 621           else if (strcmp (name, symval->type_name) != 0)
 622             complain (_("type redeclaration for %s"), symval->tag);
 623
 624           break;
 625
 626         default:
 627           complain (_("invalid %%type declaration due to item: %s"),
 628                     token_buffer);
 629           skip_to_char ('%');
 630         }
 631     }
 632 }
 633
 634
 635
 636 /*----------------------------------------------------------------.
 637 | Read in a %left, %right or %nonassoc declaration and record its |
 638 | information.                                                    |
 639 `----------------------------------------------------------------*/
 640
 641 static void
 642 parse_assoc_decl (associativity assoc)
 643 {
 644   char *name = NULL;
 645   int prev = 0;
 646
 647   lastprec++;                   /* Assign a new precedence level, never 0.  */
 648
 649   for (;;)
 650     {
 651       token_t t;
 652       int tmp_char = ungetc (skip_white_space (), finput);
 653
 654       if (tmp_char == '%')
 655         return;
 656       if (tmp_char == EOF)
 657         fatal (_("Premature EOF after %s"), token_buffer);
 658
 659       t = lex ();
 660
 661       switch (t)
 662         {
 663         case tok_typename:
 664           name = xstrdup (token_buffer);
 665           break;
 666
 667         case tok_comma:
 668           break;
 669
 670         case tok_identifier:
 671           if (symval->prec != 0)
 672             complain (_("redefining precedence of %s"), symval->tag);
 673           symval->prec = lastprec;
 674           symval->assoc = assoc;
 675           if (symval->class == nterm_sym)
 676             complain (_("symbol %s redefined"), symval->tag);
 677           symval->class = token_sym;
 678           if (name)
 679             {                   /* record the type, if one is specified */
 680               if (symval->type_name == NULL)
 681                 symval->type_name = name;
 682               else if (strcmp (name, symval->type_name) != 0)
 683                 complain (_("type redeclaration for %s"), symval->tag);
 684             }
 685           break;
 686
 687         case tok_number:
 688           if (prev == tok_identifier)
 689             {
 690               symval->user_token_number = numval;
 691             }
 692           else
 693             {
 694               complain (_
 695                         ("invalid text (%s) - number should be after identifier"),
 696 token_buffer);
 697               skip_to_char ('%');
 698             }
 699           break;
 700
 701         case tok_semicolon:
 702           return;
 703
 704         default:
 705           complain (_("unexpected item: %s"), token_buffer);
 706           skip_to_char ('%');
 707         }
 708
 709       prev = t;
 710     }
 711 }
 712
 713
 714
 715 /*--------------------------------------------------------------.
 716 | Copy the union declaration into ATTRS_OBSTACK (and fdefines), |
 717 | where it is made into the definition of YYSTYPE, the type of  |
 718 | elements of the parser value stack.                           |
 719 `--------------------------------------------------------------*/
 720
 721 static void
 722 parse_union_decl (void)
 723 {
 724   int c;
 725   int count = 0;
 726   const char *prologue = "\
 727 #ifndef YYSTYPE\n\
 728 typedef union";
 729   const char *epilogue = "\
 730  yystype;\n\
 731 # define YYSTYPE yystype\n\
 732 #endif\n";
 733
 734   if (typed)
 735     complain (_("multiple %s declarations"), "%union");
 736
 737   typed = 1;
 738
 739   if (!no_lines_flag)
 740     obstack_fgrow2 (&attrs_obstack, "\n#line %d %s\n",
 741                     lineno, quotearg_style (c_quoting_style, infile));
 742   else
 743     obstack_1grow (&attrs_obstack, '\n');
 744
 745   obstack_sgrow (&attrs_obstack, prologue);
 746   if (defines_flag)
 747     obstack_sgrow (&defines_obstack, prologue);
 748
 749   c = getc (finput);
 750
 751   while (c != EOF)
 752     {
 753
 754       /* If C contains '/', it is output by copy_comment ().  */
 755       if (c != '/')
 756         {
 757           obstack_1grow (&attrs_obstack, c);
 758           if (defines_flag)
 759             obstack_1grow (&defines_obstack, c);
 760         }
 761
 762       switch (c)
 763         {
 764         case '\n':
 765           lineno++;
 766           break;
 767
 768         case '/':
 769           copy_comment2 (finput, &defines_obstack, &attrs_obstack);
 770           break;
 771
 772         case '{':
 773           count++;
 774           break;
 775
 776         case '}':
 777           if (count == 0)
 778             complain (_("unmatched %s"), "`}'");
 779           count--;
 780           if (count <= 0)
 781             {
 782               obstack_sgrow (&attrs_obstack, epilogue);
 783               if (defines_flag)
 784                 obstack_sgrow (&defines_obstack, epilogue);
 785               /* JF don't choke on trailing semi */
 786               c = skip_white_space ();
 787               if (c != ';')
 788                 ungetc (c, finput);
 789               return;
 790             }
 791         }
 792
 793       c = getc (finput);
 794     }
 795 }
 796
 797
 798 /*-------------------------------------------------------.
 799 | Parse the declaration %expect N which says to expect N |
 800 | shift-reduce conflicts.                                |
 801 `-------------------------------------------------------*/
 802
 803 static void
 804 parse_expect_decl (void)
 805 {
 806   int c = skip_white_space ();
 807   ungetc (c, finput);
 808
 809   if (!isdigit (c))
 810     complain (_("argument of %%expect is not an integer"));
 811   else
 812     expected_conflicts = read_signed_integer (finput);
 813 }
 814
 815
 816 /*-------------------------------------------------------------------.
 817 | Parse what comes after %thong.  the full syntax is                 |
 818 |                                                                    |
 819 |                %thong <type> token number literal                  |
 820 |                                                                    |
 821 | the <type> or number may be omitted.  The number specifies the     |
 822 | user_token_number.                                                 |
 823 |                                                                    |
 824 | Two symbols are entered in the table, one for the token symbol and |
 825 | one for the literal.  Both are given the <type>, if any, from the  |
 826 | declaration.  The ->user_token_number of the first is SALIAS and   |
 827 | the ->user_token_number of the second is set to the number, if     |
 828 | any, from the declaration.  The two symbols are linked via         |
 829 | pointers in their ->alias fields.                                  |
 830 |                                                                    |
 831 | During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter,    |
 832 | only the literal string is retained it is the literal string that  |
 833 | is output to yytname                                               |
 834 `-------------------------------------------------------------------*/
 835
 836 static void
 837 parse_thong_decl (void)
 838 {
 839   token_t token;
 840   struct bucket *symbol;
 841   char *typename = 0;
 842   int usrtoknum = SUNDEF;
 843
 844   token = lex ();               /* fetch typename or first token */
 845   if (token == tok_typename)
 846     {
 847       typename = xstrdup (token_buffer);
 848       value_components_used = 1;
 849       token = lex ();           /* fetch first token */
 850     }
 851
 852   /* process first token */
 853
 854   if (token != tok_identifier)
 855     {
 856       complain (_("unrecognized item %s, expected an identifier"),
 857                 token_buffer);
 858       skip_to_char ('%');
 859       return;
 860     }
 861   symval->class = token_sym;
 862   symval->type_name = typename;
 863   symval->user_token_number = SALIAS;
 864   symbol = symval;
 865
 866   token = lex ();               /* get number or literal string */
 867
 868   if (token == tok_number)
 869     {
 870       usrtoknum = numval;
 871       token = lex ();           /* okay, did number, now get literal */
 872     }
 873
 874   /* process literal string token */
 875
 876   if (token != tok_identifier || *symval->tag != '\"')
 877     {
 878       complain (_("expected string constant instead of %s"), token_buffer);
 879       skip_to_char ('%');
 880       return;
 881     }
 882   symval->class = token_sym;
 883   symval->type_name = typename;
 884   symval->user_token_number = usrtoknum;
 885
 886   symval->alias = symbol;
 887   symbol->alias = symval;
 888
 889   /* symbol and symval combined are only one symbol.  */
 890   nsyms--;
 891 }
 892
 893
 894 /*------------------------------------------------------------------.
 895 | Parse a double quoted parameter. It was used for                  |
 896 | %{source,header}_extension.  For the moment, It is not used since |
 897 | extension features have been removed.                             |
 898 `------------------------------------------------------------------*/
 899
 900 #if 0
 901
 902 static const char *
 903 parse_dquoted_param (const char *from)
 904 {
 905   char buff[32];
 906   int c;
 907   int i;
 908
 909   c = skip_white_space ();
 910
 911   if (c != '"')
 912     {
 913       ungetc (c, finput);
 914       complain (_("invalid %s declaration"), from);
 915       return NULL;
 916     }
 917
 918   c = getc (finput);
 919   for (i = 0; (c >= '!') && (c <= '~'); i++)
 920     {
 921       if (c == '"')
 922         break;
 923
 924       if (c == '\\')
 925         {
 926           c = getc (finput);
 927           if ((c < '!') && (c > '~'))
 928             break;
 929         }
 930
 931       buff[i] = c;
 932       c = getc (finput);
 933     }
 934   buff[i] = '\0';
 935
 936   if (c != '"')
 937     {
 938       ungetc (c, finput);
 939       complain (_("invalid %s declaration"), from);
 940       return NULL;
 941     }
 942
 943   return xstrdup (buff);
 944 }
 945
 946 #endif
 947
 948
 949 /*----------------------------------------------------------------.
 950 | Read from finput until `%%' is seen.  Discard the `%%'.  Handle |
 951 | any `%' declarations, and copy the contents of any `%{ ... %}'  |
 952 | groups to ATTRS_OBSTACK.                                        |
 953 `----------------------------------------------------------------*/
 954
 955 static void
 956 read_declarations (void)
 957 {
 958   for (;;)
 959     {
 960       int c = skip_white_space ();
 961
 962       if (c == '%')
 963         {
 964           token_t tok = parse_percent_token ();
 965
 966           switch (tok)
 967             {
 968             case tok_two_percents:
 969               return;
 970
 971             case tok_percent_left_curly:
 972               copy_definition ();
 973               break;
 974
 975             case tok_token:
 976               parse_token_decl (token_sym, nterm_sym);
 977               break;
 978
 979             case tok_nterm:
 980               parse_token_decl (nterm_sym, token_sym);
 981               break;
 982
 983             case tok_type:
 984               parse_type_decl ();
 985               break;
 986
 987             case tok_start:
 988               parse_start_decl ();
 989               break;
 990
 991             case tok_union:
 992               parse_union_decl ();
 993               break;
 994
 995             case tok_expect:
 996               parse_expect_decl ();
 997               break;
 998
 999             case tok_thong:
1000               parse_thong_decl ();
1001               break;
1002
1003             case tok_left:
1004               parse_assoc_decl (left_assoc);
1005               break;
1006
1007             case tok_right:
1008               parse_assoc_decl (right_assoc);
1009               break;
1010
1011             case tok_nonassoc:
1012               parse_assoc_decl (non_assoc);
1013               break;
1014
1015             case tok_noop:
1016               break;
1017
1018             case tok_stropt:
1019             case tok_intopt:
1020             case tok_obsolete:
1021               abort ();
1022               break;
1023
1024             case tok_illegal:
1025             default:
1026               complain (_("unrecognized: %s"), token_buffer);
1027               skip_to_char ('%');
1028             }
1029         }
1030       else if (c == EOF)
1031         fatal (_("no input grammar"));
1032       else
1033         {
1034           char buf[] = "c";
1035           buf[0] = c;
1036           complain (_("unknown character: %s"), quote (buf));
1037           skip_to_char ('%');
1038         }
1039     }
1040 }
1041 \f
1042 /*-------------------------------------------------------------------.
1043 | Assuming that a `{' has just been seen, copy everything up to the  |
1044 | matching `}' into the actions file.  STACK_OFFSET is the number of |
1045 | values in the current rule so far, which says where to find `$0'   |
1046 | with respect to the top of the stack.                              |
1047 `-------------------------------------------------------------------*/
1048
1049 static void
1050 copy_action (symbol_list *rule, int stack_offset)
1051 {
1052   int c;
1053   int count;
1054   char buf[4096];
1055
1056   /* offset is always 0 if parser has already popped the stack pointer */
1057   if (semantic_parser)
1058     stack_offset = 0;
1059
1060   sprintf (buf, "\ncase %d:\n", nrules);
1061   obstack_grow (&action_obstack, buf, strlen (buf));
1062
1063   if (!no_lines_flag)
1064     {
1065       sprintf (buf, "#line %d %s\n",
1066                lineno, quotearg_style (c_quoting_style, infile));
1067       obstack_grow (&action_obstack, buf, strlen (buf));
1068     }
1069   obstack_1grow (&action_obstack, '{');
1070
1071   count = 1;
1072   c = getc (finput);
1073
1074   while (count > 0)
1075     {
1076       while (c != '}')
1077         {
1078           switch (c)
1079             {
1080             case '\n':
1081               obstack_1grow (&action_obstack, c);
1082               lineno++;
1083               break;
1084
1085             case '{':
1086               obstack_1grow (&action_obstack, c);
1087               count++;
1088               break;
1089
1090             case '\'':
1091             case '"':
1092               copy_string (finput, &action_obstack, c);
1093               break;
1094
1095             case '/':
1096               copy_comment (finput, &action_obstack);
1097               break;
1098
1099             case '$':
1100               copy_dollar (finput, &action_obstack,
1101                            rule, stack_offset);
1102               break;
1103
1104             case '@':
1105               copy_at (finput, &action_obstack,
1106                        stack_offset);
1107               break;
1108
1109             case EOF:
1110               fatal (_("unmatched %s"), "`{'");
1111
1112             default:
1113               obstack_1grow (&action_obstack, c);
1114             }
1115
1116           c = getc (finput);
1117         }
1118
1119       /* above loop exits when c is '}' */
1120
1121       if (--count)
1122         {
1123           obstack_1grow (&action_obstack, c);
1124           c = getc (finput);
1125         }
1126     }
1127
1128   /* As a Bison extension, add the ending semicolon.  Since some Yacc
1129      don't do that, help people using bison as a Yacc finding their
1130      missing semicolons.  */
1131   if (yacc_flag)
1132     obstack_sgrow (&action_obstack, "}\n    break;");
1133   else
1134     obstack_sgrow (&action_obstack, ";\n    break;}");
1135 }
1136 \f
1137 /*-------------------------------------------------------------------.
1138 | After `%guard' is seen in the input file, copy the actual guard    |
1139 | into the guards file.  If the guard is followed by an action, copy |
1140 | that into the actions file.  STACK_OFFSET is the number of values  |
1141 | in the current rule so far, which says where to find `$0' with     |
1142 | respect to the top of the stack, for the simple parser in which    |
1143 | the stack is not popped until after the guard is run.              |
1144 `-------------------------------------------------------------------*/
1145
1146 static void
1147 copy_guard (symbol_list *rule, int stack_offset)
1148 {
1149   int c;
1150   int count;
1151   int brace_flag = 0;
1152
1153   /* offset is always 0 if parser has already popped the stack pointer */
1154   if (semantic_parser)
1155     stack_offset = 0;
1156
1157   obstack_fgrow1 (&guard_obstack, "\ncase %d:\n", nrules);
1158   if (!no_lines_flag)
1159     obstack_fgrow2 (&guard_obstack, "#line %d %s\n",
1160                     lineno, quotearg_style (c_quoting_style, infile));
1161   obstack_1grow (&guard_obstack, '{');
1162
1163   count = 0;
1164   c = getc (finput);
1165
1166   while (brace_flag ? (count > 0) : (c != ';'))
1167     {
1168       switch (c)
1169         {
1170         case '\n':
1171           obstack_1grow (&guard_obstack, c);
1172           lineno++;
1173           break;
1174
1175         case '{':
1176           obstack_1grow (&guard_obstack, c);
1177           brace_flag = 1;
1178           count++;
1179           break;
1180
1181         case '}':
1182           obstack_1grow (&guard_obstack, c);
1183           if (count > 0)
1184             count--;
1185           else
1186             {
1187               complain (_("unmatched %s"), "`}'");
1188               c = getc (finput);        /* skip it */
1189             }
1190           break;
1191
1192         case '\'':
1193         case '"':
1194           copy_string (finput, &guard_obstack, c);
1195           break;
1196
1197         case '/':
1198           copy_comment (finput, &guard_obstack);
1199           break;
1200
1201         case '$':
1202           copy_dollar (finput, &guard_obstack, rule, stack_offset);
1203           break;
1204
1205         case '@':
1206           copy_at (finput, &guard_obstack, stack_offset);
1207           break;
1208
1209         case EOF:
1210           fatal ("%s", _("unterminated %guard clause"));
1211
1212         default:
1213           obstack_1grow (&guard_obstack, c);
1214         }
1215
1216       if (c != '}' || count != 0)
1217         c = getc (finput);
1218     }
1219
1220   c = skip_white_space ();
1221
1222   obstack_sgrow (&guard_obstack, ";\n    break;}");
1223   if (c == '{')
1224     copy_action (rule, stack_offset);
1225   else if (c == '=')
1226     {
1227       c = getc (finput);        /* why not skip_white_space -wjh */
1228       if (c == '{')
1229         copy_action (rule, stack_offset);
1230     }
1231   else
1232     ungetc (c, finput);
1233 }
1234 \f
1235
1236 /*-------------------------------------------------------------------.
1237 | Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1238 | with the user's names.                                             |
1239 `-------------------------------------------------------------------*/
1240
1241 static bucket *
1242 gensym (void)
1243 {
1244   /* Incremented for each generated symbol */
1245   static int gensym_count = 0;
1246   static char buf[256];
1247
1248   bucket *sym;
1249
1250   sprintf (buf, "@%d", ++gensym_count);
1251   token_buffer = buf;
1252   sym = getsym (token_buffer);
1253   sym->class = nterm_sym;
1254   sym->value = nvars++;
1255   return sym;
1256 }
1257
#if 0
/*------------------------------------------------------------------.
| read in a %type declaration and record its information for        |
| get_type_name to access.  This is unused.  It is only called from |
| the #if 0 part of readgram                                        |
|                                                                   |
| Expects a type name token followed by identifiers, optionally     |
| separated by commas.  Returns the token that ends the             |
| declaration: the one following a `;', or the first token that is  |
| neither an identifier nor a comma.  If the type name is missing,  |
| the offending token is returned after a complaint.                |
`------------------------------------------------------------------*/

static int
get_type (void)
{
  token_t token;
  char *name;

  token = lex ();

  if (token != tok_typename)
    {
      complain (_("invalid %s declaration"), "%type");
      /* Hand the unexpected token back to the caller.  */
      return token;
    }

  name = xstrdup (token_buffer);

  for (;;)
    {
      token = lex ();

      switch (token)
        {
        case tok_semicolon:
          return lex ();

        case tok_comma:
          break;

        case tok_identifier:
          /* The first declaration wins; a later one naming a different
             type is reported.  */
          if (symval->type_name == NULL)
            symval->type_name = name;
          else if (strcmp (name, symval->type_name) != 0)
            complain (_("type redeclaration for %s"), symval->tag);

          break;

        default:
          return token;
        }
    }
}

#endif
1309 \f
1310 /*------------------------------------------------------------------.
1311 | Parse the input grammar into a one symbol_list structure.  Each   |
1312 | rule is represented by a sequence of symbols: the left hand side  |
1313 | followed by the contents of the right hand side, followed by a    |
1314 | null pointer instead of a symbol to terminate the rule.  The next |
1315 | symbol is the lhs of the following rule.                          |
1316 |                                                                   |
1317 | All guards and actions are copied out to the appropriate files,   |
1318 | labelled by the rule number they apply to.                        |
1319 `------------------------------------------------------------------*/
/* Tokens are read with lex () until `%%' or end of file.  The list is
   extended through P1, its current last node, and GRAMMAR is set to
   its first node.  Along the way NRULES, NITEMS and NVARS are
   updated, and STARTVAL becomes the lhs of the first rule read while
   START_FLAG is still zero.  Once the input is exhausted, a default
   YYSTYPE is emitted if the grammar is untyped, every symbol still of
   class unknown_sym is reported and turned into a nonterminal, and
   NTOKENS is set to NSYMS - NVARS.  */
1320
1321 static void
1322 readgram (void)
1323 {
1324   token_t t;
1325   bucket *lhs = NULL;
  /* P is the node just created, P1 the last node of the list.  */
1326   symbol_list *p;
1327   symbol_list *p1;
1328   bucket *bp;
1329
1330   /* Points to first symbol_list of current rule. its symbol is the
1331      lhs of the rule.  */
1332   symbol_list *crule;
1333   /* Points to the symbol_list preceding crule.  */
1334   symbol_list *crule1;
1335
  /* The list is empty: there is no last node yet.  */
1336   p1 = NULL;
1337
1338   t = lex ();
1339
1340   while (t != tok_two_percents && t != tok_eof)
1341     {
      /* An identifier opens a rule for a new lhs; a bar opens another
         rule for the lhs already in LHS.  */
1342       if (t == tok_identifier || t == tok_bar)
1343         {
          /* Nonzero when the last thing read in this rule was an
             action.  */
1344           int action_flag = 0;
1345           /* Number of symbols in rhs of this rule so far */
1346           int rulelength = 0;
1347           int xactions = 0;     /* JF for error checking */
1348           bucket *first_rhs = 0;
1349
1350           if (t == tok_identifier)
1351             {
1352               lhs = symval;
1353
1354               if (!start_flag)
1355                 {
1356                   startval = lhs;
1357                   start_flag = 1;
1358                 }
1359
1360               t = lex ();
1361               if (t != tok_colon)
1362                 {
1363                   complain (_("ill-formed rule: initial symbol not followed by colon"));
1364                   unlex (t);
1365                 }
1366             }
1367
1368           if (nrules == 0 && t == tok_bar)
1369             {
1370               complain (_("grammar starts with vertical bar"));
1371               lhs = symval;     /* BOGUS: use a random symval */
1372             }
1373           /* start a new rule and record its lhs.  */
1374
1375           nrules++;
1376           nitems++;
1377
1378           p = symbol_list_new (lhs);
1379
          /* Remember the node in front of this rule (NULL for the
             very first rule) so that a dummy rule can later be
             spliced in ahead of it.  */
1380           crule1 = p1;
1381           if (p1)
1382             p1->next = p;
1383           else
1384             grammar = p;
1385
1386           p1 = p;
1387           crule = p;
1388
1389           /* mark the rule's lhs as a nonterminal if not already so.  */
1390
1391           if (lhs->class == unknown_sym)
1392             {
1393               lhs->class = nterm_sym;
1394               lhs->value = nvars;
1395               nvars++;
1396             }
1397           else if (lhs->class == token_sym)
1398             complain (_("rule given for %s, which is a token"), lhs->tag);
1399
1400           /* read the rhs of the rule.  */
1401
1402           for (;;)
1403             {
1404               t = lex ();
              /* `%prec': whatever SYMVAL holds after the next token
                 becomes the rule's precedence symbol; the kind of
                 that token is not checked.  */
1405               if (t == tok_prec)
1406                 {
1407                   t = lex ();
1408                   crule->ruleprec = symval;
1409                   t = lex ();
1410                 }
1411
1412               if (!(t == tok_identifier || t == tok_left_curly))
1413                 break;
1414
1415               /* If next token is an identifier, see if a colon follows it.
1416                  If one does, exit this rule now.  */
1417               if (t == tok_identifier)
1418                 {
1419                   bucket *ssave;
1420                   token_t t1;
1421
                  /* Peek at the following token and push it back.
                     SYMVAL is saved and restored around the peek --
                     presumably because lex () overwrites it; confirm
                     in lex.c.  */
1422                   ssave = symval;
1423                   t1 = lex ();
1424                   unlex (t1);
1425                   symval = ssave;
1426                   if (t1 == tok_colon)
1427                     break;
1428
1429                   if (!first_rhs)       /* JF */
1430                     first_rhs = symval;
1431                   /* Not followed by colon =>
1432                      process as part of this rule's rhs.  */
1433                 }
1434
1435               /* If we just passed an action, that action was in the middle
1436                  of a rule, so make a dummy rule to reduce it to a
1437                  non-terminal.  */
1438               if (action_flag)
1439                 {
1440                   /* Since the action was written out with this rule's
1441                      number, we must give the new rule this number by
1442                      inserting the new rule before it.  */
1443
1444                   /* Make a dummy nonterminal, a gensym.  */
1445                   bucket *sdummy = gensym ();
1446
1447                   /* Make a new rule, whose body is empty, before the
1448                      current one, so that the action just read can
1449                      belong to it.  */
1450                   nrules++;
1451                   nitems++;
1452                   p = symbol_list_new (sdummy);
1453                   /* Attach its lineno to that of the host rule. */
1454                   p->line = crule->line;
1455                   if (crule1)
1456                     crule1->next = p;
1457                   else
1458                     grammar = p;
1459                   /* End of the rule. */
                  /* CRULE1 now designates this terminator, which is
                     again the node right before CRULE.  */
1460                   crule1 = symbol_list_new (NULL);
1461                   crule1->next = crule;
1462
1463                   p->next = crule1;
1464
1465                   /* Insert the dummy generated by that rule into this
1466                      rule.  */
1467                   nitems++;
1468                   p = symbol_list_new (sdummy);
1469                   p1->next = p;
1470                   p1 = p;
1471
1472                   action_flag = 0;
1473                 }
1474
1475               if (t == tok_identifier)
1476                 {
1477                   nitems++;
1478                   p = symbol_list_new (symval);
1479                   p1->next = p;
1480                   p1 = p;
1481                 }
1482               else              /* handle an action.  */
1483                 {
1484                   copy_action (crule, rulelength);
1485                   action_flag = 1;
1486                   xactions++;   /* JF */
1487                 }
              /* Incremented for a symbol and for an action alike.  */
1488               rulelength++;
1489             }                   /* end of  read rhs of rule */
1490
1491           /* Put an empty link in the list to mark the end of this rule  */
1492           p = symbol_list_new (NULL);
1493           p1->next = p;
1494           p1 = p;
1495
          /* The loop above consumes one `%prec SYMBOL' per iteration,
             so T can only be tok_prec here when a second one directly
             followed the first.  */
1496           if (t == tok_prec)
1497             {
1498               complain (_("two @prec's in a row"));
1499               t = lex ();
1500               crule->ruleprec = symval;
1501               t = lex ();
1502             }
1503           if (t == tok_guard)
1504             {
1505               if (!semantic_parser)
1506                 complain (_("%%guard present but %%semantic_parser not specified"));
1507
1508               copy_guard (crule, rulelength);
1509               t = lex ();
1510             }
1511           else if (t == tok_left_curly)
1512             {
1513               /* This case never occurs -wjh */
1514               if (action_flag)
1515                 complain (_("two actions at end of one rule"));
1516               copy_action (crule, rulelength);
1517               action_flag = 1;
1518               xactions++;       /* -wjh */
1519               t = lex ();
1520             }
1521           /* If $$ is being set in default way, report if any type
1522              mismatch.  */
1523           else if (!xactions
1524                    && first_rhs && lhs->type_name != first_rhs->type_name)
1525             {
              /* The pointers differ; it is a clash unless both names
                 exist and compare equal as strings.  */
1526               if (lhs->type_name == 0
1527                   || first_rhs->type_name == 0
1528                   || strcmp (lhs->type_name, first_rhs->type_name))
1529                 complain (_("type clash (`%s' `%s') on default action"),
1530                           lhs->type_name ? lhs->type_name : "",
1531                           first_rhs->type_name ? first_rhs->type_name : "");
1532             }
1533           /* Warn if there is no default for $$ but we need one.  */
1534           else if (!xactions && !first_rhs && lhs->type_name != 0)
1535             complain (_("empty rule for typed nonterminal, and no action"));
1536           if (t == tok_semicolon)
1537             t = lex ();
1538         }
1539 #if 0
1540       /* these things can appear as alternatives to rules.  */
1541 /* NO, they cannot.
1542         a) none of the documentation allows them
1543         b) most of them scan forward until finding a next %
1544                 thus they may swallow lots of intervening rules
1545 */
1546       else if (t == tok_token)
1547         {
1548           parse_token_decl (token_sym, nterm_sym);
1549           t = lex ();
1550         }
1551       else if (t == tok_nterm)
1552         {
1553           parse_token_decl (nterm_sym, token_sym);
1554           t = lex ();
1555         }
1556       else if (t == tok_type)
1557         {
1558           t = get_type ();
1559         }
1560       else if (t == tok_union)
1561         {
1562           parse_union_decl ();
1563           t = lex ();
1564         }
1565       else if (t == tok_expect)
1566         {
1567           parse_expect_decl ();
1568           t = lex ();
1569         }
1570       else if (t == tok_start)
1571         {
1572           parse_start_decl ();
1573           t = lex ();
1574         }
1575 #endif
1576
1577       else
1578         {
          /* Anything else cannot start a rule: report it and move on
             to the next token.  */
1579           complain (_("invalid input: %s"), quote (token_buffer));
1580           t = lex ();
1581         }
1582     }
1583
1584   /* grammar has been read.  Do some checking */
1585
1586   if (nsyms > MAXSHORT)
1587     fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1588            MAXSHORT);
1589   if (nrules == 0)
1590     fatal (_("no rules in the input grammar"));
1591
1592   /* JF put out same default YYSTYPE as YACC does */
1593   if (typed == 0
1594       && !value_components_used)
1595     {
1596       /* We used to use `unsigned long' as YYSTYPE on MSDOS,
1597          but it seems better to be consistent.
1598          Most programs should declare their own type anyway.  */
1599       obstack_sgrow (&attrs_obstack,
1600                            "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1601       if (defines_flag)
1602         obstack_sgrow (&defines_obstack, "\
1603 # ifndef YYSTYPE\n\
1604 #  define YYSTYPE int\n\
1605 # endif\n");
1606     }
1607
1608   /* Report any undefined symbols and consider them nonterminals.  */
1609
1610   for (bp = firstsymbol; bp; bp = bp->next)
1611     if (bp->class == unknown_sym)
1612       {
1613         complain (_
1614                   ("symbol %s is used, but is not defined as a token and has no rules"),
1615                   bp->tag);
1616         bp->class = nterm_sym;
1617         bp->value = nvars++;
1618       }
1619
  /* Every symbol that is not a nonterminal is a token.  */
1620   ntokens = nsyms - nvars;
1621 }
1622 \f
1623 /*--------------------------------------------------------------.
1624 | For named tokens, but not literal ones, define the name.  The |
1625 | value is the user token number.                               |
1626 `--------------------------------------------------------------*/
1627
1628 static void
1629 output_token_defines (struct obstack *oout)
1630 {
1631   bucket *bp;
1632   char *cp, *symbol;
1633   char c;
1634
1635   for (bp = firstsymbol; bp; bp = bp->next)
1636     {
1637       symbol = bp->tag;         /* get symbol */
1638
1639       if (bp->value >= ntokens)
1640         continue;
1641       if (bp->user_token_number == SALIAS)
1642         continue;
1643       if ('\'' == *symbol)
1644         continue;               /* skip literal character */
1645       if (bp == errtoken)
1646         continue;               /* skip error token */
1647       if ('\"' == *symbol)
1648         {
1649           /* use literal string only if given a symbol with an alias */
1650           if (bp->alias)
1651             symbol = bp->alias->tag;
1652           else
1653             continue;
1654         }
1655
1656       /* Don't #define nonliteral tokens whose names contain periods.  */
1657       cp = symbol;
1658       while ((c = *cp++) && c != '.');
1659       if (c != '\0')
1660         continue;
1661
1662       obstack_fgrow2 (oout, "# define\t%s\t%d\n",
1663                       symbol, bp->user_token_number);
1664       if (semantic_parser)
1665         /* FIXME: This is certainly dead wrong, and should be just as
1666            above. --akim.  */
1667         obstack_fgrow2 (oout, "# define\tT%s\t%d\n", symbol, bp->value);
1668     }
1669
1670   obstack_1grow (oout, '\n');
1671 }
1672
1673
1674 /*--------------------.
1675 | Output the header.  |
1676 `--------------------*/
1677
1678 static void
1679 symbols_output (void)
1680 {
1681   if (defines_flag)
1682     {
1683       output_token_defines (&defines_obstack);
1684
1685       if (!pure_parser)
1686         {
1687           if (spec_name_prefix)
1688             obstack_fgrow1 (&defines_obstack, "\nextern YYSTYPE %slval;\n",
1689                             spec_name_prefix);
1690           else
1691             obstack_sgrow (&defines_obstack,
1692                                  "\nextern YYSTYPE yylval;\n");
1693         }
1694
1695       if (semantic_parser)
1696         {
1697           int i;
1698
1699           for (i = ntokens; i < nsyms; i++)
1700             {
1701               /* don't make these for dummy nonterminals made by gensym.  */
1702               if (*tags[i] != '@')
1703                 obstack_fgrow2 (&defines_obstack,
1704                                 "# define\tNT%s\t%d\n", tags[i], i);
1705             }
1706 #if 0
1707           /* `fdefines' is now a temporary file, so we need to copy its
1708              contents in `done', so we can't close it here.  */
1709           fclose (fdefines);
1710           fdefines = NULL;
1711 #endif
1712         }
1713     }
1714 }
1715
1716
1717 /*------------------------------------------------------------------.
1718 | Set TOKEN_TRANSLATIONS.  Check that no two symbols share the same |
1719 | number.                                                           |
1720 `------------------------------------------------------------------*/
1721
1722 static void
1723 token_translations_init (void)
1724 {
1725   bucket *bp = NULL;
1726   int i;
1727
1728   token_translations = XCALLOC (short, max_user_token_number + 1);
1729
1730   /* Initialize all entries for literal tokens to 2, the internal
1731      token number for $undefined., which represents all invalid
1732      inputs.  */
1733   for (i = 0; i <= max_user_token_number; i++)
1734     token_translations[i] = 2;
1735
1736   for (bp = firstsymbol; bp; bp = bp->next)
1737     {
1738       /* Non-terminal? */
1739       if (bp->value >= ntokens)
1740         continue;
1741       /* A token string alias? */
1742       if (bp->user_token_number == SALIAS)
1743         continue;
1744
1745       assert (bp->user_token_number != SUNDEF);
1746
1747       /* A token which translation has already been set? */
1748       if (token_translations[bp->user_token_number] != 2)
1749         complain (_("tokens %s and %s both assigned number %d"),
1750                   tags[token_translations[bp->user_token_number]],
1751                   bp->tag, bp->user_token_number);
1752       token_translations[bp->user_token_number] = bp->value;
1753     }
1754 }
1755
1756
1757 /*------------------------------------------------------------------.
1758 | Assign symbol numbers, and write definition of token names into   |
1759 | FDEFINES.  Set up vectors TAGS and SPREC of names and precedences |
1760 | of symbols.                                                       |
1761 `------------------------------------------------------------------*/
1762
1763 static void
1764 packsymbols (void)
1765 {
1766   bucket *bp = NULL;
1767   int tokno = 1;
1768   int last_user_token_number;
1769   static char DOLLAR[] = "$";
1770
1771   tags = XCALLOC (char *, nsyms + 1);
1772   user_toknums = XCALLOC (short, nsyms + 1);
1773
1774   sprec = XCALLOC (short, nsyms);
1775   sassoc = XCALLOC (short, nsyms);
1776
1777   /* The EOF token. */
1778   tags[0] = DOLLAR;
1779   user_toknums[0] = 0;
1780
1781   max_user_token_number = 256;
1782   last_user_token_number = 256;
1783
1784   for (bp = firstsymbol; bp; bp = bp->next)
1785     {
1786       if (bp->class == nterm_sym)
1787         {
1788           bp->value += ntokens;
1789         }
1790       else if (bp->alias)
1791         {
1792           /* this symbol and its alias are a single token defn.
1793              allocate a tokno, and assign to both check agreement of
1794              ->prec and ->assoc fields and make both the same */
1795           if (bp->value == 0)
1796             bp->value = bp->alias->value = tokno++;
1797
1798           if (bp->prec != bp->alias->prec)
1799             {
1800               if (bp->prec != 0 && bp->alias->prec != 0
1801                   && bp->user_token_number == SALIAS)
1802                 complain (_("conflicting precedences for %s and %s"),
1803                           bp->tag, bp->alias->tag);
1804               if (bp->prec != 0)
1805                 bp->alias->prec = bp->prec;
1806               else
1807                 bp->prec = bp->alias->prec;
1808             }
1809
1810           if (bp->assoc != bp->alias->assoc)
1811             {
1812               if (bp->assoc != 0 && bp->alias->assoc != 0
1813                   && bp->user_token_number == SALIAS)
1814                 complain (_("conflicting assoc values for %s and %s"),
1815                           bp->tag, bp->alias->tag);
1816               if (bp->assoc != 0)
1817                 bp->alias->assoc = bp->assoc;
1818               else
1819                 bp->assoc = bp->alias->assoc;
1820             }
1821
1822           if (bp->user_token_number == SALIAS)
1823             continue;           /* do not do processing below for SALIASs */
1824
1825         }
1826       else                      /* bp->class == token_sym */
1827         {
1828           bp->value = tokno++;
1829         }
1830
1831       if (bp->class == token_sym)
1832         {
1833           if (bp->user_token_number == SUNDEF)
1834             bp->user_token_number = ++last_user_token_number;
1835           if (bp->user_token_number > max_user_token_number)
1836             max_user_token_number = bp->user_token_number;
1837         }
1838
1839       tags[bp->value] = bp->tag;
1840       user_toknums[bp->value] = bp->user_token_number;
1841       sprec[bp->value] = bp->prec;
1842       sassoc[bp->value] = bp->assoc;
1843     }
1844
1845   token_translations_init ();
1846
1847   error_token_number = errtoken->value;
1848
1849   if (!no_parser_flag)
1850     output_token_defines (&table_obstack);
1851
1852   if (startval->class == unknown_sym)
1853     fatal (_("the start symbol %s is undefined"), startval->tag);
1854   else if (startval->class == token_sym)
1855     fatal (_("the start symbol %s is a token"), startval->tag);
1856
1857   start_symbol = startval->value;
1858 }
1859
1860
1861 /*---------------------------------------------------------------.
1862 | Convert the rules into the representation using RRHS, RLHS and |
1863 | RITEMS.                                                        |
1864 `---------------------------------------------------------------*/
1865
1866 static void
1867 packgram (void)
1868 {
1869   int itemno;
1870   int ruleno;
1871   symbol_list *p;
1872
1873   ritem = XCALLOC (short, nitems + 1);
1874   rule_table = XCALLOC (rule_t, nrules) - 1;
1875
1876   itemno = 0;
1877   ruleno = 1;
1878
1879   p = grammar;
1880   while (p)
1881     {
1882       bucket *ruleprec = p->ruleprec;
1883       rule_table[ruleno].lhs = p->sym->value;
1884       rule_table[ruleno].rhs = itemno;
1885       rule_table[ruleno].line = p->line;
1886       rule_table[ruleno].useful = TRUE;
1887
1888       p = p->next;
1889       while (p && p->sym)
1890         {
1891           ritem[itemno++] = p->sym->value;
1892           /* A rule gets by default the precedence and associativity
1893              of the last token in it.  */
1894           if (p->sym->class == token_sym)
1895             {
1896               rule_table[ruleno].prec = p->sym->prec;
1897               rule_table[ruleno].assoc = p->sym->assoc;
1898             }
1899           if (p)
1900             p = p->next;
1901         }
1902
1903       /* If this rule has a %prec,
1904          the specified symbol's precedence replaces the default.  */
1905       if (ruleprec)
1906         {
1907           rule_table[ruleno].prec = ruleprec->prec;
1908           rule_table[ruleno].assoc = ruleprec->assoc;
1909           rule_table[ruleno].precsym = ruleprec->value;
1910         }
1911
1912       ritem[itemno++] = -ruleno;
1913       ruleno++;
1914
1915       if (p)
1916         p = p->next;
1917     }
1918
1919   ritem[itemno] = 0;
1920
1921   if (trace_flag)
1922     ritem_print (stderr);
1923 }
1924 \f
1925 /*-------------------------------------------------------------------.
1926 | Read in the grammar specification and record it in the format      |
1927 | described in gram.h.  All guards are copied into the GUARD_OBSTACK |
1928 | and all actions into ACTION_OBSTACK, in each case forming the body |
1929 | of a C function (YYGUARD or YYACTION) which contains a switch      |
1930 | statement to decide which guard or action to execute.              |
1931 `-------------------------------------------------------------------*/
1932
1933 void
1934 reader (void)
1935 {
1936   start_flag = 0;
1937   startval = NULL;              /* start symbol not specified yet. */
1938
1939   nsyms = 1;
1940   nvars = 0;
1941   nrules = 0;
1942   nitems = 0;
1943
1944   typed = 0;
1945   lastprec = 0;
1946
1947   semantic_parser = 0;
1948   pure_parser = 0;
1949
1950   grammar = NULL;
1951
1952   lex_init ();
1953   lineno = 1;
1954
1955   /* Initialize the symbol table.  */
1956   tabinit ();
1957   /* Construct the error token */
1958   errtoken = getsym ("error");
1959   errtoken->class = token_sym;
1960   errtoken->user_token_number = 256;    /* Value specified by POSIX.  */
1961   /* Construct a token that represents all undefined literal tokens.
1962      It is always token number 2.  */
1963   undeftoken = getsym ("$undefined.");
1964   undeftoken->class = token_sym;
1965   undeftoken->user_token_number = 2;
1966
1967   /* Read the declaration section.  Copy %{ ... %} groups to
1968      TABLE_OBSTACK and FDEFINES file.  Also notice any %token, %left,
1969      etc. found there.  */
1970   obstack_fgrow3 (&table_obstack, "\
1971 /* %s, made from %s\n\
1972    by GNU bison %s.  */\n\
1973 \n",
1974                   no_parser_flag ? "Bison-generated parse tables" : "A Bison parser",
1975                   infile, VERSION);
1976
1977   obstack_sgrow (&table_obstack,
1978                        "#define YYBISON 1  /* Identify Bison output.  */\n\n");
1979   read_declarations ();
1980   /* Start writing the guard and action files, if they are needed.  */
1981   output_headers ();
1982   /* Read in the grammar, build grammar in list form.  Write out
1983      guards and actions.  */
1984   readgram ();
1985   /* Now we know whether we need the line-number stack.  If we do,
1986      write its type into the .tab.h file.  */
1987   if (defines_flag)
1988     reader_output_yylsp (&defines_obstack);
1989   /* Write closing delimiters for actions and guards.  */
1990   output_trailers ();
1991   if (locations_flag)
1992     obstack_sgrow (&table_obstack, "#define YYLSP_NEEDED 1\n\n");
1993   /* Assign the symbols their symbol numbers.  Write #defines for the
1994      token symbols into FDEFINES if requested.  */
1995   packsymbols ();
1996   /* Convert the grammar into the format described in gram.h.  */
1997   packgram ();
1998   /* Output the headers. */
1999   symbols_output ();
2000 }
2001
2002
2003 /*------------------------------------------------------------------.
2004 | Define YYLTYPE.  Cannot be in the skeleton since we might have to |
2005 | output it in the headers if --defines is used.                    |
2006 `------------------------------------------------------------------*/
2007
2008 void
2009 reader_output_yylsp (struct obstack *oout)
2010 {
2011   if (locations_flag)
2012     obstack_sgrow (oout, "\
2013 \n\
2014 #ifndef YYLTYPE\n\
2015 typedef struct yyltype\n\
2016 {\n\
2017   int first_line;\n\
2018   int first_column;\n\
2019 \n\
2020   int last_line;\n\
2021   int last_column;\n\
2022 } yyltype;\n\
2023 \n\
2024 # define YYLTYPE yyltype\n\
2025 #endif\n\
2026 \n");
2027 }