src/reader.c

   1 /* Input parser for bison
   2    Copyright 1984, 1986, 1989, 1992, 1998, 2000, 2001
   3    Free Software Foundation, Inc.
   4
   5    This file is part of Bison, the GNU Compiler Compiler.
   6
   7    Bison is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 2, or (at your option)
  10    any later version.
  11
  12    Bison is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with Bison; see the file COPYING.  If not, write to
  19    the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20    Boston, MA 02111-1307, USA.  */
  21
  22
#include "system.h"
#include <limits.h>
#include "obstack.h"
#include "quotearg.h"
#include "quote.h"
#include "getargs.h"
#include "files.h"
#include "symtab.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
  36
  37 typedef struct symbol_list
  38 {
  39   struct symbol_list *next;
  40   bucket *sym;
  41   int line;
  42   bucket *ruleprec;
  43 }
  44 symbol_list;
  45
/* Current line number in the grammar file; incremented each time a
   newline is consumed from the input.  */
int lineno;

/* NOTE(review): exported tables that are not filled in within this
   part of the file -- presumably the symbol names and the
   user-assigned token numbers; confirm where they are built.  */
char **tags;
short *user_toknums;

/* NOTE(review): presumably the head of the symbol list holding the
   grammar rules; it is built outside this part of the file.  */
static symbol_list *grammar;

/* Nonzero once a valid %start declaration has been parsed.  */
static int start_flag;

/* The symbol named by the %start declaration.  */
static bucket *startval;

/* Nonzero if components of semantic values are used, implying
   they must be unions.  */
static int value_components_used;

/* Nonzero if %union has been seen.  */
static int typed;

/* Incremented for each %left, %right or %nonassoc seen */
static int lastprec;

/* NOTE(review): presumably the buckets of the `error' token and of
   the undefined token; they are set outside this part of the
   file.  */
static bucket *errtoken;
static bucket *undeftoken;
  65
  66
  67 static symbol_list *
  68 symbol_list_new (bucket *sym)
  69 {
  70   symbol_list *res = XMALLOC (symbol_list, 1);
  71   res->next = NULL;
  72   res->sym = sym;
  73   res->line = lineno;
  74   res->ruleprec = NULL;
  75   return res;
  76 }
  77
  78 \f
  79
  80 /*===================\
  81 | Low level lexing.  |
  82 \===================*/
  83
  84 static void
  85 skip_to_char (int target)
  86 {
  87   int c;
  88   if (target == '\n')
  89     complain (_("   Skipping to next \\n"));
  90   else
  91     complain (_("   Skipping to next %c"), target);
  92
  93   do
  94     c = skip_white_space ();
  95   while (c != target && c != EOF);
  96   if (c != EOF)
  97     ungetc (c, finput);
  98 }
  99
 100
 101 /*---------------------------------------------------------.
 102 | Read a signed integer from STREAM and return its value.  |
 103 `---------------------------------------------------------*/
 104
 105 static inline int
 106 read_signed_integer (FILE *stream)
 107 {
 108   int c = getc (stream);
 109   int sign = 1;
 110   int n = 0;
 111
 112   if (c == '-')
 113     {
 114       c = getc (stream);
 115       sign = -1;
 116     }
 117
 118   while (isdigit (c))
 119     {
 120       n = 10 * n + (c - '0');
 121       c = getc (stream);
 122     }
 123
 124   ungetc (c, stream);
 125
 126   return sign * n;
 127 }
 128 \f
 129 /*--------------------------------------------------------------.
 130 | Get the data type (alternative in the union) of the value for |
 131 | symbol N in rule RULE.                                        |
 132 `--------------------------------------------------------------*/
 133
 134 static char *
 135 get_type_name (int n, symbol_list *rule)
 136 {
 137   int i;
 138   symbol_list *rp;
 139
 140   if (n < 0)
 141     {
 142       complain (_("invalid $ value"));
 143       return NULL;
 144     }
 145
 146   rp = rule;
 147   i = 0;
 148
 149   while (i < n)
 150     {
 151       rp = rp->next;
 152       if (rp == NULL || rp->sym == NULL)
 153         {
 154           complain (_("invalid $ value"));
 155           return NULL;
 156         }
 157       i++;
 158     }
 159
 160   return rp->sym->type_name;
 161 }
 162 \f
 163 /*------------------------------------------------------------.
 164 | Dump the string from FIN to OOUT if non null.  MATCH is the |
 165 | delimiter of the string (either ' or ").                    |
 166 `------------------------------------------------------------*/
 167
 168 static inline void
 169 copy_string (FILE *fin, struct obstack *oout, int match)
 170 {
 171   int c;
 172
 173   obstack_1grow (oout, match);
 174
 175   c = getc (fin);
 176
 177   while (c != match)
 178     {
 179       if (c == EOF)
 180         fatal (_("unterminated string at end of file"));
 181       if (c == '\n')
 182         {
 183           complain (_("unterminated string"));
 184           ungetc (c, fin);
 185           c = match;            /* invent terminator */
 186           continue;
 187         }
 188
 189       obstack_1grow (oout, c);
 190
 191       if (c == '\\')
 192         {
 193           c = getc (fin);
 194           if (c == EOF)
 195             fatal (_("unterminated string at end of file"));
 196           obstack_1grow (oout, c);
 197
 198           if (c == '\n')
 199             lineno++;
 200         }
 201
 202       c = getc (fin);
 203     }
 204
 205   obstack_1grow (oout, c);
 206 }
 207
 208
 209 /*-----------------------------------------------------------------.
 210 | Dump the wannabee comment from IN to OUT1 and OUT2 (which can be |
 211 | NULL).  In fact we just saw a `/', which might or might not be a |
 212 | comment.  In any case, copy what we saw.                         |
 213 |                                                                  |
 214 | OUT2 might be NULL.                                              |
 215 `-----------------------------------------------------------------*/
 216
 217 static inline void
 218 copy_comment2 (FILE *fin, struct obstack *oout1, struct obstack *oout2)
 219 {
 220   int cplus_comment;
 221   int ended;
 222   int c;
 223
 224   /* We read a `/', output it. */
 225   obstack_1grow (oout1, '/');
 226   if (oout2)
 227     obstack_1grow (oout2, '/');
 228
 229   switch ((c = getc (fin)))
 230     {
 231     case '/':
 232       cplus_comment = 1;
 233       break;
 234     case '*':
 235       cplus_comment = 0;
 236       break;
 237     default:
 238       ungetc (c, fin);
 239       return;
 240     }
 241
 242   obstack_1grow (oout1, c);
 243   if (oout2)
 244     obstack_1grow (oout2, c);
 245   c = getc (fin);
 246
 247   ended = 0;
 248   while (!ended)
 249     {
 250       if (!cplus_comment && c == '*')
 251         {
 252           while (c == '*')
 253             {
 254               obstack_1grow (oout1, c);
 255               if (oout2)
 256                 obstack_1grow (oout2, c);
 257               c = getc (fin);
 258             }
 259
 260           if (c == '/')
 261             {
 262               obstack_1grow (oout1, c);
 263               if (oout2)
 264                 obstack_1grow (oout2, c);
 265               ended = 1;
 266             }
 267         }
 268       else if (c == '\n')
 269         {
 270           lineno++;
 271           obstack_1grow (oout1, c);
 272           if (oout2)
 273             obstack_1grow (oout2, c);
 274           if (cplus_comment)
 275             ended = 1;
 276           else
 277             c = getc (fin);
 278         }
 279       else if (c == EOF)
 280         fatal (_("unterminated comment"));
 281       else
 282         {
 283           obstack_1grow (oout1, c);
 284           if (oout2)
 285             obstack_1grow (oout2, c);
 286           c = getc (fin);
 287         }
 288     }
 289 }
 290
 291
 292 /*-------------------------------------------------------------------.
 293 | Dump the comment (actually the current string starting with a `/') |
 294 | from FIN to OOUT.                                                  |
 295 `-------------------------------------------------------------------*/
 296
 297 static inline void
 298 copy_comment (FILE *fin, struct obstack *oout)
 299 {
 300   copy_comment2 (fin, oout, NULL);
 301 }
 302
 303
 304 /*-----------------------------------------------------------------.
 305 | FIN is pointing to a location (i.e., a `@').  Output to OOUT a   |
 306 | reference to this location. STACK_OFFSET is the number of values |
 307 | in the current rule so far, which says where to find `@0' with   |
 308 | respect to the top of the stack.                                 |
 309 `-----------------------------------------------------------------*/
 310
 311 static inline void
 312 copy_at (FILE *fin, struct obstack *oout,
 313          struct symbol_list *rule, int stack_offset)
 314 {
 315   symbol_list *rp;
 316   int c;
 317
 318   c = getc (fin);
 319   if (c == '$')
 320     {
 321       obstack_sgrow (oout, "yyloc");
 322       locations_flag = 1;
 323     }
 324   else if (isdigit (c) || c == '-')
 325     {
 326       int n, i;
 327
 328       ungetc (c, fin);
 329       n = read_signed_integer (fin);
 330
 331       rp = rule;
 332       i = 0;
 333
 334       while (i < n)
 335         {
 336           rp = rp->next;
 337           if (rp == NULL)
 338             {
 339               complain (_("invalid @ value"));
 340               return;
 341             }
 342           i++;
 343         }
 344
 345       obstack_fgrow1 (oout, "yylsp[%d]", n - stack_offset);
 346       locations_flag = 1;
 347     }
 348   else
 349     {
 350       char buf[] = "@c";
 351       buf[1] = c;
 352       complain (_("%s is invalid"), quote (buf));
 353     }
 354 }
 355
 356
 357 /*-------------------------------------------------------------------.
 358 | FIN is pointing to a wannabee semantic value (i.e., a `$').        |
 359 |                                                                    |
 360 | Possible inputs: $[<TYPENAME>]($|integer)                          |
 361 |                                                                    |
 362 | Output to OOUT a reference to this semantic value. STACK_OFFSET is |
 363 | the number of values in the current rule so far, which says where  |
 364 | to find `$0' with respect to the top of the stack.                 |
 365 `-------------------------------------------------------------------*/
 366
 367 static inline void
 368 copy_dollar (FILE *fin, struct obstack *oout,
 369              symbol_list *rule, int stack_offset)
 370 {
 371   int c = getc (fin);
 372   const char *type_name = NULL;
 373
 374   /* Get the type name if explicit. */
 375   if (c == '<')
 376     {
 377       read_type_name (fin);
 378       type_name = token_buffer;
 379       value_components_used = 1;
 380       c = getc (fin);
 381     }
 382
 383   if (c == '$')
 384     {
 385       obstack_sgrow (oout, "yyval");
 386
 387       if (!type_name)
 388         type_name = get_type_name (0, rule);
 389       if (type_name)
 390         obstack_fgrow1 (oout, ".%s", type_name);
 391       if (!type_name && typed)
 392         complain (_("$$ of `%s' has no declared type"),
 393                   rule->sym->tag);
 394     }
 395   else if (isdigit (c) || c == '-')
 396     {
 397       int n;
 398       ungetc (c, fin);
 399       n = read_signed_integer (fin);
 400
 401       if (!type_name && n > 0)
 402         type_name = get_type_name (n, rule);
 403
 404       obstack_fgrow1 (oout, "yyvsp[%d]", n - stack_offset);
 405
 406       if (type_name)
 407         obstack_fgrow1 (oout, ".%s", type_name);
 408       if (!type_name && typed)
 409         complain (_("$%d of `%s' has no declared type"),
 410                   n, rule->sym->tag);
 411     }
 412   else
 413     {
 414       char buf[] = "$c";
 415       buf[1] = c;
 416       complain (_("%s is invalid"), quote (buf));
 417     }
 418 }
 419 \f
 420 /*-------------------------------------------------------------------.
 421 | Copy the contents of a `%{ ... %}' into the definitions file.  The |
 422 | `%{' has already been read.  Return after reading the `%}'.        |
 423 `-------------------------------------------------------------------*/
 424
 425 static void
 426 copy_definition (void)
 427 {
 428   int c;
 429   /* -1 while reading a character if prev char was %. */
 430   int after_percent;
 431
 432   if (!no_lines_flag)
 433     obstack_fgrow2 (&attrs_obstack, "#line %d %s\n",
 434                     lineno, quotearg_style (c_quoting_style, infile));
 435
 436   after_percent = 0;
 437
 438   c = getc (finput);
 439
 440   for (;;)
 441     {
 442       switch (c)
 443         {
 444         case '\n':
 445           obstack_1grow (&attrs_obstack, c);
 446           lineno++;
 447           break;
 448
 449         case '%':
 450           after_percent = -1;
 451           break;
 452
 453         case '\'':
 454         case '"':
 455           copy_string (finput, &attrs_obstack, c);
 456           break;
 457
 458         case '/':
 459           copy_comment (finput, &attrs_obstack);
 460           break;
 461
 462         case EOF:
 463           fatal ("%s", _("unterminated `%{' definition"));
 464
 465         default:
 466           obstack_1grow (&attrs_obstack, c);
 467         }
 468
 469       c = getc (finput);
 470
 471       if (after_percent)
 472         {
 473           if (c == '}')
 474             return;
 475           obstack_1grow (&attrs_obstack, '%');
 476         }
 477       after_percent = 0;
 478     }
 479 }
 480
 481
 482 /*-------------------------------------------------------------------.
 483 | Parse what comes after %token or %nterm.  For %token, WHAT_IS is   |
 484 | token_sym and WHAT_IS_NOT is nterm_sym.  For %nterm, the arguments |
 485 | are reversed.                                                      |
 486 `-------------------------------------------------------------------*/
 487
 488 static void
 489 parse_token_decl (symbol_class what_is, symbol_class what_is_not)
 490 {
 491   token_t token = tok_undef;
 492   char *typename = NULL;
 493
 494   /* The symbol being defined.  */
 495   struct bucket *symbol = NULL;
 496
 497   /* After `%token' and `%nterm', any number of symbols maybe be
 498      defined.  */
 499   for (;;)
 500     {
 501       int tmp_char = ungetc (skip_white_space (), finput);
 502
 503       /* `%' (for instance from `%token', or from `%%' etc.) is the
 504          only valid means to end this declaration.  */
 505       if (tmp_char == '%')
 506         return;
 507       if (tmp_char == EOF)
 508         fatal (_("Premature EOF after %s"), token_buffer);
 509
 510       token = lex ();
 511       if (token == tok_comma)
 512         {
 513           symbol = NULL;
 514           continue;
 515         }
 516       if (token == tok_typename)
 517         {
 518           typename = xstrdup (token_buffer);
 519           value_components_used = 1;
 520           symbol = NULL;
 521         }
 522       else if (token == tok_identifier && *symval->tag == '\"' && symbol)
 523         {
 524           if (symval->alias)
 525             warn (_("symbol `%s' used more than once as a literal string"),
 526                   symval->tag);
 527           else if (symbol->alias)
 528             warn (_("symbol `%s' given more than one literal string"),
 529                   symbol->tag);
 530           else
 531             {
 532               symval->class = token_sym;
 533               symval->type_name = typename;
 534               symval->user_token_number = symbol->user_token_number;
 535               symbol->user_token_number = SALIAS;
 536               symval->alias = symbol;
 537               symbol->alias = symval;
 538               /* symbol and symval combined are only one symbol */
 539               nsyms--;
 540             }
 541           symbol = NULL;
 542         }
 543       else if (token == tok_identifier)
 544         {
 545           int oldclass = symval->class;
 546           symbol = symval;
 547
 548           if (symbol->class == what_is_not)
 549             complain (_("symbol %s redefined"), symbol->tag);
 550           symbol->class = what_is;
 551           if (what_is == nterm_sym && oldclass != nterm_sym)
 552             symbol->value = nvars++;
 553
 554           if (typename)
 555             {
 556               if (symbol->type_name == NULL)
 557                 symbol->type_name = typename;
 558               else if (strcmp (typename, symbol->type_name) != 0)
 559                 complain (_("type redeclaration for %s"), symbol->tag);
 560             }
 561         }
 562       else if (symbol && token == tok_number)
 563         {
 564           symbol->user_token_number = numval;
 565         }
 566       else
 567         {
 568           complain (_("`%s' is invalid in %s"),
 569                     token_buffer,
 570                     (what_is == token_sym) ? "%token" : "%nterm");
 571           skip_to_char ('%');
 572         }
 573     }
 574
 575 }
 576
 577
 578 /*------------------------------.
 579 | Parse what comes after %start |
 580 `------------------------------*/
 581
 582 static void
 583 parse_start_decl (void)
 584 {
 585   if (start_flag)
 586     complain (_("multiple %s declarations"), "%start");
 587   if (lex () != tok_identifier)
 588     complain (_("invalid %s declaration"), "%start");
 589   else
 590     {
 591       start_flag = 1;
 592       startval = symval;
 593     }
 594 }
 595
 596 /*-----------------------------------------------------------.
 597 | read in a %type declaration and record its information for |
 598 | get_type_name to access                                    |
 599 `-----------------------------------------------------------*/
 600
 601 static void
 602 parse_type_decl (void)
 603 {
 604   char *name;
 605
 606   if (lex () != tok_typename)
 607     {
 608       complain ("%s", _("%type declaration has no <typename>"));
 609       skip_to_char ('%');
 610       return;
 611     }
 612
 613   name = xstrdup (token_buffer);
 614
 615   for (;;)
 616     {
 617       token_t t;
 618       int tmp_char = ungetc (skip_white_space (), finput);
 619
 620       if (tmp_char == '%')
 621         return;
 622       if (tmp_char == EOF)
 623         fatal (_("Premature EOF after %s"), token_buffer);
 624
 625       t = lex ();
 626
 627       switch (t)
 628         {
 629
 630         case tok_comma:
 631         case tok_semicolon:
 632           break;
 633
 634         case tok_identifier:
 635           if (symval->type_name == NULL)
 636             symval->type_name = name;
 637           else if (strcmp (name, symval->type_name) != 0)
 638             complain (_("type redeclaration for %s"), symval->tag);
 639
 640           break;
 641
 642         default:
 643           complain (_("invalid %%type declaration due to item: %s"),
 644                     token_buffer);
 645           skip_to_char ('%');
 646         }
 647     }
 648 }
 649
 650
 651
 652 /*----------------------------------------------------------------.
 653 | Read in a %left, %right or %nonassoc declaration and record its |
 654 | information.                                                    |
 655 `----------------------------------------------------------------*/
 656
 657 static void
 658 parse_assoc_decl (associativity assoc)
 659 {
 660   char *name = NULL;
 661   int prev = 0;
 662
 663   lastprec++;                   /* Assign a new precedence level, never 0.  */
 664
 665   for (;;)
 666     {
 667       token_t t;
 668       int tmp_char = ungetc (skip_white_space (), finput);
 669
 670       if (tmp_char == '%')
 671         return;
 672       if (tmp_char == EOF)
 673         fatal (_("Premature EOF after %s"), token_buffer);
 674
 675       t = lex ();
 676
 677       switch (t)
 678         {
 679         case tok_typename:
 680           name = xstrdup (token_buffer);
 681           break;
 682
 683         case tok_comma:
 684           break;
 685
 686         case tok_identifier:
 687           if (symval->prec != 0)
 688             complain (_("redefining precedence of %s"), symval->tag);
 689           symval->prec = lastprec;
 690           symval->assoc = assoc;
 691           if (symval->class == nterm_sym)
 692             complain (_("symbol %s redefined"), symval->tag);
 693           symval->class = token_sym;
 694           if (name)
 695             {                   /* record the type, if one is specified */
 696               if (symval->type_name == NULL)
 697                 symval->type_name = name;
 698               else if (strcmp (name, symval->type_name) != 0)
 699                 complain (_("type redeclaration for %s"), symval->tag);
 700             }
 701           break;
 702
 703         case tok_number:
 704           if (prev == tok_identifier)
 705             {
 706               symval->user_token_number = numval;
 707             }
 708           else
 709             {
 710               complain (_
 711                         ("invalid text (%s) - number should be after identifier"),
 712 token_buffer);
 713               skip_to_char ('%');
 714             }
 715           break;
 716
 717         case tok_semicolon:
 718           return;
 719
 720         default:
 721           complain (_("unexpected item: %s"), token_buffer);
 722           skip_to_char ('%');
 723         }
 724
 725       prev = t;
 726     }
 727 }
 728
 729
 730
 731 /*--------------------------------------------------------------.
 732 | Copy the union declaration into ATTRS_OBSTACK (and fdefines), |
 733 | where it is made into the definition of YYSTYPE, the type of  |
 734 | elements of the parser value stack.                           |
 735 `--------------------------------------------------------------*/
 736
 737 static void
 738 parse_union_decl (void)
 739 {
 740   int c;
 741   int count = 0;
 742   const char *prologue = "\
 743 #ifndef YYSTYPE\n\
 744 typedef union";
 745   const char *epilogue = "\
 746  yystype;\n\
 747 # define YYSTYPE yystype\n\
 748 # define YYSTYPE_IS_TRIVIAL 1\n\
 749 #endif\n";
 750
 751   if (typed)
 752     complain (_("multiple %s declarations"), "%union");
 753
 754   typed = 1;
 755
 756   if (!no_lines_flag)
 757     obstack_fgrow2 (&attrs_obstack, "\n#line %d %s\n",
 758                     lineno, quotearg_style (c_quoting_style, infile));
 759   else
 760     obstack_1grow (&attrs_obstack, '\n');
 761
 762   obstack_sgrow (&attrs_obstack, prologue);
 763   if (defines_flag)
 764     obstack_sgrow (&defines_obstack, prologue);
 765
 766   c = getc (finput);
 767
 768   while (c != EOF)
 769     {
 770
 771       /* If C contains '/', it is output by copy_comment ().  */
 772       if (c != '/')
 773         {
 774           obstack_1grow (&attrs_obstack, c);
 775           if (defines_flag)
 776             obstack_1grow (&defines_obstack, c);
 777         }
 778
 779       switch (c)
 780         {
 781         case '\n':
 782           lineno++;
 783           break;
 784
 785         case '/':
 786           copy_comment2 (finput, &defines_obstack, &attrs_obstack);
 787           break;
 788
 789         case '{':
 790           count++;
 791           break;
 792
 793         case '}':
 794           if (count == 0)
 795             complain (_("unmatched %s"), "`}'");
 796           count--;
 797           if (count <= 0)
 798             {
 799               obstack_sgrow (&attrs_obstack, epilogue);
 800               if (defines_flag)
 801                 obstack_sgrow (&defines_obstack, epilogue);
 802               /* JF don't choke on trailing semi */
 803               c = skip_white_space ();
 804               if (c != ';')
 805                 ungetc (c, finput);
 806               return;
 807             }
 808         }
 809
 810       c = getc (finput);
 811     }
 812 }
 813
 814
 815 /*-------------------------------------------------------.
 816 | Parse the declaration %expect N which says to expect N |
 817 | shift-reduce conflicts.                                |
 818 `-------------------------------------------------------*/
 819
 820 static void
 821 parse_expect_decl (void)
 822 {
 823   int c = skip_white_space ();
 824   ungetc (c, finput);
 825
 826   if (!isdigit (c))
 827     complain (_("argument of %%expect is not an integer"));
 828   else
 829     expected_conflicts = read_signed_integer (finput);
 830 }
 831
 832
 833 /*-------------------------------------------------------------------.
 834 | Parse what comes after %thong.  the full syntax is                 |
 835 |                                                                    |
 836 |                %thong <type> token number literal                  |
 837 |                                                                    |
 838 | the <type> or number may be omitted.  The number specifies the     |
 839 | user_token_number.                                                 |
 840 |                                                                    |
 841 | Two symbols are entered in the table, one for the token symbol and |
 842 | one for the literal.  Both are given the <type>, if any, from the  |
 843 | declaration.  The ->user_token_number of the first is SALIAS and   |
 844 | the ->user_token_number of the second is set to the number, if     |
 845 | any, from the declaration.  The two symbols are linked via         |
 846 | pointers in their ->alias fields.                                  |
 847 |                                                                    |
 848 | During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter,    |
 849 | only the literal string is retained it is the literal string that  |
 850 | is output to yytname                                               |
 851 `-------------------------------------------------------------------*/
 852
 853 static void
 854 parse_thong_decl (void)
 855 {
 856   token_t token;
 857   struct bucket *symbol;
 858   char *typename = 0;
 859   int usrtoknum = SUNDEF;
 860
 861   token = lex ();               /* fetch typename or first token */
 862   if (token == tok_typename)
 863     {
 864       typename = xstrdup (token_buffer);
 865       value_components_used = 1;
 866       token = lex ();           /* fetch first token */
 867     }
 868
 869   /* process first token */
 870
 871   if (token != tok_identifier)
 872     {
 873       complain (_("unrecognized item %s, expected an identifier"),
 874                 token_buffer);
 875       skip_to_char ('%');
 876       return;
 877     }
 878   symval->class = token_sym;
 879   symval->type_name = typename;
 880   symval->user_token_number = SALIAS;
 881   symbol = symval;
 882
 883   token = lex ();               /* get number or literal string */
 884
 885   if (token == tok_number)
 886     {
 887       usrtoknum = numval;
 888       token = lex ();           /* okay, did number, now get literal */
 889     }
 890
 891   /* process literal string token */
 892
 893   if (token != tok_identifier || *symval->tag != '\"')
 894     {
 895       complain (_("expected string constant instead of %s"), token_buffer);
 896       skip_to_char ('%');
 897       return;
 898     }
 899   symval->class = token_sym;
 900   symval->type_name = typename;
 901   symval->user_token_number = usrtoknum;
 902
 903   symval->alias = symbol;
 904   symbol->alias = symval;
 905
 906   /* symbol and symval combined are only one symbol.  */
 907   nsyms--;
 908 }
 909
 910
 911 /*------------------------------------------------------------------.
 912 | Parse a double quoted parameter. It was used for                  |
 913 | %{source,header}_extension.  For the moment, It is not used since |
 914 | extension features have been removed.                             |
 915 `------------------------------------------------------------------*/
 916
 917 #if 0
 918
 919 static const char *
 920 parse_dquoted_param (const char *from)
 921 {
 922   char buff[32];
 923   int c;
 924   int i;
 925
 926   c = skip_white_space ();
 927
 928   if (c != '"')
 929     {
 930       ungetc (c, finput);
 931       complain (_("invalid %s declaration"), from);
 932       return NULL;
 933     }
 934
 935   c = getc (finput);
 936   for (i = 0; (c >= '!') && (c <= '~'); i++)
 937     {
 938       if (c == '"')
 939         break;
 940
 941       if (c == '\\')
 942         {
 943           c = getc (finput);
 944           if ((c < '!') && (c > '~'))
 945             break;
 946         }
 947
 948       buff[i] = c;
 949       c = getc (finput);
 950     }
 951   buff[i] = '\0';
 952
 953   if (c != '"')
 954     {
 955       ungetc (c, finput);
 956       complain (_("invalid %s declaration"), from);
 957       return NULL;
 958     }
 959
 960   return xstrdup (buff);
 961 }
 962
 963 #endif
 964
 965
 966 /*----------------------------------------------------------------.
 967 | Read from finput until `%%' is seen.  Discard the `%%'.  Handle |
 968 | any `%' declarations, and copy the contents of any `%{ ... %}'  |
 969 | groups to ATTRS_OBSTACK.                                        |
 970 `----------------------------------------------------------------*/
 971
 972 static void
 973 read_declarations (void)
 974 {
 975   for (;;)
 976     {
 977       int c = skip_white_space ();
 978
 979       if (c == '%')
 980         {
 981           token_t tok = parse_percent_token ();
 982
 983           switch (tok)
 984             {
 985             case tok_two_percents:
 986               return;
 987
 988             case tok_percent_left_curly:
 989               copy_definition ();
 990               break;
 991
 992             case tok_token:
 993               parse_token_decl (token_sym, nterm_sym);
 994               break;
 995
 996             case tok_nterm:
 997               parse_token_decl (nterm_sym, token_sym);
 998               break;
 999
1000             case tok_type:
1001               parse_type_decl ();
1002               break;
1003
1004             case tok_start:
1005               parse_start_decl ();
1006               break;
1007
1008             case tok_union:
1009               parse_union_decl ();
1010               break;
1011
1012             case tok_expect:
1013               parse_expect_decl ();
1014               break;
1015
1016             case tok_thong:
1017               parse_thong_decl ();
1018               break;
1019
1020             case tok_left:
1021               parse_assoc_decl (left_assoc);
1022               break;
1023
1024             case tok_right:
1025               parse_assoc_decl (right_assoc);
1026               break;
1027
1028             case tok_nonassoc:
1029               parse_assoc_decl (non_assoc);
1030               break;
1031
1032             case tok_noop:
1033               break;
1034
1035             case tok_stropt:
1036             case tok_intopt:
1037             case tok_obsolete:
1038               abort ();
1039               break;
1040
1041             case tok_illegal:
1042             default:
1043               complain (_("unrecognized: %s"), token_buffer);
1044               skip_to_char ('%');
1045             }
1046         }
1047       else if (c == EOF)
1048         fatal (_("no input grammar"));
1049       else
1050         {
1051           char buf[] = "c";
1052           buf[0] = c;
1053           complain (_("unknown character: %s"), quote (buf));
1054           skip_to_char ('%');
1055         }
1056     }
1057 }
1058 \f
1059 /*-------------------------------------------------------------------.
1060 | Assuming that a `{' has just been seen, copy everything up to the  |
1061 | matching `}' into the actions file.  STACK_OFFSET is the number of |
1062 | values in the current rule so far, which says where to find `$0'   |
1063 | with respect to the top of the stack.                              |
1064 `-------------------------------------------------------------------*/
1065
1066 static void
1067 copy_action (symbol_list *rule, int stack_offset)
1068 {
1069   int c;
1070   int count;
1071   char buf[4096];
1072
1073   /* offset is always 0 if parser has already popped the stack pointer */
1074   if (semantic_parser)
1075     stack_offset = 0;
1076
1077   sprintf (buf, "\ncase %d:\n", nrules);
1078   obstack_grow (&action_obstack, buf, strlen (buf));
1079
1080   if (!no_lines_flag)
1081     {
1082       sprintf (buf, "#line %d %s\n",
1083                lineno, quotearg_style (c_quoting_style, infile));
1084       obstack_grow (&action_obstack, buf, strlen (buf));
1085     }
1086   obstack_1grow (&action_obstack, '{');
1087
1088   count = 1;
1089   c = getc (finput);
1090
1091   while (count > 0)
1092     {
1093       while (c != '}')
1094         {
1095           switch (c)
1096             {
1097             case '\n':
1098               obstack_1grow (&action_obstack, c);
1099               lineno++;
1100               break;
1101
1102             case '{':
1103               obstack_1grow (&action_obstack, c);
1104               count++;
1105               break;
1106
1107             case '\'':
1108             case '"':
1109               copy_string (finput, &action_obstack, c);
1110               break;
1111
1112             case '/':
1113               copy_comment (finput, &action_obstack);
1114               break;
1115
1116             case '$':
1117               copy_dollar (finput, &action_obstack,
1118                            rule, stack_offset);
1119               break;
1120
1121             case '@':
1122               copy_at (finput, &action_obstack,
1123                        rule, stack_offset);
1124               break;
1125
1126             case EOF:
1127               fatal (_("unmatched %s"), "`{'");
1128
1129             default:
1130               obstack_1grow (&action_obstack, c);
1131             }
1132
1133           c = getc (finput);
1134         }
1135
1136       /* above loop exits when c is '}' */
1137
1138       if (--count)
1139         {
1140           obstack_1grow (&action_obstack, c);
1141           c = getc (finput);
1142         }
1143     }
1144
1145   /* As a Bison extension, add the ending semicolon.  Since some Yacc
1146      don't do that, help people using bison as a Yacc finding their
1147      missing semicolons.  */
1148   if (yacc_flag)
1149     obstack_sgrow (&action_obstack, "}\n    break;");
1150   else
1151     obstack_sgrow (&action_obstack, ";\n    break;}");
1152 }
1153 \f
1154 /*-------------------------------------------------------------------.
1155 | After `%guard' is seen in the input file, copy the actual guard    |
1156 | into the guards file.  If the guard is followed by an action, copy |
1157 | that into the actions file.  STACK_OFFSET is the number of values  |
1158 | in the current rule so far, which says where to find `$0' with     |
1159 | respect to the top of the stack, for the simple parser in which    |
1160 | the stack is not popped until after the guard is run.              |
1161 `-------------------------------------------------------------------*/
1162
1163 static void
1164 copy_guard (symbol_list *rule, int stack_offset)
1165 {
1166   int c;
1167   int count;
1168   int brace_flag = 0;
1169
1170   /* offset is always 0 if parser has already popped the stack pointer */
1171   if (semantic_parser)
1172     stack_offset = 0;
1173
1174   obstack_fgrow1 (&guard_obstack, "\ncase %d:\n", nrules);
1175   if (!no_lines_flag)
1176     obstack_fgrow2 (&guard_obstack, "#line %d %s\n",
1177                     lineno, quotearg_style (c_quoting_style, infile));
1178   obstack_1grow (&guard_obstack, '{');
1179
1180   count = 0;
1181   c = getc (finput);
1182
1183   while (brace_flag ? (count > 0) : (c != ';'))
1184     {
1185       switch (c)
1186         {
1187         case '\n':
1188           obstack_1grow (&guard_obstack, c);
1189           lineno++;
1190           break;
1191
1192         case '{':
1193           obstack_1grow (&guard_obstack, c);
1194           brace_flag = 1;
1195           count++;
1196           break;
1197
1198         case '}':
1199           obstack_1grow (&guard_obstack, c);
1200           if (count > 0)
1201             count--;
1202           else
1203             {
1204               complain (_("unmatched %s"), "`}'");
1205               c = getc (finput);        /* skip it */
1206             }
1207           break;
1208
1209         case '\'':
1210         case '"':
1211           copy_string (finput, &guard_obstack, c);
1212           break;
1213
1214         case '/':
1215           copy_comment (finput, &guard_obstack);
1216           break;
1217
1218         case '$':
1219           copy_dollar (finput, &guard_obstack, rule, stack_offset);
1220           break;
1221
1222         case '@':
1223           copy_at (finput, &guard_obstack, rule, stack_offset);
1224           break;
1225
1226         case EOF:
1227           fatal ("%s", _("unterminated %guard clause"));
1228
1229         default:
1230           obstack_1grow (&guard_obstack, c);
1231         }
1232
1233       if (c != '}' || count != 0)
1234         c = getc (finput);
1235     }
1236
1237   c = skip_white_space ();
1238
1239   obstack_sgrow (&guard_obstack, ";\n    break;}");
1240   if (c == '{')
1241     copy_action (rule, stack_offset);
1242   else if (c == '=')
1243     {
1244       c = getc (finput);        /* why not skip_white_space -wjh */
1245       if (c == '{')
1246         copy_action (rule, stack_offset);
1247     }
1248   else
1249     ungetc (c, finput);
1250 }
1251 \f
1252
1253 /*-------------------------------------------------------------------.
1254 | Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1255 | with the user's names.                                             |
1256 `-------------------------------------------------------------------*/
1257
1258 static bucket *
1259 gensym (void)
1260 {
1261   /* Incremented for each generated symbol */
1262   static int gensym_count = 0;
1263   static char buf[256];
1264
1265   bucket *sym;
1266
1267   sprintf (buf, "@%d", ++gensym_count);
1268   token_buffer = buf;
1269   sym = getsym (token_buffer);
1270   sym->class = nterm_sym;
1271   sym->value = nvars++;
1272   return sym;
1273 }
1274
#if 0
/*------------------------------------------------------------------.
| Read in a %type declaration and record its information for        |
| get_type_name to access.  This is unused.  It is only called from |
| the #if 0 part of readgram                                        |
`------------------------------------------------------------------*/

/* Return the first token that does not belong to the declaration, so
   that the caller can carry on from it.  */

static int
get_type (void)
{
  token_t token;
  char *name;

  token = lex ();

  if (token != tok_typename)
    {
      complain (_("invalid %s declaration"), "%type");
      /* Hand back the offending token, as the `default' case below
         does.  */
      return token;
    }

  name = xstrdup (token_buffer);

  for (;;)
    {
      token = lex ();

      switch (token)
        {
        case tok_semicolon:
          /* End of the declaration: the caller gets what follows.  */
          return lex ();

        case tok_comma:
          break;

        case tok_identifier:
          /* Give the symbol this type, unless it already has a
             different one.  */
          if (symval->type_name == NULL)
            symval->type_name = name;
          else if (strcmp (name, symval->type_name) != 0)
            complain (_("type redeclaration for %s"), symval->tag);

          break;

        default:
          return token;
        }
    }
}

#endif
1326 \f
/*------------------------------------------------------------------.
| Parse the input grammar into one symbol_list structure.  Each     |
| rule is represented by a sequence of symbols: the left hand side  |
| followed by the contents of the right hand side, followed by a    |
| null pointer instead of a symbol to terminate the rule.  The next |
| symbol is the lhs of the following rule.                          |
|                                                                   |
| All guards and actions are copied out to the appropriate files,   |
| labelled by the rule number they apply to.                        |
|                                                                   |
| On return NTOKENS is set, a default YYSTYPE has been emitted if   |
| needed, and every undefined symbol has been reported and turned   |
| into a nonterminal.                                               |
`------------------------------------------------------------------*/

static void
readgram (void)
{
  /* The token being examined.  */
  token_t t;
  /* Left hand side of the current rule; kept across `|'
     alternatives.  */
  bucket *lhs = NULL;
  /* The list cell created most recently.  */
  symbol_list *p;
  /* The last cell linked into the list (NULL while it is empty).  */
  symbol_list *p1;
  bucket *bp;

  /* Points to first symbol_list of current rule. its symbol is the
     lhs of the rule.  */
  symbol_list *crule;
  /* Points to the symbol_list preceding crule.  */
  symbol_list *crule1;

  p1 = NULL;

  t = lex ();

  while (t != tok_two_percents && t != tok_eof)
    {
      if (t == tok_identifier || t == tok_bar)
        {
          /* Nonzero when the last thing read in this rule was an
             action.  */
          int action_flag = 0;
          /* Number of symbols in rhs of this rule so far */
          int rulelength = 0;
          int xactions = 0;     /* JF for error checking */
          /* First symbol of the rhs, used for the default-action type
             check.  */
          bucket *first_rhs = 0;

          if (t == tok_identifier)
            {
              lhs = symval;

              /* Without an explicit start symbol, the lhs of the
                 first rule becomes the start symbol.  */
              if (!start_flag)
                {
                  startval = lhs;
                  start_flag = 1;
                }

              t = lex ();
              if (t != tok_colon)
                {
                  complain (_("ill-formed rule: initial symbol not followed by colon"));
                  unlex (t);
                }
            }

          if (nrules == 0 && t == tok_bar)
            {
              complain (_("grammar starts with vertical bar"));
              lhs = symval;     /* BOGUS: use a random symval */
            }
          /* start a new rule and record its lhs.  */

          nrules++;
          /* NOTE(review): presumably this item is the end-of-rule
             marker that packgram stores in RITEM for each rule.  */
          nitems++;

          p = symbol_list_new (lhs);

          /* Append the new cell, remembering in CRULE1 what precedes
             the rule so that a dummy rule can be spliced in before
             it.  */
          crule1 = p1;
          if (p1)
            p1->next = p;
          else
            grammar = p;

          p1 = p;
          crule = p;

          /* mark the rule's lhs as a nonterminal if not already so.  */

          if (lhs->class == unknown_sym)
            {
              lhs->class = nterm_sym;
              lhs->value = nvars;
              nvars++;
            }
          else if (lhs->class == token_sym)
            complain (_("rule given for %s, which is a token"), lhs->tag);

          /* read the rhs of the rule.  */

          for (;;)
            {
              t = lex ();
              /* After %prec, the next token's symbol is recorded as
                 the rule's precedence symbol; its kind is not
                 checked here.  */
              if (t == tok_prec)
                {
                  t = lex ();
                  crule->ruleprec = symval;
                  t = lex ();
                }

              /* Anything but a symbol or an action ends the rhs.  */
              if (!(t == tok_identifier || t == tok_left_curly))
                break;

              /* If next token is an identifier, see if a colon follows it.
                 If one does, exit this rule now.  */
              if (t == tok_identifier)
                {
                  bucket *ssave;
                  token_t t1;

                  /* Peek one token ahead, then restore SYMVAL, which
                     is saved around the lookahead.  */
                  ssave = symval;
                  t1 = lex ();
                  unlex (t1);
                  symval = ssave;
                  if (t1 == tok_colon)
                    {
                      warn (_("previous rule lacks an ending `;'"));
                      break;
                    }

                  if (!first_rhs)       /* JF */
                    first_rhs = symval;
                  /* Not followed by colon =>
                     process as part of this rule's rhs.  */
                }

              /* If we just passed an action, that action was in the middle
                 of a rule, so make a dummy rule to reduce it to a
                 non-terminal.  */
              if (action_flag)
                {
                  /* Since the action was written out with this rule's
                     number, we must give the new rule this number by
                     inserting the new rule before it.  */

                  /* Make a dummy nonterminal, a gensym.  */
                  bucket *sdummy = gensym ();

                  /* Make a new rule, whose body is empty, before the
                     current one, so that the action just read can
                     belong to it.  */
                  nrules++;
                  nitems++;
                  p = symbol_list_new (sdummy);
                  /* Attach its lineno to that of the host rule. */
                  p->line = crule->line;
                  if (crule1)
                    crule1->next = p;
                  else
                    grammar = p;
                  /* End of the rule. */
                  crule1 = symbol_list_new (NULL);
                  crule1->next = crule;

                  p->next = crule1;

                  /* Insert the dummy generated by that rule into this
                     rule.  */
                  nitems++;
                  p = symbol_list_new (sdummy);
                  p1->next = p;
                  p1 = p;

                  action_flag = 0;
                }

              if (t == tok_identifier)
                {
                  nitems++;
                  p = symbol_list_new (symval);
                  p1->next = p;
                  p1 = p;
                }
              else              /* handle an action.  */
                {
                  copy_action (crule, rulelength);
                  action_flag = 1;
                  xactions++;   /* JF */
                }
              /* Both symbols and actions count towards the length.  */
              rulelength++;
            }                   /* end of  read rhs of rule */

          /* Put an empty link in the list to mark the end of this rule  */
          p = symbol_list_new (NULL);
          p1->next = p;
          p1 = p;

          if (t == tok_prec)
            {
              complain (_("two @prec's in a row"));
              t = lex ();
              crule->ruleprec = symval;
              t = lex ();
            }
          if (t == tok_guard)
            {
              if (!semantic_parser)
                complain (_("%%guard present but %%semantic_parser not specified"));

              copy_guard (crule, rulelength);
              t = lex ();
            }
          else if (t == tok_left_curly)
            {
              /* This case never occurs -wjh */
              if (action_flag)
                complain (_("two actions at end of one rule"));
              copy_action (crule, rulelength);
              action_flag = 1;
              xactions++;       /* -wjh */
              t = lex ();
            }
          /* If $$ is being set in default way, report if any type
             mismatch.  */
          else if (!xactions
                   && first_rhs && lhs->type_name != first_rhs->type_name)
            {
              /* The pointers differ; it is a clash unless both names
                 exist and compare equal.  */
              if (lhs->type_name == 0
                  || first_rhs->type_name == 0
                  || strcmp (lhs->type_name, first_rhs->type_name))
                complain (_("type clash (`%s' `%s') on default action"),
                          lhs->type_name ? lhs->type_name : "",
                          first_rhs->type_name ? first_rhs->type_name : "");
            }
          /* Warn if there is no default for $$ but we need one.  */
          else if (!xactions && !first_rhs && lhs->type_name != 0)
            complain (_("empty rule for typed nonterminal, and no action"));
          if (t == tok_two_percents || t == tok_eof)
            warn (_("previous rule lacks an ending `;'"));
          if (t == tok_semicolon)
            t = lex ();
        }
#if 0
      /* these things can appear as alternatives to rules.  */
/* NO, they cannot.
        a) none of the documentation allows them
        b) most of them scan forward until finding a next %
                thus they may swallow lots of intervening rules
*/
      else if (t == tok_token)
        {
          parse_token_decl (token_sym, nterm_sym);
          t = lex ();
        }
      else if (t == tok_nterm)
        {
          parse_token_decl (nterm_sym, token_sym);
          t = lex ();
        }
      else if (t == tok_type)
        {
          t = get_type ();
        }
      else if (t == tok_union)
        {
          parse_union_decl ();
          t = lex ();
        }
      else if (t == tok_expect)
        {
          parse_expect_decl ();
          t = lex ();
        }
      else if (t == tok_start)
        {
          parse_start_decl ();
          t = lex ();
        }
#endif

      else
        {
          /* Neither a rule nor `|': complain and move on to the next
             token.  */
          complain (_("invalid input: %s"), quote (token_buffer));
          t = lex ();
        }
    }

  /* grammar has been read.  Do some checking */

  if (nsyms > MAXSHORT)
    fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
           MAXSHORT);
  if (nrules == 0)
    fatal (_("no rules in the input grammar"));

  /* JF put out same default YYSTYPE as YACC does */
  if (typed == 0
      && !value_components_used)
    {
      /* We used to use `unsigned long' as YYSTYPE on MSDOS,
         but it seems better to be consistent.
         Most programs should declare their own type anyway.  */
      obstack_sgrow (&attrs_obstack, "\
#ifndef YYSTYPE\n\
# define YYSTYPE int\n\
# define YYSTYPE_IS_TRIVIAL 1\n\
#endif\n");
      if (defines_flag)
        obstack_sgrow (&defines_obstack, "\
# ifndef YYSTYPE\n\
#  define YYSTYPE int\n\
#  define YYSTYPE_IS_TRIVIAL 1\n\
# endif\n");
    }

  /* Report any undefined symbols and consider them nonterminals.  */

  for (bp = firstsymbol; bp; bp = bp->next)
    if (bp->class == unknown_sym)
      {
        complain (_
                  ("symbol %s is used, but is not defined as a token and has no rules"),
                  bp->tag);
        bp->class = nterm_sym;
        bp->value = nvars++;
      }

  /* Whatever is not a nonterminal is a token.  */
  ntokens = nsyms - nvars;
}
1648 \f
1649 /*--------------------------------------------------------------.
1650 | For named tokens, but not literal ones, define the name.  The |
1651 | value is the user token number.                               |
1652 `--------------------------------------------------------------*/
1653
1654 static void
1655 output_token_defines (struct obstack *oout)
1656 {
1657   bucket *bp;
1658   char *cp, *symbol;
1659   char c;
1660
1661   for (bp = firstsymbol; bp; bp = bp->next)
1662     {
1663       symbol = bp->tag;         /* get symbol */
1664
1665       if (bp->value >= ntokens)
1666         continue;
1667       if (bp->user_token_number == SALIAS)
1668         continue;
1669       if ('\'' == *symbol)
1670         continue;               /* skip literal character */
1671       if (bp == errtoken)
1672         continue;               /* skip error token */
1673       if ('\"' == *symbol)
1674         {
1675           /* use literal string only if given a symbol with an alias */
1676           if (bp->alias)
1677             symbol = bp->alias->tag;
1678           else
1679             continue;
1680         }
1681
1682       /* Don't #define nonliteral tokens whose names contain periods.  */
1683       cp = symbol;
1684       while ((c = *cp++) && c != '.');
1685       if (c != '\0')
1686         continue;
1687
1688       obstack_fgrow2 (oout, "# define\t%s\t%d\n",
1689                       symbol, bp->user_token_number);
1690       if (semantic_parser)
1691         /* FIXME: This is certainly dead wrong, and should be just as
1692            above. --akim.  */
1693         obstack_fgrow2 (oout, "# define\tT%s\t%d\n", symbol, bp->value);
1694     }
1695
1696   obstack_1grow (oout, '\n');
1697 }
1698
1699
1700 /*--------------------.
1701 | Output the header.  |
1702 `--------------------*/
1703
1704 static void
1705 symbols_output (void)
1706 {
1707   if (defines_flag)
1708     {
1709       output_token_defines (&defines_obstack);
1710
1711       if (!pure_parser)
1712         {
1713           if (spec_name_prefix)
1714             obstack_fgrow1 (&defines_obstack, "\nextern YYSTYPE %slval;\n",
1715                             spec_name_prefix);
1716           else
1717             obstack_sgrow (&defines_obstack,
1718                                  "\nextern YYSTYPE yylval;\n");
1719         }
1720
1721       if (semantic_parser)
1722         {
1723           int i;
1724
1725           for (i = ntokens; i < nsyms; i++)
1726             {
1727               /* don't make these for dummy nonterminals made by gensym.  */
1728               if (*tags[i] != '@')
1729                 obstack_fgrow2 (&defines_obstack,
1730                                 "# define\tNT%s\t%d\n", tags[i], i);
1731             }
1732 #if 0
1733           /* `fdefines' is now a temporary file, so we need to copy its
1734              contents in `done', so we can't close it here.  */
1735           fclose (fdefines);
1736           fdefines = NULL;
1737 #endif
1738         }
1739     }
1740 }
1741
1742
1743 /*------------------------------------------------------------------.
1744 | Set TOKEN_TRANSLATIONS.  Check that no two symbols share the same |
1745 | number.                                                           |
1746 `------------------------------------------------------------------*/
1747
1748 static void
1749 token_translations_init (void)
1750 {
1751   bucket *bp = NULL;
1752   int i;
1753
1754   token_translations = XCALLOC (short, max_user_token_number + 1);
1755
1756   /* Initialize all entries for literal tokens to 2, the internal
1757      token number for $undefined., which represents all invalid
1758      inputs.  */
1759   for (i = 0; i <= max_user_token_number; i++)
1760     token_translations[i] = 2;
1761
1762   for (bp = firstsymbol; bp; bp = bp->next)
1763     {
1764       /* Non-terminal? */
1765       if (bp->value >= ntokens)
1766         continue;
1767       /* A token string alias? */
1768       if (bp->user_token_number == SALIAS)
1769         continue;
1770
1771       assert (bp->user_token_number != SUNDEF);
1772
1773       /* A token which translation has already been set? */
1774       if (token_translations[bp->user_token_number] != 2)
1775         complain (_("tokens %s and %s both assigned number %d"),
1776                   tags[token_translations[bp->user_token_number]],
1777                   bp->tag, bp->user_token_number);
1778       token_translations[bp->user_token_number] = bp->value;
1779     }
1780 }
1781
1782
1783 /*------------------------------------------------------------------.
1784 | Assign symbol numbers, and write definition of token names into   |
1785 | FDEFINES.  Set up vectors TAGS and SPREC of names and precedences |
1786 | of symbols.                                                       |
1787 `------------------------------------------------------------------*/
1788
1789 static void
1790 packsymbols (void)
1791 {
1792   bucket *bp = NULL;
1793   int tokno = 1;
1794   int last_user_token_number;
1795   static char DOLLAR[] = "$";
1796
1797   tags = XCALLOC (char *, nsyms + 1);
1798   user_toknums = XCALLOC (short, nsyms + 1);
1799
1800   sprec = XCALLOC (short, nsyms);
1801   sassoc = XCALLOC (short, nsyms);
1802
1803   /* The EOF token. */
1804   tags[0] = DOLLAR;
1805   user_toknums[0] = 0;
1806
1807   max_user_token_number = 256;
1808   last_user_token_number = 256;
1809
1810   for (bp = firstsymbol; bp; bp = bp->next)
1811     {
1812       if (bp->class == nterm_sym)
1813         {
1814           bp->value += ntokens;
1815         }
1816       else if (bp->alias)
1817         {
1818           /* this symbol and its alias are a single token defn.
1819              allocate a tokno, and assign to both check agreement of
1820              ->prec and ->assoc fields and make both the same */
1821           if (bp->value == 0)
1822             bp->value = bp->alias->value = tokno++;
1823
1824           if (bp->prec != bp->alias->prec)
1825             {
1826               if (bp->prec != 0 && bp->alias->prec != 0
1827                   && bp->user_token_number == SALIAS)
1828                 complain (_("conflicting precedences for %s and %s"),
1829                           bp->tag, bp->alias->tag);
1830               if (bp->prec != 0)
1831                 bp->alias->prec = bp->prec;
1832               else
1833                 bp->prec = bp->alias->prec;
1834             }
1835
1836           if (bp->assoc != bp->alias->assoc)
1837             {
1838               if (bp->assoc != 0 && bp->alias->assoc != 0
1839                   && bp->user_token_number == SALIAS)
1840                 complain (_("conflicting assoc values for %s and %s"),
1841                           bp->tag, bp->alias->tag);
1842               if (bp->assoc != 0)
1843                 bp->alias->assoc = bp->assoc;
1844               else
1845                 bp->assoc = bp->alias->assoc;
1846             }
1847
1848           if (bp->user_token_number == SALIAS)
1849             continue;           /* do not do processing below for SALIASs */
1850
1851         }
1852       else                      /* bp->class == token_sym */
1853         {
1854           bp->value = tokno++;
1855         }
1856
1857       if (bp->class == token_sym)
1858         {
1859           if (bp->user_token_number == SUNDEF)
1860             bp->user_token_number = ++last_user_token_number;
1861           if (bp->user_token_number > max_user_token_number)
1862             max_user_token_number = bp->user_token_number;
1863         }
1864
1865       tags[bp->value] = bp->tag;
1866       user_toknums[bp->value] = bp->user_token_number;
1867       sprec[bp->value] = bp->prec;
1868       sassoc[bp->value] = bp->assoc;
1869     }
1870
1871   token_translations_init ();
1872
1873   error_token_number = errtoken->value;
1874
1875   if (!no_parser_flag)
1876     output_token_defines (&table_obstack);
1877
1878   if (startval->class == unknown_sym)
1879     fatal (_("the start symbol %s is undefined"), startval->tag);
1880   else if (startval->class == token_sym)
1881     fatal (_("the start symbol %s is a token"), startval->tag);
1882
1883   start_symbol = startval->value;
1884 }
1885
1886
1887 /*---------------------------------------------------------------.
1888 | Convert the rules into the representation using RRHS, RLHS and |
1889 | RITEMS.                                                        |
1890 `---------------------------------------------------------------*/
1891
1892 static void
1893 packgram (void)
1894 {
1895   int itemno;
1896   int ruleno;
1897   symbol_list *p;
1898
1899   ritem = XCALLOC (short, nitems + 1);
1900   rule_table = XCALLOC (rule_t, nrules) - 1;
1901
1902   itemno = 0;
1903   ruleno = 1;
1904
1905   p = grammar;
1906   while (p)
1907     {
1908       bucket *ruleprec = p->ruleprec;
1909       rule_table[ruleno].lhs = p->sym->value;
1910       rule_table[ruleno].rhs = itemno;
1911       rule_table[ruleno].line = p->line;
1912       rule_table[ruleno].useful = TRUE;
1913
1914       p = p->next;
1915       while (p && p->sym)
1916         {
1917           ritem[itemno++] = p->sym->value;
1918           /* A rule gets by default the precedence and associativity
1919              of the last token in it.  */
1920           if (p->sym->class == token_sym)
1921             {
1922               rule_table[ruleno].prec = p->sym->prec;
1923               rule_table[ruleno].assoc = p->sym->assoc;
1924             }
1925           if (p)
1926             p = p->next;
1927         }
1928
1929       /* If this rule has a %prec,
1930          the specified symbol's precedence replaces the default.  */
1931       if (ruleprec)
1932         {
1933           rule_table[ruleno].prec = ruleprec->prec;
1934           rule_table[ruleno].assoc = ruleprec->assoc;
1935           rule_table[ruleno].precsym = ruleprec->value;
1936         }
1937
1938       ritem[itemno++] = -ruleno;
1939       ruleno++;
1940
1941       if (p)
1942         p = p->next;
1943     }
1944
1945   ritem[itemno] = 0;
1946
1947   if (trace_flag)
1948     ritem_print (stderr);
1949 }
1950 \f
1951 /*-------------------------------------------------------------------.
1952 | Read in the grammar specification and record it in the format      |
1953 | described in gram.h.  All guards are copied into the GUARD_OBSTACK |
1954 | and all actions into ACTION_OBSTACK, in each case forming the body |
1955 | of a C function (YYGUARD or YYACTION) which contains a switch      |
1956 | statement to decide which guard or action to execute.              |
1957 `-------------------------------------------------------------------*/
1958
1959 void
1960 reader (void)
1961 {
1962   start_flag = 0;
1963   startval = NULL;              /* start symbol not specified yet. */
1964
1965   nsyms = 1;
1966   nvars = 0;
1967   nrules = 0;
1968   nitems = 0;
1969
1970   typed = 0;
1971   lastprec = 0;
1972
1973   semantic_parser = 0;
1974   pure_parser = 0;
1975
1976   grammar = NULL;
1977
1978   lex_init ();
1979   lineno = 1;
1980
1981   /* Initialize the symbol table.  */
1982   tabinit ();
1983   /* Construct the error token */
1984   errtoken = getsym ("error");
1985   errtoken->class = token_sym;
1986   errtoken->user_token_number = 256;    /* Value specified by POSIX.  */
1987   /* Construct a token that represents all undefined literal tokens.
1988      It is always token number 2.  */
1989   undeftoken = getsym ("$undefined.");
1990   undeftoken->class = token_sym;
1991   undeftoken->user_token_number = 2;
1992
1993   /* Read the declaration section.  Copy %{ ... %} groups to
1994      TABLE_OBSTACK and FDEFINES file.  Also notice any %token, %left,
1995      etc. found there.  */
1996   obstack_fgrow3 (&table_obstack, "\
1997 /* %s, made from %s\n\
1998    by GNU bison %s.  */\n\
1999 \n",
2000                   no_parser_flag ? "Bison-generated parse tables" : "A Bison parser",
2001                   infile, VERSION);
2002
2003   obstack_sgrow (&table_obstack,
2004                        "#define YYBISON 1  /* Identify Bison output.  */\n\n");
2005   read_declarations ();
2006   /* Start writing the guard and action files, if they are needed.  */
2007   output_headers ();
2008   /* Read in the grammar, build grammar in list form.  Write out
2009      guards and actions.  */
2010   readgram ();
2011   /* Now we know whether we need the line-number stack.  If we do,
2012      write its type into the .tab.h file.  */
2013   if (defines_flag)
2014     reader_output_yylsp (&defines_obstack);
2015   /* Write closing delimiters for actions and guards.  */
2016   output_trailers ();
2017   if (locations_flag)
2018     obstack_sgrow (&table_obstack, "#define YYLSP_NEEDED 1\n\n");
2019   /* Assign the symbols their symbol numbers.  Write #defines for the
2020      token symbols into FDEFINES if requested.  */
2021   packsymbols ();
2022   /* Convert the grammar into the format described in gram.h.  */
2023   packgram ();
2024   /* Output the headers. */
2025   symbols_output ();
2026 }
2027
2028
2029 /*------------------------------------------------------------------.
2030 | Define YYLTYPE.  Cannot be in the skeleton since we might have to |
2031 | output it in the headers if --defines is used.                    |
2032 `------------------------------------------------------------------*/
2033
2034 void
2035 reader_output_yylsp (struct obstack *oout)
2036 {
2037   if (locations_flag)
2038     obstack_sgrow (oout, "\
2039 \n\
2040 #ifndef YYLTYPE\n\
2041 typedef struct yyltype\n\
2042 {\n\
2043   int first_line;\n\
2044   int first_column;\n\
2045 \n\
2046   int last_line;\n\
2047   int last_column;\n\
2048 } yyltype;\n\
2049 \n\
2050 # define YYLTYPE yyltype\n\
2051 # define YYLTYPE_IS_TRIVIAL 1\n\
2052 #endif\n\
2053 \n");
2054 }