src/reader.c

   1 /* Input parser for bison
   2    Copyright 1984, 1986, 1989, 1992, 1998, 2000
   3    Free Software Foundation, Inc.
   4
   5    This file is part of Bison, the GNU Compiler Compiler.
   6
   7    Bison is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 2, or (at your option)
  10    any later version.
  11
  12    Bison is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with Bison; see the file COPYING.  If not, write to
  19    the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20    Boston, MA 02111-1307, USA.  */
  21
  22
#include "system.h"
#include <limits.h>
#include "obstack.h"
#include "quotearg.h"
#include "quote.h"
#include "getargs.h"
#include "files.h"
#include "xalloc.h"
#include "symtab.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
  37
/* Number of slots allocated (but not necessarily used yet) in `rline'.
   NOTE(review): `rline' itself is not declared in this part of the
   file -- presumably it comes from one of the headers above; confirm.  */
static int rline_allocated;
  40
/* One node of a singly linked list of symbols.  In this file,
   get_type_name walks such a list through NEXT to reach the N-th
   symbol of a rule and reads that symbol's type through SYM.  */
typedef struct symbol_list
{
  /* Following node, or NULL at the end of the list.  */
  struct symbol_list *next;
  /* Symbol held by this node.  get_type_name treats a node whose SYM
     is NULL like the end of the rule.  */
  bucket *sym;
  /* NOTE(review): not used in this part of the file -- presumably the
     symbol named by a rule's %prec; confirm.  */
  bucket *ruleprec;
}
symbol_list;
  48
/* Line number of the input currently being read.  The routines in
   this file increment it when they copy a newline and quote it in the
   `#line' directives they emit.  */
int lineno;
/* NOTE(review): TAGS and USER_TOKNUMS are not referenced in this part
   of the file -- presumably per-symbol tables (names and user token
   numbers) that are filled in later; confirm.  */
char **tags;
short *user_toknums;
/* NOTE(review): not referenced in this part of the file -- presumably
   the head of the list of symbols making up the rules read; confirm.  */
static symbol_list *grammar;
/* Nonzero once parse_start_decl has accepted a %start declaration.  */
static int start_flag;
/* The symbol named by that %start declaration.  */
static bucket *startval;

/* Nonzero if components of semantic values are used, implying
   they must be unions.  */
static int value_components_used;

/* Nonzero if %union has been seen.  */
static int typed;

/* Incremented for each %left, %right or %nonassoc seen */
static int lastprec;

/* Incremented for each generated symbol */
static int gensym_count;

/* NOTE(review): neither symbol is referenced in this part of the file
   -- presumably the predefined `error' and `undefined' tokens;
   confirm.  */
static bucket *errtoken;
static bucket *undeftoken;
  71 \f
  72
  73 /*===================\
  74 | Low level lexing.  |
  75 \===================*/
  76
  77 static void
  78 skip_to_char (int target)
  79 {
  80   int c;
  81   if (target == '\n')
  82     complain (_("   Skipping to next \\n"));
  83   else
  84     complain (_("   Skipping to next %c"), target);
  85
  86   do
  87     c = skip_white_space ();
  88   while (c != target && c != EOF);
  89   if (c != EOF)
  90     ungetc (c, finput);
  91 }
  92
  93
  94 /*---------------------------------------------------------.
  95 | Read a signed integer from STREAM and return its value.  |
  96 `---------------------------------------------------------*/
  97
  98 static inline int
  99 read_signed_integer (FILE *stream)
 100 {
 101   int c = getc (stream);
 102   int sign = 1;
 103   int n = 0;
 104
 105   if (c == '-')
 106     {
 107       c = getc (stream);
 108       sign = -1;
 109     }
 110
 111   while (isdigit (c))
 112     {
 113       n = 10 * n + (c - '0');
 114       c = getc (stream);
 115     }
 116
 117   ungetc (c, stream);
 118
 119   return sign * n;
 120 }
 121 \f
 122 /*--------------------------------------------------------------.
 123 | Get the data type (alternative in the union) of the value for |
 124 | symbol N in rule RULE.                                        |
 125 `--------------------------------------------------------------*/
 126
 127 static char *
 128 get_type_name (int n, symbol_list * rule)
 129 {
 130   int i;
 131   symbol_list *rp;
 132
 133   if (n < 0)
 134     {
 135       complain (_("invalid $ value"));
 136       return NULL;
 137     }
 138
 139   rp = rule;
 140   i = 0;
 141
 142   while (i < n)
 143     {
 144       rp = rp->next;
 145       if (rp == NULL || rp->sym == NULL)
 146         {
 147           complain (_("invalid $ value"));
 148           return NULL;
 149         }
 150       i++;
 151     }
 152
 153   return rp->sym->type_name;
 154 }
 155 \f
 156 /*------------------------------------------------------------.
 157 | Dump the string from FIN to OOUT if non null.  MATCH is the |
 158 | delimiter of the string (either ' or ").                    |
 159 `------------------------------------------------------------*/
 160
 161 static inline void
 162 copy_string (FILE *fin, struct obstack *oout, int match)
 163 {
 164   int c;
 165
 166   obstack_1grow (oout, match);
 167
 168   c = getc (fin);
 169
 170   while (c != match)
 171     {
 172       if (c == EOF)
 173         fatal (_("unterminated string at end of file"));
 174       if (c == '\n')
 175         {
 176           complain (_("unterminated string"));
 177           ungetc (c, fin);
 178           c = match;            /* invent terminator */
 179           continue;
 180         }
 181
 182       obstack_1grow (oout, c);
 183
 184       if (c == '\\')
 185         {
 186           c = getc (fin);
 187           if (c == EOF)
 188             fatal (_("unterminated string at end of file"));
 189           obstack_1grow (oout, c);
 190
 191           if (c == '\n')
 192             lineno++;
 193         }
 194
 195       c = getc (fin);
 196     }
 197
 198   obstack_1grow (oout, c);
 199 }
 200
 201
 202 /*-----------------------------------------------------------------.
 203 | Dump the wannabee comment from IN to OUT1 and OUT2 (which can be |
 204 | NULL).  In fact we just saw a `/', which might or might not be a |
 205 | comment.  In any case, copy what we saw.                         |
 206 |                                                                  |
 207 | OUT2 might be NULL.                                              |
 208 `-----------------------------------------------------------------*/
 209
 210 static inline void
 211 copy_comment2 (FILE *fin, struct obstack *oout1, struct obstack *oout2)
 212 {
 213   int cplus_comment;
 214   int ended;
 215   int c;
 216
 217   /* We read a `/', output it. */
 218   obstack_1grow (oout1, '/');
 219   if (oout2)
 220     obstack_1grow (oout2, '/');
 221
 222   switch ((c = getc (fin)))
 223     {
 224     case '/':
 225       cplus_comment = 1;
 226       break;
 227     case '*':
 228       cplus_comment = 0;
 229       break;
 230     default:
 231       ungetc (c, fin);
 232       return;
 233     }
 234
 235   obstack_1grow (oout1, c);
 236   if (oout2)
 237     obstack_1grow (oout2, c);
 238   c = getc (fin);
 239
 240   ended = 0;
 241   while (!ended)
 242     {
 243       if (!cplus_comment && c == '*')
 244         {
 245           while (c == '*')
 246             {
 247               obstack_1grow (oout1, c);
 248               if (oout2)
 249                 obstack_1grow (oout2, c);
 250               c = getc (fin);
 251             }
 252
 253           if (c == '/')
 254             {
 255               obstack_1grow (oout1, c);
 256               if (oout2)
 257                 obstack_1grow (oout2, c);
 258               ended = 1;
 259             }
 260         }
 261       else if (c == '\n')
 262         {
 263           lineno++;
 264           obstack_1grow (oout1, c);
 265           if (oout2)
 266             obstack_1grow (oout2, c);
 267           if (cplus_comment)
 268             ended = 1;
 269           else
 270             c = getc (fin);
 271         }
 272       else if (c == EOF)
 273         fatal (_("unterminated comment"));
 274       else
 275         {
 276           obstack_1grow (oout1, c);
 277           if (oout2)
 278             obstack_1grow (oout2, c);
 279           c = getc (fin);
 280         }
 281     }
 282 }
 283
 284
 285 /*-------------------------------------------------------------------.
 286 | Dump the comment (actually the current string starting with a `/') |
 287 | from FIN to OOUT.                                                  |
 288 `-------------------------------------------------------------------*/
 289
 290 static inline void
 291 copy_comment (FILE *fin, struct obstack *oout)
 292 {
 293   copy_comment2 (fin, oout, NULL);
 294 }
 295
 296
 297 /*-----------------------------------------------------------------.
 298 | FIN is pointing to a location (i.e., a `@').  Output to OOUT a   |
 299 | reference to this location. STACK_OFFSET is the number of values |
 300 | in the current rule so far, which says where to find `$0' with   |
 301 | respect to the top of the stack.                                 |
 302 `-----------------------------------------------------------------*/
 303
 304 static inline void
 305 copy_at (FILE *fin, struct obstack *oout, int stack_offset)
 306 {
 307   int c;
 308
 309   c = getc (fin);
 310   if (c == '$')
 311     {
 312       obstack_grow_string (oout, "yyloc");
 313       locations_flag = 1;
 314     }
 315   else if (isdigit (c) || c == '-')
 316     {
 317       int n;
 318       char buf[4096];
 319
 320       ungetc (c, fin);
 321       n = read_signed_integer (fin);
 322
 323       obstack_fgrow1 (oout, "yylsp[%d]", n - stack_offset);
 324       locations_flag = 1;
 325     }
 326   else
 327     {
 328       char buf[] = "@c";
 329       buf[1] = c;
 330       complain (_("%s is invalid"), quote (buf));
 331     }
 332 }
 333
 334
 335 /*-------------------------------------------------------------------.
 336 | FIN is pointing to a wannabee semantic value (i.e., a `$').        |
 337 |                                                                    |
 338 | Possible inputs: $[<TYPENAME>]($|integer)                          |
 339 |                                                                    |
 340 | Output to OOUT a reference to this semantic value. STACK_OFFSET is |
 341 | the number of values in the current rule so far, which says where  |
 342 | to find `$0' with respect to the top of the stack.                 |
 343 `-------------------------------------------------------------------*/
 344
 345 static inline void
 346 copy_dollar (FILE *fin, struct obstack *oout,
 347              symbol_list *rule, int stack_offset)
 348 {
 349   int c = getc (fin);
 350   char *type_name = NULL;
 351
 352   /* Get the type name if explicit. */
 353   if (c == '<')
 354     {
 355       read_type_name (fin);
 356       type_name = token_buffer;
 357       value_components_used = 1;
 358       c = getc (fin);
 359     }
 360
 361   if (c == '$')
 362     {
 363       obstack_grow_string (oout, "yyval");
 364
 365       if (!type_name)
 366         type_name = get_type_name (0, rule);
 367       if (type_name)
 368         obstack_fgrow1 (oout, ".%s", type_name);
 369       if (!type_name && typed)
 370         complain (_("$$ of `%s' has no declared type"),
 371                   rule->sym->tag);
 372     }
 373   else if (isdigit (c) || c == '-')
 374     {
 375       int n;
 376       ungetc (c, fin);
 377       n = read_signed_integer (fin);
 378
 379       if (!type_name && n > 0)
 380         type_name = get_type_name (n, rule);
 381
 382       obstack_fgrow1 (oout, "yyvsp[%d]", n - stack_offset);
 383
 384       if (type_name)
 385         obstack_fgrow1 (oout, ".%s", type_name);
 386       if (!type_name && typed)
 387         complain (_("$%d of `%s' has no declared type"),
 388                   n, rule->sym->tag);
 389     }
 390   else
 391     {
 392       char buf[] = "$c";
 393       buf[1] = c;
 394       complain (_("%s is invalid"), quote (buf));
 395     }
 396 }
 397 \f
 398 /*-------------------------------------------------------------------.
 399 | Copy the contents of a `%{ ... %}' into the definitions file.  The |
 400 | `%{' has already been read.  Return after reading the `%}'.        |
 401 `-------------------------------------------------------------------*/
 402
 403 static void
 404 copy_definition (void)
 405 {
 406   int c;
 407   /* -1 while reading a character if prev char was %. */
 408   int after_percent;
 409
 410   if (!no_lines_flag)
 411     obstack_fgrow2 (&attrs_obstack, "#line %d %s\n",
 412                     lineno, quotearg_style (c_quoting_style, infile));
 413
 414   after_percent = 0;
 415
 416   c = getc (finput);
 417
 418   for (;;)
 419     {
 420       switch (c)
 421         {
 422         case '\n':
 423           obstack_1grow (&attrs_obstack, c);
 424           lineno++;
 425           break;
 426
 427         case '%':
 428           after_percent = -1;
 429           break;
 430
 431         case '\'':
 432         case '"':
 433           copy_string (finput, &attrs_obstack, c);
 434           break;
 435
 436         case '/':
 437           copy_comment (finput, &attrs_obstack);
 438           break;
 439
 440         case EOF:
 441           fatal ("%s", _("unterminated `%{' definition"));
 442
 443         default:
 444           obstack_1grow (&attrs_obstack, c);
 445         }
 446
 447       c = getc (finput);
 448
 449       if (after_percent)
 450         {
 451           if (c == '}')
 452             return;
 453           obstack_1grow (&attrs_obstack, '%');
 454         }
 455       after_percent = 0;
 456     }
 457 }
 458
 459
 460 /*-------------------------------------------------------------------.
 461 | Parse what comes after %token or %nterm.  For %token, WHAT_IS is   |
 462 | token_sym and WHAT_IS_NOT is nterm_sym.  For %nterm, the arguments |
 463 | are reversed.                                                      |
 464 `-------------------------------------------------------------------*/
 465
 466 static void
 467 parse_token_decl (symbol_class what_is, symbol_class what_is_not)
 468 {
 469   int token = 0;
 470   char *typename = 0;
 471
 472   /* The symbol being defined.  */
 473   struct bucket *symbol = NULL;
 474
 475   /* After `%token' and `%nterm', any number of symbols maybe be
 476      defined.  */
 477   for (;;)
 478     {
 479       int tmp_char = ungetc (skip_white_space (), finput);
 480
 481       /* `%' (for instance from `%token', or from `%%' etc.) is the
 482          only valid means to end this declaration.  */
 483       if (tmp_char == '%')
 484         return;
 485       if (tmp_char == EOF)
 486         fatal (_("Premature EOF after %s"), token_buffer);
 487
 488       token = lex ();
 489       if (token == COMMA)
 490         {
 491           symbol = NULL;
 492           continue;
 493         }
 494       if (token == TYPENAME)
 495         {
 496           typename = xstrdup (token_buffer);
 497           value_components_used = 1;
 498           symbol = NULL;
 499         }
 500       else if (token == IDENTIFIER && *symval->tag == '\"' && symbol)
 501         {
 502           if (symval->alias)
 503             warn (_("symbol `%s' used more than once as a literal string"),
 504                   symval->tag);
 505           else if (symbol->alias)
 506             warn (_("symbol `%s' given more than one literal string"),
 507                   symbol->tag);
 508           else
 509             {
 510               symval->class = token_sym;
 511               symval->type_name = typename;
 512               symval->user_token_number = symbol->user_token_number;
 513               symbol->user_token_number = SALIAS;
 514               symval->alias = symbol;
 515               symbol->alias = symval;
 516               /* symbol and symval combined are only one symbol */
 517               nsyms--;
 518             }
 519           translations = 1;
 520           symbol = NULL;
 521         }
 522       else if (token == IDENTIFIER)
 523         {
 524           int oldclass = symval->class;
 525           symbol = symval;
 526
 527           if (symbol->class == what_is_not)
 528             complain (_("symbol %s redefined"), symbol->tag);
 529           symbol->class = what_is;
 530           if (what_is == nterm_sym && oldclass != nterm_sym)
 531             symbol->value = nvars++;
 532
 533           if (typename)
 534             {
 535               if (symbol->type_name == NULL)
 536                 symbol->type_name = typename;
 537               else if (strcmp (typename, symbol->type_name) != 0)
 538                 complain (_("type redeclaration for %s"), symbol->tag);
 539             }
 540         }
 541       else if (symbol && token == NUMBER)
 542         {
 543           symbol->user_token_number = numval;
 544           translations = 1;
 545         }
 546       else
 547         {
 548           complain (_("`%s' is invalid in %s"),
 549                     token_buffer, (what_is == token_sym) ? "%token" : "%nterm");
 550           skip_to_char ('%');
 551         }
 552     }
 553
 554 }
 555
 556
 557 /*------------------------------.
 558 | Parse what comes after %start |
 559 `------------------------------*/
 560
 561 static void
 562 parse_start_decl (void)
 563 {
 564   if (start_flag)
 565     complain (_("multiple %s declarations"), "%start");
 566   if (lex () != IDENTIFIER)
 567     complain (_("invalid %s declaration"), "%start");
 568   else
 569     {
 570       start_flag = 1;
 571       startval = symval;
 572     }
 573 }
 574
 575 /*-----------------------------------------------------------.
 576 | read in a %type declaration and record its information for |
 577 | get_type_name to access                                    |
 578 `-----------------------------------------------------------*/
 579
 580 static void
 581 parse_type_decl (void)
 582 {
 583   char *name;
 584
 585   if (lex () != TYPENAME)
 586     {
 587       complain ("%s", _("%type declaration has no <typename>"));
 588       skip_to_char ('%');
 589       return;
 590     }
 591
 592   name = xstrdup (token_buffer);
 593
 594   for (;;)
 595     {
 596       int t;
 597       int tmp_char = ungetc (skip_white_space (), finput);
 598
 599       if (tmp_char == '%')
 600         return;
 601       if (tmp_char == EOF)
 602         fatal (_("Premature EOF after %s"), token_buffer);
 603
 604       t = lex ();
 605
 606       switch (t)
 607         {
 608
 609         case COMMA:
 610         case SEMICOLON:
 611           break;
 612
 613         case IDENTIFIER:
 614           if (symval->type_name == NULL)
 615             symval->type_name = name;
 616           else if (strcmp (name, symval->type_name) != 0)
 617             complain (_("type redeclaration for %s"), symval->tag);
 618
 619           break;
 620
 621         default:
 622           complain (_("invalid %%type declaration due to item: %s"),
 623                     token_buffer);
 624           skip_to_char ('%');
 625         }
 626     }
 627 }
 628
 629
 630
 631 /*----------------------------------------------------------------.
 632 | Read in a %left, %right or %nonassoc declaration and record its |
 633 | information.                                                    |
 634 `----------------------------------------------------------------*/
 635
 636 static void
 637 parse_assoc_decl (associativity assoc)
 638 {
 639   char *name = NULL;
 640   int prev = 0;
 641
 642   lastprec++;                   /* Assign a new precedence level, never 0.  */
 643
 644   for (;;)
 645     {
 646       int t;
 647       int tmp_char = ungetc (skip_white_space (), finput);
 648
 649       if (tmp_char == '%')
 650         return;
 651       if (tmp_char == EOF)
 652         fatal (_("Premature EOF after %s"), token_buffer);
 653
 654       t = lex ();
 655
 656       switch (t)
 657         {
 658         case TYPENAME:
 659           name = xstrdup (token_buffer);
 660           break;
 661
 662         case COMMA:
 663           break;
 664
 665         case IDENTIFIER:
 666           if (symval->prec != 0)
 667             complain (_("redefining precedence of %s"), symval->tag);
 668           symval->prec = lastprec;
 669           symval->assoc = assoc;
 670           if (symval->class == nterm_sym)
 671             complain (_("symbol %s redefined"), symval->tag);
 672           symval->class = token_sym;
 673           if (name)
 674             {                   /* record the type, if one is specified */
 675               if (symval->type_name == NULL)
 676                 symval->type_name = name;
 677               else if (strcmp (name, symval->type_name) != 0)
 678                 complain (_("type redeclaration for %s"), symval->tag);
 679             }
 680           break;
 681
 682         case NUMBER:
 683           if (prev == IDENTIFIER)
 684             {
 685               symval->user_token_number = numval;
 686               translations = 1;
 687             }
 688           else
 689             {
 690               complain (_
 691                         ("invalid text (%s) - number should be after identifier"),
 692 token_buffer);
 693               skip_to_char ('%');
 694             }
 695           break;
 696
 697         case SEMICOLON:
 698           return;
 699
 700         default:
 701           complain (_("unexpected item: %s"), token_buffer);
 702           skip_to_char ('%');
 703         }
 704
 705       prev = t;
 706
 707     }
 708 }
 709
 710
 711
 712 /*--------------------------------------------------------------.
 713 | Copy the union declaration into ATTRS_OBSTACK (and fdefines), |
 714 | where it is made into the definition of YYSTYPE, the type of  |
 715 | elements of the parser value stack.                           |
 716 `--------------------------------------------------------------*/
 717
 718 static void
 719 parse_union_decl (void)
 720 {
 721   int c;
 722   int count = 0;
 723
 724   if (typed)
 725     complain (_("multiple %s declarations"), "%union");
 726
 727   typed = 1;
 728
 729   if (!no_lines_flag)
 730     obstack_fgrow2 (&attrs_obstack, "\n#line %d %s\n",
 731                     lineno, quotearg_style (c_quoting_style, infile));
 732   else
 733     obstack_1grow (&attrs_obstack, '\n');
 734
 735   obstack_grow_string (&attrs_obstack, "typedef union");
 736   if (defines_flag)
 737     obstack_grow_string (&defines_obstack, "typedef union");
 738
 739   c = getc (finput);
 740
 741   while (c != EOF)
 742     {
 743       obstack_1grow (&attrs_obstack, c);
 744       if (defines_flag)
 745         obstack_1grow (&defines_obstack, c);
 746
 747       switch (c)
 748         {
 749         case '\n':
 750           lineno++;
 751           break;
 752
 753         case '/':
 754           copy_comment2 (finput, &defines_obstack, &attrs_obstack);
 755           break;
 756
 757         case '{':
 758           count++;
 759           break;
 760
 761         case '}':
 762           if (count == 0)
 763             complain (_("unmatched %s"), "`}'");
 764           count--;
 765           if (count <= 0)
 766             {
 767               obstack_grow_string (&attrs_obstack, " YYSTYPE;\n");
 768               if (defines_flag)
 769                 obstack_grow_string (&defines_obstack, " YYSTYPE;\n");
 770               /* JF don't choke on trailing semi */
 771               c = skip_white_space ();
 772               if (c != ';')
 773                 ungetc (c, finput);
 774               return;
 775             }
 776         }
 777
 778       c = getc (finput);
 779     }
 780 }
 781
 782
 783 /*-------------------------------------------------------.
 784 | Parse the declaration %expect N which says to expect N |
 785 | shift-reduce conflicts.                                |
 786 `-------------------------------------------------------*/
 787
 788 static void
 789 parse_expect_decl (void)
 790 {
 791   int c = skip_white_space ();
 792   ungetc (c, finput);
 793
 794   if (!isdigit (c))
 795     complain (_("argument of %%expect is not an integer"));
 796   else
 797     expected_conflicts = read_signed_integer (finput);
 798 }
 799
 800
 801 /*-------------------------------------------------------------------.
 802 | Parse what comes after %thong.  the full syntax is                 |
 803 |                                                                    |
 804 |                %thong <type> token number literal                  |
 805 |                                                                    |
 806 | the <type> or number may be omitted.  The number specifies the     |
 807 | user_token_number.                                                 |
 808 |                                                                    |
 809 | Two symbols are entered in the table, one for the token symbol and |
 810 | one for the literal.  Both are given the <type>, if any, from the  |
 811 | declaration.  The ->user_token_number of the first is SALIAS and   |
 812 | the ->user_token_number of the second is set to the number, if     |
 813 | any, from the declaration.  The two symbols are linked via         |
 814 | pointers in their ->alias fields.                                  |
 815 |                                                                    |
 816 | During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter,    |
 817 | only the literal string is retained it is the literal string that  |
 818 | is output to yytname                                               |
 819 `-------------------------------------------------------------------*/
 820
 821 static void
 822 parse_thong_decl (void)
 823 {
 824   int token;
 825   struct bucket *symbol;
 826   char *typename = 0;
 827   int usrtoknum;
 828
 829   translations = 1;
 830   token = lex ();               /* fetch typename or first token */
 831   if (token == TYPENAME)
 832     {
 833       typename = xstrdup (token_buffer);
 834       value_components_used = 1;
 835       token = lex ();           /* fetch first token */
 836     }
 837
 838   /* process first token */
 839
 840   if (token != IDENTIFIER)
 841     {
 842       complain (_("unrecognized item %s, expected an identifier"),
 843                 token_buffer);
 844       skip_to_char ('%');
 845       return;
 846     }
 847   symval->class = token_sym;
 848   symval->type_name = typename;
 849   symval->user_token_number = SALIAS;
 850   symbol = symval;
 851
 852   token = lex ();               /* get number or literal string */
 853
 854   if (token == NUMBER)
 855     {
 856       usrtoknum = numval;
 857       token = lex ();           /* okay, did number, now get literal */
 858     }
 859   else
 860     usrtoknum = 0;
 861
 862   /* process literal string token */
 863
 864   if (token != IDENTIFIER || *symval->tag != '\"')
 865     {
 866       complain (_("expected string constant instead of %s"), token_buffer);
 867       skip_to_char ('%');
 868       return;
 869     }
 870   symval->class = token_sym;
 871   symval->type_name = typename;
 872   symval->user_token_number = usrtoknum;
 873
 874   symval->alias = symbol;
 875   symbol->alias = symval;
 876
 877   /* symbol and symval combined are only one symbol.  */
 878   nsyms--;
 879 }
 880
 881
 882 /*----------------------------------------------------------------.
 883 | Read from finput until `%%' is seen.  Discard the `%%'.  Handle |
 884 | any `%' declarations, and copy the contents of any `%{ ... %}'  |
 885 | groups to ATTRS_OBSTACK.                                        |
 886 `----------------------------------------------------------------*/
 887
 888 static void
 889 read_declarations (void)
 890 {
 891   int c;
 892   int tok;
 893
 894   for (;;)
 895     {
 896       c = skip_white_space ();
 897
 898       if (c == '%')
 899         {
 900           tok = parse_percent_token ();
 901
 902           switch (tok)
 903             {
 904             case TWO_PERCENTS:
 905               return;
 906
 907             case PERCENT_LEFT_CURLY:
 908               copy_definition ();
 909               break;
 910
 911             case TOKEN:
 912               parse_token_decl (token_sym, nterm_sym);
 913               break;
 914
 915             case NTERM:
 916               parse_token_decl (nterm_sym, token_sym);
 917               break;
 918
 919             case TYPE:
 920               parse_type_decl ();
 921               break;
 922
 923             case START:
 924               parse_start_decl ();
 925               break;
 926
 927             case UNION:
 928               parse_union_decl ();
 929               break;
 930
 931             case EXPECT:
 932               parse_expect_decl ();
 933               break;
 934             case THONG:
 935               parse_thong_decl ();
 936               break;
 937
 938             case LEFT:
 939               parse_assoc_decl (left_assoc);
 940               break;
 941
 942             case RIGHT:
 943               parse_assoc_decl (right_assoc);
 944               break;
 945
 946             case NONASSOC:
 947               parse_assoc_decl (non_assoc);
 948               break;
 949
 950             case SEMANTIC_PARSER:
 951               semantic_parser = 1;
 952               break;
 953
 954             case PURE_PARSER:
 955               pure_parser = 1;
 956               break;
 957
 958             case NOOP:
 959               break;
 960
 961             default:
 962               complain (_("unrecognized: %s"), token_buffer);
 963               skip_to_char ('%');
 964             }
 965         }
 966       else if (c == EOF)
 967         fatal (_("no input grammar"));
 968       else
 969         {
 970           char buf[] = "c";
 971           buf[0] = c;
 972           complain (_("unknown character: %s"), quote (buf));
 973           skip_to_char ('%');
 974         }
 975     }
 976 }
 977 \f
 978 /*-------------------------------------------------------------------.
 979 | Assuming that a `{' has just been seen, copy everything up to the  |
 980 | matching `}' into the actions file.  STACK_OFFSET is the number of |
 981 | values in the current rule so far, which says where to find `$0'   |
 982 | with respect to the top of the stack.                              |
 983 `-------------------------------------------------------------------*/
 984
 985 static void
 986 copy_action (symbol_list *rule, int stack_offset)
 987 {
 988   int c;
 989   int count;
 990   char buf[4096];
 991
 992   /* offset is always 0 if parser has already popped the stack pointer */
 993   if (semantic_parser)
 994     stack_offset = 0;
 995
 996   sprintf (buf, "\ncase %d:\n", nrules);
 997   obstack_grow (&action_obstack, buf, strlen (buf));
 998
 999   if (!no_lines_flag)
1000     {
1001       sprintf (buf, "#line %d %s\n",
1002                lineno, quotearg_style (c_quoting_style, infile));
1003       obstack_grow (&action_obstack, buf, strlen (buf));
1004     }
1005   obstack_1grow (&action_obstack, '{');
1006
1007   count = 1;
1008   c = getc (finput);
1009
1010   while (count > 0)
1011     {
1012       while (c != '}')
1013         {
1014           switch (c)
1015             {
1016             case '\n':
1017               obstack_1grow (&action_obstack, c);
1018               lineno++;
1019               break;
1020
1021             case '{':
1022               obstack_1grow (&action_obstack, c);
1023               count++;
1024               break;
1025
1026             case '\'':
1027             case '"':
1028               copy_string (finput, &action_obstack, c);
1029               break;
1030
1031             case '/':
1032               copy_comment (finput, &action_obstack);
1033               break;
1034
1035             case '$':
1036               copy_dollar (finput, &action_obstack,
1037                            rule, stack_offset);
1038               break;
1039
1040             case '@':
1041               copy_at (finput, &action_obstack,
1042                        stack_offset);
1043               break;
1044
1045             case EOF:
1046               fatal (_("unmatched %s"), "`{'");
1047
1048             default:
1049               obstack_1grow (&action_obstack, c);
1050             }
1051
1052           c = getc (finput);
1053         }
1054
1055       /* above loop exits when c is '}' */
1056
1057       if (--count)
1058         {
1059           obstack_1grow (&action_obstack, c);
1060           c = getc (finput);
1061         }
1062     }
1063
1064   obstack_grow_string (&action_obstack, ";\n    break;}");
1065 }
1066 \f
1067 /*-------------------------------------------------------------------.
1068 | After `%guard' is seen in the input file, copy the actual guard    |
1069 | into the guards file.  If the guard is followed by an action, copy |
1070 | that into the actions file.  STACK_OFFSET is the number of values  |
1071 | in the current rule so far, which says where to find `$0' with     |
1072 | respect to the top of the stack, for the simple parser in which    |
1073 | the stack is not popped until after the guard is run.              |
1074 `-------------------------------------------------------------------*/
1075
1076 static void
1077 copy_guard (symbol_list *rule, int stack_offset)
1078 {
1079   int c;
1080   int count;
1081   int brace_flag = 0;
1082
1083   /* offset is always 0 if parser has already popped the stack pointer */
1084   if (semantic_parser)
1085     stack_offset = 0;
1086
1087   obstack_fgrow1 (&guard_obstack, "\ncase %d:\n", nrules);
1088   if (!no_lines_flag)
1089     obstack_fgrow2 (&guard_obstack, "#line %d %s\n",
1090                     lineno, quotearg_style (c_quoting_style, infile));
1091   obstack_1grow (&guard_obstack, '{');
1092
1093   count = 0;
1094   c = getc (finput);
1095
1096   while (brace_flag ? (count > 0) : (c != ';'))
1097     {
1098       switch (c)
1099         {
1100         case '\n':
1101           obstack_1grow (&guard_obstack, c);
1102           lineno++;
1103           break;
1104
1105         case '{':
1106           obstack_1grow (&guard_obstack, c);
1107           brace_flag = 1;
1108           count++;
1109           break;
1110
1111         case '}':
1112           obstack_1grow (&guard_obstack, c);
1113           if (count > 0)
1114             count--;
1115           else
1116             {
1117               complain (_("unmatched %s"), "`}'");
1118               c = getc (finput);        /* skip it */
1119             }
1120           break;
1121
1122         case '\'':
1123         case '"':
1124           copy_string (finput, &guard_obstack, c);
1125           break;
1126
1127         case '/':
1128           copy_comment (finput, &guard_obstack);
1129           break;
1130
1131         case '$':
1132           copy_dollar (finput, &guard_obstack, rule, stack_offset);
1133           break;
1134
1135         case '@':
1136           copy_at (finput, &guard_obstack, stack_offset);
1137           break;
1138
1139         case EOF:
1140           fatal ("%s", _("unterminated %guard clause"));
1141
1142         default:
1143           obstack_1grow (&guard_obstack, c);
1144         }
1145
1146       if (c != '}' || count != 0)
1147         c = getc (finput);
1148     }
1149
1150   c = skip_white_space ();
1151
1152   obstack_grow_string (&guard_obstack, ";\n    break;}");
1153   if (c == '{')
1154     copy_action (rule, stack_offset);
1155   else if (c == '=')
1156     {
1157       c = getc (finput);        /* why not skip_white_space -wjh */
1158       if (c == '{')
1159         copy_action (rule, stack_offset);
1160     }
1161   else
1162     ungetc (c, finput);
1163 }
1164 \f
1165
1166 static void
1167 record_rule_line (void)
1168 {
1169   /* Record each rule's source line number in rline table.  */
1170
1171   if (nrules >= rline_allocated)
1172     {
1173       rline_allocated = nrules * 2;
1174       rline = XREALLOC (rline, short, rline_allocated);
1175     }
1176   rline[nrules] = lineno;
1177 }
1178
1179
1180 /*-------------------------------------------------------------------.
1181 | Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1182 | with the user's names.                                             |
1183 `-------------------------------------------------------------------*/
1184
1185 static bucket *
1186 gensym (void)
1187 {
1188   bucket *sym;
1189
1190   sprintf (token_buffer, "@%d", ++gensym_count);
1191   sym = getsym (token_buffer);
1192   sym->class = nterm_sym;
1193   sym->value = nvars++;
1194   return sym;
1195 }
1196
#if 0
/*-------------------------------------------------------------------.
| Disabled code.  Read a %type declaration: a type name followed by  |
| identifiers, and record the type name in each identifier's symbol. |
| Its only caller is the `#if 0' part of readgram.                   |
`-------------------------------------------------------------------*/

static int
get_type (void)
{
  char *type;
  int tok = lex ();

  if (tok != TYPENAME)
    {
      complain (_("invalid %s declaration"), "%type");
      return tok;
    }

  type = xstrdup (token_buffer);

  for (;;)
    {
      tok = lex ();

      /* A semicolon ends the declaration; hand back the token that
         follows it.  */
      if (tok == SEMICOLON)
        return lex ();

      /* Commas between the identifiers are simply skipped.  */
      if (tok == COMMA)
        continue;

      /* Any other token ends the declaration and is returned.  */
      if (tok != IDENTIFIER)
        return tok;

      if (symval->type_name == NULL)
        symval->type_name = type;
      else if (strcmp (type, symval->type_name) != 0)
        complain (_("type redeclaration for %s"), symval->tag);
    }
}

#endif
1248 \f
/*------------------------------------------------------------------.
| Parse the input grammar into one symbol_list structure.  Each     |
| rule is represented by a sequence of symbols: the left hand side  |
| followed by the contents of the right hand side, followed by a    |
| null pointer instead of a symbol to terminate the rule.  The next |
| symbol is the lhs of the following rule.                          |
|                                                                   |
| All guards and actions are copied out to the appropriate files,   |
| labelled by the rule number they apply to.                        |
`------------------------------------------------------------------*/

static void
readgram (void)
{
  /* Current token, as returned by lex.  */
  int t;
  /* Left hand side of the rule being read.  It is kept across
     iterations so that a `|' alternative reuses the previous lhs.  */
  bucket *lhs = NULL;
  /* Scratch pointer for the link being created.  */
  symbol_list *p;
  /* Last link appended to the grammar list so far; NULL while the
     list is still empty.  */
  symbol_list *p1;
  bucket *bp;

  /* Points to first symbol_list of current rule.  Its symbol is the
     lhs of the rule.  */
  symbol_list *crule;
  /* Points to the symbol_list preceding crule.  */
  symbol_list *crule1;

  p1 = NULL;

  t = lex ();

  /* Each iteration reads one rule: either `lhs : rhs...' or a
     `| rhs...' alternative for the previous lhs.  */
  while (t != TWO_PERCENTS && t != ENDFILE)
    {
      if (t == IDENTIFIER || t == BAR)
        {
          /* Nonzero when the last rhs element read was an action.  */
          int action_flag = 0;
          /* Number of symbols in rhs of this rule so far */
          int rulelength = 0;
          int xactions = 0;     /* JF for error checking */
          /* First symbol of the rhs, used for the default-action
             type check below.  */
          bucket *first_rhs = 0;

          if (t == IDENTIFIER)
            {
              lhs = symval;

              /* The lhs of the first rule becomes the start symbol
                 unless start_flag is already set.  */
              if (!start_flag)
                {
                  startval = lhs;
                  start_flag = 1;
                }

              t = lex ();
              if (t != COLON)
                {
                  complain (_("ill-formed rule: initial symbol not followed by colon"));
                  unlex (t);
                }
            }

          if (nrules == 0 && t == BAR)
            {
              complain (_("grammar starts with vertical bar"));
              lhs = symval;     /* BOGUS: use a random symval */
            }
          /* start a new rule and record its lhs.  */

          nrules++;
          nitems++;

          record_rule_line ();

          p = XCALLOC (symbol_list, 1);
          p->sym = lhs;

          /* Append the new link to the list, remembering the link
             that precedes it in crule1.  */
          crule1 = p1;
          if (p1)
            p1->next = p;
          else
            grammar = p;

          p1 = p;
          crule = p;

          /* mark the rule's lhs as a nonterminal if not already so.  */

          if (lhs->class == unknown_sym)
            {
              lhs->class = nterm_sym;
              lhs->value = nvars;
              nvars++;
            }
          else if (lhs->class == token_sym)
            complain (_("rule given for %s, which is a token"), lhs->tag);

          /* read the rhs of the rule.  */

          for (;;)
            {
              t = lex ();
              if (t == PREC)
                {
                  /* The token after %prec is read and SYMVAL is taken
                     as the precedence symbol; T itself is not checked
                     here.  */
                  t = lex ();
                  crule->ruleprec = symval;
                  t = lex ();
                }

              if (!(t == IDENTIFIER || t == LEFT_CURLY))
                break;

              /* If next token is an identifier, see if a colon follows it.
                 If one does, exit this rule now.  */
              if (t == IDENTIFIER)
                {
                  bucket *ssave;
                  int t1;

                  /* Look one token ahead, then restore SYMVAL to the
                     value it had before the look-ahead.  */
                  ssave = symval;
                  t1 = lex ();
                  unlex (t1);
                  symval = ssave;
                  if (t1 == COLON)
                    break;

                  if (!first_rhs)       /* JF */
                    first_rhs = symval;
                  /* Not followed by colon =>
                     process as part of this rule's rhs.  */
                }

              /* If we just passed an action, that action was in the middle
                 of a rule, so make a dummy rule to reduce it to a
                 non-terminal.  */
              if (action_flag)
                {
                  bucket *sdummy;

                  /* Since the action was written out with this rule's
                     number, we must give the new rule this number by
                     inserting the new rule before it.  */

                  /* Make a dummy nonterminal, a gensym.  */
                  sdummy = gensym ();

                  /* Make a new rule, whose body is empty,
                     before the current one, so that the action
                     just read can belong to it.  */
                  nrules++;
                  nitems++;
                  record_rule_line ();
                  p = XCALLOC (symbol_list, 1);
                  if (crule1)
                    crule1->next = p;
                  else
                    grammar = p;
                  p->sym = sdummy;
                  /* A second, symbol-less link ends the dummy rule.
                     It becomes the link that precedes crule.  */
                  crule1 = XCALLOC (symbol_list, 1);
                  p->next = crule1;
                  crule1->next = crule;

                  /* Insert the dummy generated by that rule into this
                     rule.  */
                  nitems++;
                  p = XCALLOC (symbol_list, 1);
                  p->sym = sdummy;
                  p1->next = p;
                  p1 = p;

                  action_flag = 0;
                }

              if (t == IDENTIFIER)
                {
                  /* Append the symbol to the rhs of the current rule.  */
                  nitems++;
                  p = XCALLOC (symbol_list, 1);
                  p->sym = symval;
                  p1->next = p;
                  p1 = p;
                }
              else              /* handle an action.  */
                {
                  copy_action (crule, rulelength);
                  action_flag = 1;
                  xactions++;   /* JF */
                }
              /* Both symbols and actions count towards the length.  */
              rulelength++;
            }                   /* end of  read rhs of rule */

          /* Put an empty link in the list to mark the end of this rule  */
          p = XCALLOC (symbol_list, 1);
          p1->next = p;
          p1 = p;

          if (t == PREC)
            {
              complain (_("two @prec's in a row"));
              t = lex ();
              crule->ruleprec = symval;
              t = lex ();
            }
          if (t == GUARD)
            {
              if (!semantic_parser)
                complain (_("%%guard present but %%semantic_parser not specified"));

              copy_guard (crule, rulelength);
              t = lex ();
            }
          else if (t == LEFT_CURLY)
            {
              /* This case never occurs -wjh */
              if (action_flag)
                complain (_("two actions at end of one rule"));
              copy_action (crule, rulelength);
              action_flag = 1;
              xactions++;       /* -wjh */
              t = lex ();
            }
          /* If $$ is being set in default way, report if any type
             mismatch.  */
          else if (!xactions
                   && first_rhs && lhs->type_name != first_rhs->type_name)
            {
              /* The pointers differ; it is a clash unless both names
                 exist and compare equal as strings.  */
              if (lhs->type_name == 0
                  || first_rhs->type_name == 0
                  || strcmp (lhs->type_name, first_rhs->type_name))
                complain (_("type clash (`%s' `%s') on default action"),
                          lhs->type_name ? lhs->type_name : "",
                          first_rhs->type_name ? first_rhs->type_name : "");
            }
          /* Warn if there is no default for $$ but we need one.  */
          else if (!xactions && !first_rhs && lhs->type_name != 0)
            complain (_("empty rule for typed nonterminal, and no action"));
          /* The semicolon that ends a rule is optional.  */
          if (t == SEMICOLON)
            t = lex ();
        }
#if 0
      /* these things can appear as alternatives to rules.  */
/* NO, they cannot.
        a) none of the documentation allows them
        b) most of them scan forward until finding a next %
                thus they may swallow lots of intervening rules
*/
      else if (t == TOKEN)
        {
          parse_token_decl (token_sym, nterm_sym);
          t = lex ();
        }
      else if (t == NTERM)
        {
          parse_token_decl (nterm_sym, token_sym);
          t = lex ();
        }
      else if (t == TYPE)
        {
          t = get_type ();
        }
      else if (t == UNION)
        {
          parse_union_decl ();
          t = lex ();
        }
      else if (t == EXPECT)
        {
          parse_expect_decl ();
          t = lex ();
        }
      else if (t == START)
        {
          parse_start_decl ();
          t = lex ();
        }
#endif

      else
        {
          /* Anything else cannot start a rule: report it and move on
             to the next token.  */
          complain (_("invalid input: %s"), token_buffer);
          t = lex ();
        }
    }

  /* grammar has been read.  Do some checking */

  if (nsyms > MAXSHORT)
    fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
           MAXSHORT);
  if (nrules == 0)
    fatal (_("no rules in the input grammar"));

  /* JF put out same default YYSTYPE as YACC does */
  if (typed == 0
      && !value_components_used)
    {
      /* We used to use `unsigned long' as YYSTYPE on MSDOS,
         but it seems better to be consistent.
         Most programs should declare their own type anyway.  */
      obstack_grow_string (&attrs_obstack,
                           "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
      if (defines_flag)
        obstack_grow_string (&defines_obstack, "\
#ifndef YYSTYPE\n\
# define YYSTYPE int\n\
#endif\n");
    }

  /* Report any undefined symbols and consider them nonterminals.  */

  for (bp = firstsymbol; bp; bp = bp->next)
    if (bp->class == unknown_sym)
      {
        complain (_
                  ("symbol %s is used, but is not defined as a token and has no rules"),
                  bp->tag);
        bp->class = nterm_sym;
        bp->value = nvars++;
      }

  /* Every symbol that is not a nonterminal is counted as a token.  */
  ntokens = nsyms - nvars;
}
1566 \f
1567 /*--------------------------------------------------------------.
1568 | For named tokens, but not literal ones, define the name.  The |
1569 | value is the user token number.                               |
1570 `--------------------------------------------------------------*/
1571
1572 static void
1573 output_token_defines (struct obstack *oout)
1574 {
1575   bucket *bp;
1576   char *cp, *symbol;
1577   char c;
1578
1579   for (bp = firstsymbol; bp; bp = bp->next)
1580     {
1581       symbol = bp->tag;         /* get symbol */
1582
1583       if (bp->value >= ntokens)
1584         continue;
1585       if (bp->user_token_number == SALIAS)
1586         continue;
1587       if ('\'' == *symbol)
1588         continue;               /* skip literal character */
1589       if (bp == errtoken)
1590         continue;               /* skip error token */
1591       if ('\"' == *symbol)
1592         {
1593           /* use literal string only if given a symbol with an alias */
1594           if (bp->alias)
1595             symbol = bp->alias->tag;
1596           else
1597             continue;
1598         }
1599
1600       /* Don't #define nonliteral tokens whose names contain periods.  */
1601       cp = symbol;
1602       while ((c = *cp++) && c != '.');
1603       if (c != '\0')
1604         continue;
1605
1606       obstack_fgrow2 (oout, "#define\t%s\t%d\n",
1607                       symbol,
1608                       ((translations && !raw_flag)
1609                        ? bp->user_token_number : bp->value));
1610       if (semantic_parser)
1611         obstack_fgrow2 (oout, "#define\tT%s\t%d\n", symbol, bp->value);
1612     }
1613
1614   obstack_1grow (oout, '\n');
1615 }
1616
1617
/*------------------------------------------------------------------.
| Assign symbol numbers, and write definition of token names into   |
| FDEFINES.  Set up vectors TAGS and SPREC of names and precedences |
| of symbols.                                                       |
`------------------------------------------------------------------*/

static void
packsymbols (void)
{
  bucket *bp;
  /* Next internal token number to hand out.  Slot 0 is used for the
     `$' entry set up below.  */
  int tokno = 1;
  int i;
  /* Last user token number assigned automatically.  */
  int last_user_token_number;
  static char DOLLAR[] = "$";

  /* int lossage = 0; JF set but not used */

  tags = XCALLOC (char *, nsyms + 1);
  tags[0] = DOLLAR;
  user_toknums = XCALLOC (short, nsyms + 1);
  user_toknums[0] = 0;

  sprec = XCALLOC (short, nsyms);
  sassoc = XCALLOC (short, nsyms);

  /* Automatically assigned user token numbers start above 256.  */
  max_user_token_number = 256;
  last_user_token_number = 256;

  for (bp = firstsymbol; bp; bp = bp->next)
    {
      if (bp->class == nterm_sym)
        {
          /* Nonterminals are numbered after all the tokens.  */
          bp->value += ntokens;
        }
      else if (bp->alias)
        {
          /* this symbol and its alias are a single token defn.
             allocate a tokno, and assign to both check agreement of
             ->prec and ->assoc fields and make both the same */
          if (bp->value == 0)
            bp->value = bp->alias->value = tokno++;

          if (bp->prec != bp->alias->prec)
            {
              if (bp->prec != 0 && bp->alias->prec != 0
                  && bp->user_token_number == SALIAS)
                complain (_("conflicting precedences for %s and %s"),
                          bp->tag, bp->alias->tag);
              /* A nonzero precedence on this symbol wins; otherwise
                 the alias's precedence is copied over.  */
              if (bp->prec != 0)
                bp->alias->prec = bp->prec;
              else
                bp->prec = bp->alias->prec;
            }

          if (bp->assoc != bp->alias->assoc)
            {
              if (bp->assoc != 0 && bp->alias->assoc != 0
                  && bp->user_token_number == SALIAS)
                complain (_("conflicting assoc values for %s and %s"),
                          bp->tag, bp->alias->tag);
              /* Same rule as for the precedence above.  */
              if (bp->assoc != 0)
                bp->alias->assoc = bp->assoc;
              else
                bp->assoc = bp->alias->assoc;
            }

          if (bp->user_token_number == SALIAS)
            continue;           /* do not do processing below for SALIASs */

        }
      else                      /* bp->class == token_sym */
        {
          bp->value = tokno++;
        }

      if (bp->class == token_sym)
        {
          /* Give a user token number to tokens that have none, and
             keep track of the largest one in use.  */
          if (translations && !(bp->user_token_number))
            bp->user_token_number = ++last_user_token_number;
          if (bp->user_token_number > max_user_token_number)
            max_user_token_number = bp->user_token_number;
        }

      /* Record the symbol in the tables indexed by symbol number.  */
      tags[bp->value] = bp->tag;
      user_toknums[bp->value] = bp->user_token_number;
      sprec[bp->value] = bp->prec;
      sassoc[bp->value] = bp->assoc;

    }

  if (translations)
    {
      int j;

      /* Table mapping user token numbers to internal token numbers.  */
      token_translations = XCALLOC (short, max_user_token_number + 1);

      /* initialize all entries for literal tokens to 2, the internal
         token number for $undefined., which represents all invalid
         inputs.  */
      for (j = 0; j <= max_user_token_number; j++)
        token_translations[j] = 2;

      for (bp = firstsymbol; bp; bp = bp->next)
        {
          if (bp->value >= ntokens)
            continue;           /* non-terminal */
          if (bp->user_token_number == SALIAS)
            continue;
          /* An entry that no longer holds 2 has already been claimed
             by another token.  */
          if (token_translations[bp->user_token_number] != 2)
            complain (_("tokens %s and %s both assigned number %d"),
                      tags[token_translations[bp->user_token_number]],
                      bp->tag, bp->user_token_number);
          token_translations[bp->user_token_number] = bp->value;
        }
    }

  error_token_number = errtoken->value;

  if (!no_parser_flag)
    output_token_defines (&table_obstack);

  if (startval->class == unknown_sym)
    fatal (_("the start symbol %s is undefined"), startval->tag);
  else if (startval->class == token_sym)
    fatal (_("the start symbol %s is a token"), startval->tag);

  start_symbol = startval->value;

  if (defines_flag)
    {
      output_token_defines (&defines_obstack);

      if (!pure_parser)
        {
          /* Declare the semantic value variable, honoring the name
             prefix when one is set.  */
          if (spec_name_prefix)
            obstack_fgrow1 (&defines_obstack, "\nextern YYSTYPE %slval;\n",
                            spec_name_prefix);
          else
            obstack_grow_string (&defines_obstack,
                                 "\nextern YYSTYPE yylval;\n");
        }

      /* With the semantic parser, also define a name prefixed with
         `NT' for each nonterminal.  */
      if (semantic_parser)
        for (i = ntokens; i < nsyms; i++)
          {
            /* don't make these for dummy nonterminals made by gensym.  */
            if (*tags[i] != '@')
               obstack_fgrow2 (&defines_obstack,
                               "#define\tNT%s\t%d\n", tags[i], i);
          }
#if 0
      /* `fdefines' is now a temporary file, so we need to copy its
         contents in `done', so we can't close it here.  */
      fclose (fdefines);
      fdefines = NULL;
#endif
    }
}
1776
1777
1778 /*---------------------------------------------------------------.
1779 | Convert the rules into the representation using RRHS, RLHS and |
1780 | RITEMS.                                                        |
1781 `---------------------------------------------------------------*/
1782
1783 static void
1784 packgram (void)
1785 {
1786   int itemno;
1787   int ruleno;
1788   symbol_list *p;
1789
1790   bucket *ruleprec;
1791
1792   ritem = XCALLOC (short, nitems + 1);
1793   rlhs = XCALLOC (short, nrules) - 1;
1794   rrhs = XCALLOC (short, nrules) - 1;
1795   rprec = XCALLOC (short, nrules) - 1;
1796   rprecsym = XCALLOC (short, nrules) - 1;
1797   rassoc = XCALLOC (short, nrules) - 1;
1798
1799   itemno = 0;
1800   ruleno = 1;
1801
1802   p = grammar;
1803   while (p)
1804     {
1805       rlhs[ruleno] = p->sym->value;
1806       rrhs[ruleno] = itemno;
1807       ruleprec = p->ruleprec;
1808
1809       p = p->next;
1810       while (p && p->sym)
1811         {
1812           ritem[itemno++] = p->sym->value;
1813           /* A rule gets by default the precedence and associativity
1814              of the last token in it.  */
1815           if (p->sym->class == token_sym)
1816             {
1817               rprec[ruleno] = p->sym->prec;
1818               rassoc[ruleno] = p->sym->assoc;
1819             }
1820           if (p)
1821             p = p->next;
1822         }
1823
1824       /* If this rule has a %prec,
1825          the specified symbol's precedence replaces the default.  */
1826       if (ruleprec)
1827         {
1828           rprec[ruleno] = ruleprec->prec;
1829           rassoc[ruleno] = ruleprec->assoc;
1830           rprecsym[ruleno] = ruleprec->value;
1831         }
1832
1833       ritem[itemno++] = -ruleno;
1834       ruleno++;
1835
1836       if (p)
1837         p = p->next;
1838     }
1839
1840   ritem[itemno] = 0;
1841 }
1842 \f
1843 /*-------------------------------------------------------------------.
1844 | Read in the grammar specification and record it in the format      |
1845 | described in gram.h.  All guards are copied into the GUARD_OBSTACK |
1846 | and all actions into ACTION_OBSTACK, in each case forming the body |
1847 | of a C function (YYGUARD or YYACTION) which contains a switch      |
1848 | statement to decide which guard or action to execute.              |
1849 `-------------------------------------------------------------------*/
1850
1851 void
1852 reader (void)
1853 {
1854   start_flag = 0;
1855   startval = NULL;              /* start symbol not specified yet. */
1856
1857 #if 0
1858   /* initially assume token number translation not needed.  */
1859   translations = 0;
1860 #endif
1861   /* Nowadays translations is always set to 1, since we give `error' a
1862      user-token-number to satisfy the Posix demand for YYERRCODE==256.
1863    */
1864   translations = 1;
1865
1866   nsyms = 1;
1867   nvars = 0;
1868   nrules = 0;
1869   nitems = 0;
1870   rline_allocated = 10;
1871   rline = XCALLOC (short, rline_allocated);
1872
1873   typed = 0;
1874   lastprec = 0;
1875
1876   gensym_count = 0;
1877
1878   semantic_parser = 0;
1879   pure_parser = 0;
1880
1881   grammar = NULL;
1882
1883   init_lex ();
1884   lineno = 1;
1885
1886   /* Initialize the symbol table.  */
1887   tabinit ();
1888   /* Construct the error token */
1889   errtoken = getsym ("error");
1890   errtoken->class = token_sym;
1891   errtoken->user_token_number = 256;    /* Value specified by POSIX.  */
1892   /* Construct a token that represents all undefined literal tokens.
1893      It is always token number 2.  */
1894   undeftoken = getsym ("$undefined.");
1895   undeftoken->class = token_sym;
1896   undeftoken->user_token_number = 2;
1897
1898   /* Read the declaration section.  Copy %{ ... %} groups to
1899      TABLE_OBSTACK and FDEFINES file.  Also notice any %token, %left,
1900      etc. found there.  */
1901   obstack_1grow (&table_obstack, '\n');
1902   obstack_fgrow3 (&table_obstack, "\
1903 /* %s, made from %s\n\
1904    by GNU bison %s.  */\n\
1905 \n",
1906                   no_parser_flag ? "Bison-generated parse tables" : "A Bison parser",
1907                   infile, VERSION);
1908
1909   obstack_grow_string (&table_obstack,
1910                        "#define YYBISON 1  /* Identify Bison output.  */\n\n");
1911   read_declarations ();
1912   /* Start writing the guard and action files, if they are needed.  */
1913   output_headers ();
1914   /* Read in the grammar, build grammar in list form.  Write out
1915      guards and actions.  */
1916   readgram ();
1917   /* Now we know whether we need the line-number stack.  If we do,
1918      write its type into the .tab.h file.  */
1919   if (defines_flag)
1920     reader_output_yylsp (&defines_obstack);
1921   /* Write closing delimiters for actions and guards.  */
1922   output_trailers ();
1923   if (locations_flag)
1924     obstack_grow_string (&table_obstack, "#define YYLSP_NEEDED 1\n\n");
1925   /* Assign the symbols their symbol numbers.  Write #defines for the
1926      token symbols into FDEFINES if requested.  */
1927   packsymbols ();
1928   /* Convert the grammar into the format described in gram.h.  */
1929   packgram ();
1930   /* Free the symbol table data structure since symbols are now all
1931      referred to by symbol number.  */
1932   free_symtab ();
1933 }
1934
1935
1936 /*------------------------------------------------------------------.
1937 | Define YYLTYPE.  Cannot be in the skeleton since we might have to |
1938 | output it in the headers if --defines is used.                    |
1939 `------------------------------------------------------------------*/
1940
1941 void
1942 reader_output_yylsp (struct obstack *oout)
1943 {
1944   if (locations_flag)
1945     obstack_grow_string (oout, "\
1946 \n\
1947 #ifndef YYLTYPE\n\
1948 typedef struct yyltype\n\
1949 {\n\
1950   int first_line;\n\
1951   int first_column;\n\
1952 \n\
1953   int last_line;\n\
1954   int last_column;\n\
1955 } yyltype;\n\
1956 \n\
1957 # define YYLTYPE yyltype\n\
1958 #endif\n\
1959 \n");
1960 }