src/reader.c

   1 /* Input parser for bison
   2    Copyright 1984, 1986, 1989, 1992, 1998, 2000
   3    Free Software Foundation, Inc.
   4
   5    This file is part of Bison, the GNU Compiler Compiler.
   6
   7    Bison is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 2, or (at your option)
  10    any later version.
  11
  12    Bison is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with Bison; see the file COPYING.  If not, write to
  19    the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20    Boston, MA 02111-1307, USA.  */
  21
  22
#include "system.h"
#include <limits.h>
#include "obstack.h"
#include "quotearg.h"
#include "quote.h"
#include "getargs.h"
#include "files.h"
#include "xalloc.h"
#include "symtab.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
  37
  38 /* Number of slots allocated (but not necessarily used yet) in `rline'  */
  39 static int rline_allocated;
  40
  41 typedef struct symbol_list
  42 {
  43   struct symbol_list *next;
  44   bucket *sym;
  45   bucket *ruleprec;
  46 }
  47 symbol_list;
  48
  49 int lineno;
  50 char **tags;
  51 short *user_toknums;
  52 static symbol_list *grammar;
  53 static int start_flag;
  54 static bucket *startval;
  55
  56 /* Nonzero if components of semantic values are used, implying
  57    they must be unions.  */
  58 static int value_components_used;
  59
  60 /* Nonzero if %union has been seen.  */
  61 static int typed;
  62
  63 /* Incremented for each %left, %right or %nonassoc seen */
  64 static int lastprec;
  65
  66 static bucket *errtoken;
  67 static bucket *undeftoken;
  68 \f
  69
  70 /*===================\
  71 | Low level lexing.  |
  72 \===================*/
  73
  74 static void
  75 skip_to_char (int target)
  76 {
  77   int c;
  78   if (target == '\n')
  79     complain (_("   Skipping to next \\n"));
  80   else
  81     complain (_("   Skipping to next %c"), target);
  82
  83   do
  84     c = skip_white_space ();
  85   while (c != target && c != EOF);
  86   if (c != EOF)
  87     ungetc (c, finput);
  88 }
  89
  90
  91 /*---------------------------------------------------------.
  92 | Read a signed integer from STREAM and return its value.  |
  93 `---------------------------------------------------------*/
  94
  95 static inline int
  96 read_signed_integer (FILE *stream)
  97 {
  98   int c = getc (stream);
  99   int sign = 1;
 100   int n = 0;
 101
 102   if (c == '-')
 103     {
 104       c = getc (stream);
 105       sign = -1;
 106     }
 107
 108   while (isdigit (c))
 109     {
 110       n = 10 * n + (c - '0');
 111       c = getc (stream);
 112     }
 113
 114   ungetc (c, stream);
 115
 116   return sign * n;
 117 }
 118 \f
 119 /*--------------------------------------------------------------.
 120 | Get the data type (alternative in the union) of the value for |
 121 | symbol N in rule RULE.                                        |
 122 `--------------------------------------------------------------*/
 123
 124 static char *
 125 get_type_name (int n, symbol_list * rule)
 126 {
 127   int i;
 128   symbol_list *rp;
 129
 130   if (n < 0)
 131     {
 132       complain (_("invalid $ value"));
 133       return NULL;
 134     }
 135
 136   rp = rule;
 137   i = 0;
 138
 139   while (i < n)
 140     {
 141       rp = rp->next;
 142       if (rp == NULL || rp->sym == NULL)
 143         {
 144           complain (_("invalid $ value"));
 145           return NULL;
 146         }
 147       i++;
 148     }
 149
 150   return rp->sym->type_name;
 151 }
 152 \f
 153 /*------------------------------------------------------------.
 154 | Dump the string from FIN to OOUT if non null.  MATCH is the |
 155 | delimiter of the string (either ' or ").                    |
 156 `------------------------------------------------------------*/
 157
 158 static inline void
 159 copy_string (FILE *fin, struct obstack *oout, int match)
 160 {
 161   int c;
 162
 163   obstack_1grow (oout, match);
 164
 165   c = getc (fin);
 166
 167   while (c != match)
 168     {
 169       if (c == EOF)
 170         fatal (_("unterminated string at end of file"));
 171       if (c == '\n')
 172         {
 173           complain (_("unterminated string"));
 174           ungetc (c, fin);
 175           c = match;            /* invent terminator */
 176           continue;
 177         }
 178
 179       obstack_1grow (oout, c);
 180
 181       if (c == '\\')
 182         {
 183           c = getc (fin);
 184           if (c == EOF)
 185             fatal (_("unterminated string at end of file"));
 186           obstack_1grow (oout, c);
 187
 188           if (c == '\n')
 189             lineno++;
 190         }
 191
 192       c = getc (fin);
 193     }
 194
 195   obstack_1grow (oout, c);
 196 }
 197
 198
 199 /*-----------------------------------------------------------------.
 200 | Dump the wannabee comment from IN to OUT1 and OUT2 (which can be |
 201 | NULL).  In fact we just saw a `/', which might or might not be a |
 202 | comment.  In any case, copy what we saw.                         |
 203 |                                                                  |
 204 | OUT2 might be NULL.                                              |
 205 `-----------------------------------------------------------------*/
 206
 207 static inline void
 208 copy_comment2 (FILE *fin, struct obstack *oout1, struct obstack *oout2)
 209 {
 210   int cplus_comment;
 211   int ended;
 212   int c;
 213
 214   /* We read a `/', output it. */
 215   obstack_1grow (oout1, '/');
 216   if (oout2)
 217     obstack_1grow (oout2, '/');
 218
 219   switch ((c = getc (fin)))
 220     {
 221     case '/':
 222       cplus_comment = 1;
 223       break;
 224     case '*':
 225       cplus_comment = 0;
 226       break;
 227     default:
 228       ungetc (c, fin);
 229       return;
 230     }
 231
 232   obstack_1grow (oout1, c);
 233   if (oout2)
 234     obstack_1grow (oout2, c);
 235   c = getc (fin);
 236
 237   ended = 0;
 238   while (!ended)
 239     {
 240       if (!cplus_comment && c == '*')
 241         {
 242           while (c == '*')
 243             {
 244               obstack_1grow (oout1, c);
 245               if (oout2)
 246                 obstack_1grow (oout2, c);
 247               c = getc (fin);
 248             }
 249
 250           if (c == '/')
 251             {
 252               obstack_1grow (oout1, c);
 253               if (oout2)
 254                 obstack_1grow (oout2, c);
 255               ended = 1;
 256             }
 257         }
 258       else if (c == '\n')
 259         {
 260           lineno++;
 261           obstack_1grow (oout1, c);
 262           if (oout2)
 263             obstack_1grow (oout2, c);
 264           if (cplus_comment)
 265             ended = 1;
 266           else
 267             c = getc (fin);
 268         }
 269       else if (c == EOF)
 270         fatal (_("unterminated comment"));
 271       else
 272         {
 273           obstack_1grow (oout1, c);
 274           if (oout2)
 275             obstack_1grow (oout2, c);
 276           c = getc (fin);
 277         }
 278     }
 279 }
 280
 281
/*-------------------------------------------------------------------.
| Copy the comment (actually whatever follows the `/' just read)     |
| from FIN to OOUT alone, by calling copy_comment2 with no OOUT2.    |
`-------------------------------------------------------------------*/

static inline void
copy_comment (FILE *fin, struct obstack *oout)
{
  copy_comment2 (fin, oout, NULL);
}
 292
 293
 294 /*-----------------------------------------------------------------.
 295 | FIN is pointing to a location (i.e., a `@').  Output to OOUT a   |
 296 | reference to this location. STACK_OFFSET is the number of values |
 297 | in the current rule so far, which says where to find `$0' with   |
 298 | respect to the top of the stack.                                 |
 299 `-----------------------------------------------------------------*/
 300
 301 static inline void
 302 copy_at (FILE *fin, struct obstack *oout, int stack_offset)
 303 {
 304   int c;
 305
 306   c = getc (fin);
 307   if (c == '$')
 308     {
 309       obstack_sgrow (oout, "yyloc");
 310       locations_flag = 1;
 311     }
 312   else if (isdigit (c) || c == '-')
 313     {
 314       int n;
 315       char buf[4096];
 316
 317       ungetc (c, fin);
 318       n = read_signed_integer (fin);
 319
 320       obstack_fgrow1 (oout, "yylsp[%d]", n - stack_offset);
 321       locations_flag = 1;
 322     }
 323   else
 324     {
 325       char buf[] = "@c";
 326       buf[1] = c;
 327       complain (_("%s is invalid"), quote (buf));
 328     }
 329 }
 330
 331
 332 /*-------------------------------------------------------------------.
 333 | FIN is pointing to a wannabee semantic value (i.e., a `$').        |
 334 |                                                                    |
 335 | Possible inputs: $[<TYPENAME>]($|integer)                          |
 336 |                                                                    |
 337 | Output to OOUT a reference to this semantic value. STACK_OFFSET is |
 338 | the number of values in the current rule so far, which says where  |
 339 | to find `$0' with respect to the top of the stack.                 |
 340 `-------------------------------------------------------------------*/
 341
 342 static inline void
 343 copy_dollar (FILE *fin, struct obstack *oout,
 344              symbol_list *rule, int stack_offset)
 345 {
 346   int c = getc (fin);
 347   char *type_name = NULL;
 348
 349   /* Get the type name if explicit. */
 350   if (c == '<')
 351     {
 352       read_type_name (fin);
 353       type_name = token_buffer;
 354       value_components_used = 1;
 355       c = getc (fin);
 356     }
 357
 358   if (c == '$')
 359     {
 360       obstack_sgrow (oout, "yyval");
 361
 362       if (!type_name)
 363         type_name = get_type_name (0, rule);
 364       if (type_name)
 365         obstack_fgrow1 (oout, ".%s", type_name);
 366       if (!type_name && typed)
 367         complain (_("$$ of `%s' has no declared type"),
 368                   rule->sym->tag);
 369     }
 370   else if (isdigit (c) || c == '-')
 371     {
 372       int n;
 373       ungetc (c, fin);
 374       n = read_signed_integer (fin);
 375
 376       if (!type_name && n > 0)
 377         type_name = get_type_name (n, rule);
 378
 379       obstack_fgrow1 (oout, "yyvsp[%d]", n - stack_offset);
 380
 381       if (type_name)
 382         obstack_fgrow1 (oout, ".%s", type_name);
 383       if (!type_name && typed)
 384         complain (_("$%d of `%s' has no declared type"),
 385                   n, rule->sym->tag);
 386     }
 387   else
 388     {
 389       char buf[] = "$c";
 390       buf[1] = c;
 391       complain (_("%s is invalid"), quote (buf));
 392     }
 393 }
 394 \f
 395 /*-------------------------------------------------------------------.
 396 | Copy the contents of a `%{ ... %}' into the definitions file.  The |
 397 | `%{' has already been read.  Return after reading the `%}'.        |
 398 `-------------------------------------------------------------------*/
 399
 400 static void
 401 copy_definition (void)
 402 {
 403   int c;
 404   /* -1 while reading a character if prev char was %. */
 405   int after_percent;
 406
 407   if (!no_lines_flag)
 408     obstack_fgrow2 (&attrs_obstack, "#line %d %s\n",
 409                     lineno, quotearg_style (c_quoting_style, infile));
 410
 411   after_percent = 0;
 412
 413   c = getc (finput);
 414
 415   for (;;)
 416     {
 417       switch (c)
 418         {
 419         case '\n':
 420           obstack_1grow (&attrs_obstack, c);
 421           lineno++;
 422           break;
 423
 424         case '%':
 425           after_percent = -1;
 426           break;
 427
 428         case '\'':
 429         case '"':
 430           copy_string (finput, &attrs_obstack, c);
 431           break;
 432
 433         case '/':
 434           copy_comment (finput, &attrs_obstack);
 435           break;
 436
 437         case EOF:
 438           fatal ("%s", _("unterminated `%{' definition"));
 439
 440         default:
 441           obstack_1grow (&attrs_obstack, c);
 442         }
 443
 444       c = getc (finput);
 445
 446       if (after_percent)
 447         {
 448           if (c == '}')
 449             return;
 450           obstack_1grow (&attrs_obstack, '%');
 451         }
 452       after_percent = 0;
 453     }
 454 }
 455
 456
 457 /*-------------------------------------------------------------------.
 458 | Parse what comes after %token or %nterm.  For %token, WHAT_IS is   |
 459 | token_sym and WHAT_IS_NOT is nterm_sym.  For %nterm, the arguments |
 460 | are reversed.                                                      |
 461 `-------------------------------------------------------------------*/
 462
 463 static void
 464 parse_token_decl (symbol_class what_is, symbol_class what_is_not)
 465 {
 466   token_t token = 0;
 467   char *typename = 0;
 468
 469   /* The symbol being defined.  */
 470   struct bucket *symbol = NULL;
 471
 472   /* After `%token' and `%nterm', any number of symbols maybe be
 473      defined.  */
 474   for (;;)
 475     {
 476       int tmp_char = ungetc (skip_white_space (), finput);
 477
 478       /* `%' (for instance from `%token', or from `%%' etc.) is the
 479          only valid means to end this declaration.  */
 480       if (tmp_char == '%')
 481         return;
 482       if (tmp_char == EOF)
 483         fatal (_("Premature EOF after %s"), token_buffer);
 484
 485       token = lex ();
 486       if (token == tok_comma)
 487         {
 488           symbol = NULL;
 489           continue;
 490         }
 491       if (token == tok_typename)
 492         {
 493           typename = xstrdup (token_buffer);
 494           value_components_used = 1;
 495           symbol = NULL;
 496         }
 497       else if (token == tok_identifier && *symval->tag == '\"' && symbol)
 498         {
 499           if (symval->alias)
 500             warn (_("symbol `%s' used more than once as a literal string"),
 501                   symval->tag);
 502           else if (symbol->alias)
 503             warn (_("symbol `%s' given more than one literal string"),
 504                   symbol->tag);
 505           else
 506             {
 507               symval->class = token_sym;
 508               symval->type_name = typename;
 509               symval->user_token_number = symbol->user_token_number;
 510               symbol->user_token_number = SALIAS;
 511               symval->alias = symbol;
 512               symbol->alias = symval;
 513               /* symbol and symval combined are only one symbol */
 514               nsyms--;
 515             }
 516           translations = 1;
 517           symbol = NULL;
 518         }
 519       else if (token == tok_identifier)
 520         {
 521           int oldclass = symval->class;
 522           symbol = symval;
 523
 524           if (symbol->class == what_is_not)
 525             complain (_("symbol %s redefined"), symbol->tag);
 526           symbol->class = what_is;
 527           if (what_is == nterm_sym && oldclass != nterm_sym)
 528             symbol->value = nvars++;
 529
 530           if (typename)
 531             {
 532               if (symbol->type_name == NULL)
 533                 symbol->type_name = typename;
 534               else if (strcmp (typename, symbol->type_name) != 0)
 535                 complain (_("type redeclaration for %s"), symbol->tag);
 536             }
 537         }
 538       else if (symbol && token == tok_number)
 539         {
 540           symbol->user_token_number = numval;
 541           translations = 1;
 542         }
 543       else
 544         {
 545           complain (_("`%s' is invalid in %s"),
 546                     token_buffer, (what_is == token_sym) ? "%token" : "%nterm");
 547           skip_to_char ('%');
 548         }
 549     }
 550
 551 }
 552
 553
 554 /*------------------------------.
 555 | Parse what comes after %start |
 556 `------------------------------*/
 557
 558 static void
 559 parse_start_decl (void)
 560 {
 561   if (start_flag)
 562     complain (_("multiple %s declarations"), "%start");
 563   if (lex () != tok_identifier)
 564     complain (_("invalid %s declaration"), "%start");
 565   else
 566     {
 567       start_flag = 1;
 568       startval = symval;
 569     }
 570 }
 571
 572 /*-----------------------------------------------------------.
 573 | read in a %type declaration and record its information for |
 574 | get_type_name to access                                    |
 575 `-----------------------------------------------------------*/
 576
 577 static void
 578 parse_type_decl (void)
 579 {
 580   char *name;
 581
 582   if (lex () != tok_typename)
 583     {
 584       complain ("%s", _("%type declaration has no <typename>"));
 585       skip_to_char ('%');
 586       return;
 587     }
 588
 589   name = xstrdup (token_buffer);
 590
 591   for (;;)
 592     {
 593       token_t t;
 594       int tmp_char = ungetc (skip_white_space (), finput);
 595
 596       if (tmp_char == '%')
 597         return;
 598       if (tmp_char == EOF)
 599         fatal (_("Premature EOF after %s"), token_buffer);
 600
 601       t = lex ();
 602
 603       switch (t)
 604         {
 605
 606         case tok_comma:
 607         case tok_semicolon:
 608           break;
 609
 610         case tok_identifier:
 611           if (symval->type_name == NULL)
 612             symval->type_name = name;
 613           else if (strcmp (name, symval->type_name) != 0)
 614             complain (_("type redeclaration for %s"), symval->tag);
 615
 616           break;
 617
 618         default:
 619           complain (_("invalid %%type declaration due to item: %s"),
 620                     token_buffer);
 621           skip_to_char ('%');
 622         }
 623     }
 624 }
 625
 626
 627
 628 /*----------------------------------------------------------------.
 629 | Read in a %left, %right or %nonassoc declaration and record its |
 630 | information.                                                    |
 631 `----------------------------------------------------------------*/
 632
 633 static void
 634 parse_assoc_decl (associativity assoc)
 635 {
 636   char *name = NULL;
 637   int prev = 0;
 638
 639   lastprec++;                   /* Assign a new precedence level, never 0.  */
 640
 641   for (;;)
 642     {
 643       token_t t;
 644       int tmp_char = ungetc (skip_white_space (), finput);
 645
 646       if (tmp_char == '%')
 647         return;
 648       if (tmp_char == EOF)
 649         fatal (_("Premature EOF after %s"), token_buffer);
 650
 651       t = lex ();
 652
 653       switch (t)
 654         {
 655         case tok_typename:
 656           name = xstrdup (token_buffer);
 657           break;
 658
 659         case tok_comma:
 660           break;
 661
 662         case tok_identifier:
 663           if (symval->prec != 0)
 664             complain (_("redefining precedence of %s"), symval->tag);
 665           symval->prec = lastprec;
 666           symval->assoc = assoc;
 667           if (symval->class == nterm_sym)
 668             complain (_("symbol %s redefined"), symval->tag);
 669           symval->class = token_sym;
 670           if (name)
 671             {                   /* record the type, if one is specified */
 672               if (symval->type_name == NULL)
 673                 symval->type_name = name;
 674               else if (strcmp (name, symval->type_name) != 0)
 675                 complain (_("type redeclaration for %s"), symval->tag);
 676             }
 677           break;
 678
 679         case tok_number:
 680           if (prev == tok_identifier)
 681             {
 682               symval->user_token_number = numval;
 683               translations = 1;
 684             }
 685           else
 686             {
 687               complain (_
 688                         ("invalid text (%s) - number should be after identifier"),
 689 token_buffer);
 690               skip_to_char ('%');
 691             }
 692           break;
 693
 694         case tok_semicolon:
 695           return;
 696
 697         default:
 698           complain (_("unexpected item: %s"), token_buffer);
 699           skip_to_char ('%');
 700         }
 701
 702       prev = t;
 703
 704     }
 705 }
 706
 707
 708
 709 /*--------------------------------------------------------------.
 710 | Copy the union declaration into ATTRS_OBSTACK (and fdefines), |
 711 | where it is made into the definition of YYSTYPE, the type of  |
 712 | elements of the parser value stack.                           |
 713 `--------------------------------------------------------------*/
 714
 715 static void
 716 parse_union_decl (void)
 717 {
 718   int c;
 719   int count = 0;
 720
 721   if (typed)
 722     complain (_("multiple %s declarations"), "%union");
 723
 724   typed = 1;
 725
 726   if (!no_lines_flag)
 727     obstack_fgrow2 (&attrs_obstack, "\n#line %d %s\n",
 728                     lineno, quotearg_style (c_quoting_style, infile));
 729   else
 730     obstack_1grow (&attrs_obstack, '\n');
 731
 732   obstack_sgrow (&attrs_obstack, "typedef union");
 733   if (defines_flag)
 734     obstack_sgrow (&defines_obstack, "typedef union");
 735
 736   c = getc (finput);
 737
 738   while (c != EOF)
 739     {
 740       obstack_1grow (&attrs_obstack, c);
 741       if (defines_flag)
 742         obstack_1grow (&defines_obstack, c);
 743
 744       switch (c)
 745         {
 746         case '\n':
 747           lineno++;
 748           break;
 749
 750         case '/':
 751           copy_comment2 (finput, &defines_obstack, &attrs_obstack);
 752           break;
 753
 754         case '{':
 755           count++;
 756           break;
 757
 758         case '}':
 759           if (count == 0)
 760             complain (_("unmatched %s"), "`}'");
 761           count--;
 762           if (count <= 0)
 763             {
 764               obstack_sgrow (&attrs_obstack, " YYSTYPE;\n");
 765               if (defines_flag)
 766                 obstack_sgrow (&defines_obstack, " YYSTYPE;\n");
 767               /* JF don't choke on trailing semi */
 768               c = skip_white_space ();
 769               if (c != ';')
 770                 ungetc (c, finput);
 771               return;
 772             }
 773         }
 774
 775       c = getc (finput);
 776     }
 777 }
 778
 779
 780 /*-------------------------------------------------------.
 781 | Parse the declaration %expect N which says to expect N |
 782 | shift-reduce conflicts.                                |
 783 `-------------------------------------------------------*/
 784
 785 static void
 786 parse_expect_decl (void)
 787 {
 788   int c = skip_white_space ();
 789   ungetc (c, finput);
 790
 791   if (!isdigit (c))
 792     complain (_("argument of %%expect is not an integer"));
 793   else
 794     expected_conflicts = read_signed_integer (finput);
 795 }
 796
 797
 798 /*-------------------------------------------------------------------.
 799 | Parse what comes after %thong.  the full syntax is                 |
 800 |                                                                    |
 801 |                %thong <type> token number literal                  |
 802 |                                                                    |
 803 | the <type> or number may be omitted.  The number specifies the     |
 804 | user_token_number.                                                 |
 805 |                                                                    |
 806 | Two symbols are entered in the table, one for the token symbol and |
 807 | one for the literal.  Both are given the <type>, if any, from the  |
 808 | declaration.  The ->user_token_number of the first is SALIAS and   |
 809 | the ->user_token_number of the second is set to the number, if     |
 810 | any, from the declaration.  The two symbols are linked via         |
 811 | pointers in their ->alias fields.                                  |
 812 |                                                                    |
 813 | During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter,    |
 814 | only the literal string is retained it is the literal string that  |
 815 | is output to yytname                                               |
 816 `-------------------------------------------------------------------*/
 817
 818 static void
 819 parse_thong_decl (void)
 820 {
 821   token_t token;
 822   struct bucket *symbol;
 823   char *typename = 0;
 824   int usrtoknum;
 825
 826   translations = 1;
 827   token = lex ();               /* fetch typename or first token */
 828   if (token == tok_typename)
 829     {
 830       typename = xstrdup (token_buffer);
 831       value_components_used = 1;
 832       token = lex ();           /* fetch first token */
 833     }
 834
 835   /* process first token */
 836
 837   if (token != tok_identifier)
 838     {
 839       complain (_("unrecognized item %s, expected an identifier"),
 840                 token_buffer);
 841       skip_to_char ('%');
 842       return;
 843     }
 844   symval->class = token_sym;
 845   symval->type_name = typename;
 846   symval->user_token_number = SALIAS;
 847   symbol = symval;
 848
 849   token = lex ();               /* get number or literal string */
 850
 851   if (token == tok_number)
 852     {
 853       usrtoknum = numval;
 854       token = lex ();           /* okay, did number, now get literal */
 855     }
 856   else
 857     usrtoknum = 0;
 858
 859   /* process literal string token */
 860
 861   if (token != tok_identifier || *symval->tag != '\"')
 862     {
 863       complain (_("expected string constant instead of %s"), token_buffer);
 864       skip_to_char ('%');
 865       return;
 866     }
 867   symval->class = token_sym;
 868   symval->type_name = typename;
 869   symval->user_token_number = usrtoknum;
 870
 871   symval->alias = symbol;
 872   symbol->alias = symval;
 873
 874   /* symbol and symval combined are only one symbol.  */
 875   nsyms--;
 876 }
 877
 878
 879 /*----------------------------------------------------------------.
 880 | Read from finput until `%%' is seen.  Discard the `%%'.  Handle |
 881 | any `%' declarations, and copy the contents of any `%{ ... %}'  |
 882 | groups to ATTRS_OBSTACK.                                        |
 883 `----------------------------------------------------------------*/
 884
 885 static void
 886 read_declarations (void)
 887 {
 888   int c;
 889   int tok;
 890
 891   for (;;)
 892     {
 893       c = skip_white_space ();
 894
 895       if (c == '%')
 896         {
 897           tok = parse_percent_token ();
 898
 899           switch (tok)
 900             {
 901             case tok_two_percents:
 902               return;
 903
 904             case tok_percent_left_curly:
 905               copy_definition ();
 906               break;
 907
 908             case tok_token:
 909               parse_token_decl (token_sym, nterm_sym);
 910               break;
 911
 912             case tok_nterm:
 913               parse_token_decl (nterm_sym, token_sym);
 914               break;
 915
 916             case tok_type:
 917               parse_type_decl ();
 918               break;
 919
 920             case tok_start:
 921               parse_start_decl ();
 922               break;
 923
 924             case tok_union:
 925               parse_union_decl ();
 926               break;
 927
 928             case tok_expect:
 929               parse_expect_decl ();
 930               break;
 931
 932             case tok_thong:
 933               parse_thong_decl ();
 934               break;
 935
 936             case tok_left:
 937               parse_assoc_decl (left_assoc);
 938               break;
 939
 940             case tok_right:
 941               parse_assoc_decl (right_assoc);
 942               break;
 943
 944             case tok_nonassoc:
 945               parse_assoc_decl (non_assoc);
 946               break;
 947
 948             case tok_noop:
 949               break;
 950
 951             default:
 952               complain (_("unrecognized: %s"), token_buffer);
 953               skip_to_char ('%');
 954             }
 955         }
 956       else if (c == EOF)
 957         fatal (_("no input grammar"));
 958       else
 959         {
 960           char buf[] = "c";
 961           buf[0] = c;
 962           complain (_("unknown character: %s"), quote (buf));
 963           skip_to_char ('%');
 964         }
 965     }
 966 }
 967 \f
 968 /*-------------------------------------------------------------------.
 969 | Assuming that a `{' has just been seen, copy everything up to the  |
 970 | matching `}' into the actions file.  STACK_OFFSET is the number of |
 971 | values in the current rule so far, which says where to find `$0'   |
 972 | with respect to the top of the stack.                              |
 973 `-------------------------------------------------------------------*/
 974
 975 static void
 976 copy_action (symbol_list *rule, int stack_offset)
 977 {
 978   int c;
 979   int count;
 980   char buf[4096];
 981
 982   /* offset is always 0 if parser has already popped the stack pointer */
 983   if (semantic_parser)
 984     stack_offset = 0;
 985
 986   sprintf (buf, "\ncase %d:\n", nrules);
 987   obstack_grow (&action_obstack, buf, strlen (buf));
 988
 989   if (!no_lines_flag)
 990     {
 991       sprintf (buf, "#line %d %s\n",
 992                lineno, quotearg_style (c_quoting_style, infile));
 993       obstack_grow (&action_obstack, buf, strlen (buf));
 994     }
 995   obstack_1grow (&action_obstack, '{');
 996
 997   count = 1;
 998   c = getc (finput);
 999
1000   while (count > 0)
1001     {
1002       while (c != '}')
1003         {
1004           switch (c)
1005             {
1006             case '\n':
1007               obstack_1grow (&action_obstack, c);
1008               lineno++;
1009               break;
1010
1011             case '{':
1012               obstack_1grow (&action_obstack, c);
1013               count++;
1014               break;
1015
1016             case '\'':
1017             case '"':
1018               copy_string (finput, &action_obstack, c);
1019               break;
1020
1021             case '/':
1022               copy_comment (finput, &action_obstack);
1023               break;
1024
1025             case '$':
1026               copy_dollar (finput, &action_obstack,
1027                            rule, stack_offset);
1028               break;
1029
1030             case '@':
1031               copy_at (finput, &action_obstack,
1032                        stack_offset);
1033               break;
1034
1035             case EOF:
1036               fatal (_("unmatched %s"), "`{'");
1037
1038             default:
1039               obstack_1grow (&action_obstack, c);
1040             }
1041
1042           c = getc (finput);
1043         }
1044
1045       /* above loop exits when c is '}' */
1046
1047       if (--count)
1048         {
1049           obstack_1grow (&action_obstack, c);
1050           c = getc (finput);
1051         }
1052     }
1053
1054   obstack_sgrow (&action_obstack, ";\n    break;}");
1055 }
1056 \f
1057 /*-------------------------------------------------------------------.
1058 | After `%guard' is seen in the input file, copy the actual guard    |
1059 | into the guards file.  If the guard is followed by an action, copy |
1060 | that into the actions file.  STACK_OFFSET is the number of values  |
1061 | in the current rule so far, which says where to find `$0' with     |
1062 | respect to the top of the stack, for the simple parser in which    |
1063 | the stack is not popped until after the guard is run.              |
1064 `-------------------------------------------------------------------*/
1065
1066 static void
1067 copy_guard (symbol_list *rule, int stack_offset)
1068 {
1069   int c;
1070   int count;
1071   int brace_flag = 0;
1072
1073   /* offset is always 0 if parser has already popped the stack pointer */
1074   if (semantic_parser)
1075     stack_offset = 0;
1076
1077   obstack_fgrow1 (&guard_obstack, "\ncase %d:\n", nrules);
1078   if (!no_lines_flag)
1079     obstack_fgrow2 (&guard_obstack, "#line %d %s\n",
1080                     lineno, quotearg_style (c_quoting_style, infile));
1081   obstack_1grow (&guard_obstack, '{');
1082
1083   count = 0;
1084   c = getc (finput);
1085
1086   while (brace_flag ? (count > 0) : (c != ';'))
1087     {
1088       switch (c)
1089         {
1090         case '\n':
1091           obstack_1grow (&guard_obstack, c);
1092           lineno++;
1093           break;
1094
1095         case '{':
1096           obstack_1grow (&guard_obstack, c);
1097           brace_flag = 1;
1098           count++;
1099           break;
1100
1101         case '}':
1102           obstack_1grow (&guard_obstack, c);
1103           if (count > 0)
1104             count--;
1105           else
1106             {
1107               complain (_("unmatched %s"), "`}'");
1108               c = getc (finput);        /* skip it */
1109             }
1110           break;
1111
1112         case '\'':
1113         case '"':
1114           copy_string (finput, &guard_obstack, c);
1115           break;
1116
1117         case '/':
1118           copy_comment (finput, &guard_obstack);
1119           break;
1120
1121         case '$':
1122           copy_dollar (finput, &guard_obstack, rule, stack_offset);
1123           break;
1124
1125         case '@':
1126           copy_at (finput, &guard_obstack, stack_offset);
1127           break;
1128
1129         case EOF:
1130           fatal ("%s", _("unterminated %guard clause"));
1131
1132         default:
1133           obstack_1grow (&guard_obstack, c);
1134         }
1135
1136       if (c != '}' || count != 0)
1137         c = getc (finput);
1138     }
1139
1140   c = skip_white_space ();
1141
1142   obstack_sgrow (&guard_obstack, ";\n    break;}");
1143   if (c == '{')
1144     copy_action (rule, stack_offset);
1145   else if (c == '=')
1146     {
1147       c = getc (finput);        /* why not skip_white_space -wjh */
1148       if (c == '{')
1149         copy_action (rule, stack_offset);
1150     }
1151   else
1152     ungetc (c, finput);
1153 }
1154 \f
1155
1156 static void
1157 record_rule_line (void)
1158 {
1159   /* Record each rule's source line number in rline table.  */
1160
1161   if (nrules >= rline_allocated)
1162     {
1163       rline_allocated = nrules * 2;
1164       rline = XREALLOC (rline, short, rline_allocated);
1165     }
1166   rline[nrules] = lineno;
1167 }
1168
1169
1170 /*-------------------------------------------------------------------.
1171 | Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1172 | with the user's names.                                             |
1173 `-------------------------------------------------------------------*/
1174
1175 static bucket *
1176 gensym (void)
1177 {
1178   /* Incremented for each generated symbol */
1179   static int gensym_count = 0;
1180   static char buf[256];
1181
1182   bucket *sym;
1183
1184   sprintf (buf, "@%d", ++gensym_count);
1185   token_buffer = buf;
1186   sym = getsym (token_buffer);
1187   sym->class = nterm_sym;
1188   sym->value = nvars++;
1189   return sym;
1190 }
1191
1192 #if 0
1193 /*------------------------------------------------------------------.
1194 | read in a %type declaration and record its information for        |
1195 | get_type_name to access.  This is unused.  It is only called from |
1196 | the #if 0 part of readgram                                        |
1197 `------------------------------------------------------------------*/
1198
1199 static int
1200 get_type (void)
1201 {
1202   int k;
1203   token_t token;
1204   char *name;
1205
1206   token = lex ();
1207
1208   if (token != tok_typename)
1209     {
1210       complain (_("invalid %s declaration"), "%type");
1211       return t;
1212     }
1213
1214   name = xstrdup (token_buffer);
1215
1216   for (;;)
1217     {
1218       token = lex ();
1219
1220       switch (token)
1221         {
1222         case tok_semicolon:
1223           return lex ();
1224
1225         case tok_comma:
1226           break;
1227
1228         case tok_identifier:
1229           if (symval->type_name == NULL)
1230             symval->type_name = name;
1231           else if (strcmp (name, symval->type_name) != 0)
1232             complain (_("type redeclaration for %s"), symval->tag);
1233
1234           break;
1235
1236         default:
1237           return token;
1238         }
1239     }
1240 }
1241
1242 #endif
1243 \f
1244 /*------------------------------------------------------------------.
1245 | Parse the input grammar into a one symbol_list structure.  Each   |
1246 | rule is represented by a sequence of symbols: the left hand side  |
1247 | followed by the contents of the right hand side, followed by a    |
1248 | null pointer instead of a symbol to terminate the rule.  The next |
1249 | symbol is the lhs of the following rule.                          |
1250 |                                                                   |
1251 | All guards and actions are copied out to the appropriate files,   |
1252 | labelled by the rule number they apply to.                        |
1253 `------------------------------------------------------------------*/
1254
1255 static void
1256 readgram (void)
1257 {
1258   token_t t;
1259   bucket *lhs = NULL;
1260   symbol_list *p;
1261   symbol_list *p1;
1262   bucket *bp;
1263
1264   /* Points to first symbol_list of current rule. its symbol is the
1265      lhs of the rule.  */
1266   symbol_list *crule;
1267   /* Points to the symbol_list preceding crule.  */
1268   symbol_list *crule1;
1269
1270   p1 = NULL;
1271
1272   t = lex ();
1273
1274   while (t != tok_two_percents && t != tok_eof)
1275     {
1276       if (t == tok_identifier || t == tok_bar)
1277         {
1278           int action_flag = 0;
1279           /* Number of symbols in rhs of this rule so far */
1280           int rulelength = 0;
1281           int xactions = 0;     /* JF for error checking */
1282           bucket *first_rhs = 0;
1283
1284           if (t == tok_identifier)
1285             {
1286               lhs = symval;
1287
1288               if (!start_flag)
1289                 {
1290                   startval = lhs;
1291                   start_flag = 1;
1292                 }
1293
1294               t = lex ();
1295               if (t != tok_colon)
1296                 {
1297                   complain (_("ill-formed rule: initial symbol not followed by colon"));
1298                   unlex (t);
1299                 }
1300             }
1301
1302           if (nrules == 0 && t == tok_bar)
1303             {
1304               complain (_("grammar starts with vertical bar"));
1305               lhs = symval;     /* BOGUS: use a random symval */
1306             }
1307           /* start a new rule and record its lhs.  */
1308
1309           nrules++;
1310           nitems++;
1311
1312           record_rule_line ();
1313
1314           p = XCALLOC (symbol_list, 1);
1315           p->sym = lhs;
1316
1317           crule1 = p1;
1318           if (p1)
1319             p1->next = p;
1320           else
1321             grammar = p;
1322
1323           p1 = p;
1324           crule = p;
1325
1326           /* mark the rule's lhs as a nonterminal if not already so.  */
1327
1328           if (lhs->class == unknown_sym)
1329             {
1330               lhs->class = nterm_sym;
1331               lhs->value = nvars;
1332               nvars++;
1333             }
1334           else if (lhs->class == token_sym)
1335             complain (_("rule given for %s, which is a token"), lhs->tag);
1336
1337           /* read the rhs of the rule.  */
1338
1339           for (;;)
1340             {
1341               t = lex ();
1342               if (t == tok_prec)
1343                 {
1344                   t = lex ();
1345                   crule->ruleprec = symval;
1346                   t = lex ();
1347                 }
1348
1349               if (!(t == tok_identifier || t == tok_left_curly))
1350                 break;
1351
1352               /* If next token is an identifier, see if a colon follows it.
1353                  If one does, exit this rule now.  */
1354               if (t == tok_identifier)
1355                 {
1356                   bucket *ssave;
1357                   token_t t1;
1358
1359                   ssave = symval;
1360                   t1 = lex ();
1361                   unlex (t1);
1362                   symval = ssave;
1363                   if (t1 == tok_colon)
1364                     break;
1365
1366                   if (!first_rhs)       /* JF */
1367                     first_rhs = symval;
1368                   /* Not followed by colon =>
1369                      process as part of this rule's rhs.  */
1370                 }
1371
1372               /* If we just passed an action, that action was in the middle
1373                  of a rule, so make a dummy rule to reduce it to a
1374                  non-terminal.  */
1375               if (action_flag)
1376                 {
1377                   bucket *sdummy;
1378
1379                   /* Since the action was written out with this rule's
1380                      number, we must give the new rule this number by
1381                      inserting the new rule before it.  */
1382
1383                   /* Make a dummy nonterminal, a gensym.  */
1384                   sdummy = gensym ();
1385
1386                   /* Make a new rule, whose body is empty,
1387                      before the current one, so that the action
1388                      just read can belong to it.  */
1389                   nrules++;
1390                   nitems++;
1391                   record_rule_line ();
1392                   p = XCALLOC (symbol_list, 1);
1393                   if (crule1)
1394                     crule1->next = p;
1395                   else
1396                     grammar = p;
1397                   p->sym = sdummy;
1398                   crule1 = XCALLOC (symbol_list, 1);
1399                   p->next = crule1;
1400                   crule1->next = crule;
1401
1402                   /* Insert the dummy generated by that rule into this
1403                      rule.  */
1404                   nitems++;
1405                   p = XCALLOC (symbol_list, 1);
1406                   p->sym = sdummy;
1407                   p1->next = p;
1408                   p1 = p;
1409
1410                   action_flag = 0;
1411                 }
1412
1413               if (t == tok_identifier)
1414                 {
1415                   nitems++;
1416                   p = XCALLOC (symbol_list, 1);
1417                   p->sym = symval;
1418                   p1->next = p;
1419                   p1 = p;
1420                 }
1421               else              /* handle an action.  */
1422                 {
1423                   copy_action (crule, rulelength);
1424                   action_flag = 1;
1425                   xactions++;   /* JF */
1426                 }
1427               rulelength++;
1428             }                   /* end of  read rhs of rule */
1429
1430           /* Put an empty link in the list to mark the end of this rule  */
1431           p = XCALLOC (symbol_list, 1);
1432           p1->next = p;
1433           p1 = p;
1434
1435           if (t == tok_prec)
1436             {
1437               complain (_("two @prec's in a row"));
1438               t = lex ();
1439               crule->ruleprec = symval;
1440               t = lex ();
1441             }
1442           if (t == tok_guard)
1443             {
1444               if (!semantic_parser)
1445                 complain (_("%%guard present but %%semantic_parser not specified"));
1446
1447               copy_guard (crule, rulelength);
1448               t = lex ();
1449             }
1450           else if (t == tok_left_curly)
1451             {
1452               /* This case never occurs -wjh */
1453               if (action_flag)
1454                 complain (_("two actions at end of one rule"));
1455               copy_action (crule, rulelength);
1456               action_flag = 1;
1457               xactions++;       /* -wjh */
1458               t = lex ();
1459             }
1460           /* If $$ is being set in default way, report if any type
1461              mismatch.  */
1462           else if (!xactions
1463                    && first_rhs && lhs->type_name != first_rhs->type_name)
1464             {
1465               if (lhs->type_name == 0
1466                   || first_rhs->type_name == 0
1467                   || strcmp (lhs->type_name, first_rhs->type_name))
1468                 complain (_("type clash (`%s' `%s') on default action"),
1469                           lhs->type_name ? lhs->type_name : "",
1470                           first_rhs->type_name ? first_rhs->type_name : "");
1471             }
1472           /* Warn if there is no default for $$ but we need one.  */
1473           else if (!xactions && !first_rhs && lhs->type_name != 0)
1474             complain (_("empty rule for typed nonterminal, and no action"));
1475           if (t == tok_semicolon)
1476             t = lex ();
1477         }
1478 #if 0
1479       /* these things can appear as alternatives to rules.  */
1480 /* NO, they cannot.
1481         a) none of the documentation allows them
1482         b) most of them scan forward until finding a next %
1483                 thus they may swallow lots of intervening rules
1484 */
1485       else if (t == tok_token)
1486         {
1487           parse_token_decl (token_sym, nterm_sym);
1488           t = lex ();
1489         }
1490       else if (t == tok_nterm)
1491         {
1492           parse_token_decl (nterm_sym, token_sym);
1493           t = lex ();
1494         }
1495       else if (t == tok_type)
1496         {
1497           t = get_type ();
1498         }
1499       else if (t == tok_union)
1500         {
1501           parse_union_decl ();
1502           t = lex ();
1503         }
1504       else if (t == tok_expect)
1505         {
1506           parse_expect_decl ();
1507           t = lex ();
1508         }
1509       else if (t == tok_start)
1510         {
1511           parse_start_decl ();
1512           t = lex ();
1513         }
1514 #endif
1515
1516       else
1517         {
1518           complain (_("invalid input: %s"), token_buffer);
1519           t = lex ();
1520         }
1521     }
1522
1523   /* grammar has been read.  Do some checking */
1524
1525   if (nsyms > MAXSHORT)
1526     fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1527            MAXSHORT);
1528   if (nrules == 0)
1529     fatal (_("no rules in the input grammar"));
1530
1531   /* JF put out same default YYSTYPE as YACC does */
1532   if (typed == 0
1533       && !value_components_used)
1534     {
1535       /* We used to use `unsigned long' as YYSTYPE on MSDOS,
1536          but it seems better to be consistent.
1537          Most programs should declare their own type anyway.  */
1538       obstack_sgrow (&attrs_obstack,
1539                            "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1540       if (defines_flag)
1541         obstack_sgrow (&defines_obstack, "\
1542 #ifndef YYSTYPE\n\
1543 # define YYSTYPE int\n\
1544 #endif\n");
1545     }
1546
1547   /* Report any undefined symbols and consider them nonterminals.  */
1548
1549   for (bp = firstsymbol; bp; bp = bp->next)
1550     if (bp->class == unknown_sym)
1551       {
1552         complain (_
1553                   ("symbol %s is used, but is not defined as a token and has no rules"),
1554                   bp->tag);
1555         bp->class = nterm_sym;
1556         bp->value = nvars++;
1557       }
1558
1559   ntokens = nsyms - nvars;
1560 }
1561 \f
1562 /*--------------------------------------------------------------.
1563 | For named tokens, but not literal ones, define the name.  The |
1564 | value is the user token number.                               |
1565 `--------------------------------------------------------------*/
1566
1567 static void
1568 output_token_defines (struct obstack *oout)
1569 {
1570   bucket *bp;
1571   char *cp, *symbol;
1572   char c;
1573
1574   for (bp = firstsymbol; bp; bp = bp->next)
1575     {
1576       symbol = bp->tag;         /* get symbol */
1577
1578       if (bp->value >= ntokens)
1579         continue;
1580       if (bp->user_token_number == SALIAS)
1581         continue;
1582       if ('\'' == *symbol)
1583         continue;               /* skip literal character */
1584       if (bp == errtoken)
1585         continue;               /* skip error token */
1586       if ('\"' == *symbol)
1587         {
1588           /* use literal string only if given a symbol with an alias */
1589           if (bp->alias)
1590             symbol = bp->alias->tag;
1591           else
1592             continue;
1593         }
1594
1595       /* Don't #define nonliteral tokens whose names contain periods.  */
1596       cp = symbol;
1597       while ((c = *cp++) && c != '.');
1598       if (c != '\0')
1599         continue;
1600
1601       obstack_fgrow2 (oout, "#define\t%s\t%d\n",
1602                       symbol,
1603                       (translations ? bp->user_token_number : bp->value));
1604       if (semantic_parser)
1605         obstack_fgrow2 (oout, "#define\tT%s\t%d\n", symbol, bp->value);
1606     }
1607
1608   obstack_1grow (oout, '\n');
1609 }
1610
1611
1612 /*------------------------------------------------------------------.
1613 | Assign symbol numbers, and write definition of token names into   |
1614 | FDEFINES.  Set up vectors TAGS and SPREC of names and precedences |
1615 | of symbols.                                                       |
1616 `------------------------------------------------------------------*/
1617
1618 static void
1619 packsymbols (void)
1620 {
1621   bucket *bp;
1622   int tokno = 1;
1623   int i;
1624   int last_user_token_number;
1625   static char DOLLAR[] = "$";
1626
1627   /* int lossage = 0; JF set but not used */
1628
1629   tags = XCALLOC (char *, nsyms + 1);
1630   tags[0] = DOLLAR;
1631   user_toknums = XCALLOC (short, nsyms + 1);
1632   user_toknums[0] = 0;
1633
1634   sprec = XCALLOC (short, nsyms);
1635   sassoc = XCALLOC (short, nsyms);
1636
1637   max_user_token_number = 256;
1638   last_user_token_number = 256;
1639
1640   for (bp = firstsymbol; bp; bp = bp->next)
1641     {
1642       if (bp->class == nterm_sym)
1643         {
1644           bp->value += ntokens;
1645         }
1646       else if (bp->alias)
1647         {
1648           /* this symbol and its alias are a single token defn.
1649              allocate a tokno, and assign to both check agreement of
1650              ->prec and ->assoc fields and make both the same */
1651           if (bp->value == 0)
1652             bp->value = bp->alias->value = tokno++;
1653
1654           if (bp->prec != bp->alias->prec)
1655             {
1656               if (bp->prec != 0 && bp->alias->prec != 0
1657                   && bp->user_token_number == SALIAS)
1658                 complain (_("conflicting precedences for %s and %s"),
1659                           bp->tag, bp->alias->tag);
1660               if (bp->prec != 0)
1661                 bp->alias->prec = bp->prec;
1662               else
1663                 bp->prec = bp->alias->prec;
1664             }
1665
1666           if (bp->assoc != bp->alias->assoc)
1667             {
1668               if (bp->assoc != 0 && bp->alias->assoc != 0
1669                   && bp->user_token_number == SALIAS)
1670                 complain (_("conflicting assoc values for %s and %s"),
1671                           bp->tag, bp->alias->tag);
1672               if (bp->assoc != 0)
1673                 bp->alias->assoc = bp->assoc;
1674               else
1675                 bp->assoc = bp->alias->assoc;
1676             }
1677
1678           if (bp->user_token_number == SALIAS)
1679             continue;           /* do not do processing below for SALIASs */
1680
1681         }
1682       else                      /* bp->class == token_sym */
1683         {
1684           bp->value = tokno++;
1685         }
1686
1687       if (bp->class == token_sym)
1688         {
1689           if (translations && !(bp->user_token_number))
1690             bp->user_token_number = ++last_user_token_number;
1691           if (bp->user_token_number > max_user_token_number)
1692             max_user_token_number = bp->user_token_number;
1693         }
1694
1695       tags[bp->value] = bp->tag;
1696       user_toknums[bp->value] = bp->user_token_number;
1697       sprec[bp->value] = bp->prec;
1698       sassoc[bp->value] = bp->assoc;
1699
1700     }
1701
1702   if (translations)
1703     {
1704       int j;
1705
1706       token_translations = XCALLOC (short, max_user_token_number + 1);
1707
1708       /* initialize all entries for literal tokens to 2, the internal
1709          token number for $undefined., which represents all invalid
1710          inputs.  */
1711       for (j = 0; j <= max_user_token_number; j++)
1712         token_translations[j] = 2;
1713
1714       for (bp = firstsymbol; bp; bp = bp->next)
1715         {
1716           if (bp->value >= ntokens)
1717             continue;           /* non-terminal */
1718           if (bp->user_token_number == SALIAS)
1719             continue;
1720           if (token_translations[bp->user_token_number] != 2)
1721             complain (_("tokens %s and %s both assigned number %d"),
1722                       tags[token_translations[bp->user_token_number]],
1723                       bp->tag, bp->user_token_number);
1724           token_translations[bp->user_token_number] = bp->value;
1725         }
1726     }
1727
1728   error_token_number = errtoken->value;
1729
1730   if (!no_parser_flag)
1731     output_token_defines (&table_obstack);
1732
1733   if (startval->class == unknown_sym)
1734     fatal (_("the start symbol %s is undefined"), startval->tag);
1735   else if (startval->class == token_sym)
1736     fatal (_("the start symbol %s is a token"), startval->tag);
1737
1738   start_symbol = startval->value;
1739
1740   if (defines_flag)
1741     {
1742       output_token_defines (&defines_obstack);
1743
1744       if (!pure_parser)
1745         {
1746           if (spec_name_prefix)
1747             obstack_fgrow1 (&defines_obstack, "\nextern YYSTYPE %slval;\n",
1748                             spec_name_prefix);
1749           else
1750             obstack_sgrow (&defines_obstack,
1751                                  "\nextern YYSTYPE yylval;\n");
1752         }
1753
1754       if (semantic_parser)
1755         for (i = ntokens; i < nsyms; i++)
1756           {
1757             /* don't make these for dummy nonterminals made by gensym.  */
1758             if (*tags[i] != '@')
1759                obstack_fgrow2 (&defines_obstack,
1760                                "#define\tNT%s\t%d\n", tags[i], i);
1761           }
1762 #if 0
1763       /* `fdefines' is now a temporary file, so we need to copy its
1764          contents in `done', so we can't close it here.  */
1765       fclose (fdefines);
1766       fdefines = NULL;
1767 #endif
1768     }
1769 }
1770
1771
1772 /*---------------------------------------------------------------.
1773 | Convert the rules into the representation using RRHS, RLHS and |
1774 | RITEMS.                                                        |
1775 `---------------------------------------------------------------*/
1776
1777 static void
1778 packgram (void)
1779 {
1780   int itemno;
1781   int ruleno;
1782   symbol_list *p;
1783
1784   bucket *ruleprec;
1785
1786   ritem = XCALLOC (short, nitems + 1);
1787   rlhs = XCALLOC (short, nrules) - 1;
1788   rrhs = XCALLOC (short, nrules) - 1;
1789   rprec = XCALLOC (short, nrules) - 1;
1790   rprecsym = XCALLOC (short, nrules) - 1;
1791   rassoc = XCALLOC (short, nrules) - 1;
1792
1793   itemno = 0;
1794   ruleno = 1;
1795
1796   p = grammar;
1797   while (p)
1798     {
1799       rlhs[ruleno] = p->sym->value;
1800       rrhs[ruleno] = itemno;
1801       ruleprec = p->ruleprec;
1802
1803       p = p->next;
1804       while (p && p->sym)
1805         {
1806           ritem[itemno++] = p->sym->value;
1807           /* A rule gets by default the precedence and associativity
1808              of the last token in it.  */
1809           if (p->sym->class == token_sym)
1810             {
1811               rprec[ruleno] = p->sym->prec;
1812               rassoc[ruleno] = p->sym->assoc;
1813             }
1814           if (p)
1815             p = p->next;
1816         }
1817
1818       /* If this rule has a %prec,
1819          the specified symbol's precedence replaces the default.  */
1820       if (ruleprec)
1821         {
1822           rprec[ruleno] = ruleprec->prec;
1823           rassoc[ruleno] = ruleprec->assoc;
1824           rprecsym[ruleno] = ruleprec->value;
1825         }
1826
1827       ritem[itemno++] = -ruleno;
1828       ruleno++;
1829
1830       if (p)
1831         p = p->next;
1832     }
1833
1834   ritem[itemno] = 0;
1835 }
1836 \f
1837 /*-------------------------------------------------------------------.
1838 | Read in the grammar specification and record it in the format      |
1839 | described in gram.h.  All guards are copied into the GUARD_OBSTACK |
1840 | and all actions into ACTION_OBSTACK, in each case forming the body |
1841 | of a C function (YYGUARD or YYACTION) which contains a switch      |
1842 | statement to decide which guard or action to execute.              |
1843 `-------------------------------------------------------------------*/
1844
1845 void
1846 reader (void)
1847 {
1848   start_flag = 0;
1849   startval = NULL;              /* start symbol not specified yet. */
1850
1851 #if 0
1852   /* initially assume token number translation not needed.  */
1853   translations = 0;
1854 #endif
1855   /* Nowadays translations is always set to 1, since we give `error' a
1856      user-token-number to satisfy the Posix demand for YYERRCODE==256.
1857    */
1858   translations = 1;
1859
1860   nsyms = 1;
1861   nvars = 0;
1862   nrules = 0;
1863   nitems = 0;
1864   rline_allocated = 10;
1865   rline = XCALLOC (short, rline_allocated);
1866
1867   typed = 0;
1868   lastprec = 0;
1869
1870   semantic_parser = 0;
1871   pure_parser = 0;
1872
1873   grammar = NULL;
1874
1875   init_lex ();
1876   lineno = 1;
1877
1878   /* Initialize the symbol table.  */
1879   tabinit ();
1880   /* Construct the error token */
1881   errtoken = getsym ("error");
1882   errtoken->class = token_sym;
1883   errtoken->user_token_number = 256;    /* Value specified by POSIX.  */
1884   /* Construct a token that represents all undefined literal tokens.
1885      It is always token number 2.  */
1886   undeftoken = getsym ("$undefined.");
1887   undeftoken->class = token_sym;
1888   undeftoken->user_token_number = 2;
1889
1890   /* Read the declaration section.  Copy %{ ... %} groups to
1891      TABLE_OBSTACK and FDEFINES file.  Also notice any %token, %left,
1892      etc. found there.  */
1893   obstack_1grow (&table_obstack, '\n');
1894   obstack_fgrow3 (&table_obstack, "\
1895 /* %s, made from %s\n\
1896    by GNU bison %s.  */\n\
1897 \n",
1898                   no_parser_flag ? "Bison-generated parse tables" : "A Bison parser",
1899                   infile, VERSION);
1900
1901   obstack_sgrow (&table_obstack,
1902                        "#define YYBISON 1  /* Identify Bison output.  */\n\n");
1903   read_declarations ();
1904   /* Start writing the guard and action files, if they are needed.  */
1905   output_headers ();
1906   /* Read in the grammar, build grammar in list form.  Write out
1907      guards and actions.  */
1908   readgram ();
1909   /* Now we know whether we need the line-number stack.  If we do,
1910      write its type into the .tab.h file.  */
1911   if (defines_flag)
1912     reader_output_yylsp (&defines_obstack);
1913   /* Write closing delimiters for actions and guards.  */
1914   output_trailers ();
1915   if (locations_flag)
1916     obstack_sgrow (&table_obstack, "#define YYLSP_NEEDED 1\n\n");
1917   /* Assign the symbols their symbol numbers.  Write #defines for the
1918      token symbols into FDEFINES if requested.  */
1919   packsymbols ();
1920   /* Convert the grammar into the format described in gram.h.  */
1921   packgram ();
1922   /* Free the symbol table data structure since symbols are now all
1923      referred to by symbol number.  */
1924   free_symtab ();
1925 }
1926
1927
1928 /*------------------------------------------------------------------.
1929 | Define YYLTYPE.  Cannot be in the skeleton since we might have to |
1930 | output it in the headers if --defines is used.                    |
1931 `------------------------------------------------------------------*/
1932
1933 void
1934 reader_output_yylsp (struct obstack *oout)
1935 {
1936   if (locations_flag)
1937     obstack_sgrow (oout, "\
1938 \n\
1939 #ifndef YYLTYPE\n\
1940 typedef struct yyltype\n\
1941 {\n\
1942   int first_line;\n\
1943   int first_column;\n\
1944 \n\
1945   int last_line;\n\
1946   int last_column;\n\
1947 } yyltype;\n\
1948 \n\
1949 # define YYLTYPE yyltype\n\
1950 #endif\n\
1951 \n");
1952 }