src/reader.c

   1 /* Input parser for bison
   2    Copyright 1984, 1986, 1989, 1992, 1998, 2000, 2001
   3    Free Software Foundation, Inc.
   4
   5    This file is part of Bison, the GNU Compiler Compiler.
   6
   7    Bison is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 2, or (at your option)
  10    any later version.
  11
  12    Bison is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with Bison; see the file COPYING.  If not, write to
  19    the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20    Boston, MA 02111-1307, USA.  */
  21
  22
#include "system.h"
#include <limits.h>
#include "obstack.h"
#include "quotearg.h"
#include "quote.h"
#include "getargs.h"
#include "files.h"
#include "symtab.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
  36
/* One link of a singly linked list of symbols.  */
typedef struct symbol_list
{
  /* Following link; symbol_list_new starts it out as NULL.  */
  struct symbol_list *next;
  /* Symbol table entry carried by this link.  get_type_name treats a
     NULL SYM as lying beyond the symbols it may index.  */
  bucket *sym;
  /* Value of LINENO at the time the link was allocated.  */
  int line;
  /* Starts out NULL in symbol_list_new.  NOTE(review): presumably the
     symbol supplying the rule's precedence -- confirm against the code
     that fills it in.  */
  bucket *ruleprec;
}
symbol_list;
  45
/* Current line of the input; the copying routines in this file bump it
   each time they consume a newline.  */
int lineno;
/* NOTE(review): TAGS and USER_TOKNUMS are not referenced in this part
   of the file; presumably per-symbol tables filled in after parsing --
   confirm.  */
char **tags;
short *user_toknums;
/* NOTE(review): presumably the list of rules read so far; not touched
   in this part of the file -- confirm.  */
static symbol_list *grammar;
/* Set to 1 by parse_start_decl once a valid %start has been read.  */
static int start_flag;
/* Symbol named by the %start declaration (see parse_start_decl).  */
static bucket *startval;

/* Nonzero if components of semantic values are used, implying
   they must be unions.  */
static int value_components_used;

/* Nonzero if %union has been seen.  */
static int typed;

/* Incremented for each %left, %right or %nonassoc seen */
static int lastprec;

/* NOTE(review): not referenced in this part of the file; presumably the
   symbols standing for the error token and for undefined tokens --
   confirm.  */
static bucket *errtoken;
static bucket *undeftoken;
  65
  66
  67 static symbol_list *
  68 symbol_list_new (bucket *sym)
  69 {
  70   symbol_list *res = XMALLOC (symbol_list, 1);
  71   res->next = NULL;
  72   res->sym = sym;
  73   res->line = lineno;
  74   res->ruleprec = NULL;
  75   return res;
  76 }
  77
  78 \f
  79
  80 /*===================\
  81 | Low level lexing.  |
  82 \===================*/
  83
  84 static void
  85 skip_to_char (int target)
  86 {
  87   int c;
  88   if (target == '\n')
  89     complain (_("   Skipping to next \\n"));
  90   else
  91     complain (_("   Skipping to next %c"), target);
  92
  93   do
  94     c = skip_white_space ();
  95   while (c != target && c != EOF);
  96   if (c != EOF)
  97     ungetc (c, finput);
  98 }
  99
 100
 101 /*---------------------------------------------------------.
 102 | Read a signed integer from STREAM and return its value.  |
 103 `---------------------------------------------------------*/
 104
 105 static inline int
 106 read_signed_integer (FILE *stream)
 107 {
 108   int c = getc (stream);
 109   int sign = 1;
 110   int n = 0;
 111
 112   if (c == '-')
 113     {
 114       c = getc (stream);
 115       sign = -1;
 116     }
 117
 118   while (isdigit (c))
 119     {
 120       n = 10 * n + (c - '0');
 121       c = getc (stream);
 122     }
 123
 124   ungetc (c, stream);
 125
 126   return sign * n;
 127 }
 128 \f
 129 /*--------------------------------------------------------------.
 130 | Get the data type (alternative in the union) of the value for |
 131 | symbol N in rule RULE.                                        |
 132 `--------------------------------------------------------------*/
 133
 134 static char *
 135 get_type_name (int n, symbol_list *rule)
 136 {
 137   int i;
 138   symbol_list *rp;
 139
 140   if (n < 0)
 141     {
 142       complain (_("invalid $ value"));
 143       return NULL;
 144     }
 145
 146   rp = rule;
 147   i = 0;
 148
 149   while (i < n)
 150     {
 151       rp = rp->next;
 152       if (rp == NULL || rp->sym == NULL)
 153         {
 154           complain (_("invalid $ value"));
 155           return NULL;
 156         }
 157       i++;
 158     }
 159
 160   return rp->sym->type_name;
 161 }
 162 \f
 163 /*------------------------------------------------------------.
 164 | Dump the string from FIN to OOUT if non null.  MATCH is the |
 165 | delimiter of the string (either ' or ").                    |
 166 `------------------------------------------------------------*/
 167
 168 static inline void
 169 copy_string (FILE *fin, struct obstack *oout, int match)
 170 {
 171   int c;
 172
 173   obstack_1grow (oout, match);
 174
 175   c = getc (fin);
 176
 177   while (c != match)
 178     {
 179       if (c == EOF)
 180         fatal (_("unterminated string at end of file"));
 181       if (c == '\n')
 182         {
 183           complain (_("unterminated string"));
 184           ungetc (c, fin);
 185           c = match;            /* invent terminator */
 186           continue;
 187         }
 188
 189       obstack_1grow (oout, c);
 190
 191       if (c == '\\')
 192         {
 193           c = getc (fin);
 194           if (c == EOF)
 195             fatal (_("unterminated string at end of file"));
 196           obstack_1grow (oout, c);
 197
 198           if (c == '\n')
 199             lineno++;
 200         }
 201
 202       c = getc (fin);
 203     }
 204
 205   obstack_1grow (oout, c);
 206 }
 207
 208
/*-----------------------------------------------------------------.
| Dump the would-be comment from FIN to OOUT1 and OOUT2 (which can |
| be NULL).  In fact we just saw a `/', which might or might not   |
| be a comment.  In any case, copy what we saw.                    |
|                                                                  |
| OOUT2 might be NULL.                                             |
`-----------------------------------------------------------------*/
 216
static inline void
copy_comment2 (FILE *fin, struct obstack *oout1, struct obstack *oout2)
{
  /* Nonzero for a `//' comment, zero for a slash-star comment.  */
  int cplus_comment;
  /* Becomes nonzero once the end of the comment has been copied.  */
  int ended;
  int c;

  /* We read a `/', output it. */
  obstack_1grow (oout1, '/');
  if (oout2)
    obstack_1grow (oout2, '/');

  /* The character after the `/' tells whether this is a comment at
     all.  If it is not, push that character back and stop: only the
     `/' has been copied.  */
  switch ((c = getc (fin)))
    {
    case '/':
      cplus_comment = 1;
      break;
    case '*':
      cplus_comment = 0;
      break;
    default:
      ungetc (c, fin);
      return;
    }

  /* Copy the second character of the comment opener.  */
  obstack_1grow (oout1, c);
  if (oout2)
    obstack_1grow (oout2, c);
  c = getc (fin);

  ended = 0;
  while (!ended)
    {
      if (!cplus_comment && c == '*')
        {
          /* Copy the whole run of stars; afterwards C holds the first
             character that is not a star.  */
          while (c == '*')
            {
              obstack_1grow (oout1, c);
              if (oout2)
                obstack_1grow (oout2, c);
              c = getc (fin);
            }

          /* A `/' right after the stars closes the comment.  Any other
             character is left in C for the next iteration.  */
          if (c == '/')
            {
              obstack_1grow (oout1, c);
              if (oout2)
                obstack_1grow (oout2, c);
              ended = 1;
            }
        }
      else if (c == '\n')
        {
          lineno++;
          obstack_1grow (oout1, c);
          if (oout2)
            obstack_1grow (oout2, c);
          /* The newline terminates a `//' comment (and is copied as
             part of it); the other kind simply goes on.  */
          if (cplus_comment)
            ended = 1;
          else
            c = getc (fin);
        }
      else if (c == EOF)
        fatal (_("unterminated comment"));
      else
        {
          /* Ordinary comment text.  */
          obstack_1grow (oout1, c);
          if (oout2)
            obstack_1grow (oout2, c);
          c = getc (fin);
        }
    }
}
 290
 291
 292 /*-------------------------------------------------------------------.
 293 | Dump the comment (actually the current string starting with a `/') |
 294 | from FIN to OOUT.                                                  |
 295 `-------------------------------------------------------------------*/
 296
 297 static inline void
 298 copy_comment (FILE *fin, struct obstack *oout)
 299 {
 300   copy_comment2 (fin, oout, NULL);
 301 }
 302
 303
 304 /*-----------------------------------------------------------------.
 305 | FIN is pointing to a location (i.e., a `@').  Output to OOUT a   |
 306 | reference to this location. STACK_OFFSET is the number of values |
 307 | in the current rule so far, which says where to find `$0' with   |
 308 | respect to the top of the stack.                                 |
 309 `-----------------------------------------------------------------*/
 310
 311 static inline void
 312 copy_at (FILE *fin, struct obstack *oout, int stack_offset)
 313 {
 314   int c;
 315
 316   c = getc (fin);
 317   if (c == '$')
 318     {
 319       obstack_sgrow (oout, "yyloc");
 320       locations_flag = 1;
 321     }
 322   else if (isdigit (c) || c == '-')
 323     {
 324       int n;
 325
 326       ungetc (c, fin);
 327       n = read_signed_integer (fin);
 328
 329       obstack_fgrow1 (oout, "yylsp[%d]", n - stack_offset);
 330       locations_flag = 1;
 331     }
 332   else
 333     {
 334       char buf[] = "@c";
 335       buf[1] = c;
 336       complain (_("%s is invalid"), quote (buf));
 337     }
 338 }
 339
 340
 341 /*-------------------------------------------------------------------.
| FIN is pointing to a would-be semantic value (i.e., a `$').        |
 343 |                                                                    |
 344 | Possible inputs: $[<TYPENAME>]($|integer)                          |
 345 |                                                                    |
 346 | Output to OOUT a reference to this semantic value. STACK_OFFSET is |
 347 | the number of values in the current rule so far, which says where  |
 348 | to find `$0' with respect to the top of the stack.                 |
 349 `-------------------------------------------------------------------*/
 350
 351 static inline void
 352 copy_dollar (FILE *fin, struct obstack *oout,
 353              symbol_list *rule, int stack_offset)
 354 {
 355   int c = getc (fin);
 356   const char *type_name = NULL;
 357
 358   /* Get the type name if explicit. */
 359   if (c == '<')
 360     {
 361       read_type_name (fin);
 362       type_name = token_buffer;
 363       value_components_used = 1;
 364       c = getc (fin);
 365     }
 366
 367   if (c == '$')
 368     {
 369       obstack_sgrow (oout, "yyval");
 370
 371       if (!type_name)
 372         type_name = get_type_name (0, rule);
 373       if (type_name)
 374         obstack_fgrow1 (oout, ".%s", type_name);
 375       if (!type_name && typed)
 376         complain (_("$$ of `%s' has no declared type"),
 377                   rule->sym->tag);
 378     }
 379   else if (isdigit (c) || c == '-')
 380     {
 381       int n;
 382       ungetc (c, fin);
 383       n = read_signed_integer (fin);
 384
 385       if (!type_name && n > 0)
 386         type_name = get_type_name (n, rule);
 387
 388       obstack_fgrow1 (oout, "yyvsp[%d]", n - stack_offset);
 389
 390       if (type_name)
 391         obstack_fgrow1 (oout, ".%s", type_name);
 392       if (!type_name && typed)
 393         complain (_("$%d of `%s' has no declared type"),
 394                   n, rule->sym->tag);
 395     }
 396   else
 397     {
 398       char buf[] = "$c";
 399       buf[1] = c;
 400       complain (_("%s is invalid"), quote (buf));
 401     }
 402 }
 403 \f
 404 /*-------------------------------------------------------------------.
 405 | Copy the contents of a `%{ ... %}' into the definitions file.  The |
 406 | `%{' has already been read.  Return after reading the `%}'.        |
 407 `-------------------------------------------------------------------*/
 408
static void
copy_definition (void)
{
  int c;
  /* -1 while reading a character if prev char was %. */
  int after_percent;

  /* Unless disabled, start the copied text with a `#line' directive
     giving the current input line and file name.  */
  if (!no_lines_flag)
    obstack_fgrow2 (&attrs_obstack, "#line %d %s\n",
                    lineno, quotearg_style (c_quoting_style, infile));

  after_percent = 0;

  c = getc (finput);

  for (;;)
    {
      switch (c)
        {
        case '\n':
          obstack_1grow (&attrs_obstack, c);
          lineno++;
          break;

        case '%':
          /* Hold the `%' back until the next character tells whether
             it starts the closing `%}'.  */
          after_percent = -1;
          break;

        case '\'':
        case '"':
          /* Strings are copied as a unit, so their contents are not
             examined by this switch.  */
          copy_string (finput, &attrs_obstack, c);
          break;

        case '/':
          /* Possibly a comment; copy_comment copies the `/' itself.  */
          copy_comment (finput, &attrs_obstack);
          break;

        case EOF:
          /* NOTE(review): there is no `break' here; this relies on
             fatal not returning -- confirm.  */
          fatal ("%s", _("unterminated `%{' definition"));

        default:
          obstack_1grow (&attrs_obstack, c);
        }

      c = getc (finput);

      if (after_percent)
        {
          /* `%}' ends the block; neither character is copied.  */
          if (c == '}')
            return;
          /* Otherwise the `%' held back above was ordinary text.  */
          obstack_1grow (&attrs_obstack, '%');
        }
      after_percent = 0;
    }
}
 464
 465
 466 /*-------------------------------------------------------------------.
 467 | Parse what comes after %token or %nterm.  For %token, WHAT_IS is   |
 468 | token_sym and WHAT_IS_NOT is nterm_sym.  For %nterm, the arguments |
 469 | are reversed.                                                      |
 470 `-------------------------------------------------------------------*/
 471
 472 static void
 473 parse_token_decl (symbol_class what_is, symbol_class what_is_not)
 474 {
 475   token_t token = tok_undef;
 476   char *typename = NULL;
 477
 478   /* The symbol being defined.  */
 479   struct bucket *symbol = NULL;
 480
 481   /* After `%token' and `%nterm', any number of symbols maybe be
 482      defined.  */
 483   for (;;)
 484     {
 485       int tmp_char = ungetc (skip_white_space (), finput);
 486
 487       /* `%' (for instance from `%token', or from `%%' etc.) is the
 488          only valid means to end this declaration.  */
 489       if (tmp_char == '%')
 490         return;
 491       if (tmp_char == EOF)
 492         fatal (_("Premature EOF after %s"), token_buffer);
 493
 494       token = lex ();
 495       if (token == tok_comma)
 496         {
 497           symbol = NULL;
 498           continue;
 499         }
 500       if (token == tok_typename)
 501         {
 502           typename = xstrdup (token_buffer);
 503           value_components_used = 1;
 504           symbol = NULL;
 505         }
 506       else if (token == tok_identifier && *symval->tag == '\"' && symbol)
 507         {
 508           if (symval->alias)
 509             warn (_("symbol `%s' used more than once as a literal string"),
 510                   symval->tag);
 511           else if (symbol->alias)
 512             warn (_("symbol `%s' given more than one literal string"),
 513                   symbol->tag);
 514           else
 515             {
 516               symval->class = token_sym;
 517               symval->type_name = typename;
 518               symval->user_token_number = symbol->user_token_number;
 519               symbol->user_token_number = SALIAS;
 520               symval->alias = symbol;
 521               symbol->alias = symval;
 522               /* symbol and symval combined are only one symbol */
 523               nsyms--;
 524             }
 525           symbol = NULL;
 526         }
 527       else if (token == tok_identifier)
 528         {
 529           int oldclass = symval->class;
 530           symbol = symval;
 531
 532           if (symbol->class == what_is_not)
 533             complain (_("symbol %s redefined"), symbol->tag);
 534           symbol->class = what_is;
 535           if (what_is == nterm_sym && oldclass != nterm_sym)
 536             symbol->value = nvars++;
 537
 538           if (typename)
 539             {
 540               if (symbol->type_name == NULL)
 541                 symbol->type_name = typename;
 542               else if (strcmp (typename, symbol->type_name) != 0)
 543                 complain (_("type redeclaration for %s"), symbol->tag);
 544             }
 545         }
 546       else if (symbol && token == tok_number)
 547         {
 548           symbol->user_token_number = numval;
 549         }
 550       else
 551         {
 552           complain (_("`%s' is invalid in %s"),
 553                     token_buffer,
 554                     (what_is == token_sym) ? "%token" : "%nterm");
 555           skip_to_char ('%');
 556         }
 557     }
 558
 559 }
 560
 561
 562 /*------------------------------.
 563 | Parse what comes after %start |
 564 `------------------------------*/
 565
 566 static void
 567 parse_start_decl (void)
 568 {
 569   if (start_flag)
 570     complain (_("multiple %s declarations"), "%start");
 571   if (lex () != tok_identifier)
 572     complain (_("invalid %s declaration"), "%start");
 573   else
 574     {
 575       start_flag = 1;
 576       startval = symval;
 577     }
 578 }
 579
 580 /*-----------------------------------------------------------.
 581 | read in a %type declaration and record its information for |
 582 | get_type_name to access                                    |
 583 `-----------------------------------------------------------*/
 584
 585 static void
 586 parse_type_decl (void)
 587 {
 588   char *name;
 589
 590   if (lex () != tok_typename)
 591     {
 592       complain ("%s", _("%type declaration has no <typename>"));
 593       skip_to_char ('%');
 594       return;
 595     }
 596
 597   name = xstrdup (token_buffer);
 598
 599   for (;;)
 600     {
 601       token_t t;
 602       int tmp_char = ungetc (skip_white_space (), finput);
 603
 604       if (tmp_char == '%')
 605         return;
 606       if (tmp_char == EOF)
 607         fatal (_("Premature EOF after %s"), token_buffer);
 608
 609       t = lex ();
 610
 611       switch (t)
 612         {
 613
 614         case tok_comma:
 615         case tok_semicolon:
 616           break;
 617
 618         case tok_identifier:
 619           if (symval->type_name == NULL)
 620             symval->type_name = name;
 621           else if (strcmp (name, symval->type_name) != 0)
 622             complain (_("type redeclaration for %s"), symval->tag);
 623
 624           break;
 625
 626         default:
 627           complain (_("invalid %%type declaration due to item: %s"),
 628                     token_buffer);
 629           skip_to_char ('%');
 630         }
 631     }
 632 }
 633
 634
 635
 636 /*----------------------------------------------------------------.
 637 | Read in a %left, %right or %nonassoc declaration and record its |
 638 | information.                                                    |
 639 `----------------------------------------------------------------*/
 640
 641 static void
 642 parse_assoc_decl (associativity assoc)
 643 {
 644   char *name = NULL;
 645   int prev = 0;
 646
 647   lastprec++;                   /* Assign a new precedence level, never 0.  */
 648
 649   for (;;)
 650     {
 651       token_t t;
 652       int tmp_char = ungetc (skip_white_space (), finput);
 653
 654       if (tmp_char == '%')
 655         return;
 656       if (tmp_char == EOF)
 657         fatal (_("Premature EOF after %s"), token_buffer);
 658
 659       t = lex ();
 660
 661       switch (t)
 662         {
 663         case tok_typename:
 664           name = xstrdup (token_buffer);
 665           break;
 666
 667         case tok_comma:
 668           break;
 669
 670         case tok_identifier:
 671           if (symval->prec != 0)
 672             complain (_("redefining precedence of %s"), symval->tag);
 673           symval->prec = lastprec;
 674           symval->assoc = assoc;
 675           if (symval->class == nterm_sym)
 676             complain (_("symbol %s redefined"), symval->tag);
 677           symval->class = token_sym;
 678           if (name)
 679             {                   /* record the type, if one is specified */
 680               if (symval->type_name == NULL)
 681                 symval->type_name = name;
 682               else if (strcmp (name, symval->type_name) != 0)
 683                 complain (_("type redeclaration for %s"), symval->tag);
 684             }
 685           break;
 686
 687         case tok_number:
 688           if (prev == tok_identifier)
 689             {
 690               symval->user_token_number = numval;
 691             }
 692           else
 693             {
 694               complain (_
 695                         ("invalid text (%s) - number should be after identifier"),
 696 token_buffer);
 697               skip_to_char ('%');
 698             }
 699           break;
 700
 701         case tok_semicolon:
 702           return;
 703
 704         default:
 705           complain (_("unexpected item: %s"), token_buffer);
 706           skip_to_char ('%');
 707         }
 708
 709       prev = t;
 710     }
 711 }
 712
 713
 714
 715 /*--------------------------------------------------------------.
 716 | Copy the union declaration into ATTRS_OBSTACK (and fdefines), |
 717 | where it is made into the definition of YYSTYPE, the type of  |
 718 | elements of the parser value stack.                           |
 719 `--------------------------------------------------------------*/
 720
 721 static void
 722 parse_union_decl (void)
 723 {
 724   int c;
 725   int count = 0;
 726   const char *prologue = "\
 727 #ifndef YYSTYPE\n\
 728 typedef union";
 729   const char *epilogue = "\
 730  yystype;\n\
 731 # define YYSTYPE yystype\n\
 732 #endif\n";
 733
 734   if (typed)
 735     complain (_("multiple %s declarations"), "%union");
 736
 737   typed = 1;
 738
 739   if (!no_lines_flag)
 740     obstack_fgrow2 (&attrs_obstack, "\n#line %d %s\n",
 741                     lineno, quotearg_style (c_quoting_style, infile));
 742   else
 743     obstack_1grow (&attrs_obstack, '\n');
 744
 745   obstack_sgrow (&attrs_obstack, prologue);
 746   if (defines_flag)
 747     obstack_sgrow (&defines_obstack, prologue);
 748
 749   c = getc (finput);
 750
 751   while (c != EOF)
 752     {
 753
 754       /* If C contains '/', it is output by copy_comment ().  */
 755       if (c != '/')
 756         {
 757           obstack_1grow (&attrs_obstack, c);
 758           if (defines_flag)
 759             obstack_1grow (&defines_obstack, c);
 760         }
 761
 762       switch (c)
 763         {
 764         case '\n':
 765           lineno++;
 766           break;
 767
 768         case '/':
 769           copy_comment2 (finput, &defines_obstack, &attrs_obstack);
 770           break;
 771
 772         case '{':
 773           count++;
 774           break;
 775
 776         case '}':
 777           if (count == 0)
 778             complain (_("unmatched %s"), "`}'");
 779           count--;
 780           if (count <= 0)
 781             {
 782               obstack_sgrow (&attrs_obstack, epilogue);
 783               if (defines_flag)
 784                 obstack_sgrow (&defines_obstack, epilogue);
 785               /* JF don't choke on trailing semi */
 786               c = skip_white_space ();
 787               if (c != ';')
 788                 ungetc (c, finput);
 789               return;
 790             }
 791         }
 792
 793       c = getc (finput);
 794     }
 795 }
 796
 797
 798 /*-------------------------------------------------------.
 799 | Parse the declaration %expect N which says to expect N |
 800 | shift-reduce conflicts.                                |
 801 `-------------------------------------------------------*/
 802
 803 static void
 804 parse_expect_decl (void)
 805 {
 806   int c = skip_white_space ();
 807   ungetc (c, finput);
 808
 809   if (!isdigit (c))
 810     complain (_("argument of %%expect is not an integer"));
 811   else
 812     expected_conflicts = read_signed_integer (finput);
 813 }
 814
 815
 816 /*-------------------------------------------------------------------.
 817 | Parse what comes after %thong.  the full syntax is                 |
 818 |                                                                    |
 819 |                %thong <type> token number literal                  |
 820 |                                                                    |
 821 | the <type> or number may be omitted.  The number specifies the     |
 822 | user_token_number.                                                 |
 823 |                                                                    |
 824 | Two symbols are entered in the table, one for the token symbol and |
 825 | one for the literal.  Both are given the <type>, if any, from the  |
 826 | declaration.  The ->user_token_number of the first is SALIAS and   |
 827 | the ->user_token_number of the second is set to the number, if     |
 828 | any, from the declaration.  The two symbols are linked via         |
 829 | pointers in their ->alias fields.                                  |
 830 |                                                                    |
| During OUTPUT_DEFINES_TABLE, the symbol is reported; thereafter,   |
| only the literal string is retained: it is the literal string that |
| is output to yytname.                                              |
 834 `-------------------------------------------------------------------*/
 835
 836 static void
 837 parse_thong_decl (void)
 838 {
 839   token_t token;
 840   struct bucket *symbol;
 841   char *typename = 0;
 842   int usrtoknum = SUNDEF;
 843
 844   token = lex ();               /* fetch typename or first token */
 845   if (token == tok_typename)
 846     {
 847       typename = xstrdup (token_buffer);
 848       value_components_used = 1;
 849       token = lex ();           /* fetch first token */
 850     }
 851
 852   /* process first token */
 853
 854   if (token != tok_identifier)
 855     {
 856       complain (_("unrecognized item %s, expected an identifier"),
 857                 token_buffer);
 858       skip_to_char ('%');
 859       return;
 860     }
 861   symval->class = token_sym;
 862   symval->type_name = typename;
 863   symval->user_token_number = SALIAS;
 864   symbol = symval;
 865
 866   token = lex ();               /* get number or literal string */
 867
 868   if (token == tok_number)
 869     {
 870       usrtoknum = numval;
 871       token = lex ();           /* okay, did number, now get literal */
 872     }
 873
 874   /* process literal string token */
 875
 876   if (token != tok_identifier || *symval->tag != '\"')
 877     {
 878       complain (_("expected string constant instead of %s"), token_buffer);
 879       skip_to_char ('%');
 880       return;
 881     }
 882   symval->class = token_sym;
 883   symval->type_name = typename;
 884   symval->user_token_number = usrtoknum;
 885
 886   symval->alias = symbol;
 887   symbol->alias = symval;
 888
 889   /* symbol and symval combined are only one symbol.  */
 890   nsyms--;
 891 }
 892
 893
 894 /*------------------------------------------------------------------.
 895 | Parse a double quoted parameter. It was used for                  |
 896 | %{source,header}_extension.  For the moment, It is not used since |
 897 | extension features have been removed.                             |
 898 `------------------------------------------------------------------*/
 899
 900 #if 0
 901
 902 static const char *
 903 parse_dquoted_param (const char *from)
 904 {
 905   char buff[32];
 906   int c;
 907   int i;
 908
 909   c = skip_white_space ();
 910
 911   if (c != '"')
 912     {
 913       ungetc (c, finput);
 914       complain (_("invalid %s declaration"), from);
 915       return NULL;
 916     }
 917
 918   c = getc (finput);
 919   for (i = 0; (c >= '!') && (c <= '~'); i++)
 920     {
 921       if (c == '"')
 922         break;
 923
 924       if (c == '\\')
 925         {
 926           c = getc (finput);
 927           if ((c < '!') && (c > '~'))
 928             break;
 929         }
 930
 931       buff[i] = c;
 932       c = getc (finput);
 933     }
 934   buff[i] = '\0';
 935
 936   if (c != '"')
 937     {
 938       ungetc (c, finput);
 939       complain (_("invalid %s declaration"), from);
 940       return NULL;
 941     }
 942
 943   return xstrdup (buff);
 944 }
 945
 946 #endif
 947
 948
 949 /*----------------------------------------------------------------.
 950 | Read from finput until `%%' is seen.  Discard the `%%'.  Handle |
 951 | any `%' declarations, and copy the contents of any `%{ ... %}'  |
 952 | groups to ATTRS_OBSTACK.                                        |
 953 `----------------------------------------------------------------*/
 954
 955 static void
 956 read_declarations (void)
 957 {
 958   for (;;)
 959     {
 960       int c = skip_white_space ();
 961
 962       if (c == '%')
 963         {
 964           token_t tok = parse_percent_token ();
 965
 966           switch (tok)
 967             {
 968             case tok_two_percents:
 969               return;
 970
 971             case tok_percent_left_curly:
 972               copy_definition ();
 973               break;
 974
 975             case tok_token:
 976               parse_token_decl (token_sym, nterm_sym);
 977               break;
 978
 979             case tok_nterm:
 980               parse_token_decl (nterm_sym, token_sym);
 981               break;
 982
 983             case tok_type:
 984               parse_type_decl ();
 985               break;
 986
 987             case tok_start:
 988               parse_start_decl ();
 989               break;
 990
 991             case tok_union:
 992               parse_union_decl ();
 993               break;
 994
 995             case tok_expect:
 996               parse_expect_decl ();
 997               break;
 998
 999             case tok_thong:
1000               parse_thong_decl ();
1001               break;
1002
1003             case tok_left:
1004               parse_assoc_decl (left_assoc);
1005               break;
1006
1007             case tok_right:
1008               parse_assoc_decl (right_assoc);
1009               break;
1010
1011             case tok_nonassoc:
1012               parse_assoc_decl (non_assoc);
1013               break;
1014
1015             case tok_noop:
1016               break;
1017
1018             case tok_stropt:
1019             case tok_intopt:
1020             case tok_obsolete:
1021               abort ();
1022               break;
1023
1024             case tok_illegal:
1025             default:
1026               complain (_("unrecognized: %s"), token_buffer);
1027               skip_to_char ('%');
1028             }
1029         }
1030       else if (c == EOF)
1031         fatal (_("no input grammar"));
1032       else
1033         {
1034           char buf[] = "c";
1035           buf[0] = c;
1036           complain (_("unknown character: %s"), quote (buf));
1037           skip_to_char ('%');
1038         }
1039     }
1040 }
1041 \f
1042 /*-------------------------------------------------------------------.
1043 | Assuming that a `{' has just been seen, copy everything up to the  |
1044 | matching `}' into the actions file.  STACK_OFFSET is the number of |
1045 | values in the current rule so far, which says where to find `$0'   |
1046 | with respect to the top of the stack.                              |
1047 `-------------------------------------------------------------------*/
1048
1049 static void
1050 copy_action (symbol_list *rule, int stack_offset)
1051 {
1052   int c;
1053   int count;
1054   char buf[4096];
1055
1056   /* offset is always 0 if parser has already popped the stack pointer */
1057   if (semantic_parser)
1058     stack_offset = 0;
1059
1060   sprintf (buf, "\ncase %d:\n", nrules);
1061   obstack_grow (&action_obstack, buf, strlen (buf));
1062
1063   if (!no_lines_flag)
1064     {
1065       sprintf (buf, "#line %d %s\n",
1066                lineno, quotearg_style (c_quoting_style, infile));
1067       obstack_grow (&action_obstack, buf, strlen (buf));
1068     }
1069   obstack_1grow (&action_obstack, '{');
1070
1071   count = 1;
1072   c = getc (finput);
1073
1074   while (count > 0)
1075     {
1076       while (c != '}')
1077         {
1078           switch (c)
1079             {
1080             case '\n':
1081               obstack_1grow (&action_obstack, c);
1082               lineno++;
1083               break;
1084
1085             case '{':
1086               obstack_1grow (&action_obstack, c);
1087               count++;
1088               break;
1089
1090             case '\'':
1091             case '"':
1092               copy_string (finput, &action_obstack, c);
1093               break;
1094
1095             case '/':
1096               copy_comment (finput, &action_obstack);
1097               break;
1098
1099             case '$':
1100               copy_dollar (finput, &action_obstack,
1101                            rule, stack_offset);
1102               break;
1103
1104             case '@':
1105               copy_at (finput, &action_obstack,
1106                        stack_offset);
1107               break;
1108
1109             case EOF:
1110               fatal (_("unmatched %s"), "`{'");
1111
1112             default:
1113               obstack_1grow (&action_obstack, c);
1114             }
1115
1116           c = getc (finput);
1117         }
1118
1119       /* above loop exits when c is '}' */
1120
1121       if (--count)
1122         {
1123           obstack_1grow (&action_obstack, c);
1124           c = getc (finput);
1125         }
1126     }
1127
1128   obstack_sgrow (&action_obstack, ";\n    break;}");
1129 }
1130 \f
1131 /*-------------------------------------------------------------------.
1132 | After `%guard' is seen in the input file, copy the actual guard    |
1133 | into the guards file.  If the guard is followed by an action, copy |
1134 | that into the actions file.  STACK_OFFSET is the number of values  |
1135 | in the current rule so far, which says where to find `$0' with     |
1136 | respect to the top of the stack, for the simple parser in which    |
1137 | the stack is not popped until after the guard is run.              |
1138 `-------------------------------------------------------------------*/
1139
1140 static void
1141 copy_guard (symbol_list *rule, int stack_offset)
1142 {
1143   int c;
1144   int count;
1145   int brace_flag = 0;
1146
1147   /* offset is always 0 if parser has already popped the stack pointer */
1148   if (semantic_parser)
1149     stack_offset = 0;
1150
1151   obstack_fgrow1 (&guard_obstack, "\ncase %d:\n", nrules);
1152   if (!no_lines_flag)
1153     obstack_fgrow2 (&guard_obstack, "#line %d %s\n",
1154                     lineno, quotearg_style (c_quoting_style, infile));
1155   obstack_1grow (&guard_obstack, '{');
1156
1157   count = 0;
1158   c = getc (finput);
1159
1160   while (brace_flag ? (count > 0) : (c != ';'))
1161     {
1162       switch (c)
1163         {
1164         case '\n':
1165           obstack_1grow (&guard_obstack, c);
1166           lineno++;
1167           break;
1168
1169         case '{':
1170           obstack_1grow (&guard_obstack, c);
1171           brace_flag = 1;
1172           count++;
1173           break;
1174
1175         case '}':
1176           obstack_1grow (&guard_obstack, c);
1177           if (count > 0)
1178             count--;
1179           else
1180             {
1181               complain (_("unmatched %s"), "`}'");
1182               c = getc (finput);        /* skip it */
1183             }
1184           break;
1185
1186         case '\'':
1187         case '"':
1188           copy_string (finput, &guard_obstack, c);
1189           break;
1190
1191         case '/':
1192           copy_comment (finput, &guard_obstack);
1193           break;
1194
1195         case '$':
1196           copy_dollar (finput, &guard_obstack, rule, stack_offset);
1197           break;
1198
1199         case '@':
1200           copy_at (finput, &guard_obstack, stack_offset);
1201           break;
1202
1203         case EOF:
1204           fatal ("%s", _("unterminated %guard clause"));
1205
1206         default:
1207           obstack_1grow (&guard_obstack, c);
1208         }
1209
1210       if (c != '}' || count != 0)
1211         c = getc (finput);
1212     }
1213
1214   c = skip_white_space ();
1215
1216   obstack_sgrow (&guard_obstack, ";\n    break;}");
1217   if (c == '{')
1218     copy_action (rule, stack_offset);
1219   else if (c == '=')
1220     {
1221       c = getc (finput);        /* why not skip_white_space -wjh */
1222       if (c == '{')
1223         copy_action (rule, stack_offset);
1224     }
1225   else
1226     ungetc (c, finput);
1227 }
1228 \f
1229
1230 /*-------------------------------------------------------------------.
1231 | Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1232 | with the user's names.                                             |
1233 `-------------------------------------------------------------------*/
1234
1235 static bucket *
1236 gensym (void)
1237 {
1238   /* Incremented for each generated symbol */
1239   static int gensym_count = 0;
1240   static char buf[256];
1241
1242   bucket *sym;
1243
1244   sprintf (buf, "@%d", ++gensym_count);
1245   token_buffer = buf;
1246   sym = getsym (token_buffer);
1247   sym->class = nterm_sym;
1248   sym->value = nvars++;
1249   return sym;
1250 }
1251
#if 0
/*------------------------------------------------------------------.
| read in a %type declaration and record its information for        |
| get_type_name to access.  This is unused.  It is only called from |
| the #if 0 part of readgram                                        |
|                                                                   |
| Return the token the caller should process next: the one after    |
| the closing `;', or the first token that cannot be part of the    |
| declaration.                                                      |
`------------------------------------------------------------------*/

static int
get_type (void)
{
  token_t token;
  char *name;

  token = lex ();

  if (token != tok_typename)
    {
      complain (_("invalid %s declaration"), "%type");
      /* Hand the offending token back to the caller, as the `default'
         case below does.  */
      return token;
    }

  name = xstrdup (token_buffer);

  for (;;)
    {
      token = lex ();

      switch (token)
        {
        case tok_semicolon:
          return lex ();

        case tok_comma:
          break;

        case tok_identifier:
          /* Record the type on the symbol, or check that it agrees
             with the type it already has.  */
          if (symval->type_name == NULL)
            symval->type_name = name;
          else if (strcmp (name, symval->type_name) != 0)
            complain (_("type redeclaration for %s"), symval->tag);

          break;

        default:
          return token;
        }
    }
}

#endif
1303 \f
1304 /*------------------------------------------------------------------.
1305 | Parse the input grammar into a one symbol_list structure.  Each   |
1306 | rule is represented by a sequence of symbols: the left hand side  |
1307 | followed by the contents of the right hand side, followed by a    |
1308 | null pointer instead of a symbol to terminate the rule.  The next |
1309 | symbol is the lhs of the following rule.                          |
1310 |                                                                   |
1311 | All guards and actions are copied out to the appropriate files,   |
1312 | labelled by the rule number they apply to.                        |
1313 `------------------------------------------------------------------*/
1314
1315 static void
1316 readgram (void)
1317 {
1318   token_t t;
1319   bucket *lhs = NULL;
1320   symbol_list *p;
1321   symbol_list *p1;
1322   bucket *bp;
1323
1324   /* Points to first symbol_list of current rule. its symbol is the
1325      lhs of the rule.  */
1326   symbol_list *crule;
1327   /* Points to the symbol_list preceding crule.  */
1328   symbol_list *crule1;
1329
1330   p1 = NULL;
1331
1332   t = lex ();
1333
1334   while (t != tok_two_percents && t != tok_eof)
1335     {
1336       if (t == tok_identifier || t == tok_bar)
1337         {
1338           int action_flag = 0;
1339           /* Number of symbols in rhs of this rule so far */
1340           int rulelength = 0;
1341           int xactions = 0;     /* JF for error checking */
1342           bucket *first_rhs = 0;
1343
1344           if (t == tok_identifier)
1345             {
1346               lhs = symval;
1347
1348               if (!start_flag)
1349                 {
1350                   startval = lhs;
1351                   start_flag = 1;
1352                 }
1353
1354               t = lex ();
1355               if (t != tok_colon)
1356                 {
1357                   complain (_("ill-formed rule: initial symbol not followed by colon"));
1358                   unlex (t);
1359                 }
1360             }
1361
1362           if (nrules == 0 && t == tok_bar)
1363             {
1364               complain (_("grammar starts with vertical bar"));
1365               lhs = symval;     /* BOGUS: use a random symval */
1366             }
1367           /* start a new rule and record its lhs.  */
1368
1369           nrules++;
1370           nitems++;
1371
1372           p = symbol_list_new (lhs);
1373
1374           crule1 = p1;
1375           if (p1)
1376             p1->next = p;
1377           else
1378             grammar = p;
1379
1380           p1 = p;
1381           crule = p;
1382
1383           /* mark the rule's lhs as a nonterminal if not already so.  */
1384
1385           if (lhs->class == unknown_sym)
1386             {
1387               lhs->class = nterm_sym;
1388               lhs->value = nvars;
1389               nvars++;
1390             }
1391           else if (lhs->class == token_sym)
1392             complain (_("rule given for %s, which is a token"), lhs->tag);
1393
1394           /* read the rhs of the rule.  */
1395
1396           for (;;)
1397             {
1398               t = lex ();
1399               if (t == tok_prec)
1400                 {
1401                   t = lex ();
1402                   crule->ruleprec = symval;
1403                   t = lex ();
1404                 }
1405
1406               if (!(t == tok_identifier || t == tok_left_curly))
1407                 break;
1408
1409               /* If next token is an identifier, see if a colon follows it.
1410                  If one does, exit this rule now.  */
1411               if (t == tok_identifier)
1412                 {
1413                   bucket *ssave;
1414                   token_t t1;
1415
1416                   ssave = symval;
1417                   t1 = lex ();
1418                   unlex (t1);
1419                   symval = ssave;
1420                   if (t1 == tok_colon)
1421                     break;
1422
1423                   if (!first_rhs)       /* JF */
1424                     first_rhs = symval;
1425                   /* Not followed by colon =>
1426                      process as part of this rule's rhs.  */
1427                 }
1428
1429               /* If we just passed an action, that action was in the middle
1430                  of a rule, so make a dummy rule to reduce it to a
1431                  non-terminal.  */
1432               if (action_flag)
1433                 {
1434                   /* Since the action was written out with this rule's
1435                      number, we must give the new rule this number by
1436                      inserting the new rule before it.  */
1437
1438                   /* Make a dummy nonterminal, a gensym.  */
1439                   bucket *sdummy = gensym ();
1440
1441                   /* Make a new rule, whose body is empty, before the
1442                      current one, so that the action just read can
1443                      belong to it.  */
1444                   nrules++;
1445                   nitems++;
1446                   p = symbol_list_new (sdummy);
1447                   /* Attach its lineno to that of the host rule. */
1448                   p->line = crule->line;
1449                   if (crule1)
1450                     crule1->next = p;
1451                   else
1452                     grammar = p;
1453                   /* End of the rule. */
1454                   crule1 = symbol_list_new (NULL);
1455                   crule1->next = crule;
1456
1457                   p->next = crule1;
1458
1459                   /* Insert the dummy generated by that rule into this
1460                      rule.  */
1461                   nitems++;
1462                   p = symbol_list_new (sdummy);
1463                   p1->next = p;
1464                   p1 = p;
1465
1466                   action_flag = 0;
1467                 }
1468
1469               if (t == tok_identifier)
1470                 {
1471                   nitems++;
1472                   p = symbol_list_new (symval);
1473                   p1->next = p;
1474                   p1 = p;
1475                 }
1476               else              /* handle an action.  */
1477                 {
1478                   copy_action (crule, rulelength);
1479                   action_flag = 1;
1480                   xactions++;   /* JF */
1481                 }
1482               rulelength++;
1483             }                   /* end of  read rhs of rule */
1484
1485           /* Put an empty link in the list to mark the end of this rule  */
1486           p = symbol_list_new (NULL);
1487           p1->next = p;
1488           p1 = p;
1489
1490           if (t == tok_prec)
1491             {
1492               complain (_("two @prec's in a row"));
1493               t = lex ();
1494               crule->ruleprec = symval;
1495               t = lex ();
1496             }
1497           if (t == tok_guard)
1498             {
1499               if (!semantic_parser)
1500                 complain (_("%%guard present but %%semantic_parser not specified"));
1501
1502               copy_guard (crule, rulelength);
1503               t = lex ();
1504             }
1505           else if (t == tok_left_curly)
1506             {
1507               /* This case never occurs -wjh */
1508               if (action_flag)
1509                 complain (_("two actions at end of one rule"));
1510               copy_action (crule, rulelength);
1511               action_flag = 1;
1512               xactions++;       /* -wjh */
1513               t = lex ();
1514             }
1515           /* If $$ is being set in default way, report if any type
1516              mismatch.  */
1517           else if (!xactions
1518                    && first_rhs && lhs->type_name != first_rhs->type_name)
1519             {
1520               if (lhs->type_name == 0
1521                   || first_rhs->type_name == 0
1522                   || strcmp (lhs->type_name, first_rhs->type_name))
1523                 complain (_("type clash (`%s' `%s') on default action"),
1524                           lhs->type_name ? lhs->type_name : "",
1525                           first_rhs->type_name ? first_rhs->type_name : "");
1526             }
1527           /* Warn if there is no default for $$ but we need one.  */
1528           else if (!xactions && !first_rhs && lhs->type_name != 0)
1529             complain (_("empty rule for typed nonterminal, and no action"));
1530           if (t == tok_semicolon)
1531             t = lex ();
1532         }
1533 #if 0
1534       /* these things can appear as alternatives to rules.  */
1535 /* NO, they cannot.
1536         a) none of the documentation allows them
1537         b) most of them scan forward until finding a next %
1538                 thus they may swallow lots of intervening rules
1539 */
1540       else if (t == tok_token)
1541         {
1542           parse_token_decl (token_sym, nterm_sym);
1543           t = lex ();
1544         }
1545       else if (t == tok_nterm)
1546         {
1547           parse_token_decl (nterm_sym, token_sym);
1548           t = lex ();
1549         }
1550       else if (t == tok_type)
1551         {
1552           t = get_type ();
1553         }
1554       else if (t == tok_union)
1555         {
1556           parse_union_decl ();
1557           t = lex ();
1558         }
1559       else if (t == tok_expect)
1560         {
1561           parse_expect_decl ();
1562           t = lex ();
1563         }
1564       else if (t == tok_start)
1565         {
1566           parse_start_decl ();
1567           t = lex ();
1568         }
1569 #endif
1570
1571       else
1572         {
1573           complain (_("invalid input: %s"), quote (token_buffer));
1574           t = lex ();
1575         }
1576     }
1577
1578   /* grammar has been read.  Do some checking */
1579
1580   if (nsyms > MAXSHORT)
1581     fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1582            MAXSHORT);
1583   if (nrules == 0)
1584     fatal (_("no rules in the input grammar"));
1585
1586   /* JF put out same default YYSTYPE as YACC does */
1587   if (typed == 0
1588       && !value_components_used)
1589     {
1590       /* We used to use `unsigned long' as YYSTYPE on MSDOS,
1591          but it seems better to be consistent.
1592          Most programs should declare their own type anyway.  */
1593       obstack_sgrow (&attrs_obstack,
1594                            "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1595       if (defines_flag)
1596         obstack_sgrow (&defines_obstack, "\
1597 # ifndef YYSTYPE\n\
1598 #  define YYSTYPE int\n\
1599 # endif\n");
1600     }
1601
1602   /* Report any undefined symbols and consider them nonterminals.  */
1603
1604   for (bp = firstsymbol; bp; bp = bp->next)
1605     if (bp->class == unknown_sym)
1606       {
1607         complain (_
1608                   ("symbol %s is used, but is not defined as a token and has no rules"),
1609                   bp->tag);
1610         bp->class = nterm_sym;
1611         bp->value = nvars++;
1612       }
1613
1614   ntokens = nsyms - nvars;
1615 }
1616 \f
1617 /*--------------------------------------------------------------.
1618 | For named tokens, but not literal ones, define the name.  The |
1619 | value is the user token number.                               |
1620 `--------------------------------------------------------------*/
1621
1622 static void
1623 output_token_defines (struct obstack *oout)
1624 {
1625   bucket *bp;
1626   char *cp, *symbol;
1627   char c;
1628
1629   for (bp = firstsymbol; bp; bp = bp->next)
1630     {
1631       symbol = bp->tag;         /* get symbol */
1632
1633       if (bp->value >= ntokens)
1634         continue;
1635       if (bp->user_token_number == SALIAS)
1636         continue;
1637       if ('\'' == *symbol)
1638         continue;               /* skip literal character */
1639       if (bp == errtoken)
1640         continue;               /* skip error token */
1641       if ('\"' == *symbol)
1642         {
1643           /* use literal string only if given a symbol with an alias */
1644           if (bp->alias)
1645             symbol = bp->alias->tag;
1646           else
1647             continue;
1648         }
1649
1650       /* Don't #define nonliteral tokens whose names contain periods.  */
1651       cp = symbol;
1652       while ((c = *cp++) && c != '.');
1653       if (c != '\0')
1654         continue;
1655
1656       obstack_fgrow2 (oout, "# define\t%s\t%d\n",
1657                       symbol, bp->user_token_number);
1658       if (semantic_parser)
1659         /* FIXME: This is certainly dead wrong, and should be just as
1660            above. --akim.  */
1661         obstack_fgrow2 (oout, "# define\tT%s\t%d\n", symbol, bp->value);
1662     }
1663
1664   obstack_1grow (oout, '\n');
1665 }
1666
1667
1668 /*--------------------.
1669 | Output the header.  |
1670 `--------------------*/
1671
1672 static void
1673 symbols_output (void)
1674 {
1675   if (defines_flag)
1676     {
1677       output_token_defines (&defines_obstack);
1678
1679       if (!pure_parser)
1680         {
1681           if (spec_name_prefix)
1682             obstack_fgrow1 (&defines_obstack, "\nextern YYSTYPE %slval;\n",
1683                             spec_name_prefix);
1684           else
1685             obstack_sgrow (&defines_obstack,
1686                                  "\nextern YYSTYPE yylval;\n");
1687         }
1688
1689       if (semantic_parser)
1690         {
1691           int i;
1692
1693           for (i = ntokens; i < nsyms; i++)
1694             {
1695               /* don't make these for dummy nonterminals made by gensym.  */
1696               if (*tags[i] != '@')
1697                 obstack_fgrow2 (&defines_obstack,
1698                                 "# define\tNT%s\t%d\n", tags[i], i);
1699             }
1700 #if 0
1701           /* `fdefines' is now a temporary file, so we need to copy its
1702              contents in `done', so we can't close it here.  */
1703           fclose (fdefines);
1704           fdefines = NULL;
1705 #endif
1706         }
1707     }
1708 }
1709
1710
1711 /*------------------------------------------------------------------.
1712 | Set TOKEN_TRANSLATIONS.  Check that no two symbols share the same |
1713 | number.                                                           |
1714 `------------------------------------------------------------------*/
1715
1716 static void
1717 token_translations_init (void)
1718 {
1719   bucket *bp = NULL;
1720   int i;
1721
1722   token_translations = XCALLOC (short, max_user_token_number + 1);
1723
1724   /* Initialize all entries for literal tokens to 2, the internal
1725      token number for $undefined., which represents all invalid
1726      inputs.  */
1727   for (i = 0; i <= max_user_token_number; i++)
1728     token_translations[i] = 2;
1729
1730   for (bp = firstsymbol; bp; bp = bp->next)
1731     {
1732       /* Non-terminal? */
1733       if (bp->value >= ntokens)
1734         continue;
1735       /* A token string alias? */
1736       if (bp->user_token_number == SALIAS)
1737         continue;
1738
1739       assert (bp->user_token_number != SUNDEF);
1740
1741       /* A token which translation has already been set? */
1742       if (token_translations[bp->user_token_number] != 2)
1743         complain (_("tokens %s and %s both assigned number %d"),
1744                   tags[token_translations[bp->user_token_number]],
1745                   bp->tag, bp->user_token_number);
1746       token_translations[bp->user_token_number] = bp->value;
1747     }
1748 }
1749
1750
1751 /*------------------------------------------------------------------.
1752 | Assign symbol numbers, and write definition of token names into   |
1753 | FDEFINES.  Set up vectors TAGS and SPREC of names and precedences |
1754 | of symbols.                                                       |
1755 `------------------------------------------------------------------*/
1756
1757 static void
1758 packsymbols (void)
1759 {
1760   bucket *bp = NULL;
1761   int tokno = 1;
1762   int last_user_token_number;
1763   static char DOLLAR[] = "$";
1764
1765   tags = XCALLOC (char *, nsyms + 1);
1766   user_toknums = XCALLOC (short, nsyms + 1);
1767
1768   sprec = XCALLOC (short, nsyms);
1769   sassoc = XCALLOC (short, nsyms);
1770
1771   /* The EOF token. */
1772   tags[0] = DOLLAR;
1773   user_toknums[0] = 0;
1774
1775   max_user_token_number = 256;
1776   last_user_token_number = 256;
1777
1778   for (bp = firstsymbol; bp; bp = bp->next)
1779     {
1780       if (bp->class == nterm_sym)
1781         {
1782           bp->value += ntokens;
1783         }
1784       else if (bp->alias)
1785         {
1786           /* this symbol and its alias are a single token defn.
1787              allocate a tokno, and assign to both check agreement of
1788              ->prec and ->assoc fields and make both the same */
1789           if (bp->value == 0)
1790             bp->value = bp->alias->value = tokno++;
1791
1792           if (bp->prec != bp->alias->prec)
1793             {
1794               if (bp->prec != 0 && bp->alias->prec != 0
1795                   && bp->user_token_number == SALIAS)
1796                 complain (_("conflicting precedences for %s and %s"),
1797                           bp->tag, bp->alias->tag);
1798               if (bp->prec != 0)
1799                 bp->alias->prec = bp->prec;
1800               else
1801                 bp->prec = bp->alias->prec;
1802             }
1803
1804           if (bp->assoc != bp->alias->assoc)
1805             {
1806               if (bp->assoc != 0 && bp->alias->assoc != 0
1807                   && bp->user_token_number == SALIAS)
1808                 complain (_("conflicting assoc values for %s and %s"),
1809                           bp->tag, bp->alias->tag);
1810               if (bp->assoc != 0)
1811                 bp->alias->assoc = bp->assoc;
1812               else
1813                 bp->assoc = bp->alias->assoc;
1814             }
1815
1816           if (bp->user_token_number == SALIAS)
1817             continue;           /* do not do processing below for SALIASs */
1818
1819         }
1820       else                      /* bp->class == token_sym */
1821         {
1822           bp->value = tokno++;
1823         }
1824
1825       if (bp->class == token_sym)
1826         {
1827           if (bp->user_token_number == SUNDEF)
1828             bp->user_token_number = ++last_user_token_number;
1829           if (bp->user_token_number > max_user_token_number)
1830             max_user_token_number = bp->user_token_number;
1831         }
1832
1833       tags[bp->value] = bp->tag;
1834       user_toknums[bp->value] = bp->user_token_number;
1835       sprec[bp->value] = bp->prec;
1836       sassoc[bp->value] = bp->assoc;
1837     }
1838
1839   token_translations_init ();
1840
1841   error_token_number = errtoken->value;
1842
1843   if (!no_parser_flag)
1844     output_token_defines (&table_obstack);
1845
1846   if (startval->class == unknown_sym)
1847     fatal (_("the start symbol %s is undefined"), startval->tag);
1848   else if (startval->class == token_sym)
1849     fatal (_("the start symbol %s is a token"), startval->tag);
1850
1851   start_symbol = startval->value;
1852 }
1853
1854
1855 /*---------------------------------------------------------------.
1856 | Convert the rules into the representation using RRHS, RLHS and |
1857 | RITEMS.                                                        |
1858 `---------------------------------------------------------------*/
1859
1860 static void
1861 packgram (void)
1862 {
1863   int itemno;
1864   int ruleno;
1865   symbol_list *p;
1866
1867   ritem = XCALLOC (short, nitems + 1);
1868   rule_table = XCALLOC (rule_t, nrules) - 1;
1869
1870   itemno = 0;
1871   ruleno = 1;
1872
1873   p = grammar;
1874   while (p)
1875     {
1876       bucket *ruleprec = p->ruleprec;
1877       rule_table[ruleno].lhs = p->sym->value;
1878       rule_table[ruleno].rhs = itemno;
1879       rule_table[ruleno].line = p->line;
1880
1881       p = p->next;
1882       while (p && p->sym)
1883         {
1884           ritem[itemno++] = p->sym->value;
1885           /* A rule gets by default the precedence and associativity
1886              of the last token in it.  */
1887           if (p->sym->class == token_sym)
1888             {
1889               rule_table[ruleno].prec = p->sym->prec;
1890               rule_table[ruleno].assoc = p->sym->assoc;
1891             }
1892           if (p)
1893             p = p->next;
1894         }
1895
1896       /* If this rule has a %prec,
1897          the specified symbol's precedence replaces the default.  */
1898       if (ruleprec)
1899         {
1900           rule_table[ruleno].prec = ruleprec->prec;
1901           rule_table[ruleno].assoc = ruleprec->assoc;
1902           rule_table[ruleno].precsym = ruleprec->value;
1903         }
1904
1905       ritem[itemno++] = -ruleno;
1906       ruleno++;
1907
1908       if (p)
1909         p = p->next;
1910     }
1911
1912   ritem[itemno] = 0;
1913 }
1914 \f
1915 /*-------------------------------------------------------------------.
1916 | Read in the grammar specification and record it in the format      |
1917 | described in gram.h.  All guards are copied into the GUARD_OBSTACK |
1918 | and all actions into ACTION_OBSTACK, in each case forming the body |
1919 | of a C function (YYGUARD or YYACTION) which contains a switch      |
1920 | statement to decide which guard or action to execute.              |
1921 `-------------------------------------------------------------------*/
1922
1923 void
1924 reader (void)
1925 {
1926   start_flag = 0;
1927   startval = NULL;              /* start symbol not specified yet. */
1928
1929   nsyms = 1;
1930   nvars = 0;
1931   nrules = 0;
1932   nitems = 0;
1933
1934   typed = 0;
1935   lastprec = 0;
1936
1937   semantic_parser = 0;
1938   pure_parser = 0;
1939
1940   grammar = NULL;
1941
1942   lex_init ();
1943   lineno = 1;
1944
1945   /* Initialize the symbol table.  */
1946   tabinit ();
1947   /* Construct the error token */
1948   errtoken = getsym ("error");
1949   errtoken->class = token_sym;
1950   errtoken->user_token_number = 256;    /* Value specified by POSIX.  */
1951   /* Construct a token that represents all undefined literal tokens.
1952      It is always token number 2.  */
1953   undeftoken = getsym ("$undefined.");
1954   undeftoken->class = token_sym;
1955   undeftoken->user_token_number = 2;
1956
1957   /* Read the declaration section.  Copy %{ ... %} groups to
1958      TABLE_OBSTACK and FDEFINES file.  Also notice any %token, %left,
1959      etc. found there.  */
1960   obstack_fgrow3 (&table_obstack, "\
1961 /* %s, made from %s\n\
1962    by GNU bison %s.  */\n\
1963 \n",
1964                   no_parser_flag ? "Bison-generated parse tables" : "A Bison parser",
1965                   infile, VERSION);
1966
1967   obstack_sgrow (&table_obstack,
1968                        "#define YYBISON 1  /* Identify Bison output.  */\n\n");
1969   read_declarations ();
1970   /* Start writing the guard and action files, if they are needed.  */
1971   output_headers ();
1972   /* Read in the grammar, build grammar in list form.  Write out
1973      guards and actions.  */
1974   readgram ();
1975   /* Now we know whether we need the line-number stack.  If we do,
1976      write its type into the .tab.h file.  */
1977   if (defines_flag)
1978     reader_output_yylsp (&defines_obstack);
1979   /* Write closing delimiters for actions and guards.  */
1980   output_trailers ();
1981   if (locations_flag)
1982     obstack_sgrow (&table_obstack, "#define YYLSP_NEEDED 1\n\n");
1983   /* Assign the symbols their symbol numbers.  Write #defines for the
1984      token symbols into FDEFINES if requested.  */
1985   packsymbols ();
1986   /* Convert the grammar into the format described in gram.h.  */
1987   packgram ();
1988   /* Output the headers. */
1989   symbols_output ();
1990 }
1991
1992
1993 /*------------------------------------------------------------------.
1994 | Define YYLTYPE.  Cannot be in the skeleton since we might have to |
1995 | output it in the headers if --defines is used.                    |
1996 `------------------------------------------------------------------*/
1997
1998 void
1999 reader_output_yylsp (struct obstack *oout)
2000 {
2001   if (locations_flag)
2002     obstack_sgrow (oout, "\
2003 \n\
2004 #ifndef YYLTYPE\n\
2005 typedef struct yyltype\n\
2006 {\n\
2007   int first_line;\n\
2008   int first_column;\n\
2009 \n\
2010   int last_line;\n\
2011   int last_column;\n\
2012 } yyltype;\n\
2013 \n\
2014 # define YYLTYPE yyltype\n\
2015 #endif\n\
2016 \n");
2017 }