src/reader.c

   1 /* Input parser for bison
   2    Copyright 1984, 1986, 1989, 1992, 1998, 2000
   3    Free Software Foundation, Inc.
   4
   5    This file is part of Bison, the GNU Compiler Compiler.
   6
   7    Bison is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 2, or (at your option)
  10    any later version.
  11
  12    Bison is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with Bison; see the file COPYING.  If not, write to
  19    the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20    Boston, MA 02111-1307, USA.  */
  21
  22
  23 #include "system.h"
  24 #include "obstack.h"
  25 #include "getargs.h"
  26 #include "files.h"
  27 #include "xalloc.h"
  28 #include "symtab.h"
  29 #include "lex.h"
  30 #include "gram.h"
  31 #include "complain.h"
  32 #include "output.h"
  33 #include "reader.h"
  34 #include "conflicts.h"
  35 #include "quote.h"
  36
   37 /* Number of slots allocated (but not necessarily used yet) in `rline'.  NOTE(review): `rline' is not declared in this part of the file.  */
   38 static int rline_allocated;
  39
   40 typedef struct symbol_list  /* One node of a singly linked list of symbols.  */
   41 {
   42   struct symbol_list *next;  /* Following node; get_type_name walks this chain.  */
   43   bucket *sym;  /* Symbol at this position; get_type_name rejects a position whose sym is NULL.  */
   44   bucket *ruleprec;  /* NOTE(review): not used in this part of the file; presumably the rule's %prec symbol -- confirm.  */
   45 }
   46 symbol_list;
  47
   48 int lineno;  /* Current input line: incremented by the copy routines in this file for each newline they consume, and written into `#line' directives.  */
   49 char **tags;  /* NOTE(review): not referenced in this part of the file.  */
   50 short *user_toknums;  /* NOTE(review): not referenced in this part of the file.  */
   51 static symbol_list *grammar;  /* NOTE(review): not referenced in this part of the file.  */
   52 static int start_flag;  /* Set to 1 by parse_start_decl once %start names an identifier.  */
   53 static bucket *startval;  /* The symbol named by %start (set by parse_start_decl).  */
   54
   55 /* Nonzero if components of semantic values are used, implying
   56    they must be unions.  */
   57 static int value_components_used;  /* Set when an explicit <type> is read by copy_dollar, parse_token_decl or parse_thong_decl.  */
   58
   59 /* Nonzero if %union has been seen.  */
   60 static int typed;  /* Set by parse_union_decl.  */
   61
   62 /* Incremented for each %left, %right or %nonassoc seen.  */
   63 static int lastprec;  /* Bumped at the start of parse_assoc_decl, so a level assigned there is never 0.  */
   64
   65 /* Incremented for each generated symbol.  */
   66 static int gensym_count;
   67
   68 static bucket *errtoken;  /* NOTE(review): not referenced in this part of the file.  */
   69 static bucket *undeftoken;  /* NOTE(review): not referenced in this part of the file.  */
  70 \f
  71
  72 /*===================\
  73 | Low level lexing.  |
  74 \===================*/
  75
  76 static void
  77 skip_to_char (int target)
  78 {
  79   int c;
  80   if (target == '\n')
  81     complain (_("   Skipping to next \\n"));
  82   else
  83     complain (_("   Skipping to next %c"), target);
  84
  85   do
  86     c = skip_white_space ();
  87   while (c != target && c != EOF);
  88   if (c != EOF)
  89     ungetc (c, finput);
  90 }
  91
  92
  93 /*---------------------------------------------------------.
  94 | Read a signed integer from STREAM and return its value.  |
  95 `---------------------------------------------------------*/
  96
  97 static inline int
  98 read_signed_integer (FILE *stream)
  99 {
 100   int c = getc (stream);
 101   int sign = 1;
 102   int n = 0;
 103
 104   if (c == '-')
 105     {
 106       c = getc (stream);
 107       sign = -1;
 108     }
 109
 110   while (isdigit (c))
 111     {
 112       n = 10 * n + (c - '0');
 113       c = getc (stream);
 114     }
 115
 116   ungetc (c, stream);
 117
 118   return sign * n;
 119 }
 120 \f
 121 /*--------------------------------------------------------------.
 122 | Get the data type (alternative in the union) of the value for |
 123 | symbol N in rule RULE.                                        |
 124 `--------------------------------------------------------------*/
 125
 126 static char *
 127 get_type_name (int n, symbol_list * rule)
 128 {
 129   int i;
 130   symbol_list *rp;
 131
 132   if (n < 0)
 133     {
 134       complain (_("invalid $ value"));
 135       return NULL;
 136     }
 137
 138   rp = rule;
 139   i = 0;
 140
 141   while (i < n)
 142     {
 143       rp = rp->next;
 144       if (rp == NULL || rp->sym == NULL)
 145         {
 146           complain (_("invalid $ value"));
 147           return NULL;
 148         }
 149       i++;
 150     }
 151
 152   return rp->sym->type_name;
 153 }
 154 \f
 155 /*-----------------------------------------------------------------.
 156 | Dump the string from FIN to FOUT and OOUT if non null.  MATCH is |
 157 | the delimiter of the string (either ' or ").                     |
 158 `-----------------------------------------------------------------*/
 159
 160 static inline void
 161 copy_string (FILE *fin, FILE *fout, struct obstack *oout, int match)
 162 {
 163   int c;
 164
 165   if (fout)
 166     putc (match, fout);
 167   if (oout)
 168     obstack_1grow (oout, match);
 169
 170   c = getc (fin);
 171
 172   while (c != match)
 173     {
 174       if (c == EOF)
 175         fatal (_("unterminated string at end of file"));
 176       if (c == '\n')
 177         {
 178           complain (_("unterminated string"));
 179           ungetc (c, fin);
 180           c = match;            /* invent terminator */
 181           continue;
 182         }
 183
 184       if (fout)
 185         putc (c, fout);
 186       if (oout)
 187         obstack_1grow (oout, c);
 188
 189       if (c == '\\')
 190         {
 191           c = getc (fin);
 192           if (c == EOF)
 193             fatal (_("unterminated string at end of file"));
 194           if (fout)
 195             putc (c, fout);
 196           if (oout)
 197             obstack_1grow (oout, c);
 198
 199           if (c == '\n')
 200             lineno++;
 201         }
 202
 203       c = getc (fin);
 204     }
 205
 206   if (fout)
 207     putc (c, fout);
 208   if (oout)
 209     obstack_1grow (oout, c);
 210 }
 211
 212
 213 /*----------------------------------------------------------------.
 214 | Dump the wannabee comment from IN to OUT1 and OUT2.  In fact we |
 215 | just saw a `/', which might or might not be a comment.  In any  |
 216 | case, copy what we saw.                                         |
 217 |                                                                 |
 218 | OUT2 might be NULL.                                             |
 219 `----------------------------------------------------------------*/
 220
 221 static inline void
 222 copy_comment2 (FILE *fin, FILE *out1, FILE *out2, struct obstack *oout)
 223 {
 224   int cplus_comment;
 225   int ended;
 226   int c;
 227
 228   /* We read a `/', output it. */
 229   if (out1)
 230     putc ('/', out1);
 231   if (out2)
 232     putc ('/', out2);
 233   if (oout)
 234     obstack_1grow (oout, '/');
 235
 236   switch ((c = getc (fin)))
 237     {
 238     case '/':
 239       cplus_comment = 1;
 240       break;
 241     case '*':
 242       cplus_comment = 0;
 243       break;
 244     default:
 245       ungetc (c, fin);
 246       return;
 247     }
 248
 249   if (out1)
 250     putc (c, out1);
 251   if (out2)
 252     putc (c, out2);
 253   if (oout)
 254     obstack_1grow (oout, c);
 255   c = getc (fin);
 256
 257   ended = 0;
 258   while (!ended)
 259     {
 260       if (!cplus_comment && c == '*')
 261         {
 262           while (c == '*')
 263             {
 264               if (out1)
 265                 putc (c, out1);
 266               if (out2)
 267                 putc (c, out2);
 268               if (oout)
 269                 obstack_1grow (oout, c);
 270               c = getc (fin);
 271             }
 272
 273           if (c == '/')
 274             {
 275               if (out1)
 276                 putc (c, out1);
 277               if (out2)
 278                 putc (c, out2);
 279               if (oout)
 280                 obstack_1grow (oout, c);
 281               ended = 1;
 282             }
 283         }
 284       else if (c == '\n')
 285         {
 286           lineno++;
 287           if (out1)
 288             putc (c, out1);
 289           if (out2)
 290             putc (c, out2);
 291           if (oout)
 292             obstack_1grow (oout, c);
 293           if (cplus_comment)
 294             ended = 1;
 295           else
 296             c = getc (fin);
 297         }
 298       else if (c == EOF)
 299         fatal (_("unterminated comment"));
 300       else
 301         {
 302           if (out1)
 303             putc (c, out1);
 304           if (out2)
 305             putc (c, out2);
 306           if (oout)
 307             obstack_1grow (oout, c);
 308           c = getc (fin);
 309         }
 310     }
 311 }
 312
 313
 314 /*-------------------------------------------------------------------.
 315 | Dump the comment (actually the current string starting with a `/') |
 316 | from FIN to FOUT.                                                  |
 317 `-------------------------------------------------------------------*/
 318
 319 static inline void
 320 copy_comment (FILE *fin, FILE *fout, struct obstack *oout)
 321 {
 322   copy_comment2 (fin, fout, NULL, oout);
 323 }
 324
 325
 326 /*-----------------------------------------------------------------.
 327 | FIN is pointing to a location (i.e., a `@').  Output to FOUT a   |
 328 | reference to this location. STACK_OFFSET is the number of values |
 329 | in the current rule so far, which says where to find `$0' with   |
 330 | respect to the top of the stack.                                 |
 331 `-----------------------------------------------------------------*/
 332
 333 static inline void
 334 copy_at (FILE *fin, FILE *fout, struct obstack *oout, int stack_offset)
 335 {
 336   int c;
 337
 338   c = getc (fin);
 339   if (c == '$')
 340     {
 341       if (fout)
 342         fprintf (fout, "yyloc");
 343       if (oout)
 344         obstack_grow_literal_string (oout, "yyloc");
 345       locations_flag = 1;
 346     }
 347   else if (isdigit (c) || c == '-')
 348     {
 349       int n;
 350       char buf[4096];
 351
 352       ungetc (c, fin);
 353       n = read_signed_integer (fin);
 354
 355       sprintf (buf, "yylsp[%d]", n - stack_offset);
 356       if (fout)
 357         fputs (buf, fout);
 358       if (oout)
 359         obstack_grow (oout, buf, strlen (buf));
 360       locations_flag = 1;
 361     }
 362   else
 363     {
 364       char buf[] = "@c";
 365       buf[1] = c;
 366       complain (_("%s is invalid"), quote (buf));
 367     }
 368 }
 369
 370
 371 /*-------------------------------------------------------------------.
 372 | FIN is pointing to a wannabee semantic value (i.e., a `$').        |
 373 |                                                                    |
 374 | Possible inputs: $[<TYPENAME>]($|integer)                          |
 375 |                                                                    |
 376 | Output to FOUT a reference to this semantic value. STACK_OFFSET is |
 377 | the number of values in the current rule so far, which says where  |
 378 | to find `$0' with respect to the top of the stack.                 |
 379 `-------------------------------------------------------------------*/
 380
 381 static inline void
 382 copy_dollar (FILE *fin, FILE *fout, struct obstack *oout,
 383              symbol_list *rule, int stack_offset)
 384 {
 385   int c = getc (fin);
 386   char *type_name = NULL;
 387
 388   /* Get the type name if explicit. */
 389   if (c == '<')
 390     {
 391       read_type_name (fin);
 392       type_name = token_buffer;
 393       value_components_used = 1;
 394       c = getc (fin);
 395     }
 396
 397   if (c == '$')
 398     {
 399       if (fout)
 400         fputs ("yyval", fout);
 401       if (oout)
 402         obstack_grow_literal_string (oout, "yyval");
 403
 404       if (!type_name)
 405         type_name = get_type_name (0, rule);
 406       if (type_name)
 407         {
 408           if (fout)
 409             fprintf (fout, ".%s", type_name);
 410           if (oout)
 411             obstack_fgrow1 (oout, ".%s", type_name);
 412         }
 413       if (!type_name && typed)
 414         complain (_("$$ of `%s' has no declared type"),
 415                   rule->sym->tag);
 416     }
 417   else if (isdigit (c) || c == '-')
 418     {
 419       int n;
 420       ungetc (c, fin);
 421       n = read_signed_integer (fin);
 422
 423       if (!type_name && n > 0)
 424         type_name = get_type_name (n, rule);
 425
 426       if (fout)
 427         fprintf (fout, "yyvsp[%d]", n - stack_offset);
 428       if (oout)
 429         obstack_fgrow1 (oout, "yyvsp[%d]", n - stack_offset);
 430
 431       if (type_name)
 432         {
 433           if (fout)
 434             fprintf (fout, ".%s", type_name);
 435           if (oout)
 436             obstack_fgrow1 (oout, ".%s", type_name);
 437         }
 438       if (!type_name && typed)
 439         complain (_("$%d of `%s' has no declared type"),
 440                   n, rule->sym->tag);
 441     }
 442   else
 443     {
 444       char buf[] = "$c";
 445       buf[1] = c;
 446       complain (_("%s is invalid"), quote (buf));
 447     }
 448 }
 449 \f
 450 /*-------------------------------------------------------------------.
 451 | Copy the contents of a `%{ ... %}' into the definitions file.  The |
 452 | `%{' has already been read.  Return after reading the `%}'.        |
 453 `-------------------------------------------------------------------*/
 454
 455 static void
 456 copy_definition (void)
 457 {
 458   int c;
 459   /* -1 while reading a character if prev char was %. */
 460   int after_percent;
 461
 462   if (!no_lines_flag)
 463     obstack_fgrow2 (&attrs_obstack, "#line %d \"%s\"\n", lineno, infile);
 464
 465   after_percent = 0;
 466
 467   c = getc (finput);
 468
 469   for (;;)
 470     {
 471       switch (c)
 472         {
 473         case '\n':
 474           obstack_1grow (&attrs_obstack, c);
 475           lineno++;
 476           break;
 477
 478         case '%':
 479           after_percent = -1;
 480           break;
 481
 482         case '\'':
 483         case '"':
 484           copy_string (finput, 0, &attrs_obstack, c);
 485           break;
 486
 487         case '/':
 488           copy_comment (finput, 0, &attrs_obstack);
 489           break;
 490
 491         case EOF:
 492           fatal ("%s", _("unterminated `%{' definition"));
 493
 494         default:
 495           obstack_1grow (&attrs_obstack, c);
 496         }
 497
 498       c = getc (finput);
 499
 500       if (after_percent)
 501         {
 502           if (c == '}')
 503             return;
 504           obstack_1grow (&attrs_obstack, '%');
 505         }
 506       after_percent = 0;
 507     }
 508 }
 509
 510
 511 /*-------------------------------------------------------------------.
 512 | Parse what comes after %token or %nterm.  For %token, WHAT_IS is   |
 513 | token_sym and WHAT_IS_NOT is nterm_sym.  For %nterm, the arguments |
 514 | are reversed.                                                      |
 515 `-------------------------------------------------------------------*/
 516
 517 static void
 518 parse_token_decl (symbol_class what_is, symbol_class what_is_not)
 519 {
 520   int token = 0;
 521   char *typename = 0;
 522
 523   /* The symbol being defined.  */
 524   struct bucket *symbol = NULL;
 525
 526   /* After `%token' and `%nterm', any number of symbols maybe be
 527      defined.  */
 528   for (;;)
 529     {
 530       int tmp_char = ungetc (skip_white_space (), finput);
 531
 532       /* `%' (for instance from `%token', or from `%%' etc.) is the
 533          only valid means to end this declaration.  */
 534       if (tmp_char == '%')
 535         return;
 536       if (tmp_char == EOF)
 537         fatal (_("Premature EOF after %s"), token_buffer);
 538
 539       token = lex ();
 540       if (token == COMMA)
 541         {
 542           symbol = NULL;
 543           continue;
 544         }
 545       if (token == TYPENAME)
 546         {
 547           typename = xstrdup (token_buffer);
 548           value_components_used = 1;
 549           symbol = NULL;
 550         }
 551       else if (token == IDENTIFIER && *symval->tag == '\"' && symbol)
 552         {
 553           if (symval->alias)
 554             warn (_("symbol `%s' used more than once as a literal string"),
 555                   symval->tag);
 556           else if (symbol->alias)
 557             warn (_("symbol `%s' given more than one literal string"),
 558                   symbol->tag);
 559           else
 560             {
 561               symval->class = token_sym;
 562               symval->type_name = typename;
 563               symval->user_token_number = symbol->user_token_number;
 564               symbol->user_token_number = SALIAS;
 565               symval->alias = symbol;
 566               symbol->alias = symval;
 567               /* symbol and symval combined are only one symbol */
 568               nsyms--;
 569             }
 570           translations = 1;
 571           symbol = NULL;
 572         }
 573       else if (token == IDENTIFIER)
 574         {
 575           int oldclass = symval->class;
 576           symbol = symval;
 577
 578           if (symbol->class == what_is_not)
 579             complain (_("symbol %s redefined"), symbol->tag);
 580           symbol->class = what_is;
 581           if (what_is == nterm_sym && oldclass != nterm_sym)
 582             symbol->value = nvars++;
 583
 584           if (typename)
 585             {
 586               if (symbol->type_name == NULL)
 587                 symbol->type_name = typename;
 588               else if (strcmp (typename, symbol->type_name) != 0)
 589                 complain (_("type redeclaration for %s"), symbol->tag);
 590             }
 591         }
 592       else if (symbol && token == NUMBER)
 593         {
 594           symbol->user_token_number = numval;
 595           translations = 1;
 596         }
 597       else
 598         {
 599           complain (_("`%s' is invalid in %s"),
 600                     token_buffer, (what_is == token_sym) ? "%token" : "%nterm");
 601           skip_to_char ('%');
 602         }
 603     }
 604
 605 }
 606
 607
 608 /*------------------------------.
 609 | Parse what comes after %start |
 610 `------------------------------*/
 611
 612 static void
 613 parse_start_decl (void)
 614 {
 615   if (start_flag)
 616     complain (_("multiple %s declarations"), "%start");
 617   if (lex () != IDENTIFIER)
 618     complain (_("invalid %s declaration"), "%start");
 619   else
 620     {
 621       start_flag = 1;
 622       startval = symval;
 623     }
 624 }
 625
 626 /*-----------------------------------------------------------.
 627 | read in a %type declaration and record its information for |
 628 | get_type_name to access                                    |
 629 `-----------------------------------------------------------*/
 630
 631 static void
 632 parse_type_decl (void)
 633 {
 634   char *name;
 635
 636   if (lex () != TYPENAME)
 637     {
 638       complain ("%s", _("%type declaration has no <typename>"));
 639       skip_to_char ('%');
 640       return;
 641     }
 642
 643   name = xstrdup (token_buffer);
 644
 645   for (;;)
 646     {
 647       int t;
 648       int tmp_char = ungetc (skip_white_space (), finput);
 649
 650       if (tmp_char == '%')
 651         return;
 652       if (tmp_char == EOF)
 653         fatal (_("Premature EOF after %s"), token_buffer);
 654
 655       t = lex ();
 656
 657       switch (t)
 658         {
 659
 660         case COMMA:
 661         case SEMICOLON:
 662           break;
 663
 664         case IDENTIFIER:
 665           if (symval->type_name == NULL)
 666             symval->type_name = name;
 667           else if (strcmp (name, symval->type_name) != 0)
 668             complain (_("type redeclaration for %s"), symval->tag);
 669
 670           break;
 671
 672         default:
 673           complain (_("invalid %%type declaration due to item: %s"),
 674                     token_buffer);
 675           skip_to_char ('%');
 676         }
 677     }
 678 }
 679
 680
 681
 682 /*----------------------------------------------------------------.
 683 | Read in a %left, %right or %nonassoc declaration and record its |
 684 | information.                                                    |
 685 `----------------------------------------------------------------*/
 686
 687 static void
 688 parse_assoc_decl (associativity assoc)
 689 {
 690   char *name = NULL;
 691   int prev = 0;
 692
 693   lastprec++;                   /* Assign a new precedence level, never 0.  */
 694
 695   for (;;)
 696     {
 697       int t;
 698       int tmp_char = ungetc (skip_white_space (), finput);
 699
 700       if (tmp_char == '%')
 701         return;
 702       if (tmp_char == EOF)
 703         fatal (_("Premature EOF after %s"), token_buffer);
 704
 705       t = lex ();
 706
 707       switch (t)
 708         {
 709         case TYPENAME:
 710           name = xstrdup (token_buffer);
 711           break;
 712
 713         case COMMA:
 714           break;
 715
 716         case IDENTIFIER:
 717           if (symval->prec != 0)
 718             complain (_("redefining precedence of %s"), symval->tag);
 719           symval->prec = lastprec;
 720           symval->assoc = assoc;
 721           if (symval->class == nterm_sym)
 722             complain (_("symbol %s redefined"), symval->tag);
 723           symval->class = token_sym;
 724           if (name)
 725             {                   /* record the type, if one is specified */
 726               if (symval->type_name == NULL)
 727                 symval->type_name = name;
 728               else if (strcmp (name, symval->type_name) != 0)
 729                 complain (_("type redeclaration for %s"), symval->tag);
 730             }
 731           break;
 732
 733         case NUMBER:
 734           if (prev == IDENTIFIER)
 735             {
 736               symval->user_token_number = numval;
 737               translations = 1;
 738             }
 739           else
 740             {
 741               complain (_
 742                         ("invalid text (%s) - number should be after identifier"),
 743 token_buffer);
 744               skip_to_char ('%');
 745             }
 746           break;
 747
 748         case SEMICOLON:
 749           return;
 750
 751         default:
 752           complain (_("unexpected item: %s"), token_buffer);
 753           skip_to_char ('%');
 754         }
 755
 756       prev = t;
 757
 758     }
 759 }
 760
 761
 762
 763 /*--------------------------------------------------------------.
 764 | Copy the union declaration into ATTRS_OBSTACK (and fdefines), |
 765 | where it is made into the definition of YYSTYPE, the type of  |
 766 | elements of the parser value stack.                           |
 767 `--------------------------------------------------------------*/
 768
 769 static void
 770 parse_union_decl (void)
 771 {
 772   int c;
 773   int count = 0;
 774
 775   if (typed)
 776     complain (_("multiple %s declarations"), "%union");
 777
 778   typed = 1;
 779
 780   if (!no_lines_flag)
 781     obstack_fgrow2 (&attrs_obstack, "\n#line %d \"%s\"\n", lineno, infile);
 782   else
 783     obstack_1grow (&attrs_obstack, '\n');
 784
 785   obstack_grow_literal_string (&attrs_obstack, "typedef union");
 786   if (fdefines)
 787     fprintf (fdefines, "typedef union");
 788
 789   c = getc (finput);
 790
 791   while (c != EOF)
 792     {
 793       obstack_1grow (&attrs_obstack, c);
 794       if (fdefines)
 795         putc (c, fdefines);
 796
 797       switch (c)
 798         {
 799         case '\n':
 800           lineno++;
 801           break;
 802
 803         case '/':
 804           copy_comment2 (finput, 0, fdefines, &attrs_obstack);
 805           break;
 806
 807         case '{':
 808           count++;
 809           break;
 810
 811         case '}':
 812           if (count == 0)
 813             complain (_("unmatched %s"), "`}'");
 814           count--;
 815           if (count <= 0)
 816             {
 817               obstack_grow_literal_string (&attrs_obstack, " YYSTYPE;\n");
 818               if (fdefines)
 819                 fprintf (fdefines, " YYSTYPE;\n");
 820               /* JF don't choke on trailing semi */
 821               c = skip_white_space ();
 822               if (c != ';')
 823                 ungetc (c, finput);
 824               return;
 825             }
 826         }
 827
 828       c = getc (finput);
 829     }
 830 }
 831
 832
 833 /*-------------------------------------------------------.
 834 | Parse the declaration %expect N which says to expect N |
 835 | shift-reduce conflicts.                                |
 836 `-------------------------------------------------------*/
 837
 838 static void
 839 parse_expect_decl (void)
 840 {
 841   int c = skip_white_space ();
 842   ungetc (c, finput);
 843
 844   if (!isdigit (c))
 845     complain (_("argument of %%expect is not an integer"));
 846   else
 847     expected_conflicts = read_signed_integer (finput);
 848 }
 849
 850
 851 /*-------------------------------------------------------------------.
 852 | Parse what comes after %thong.  the full syntax is                 |
 853 |                                                                    |
 854 |                %thong <type> token number literal                  |
 855 |                                                                    |
 856 | the <type> or number may be omitted.  The number specifies the     |
 857 | user_token_number.                                                 |
 858 |                                                                    |
 859 | Two symbols are entered in the table, one for the token symbol and |
 860 | one for the literal.  Both are given the <type>, if any, from the  |
 861 | declaration.  The ->user_token_number of the first is SALIAS and   |
 862 | the ->user_token_number of the second is set to the number, if     |
 863 | any, from the declaration.  The two symbols are linked via         |
 864 | pointers in their ->alias fields.                                  |
 865 |                                                                    |
 866 | During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter,    |
 867 | only the literal string is retained it is the literal string that  |
 868 | is output to yytname                                               |
 869 `-------------------------------------------------------------------*/
 870
 871 static void
 872 parse_thong_decl (void)
 873 {
 874   int token;
 875   struct bucket *symbol;
 876   char *typename = 0;
 877   int usrtoknum;
 878
 879   translations = 1;
 880   token = lex ();               /* fetch typename or first token */
 881   if (token == TYPENAME)
 882     {
 883       typename = xstrdup (token_buffer);
 884       value_components_used = 1;
 885       token = lex ();           /* fetch first token */
 886     }
 887
 888   /* process first token */
 889
 890   if (token != IDENTIFIER)
 891     {
 892       complain (_("unrecognized item %s, expected an identifier"),
 893                 token_buffer);
 894       skip_to_char ('%');
 895       return;
 896     }
 897   symval->class = token_sym;
 898   symval->type_name = typename;
 899   symval->user_token_number = SALIAS;
 900   symbol = symval;
 901
 902   token = lex ();               /* get number or literal string */
 903
 904   if (token == NUMBER)
 905     {
 906       usrtoknum = numval;
 907       token = lex ();           /* okay, did number, now get literal */
 908     }
 909   else
 910     usrtoknum = 0;
 911
 912   /* process literal string token */
 913
 914   if (token != IDENTIFIER || *symval->tag != '\"')
 915     {
 916       complain (_("expected string constant instead of %s"), token_buffer);
 917       skip_to_char ('%');
 918       return;
 919     }
 920   symval->class = token_sym;
 921   symval->type_name = typename;
 922   symval->user_token_number = usrtoknum;
 923
 924   symval->alias = symbol;
 925   symbol->alias = symval;
 926
 927   /* symbol and symval combined are only one symbol.  */
 928   nsyms--;
 929 }
 930
 931
 932 /*----------------------------------------------------------------.
 933 | Read from finput until `%%' is seen.  Discard the `%%'.  Handle |
 934 | any `%' declarations, and copy the contents of any `%{ ... %}'  |
 935 | groups to ATTRS_OBSTACK.                                        |
 936 `----------------------------------------------------------------*/
 937
 938 static void
 939 read_declarations (void)
 940 {
 941   int c;
 942   int tok;
 943
 944   for (;;)
 945     {
 946       c = skip_white_space ();
 947
 948       if (c == '%')
 949         {
 950           tok = parse_percent_token ();
 951
 952           switch (tok)
 953             {
 954             case TWO_PERCENTS:
 955               return;
 956
 957             case PERCENT_LEFT_CURLY:
 958               copy_definition ();
 959               break;
 960
 961             case TOKEN:
 962               parse_token_decl (token_sym, nterm_sym);
 963               break;
 964
 965             case NTERM:
 966               parse_token_decl (nterm_sym, token_sym);
 967               break;
 968
 969             case TYPE:
 970               parse_type_decl ();
 971               break;
 972
 973             case START:
 974               parse_start_decl ();
 975               break;
 976
 977             case UNION:
 978               parse_union_decl ();
 979               break;
 980
 981             case EXPECT:
 982               parse_expect_decl ();
 983               break;
 984             case THONG:
 985               parse_thong_decl ();
 986               break;
 987
 988             case LEFT:
 989               parse_assoc_decl (left_assoc);
 990               break;
 991
 992             case RIGHT:
 993               parse_assoc_decl (right_assoc);
 994               break;
 995
 996             case NONASSOC:
 997               parse_assoc_decl (non_assoc);
 998               break;
 999
1000             case SEMANTIC_PARSER:
1001               if (semantic_parser == 0)
1002                 {
1003                   semantic_parser = 1;
1004                   open_extra_files ();
1005                 }
1006               break;
1007
1008             case PURE_PARSER:
1009               pure_parser = 1;
1010               break;
1011
1012             case NOOP:
1013               break;
1014
1015             default:
1016               complain (_("unrecognized: %s"), token_buffer);
1017               skip_to_char ('%');
1018             }
1019         }
1020       else if (c == EOF)
1021         fatal (_("no input grammar"));
1022       else
1023         {
1024           char buf[] = "c";
1025           buf[0] = c;
1026           complain (_("unknown character: %s"), quote (buf));
1027           skip_to_char ('%');
1028         }
1029     }
1030 }
1031 \f
1032 /*-------------------------------------------------------------------.
1033 | Assuming that a `{' has just been seen, copy everything up to the  |
1034 | matching `}' into the actions file.  STACK_OFFSET is the number of |
1035 | values in the current rule so far, which says where to find `$0'   |
1036 | with respect to the top of the stack.                              |
1037 `-------------------------------------------------------------------*/
1038
1039 static void
1040 copy_action (symbol_list *rule, int stack_offset)
1041 {
1042   int c;
1043   int count;
1044   char buf[4096];
1045
1046   /* offset is always 0 if parser has already popped the stack pointer */
1047   if (semantic_parser)
1048     stack_offset = 0;
1049
1050   sprintf (buf, "\ncase %d:\n", nrules);
1051   obstack_grow (&action_obstack, buf, strlen (buf));
1052
1053   if (!no_lines_flag)
1054     {
1055       sprintf (buf, "#line %d \"%s\"\n", lineno, infile);
1056       obstack_grow (&action_obstack, buf, strlen (buf));
1057     }
1058   obstack_1grow (&action_obstack, '{');
1059
1060   count = 1;
1061   c = getc (finput);
1062
1063   while (count > 0)
1064     {
1065       while (c != '}')
1066         {
1067           switch (c)
1068             {
1069             case '\n':
1070               obstack_1grow (&action_obstack, c);
1071               lineno++;
1072               break;
1073
1074             case '{':
1075               obstack_1grow (&action_obstack, c);
1076               count++;
1077               break;
1078
1079             case '\'':
1080             case '"':
1081               copy_string (finput, 0, &action_obstack, c);
1082               break;
1083
1084             case '/':
1085               copy_comment (finput, 0, &action_obstack);
1086               break;
1087
1088             case '$':
1089               copy_dollar (finput, 0, &action_obstack,
1090                            rule, stack_offset);
1091               break;
1092
1093             case '@':
1094               copy_at (finput, 0, &action_obstack,
1095                        stack_offset);
1096               break;
1097
1098             case EOF:
1099               fatal (_("unmatched %s"), "`{'");
1100
1101             default:
1102               obstack_1grow (&action_obstack, c);
1103             }
1104
1105           c = getc (finput);
1106         }
1107
1108       /* above loop exits when c is '}' */
1109
1110       if (--count)
1111         {
1112           obstack_1grow (&action_obstack, c);
1113           c = getc (finput);
1114         }
1115     }
1116
1117   obstack_grow_literal_string (&action_obstack,
1118                                ";\n    break;}");
1119 }
1120 \f
1121 /*-------------------------------------------------------------------.
1122 | After `%guard' is seen in the input file, copy the actual guard    |
1123 | into the guards file.  If the guard is followed by an action, copy |
1124 | that into the actions file.  STACK_OFFSET is the number of values  |
1125 | in the current rule so far, which says where to find `$0' with     |
1126 | respect to the top of the stack, for the simple parser in which    |
1127 | the stack is not popped until after the guard is run.              |
1128 `-------------------------------------------------------------------*/
1129
1130 static void
1131 copy_guard (symbol_list *rule, int stack_offset)
1132 {
1133   int c;
1134   int count;
1135   int brace_flag = 0;
1136
1137   /* offset is always 0 if parser has already popped the stack pointer */
1138   if (semantic_parser)
1139     stack_offset = 0;
1140
1141   fprintf (fguard, "\ncase %d:\n", nrules);
1142   if (!no_lines_flag)
1143     fprintf (fguard, "#line %d \"%s\"\n", lineno, infile);
1144   putc ('{', fguard);
1145
1146   count = 0;
1147   c = getc (finput);
1148
1149   while (brace_flag ? (count > 0) : (c != ';'))
1150     {
1151       switch (c)
1152         {
1153         case '\n':
1154           putc (c, fguard);
1155           lineno++;
1156           break;
1157
1158         case '{':
1159           putc (c, fguard);
1160           brace_flag = 1;
1161           count++;
1162           break;
1163
1164         case '}':
1165           putc (c, fguard);
1166           if (count > 0)
1167             count--;
1168           else
1169             {
1170               complain (_("unmatched %s"), "`}'");
1171               c = getc (finput);        /* skip it */
1172             }
1173           break;
1174
1175         case '\'':
1176         case '"':
1177           copy_string (finput, fguard, 0, c);
1178           break;
1179
1180         case '/':
1181           copy_comment (finput, fguard, 0);
1182           break;
1183
1184         case '$':
1185           copy_dollar (finput, fguard, 0, rule, stack_offset);
1186           break;
1187
1188         case '@':
1189           copy_at (finput, fguard, 0, stack_offset);
1190           break;
1191
1192         case EOF:
1193           fatal ("%s", _("unterminated %guard clause"));
1194
1195         default:
1196           putc (c, fguard);
1197         }
1198
1199       if (c != '}' || count != 0)
1200         c = getc (finput);
1201     }
1202
1203   c = skip_white_space ();
1204
1205   fprintf (fguard, ";\n    break;}");
1206   if (c == '{')
1207     copy_action (rule, stack_offset);
1208   else if (c == '=')
1209     {
1210       c = getc (finput);        /* why not skip_white_space -wjh */
1211       if (c == '{')
1212         copy_action (rule, stack_offset);
1213     }
1214   else
1215     ungetc (c, finput);
1216 }
1217 \f
1218
1219 static void
1220 record_rule_line (void)
1221 {
1222   /* Record each rule's source line number in rline table.  */
1223
1224   if (nrules >= rline_allocated)
1225     {
1226       rline_allocated = nrules * 2;
1227       rline = XREALLOC (rline, short, rline_allocated);
1228     }
1229   rline[nrules] = lineno;
1230 }
1231
1232
1233 /*-------------------------------------------------------------------.
1234 | Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1235 | with the user's names.                                             |
1236 `-------------------------------------------------------------------*/
1237
1238 static bucket *
1239 gensym (void)
1240 {
1241   bucket *sym;
1242
1243   sprintf (token_buffer, "@%d", ++gensym_count);
1244   sym = getsym (token_buffer);
1245   sym->class = nterm_sym;
1246   sym->value = nvars++;
1247   return sym;
1248 }
1249
#if 0
/*------------------------------------------------------------------.
| Read a %type declaration: a type name followed by identifiers,    |
| optionally separated by commas.  Each identifier that has no type |
| yet receives the type name; one whose type differs is reported.   |
| Return the first token that does not belong to the declaration    |
| (the token after the `;' when there is one).                      |
|                                                                   |
| This is unused.  It is only called from the #if 0 part of         |
| readgram.                                                         |
`------------------------------------------------------------------*/

static int
get_type (void)
{
  int tok;
  char *type_name;

  tok = lex ();
  if (tok != TYPENAME)
    {
      complain (_("invalid %s declaration"), "%type");
      return tok;
    }

  /* The same copy is shared by every symbol declared here.  */
  type_name = xstrdup (token_buffer);

  while (1)
    {
      tok = lex ();

      if (tok == SEMICOLON)
        return lex ();
      if (tok == COMMA)
        continue;
      if (tok != IDENTIFIER)
        return tok;

      if (!symval->type_name)
        symval->type_name = type_name;
      else if (strcmp (type_name, symval->type_name))
        complain (_("type redeclaration for %s"), symval->tag);
    }
}

#endif
1301 \f
1302 /*------------------------------------------------------------------.
1303 | Parse the input grammar into a one symbol_list structure.  Each   |
1304 | rule is represented by a sequence of symbols: the left hand side  |
1305 | followed by the contents of the right hand side, followed by a    |
1306 | null pointer instead of a symbol to terminate the rule.  The next |
1307 | symbol is the lhs of the following rule.                          |
1308 |                                                                   |
1309 | All guards and actions are copied out to the appropriate files,   |
1310 | labelled by the rule number they apply to.                        |
1311 `------------------------------------------------------------------*/
1312
1313 static void
1314 readgram (void)
1315 {
1316   int t;
1317   bucket *lhs = NULL;
1318   symbol_list *p;
1319   symbol_list *p1;
1320   bucket *bp;
1321
1322   /* Points to first symbol_list of current rule. its symbol is the
1323      lhs of the rule.  */
1324   symbol_list *crule;
1325   /* Points to the symbol_list preceding crule.  */
1326   symbol_list *crule1;
1327
1328   p1 = NULL;
1329
1330   t = lex ();
1331
1332   while (t != TWO_PERCENTS && t != ENDFILE)
1333     {
1334       if (t == IDENTIFIER || t == BAR)
1335         {
1336           int action_flag = 0;
1337           /* Number of symbols in rhs of this rule so far */
1338           int rulelength = 0;
1339           int xactions = 0;     /* JF for error checking */
1340           bucket *first_rhs = 0;
1341
1342           if (t == IDENTIFIER)
1343             {
1344               lhs = symval;
1345
1346               if (!start_flag)
1347                 {
1348                   startval = lhs;
1349                   start_flag = 1;
1350                 }
1351
1352               t = lex ();
1353               if (t != COLON)
1354                 {
1355                   complain (_("ill-formed rule: initial symbol not followed by colon"));
1356                   unlex (t);
1357                 }
1358             }
1359
1360           if (nrules == 0 && t == BAR)
1361             {
1362               complain (_("grammar starts with vertical bar"));
1363               lhs = symval;     /* BOGUS: use a random symval */
1364             }
1365           /* start a new rule and record its lhs.  */
1366
1367           nrules++;
1368           nitems++;
1369
1370           record_rule_line ();
1371
1372           p = XCALLOC (symbol_list, 1);
1373           p->sym = lhs;
1374
1375           crule1 = p1;
1376           if (p1)
1377             p1->next = p;
1378           else
1379             grammar = p;
1380
1381           p1 = p;
1382           crule = p;
1383
1384           /* mark the rule's lhs as a nonterminal if not already so.  */
1385
1386           if (lhs->class == unknown_sym)
1387             {
1388               lhs->class = nterm_sym;
1389               lhs->value = nvars;
1390               nvars++;
1391             }
1392           else if (lhs->class == token_sym)
1393             complain (_("rule given for %s, which is a token"), lhs->tag);
1394
1395           /* read the rhs of the rule.  */
1396
1397           for (;;)
1398             {
1399               t = lex ();
1400               if (t == PREC)
1401                 {
1402                   t = lex ();
1403                   crule->ruleprec = symval;
1404                   t = lex ();
1405                 }
1406
1407               if (!(t == IDENTIFIER || t == LEFT_CURLY))
1408                 break;
1409
1410               /* If next token is an identifier, see if a colon follows it.
1411                  If one does, exit this rule now.  */
1412               if (t == IDENTIFIER)
1413                 {
1414                   bucket *ssave;
1415                   int t1;
1416
1417                   ssave = symval;
1418                   t1 = lex ();
1419                   unlex (t1);
1420                   symval = ssave;
1421                   if (t1 == COLON)
1422                     break;
1423
1424                   if (!first_rhs)       /* JF */
1425                     first_rhs = symval;
1426                   /* Not followed by colon =>
1427                      process as part of this rule's rhs.  */
1428                 }
1429
1430               /* If we just passed an action, that action was in the middle
1431                  of a rule, so make a dummy rule to reduce it to a
1432                  non-terminal.  */
1433               if (action_flag)
1434                 {
1435                   bucket *sdummy;
1436
1437                   /* Since the action was written out with this rule's
1438                      number, we must give the new rule this number by
1439                      inserting the new rule before it.  */
1440
1441                   /* Make a dummy nonterminal, a gensym.  */
1442                   sdummy = gensym ();
1443
1444                   /* Make a new rule, whose body is empty,
1445                      before the current one, so that the action
1446                      just read can belong to it.  */
1447                   nrules++;
1448                   nitems++;
1449                   record_rule_line ();
1450                   p = XCALLOC (symbol_list, 1);
1451                   if (crule1)
1452                     crule1->next = p;
1453                   else
1454                     grammar = p;
1455                   p->sym = sdummy;
1456                   crule1 = XCALLOC (symbol_list, 1);
1457                   p->next = crule1;
1458                   crule1->next = crule;
1459
1460                   /* Insert the dummy generated by that rule into this
1461                      rule.  */
1462                   nitems++;
1463                   p = XCALLOC (symbol_list, 1);
1464                   p->sym = sdummy;
1465                   p1->next = p;
1466                   p1 = p;
1467
1468                   action_flag = 0;
1469                 }
1470
1471               if (t == IDENTIFIER)
1472                 {
1473                   nitems++;
1474                   p = XCALLOC (symbol_list, 1);
1475                   p->sym = symval;
1476                   p1->next = p;
1477                   p1 = p;
1478                 }
1479               else              /* handle an action.  */
1480                 {
1481                   copy_action (crule, rulelength);
1482                   action_flag = 1;
1483                   xactions++;   /* JF */
1484                 }
1485               rulelength++;
1486             }                   /* end of  read rhs of rule */
1487
1488           /* Put an empty link in the list to mark the end of this rule  */
1489           p = XCALLOC (symbol_list, 1);
1490           p1->next = p;
1491           p1 = p;
1492
1493           if (t == PREC)
1494             {
1495               complain (_("two @prec's in a row"));
1496               t = lex ();
1497               crule->ruleprec = symval;
1498               t = lex ();
1499             }
1500           if (t == GUARD)
1501             {
1502               if (!semantic_parser)
1503                 complain (_("%%guard present but %%semantic_parser not specified"));
1504
1505               copy_guard (crule, rulelength);
1506               t = lex ();
1507             }
1508           else if (t == LEFT_CURLY)
1509             {
1510               /* This case never occurs -wjh */
1511               if (action_flag)
1512                 complain (_("two actions at end of one rule"));
1513               copy_action (crule, rulelength);
1514               action_flag = 1;
1515               xactions++;       /* -wjh */
1516               t = lex ();
1517             }
1518           /* If $$ is being set in default way, report if any type
1519              mismatch.  */
1520           else if (!xactions
1521                    && first_rhs && lhs->type_name != first_rhs->type_name)
1522             {
1523               if (lhs->type_name == 0
1524                   || first_rhs->type_name == 0
1525                   || strcmp (lhs->type_name, first_rhs->type_name))
1526                 complain (_("type clash (`%s' `%s') on default action"),
1527                           lhs->type_name ? lhs->type_name : "",
1528                           first_rhs->type_name ? first_rhs->type_name : "");
1529             }
1530           /* Warn if there is no default for $$ but we need one.  */
1531           else if (!xactions && !first_rhs && lhs->type_name != 0)
1532             complain (_("empty rule for typed nonterminal, and no action"));
1533           if (t == SEMICOLON)
1534             t = lex ();
1535         }
1536 #if 0
1537       /* these things can appear as alternatives to rules.  */
1538 /* NO, they cannot.
1539         a) none of the documentation allows them
1540         b) most of them scan forward until finding a next %
1541                 thus they may swallow lots of intervening rules
1542 */
1543       else if (t == TOKEN)
1544         {
1545           parse_token_decl (token_sym, nterm_sym);
1546           t = lex ();
1547         }
1548       else if (t == NTERM)
1549         {
1550           parse_token_decl (nterm_sym, token_sym);
1551           t = lex ();
1552         }
1553       else if (t == TYPE)
1554         {
1555           t = get_type ();
1556         }
1557       else if (t == UNION)
1558         {
1559           parse_union_decl ();
1560           t = lex ();
1561         }
1562       else if (t == EXPECT)
1563         {
1564           parse_expect_decl ();
1565           t = lex ();
1566         }
1567       else if (t == START)
1568         {
1569           parse_start_decl ();
1570           t = lex ();
1571         }
1572 #endif
1573
1574       else
1575         {
1576           complain (_("invalid input: %s"), token_buffer);
1577           t = lex ();
1578         }
1579     }
1580
1581   /* grammar has been read.  Do some checking */
1582
1583   if (nsyms > MAXSHORT)
1584     fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1585            MAXSHORT);
1586   if (nrules == 0)
1587     fatal (_("no rules in the input grammar"));
1588
1589   /* JF put out same default YYSTYPE as YACC does */
1590   if (typed == 0
1591       && !value_components_used)
1592     {
1593       /* We used to use `unsigned long' as YYSTYPE on MSDOS,
1594          but it seems better to be consistent.
1595          Most programs should declare their own type anyway.  */
1596       obstack_grow_literal_string (&attrs_obstack,
1597                             "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1598       if (fdefines)
1599         fprintf (fdefines, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1600     }
1601
1602   /* Report any undefined symbols and consider them nonterminals.  */
1603
1604   for (bp = firstsymbol; bp; bp = bp->next)
1605     if (bp->class == unknown_sym)
1606       {
1607         complain (_
1608                   ("symbol %s is used, but is not defined as a token and has no rules"),
1609                   bp->tag);
1610         bp->class = nterm_sym;
1611         bp->value = nvars++;
1612       }
1613
1614   ntokens = nsyms - nvars;
1615 }
1616 \f
1617 /*--------------------------------------------------------------.
1618 | For named tokens, but not literal ones, define the name.  The |
1619 | value is the user token number.                               |
1620 `--------------------------------------------------------------*/
1621
1622 static void
1623 output_token_defines (FILE *file)
1624 {
1625   bucket *bp;
1626   char *cp, *symbol;
1627   char c;
1628
1629   for (bp = firstsymbol; bp; bp = bp->next)
1630     {
1631       symbol = bp->tag;         /* get symbol */
1632
1633       if (bp->value >= ntokens)
1634         continue;
1635       if (bp->user_token_number == SALIAS)
1636         continue;
1637       if ('\'' == *symbol)
1638         continue;               /* skip literal character */
1639       if (bp == errtoken)
1640         continue;               /* skip error token */
1641       if ('\"' == *symbol)
1642         {
1643           /* use literal string only if given a symbol with an alias */
1644           if (bp->alias)
1645             symbol = bp->alias->tag;
1646           else
1647             continue;
1648         }
1649
1650       /* Don't #define nonliteral tokens whose names contain periods.  */
1651       cp = symbol;
1652       while ((c = *cp++) && c != '.');
1653       if (c != '\0')
1654         continue;
1655
1656       fprintf (file, "#define\t%s\t%d\n", symbol,
1657                ((translations && !raw_flag)
1658                 ? bp->user_token_number : bp->value));
1659       if (semantic_parser)
1660         fprintf (file, "#define\tT%s\t%d\n", symbol, bp->value);
1661     }
1662
1663   putc ('\n', file);
1664 }
1665
1666
1667 /*------------------------------------------------------------------.
1668 | Assign symbol numbers, and write definition of token names into   |
1669 | FDEFINES.  Set up vectors TAGS and SPREC of names and precedences |
1670 | of symbols.                                                       |
1671 `------------------------------------------------------------------*/
1672
1673 static void
1674 packsymbols (void)
1675 {
1676   bucket *bp;
1677   int tokno = 1;
1678   int i;
1679   int last_user_token_number;
1680   static char DOLLAR[] = "$";
1681
1682   /* int lossage = 0; JF set but not used */
1683
1684   tags = XCALLOC (char *, nsyms + 1);
1685   tags[0] = DOLLAR;
1686   user_toknums = XCALLOC (short, nsyms + 1);
1687   user_toknums[0] = 0;
1688
1689   sprec = XCALLOC (short, nsyms);
1690   sassoc = XCALLOC (short, nsyms);
1691
1692   max_user_token_number = 256;
1693   last_user_token_number = 256;
1694
1695   for (bp = firstsymbol; bp; bp = bp->next)
1696     {
1697       if (bp->class == nterm_sym)
1698         {
1699           bp->value += ntokens;
1700         }
1701       else if (bp->alias)
1702         {
1703           /* this symbol and its alias are a single token defn.
1704              allocate a tokno, and assign to both check agreement of
1705              ->prec and ->assoc fields and make both the same */
1706           if (bp->value == 0)
1707             bp->value = bp->alias->value = tokno++;
1708
1709           if (bp->prec != bp->alias->prec)
1710             {
1711               if (bp->prec != 0 && bp->alias->prec != 0
1712                   && bp->user_token_number == SALIAS)
1713                 complain (_("conflicting precedences for %s and %s"),
1714                           bp->tag, bp->alias->tag);
1715               if (bp->prec != 0)
1716                 bp->alias->prec = bp->prec;
1717               else
1718                 bp->prec = bp->alias->prec;
1719             }
1720
1721           if (bp->assoc != bp->alias->assoc)
1722             {
1723               if (bp->assoc != 0 && bp->alias->assoc != 0
1724                   && bp->user_token_number == SALIAS)
1725                 complain (_("conflicting assoc values for %s and %s"),
1726                           bp->tag, bp->alias->tag);
1727               if (bp->assoc != 0)
1728                 bp->alias->assoc = bp->assoc;
1729               else
1730                 bp->assoc = bp->alias->assoc;
1731             }
1732
1733           if (bp->user_token_number == SALIAS)
1734             continue;           /* do not do processing below for SALIASs */
1735
1736         }
1737       else                      /* bp->class == token_sym */
1738         {
1739           bp->value = tokno++;
1740         }
1741
1742       if (bp->class == token_sym)
1743         {
1744           if (translations && !(bp->user_token_number))
1745             bp->user_token_number = ++last_user_token_number;
1746           if (bp->user_token_number > max_user_token_number)
1747             max_user_token_number = bp->user_token_number;
1748         }
1749
1750       tags[bp->value] = bp->tag;
1751       user_toknums[bp->value] = bp->user_token_number;
1752       sprec[bp->value] = bp->prec;
1753       sassoc[bp->value] = bp->assoc;
1754
1755     }
1756
1757   if (translations)
1758     {
1759       int j;
1760
1761       token_translations = XCALLOC (short, max_user_token_number + 1);
1762
1763       /* initialize all entries for literal tokens to 2, the internal
1764          token number for $undefined., which represents all invalid
1765          inputs.  */
1766       for (j = 0; j <= max_user_token_number; j++)
1767         token_translations[j] = 2;
1768
1769       for (bp = firstsymbol; bp; bp = bp->next)
1770         {
1771           if (bp->value >= ntokens)
1772             continue;           /* non-terminal */
1773           if (bp->user_token_number == SALIAS)
1774             continue;
1775           if (token_translations[bp->user_token_number] != 2)
1776             complain (_("tokens %s and %s both assigned number %d"),
1777                       tags[token_translations[bp->user_token_number]],
1778                       bp->tag, bp->user_token_number);
1779           token_translations[bp->user_token_number] = bp->value;
1780         }
1781     }
1782
1783   error_token_number = errtoken->value;
1784
1785   if (!no_parser_flag)
1786     output_token_defines (ftable);
1787
1788   if (startval->class == unknown_sym)
1789     fatal (_("the start symbol %s is undefined"), startval->tag);
1790   else if (startval->class == token_sym)
1791     fatal (_("the start symbol %s is a token"), startval->tag);
1792
1793   start_symbol = startval->value;
1794
1795   if (defines_flag)
1796     {
1797       output_token_defines (fdefines);
1798
1799       if (!pure_parser)
1800         {
1801           if (spec_name_prefix)
1802             fprintf (fdefines, "\nextern YYSTYPE %slval;\n",
1803                      spec_name_prefix);
1804           else
1805             fprintf (fdefines, "\nextern YYSTYPE yylval;\n");
1806         }
1807
1808       if (semantic_parser)
1809         for (i = ntokens; i < nsyms; i++)
1810           {
1811             /* don't make these for dummy nonterminals made by gensym.  */
1812             if (*tags[i] != '@')
1813               fprintf (fdefines, "#define\tNT%s\t%d\n", tags[i], i);
1814           }
1815 #if 0
1816       /* `fdefines' is now a temporary file, so we need to copy its
1817          contents in `done', so we can't close it here.  */
1818       fclose (fdefines);
1819       fdefines = NULL;
1820 #endif
1821     }
1822 }
1823
1824
1825 /*---------------------------------------------------------------.
1826 | Convert the rules into the representation using RRHS, RLHS and |
1827 | RITEMS.                                                        |
1828 `---------------------------------------------------------------*/
1829
1830 static void
1831 packgram (void)
1832 {
1833   int itemno;
1834   int ruleno;
1835   symbol_list *p;
1836
1837   bucket *ruleprec;
1838
1839   ritem = XCALLOC (short, nitems + 1);
1840   rlhs = XCALLOC (short, nrules) - 1;
1841   rrhs = XCALLOC (short, nrules) - 1;
1842   rprec = XCALLOC (short, nrules) - 1;
1843   rprecsym = XCALLOC (short, nrules) - 1;
1844   rassoc = XCALLOC (short, nrules) - 1;
1845
1846   itemno = 0;
1847   ruleno = 1;
1848
1849   p = grammar;
1850   while (p)
1851     {
1852       rlhs[ruleno] = p->sym->value;
1853       rrhs[ruleno] = itemno;
1854       ruleprec = p->ruleprec;
1855
1856       p = p->next;
1857       while (p && p->sym)
1858         {
1859           ritem[itemno++] = p->sym->value;
1860           /* A rule gets by default the precedence and associativity
1861              of the last token in it.  */
1862           if (p->sym->class == token_sym)
1863             {
1864               rprec[ruleno] = p->sym->prec;
1865               rassoc[ruleno] = p->sym->assoc;
1866             }
1867           if (p)
1868             p = p->next;
1869         }
1870
1871       /* If this rule has a %prec,
1872          the specified symbol's precedence replaces the default.  */
1873       if (ruleprec)
1874         {
1875           rprec[ruleno] = ruleprec->prec;
1876           rassoc[ruleno] = ruleprec->assoc;
1877           rprecsym[ruleno] = ruleprec->value;
1878         }
1879
1880       ritem[itemno++] = -ruleno;
1881       ruleno++;
1882
1883       if (p)
1884         p = p->next;
1885     }
1886
1887   ritem[itemno] = 0;
1888 }
1889 \f
1890 /*-------------------------------------------------------------------.
1891 | Read in the grammar specification and record it in the format      |
1892 | described in gram.h.  All guards are copied into the FGUARD file   |
1893 | and all actions into ACTION_OBSTACK, in each case forming the body |
1894 | of a C function (YYGUARD or YYACTION) which contains a switch      |
1895 | statement to decide which guard or action to execute.              |
1896 `-------------------------------------------------------------------*/
1897
1898 void
1899 reader (void)
1900 {
1901   start_flag = 0;
1902   startval = NULL;              /* start symbol not specified yet. */
1903
1904 #if 0
1905   /* initially assume token number translation not needed.  */
1906   translations = 0;
1907 #endif
1908   /* Nowadays translations is always set to 1, since we give `error' a
1909      user-token-number to satisfy the Posix demand for YYERRCODE==256.
1910    */
1911   translations = 1;
1912
1913   nsyms = 1;
1914   nvars = 0;
1915   nrules = 0;
1916   nitems = 0;
1917   rline_allocated = 10;
1918   rline = XCALLOC (short, rline_allocated);
1919
1920   typed = 0;
1921   lastprec = 0;
1922
1923   gensym_count = 0;
1924
1925   semantic_parser = 0;
1926   pure_parser = 0;
1927
1928   grammar = NULL;
1929
1930   init_lex ();
1931   lineno = 1;
1932
1933   /* Initialize the symbol table.  */
1934   tabinit ();
1935   /* Construct the error token */
1936   errtoken = getsym ("error");
1937   errtoken->class = token_sym;
1938   errtoken->user_token_number = 256;    /* Value specified by POSIX.  */
1939   /* Construct a token that represents all undefined literal tokens.
1940      It is always token number 2.  */
1941   undeftoken = getsym ("$undefined.");
1942   undeftoken->class = token_sym;
1943   undeftoken->user_token_number = 2;
1944
1945   /* Read the declaration section.  Copy %{ ... %} groups to FTABLE
1946      and FDEFINES file.  Also notice any %token, %left, etc. found
1947      there.  */
1948   putc ('\n', ftable);
1949   fprintf (ftable, "\
1950 /* %s, made from %s\n\
1951    by GNU bison %s.  */\n\
1952 \n", no_parser_flag ? "Bison-generated parse tables" : "A Bison parser", infile, VERSION);
1953
1954   fputs ("#define YYBISON 1  /* Identify Bison output.  */\n\n", ftable);
1955   read_declarations ();
1956   /* Start writing the guard and action files, if they are needed.  */
1957   output_headers ();
1958   /* Read in the grammar, build grammar in list form.  Write out
1959      guards and actions.  */
1960   readgram ();
1961   /* Now we know whether we need the line-number stack.  If we do,
1962      write its type into the .tab.h file.  */
1963   if (fdefines)
1964     reader_output_yylsp (fdefines);
1965   /* Write closing delimiters for actions and guards.  */
1966   output_trailers ();
1967   if (locations_flag)
1968     fputs ("#define YYLSP_NEEDED\n\n", ftable);
1969   /* Assign the symbols their symbol numbers.  Write #defines for the
1970      token symbols into FDEFINES if requested.  */
1971   packsymbols ();
1972   /* Convert the grammar into the format described in gram.h.  */
1973   packgram ();
1974   /* Free the symbol table data structure since symbols are now all
1975      referred to by symbol number.  */
1976   free_symtab ();
1977 }
1978
1979
1980 void
1981 reader_output_yylsp (FILE *f)
1982 {
1983   if (locations_flag)
1984     fputs ("\
1985 \n\
1986 #ifndef YYLTYPE\n\
1987 typedef struct yyltype\n\
1988 {\n\
1989   int timestamp;\n\
1990   int first_line;\n\
1991   int first_column;\
1992 \n\
1993   int last_line;\n\
1994   int last_column;\n\
1995   char *text;\n\
1996 } yyltype;\n\
1997 \n\
1998 # define YYLTYPE yyltype\n\
1999 #endif\n\
2000 \n",
2001            f);
2002 }