src/reader.c

   1 /* Input parser for bison
   2    Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000
   3    Free Software Foundation, Inc.
   4
   5    This file is part of Bison, the GNU Compiler Compiler.
   6
   7    Bison is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 2, or (at your option)
  10    any later version.
  11
  12    Bison is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with Bison; see the file COPYING.  If not, write to
  19    the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20    Boston, MA 02111-1307, USA.  */
  21
  22
#include "system.h"
#include <limits.h>
#include "getargs.h"
#include "files.h"
#include "xalloc.h"
#include "symtab.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
#include "quote.h"
  35
  36 /* Number of slots allocated (but not necessarily used yet) in `rline'  */
  37 static int rline_allocated;
  38
  39 typedef struct symbol_list
  40 {
  41   struct symbol_list *next;
  42   bucket *sym;
  43   bucket *ruleprec;
  44 }
  45 symbol_list;
  46
  47 int lineno;
  48 char **tags;
  49 short *user_toknums;
  50 static symbol_list *grammar;
  51 static int start_flag;
  52 static bucket *startval;
  53
  54 /* Nonzero if components of semantic values are used, implying
  55    they must be unions.  */
  56 static int value_components_used;
  57
  58 /* Nonzero if %union has been seen.  */
  59 static int typed;
  60
  61 /* Incremented for each %left, %right or %nonassoc seen */
  62 static int lastprec;
  63
  64 /* Incremented for each generated symbol */
  65 static int gensym_count;
  66
  67 static bucket *errtoken;
  68 static bucket *undeftoken;
  69 \f
  70
  71 /*===================\
  72 | Low level lexing.  |
  73 \===================*/
  74
  75 static void
  76 skip_to_char (int target)
  77 {
  78   int c;
  79   if (target == '\n')
  80     complain (_("   Skipping to next \\n"));
  81   else
  82     complain (_("   Skipping to next %c"), target);
  83
  84   do
  85     c = skip_white_space ();
  86   while (c != target && c != EOF);
  87   if (c != EOF)
  88     ungetc (c, finput);
  89 }
  90
  91
  92 /*---------------------------------------------------------.
  93 | Read a signed integer from STREAM and return its value.  |
  94 `---------------------------------------------------------*/
  95
  96 static inline int
  97 read_signed_integer (FILE *stream)
  98 {
  99   int c = getc (stream);
 100   int sign = 1;
 101   int n = 0;
 102
 103   if (c == '-')
 104     {
 105       c = getc (stream);
 106       sign = -1;
 107     }
 108
 109   while (isdigit (c))
 110     {
 111       n = 10 * n + (c - '0');
 112       c = getc (stream);
 113     }
 114
 115   ungetc (c, stream);
 116
 117   return sign * n;
 118 }
 119 \f
 120 /*--------------------------------------------------------------.
 121 | Get the data type (alternative in the union) of the value for |
 122 | symbol N in rule RULE.                                        |
 123 `--------------------------------------------------------------*/
 124
 125 static char *
 126 get_type_name (int n, symbol_list * rule)
 127 {
 128   int i;
 129   symbol_list *rp;
 130
 131   if (n < 0)
 132     {
 133       complain (_("invalid $ value"));
 134       return NULL;
 135     }
 136
 137   rp = rule;
 138   i = 0;
 139
 140   while (i < n)
 141     {
 142       rp = rp->next;
 143       if (rp == NULL || rp->sym == NULL)
 144         {
 145           complain (_("invalid $ value"));
 146           return NULL;
 147         }
 148       i++;
 149     }
 150
 151   return rp->sym->type_name;
 152 }
 153 \f
 154 /*-------------------------------------------------------------------.
 155 | Dump the string from FINPUT to FOUTPUT.  MATCH is the delimiter of |
 156 | the string (either ' or ").                                        |
 157 `-------------------------------------------------------------------*/
 158
 159 static inline void
 160 copy_string (FILE *fin, FILE *fout, int match)
 161 {
 162   int c;
 163
 164   putc (match, fout);
 165   c = getc (fin);
 166
 167   while (c != match)
 168     {
 169       if (c == EOF)
 170         fatal (_("unterminated string at end of file"));
 171       if (c == '\n')
 172         {
 173           complain (_("unterminated string"));
 174           ungetc (c, fin);
 175           c = match;            /* invent terminator */
 176           continue;
 177         }
 178
 179       putc (c, fout);
 180
 181       if (c == '\\')
 182         {
 183           c = getc (fin);
 184           if (c == EOF)
 185             fatal (_("unterminated string at end of file"));
 186           putc (c, fout);
 187           if (c == '\n')
 188             lineno++;
 189         }
 190
 191       c = getc (fin);
 192     }
 193
 194   putc (c, fout);
 195 }
 196
 197
 198 /*---------------------------------------------------------------.
 199 | Dump the comment from IN to OUT1 and OUT2.  C is either `*' or |
 200 | `/', depending upon the type of comments used.  OUT2 might be  |
 201 | NULL.                                                          |
 202 `---------------------------------------------------------------*/
 203
 204 static inline void
 205 copy_comment2 (FILE *in, FILE *out1, FILE *out2, int c)
 206 {
 207   int cplus_comment;
 208   int ended;
 209
 210   cplus_comment = (c == '/');
 211   putc (c, out1);
 212   if (out2)
 213     putc (c, out2);
 214   c = getc (in);
 215
 216   ended = 0;
 217   while (!ended)
 218     {
 219       if (!cplus_comment && c == '*')
 220         {
 221           while (c == '*')
 222             {
 223               putc (c, out1);
 224               if (out2)
 225                 putc (c, out2);
 226               c = getc (in);
 227             }
 228
 229           if (c == '/')
 230             {
 231               putc (c, out1);
 232               if (out2)
 233                 putc (c, out2);
 234               ended = 1;
 235             }
 236         }
 237       else if (c == '\n')
 238         {
 239           lineno++;
 240           putc (c, out1);
 241           if (out2)
 242             putc (c, out2);
 243           if (cplus_comment)
 244             ended = 1;
 245           else
 246             c = getc (in);
 247         }
 248       else if (c == EOF)
 249         fatal (_("unterminated comment"));
 250       else
 251         {
 252           putc (c, out1);
 253           if (out2)
 254             putc (c, out2);
 255           c = getc (in);
 256         }
 257     }
 258 }
 259
 260
 261 /*------------------------------------------------------------.
 262 | Dump the comment from FIN to FOUT.  C is either `*' or `/', |
 263 | depending upon the type of comments used.                   |
 264 `------------------------------------------------------------*/
 265
 266 static inline void
 267 copy_comment (FILE *fin, FILE *fout, int c)
 268 {
 269   copy_comment2 (fin, fout, NULL, c);
 270 }
 271
 272
 273 /*-----------------------------------------------------------------.
 274 | FIN is pointing to a location (i.e., a `@').  Output to FOUT a   |
 275 | reference to this location. STACK_OFFSET is the number of values |
 276 | in the current rule so far, which says where to find `$0' with   |
 277 | respect to the top of the stack.                                 |
 278 `-----------------------------------------------------------------*/
 279
 280 static inline void
 281 copy_at (FILE *fin, FILE *fout, int stack_offset)
 282 {
 283   int c;
 284
 285   c = getc (fin);
 286   if (c == '$')
 287     {
 288       fprintf (fout, "yyloc");
 289       locations_flag = 1;
 290     }
 291   else if (isdigit (c) || c == '-')
 292     {
 293       int n;
 294
 295       ungetc (c, fin);
 296       n = read_signed_integer (fin);
 297
 298       fprintf (fout, "yylsp[%d]", n - stack_offset);
 299       locations_flag = 1;
 300     }
 301   else
 302     {
 303       char buf[] = "@c";
 304       buf[1] = c;
 305       complain (_("%s is invalid"), quote (buf));
 306     }
 307 }
 308
 309
 310 /*-------------------------------------------------------------------.
 311 | FIN is pointing to a wannabee semantic value (i.e., a `$').        |
 312 |                                                                    |
 313 | Possible inputs: $[<TYPENAME>]($|integer)                          |
 314 |                                                                    |
 315 | Output to FOUT a reference to this semantic value. STACK_OFFSET is |
 316 | the number of values in the current rule so far, which says where  |
 317 | to find `$0' with respect to the top of the stack.                 |
 318 `-------------------------------------------------------------------*/
 319
 320 static inline void
 321 copy_dollar (FILE *fin, FILE *fout,
 322              symbol_list *rule, int stack_offset)
 323 {
 324   int c = getc (fin);
 325   char *type_name = NULL;
 326
 327   /* Get the typename if explicit. */
 328   if (c == '<')
 329     {
 330       char *cp = token_buffer;
 331
 332       while ((c = getc (fin)) != '>' && c > 0)
 333         {
 334           if (cp == token_buffer + maxtoken)
 335             cp = grow_token_buffer (cp);
 336
 337           *cp++ = c;
 338         }
 339       *cp = 0;
 340       type_name = token_buffer;
 341       value_components_used = 1;
 342
 343       c = getc (fin);
 344     }
 345
 346   if (c == '$')
 347     {
 348       fprintf (fout, "yyval");
 349       if (!type_name)
 350         type_name = get_type_name (0, rule);
 351       if (type_name)
 352         fprintf (fout, ".%s", type_name);
 353       if (!type_name && typed)
 354         complain (_("$$ of `%s' has no declared type"),
 355                   rule->sym->tag);
 356     }
 357   else if (isdigit (c) || c == '-')
 358     {
 359       int n;
 360       ungetc (c, fin);
 361       n = read_signed_integer (fin);
 362
 363       if (!type_name && n > 0)
 364         type_name = get_type_name (n, rule);
 365
 366       fprintf (fout, "yyvsp[%d]", n - stack_offset);
 367       if (type_name)
 368         fprintf (fout, ".%s", type_name);
 369       if (!type_name && typed)
 370         complain (_("$%d of `%s' has no declared type"),
 371                   n, rule->sym->tag);
 372     }
 373   else
 374     {
 375       char buf[] = "$c";
 376       buf[1] = c;
 377       complain (_("%s is invalid"), quote (buf));
 378     }
 379 }
 380 \f
 381 /*-------------------------------------------------------------------.
 382 | Copy the contents of a `%{ ... %}' into the definitions file.  The |
 383 | `%{' has already been read.  Return after reading the `%}'.        |
 384 `-------------------------------------------------------------------*/
 385
 386 static void
 387 copy_definition (void)
 388 {
 389   int c;
 390   /* -1 while reading a character if prev char was %. */
 391   int after_percent;
 392
 393   if (!no_lines_flag)
 394     fprintf (fattrs, "#line %d \"%s\"\n", lineno, infile);
 395
 396   after_percent = 0;
 397
 398   c = getc (finput);
 399
 400   for (;;)
 401     {
 402       switch (c)
 403         {
 404         case '\n':
 405           putc (c, fattrs);
 406           lineno++;
 407           break;
 408
 409         case '%':
 410           after_percent = -1;
 411           break;
 412
 413         case '\'':
 414         case '"':
 415           copy_string (finput, fattrs, c);
 416           break;
 417
 418         case '/':
 419           putc (c, fattrs);
 420           c = getc (finput);
 421           if (c != '*' && c != '/')
 422             continue;
 423           copy_comment (finput, fattrs, c);
 424           break;
 425
 426         case EOF:
 427           fatal ("%s", _("unterminated `%{' definition"));
 428
 429         default:
 430           putc (c, fattrs);
 431         }
 432
 433       c = getc (finput);
 434
 435       if (after_percent)
 436         {
 437           if (c == '}')
 438             return;
 439           putc ('%', fattrs);
 440         }
 441       after_percent = 0;
 442
 443     }
 444
 445 }
 446
 447
 448 /*-------------------------------------------------------------------.
 449 | Parse what comes after %token or %nterm.  For %token, WHAT_IS is   |
 450 | token_sym and WHAT_IS_NOT is nterm_sym.  For %nterm, the arguments |
 451 | are reversed.                                                      |
 452 `-------------------------------------------------------------------*/
 453
 454 static void
 455 parse_token_decl (symbol_class what_is, symbol_class what_is_not)
 456 {
 457   int token = 0;
 458   char *typename = 0;
 459   struct bucket *symbol = NULL; /* pts to symbol being defined */
 460
 461   for (;;)
 462     {
 463       int tmp_char = ungetc (skip_white_space (), finput);
 464
 465       if (tmp_char == '%')
 466         return;
 467       if (tmp_char == EOF)
 468         fatal (_("Premature EOF after %s"), token_buffer);
 469
 470       token = lex ();
 471       if (token == COMMA)
 472         {
 473           symbol = NULL;
 474           continue;
 475         }
 476       if (token == TYPENAME)
 477         {
 478           typename = xstrdup (token_buffer);
 479           value_components_used = 1;
 480           symbol = NULL;
 481         }
 482       else if (token == IDENTIFIER && *symval->tag == '\"' && symbol)
 483         {
 484           if (symval->alias)
 485             warn (_("symbol `%s' used more than once as a literal string"),
 486                   symval->tag);
 487           else if (symbol->alias)
 488             warn (_("symbol `%s' given more than one literal string"),
 489                   symbol->tag);
 490           else
 491             {
 492               symval->class = token_sym;
 493               symval->type_name = typename;
 494               symval->user_token_number = symbol->user_token_number;
 495               symbol->user_token_number = SALIAS;
 496               symval->alias = symbol;
 497               symbol->alias = symval;
 498               /* symbol and symval combined are only one symbol */
 499               nsyms--;
 500             }
 501           translations = 1;
 502           symbol = NULL;
 503         }
 504       else if (token == IDENTIFIER)
 505         {
 506           int oldclass = symval->class;
 507           symbol = symval;
 508
 509           if (symbol->class == what_is_not)
 510             complain (_("symbol %s redefined"), symbol->tag);
 511           symbol->class = what_is;
 512           if (what_is == nterm_sym && oldclass != nterm_sym)
 513             symbol->value = nvars++;
 514
 515           if (typename)
 516             {
 517               if (symbol->type_name == NULL)
 518                 symbol->type_name = typename;
 519               else if (strcmp (typename, symbol->type_name) != 0)
 520                 complain (_("type redeclaration for %s"), symbol->tag);
 521             }
 522         }
 523       else if (symbol && token == NUMBER)
 524         {
 525           symbol->user_token_number = numval;
 526           translations = 1;
 527         }
 528       else
 529         {
 530           complain (_("`%s' is invalid in %s"),
 531                     token_buffer, (what_is == token_sym) ? "%token" : "%nterm");
 532           skip_to_char ('%');
 533         }
 534     }
 535
 536 }
 537
 538
 539 /*------------------------------.
 540 | Parse what comes after %start |
 541 `------------------------------*/
 542
 543 static void
 544 parse_start_decl (void)
 545 {
 546   if (start_flag)
 547     complain (_("multiple %s declarations"), "%start");
 548   if (lex () != IDENTIFIER)
 549     complain (_("invalid %s declaration"), "%start");
 550   else
 551     {
 552       start_flag = 1;
 553       startval = symval;
 554     }
 555 }
 556
 557 /*-----------------------------------------------------------.
 558 | read in a %type declaration and record its information for |
 559 | get_type_name to access                                    |
 560 `-----------------------------------------------------------*/
 561
 562 static void
 563 parse_type_decl (void)
 564 {
 565   char *name;
 566
 567   if (lex () != TYPENAME)
 568     {
 569       complain ("%s", _("%type declaration has no <typename>"));
 570       skip_to_char ('%');
 571       return;
 572     }
 573
 574   name = xstrdup (token_buffer);
 575
 576   for (;;)
 577     {
 578       int t;
 579       int tmp_char = ungetc (skip_white_space (), finput);
 580
 581       if (tmp_char == '%')
 582         return;
 583       if (tmp_char == EOF)
 584         fatal (_("Premature EOF after %s"), token_buffer);
 585
 586       t = lex ();
 587
 588       switch (t)
 589         {
 590
 591         case COMMA:
 592         case SEMICOLON:
 593           break;
 594
 595         case IDENTIFIER:
 596           if (symval->type_name == NULL)
 597             symval->type_name = name;
 598           else if (strcmp (name, symval->type_name) != 0)
 599             complain (_("type redeclaration for %s"), symval->tag);
 600
 601           break;
 602
 603         default:
 604           complain (_("invalid %%type declaration due to item: %s"),
 605                     token_buffer);
 606           skip_to_char ('%');
 607         }
 608     }
 609 }
 610
 611
 612
 613 /*----------------------------------------------------------------.
 614 | Read in a %left, %right or %nonassoc declaration and record its |
 615 | information.                                                    |
 616 `----------------------------------------------------------------*/
 617
 618 static void
 619 parse_assoc_decl (associativity assoc)
 620 {
 621   char *name = NULL;
 622   int prev = 0;
 623
 624   lastprec++;                   /* Assign a new precedence level, never 0.  */
 625
 626   for (;;)
 627     {
 628       int t;
 629       int tmp_char = ungetc (skip_white_space (), finput);
 630
 631       if (tmp_char == '%')
 632         return;
 633       if (tmp_char == EOF)
 634         fatal (_("Premature EOF after %s"), token_buffer);
 635
 636       t = lex ();
 637
 638       switch (t)
 639         {
 640         case TYPENAME:
 641           name = xstrdup (token_buffer);
 642           break;
 643
 644         case COMMA:
 645           break;
 646
 647         case IDENTIFIER:
 648           if (symval->prec != 0)
 649             complain (_("redefining precedence of %s"), symval->tag);
 650           symval->prec = lastprec;
 651           symval->assoc = assoc;
 652           if (symval->class == nterm_sym)
 653             complain (_("symbol %s redefined"), symval->tag);
 654           symval->class = token_sym;
 655           if (name)
 656             {                   /* record the type, if one is specified */
 657               if (symval->type_name == NULL)
 658                 symval->type_name = name;
 659               else if (strcmp (name, symval->type_name) != 0)
 660                 complain (_("type redeclaration for %s"), symval->tag);
 661             }
 662           break;
 663
 664         case NUMBER:
 665           if (prev == IDENTIFIER)
 666             {
 667               symval->user_token_number = numval;
 668               translations = 1;
 669             }
 670           else
 671             {
 672               complain (_
 673                         ("invalid text (%s) - number should be after identifier"),
 674 token_buffer);
 675               skip_to_char ('%');
 676             }
 677           break;
 678
 679         case SEMICOLON:
 680           return;
 681
 682         default:
 683           complain (_("unexpected item: %s"), token_buffer);
 684           skip_to_char ('%');
 685         }
 686
 687       prev = t;
 688
 689     }
 690 }
 691
 692
 693
 694 /*-------------------------------------------------------------------.
 695 | Copy the union declaration into fattrs (and fdefines), where it is |
 696 | made into the definition of YYSTYPE, the type of elements of the   |
 697 | parser value stack.                                                |
 698 `-------------------------------------------------------------------*/
 699
 700 static void
 701 parse_union_decl (void)
 702 {
 703   int c;
 704   int count = 0;
 705
 706   if (typed)
 707     complain (_("multiple %s declarations"), "%union");
 708
 709   typed = 1;
 710
 711   if (!no_lines_flag)
 712     fprintf (fattrs, "\n#line %d \"%s\"\n", lineno, infile);
 713   else
 714     fprintf (fattrs, "\n");
 715
 716   fprintf (fattrs, "typedef union");
 717   if (fdefines)
 718     fprintf (fdefines, "typedef union");
 719
 720   c = getc (finput);
 721
 722   while (c != EOF)
 723     {
 724       putc (c, fattrs);
 725       if (fdefines)
 726         putc (c, fdefines);
 727
 728       switch (c)
 729         {
 730         case '\n':
 731           lineno++;
 732           break;
 733
 734         case '/':
 735           c = getc (finput);
 736           if (c != '*' && c != '/')
 737             continue;
 738           copy_comment2 (finput, fattrs, fdefines, c);
 739           break;
 740
 741
 742         case '{':
 743           count++;
 744           break;
 745
 746         case '}':
 747           if (count == 0)
 748             complain (_("unmatched %s"), "`}'");
 749           count--;
 750           if (count <= 0)
 751             {
 752               fprintf (fattrs, " YYSTYPE;\n");
 753               if (fdefines)
 754                 fprintf (fdefines, " YYSTYPE;\n");
 755               /* JF don't choke on trailing semi */
 756               c = skip_white_space ();
 757               if (c != ';')
 758                 ungetc (c, finput);
 759               return;
 760             }
 761         }
 762
 763       c = getc (finput);
 764     }
 765 }
 766
 767
 768 /*-------------------------------------------------------.
 769 | Parse the declaration %expect N which says to expect N |
 770 | shift-reduce conflicts.                                |
 771 `-------------------------------------------------------*/
 772
 773 static void
 774 parse_expect_decl (void)
 775 {
 776   int c = skip_white_space ();
 777   ungetc (c, finput);
 778
 779   if (!isdigit (c))
 780     complain (_("argument of %%expect is not an integer"));
 781   else
 782     expected_conflicts = read_signed_integer (finput);
 783 }
 784
 785
 786 /*-------------------------------------------------------------------.
 787 | Parse what comes after %thong.  the full syntax is                 |
 788 |                                                                    |
 789 |                %thong <type> token number literal                  |
 790 |                                                                    |
 791 | the <type> or number may be omitted.  The number specifies the     |
 792 | user_token_number.                                                 |
 793 |                                                                    |
 794 | Two symbols are entered in the table, one for the token symbol and |
 795 | one for the literal.  Both are given the <type>, if any, from the  |
 796 | declaration.  The ->user_token_number of the first is SALIAS and   |
 797 | the ->user_token_number of the second is set to the number, if     |
 798 | any, from the declaration.  The two symbols are linked via         |
 799 | pointers in their ->alias fields.                                  |
 800 |                                                                    |
 801 | During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter,    |
 802 | only the literal string is retained it is the literal string that  |
 803 | is output to yytname                                               |
 804 `-------------------------------------------------------------------*/
 805
 806 static void
 807 parse_thong_decl (void)
 808 {
 809   int token;
 810   struct bucket *symbol;
 811   char *typename = 0;
 812   int usrtoknum;
 813
 814   translations = 1;
 815   token = lex ();               /* fetch typename or first token */
 816   if (token == TYPENAME)
 817     {
 818       typename = xstrdup (token_buffer);
 819       value_components_used = 1;
 820       token = lex ();           /* fetch first token */
 821     }
 822
 823   /* process first token */
 824
 825   if (token != IDENTIFIER)
 826     {
 827       complain (_("unrecognized item %s, expected an identifier"),
 828                 token_buffer);
 829       skip_to_char ('%');
 830       return;
 831     }
 832   symval->class = token_sym;
 833   symval->type_name = typename;
 834   symval->user_token_number = SALIAS;
 835   symbol = symval;
 836
 837   token = lex ();               /* get number or literal string */
 838
 839   if (token == NUMBER)
 840     {
 841       usrtoknum = numval;
 842       token = lex ();           /* okay, did number, now get literal */
 843     }
 844   else
 845     usrtoknum = 0;
 846
 847   /* process literal string token */
 848
 849   if (token != IDENTIFIER || *symval->tag != '\"')
 850     {
 851       complain (_("expected string constant instead of %s"), token_buffer);
 852       skip_to_char ('%');
 853       return;
 854     }
 855   symval->class = token_sym;
 856   symval->type_name = typename;
 857   symval->user_token_number = usrtoknum;
 858
 859   symval->alias = symbol;
 860   symbol->alias = symval;
 861
 862   /* symbol and symval combined are only one symbol.  */
 863   nsyms--;
 864 }
 865
 866
 867 /*----------------------------------------------------------------.
 868 | Read from finput until `%%' is seen.  Discard the `%%'.  Handle |
 869 | any `%' declarations, and copy the contents of any `%{ ... %}'  |
 870 | groups to fattrs.                                               |
 871 `----------------------------------------------------------------*/
 872
 873 static void
 874 read_declarations (void)
 875 {
 876   int c;
 877   int tok;
 878
 879   for (;;)
 880     {
 881       c = skip_white_space ();
 882
 883       if (c == '%')
 884         {
 885           tok = parse_percent_token ();
 886
 887           switch (tok)
 888             {
 889             case TWO_PERCENTS:
 890               return;
 891
 892             case PERCENT_LEFT_CURLY:
 893               copy_definition ();
 894               break;
 895
 896             case TOKEN:
 897               parse_token_decl (token_sym, nterm_sym);
 898               break;
 899
 900             case NTERM:
 901               parse_token_decl (nterm_sym, token_sym);
 902               break;
 903
 904             case TYPE:
 905               parse_type_decl ();
 906               break;
 907
 908             case START:
 909               parse_start_decl ();
 910               break;
 911
 912             case UNION:
 913               parse_union_decl ();
 914               break;
 915
 916             case EXPECT:
 917               parse_expect_decl ();
 918               break;
 919             case THONG:
 920               parse_thong_decl ();
 921               break;
 922
 923             case LEFT:
 924               parse_assoc_decl (left_assoc);
 925               break;
 926
 927             case RIGHT:
 928               parse_assoc_decl (right_assoc);
 929               break;
 930
 931             case NONASSOC:
 932               parse_assoc_decl (non_assoc);
 933               break;
 934
 935             case SEMANTIC_PARSER:
 936               if (semantic_parser == 0)
 937                 {
 938                   semantic_parser = 1;
 939                   open_extra_files ();
 940                 }
 941               break;
 942
 943             case PURE_PARSER:
 944               pure_parser = 1;
 945               break;
 946
 947             case NOOP:
 948               break;
 949
 950             default:
 951               complain (_("unrecognized: %s"), token_buffer);
 952               skip_to_char ('%');
 953             }
 954         }
 955       else if (c == EOF)
 956         fatal (_("no input grammar"));
 957       else
 958         {
 959           char buf[] = "c";
 960           buf[0] = c;
 961           complain (_("unknown character: %s"), quote (buf));
 962           skip_to_char ('%');
 963         }
 964     }
 965 }
 966 \f
 967 /*-------------------------------------------------------------------.
 968 | Assuming that a `{' has just been seen, copy everything up to the  |
 969 | matching `}' into the actions file.  STACK_OFFSET is the number of |
 970 | values in the current rule so far, which says where to find `$0'   |
 971 | with respect to the top of the stack.                              |
 972 `-------------------------------------------------------------------*/
 973
 974 static void
 975 copy_action (symbol_list *rule, int stack_offset)
 976 {
 977   int c;
 978   int count;
 979
 980   /* offset is always 0 if parser has already popped the stack pointer */
 981   if (semantic_parser)
 982     stack_offset = 0;
 983
 984   fprintf (faction, "\ncase %d:\n", nrules);
 985   if (!no_lines_flag)
 986     fprintf (faction, "#line %d \"%s\"\n", lineno, infile);
 987   putc ('{', faction);
 988
 989   count = 1;
 990   c = getc (finput);
 991
 992   while (count > 0)
 993     {
 994       while (c != '}')
 995         {
 996           switch (c)
 997             {
 998             case '\n':
 999               putc (c, faction);
1000               lineno++;
1001               break;
1002
1003             case '{':
1004               putc (c, faction);
1005               count++;
1006               break;
1007
1008             case '\'':
1009             case '"':
1010               copy_string (finput, faction, c);
1011               break;
1012
1013             case '/':
1014               putc (c, faction);
1015               c = getc (finput);
1016               if (c != '*' && c != '/')
1017                 continue;
1018               copy_comment (finput, faction, c);
1019               break;
1020
1021             case '$':
1022               copy_dollar (finput, faction, rule, stack_offset);
1023               break;
1024
1025             case '@':
1026               copy_at (finput, faction, stack_offset);
1027               break;
1028
1029             case EOF:
1030               fatal (_("unmatched %s"), "`{'");
1031
1032             default:
1033               putc (c, faction);
1034             }
1035
1036           c = getc (finput);
1037         }
1038
1039       /* above loop exits when c is '}' */
1040
1041       if (--count)
1042         {
1043           putc (c, faction);
1044           c = getc (finput);
1045         }
1046     }
1047
1048   fprintf (faction, ";\n    break;}");
1049 }
1050 \f
1051 /*-------------------------------------------------------------------.
1052 | After `%guard' is seen in the input file, copy the actual guard    |
1053 | into the guards file.  If the guard is followed by an action, copy |
1054 | that into the actions file.  STACK_OFFSET is the number of values  |
1055 | in the current rule so far, which says where to find `$0' with     |
1056 | respect to the top of the stack, for the simple parser in which    |
1057 | the stack is not popped until after the guard is run.              |
1058 `-------------------------------------------------------------------*/
1059
1060 static void
1061 copy_guard (symbol_list *rule, int stack_offset)
1062 {
1063   int c;
1064   int count;
1065   int brace_flag = 0;
1066
1067   /* offset is always 0 if parser has already popped the stack pointer */
1068   if (semantic_parser)
1069     stack_offset = 0;
1070
1071   fprintf (fguard, "\ncase %d:\n", nrules);
1072   if (!no_lines_flag)
1073     fprintf (fguard, "#line %d \"%s\"\n", lineno, infile);
1074   putc ('{', fguard);
1075
1076   count = 0;
1077   c = getc (finput);
1078
1079   while (brace_flag ? (count > 0) : (c != ';'))
1080     {
1081       switch (c)
1082         {
1083         case '\n':
1084           putc (c, fguard);
1085           lineno++;
1086           break;
1087
1088         case '{':
1089           putc (c, fguard);
1090           brace_flag = 1;
1091           count++;
1092           break;
1093
1094         case '}':
1095           putc (c, fguard);
1096           if (count > 0)
1097             count--;
1098           else
1099             {
1100               complain (_("unmatched %s"), "`}'");
1101               c = getc (finput);        /* skip it */
1102             }
1103           break;
1104
1105         case '\'':
1106         case '"':
1107           copy_string (finput, fguard, c);
1108           break;
1109
1110         case '/':
1111           putc (c, fguard);
1112           c = getc (finput);
1113           if (c != '*' && c != '/')
1114             continue;
1115           copy_comment (finput, fguard, c);
1116           break;
1117
1118         case '$':
1119           copy_dollar (finput, fguard, rule, stack_offset);
1120           break;
1121
1122         case '@':
1123           copy_at (finput, fguard, stack_offset);
1124           break;
1125
1126         case EOF:
1127           fatal ("%s", _("unterminated %guard clause"));
1128
1129         default:
1130           putc (c, fguard);
1131         }
1132
1133       if (c != '}' || count != 0)
1134         c = getc (finput);
1135     }
1136
1137   c = skip_white_space ();
1138
1139   fprintf (fguard, ";\n    break;}");
1140   if (c == '{')
1141     copy_action (rule, stack_offset);
1142   else if (c == '=')
1143     {
1144       c = getc (finput);        /* why not skip_white_space -wjh */
1145       if (c == '{')
1146         copy_action (rule, stack_offset);
1147     }
1148   else
1149     ungetc (c, finput);
1150 }
1151 \f
1152
1153 static void
1154 record_rule_line (void)
1155 {
1156   /* Record each rule's source line number in rline table.  */
1157
1158   if (nrules >= rline_allocated)
1159     {
1160       rline_allocated = nrules * 2;
1161       rline = XREALLOC (rline, short, rline_allocated);
1162     }
1163   rline[nrules] = lineno;
1164 }
1165
1166
1167 /*-------------------------------------------------------------------.
1168 | Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1169 | with the user's names.                                             |
1170 `-------------------------------------------------------------------*/
1171
1172 static bucket *
1173 gensym (void)
1174 {
1175   bucket *sym;
1176
1177   sprintf (token_buffer, "@%d", ++gensym_count);
1178   sym = getsym (token_buffer);
1179   sym->class = nterm_sym;
1180   sym->value = nvars++;
1181   return sym;
1182 }
1183
#if 0
/*------------------------------------------------------------------.
| read in a %type declaration and record its information for        |
| get_type_name to access.  This is unused.  It is only called from |
| the #if 0 part of readgram                                        |
`------------------------------------------------------------------*/

/* Returns the first token that does not belong to the declaration:
   the offending token when no TYPENAME follows, the token after a
   terminating SEMICOLON, or any other unexpected token.  */

static int
get_type (void)
{
  int k;
  int t;
  char *name;

  t = lex ();

  if (t != TYPENAME)
    {
      complain (_("invalid %s declaration"), "%type");
      return t;
    }

  /* Keep a private copy of the type name: TOKEN_BUFFER is read again
     below only through lex.  The copy is shared by every symbol that
     receives it and is never freed here.  */
  name = xstrdup (token_buffer);

  for (;;)
    {
      t = lex ();

      switch (t)
        {
        case SEMICOLON:
          return lex ();

        case COMMA:
          /* Commas between symbols are simply skipped.  */
          break;

        case IDENTIFIER:
          /* Give the symbol this type, or complain if it already has
             a different one.  */
          if (symval->type_name == NULL)
            symval->type_name = name;
          else if (strcmp (name, symval->type_name) != 0)
            complain (_("type redeclaration for %s"), symval->tag);

          break;

        default:
          return t;
        }
    }
}

#endif
1235 \f
/*------------------------------------------------------------------.
| Parse the input grammar into a one symbol_list structure.  Each   |
| rule is represented by a sequence of symbols: the left hand side  |
| followed by the contents of the right hand side, followed by a    |
| null pointer instead of a symbol to terminate the rule.  The next |
| symbol is the lhs of the following rule.                          |
|                                                                   |
| All guards and actions are copied out to the appropriate files,   |
| labelled by the rule number they apply to.                        |
`------------------------------------------------------------------*/

/* The list is left in the file-level variable `grammar'.  Along the
   way NRULES, NITEMS and NVARS are counted, the first lhs seen
   becomes the start symbol unless START_FLAG is already set, and
   NTOKENS is computed at the end as NSYMS - NVARS.  Reading stops at
   TWO_PERCENTS or ENDFILE.  */

static void
readgram (void)
{
  int t;
  bucket *lhs = NULL;
  symbol_list *p;
  symbol_list *p1;
  bucket *bp;

  /* Points to first symbol_list of current rule. its symbol is the
     lhs of the rule.  */
  symbol_list *crule;
  /* Points to the symbol_list preceding crule.  */
  symbol_list *crule1;

  /* P1 is always the last cell appended to the list; NULL while the
     list is still empty.  */
  p1 = NULL;

  t = lex ();

  while (t != TWO_PERCENTS && t != ENDFILE)
    {
      if (t == IDENTIFIER || t == BAR)
        {
          /* Nonzero while the most recent rhs element was an action.  */
          int action_flag = 0;
          /* Number of symbols in rhs of this rule so far */
          int rulelength = 0;
          int xactions = 0;     /* JF for error checking */
          bucket *first_rhs = 0;

          /* An IDENTIFIER starts a rule with a new lhs; a BAR starts
             another alternative for the lhs already in LHS.  */
          if (t == IDENTIFIER)
            {
              lhs = symval;

              if (!start_flag)
                {
                  startval = lhs;
                  start_flag = 1;
                }

              t = lex ();
              if (t != COLON)
                {
                  complain (_("ill-formed rule: initial symbol not followed by colon"));
                  unlex (t);
                }
            }

          if (nrules == 0 && t == BAR)
            {
              complain (_("grammar starts with vertical bar"));
              lhs = symval;     /* BOGUS: use a random symval */
            }
          /* start a new rule and record its lhs.  */

          nrules++;
          nitems++;

          record_rule_line ();

          p = XCALLOC (symbol_list, 1);
          p->sym = lhs;

          /* Remember the cell before this rule so that a dummy rule
             can later be spliced in ahead of it.  */
          crule1 = p1;
          if (p1)
            p1->next = p;
          else
            grammar = p;

          p1 = p;
          crule = p;

          /* mark the rule's lhs as a nonterminal if not already so.  */

          if (lhs->class == unknown_sym)
            {
              lhs->class = nterm_sym;
              lhs->value = nvars;
              nvars++;
            }
          else if (lhs->class == token_sym)
            complain (_("rule given for %s, which is a token"), lhs->tag);

          /* read the rhs of the rule.  */

          for (;;)
            {
              t = lex ();
              if (t == PREC)
                {
                  /* The token after PREC names the symbol whose
                     precedence the rule takes; its kind is not
                     checked here.  */
                  t = lex ();
                  crule->ruleprec = symval;
                  t = lex ();
                }

              if (!(t == IDENTIFIER || t == LEFT_CURLY))
                break;

              /* If next token is an identifier, see if a colon follows it.
                 If one does, exit this rule now.  */
              if (t == IDENTIFIER)
                {
                  bucket *ssave;
                  int t1;

                  /* Peek one token ahead; SYMVAL is saved and restored
                     around the peek so it still names this identifier.  */
                  ssave = symval;
                  t1 = lex ();
                  unlex (t1);
                  symval = ssave;
                  if (t1 == COLON)
                    break;

                  if (!first_rhs)       /* JF */
                    first_rhs = symval;
                  /* Not followed by colon =>
                     process as part of this rule's rhs.  */
                }

              /* If we just passed an action, that action was in the middle
                 of a rule, so make a dummy rule to reduce it to a
                 non-terminal.  */
              if (action_flag)
                {
                  bucket *sdummy;

                  /* Since the action was written out with this rule's */
                  /* number, we must give the new rule this number */
                  /* by inserting the new rule before it.  */

                  /* Make a dummy nonterminal, a gensym.  */
                  sdummy = gensym ();

                  /* Make a new rule, whose body is empty,
                     before the current one, so that the action
                     just read can belong to it.  */
                  nrules++;
                  nitems++;
                  record_rule_line ();
                  p = XCALLOC (symbol_list, 1);
                  if (crule1)
                    crule1->next = p;
                  else
                    grammar = p;
                  p->sym = sdummy;
                  /* This second cell keeps a null `sym' and so serves
                     as the dummy rule's end marker; it also becomes
                     the new predecessor of CRULE.  */
                  crule1 = XCALLOC (symbol_list, 1);
                  p->next = crule1;
                  crule1->next = crule;

                  /* insert the dummy generated by that rule into this rule.  */
                  nitems++;
                  p = XCALLOC (symbol_list, 1);
                  p->sym = sdummy;
                  p1->next = p;
                  p1 = p;

                  action_flag = 0;
                }

              if (t == IDENTIFIER)
                {
                  nitems++;
                  p = XCALLOC (symbol_list, 1);
                  p->sym = symval;
                  p1->next = p;
                  p1 = p;
                }
              else              /* handle an action.  */
                {
                  copy_action (crule, rulelength);
                  action_flag = 1;
                  xactions++;   /* JF */
                }
              rulelength++;
            }                   /* end of  read rhs of rule */

          /* Put an empty link in the list to mark the end of this rule  */
          p = XCALLOC (symbol_list, 1);
          p1->next = p;
          p1 = p;

          /* T is still PREC here only when the token read right after
             a PREC clause in the loop above was itself PREC.  */
          if (t == PREC)
            {
              complain (_("two @prec's in a row"));
              t = lex ();
              crule->ruleprec = symval;
              t = lex ();
            }
          if (t == GUARD)
            {
              if (!semantic_parser)
                complain (_("%%guard present but %%semantic_parser not specified"));

              copy_guard (crule, rulelength);
              t = lex ();
            }
          else if (t == LEFT_CURLY)
            {
              /* This case never occurs -wjh */
              if (action_flag)
                complain (_("two actions at end of one rule"));
              copy_action (crule, rulelength);
              action_flag = 1;
              xactions++;       /* -wjh */
              t = lex ();
            }
          /* If $$ is being set in default way, report if any type
             mismatch.  */
          else if (!xactions
                   && first_rhs && lhs->type_name != first_rhs->type_name)
            {
              /* The pointers differ; it is a clash unless both names
                 exist and compare equal as strings.  */
              if (lhs->type_name == 0
                  || first_rhs->type_name == 0
                  || strcmp (lhs->type_name, first_rhs->type_name))
                complain (_("type clash (`%s' `%s') on default action"),
                          lhs->type_name ? lhs->type_name : "",
                          first_rhs->type_name ? first_rhs->type_name : "");
            }
          /* Warn if there is no default for $$ but we need one.  */
          else if (!xactions && !first_rhs && lhs->type_name != 0)
            complain (_("empty rule for typed nonterminal, and no action"));
          /* The semicolon ending a rule is optional.  */
          if (t == SEMICOLON)
            t = lex ();
        }
#if 0
      /* these things can appear as alternatives to rules.  */
/* NO, they cannot.
        a) none of the documentation allows them
        b) most of them scan forward until finding a next %
                thus they may swallow lots of intervening rules
*/
      else if (t == TOKEN)
        {
          parse_token_decl (token_sym, nterm_sym);
          t = lex ();
        }
      else if (t == NTERM)
        {
          parse_token_decl (nterm_sym, token_sym);
          t = lex ();
        }
      else if (t == TYPE)
        {
          t = get_type ();
        }
      else if (t == UNION)
        {
          parse_union_decl ();
          t = lex ();
        }
      else if (t == EXPECT)
        {
          parse_expect_decl ();
          t = lex ();
        }
      else if (t == START)
        {
          parse_start_decl ();
          t = lex ();
        }
#endif

      else
        {
          /* Anything else cannot start a rule: report it and resume
             with the next token.  */
          complain (_("invalid input: %s"), token_buffer);
          t = lex ();
        }
    }

  /* grammar has been read.  Do some checking */

  if (nsyms > MAXSHORT)
    fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
           MAXSHORT);
  if (nrules == 0)
    fatal (_("no rules in the input grammar"));

  /* JF put out same default YYSTYPE as YACC does */
  if (typed == 0
      && !value_components_used)
    {
      /* We used to use `unsigned long' as YYSTYPE on MSDOS,
         but it seems better to be consistent.
         Most programs should declare their own type anyway.  */
      fprintf (fattrs, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
      if (fdefines)
        fprintf (fdefines, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
    }

  /* Report any undefined symbols and consider them nonterminals.  */

  for (bp = firstsymbol; bp; bp = bp->next)
    if (bp->class == unknown_sym)
      {
        complain (_
                  ("symbol %s is used, but is not defined as a token and has no rules"),
                  bp->tag);
        bp->class = nterm_sym;
        bp->value = nvars++;
      }

  /* Every symbol that is not a nonterminal is a token.  */
  ntokens = nsyms - nvars;
}
1548 \f
1549 /*--------------------------------------------------------------.
1550 | For named tokens, but not literal ones, define the name.  The |
1551 | value is the user token number.                               |
1552 `--------------------------------------------------------------*/
1553
1554 static void
1555 output_token_defines (FILE *file)
1556 {
1557   bucket *bp;
1558   char *cp, *symbol;
1559   char c;
1560
1561   for (bp = firstsymbol; bp; bp = bp->next)
1562     {
1563       symbol = bp->tag;         /* get symbol */
1564
1565       if (bp->value >= ntokens)
1566         continue;
1567       if (bp->user_token_number == SALIAS)
1568         continue;
1569       if ('\'' == *symbol)
1570         continue;               /* skip literal character */
1571       if (bp == errtoken)
1572         continue;               /* skip error token */
1573       if ('\"' == *symbol)
1574         {
1575           /* use literal string only if given a symbol with an alias */
1576           if (bp->alias)
1577             symbol = bp->alias->tag;
1578           else
1579             continue;
1580         }
1581
1582       /* Don't #define nonliteral tokens whose names contain periods.  */
1583       cp = symbol;
1584       while ((c = *cp++) && c != '.');
1585       if (c != '\0')
1586         continue;
1587
1588       fprintf (file, "#define\t%s\t%d\n", symbol,
1589                ((translations && !raw_flag)
1590                 ? bp->user_token_number : bp->value));
1591       if (semantic_parser)
1592         fprintf (file, "#define\tT%s\t%d\n", symbol, bp->value);
1593     }
1594
1595   putc ('\n', file);
1596 }
1597
1598
/*------------------------------------------------------------------.
| Assign symbol numbers, and write definition of token names into   |
| FDEFINES.  Set up vectors TAGS and SPREC of names and precedences |
| of symbols.                                                       |
`------------------------------------------------------------------*/

/* Besides TAGS and SPREC this also fills USER_TOKNUMS and SASSOC,
   builds TOKEN_TRANSLATIONS when TRANSLATIONS is set, records
   ERROR_TOKEN_NUMBER and START_SYMBOL, and writes the token defines
   to FTABLE unless NO_PARSER_FLAG is set.  It is fatal for the start
   symbol to be undefined or a token.  */

static void
packsymbols (void)
{
  bucket *bp;
  /* Next internal token number to hand out; slot 0 is taken by "$".  */
  int tokno = 1;
  int i;
  int last_user_token_number;
  static char DOLLAR[] = "$";

  /* int lossage = 0; JF set but not used */

  tags = XCALLOC (char *, nsyms + 1);
  tags[0] = DOLLAR;
  user_toknums = XCALLOC (short, nsyms + 1);
  user_toknums[0] = 0;

  sprec = XCALLOC (short, nsyms);
  sassoc = XCALLOC (short, nsyms);

  /* User token numbers assigned automatically start after 256.  */
  max_user_token_number = 256;
  last_user_token_number = 256;

  for (bp = firstsymbol; bp; bp = bp->next)
    {
      if (bp->class == nterm_sym)
        {
          /* Nonterminals are numbered after all the tokens.  */
          bp->value += ntokens;
        }
      else if (bp->alias)
        {
          /* this symbol and its alias are a single token defn.
             allocate a tokno, and assign to both check agreement of
             ->prec and ->assoc fields and make both the same */
          if (bp->value == 0)
            bp->value = bp->alias->value = tokno++;

          if (bp->prec != bp->alias->prec)
            {
              if (bp->prec != 0 && bp->alias->prec != 0
                  && bp->user_token_number == SALIAS)
                complain (_("conflicting precedences for %s and %s"),
                          bp->tag, bp->alias->tag);
              /* A nonzero value on this symbol wins; otherwise take
                 the alias's.  */
              if (bp->prec != 0)
                bp->alias->prec = bp->prec;
              else
                bp->prec = bp->alias->prec;
            }

          if (bp->assoc != bp->alias->assoc)
            {
              if (bp->assoc != 0 && bp->alias->assoc != 0
                  && bp->user_token_number == SALIAS)
                complain (_("conflicting assoc values for %s and %s"),
                          bp->tag, bp->alias->tag);
              if (bp->assoc != 0)
                bp->alias->assoc = bp->assoc;
              else
                bp->assoc = bp->alias->assoc;
            }

          if (bp->user_token_number == SALIAS)
            continue;           /* do not do processing below for SALIASs */

        }
      else                      /* bp->class == token_sym */
        {
          bp->value = tokno++;
        }

      if (bp->class == token_sym)
        {
          /* Tokens without a user number get the next free one.  */
          if (translations && !(bp->user_token_number))
            bp->user_token_number = ++last_user_token_number;
          if (bp->user_token_number > max_user_token_number)
            max_user_token_number = bp->user_token_number;
        }

      /* Record the symbol's attributes under its final number.  */
      tags[bp->value] = bp->tag;
      user_toknums[bp->value] = bp->user_token_number;
      sprec[bp->value] = bp->prec;
      sassoc[bp->value] = bp->assoc;

    }

  if (translations)
    {
      int j;

      token_translations = XCALLOC (short, max_user_token_number + 1);

      /* initialize all entries for literal tokens to 2, the internal
         token number for $undefined., which represents all invalid
         inputs.  */
      for (j = 0; j <= max_user_token_number; j++)
        token_translations[j] = 2;

      for (bp = firstsymbol; bp; bp = bp->next)
        {
          if (bp->value >= ntokens)
            continue;           /* non-terminal */
          if (bp->user_token_number == SALIAS)
            continue;
          /* An entry that no longer holds 2 was already claimed by
             another token.  */
          if (token_translations[bp->user_token_number] != 2)
            complain (_("tokens %s and %s both assigned number %d"),
                      tags[token_translations[bp->user_token_number]],
                      bp->tag, bp->user_token_number);
          token_translations[bp->user_token_number] = bp->value;
        }
    }

  error_token_number = errtoken->value;

  if (!no_parser_flag)
    output_token_defines (ftable);

  if (startval->class == unknown_sym)
    fatal (_("the start symbol %s is undefined"), startval->tag);
  else if (startval->class == token_sym)
    fatal (_("the start symbol %s is a token"), startval->tag);

  start_symbol = startval->value;

  if (defines_flag)
    {
      output_token_defines (fdefines);

      if (!pure_parser)
        {
          if (spec_name_prefix)
            fprintf (fdefines, "\nextern YYSTYPE %slval;\n",
                     spec_name_prefix);
          else
            fprintf (fdefines, "\nextern YYSTYPE yylval;\n");
        }

      if (semantic_parser)
        for (i = ntokens; i < nsyms; i++)
          {
            /* don't make these for dummy nonterminals made by gensym.  */
            if (*tags[i] != '@')
              fprintf (fdefines, "#define\tNT%s\t%d\n", tags[i], i);
          }
#if 0
      /* `fdefines' is now a temporary file, so we need to copy its
         contents in `done', so we can't close it here.  */
      fclose (fdefines);
      fdefines = NULL;
#endif
    }
}
1755
1756
1757 /*---------------------------------------------------------------.
1758 | Convert the rules into the representation using RRHS, RLHS and |
1759 | RITEMS.                                                        |
1760 `---------------------------------------------------------------*/
1761
1762 static void
1763 packgram (void)
1764 {
1765   int itemno;
1766   int ruleno;
1767   symbol_list *p;
1768
1769   bucket *ruleprec;
1770
1771   ritem = XCALLOC (short, nitems + 1);
1772   rlhs = XCALLOC (short, nrules) - 1;
1773   rrhs = XCALLOC (short, nrules) - 1;
1774   rprec = XCALLOC (short, nrules) - 1;
1775   rprecsym = XCALLOC (short, nrules) - 1;
1776   rassoc = XCALLOC (short, nrules) - 1;
1777
1778   itemno = 0;
1779   ruleno = 1;
1780
1781   p = grammar;
1782   while (p)
1783     {
1784       rlhs[ruleno] = p->sym->value;
1785       rrhs[ruleno] = itemno;
1786       ruleprec = p->ruleprec;
1787
1788       p = p->next;
1789       while (p && p->sym)
1790         {
1791           ritem[itemno++] = p->sym->value;
1792           /* A rule gets by default the precedence and associativity
1793              of the last token in it.  */
1794           if (p->sym->class == token_sym)
1795             {
1796               rprec[ruleno] = p->sym->prec;
1797               rassoc[ruleno] = p->sym->assoc;
1798             }
1799           if (p)
1800             p = p->next;
1801         }
1802
1803       /* If this rule has a %prec,
1804          the specified symbol's precedence replaces the default.  */
1805       if (ruleprec)
1806         {
1807           rprec[ruleno] = ruleprec->prec;
1808           rassoc[ruleno] = ruleprec->assoc;
1809           rprecsym[ruleno] = ruleprec->value;
1810         }
1811
1812       ritem[itemno++] = -ruleno;
1813       ruleno++;
1814
1815       if (p)
1816         p = p->next;
1817     }
1818
1819   ritem[itemno] = 0;
1820 }
1821 \f
1822 /*-------------------------------------------------------------------.
1823 | Read in the grammar specification and record it in the format      |
1824 | described in gram.h.  All guards are copied into the FGUARD file   |
1825 | and all actions into FACTION, in each case forming the body of a C |
1826 | function (YYGUARD or YYACTION) which contains a switch statement   |
1827 | to decide which guard or action to execute.                        |
1828 `-------------------------------------------------------------------*/
1829
1830 void
1831 reader (void)
1832 {
1833   start_flag = 0;
1834   startval = NULL;              /* start symbol not specified yet. */
1835
1836 #if 0
1837   /* initially assume token number translation not needed.  */
1838   translations = 0;
1839 #endif
1840   /* Nowadays translations is always set to 1, since we give `error' a
1841      user-token-number to satisfy the Posix demand for YYERRCODE==256.
1842    */
1843   translations = 1;
1844
1845   nsyms = 1;
1846   nvars = 0;
1847   nrules = 0;
1848   nitems = 0;
1849   rline_allocated = 10;
1850   rline = XCALLOC (short, rline_allocated);
1851
1852   typed = 0;
1853   lastprec = 0;
1854
1855   gensym_count = 0;
1856
1857   semantic_parser = 0;
1858   pure_parser = 0;
1859
1860   grammar = NULL;
1861
1862   init_lex ();
1863   lineno = 1;
1864
1865   /* Initialize the symbol table.  */
1866   tabinit ();
1867   /* Construct the error token */
1868   errtoken = getsym ("error");
1869   errtoken->class = token_sym;
1870   errtoken->user_token_number = 256;    /* Value specified by POSIX.  */
1871   /* Construct a token that represents all undefined literal tokens.
1872      It is always token number 2.  */
1873   undeftoken = getsym ("$undefined.");
1874   undeftoken->class = token_sym;
1875   undeftoken->user_token_number = 2;
1876
1877   /* Read the declaration section.  Copy %{ ... %} groups to FTABLE
1878      and FDEFINES file.  Also notice any %token, %left, etc. found
1879      there.  */
1880   putc ('\n', ftable);
1881   fprintf (ftable, "\
1882 /* %s, made from %s\n\
1883    by GNU bison %s.  */\n\
1884 \n", no_parser_flag ? "Bison-generated parse tables" : "A Bison parser", infile, VERSION);
1885
1886   fputs ("#define YYBISON 1  /* Identify Bison output.  */\n\n", ftable);
1887   read_declarations ();
1888   /* Start writing the guard and action files, if they are needed.  */
1889   output_headers ();
1890   /* Read in the grammar, build grammar in list form.  Write out
1891      guards and actions.  */
1892   readgram ();
1893   /* Now we know whether we need the line-number stack.  If we do,
1894      write its type into the .tab.h file.  */
1895   if (fdefines)
1896     reader_output_yylsp (fdefines);
1897   /* Write closing delimiters for actions and guards.  */
1898   output_trailers ();
1899   if (locations_flag)
1900     fputs ("#define YYLSP_NEEDED\n\n", ftable);
1901   /* Assign the symbols their symbol numbers.  Write #defines for the
1902      token symbols into FDEFINES if requested.  */
1903   packsymbols ();
1904   /* Convert the grammar into the format described in gram.h.  */
1905   packgram ();
1906   /* Free the symbol table data structure since symbols are now all
1907      referred to by symbol number.  */
1908   free_symtab ();
1909 }
1910
1911
1912 void
1913 reader_output_yylsp (FILE *f)
1914 {
1915   if (locations_flag)
1916     fputs ("\
1917 \n\
1918 #ifndef YYLTYPE\n\
1919 typedef struct yyltype\n\
1920 {\n\
1921   int timestamp;\n\
1922   int first_line;\n\
1923   int first_column;\
1924 \n\
1925   int last_line;\n\
1926   int last_column;\n\
1927   char *text;\n\
1928 } yyltype;\n\
1929 \n\
1930 # define YYLTYPE yyltype\n\
1931 #endif\n\
1932 \n",
1933            f);
1934 }