src/reader.c

   1 /* Input parser for bison
   2    Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002
   3    Free Software Foundation, Inc.
   4
   5    This file is part of Bison, the GNU Compiler Compiler.
   6
   7    Bison is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 2, or (at your option)
  10    any later version.
  11
  12    Bison is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with Bison; see the file COPYING.  If not, write to
  19    the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20    Boston, MA 02111-1307, USA.  */
  21
  22
#include "system.h"

#include <limits.h>

#include "quotearg.h"
#include "quote.h"
#include "getargs.h"
#include "files.h"
#include "symtab.h"
#include "options.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
#include "muscle_tab.h"
  37
  38 typedef struct symbol_list
  39 {
  40   struct symbol_list *next;
  41   symbol_t *sym;
  42   int line;
  43
  44   /* The action is attached to the LHS of a rule. */
  45   const char *action;
  46   int action_line;
  47
  48   symbol_t *ruleprec;
  49 } symbol_list;
  50
/* Line number of the input being read.  The readers in this file
   increment it each time they consume a newline.  */
int lineno;

/* NOTE(review): not referenced in the part of the file reviewed here;
   presumably the head of the list of symbols making up the grammar --
   confirm.  */
static symbol_list *grammar = NULL;

/* Nonzero once a valid %start declaration has been seen.  */
static int start_flag = 0;

/* Nonzero if components of semantic values are used, implying
   they must be unions.  */
static int value_components_used;

/* Nonzero if %union has been seen.  */
static int typed = 0;

/* Incremented for each %left, %right or %nonassoc seen.  */
static int lastprec = 0;
  64
  65 static symbol_list *
  66 symbol_list_new (symbol_t *sym)
  67 {
  68   symbol_list *res = XMALLOC (symbol_list, 1);
  69   res->next = NULL;
  70   res->sym = sym;
  71   res->line = lineno;
  72   res->action = NULL;
  73   res->action_line = 0;
  74   res->ruleprec = NULL;
  75   return res;
  76 }
  77
  78 /*===================\
  79 | Low level lexing.  |
  80 \===================*/
  81
  82 static void
  83 skip_to_char (int target)
  84 {
  85   int c;
  86   if (target == '\n')
  87     complain (_("   Skipping to next \\n"));
  88   else
  89     complain (_("   Skipping to next %c"), target);
  90
  91   do
  92     c = skip_white_space ();
  93   while (c != target && c != EOF);
  94   if (c != EOF)
  95     ungetc (c, finput);
  96 }
  97
  98
  99 /*---------------------------------------------------------.
 100 | Read a signed integer from STREAM and return its value.  |
 101 `---------------------------------------------------------*/
 102
 103 static inline int
 104 read_signed_integer (FILE *stream)
 105 {
 106   int c = getc (stream);
 107   int sign = 1;
 108   int n = 0;
 109
 110   if (c == '-')
 111     {
 112       c = getc (stream);
 113       sign = -1;
 114     }
 115
 116   while (isdigit (c))
 117     {
 118       n = 10 * n + (c - '0');
 119       c = getc (stream);
 120     }
 121
 122   ungetc (c, stream);
 123
 124   return sign * n;
 125 }
 126 \f
 127 /*--------------------------------------------------------------.
 128 | Get the data type (alternative in the union) of the value for |
 129 | symbol N in rule RULE.                                        |
 130 `--------------------------------------------------------------*/
 131
 132 static char *
 133 get_type_name (int n, symbol_list *rule)
 134 {
 135   int i;
 136   symbol_list *rp;
 137
 138   if (n < 0)
 139     {
 140       complain (_("invalid $ value"));
 141       return NULL;
 142     }
 143
 144   rp = rule;
 145   i = 0;
 146
 147   while (i < n)
 148     {
 149       rp = rp->next;
 150       if (rp == NULL || rp->sym == NULL)
 151         {
 152           complain (_("invalid $ value"));
 153           return NULL;
 154         }
 155       ++i;
 156     }
 157
 158   return rp->sym->type_name;
 159 }
 160 \f
 161 /*------------------------------------------------------------------.
 162 | Copy the character C to OOUT, and insert quadigraphs when needed. |
 163 `------------------------------------------------------------------*/
 164
 165 static inline void
 166 copy_character (struct obstack *oout, int c)
 167 {
 168   switch (c)
 169     {
 170     case '[':
 171       obstack_sgrow (oout, "@<:@");
 172       break;
 173
 174     case ']':
 175       obstack_sgrow (oout, "@:>@");
 176       break;
 177
 178     default:
 179       obstack_1grow (oout, c);
 180     }
 181 }
 182
 183 /*------------------------------------------------------------.
 184 | Dump the string from FIN to OOUT if non null.  MATCH is the |
 185 | delimiter of the string (either ' or ").                    |
 186 `------------------------------------------------------------*/
 187
 188 static inline void
 189 copy_string2 (FILE *fin, struct obstack *oout, int match, int store)
 190 {
 191   int c;
 192
 193   if (store)
 194     obstack_1grow (oout, match);
 195
 196   c = getc (fin);
 197
 198   while (c != match)
 199     {
 200       if (c == EOF)
 201         fatal (_("unterminated string at end of file"));
 202       if (c == '\n')
 203         {
 204           complain (_("unterminated string"));
 205           ungetc (c, fin);
 206           c = match;            /* invent terminator */
 207           continue;
 208         }
 209
 210       copy_character (oout, c);
 211
 212       if (c == '\\')
 213         {
 214           c = getc (fin);
 215           if (c == EOF)
 216             fatal (_("unterminated string at end of file"));
 217           copy_character (oout, c);
 218
 219           if (c == '\n')
 220             ++lineno;
 221         }
 222
 223       c = getc (fin);
 224     }
 225
 226   if (store)
 227     obstack_1grow (oout, c);
 228 }
 229
 230 /* FIXME. */
 231
 232 static inline void
 233 copy_string (FILE *fin, struct obstack *oout, int match)
 234 {
 235   copy_string2 (fin, oout, match, 1);
 236 }
 237
 238 /* FIXME. */
 239
 240 static inline void
 241 copy_identifier (FILE *fin, struct obstack *oout)
 242 {
 243   int c;
 244
 245   while (isalnum (c = getc (fin)) || c == '_')
 246     obstack_1grow (oout, c);
 247
 248   ungetc (c, fin);
 249 }
 250
 251
 252 /*------------------------------------------------------------------.
 253 | Dump the wannabee comment from IN to OOUT.  In fact we just saw a |
 254 | `/', which might or might not be a comment.  In any case, copy    |
 255 | what we saw.                                                      |
 256 `------------------------------------------------------------------*/
 257
 258 static inline void
 259 copy_comment (FILE *fin, struct obstack *oout)
 260 {
 261   int cplus_comment;
 262   int ended;
 263   int c;
 264
 265   /* We read a `/', output it. */
 266   obstack_1grow (oout, '/');
 267
 268   switch ((c = getc (fin)))
 269     {
 270     case '/':
 271       cplus_comment = 1;
 272       break;
 273     case '*':
 274       cplus_comment = 0;
 275       break;
 276     default:
 277       ungetc (c, fin);
 278       return;
 279     }
 280
 281   obstack_1grow (oout, c);
 282   c = getc (fin);
 283
 284   ended = 0;
 285   while (!ended)
 286     {
 287       if (!cplus_comment && c == '*')
 288         {
 289           while (c == '*')
 290             {
 291               obstack_1grow (oout, c);
 292               c = getc (fin);
 293             }
 294
 295           if (c == '/')
 296             {
 297               obstack_1grow (oout, c);
 298               ended = 1;
 299             }
 300         }
 301       else if (c == '\n')
 302         {
 303           ++lineno;
 304           obstack_1grow (oout, c);
 305           if (cplus_comment)
 306             ended = 1;
 307           else
 308             c = getc (fin);
 309         }
 310       else if (c == EOF)
 311         fatal (_("unterminated comment"));
 312       else
 313         {
 314           copy_character (oout, c);
 315           c = getc (fin);
 316         }
 317     }
 318 }
 319
 320
 321 /*-------------------------------------------------------------------.
 322 | FIN is pointing to a location (i.e., a `@').  Output to OOUT a     |
 323 | reference to this location. RULE_LENGTH is the number of values in |
 324 | the current rule so far, which says where to find `$0' with        |
 325 | respect to the top of the stack.                                   |
 326 `-------------------------------------------------------------------*/
 327
 328 static inline void
 329 copy_at (FILE *fin, struct obstack *oout, int rule_length)
 330 {
 331   int c = getc (fin);
 332   locations_flag = 1;
 333
 334   if (c == '$')
 335     {
 336       obstack_sgrow (oout, "]b4_lhs_location[");
 337     }
 338   else if (isdigit (c) || c == '-')
 339     {
 340       int n;
 341
 342       ungetc (c, fin);
 343       n = read_signed_integer (fin);
 344       if (n > rule_length)
 345         complain (_("invalid value: %s%d"), "@", n);
 346       else
 347         obstack_fgrow2 (oout, "]b4_rhs_location([%d], [%d])[",
 348                         rule_length, n);
 349     }
 350   else
 351     {
 352       char buf[] = "@c";
 353       buf[1] = c;
 354       complain (_("%s is invalid"), quote (buf));
 355     }
 356 }
 357
 358
 359 /*------------------------------------------------------------------.
 360 | FIN is pointing to a wannabee semantic value (i.e., a `$').       |
 361 |                                                                   |
 362 | Possible inputs: $[<TYPENAME>]($|integer)                         |
 363 |                                                                   |
 364 | Output to OOUT a reference to this semantic value. RULE_LENGTH is |
 365 | the number of values in the current rule so far, which says where |
 366 | to find `$0' with respect to the top of the stack.                |
 367 `------------------------------------------------------------------*/
 368
 369 static inline void
 370 copy_dollar (FILE *fin, struct obstack *oout,
 371              symbol_list *rule, int rule_length)
 372 {
 373   int c = getc (fin);
 374   const char *type_name = NULL;
 375
 376   /* Get the type name if explicit. */
 377   if (c == '<')
 378     {
 379       read_type_name (fin);
 380       type_name = token_buffer;
 381       value_components_used = 1;
 382       c = getc (fin);
 383     }
 384
 385   if (c == '$')
 386     {
 387       if (!type_name)
 388         type_name = get_type_name (0, rule);
 389       if (!type_name && typed)
 390         complain (_("$$ of `%s' has no declared type"),
 391                   rule->sym->tag);
 392       if (!type_name)
 393         type_name = "";
 394       obstack_fgrow1 (oout,
 395                       "]b4_lhs_value([%s])[", type_name);
 396     }
 397   else if (isdigit (c) || c == '-')
 398     {
 399       int n;
 400       ungetc (c, fin);
 401       n = read_signed_integer (fin);
 402
 403       if (n > rule_length)
 404         complain (_("invalid value: %s%d"), "$", n);
 405       else
 406         {
 407           if (!type_name && n > 0)
 408             type_name = get_type_name (n, rule);
 409           if (!type_name && typed)
 410             complain (_("$%d of `%s' has no declared type"),
 411                       n, rule->sym->tag);
 412           if (!type_name)
 413             type_name = "";
 414           obstack_fgrow3 (oout, "]b4_rhs_value([%d], [%d], [%s])[",
 415                           rule_length, n, type_name);
 416         }
 417     }
 418   else
 419     {
 420       char buf[] = "$c";
 421       buf[1] = c;
 422       complain (_("%s is invalid"), quote (buf));
 423     }
 424 }
 425 \f
 426 /*-------------------------------------------------------------------.
 427 | Copy the contents of a `%{ ... %}' into the definitions file.  The |
 428 | `%{' has already been read.  Return after reading the `%}'.        |
 429 `-------------------------------------------------------------------*/
 430
 431 static void
 432 copy_definition (struct obstack *oout)
 433 {
 434   int c;
 435   /* -1 while reading a character if prev char was %. */
 436   int after_percent;
 437
 438   if (!no_lines_flag)
 439     {
 440       obstack_fgrow2 (oout, muscle_find ("linef"),
 441                       lineno, quotearg_style (c_quoting_style,
 442                                               muscle_find ("filename")));
 443     }
 444
 445   after_percent = 0;
 446
 447   c = getc (finput);
 448
 449   for (;;)
 450     {
 451       switch (c)
 452         {
 453         case '\n':
 454           obstack_1grow (oout, c);
 455           ++lineno;
 456           break;
 457
 458         case '%':
 459           after_percent = -1;
 460           break;
 461
 462         case '\'':
 463         case '"':
 464           copy_string (finput, oout, c);
 465           break;
 466
 467         case '/':
 468           copy_comment (finput, oout);
 469           break;
 470
 471         case EOF:
 472           fatal ("%s", _("unterminated `%{' definition"));
 473
 474         default:
 475           copy_character (oout, c);
 476         }
 477
 478       c = getc (finput);
 479
 480       if (after_percent)
 481         {
 482           if (c == '}')
 483             return;
 484           obstack_1grow (oout, '%');
 485         }
 486       after_percent = 0;
 487     }
 488 }
 489
 490
 491 /*-------------------------------------------------------------------.
 492 | Parse what comes after %token or %nterm.  For %token, WHAT_IS is   |
 493 | token_sym and WHAT_IS_NOT is nterm_sym.  For %nterm, the arguments |
 494 | are reversed.                                                      |
 495 `-------------------------------------------------------------------*/
 496
 497 static void
 498 parse_token_decl (symbol_class what_is, symbol_class what_is_not)
 499 {
 500   token_t token = tok_undef;
 501   char *typename = NULL;
 502
 503   /* The symbol being defined.  */
 504   symbol_t *symbol = NULL;
 505
 506   /* After `%token' and `%nterm', any number of symbols maybe be
 507      defined.  */
 508   for (;;)
 509     {
 510       int tmp_char = ungetc (skip_white_space (), finput);
 511
 512       /* `%' (for instance from `%token', or from `%%' etc.) is the
 513          only valid means to end this declaration.  */
 514       if (tmp_char == '%')
 515         return;
 516       if (tmp_char == EOF)
 517         fatal (_("Premature EOF after %s"), token_buffer);
 518
 519       token = lex ();
 520       if (token == tok_comma)
 521         {
 522           symbol = NULL;
 523           continue;
 524         }
 525       if (token == tok_typename)
 526         {
 527           typename = xstrdup (token_buffer);
 528           value_components_used = 1;
 529           symbol = NULL;
 530         }
 531       else if (token == tok_identifier && *symval->tag == '\"' && symbol)
 532         {
 533           symbol_make_alias (symbol, symval, typename);
 534           symbol = NULL;
 535         }
 536       else if (token == tok_identifier)
 537         {
 538           int oldclass = symval->class;
 539           symbol = symval;
 540
 541           if (symbol->class == what_is_not)
 542             complain (_("symbol %s redefined"), symbol->tag);
 543           symbol->class = what_is;
 544           if (what_is == nterm_sym && oldclass != nterm_sym)
 545             symbol->number = nvars++;
 546           if (what_is == token_sym && symbol->number == NUMBER_UNDEFINED)
 547             symbol->number = ntokens++;
 548
 549           if (typename)
 550             {
 551               if (symbol->type_name == NULL)
 552                 symbol->type_name = typename;
 553               else if (strcmp (typename, symbol->type_name) != 0)
 554                 complain (_("type redeclaration for %s"), symbol->tag);
 555             }
 556         }
 557       else if (symbol && token == tok_number)
 558         {
 559           symbol->user_token_number = numval;
 560           /* User defined EOF token? */
 561           if (numval == 0)
 562             {
 563               eoftoken = symbol;
 564               eoftoken->number = 0;
 565               /* It is always mapped to 0, so it was already counted in
 566                  NTOKENS.  */
 567               --ntokens;
 568             }
 569         }
 570       else
 571         {
 572           complain (_("`%s' is invalid in %s"),
 573                     token_buffer,
 574                     (what_is == token_sym) ? "%token" : "%nterm");
 575           skip_to_char ('%');
 576         }
 577     }
 578
 579 }
 580
 581
 582 /*------------------------------.
 583 | Parse what comes after %start |
 584 `------------------------------*/
 585
 586 static void
 587 parse_start_decl (void)
 588 {
 589   if (start_flag)
 590     complain (_("multiple %s declarations"), "%start");
 591   if (lex () != tok_identifier)
 592     complain (_("invalid %s declaration"), "%start");
 593   else
 594     {
 595       start_flag = 1;
 596       startsymbol = symval;
 597     }
 598 }
 599
 600 /*-----------------------------------------------------------.
 601 | read in a %type declaration and record its information for |
 602 | get_type_name to access                                    |
 603 `-----------------------------------------------------------*/
 604
 605 static void
 606 parse_type_decl (void)
 607 {
 608   char *name;
 609
 610   if (lex () != tok_typename)
 611     {
 612       complain ("%s", _("%type declaration has no <typename>"));
 613       skip_to_char ('%');
 614       return;
 615     }
 616
 617   name = xstrdup (token_buffer);
 618
 619   for (;;)
 620     {
 621       token_t t;
 622       int tmp_char = ungetc (skip_white_space (), finput);
 623
 624       if (tmp_char == '%')
 625         return;
 626       if (tmp_char == EOF)
 627         fatal (_("Premature EOF after %s"), token_buffer);
 628
 629       t = lex ();
 630
 631       switch (t)
 632         {
 633
 634         case tok_comma:
 635         case tok_semicolon:
 636           break;
 637
 638         case tok_identifier:
 639           if (symval->type_name == NULL)
 640             symval->type_name = name;
 641           else if (strcmp (name, symval->type_name) != 0)
 642             complain (_("type redeclaration for %s"), symval->tag);
 643
 644           break;
 645
 646         default:
 647           complain (_("invalid %%type declaration due to item: %s"),
 648                     token_buffer);
 649           skip_to_char ('%');
 650         }
 651     }
 652 }
 653
 654
 655
 656 /*----------------------------------------------------------------.
 657 | Read in a %left, %right or %nonassoc declaration and record its |
 658 | information.                                                    |
 659 `----------------------------------------------------------------*/
 660
 661 static void
 662 parse_assoc_decl (associativity assoc)
 663 {
 664   char *name = NULL;
 665   int prev = 0;
 666
 667   /* Assign a new precedence level, never 0.  */
 668   ++lastprec;
 669
 670   for (;;)
 671     {
 672       token_t t;
 673       int tmp_char = ungetc (skip_white_space (), finput);
 674
 675       if (tmp_char == '%')
 676         return;
 677       if (tmp_char == EOF)
 678         fatal (_("Premature EOF after %s"), token_buffer);
 679
 680       t = lex ();
 681
 682       switch (t)
 683         {
 684         case tok_typename:
 685           name = xstrdup (token_buffer);
 686           break;
 687
 688         case tok_comma:
 689           break;
 690
 691         case tok_identifier:
 692           if (symval->prec != 0)
 693             complain (_("redefining precedence of %s"), symval->tag);
 694           symval->prec = lastprec;
 695           symval->assoc = assoc;
 696           if (symval->class == nterm_sym)
 697             complain (_("symbol %s redefined"), symval->tag);
 698           if (symval->number == NUMBER_UNDEFINED)
 699             {
 700               symval->number = ntokens++;
 701               symval->class = token_sym;
 702             }
 703           if (name)
 704             {                   /* record the type, if one is specified */
 705               if (symval->type_name == NULL)
 706                 symval->type_name = name;
 707               else if (strcmp (name, symval->type_name) != 0)
 708                 complain (_("type redeclaration for %s"), symval->tag);
 709             }
 710           break;
 711
 712         case tok_number:
 713           if (prev == tok_identifier)
 714             {
 715               symval->user_token_number = numval;
 716             }
 717           else
 718             {
 719               complain
 720                 (_("invalid text (%s) - number should be after identifier"),
 721                  token_buffer);
 722               skip_to_char ('%');
 723             }
 724           break;
 725
 726         case tok_semicolon:
 727           return;
 728
 729         default:
 730           complain (_("unexpected item: %s"), token_buffer);
 731           skip_to_char ('%');
 732         }
 733
 734       prev = t;
 735     }
 736 }
 737
 738
 739
 740 /*--------------------------------------------------------------.
 741 | Copy the union declaration into the stype muscle              |
 742 | (and fdefines),  where it is made into the definition of      |
 743 | YYSTYPE, the type of elements of the parser value stack.      |
 744 `--------------------------------------------------------------*/
 745
 746 static void
 747 parse_union_decl (void)
 748 {
 749   int c;
 750   int count = 0;
 751   bool done = FALSE;
 752   struct obstack union_obstack;
 753   if (typed)
 754     complain (_("multiple %s declarations"), "%union");
 755
 756   typed = 1;
 757
 758   MUSCLE_INSERT_INT ("stype_line", lineno);
 759   obstack_init (&union_obstack);
 760   obstack_sgrow (&union_obstack, "union");
 761
 762   while (!done)
 763     {
 764       c = xgetc (finput);
 765
 766       /* If C contains '/', it is output by copy_comment ().  */
 767       if (c != '/')
 768         obstack_1grow (&union_obstack, c);
 769
 770       switch (c)
 771         {
 772         case '\n':
 773           ++lineno;
 774           break;
 775
 776         case '/':
 777           copy_comment (finput, &union_obstack);
 778           break;
 779
 780         case '{':
 781           ++count;
 782           break;
 783
 784         case '}':
 785           /* FIXME: Errr.  How could this happen???. --akim */
 786           if (count == 0)
 787             complain (_("unmatched %s"), "`}'");
 788           count--;
 789           if (!count)
 790             done = TRUE;
 791           break;
 792         }
 793     }
 794
 795   /* JF don't choke on trailing semi */
 796   c = skip_white_space ();
 797   if (c != ';')
 798     ungetc (c, finput);
 799   obstack_1grow (&union_obstack, 0);
 800   muscle_insert ("stype", obstack_finish (&union_obstack));
 801 }
 802
 803
 804 /*-------------------------------------------------------.
 805 | Parse the declaration %expect N which says to expect N |
 806 | shift-reduce conflicts.                                |
 807 `-------------------------------------------------------*/
 808
 809 static void
 810 parse_expect_decl (void)
 811 {
 812   int c = skip_white_space ();
 813   ungetc (c, finput);
 814
 815   if (!isdigit (c))
 816     complain (_("argument of %%expect is not an integer"));
 817   else
 818     expected_conflicts = read_signed_integer (finput);
 819 }
 820
 821
 822 /*-------------------------------------------------------------------.
 823 | Parse what comes after %thong.  the full syntax is                 |
 824 |                                                                    |
 825 |                %thong <type> token number literal                  |
 826 |                                                                    |
 827 | the <type> or number may be omitted.  The number specifies the     |
 828 | user_token_number.                                                 |
 829 |                                                                    |
 830 | Two symbols are entered in the table, one for the token symbol and |
 831 | one for the literal.  Both are given the <type>, if any, from the  |
 832 | declaration.  The ->user_token_number of the first is              |
 833 | USER_NUMBER_ALIAS and the ->user_token_number of the second is set |
 834 | to the number, if any, from the declaration.  The two symbols are  |
 835 | linked via pointers in their ->alias fields.                       |
 836 |                                                                    |
 837 | During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter,    |
 838 | only the literal string is retained it is the literal string that  |
 839 | is output to yytname                                               |
 840 `-------------------------------------------------------------------*/
 841
 842 static void
 843 parse_thong_decl (void)
 844 {
 845   token_t token;
 846   symbol_t *symbol;
 847   char *typename = 0;
 848   int usrtoknum = USER_NUMBER_UNDEFINED;
 849
 850   token = lex ();               /* fetch typename or first token */
 851   if (token == tok_typename)
 852     {
 853       typename = xstrdup (token_buffer);
 854       value_components_used = 1;
 855       token = lex ();           /* fetch first token */
 856     }
 857
 858   /* process first token */
 859
 860   if (token != tok_identifier)
 861     {
 862       complain (_("unrecognized item %s, expected an identifier"),
 863                 token_buffer);
 864       skip_to_char ('%');
 865       return;
 866     }
 867   symval->class = token_sym;
 868   symval->type_name = typename;
 869   symval->user_token_number = USER_NUMBER_ALIAS;
 870   symbol = symval;
 871
 872   token = lex ();               /* get number or literal string */
 873
 874   if (token == tok_number)
 875     {
 876       usrtoknum = numval;
 877       token = lex ();           /* okay, did number, now get literal */
 878     }
 879
 880   /* process literal string token */
 881
 882   if (token != tok_identifier || *symval->tag != '\"')
 883     {
 884       complain (_("expected string constant instead of %s"), token_buffer);
 885       skip_to_char ('%');
 886       return;
 887     }
 888   symval->class = token_sym;
 889   symval->type_name = typename;
 890   symval->user_token_number = usrtoknum;
 891
 892   symval->alias = symbol;
 893   symbol->alias = symval;
 894
 895   /* symbol and symval combined are only one symbol.  */
 896   nsyms--;
 897 }
 898
 899
 900 static void
 901 parse_muscle_decl (void)
 902 {
 903   int ch = ungetc (skip_white_space (), finput);
 904   char *muscle_key;
 905   char *muscle_value;
 906
 907   /* Read key. */
 908   if (!isalpha (ch) && ch != '_')
 909     {
 910       complain (_("invalid %s declaration"), "%define");
 911       skip_to_char ('%');
 912       return;
 913     }
 914   copy_identifier (finput, &muscle_obstack);
 915   obstack_1grow (&muscle_obstack, 0);
 916   muscle_key = obstack_finish (&muscle_obstack);
 917
 918   /* Read value. */
 919   ch = skip_white_space ();
 920   if (ch != '"')
 921     {
 922       ungetc (ch, finput);
 923       if (ch != EOF)
 924         {
 925           complain (_("invalid %s declaration"), "%define");
 926           skip_to_char ('%');
 927           return;
 928         }
 929       else
 930         fatal (_("Premature EOF after %s"), "\"");
 931     }
 932   copy_string2 (finput, &muscle_obstack, '"', 0);
 933   obstack_1grow (&muscle_obstack, 0);
 934   muscle_value = obstack_finish (&muscle_obstack);
 935
 936   /* Store the (key, value) pair in the environment. */
 937   muscle_insert (muscle_key, muscle_value);
 938 }
 939
 940
 941
 942 /*---------------------------------.
 943 | Parse a double quoted parameter. |
 944 `---------------------------------*/
 945
 946 static const char *
 947 parse_dquoted_param (const char *from)
 948 {
 949   struct obstack param_obstack;
 950   const char *param = NULL;
 951   int c;
 952
 953   obstack_init (&param_obstack);
 954   c = skip_white_space ();
 955
 956   if (c != '"')
 957     {
 958       complain (_("invalid %s declaration"), from);
 959       ungetc (c, finput);
 960       skip_to_char ('%');
 961       return NULL;
 962     }
 963
 964   while ((c = literalchar ()) != '"')
 965     obstack_1grow (&param_obstack, c);
 966
 967   obstack_1grow (&param_obstack, '\0');
 968   param = obstack_finish (&param_obstack);
 969
 970   if (c != '"' || strlen (param) == 0)
 971     {
 972       complain (_("invalid %s declaration"), from);
 973       if (c != '"')
 974         ungetc (c, finput);
 975       skip_to_char ('%');
 976       return NULL;
 977     }
 978
 979   return param;
 980 }
 981
 982 /*----------------------------------.
 983 | Parse what comes after %skeleton. |
 984 `----------------------------------*/
 985
 986 static void
 987 parse_skel_decl (void)
 988 {
 989   skeleton = parse_dquoted_param ("%skeleton");
 990 }
 991
 992 /*----------------------------------------------------------------.
 993 | Read from finput until `%%' is seen.  Discard the `%%'.  Handle |
 994 | any `%' declarations, and copy the contents of any `%{ ... %}'  |
 995 | groups to PRE_PROLOGUE_OBSTACK or POST_PROLOGUE_OBSTACK.        |
 996 `----------------------------------------------------------------*/
 997
 998 static void
 999 read_declarations (void)
1000 {
1001   for (;;)
1002     {
1003       int c = skip_white_space ();
1004
1005       if (c == '%')
1006         {
1007           token_t tok = parse_percent_token ();
1008
1009           switch (tok)
1010             {
1011             case tok_two_percents:
1012               return;
1013
1014             case tok_percent_left_curly:
1015               if (!typed)
1016                 copy_definition (&pre_prologue_obstack);
1017               else
1018                 copy_definition (&post_prologue_obstack);
1019               break;
1020
1021             case tok_token:
1022               parse_token_decl (token_sym, nterm_sym);
1023               break;
1024
1025             case tok_nterm:
1026               parse_token_decl (nterm_sym, token_sym);
1027               break;
1028
1029             case tok_type:
1030               parse_type_decl ();
1031               break;
1032
1033             case tok_start:
1034               parse_start_decl ();
1035               break;
1036
1037             case tok_union:
1038               parse_union_decl ();
1039               break;
1040
1041             case tok_expect:
1042               parse_expect_decl ();
1043               break;
1044
1045             case tok_thong:
1046               parse_thong_decl ();
1047               break;
1048
1049             case tok_left:
1050               parse_assoc_decl (left_assoc);
1051               break;
1052
1053             case tok_right:
1054               parse_assoc_decl (right_assoc);
1055               break;
1056
1057             case tok_nonassoc:
1058               parse_assoc_decl (non_assoc);
1059               break;
1060
1061             case tok_define:
1062               parse_muscle_decl ();
1063               break;
1064
1065             case tok_skel:
1066               parse_skel_decl ();
1067               break;
1068
1069             case tok_noop:
1070               break;
1071
1072             case tok_stropt:
1073             case tok_intopt:
1074             case tok_obsolete:
1075               assert (0);
1076               break;
1077
1078             case tok_illegal:
1079             default:
1080               complain (_("unrecognized: %s"), token_buffer);
1081               skip_to_char ('%');
1082             }
1083         }
1084       else if (c == EOF)
1085         fatal (_("no input grammar"));
1086       else
1087         {
1088           char buf[] = "c";
1089           buf[0] = c;
1090           complain (_("unknown character: %s"), quote (buf));
1091           skip_to_char ('%');
1092         }
1093     }
1094 }
1095 \f
1096 /*------------------------------------------------------------------.
1097 | Assuming that a `{' has just been seen, copy everything up to the |
1098 | matching `}' into ACTION_OBSTACK.                                 |
1099 |                                                                   |
1100 | RULE_LENGTH is the number of values in the current rule so far,   |
1101 | which says where to find `$0' with respect to the top of the      |
1102 | stack.  It is not the same as the rule->length in the case of mid |
1103 | rule actions.                                                     |
1104 |                                                                   |
1105 | This routine is used for actions.                                 |
1106 `------------------------------------------------------------------*/
1107
1108 static void
1109 parse_action (symbol_list *rule, int rule_length)
1110 {
1111   int count = 1;
1112   rule->action_line = lineno;
1113   while (count > 0)
1114     {
1115       int c;
1116       while ((c = getc (finput)) != '}')
1117         switch (c)
1118           {
1119           case '\n':
1120             copy_character (&action_obstack, c);
1121             ++lineno;
1122             break;
1123
1124           case '{':
1125             copy_character (&action_obstack, c);
1126             ++count;
1127             break;
1128
1129           case '\'':
1130           case '"':
1131             copy_string (finput, &action_obstack, c);
1132             break;
1133
1134           case '/':
1135             copy_comment (finput, &action_obstack);
1136             break;
1137
1138           case '$':
1139             copy_dollar (finput, &action_obstack, rule, rule_length);
1140             break;
1141
1142           case '@':
1143             copy_at (finput, &action_obstack, rule_length);
1144             break;
1145
1146           case EOF:
1147             fatal (_("unmatched %s"), "`{'");
1148
1149           default:
1150             copy_character (&action_obstack, c);
1151           }
1152
1153       /* Above loop exits when C is '}'.  */
1154       if (--count)
1155         copy_character (&action_obstack, c);
1156     }
1157
1158   obstack_1grow (&action_obstack, '\0');
1159   rule->action = obstack_finish (&action_obstack);
1160 }
1161
1162 \f
1163
1164 /*-------------------------------------------------------------------.
1165 | Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1166 | with the user's names.                                             |
1167 `-------------------------------------------------------------------*/
1168
1169 static symbol_t *
1170 gensym (void)
1171 {
1172   /* Incremented for each generated symbol */
1173   static int gensym_count = 0;
1174   static char buf[256];
1175
1176   symbol_t *sym;
1177
1178   sprintf (buf, "@%d", ++gensym_count);
1179   token_buffer = buf;
1180   sym = getsym (token_buffer);
1181   sym->class = nterm_sym;
1182   sym->number = nvars++;
1183   return sym;
1184 }
1185 \f
/*-------------------------------------------------------------------.
| Parse the rules section of the input grammar into one symbol_list  |
| chain, GRAMMAR.  Each rule is represented by a sequence of         |
| symbols: the left hand side followed by the contents of the right  |
| hand side, followed by a node whose symbol is a null pointer to    |
| terminate the rule.  The next node is the lhs of the following     |
| rule.                                                              |
|                                                                    |
| Each action is read by parse_action, which stores its text in the  |
| node heading the rule.  An action found in the middle of a rule    |
| is moved to a generated empty rule whose lhs is a gensym, and      |
| that gensym is inserted into the rhs of the host rule.             |
|                                                                    |
| Once the rules are read, the rule `axiom: startsymbol eoftoken'    |
| is inserted in front of GRAMMAR.                                   |
|                                                                    |
| Bison used to allow some %directives in the rules sections, but    |
| this is no longer considered appropriate: (i) the documented       |
| grammar doesn't claim it, (ii), it would promote bad style, (iii), |
| error recovery for %directives consists in skipping the junk until |
| a `%' is seen, which helps resynchronizing.  This scheme is        |
| definitely wrong in the rules section.                             |
`-------------------------------------------------------------------*/

static void
readgram (void)
{
  /* Token most recently returned by lex.  */
  token_t t;
  /* Lhs of the rule being read.  It survives from one rule to the
     next, so a rule introduced by `|' reuses the previous lhs.  */
  symbol_t *lhs = NULL;
  /* Node most recently allocated.  */
  symbol_list *p = NULL;
  /* Last node of the chain built so far; new nodes are appended
     after it.  */
  symbol_list *p1 = NULL;

  /* Points to first symbol_list of current rule.  Its symbol is the
     lhs of the rule.  */
  symbol_list *crule = NULL;
  /* Points to the symbol_list preceding crule, or NULL when crule is
     the head of GRAMMAR.  */
  symbol_list *crule1 = NULL;

  t = lex ();

  /* One iteration per rule (or per invalid token).  */
  while (t != tok_two_percents && t != tok_eof)
    if (t == tok_identifier || t == tok_bar)
      {
        /* Nonzero when the last thing read in the rhs is an action
           that has not been turned into a mid-rule dummy yet.  */
        int action_flag = 0;
        /* Number of symbols in rhs of this rule so far */
        int rulelength = 0;
        /* Number of actions seen in this rule.  */
        int xactions = 0;       /* JF for error checking */
        /* First symbol of the rhs, if any; used to check the type of
           the default action.  */
        symbol_t *first_rhs = 0;

        if (t == tok_identifier)
          {
            lhs = symval;

            /* Without an explicit start symbol, the lhs of the first
               rule is used.  */
            if (!start_flag)
              {
                startsymbol = lhs;
                start_flag = 1;
              }

            t = lex ();
            if (t != tok_colon)
              {
                complain (_("ill-formed rule: initial symbol not followed by colon"));
                unlex (t);
              }
          }

        if (nrules == 0 && t == tok_bar)
          {
            complain (_("grammar starts with vertical bar"));
            lhs = symval;       /* BOGUS: use a random symval */
          }
        /* start a new rule and record its lhs.  */

        /* NRITEMS is bumped here for the item that closes the rule
           (packgram emits one such item per rule).  */
        ++nrules;
        ++nritems;

        p = symbol_list_new (lhs);

        /* Append the new rule head to the chain, remembering its
           predecessor in CRULE1.  */
        crule1 = p1;
        if (p1)
          p1->next = p;
        else
          grammar = p;

        p1 = p;
        crule = p;

        /* mark the rule's lhs as a nonterminal if not already so.  */

        if (lhs->class == unknown_sym)
          {
            lhs->class = nterm_sym;
            lhs->number = nvars;
            ++nvars;
          }
        else if (lhs->class == token_sym)
          complain (_("rule given for %s, which is a token"), lhs->tag);

        /* read the rhs of the rule.  */

        for (;;)
          {
            t = lex ();
            /* `%prec SYMBOL': the token following %prec is read for
               its SYMVAL (its kind is not checked), then the token
               after it is fetched.  */
            if (t == tok_prec)
              {
                t = lex ();
                crule->ruleprec = symval;
                t = lex ();
              }

            /* Anything but a symbol or an action ends the rhs.  */
            if (!(t == tok_identifier || t == tok_left_curly))
              break;

            /* If next token is an identifier, see if a colon follows it.
               If one does, exit this rule now.  */
            if (t == tok_identifier)
              {
                symbol_t *ssave;
                token_t t1;

                /* Peek at the next token, then restore SYMVAL, which
                   is saved around the look-ahead.  */
                ssave = symval;
                t1 = lex ();
                unlex (t1);
                symval = ssave;
                if (t1 == tok_colon)
                  {
                    warn (_("previous rule lacks an ending `;'"));
                    break;
                  }

                if (!first_rhs) /* JF */
                  first_rhs = symval;
                /* Not followed by colon =>
                   process as part of this rule's rhs.  */
              }

            /* If we just passed an action, that action was in the middle
               of a rule, so make a dummy rule to reduce it to a
               non-terminal.  */
            if (action_flag)
              {
                /* Since the action was written out with this rule's
                   number, we must give the new rule this number by
                   inserting the new rule before it.  */

                /* Make a dummy nonterminal, a gensym.  */
                symbol_t *sdummy = gensym ();

                /* Make a new rule, whose body is empty, before the
                   current one, so that the action just read can
                   belong to it.  */
                ++nrules;
                ++nritems;
                p = symbol_list_new (sdummy);
                /* Attach its lineno to that of the host rule. */
                p->line = crule->line;
                /* Move the action from the host rule to this one. */
                p->action = crule->action;
                p->action_line = crule->action_line;
                crule->action = NULL;

                /* Link the dummy rule after the predecessor of the
                   host rule.  */
                if (crule1)
                  crule1->next = p;
                else
                  grammar = p;
                /* End of the rule.  This terminator becomes the new
                   predecessor of the host rule.  */
                crule1 = symbol_list_new (NULL);
                crule1->next = crule;

                p->next = crule1;

                /* Insert the dummy generated by that rule into this
                   rule.  */
                ++nritems;
                p = symbol_list_new (sdummy);
                p1->next = p;
                p1 = p;

                action_flag = 0;
              }

            if (t == tok_identifier)
              {
                /* Append the symbol to the rhs.  */
                ++nritems;
                p = symbol_list_new (symval);
                p1->next = p;
                p1 = p;
              }
            else                /* handle an action.  */
              {
                parse_action (crule, rulelength);
                action_flag = 1;
                ++xactions;     /* JF */
              }
            /* Symbols and actions alike count in the rule length.  */
            ++rulelength;
          }                     /* end of  read rhs of rule */

        /* Put an empty link in the list to mark the end of this rule  */
        p = symbol_list_new (NULL);
        p1->next = p;
        p1 = p;

        /* The loop above handles one %prec per iteration and then
           reads on; getting here with a %prec means that the token
           read after a %prec clause was another %prec.  */
        if (t == tok_prec)
          {
            complain (_("two @prec's in a row"));
            t = lex ();
            crule->ruleprec = symval;
            t = lex ();
          }

        if (t == tok_left_curly)
          {
            /* This case never occurs -wjh */
            if (action_flag)
              complain (_("two actions at end of one rule"));
            parse_action (crule, rulelength);
            action_flag = 1;
            ++xactions; /* -wjh */
            t = lex ();
          }
        /* If $$ is being set in default way, report if any type
           mismatch.  */
        else if (!xactions
                 && first_rhs && lhs->type_name != first_rhs->type_name)
          {
            /* The pointers differ: it is a clash unless both names
               exist and compare equal.  */
            if (lhs->type_name == 0
                || first_rhs->type_name == 0
                || strcmp (lhs->type_name, first_rhs->type_name))
              complain (_("type clash (`%s' `%s') on default action"),
                        lhs->type_name ? lhs->type_name : "",
                        first_rhs->type_name ? first_rhs->type_name : "");
          }
        /* Warn if there is no default for $$ but we need one.  */
        else if (!xactions && !first_rhs && lhs->type_name != 0)
          complain (_("empty rule for typed nonterminal, and no action"));
        if (t == tok_two_percents || t == tok_eof)
          warn (_("previous rule lacks an ending `;'"));
        if (t == tok_semicolon)
          t = lex ();
      }
    else
      {
        /* Neither a symbol nor `|': report the token and skip it.  */
        complain (_("invalid input: %s"), quote (token_buffer));
        t = lex ();
      }

  /* grammar has been read.  Do some checking */

  if (nrules == 0)
    fatal (_("no rules in the input grammar"));

  /* Report any undefined symbols and consider them nonterminals.  */
  symbols_check_defined ();

  /* Insert the initial rule, which line is that of the first rule
     (not that of the start symbol):

     axiom: %start EOF.

     It accounts for one more rule and three more items: its two rhs
     symbols and the item that closes the rule.  */
  p = symbol_list_new (axiom);
  p->line = grammar->line;
  p->next = symbol_list_new (startsymbol);
  p->next->next = symbol_list_new (eoftoken);
  p->next->next->next = symbol_list_new (NULL);
  p->next->next->next->next = grammar;
  nrules += 1;
  nritems += 3;
  grammar = p;

  if (nsyms > SHRT_MAX)
    fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
           SHRT_MAX);

  assert (nsyms == ntokens + nvars);
}
1455
1456 /* At the end of the grammar file, some C source code must
1457    be stored. It is going to be associated to the epilogue
1458    directive.  */
1459 static void
1460 read_additionnal_code (void)
1461 {
1462   int c;
1463   struct obstack el_obstack;
1464
1465   obstack_init (&el_obstack);
1466
1467   if (!no_lines_flag)
1468     {
1469       obstack_fgrow2 (&el_obstack, muscle_find ("linef"),
1470                       lineno, quotearg_style (c_quoting_style,
1471                                               muscle_find ("filename")));
1472     }
1473
1474   while ((c = getc (finput)) != EOF)
1475     copy_character (&el_obstack, c);
1476
1477   obstack_1grow (&el_obstack, 0);
1478   muscle_insert ("epilogue", obstack_finish (&el_obstack));
1479 }
1480
1481 \f
1482 /*---------------------------------------------------------------.
1483 | Convert the rules into the representation using RRHS, RLHS and |
1484 | RITEM.                                                         |
1485 `---------------------------------------------------------------*/
1486
1487 static void
1488 packgram (void)
1489 {
1490   unsigned int itemno;
1491   int ruleno;
1492   symbol_list *p;
1493
1494   ritem = XCALLOC (item_number_t, nritems);
1495   rules = XCALLOC (rule_t, nrules) - 1;
1496
1497   itemno = 0;
1498   ruleno = 1;
1499
1500   p = grammar;
1501   while (p)
1502     {
1503       symbol_t *ruleprec = p->ruleprec;
1504       rules[ruleno].user_number = ruleno;
1505       rules[ruleno].number = ruleno;
1506       rules[ruleno].lhs = p->sym;
1507       rules[ruleno].rhs = ritem + itemno;
1508       rules[ruleno].line = p->line;
1509       rules[ruleno].useful = TRUE;
1510       rules[ruleno].action = p->action;
1511       rules[ruleno].action_line = p->action_line;
1512
1513       p = p->next;
1514       while (p && p->sym)
1515         {
1516           /* item_number_t = symbol_number_t.
1517              But the former needs to contain more: negative rule numbers. */
1518           ritem[itemno++] = symbol_number_as_item_number (p->sym->number);
1519           /* A rule gets by default the precedence and associativity
1520              of the last token in it.  */
1521           if (p->sym->class == token_sym)
1522             rules[ruleno].prec = p->sym;
1523           if (p)
1524             p = p->next;
1525         }
1526
1527       /* If this rule has a %prec,
1528          the specified symbol's precedence replaces the default.  */
1529       if (ruleprec)
1530         {
1531           rules[ruleno].precsym = ruleprec;
1532           rules[ruleno].prec = ruleprec;
1533         }
1534       ritem[itemno++] = -ruleno;
1535       ++ruleno;
1536
1537       if (p)
1538         p = p->next;
1539     }
1540
1541   assert (itemno == nritems);
1542
1543   if (trace_flag)
1544     ritem_print (stderr);
1545 }
1546 \f
1547 /*------------------------------------------------------------------.
1548 | Read in the grammar specification and record it in the format     |
1549 | described in gram.h.  All actions are copied into ACTION_OBSTACK, |
1550 | in each case forming the body of a C function (YYACTION) which    |
1551 | contains a switch statement to decide which action to execute.    |
1552 `------------------------------------------------------------------*/
1553
1554 void
1555 reader (void)
1556 {
1557   lex_init ();
1558   lineno = 1;
1559
1560   /* Initialize the muscle obstack.  */
1561   obstack_init (&muscle_obstack);
1562
1563   /* Initialize the symbol table.  */
1564   symbols_new ();
1565
1566   /* Construct the axiom symbol. */
1567   axiom = getsym ("$axiom");
1568   axiom->class = nterm_sym;
1569   axiom->number = nvars++;
1570
1571   /* Construct the error token */
1572   errtoken = getsym ("error");
1573   errtoken->class = token_sym;
1574   errtoken->number = ntokens++;
1575
1576   /* Construct a token that represents all undefined literal tokens.
1577      It is always token number 2.  */
1578   undeftoken = getsym ("$undefined.");
1579   undeftoken->class = token_sym;
1580   undeftoken->number = ntokens++;
1581
1582   /* Initialize the obstacks. */
1583   obstack_init (&action_obstack);
1584   obstack_init (&output_obstack);
1585   obstack_init (&pre_prologue_obstack);
1586   obstack_init (&post_prologue_obstack);
1587
1588   finput = xfopen (infile, "r");
1589
1590   /* Read the declaration section.  Copy %{ ... %} groups to
1591      TABLE_OBSTACK and FDEFINES file.  Also notice any %token, %left,
1592      etc. found there.  */
1593   read_declarations ();
1594
1595   /* If the user did not define her EOFTOKEN, do it now. */
1596   if (!eoftoken)
1597     {
1598       eoftoken = getsym ("$");
1599       eoftoken->class = token_sym;
1600       eoftoken->number = 0;
1601       /* Value specified by POSIX.  */
1602       eoftoken->user_token_number = 0;
1603     }
1604
1605   /* Read in the grammar, build grammar in list form.  Write out
1606      actions.  */
1607   readgram ();
1608   /* Some C code is given at the end of the grammar file. */
1609   read_additionnal_code ();
1610
1611   lex_free ();
1612   xfclose (finput);
1613
1614   /* Assign the symbols their symbol numbers.  Write #defines for the
1615      token symbols into FDEFINES if requested.  */
1616   symbols_pack ();
1617
1618   /* Convert the grammar into the format described in gram.h.  */
1619   packgram ();
1620
1621   /* The grammar as a symbol_list is no longer needed. */
1622   LIST_FREE (symbol_list, grammar);
1623 }