src/reader.c

   1 /* Input parser for bison
   2    Copyright 1984, 1986, 1989, 1992, 1998, 2000, 2001
   3    Free Software Foundation, Inc.
   4
   5    This file is part of Bison, the GNU Compiler Compiler.
   6
   7    Bison is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 2, or (at your option)
  10    any later version.
  11
  12    Bison is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with Bison; see the file COPYING.  If not, write to
  19    the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20    Boston, MA 02111-1307, USA.  */
  21
  22
   23 #include "system.h"
      #include <limits.h>
   24 #include "quotearg.h"
   25 #include "quote.h"
   26 #include "getargs.h"
   27 #include "files.h"
   28 #include "symtab.h"
   29 #include "options.h"
   30 #include "lex.h"
   31 #include "gram.h"
   32 #include "complain.h"
   33 #include "output.h"
   34 #include "reader.h"
   35 #include "conflicts.h"
   36 #include "muscle_tab.h"
  37
   38 typedef struct symbol_list   /* One node of a singly linked list of symbols.  */
   39 {
   40   struct symbol_list *next;  /* Following node, or NULL at the end of the list.  */
   41   bucket *sym;               /* Symbol carried by this node; get_type_name stops at a NULL sym.  */
   42   int line;                  /* Value of lineno when the node was created.  */
   43
   44   /* The action is attached to the LHS of a rule. */
   45   const char *action;
   46   int action_line;
   47
   48   /* The guard is attached to the LHS of a rule. */
   49   const char *guard;
   50   int guard_line;
   51   bucket *ruleprec;          /* NOTE(review): presumably the %prec symbol of the rule -- confirm.  */
   52 } symbol_list;
  53
   54 int lineno;                  /* Current input line; bumped each time a newline is consumed.  */
   55 static symbol_list *grammar; /* NOTE(review): presumably the list of rules read so far -- confirm.  */
   56 static int start_flag;       /* Set to 1 once a valid %start declaration has been parsed.  */
   57 static bucket *startval;     /* Symbol named by the last valid %start declaration.  */
   58
   59 /* Nonzero if components of semantic values are used, implying
   60    they must be unions.  */
   61 static int value_components_used;
   62
   63 /* Nonzero if %union has been seen.  */
   64 static int typed;
   65
   66 /* Incremented for each %left, %right or %nonassoc seen */
   67 static int lastprec;
   68
   69 static bucket *errtoken = NULL;   /* NOTE(review): presumably the `error' token -- confirm.  */
   70 static bucket *undeftoken = NULL; /* NOTE(review): presumably the `undefined' token -- confirm.  */
   71 static bucket *eoftoken = NULL;   /* Set when a %token/%nterm declaration gives a symbol number 0.  */
   72 static bucket *axiom = NULL;      /* NOTE(review): presumably the start nonterminal -- confirm.  */
  73
  74 static symbol_list *
  75 symbol_list_new (bucket *sym)
  76 {
  77   symbol_list *res = XMALLOC (symbol_list, 1);
  78   res->next = NULL;
  79   res->sym = sym;
  80   res->line = lineno;
  81   res->action = NULL;
  82   res->action_line = 0;
  83   res->guard = NULL;
  84   res->guard_line = 0;
  85   res->ruleprec = NULL;
  86   return res;
  87 }
  88
  89 \f
  90
  91 /*===================\
  92 | Low level lexing.  |
  93 \===================*/
  94
  95 static void
  96 skip_to_char (int target)
  97 {
  98   int c;
  99   if (target == '\n')
 100     complain (_("   Skipping to next \\n"));
 101   else
 102     complain (_("   Skipping to next %c"), target);
 103
 104   do
 105     c = skip_white_space ();
 106   while (c != target && c != EOF);
 107   if (c != EOF)
 108     ungetc (c, finput);
 109 }
 110
 111
 112 /*---------------------------------------------------------.
 113 | Read a signed integer from STREAM and return its value.  |
 114 `---------------------------------------------------------*/
 115
 116 static inline int
 117 read_signed_integer (FILE *stream)
 118 {
 119   int c = getc (stream);
 120   int sign = 1;
 121   int n = 0;
 122
 123   if (c == '-')
 124     {
 125       c = getc (stream);
 126       sign = -1;
 127     }
 128
 129   while (isdigit (c))
 130     {
 131       n = 10 * n + (c - '0');
 132       c = getc (stream);
 133     }
 134
 135   ungetc (c, stream);
 136
 137   return sign * n;
 138 }
 139 \f
 140 /*--------------------------------------------------------------.
 141 | Get the data type (alternative in the union) of the value for |
 142 | symbol N in rule RULE.                                        |
 143 `--------------------------------------------------------------*/
 144
 145 static char *
 146 get_type_name (int n, symbol_list *rule)
 147 {
 148   int i;
 149   symbol_list *rp;
 150
 151   if (n < 0)
 152     {
 153       complain (_("invalid $ value"));
 154       return NULL;
 155     }
 156
 157   rp = rule;
 158   i = 0;
 159
 160   while (i < n)
 161     {
 162       rp = rp->next;
 163       if (rp == NULL || rp->sym == NULL)
 164         {
 165           complain (_("invalid $ value"));
 166           return NULL;
 167         }
 168       i++;
 169     }
 170
 171   return rp->sym->type_name;
 172 }
 173 \f
 174 /*------------------------------------------------------------.
 175 | Dump the string from FIN to OOUT if non null.  MATCH is the |
 176 | delimiter of the string (either ' or ").                    |
 177 `------------------------------------------------------------*/
 178
 179 static inline void
 180 copy_string2 (FILE *fin, struct obstack *oout, int match, int store)
 181 {
 182   int c;
 183
 184   if (store)
 185     obstack_1grow (oout, match);
 186
 187   c = getc (fin);
 188
 189   while (c != match)
 190     {
 191       if (c == EOF)
 192         fatal (_("unterminated string at end of file"));
 193       if (c == '\n')
 194         {
 195           complain (_("unterminated string"));
 196           ungetc (c, fin);
 197           c = match;            /* invent terminator */
 198           continue;
 199         }
 200
 201       obstack_1grow (oout, c);
 202
 203       if (c == '\\')
 204         {
 205           c = getc (fin);
 206           if (c == EOF)
 207             fatal (_("unterminated string at end of file"));
 208           obstack_1grow (oout, c);
 209
 210           if (c == '\n')
 211             lineno++;
 212         }
 213
 214       c = getc (fin);
 215     }
 216
 217   if (store)
 218     obstack_1grow (oout, c);
 219 }
 220
 221 /* FIXME. */
 222
 223 static inline void
 224 copy_string (FILE *fin, struct obstack *oout, int match)
 225 {
 226   copy_string2 (fin, oout, match, 1);
 227 }
 228
 229 /* FIXME. */
 230
 231 static inline void
 232 copy_identifier (FILE *fin, struct obstack *oout)
 233 {
 234   int c;
 235
 236   while (isalnum (c = getc (fin)) || c == '_')
 237     obstack_1grow (oout, c);
 238
 239   ungetc (c, fin);
 240 }
 241
 242
 243 /*------------------------------------------------------------------.
 244 | Dump the wannabee comment from IN to OOUT.  In fact we just saw a |
 245 | `/', which might or might not be a comment.  In any case, copy    |
 246 | what we saw.                                                      |
 247 `------------------------------------------------------------------*/
 248
 249 static inline void
 250 copy_comment (FILE *fin, struct obstack *oout)
 251 {
 252   int cplus_comment;
 253   int ended;
 254   int c;
 255
 256   /* We read a `/', output it. */
 257   obstack_1grow (oout, '/');
 258
 259   switch ((c = getc (fin)))
 260     {
 261     case '/':
 262       cplus_comment = 1;
 263       break;
 264     case '*':
 265       cplus_comment = 0;
 266       break;
 267     default:
 268       ungetc (c, fin);
 269       return;
 270     }
 271
 272   obstack_1grow (oout, c);
 273   c = getc (fin);
 274
 275   ended = 0;
 276   while (!ended)
 277     {
 278       if (!cplus_comment && c == '*')
 279         {
 280           while (c == '*')
 281             {
 282               obstack_1grow (oout, c);
 283               c = getc (fin);
 284             }
 285
 286           if (c == '/')
 287             {
 288               obstack_1grow (oout, c);
 289               ended = 1;
 290             }
 291         }
 292       else if (c == '\n')
 293         {
 294           lineno++;
 295           obstack_1grow (oout, c);
 296           if (cplus_comment)
 297             ended = 1;
 298           else
 299             c = getc (fin);
 300         }
 301       else if (c == EOF)
 302         fatal (_("unterminated comment"));
 303       else
 304         {
 305           obstack_1grow (oout, c);
 306           c = getc (fin);
 307         }
 308     }
 309 }
 310
 311
 312 /*-----------------------------------------------------------------.
 313 | FIN is pointing to a location (i.e., a `@').  Output to OOUT a   |
 314 | reference to this location. STACK_OFFSET is the number of values |
 315 | in the current rule so far, which says where to find `$0' with   |
 316 | respect to the top of the stack.                                 |
 317 `-----------------------------------------------------------------*/
 318
 319 static inline void
 320 copy_at (FILE *fin, struct obstack *oout, int stack_offset)
 321 {
 322   int c;
 323
 324   c = getc (fin);
 325   if (c == '$')
 326     {
 327       obstack_sgrow (oout, "yyloc");
 328       locations_flag = 1;
 329     }
 330   else if (isdigit (c) || c == '-')
 331     {
 332       int n;
 333
 334       ungetc (c, fin);
 335       n = read_signed_integer (fin);
 336       if (n > stack_offset)
 337         complain (_("invalid value: %s%d"), "@", n);
 338       else
 339         {
 340           /* Offset is always 0 if parser has already popped the stack
 341              pointer.  */
 342           obstack_fgrow1 (oout, "yylsp[%d]",
 343                           n - (semantic_parser ? 0 : stack_offset));
 344           locations_flag = 1;
 345         }
 346     }
 347   else
 348     {
 349       char buf[] = "@c";
 350       buf[1] = c;
 351       complain (_("%s is invalid"), quote (buf));
 352     }
 353 }
 354
 355
 356 /*-------------------------------------------------------------------.
 357 | FIN is pointing to a wannabee semantic value (i.e., a `$').        |
 358 |                                                                    |
 359 | Possible inputs: $[<TYPENAME>]($|integer)                          |
 360 |                                                                    |
 361 | Output to OOUT a reference to this semantic value. STACK_OFFSET is |
 362 | the number of values in the current rule so far, which says where  |
 363 | to find `$0' with respect to the top of the stack.                 |
 364 `-------------------------------------------------------------------*/
 365
 366 static inline void
 367 copy_dollar (FILE *fin, struct obstack *oout,
 368              symbol_list *rule, int stack_offset)
 369 {
 370   int c = getc (fin);
 371   const char *type_name = NULL;
 372
 373   /* Get the type name if explicit. */
 374   if (c == '<')
 375     {
 376       read_type_name (fin);
 377       type_name = token_buffer;
 378       value_components_used = 1;
 379       c = getc (fin);
 380     }
 381
 382   if (c == '$')
 383     {
 384       obstack_sgrow (oout, "yyval");
 385
 386       if (!type_name)
 387         type_name = get_type_name (0, rule);
 388       if (type_name)
 389         obstack_fgrow1 (oout, ".%s", type_name);
 390       if (!type_name && typed)
 391         complain (_("$$ of `%s' has no declared type"),
 392                   rule->sym->tag);
 393     }
 394   else if (isdigit (c) || c == '-')
 395     {
 396       int n;
 397       ungetc (c, fin);
 398       n = read_signed_integer (fin);
 399
 400       if (n > stack_offset)
 401         complain (_("invalid value: %s%d"), "$", n);
 402       else
 403         {
 404           if (!type_name && n > 0)
 405             type_name = get_type_name (n, rule);
 406
 407           /* Offset is always 0 if parser has already popped the stack
 408              pointer.  */
 409           obstack_fgrow1 (oout, "yyvsp[%d]",
 410                           n - (semantic_parser ? 0 : stack_offset));
 411
 412           if (type_name)
 413             obstack_fgrow1 (oout, ".%s", type_name);
 414           if (!type_name && typed)
 415             complain (_("$%d of `%s' has no declared type"),
 416                       n, rule->sym->tag);
 417         }
 418     }
 419   else
 420     {
 421       char buf[] = "$c";
 422       buf[1] = c;
 423       complain (_("%s is invalid"), quote (buf));
 424     }
 425 }
 426 \f
 427 /*-------------------------------------------------------------------.
 428 | Copy the contents of a `%{ ... %}' into the definitions file.  The |
 429 | `%{' has already been read.  Return after reading the `%}'.        |
 430 `-------------------------------------------------------------------*/
 431
 432 static void
 433 copy_definition (void)
 434 {
 435   int c;
 436   /* -1 while reading a character if prev char was %. */
 437   int after_percent;
 438
 439   if (!no_lines_flag)
 440     {
 441       obstack_fgrow2 (&attrs_obstack, muscle_find ("linef"),
 442                       lineno, quotearg_style (c_quoting_style,
 443                                               muscle_find ("filename")));
 444     }
 445
 446   after_percent = 0;
 447
 448   c = getc (finput);
 449
 450   for (;;)
 451     {
 452       switch (c)
 453         {
 454         case '\n':
 455           obstack_1grow (&attrs_obstack, c);
 456           lineno++;
 457           break;
 458
 459         case '%':
 460           after_percent = -1;
 461           break;
 462
 463         case '\'':
 464         case '"':
 465           copy_string (finput, &attrs_obstack, c);
 466           break;
 467
 468         case '/':
 469           copy_comment (finput, &attrs_obstack);
 470           break;
 471
 472         case EOF:
 473           fatal ("%s", _("unterminated `%{' definition"));
 474
 475         default:
 476           obstack_1grow (&attrs_obstack, c);
 477         }
 478
 479       c = getc (finput);
 480
 481       if (after_percent)
 482         {
 483           if (c == '}')
 484             return;
 485           obstack_1grow (&attrs_obstack, '%');
 486         }
 487       after_percent = 0;
 488     }
 489 }
 490
 491
 492 /*-------------------------------------------------------------------.
 493 | Parse what comes after %token or %nterm.  For %token, WHAT_IS is   |
 494 | token_sym and WHAT_IS_NOT is nterm_sym.  For %nterm, the arguments |
 495 | are reversed.                                                      |
 496 `-------------------------------------------------------------------*/
 497
 498 static void
 499 parse_token_decl (symbol_class what_is, symbol_class what_is_not)
 500 {
 501   token_t token = tok_undef;
 502   char *typename = NULL;
 503
 504   /* The symbol being defined.  */
 505   struct bucket *symbol = NULL;
 506
 507   /* After `%token' and `%nterm', any number of symbols maybe be
 508      defined.  */
 509   for (;;)
 510     {
 511       int tmp_char = ungetc (skip_white_space (), finput);
 512
 513       /* `%' (for instance from `%token', or from `%%' etc.) is the
 514          only valid means to end this declaration.  */
 515       if (tmp_char == '%')
 516         return;
 517       if (tmp_char == EOF)
 518         fatal (_("Premature EOF after %s"), token_buffer);
 519
 520       token = lex ();
 521       if (token == tok_comma)
 522         {
 523           symbol = NULL;
 524           continue;
 525         }
 526       if (token == tok_typename)
 527         {
 528           typename = xstrdup (token_buffer);
 529           value_components_used = 1;
 530           symbol = NULL;
 531         }
 532       else if (token == tok_identifier && *symval->tag == '\"' && symbol)
 533         {
 534           if (symval->alias)
 535             warn (_("symbol `%s' used more than once as a literal string"),
 536                   symval->tag);
 537           else if (symbol->alias)
 538             warn (_("symbol `%s' given more than one literal string"),
 539                   symbol->tag);
 540           else
 541             {
 542               symval->class = token_sym;
 543               symval->type_name = typename;
 544               symval->user_token_number = symbol->user_token_number;
 545               symbol->user_token_number = SALIAS;
 546               symval->alias = symbol;
 547               symbol->alias = symval;
 548               /* symbol and symval combined are only one symbol */
 549               nsyms--;
 550             }
 551           symbol = NULL;
 552         }
 553       else if (token == tok_identifier)
 554         {
 555           int oldclass = symval->class;
 556           symbol = symval;
 557
 558           if (symbol->class == what_is_not)
 559             complain (_("symbol %s redefined"), symbol->tag);
 560           symbol->class = what_is;
 561           if (what_is == nterm_sym && oldclass != nterm_sym)
 562             symbol->value = nvars++;
 563
 564           if (typename)
 565             {
 566               if (symbol->type_name == NULL)
 567                 symbol->type_name = typename;
 568               else if (strcmp (typename, symbol->type_name) != 0)
 569                 complain (_("type redeclaration for %s"), symbol->tag);
 570             }
 571         }
 572       else if (symbol && token == tok_number)
 573         {
 574           symbol->user_token_number = numval;
 575           /* User defined EOF token? */
 576           if (numval == 0)
 577             eoftoken = symbol;
 578         }
 579       else
 580         {
 581           complain (_("`%s' is invalid in %s"),
 582                     token_buffer,
 583                     (what_is == token_sym) ? "%token" : "%nterm");
 584           skip_to_char ('%');
 585         }
 586     }
 587
 588 }
 589
 590
 591 /*------------------------------.
 592 | Parse what comes after %start |
 593 `------------------------------*/
 594
 595 static void
 596 parse_start_decl (void)
 597 {
 598   if (start_flag)
 599     complain (_("multiple %s declarations"), "%start");
 600   if (lex () != tok_identifier)
 601     complain (_("invalid %s declaration"), "%start");
 602   else
 603     {
 604       start_flag = 1;
 605       startval = symval;
 606     }
 607 }
 608
 609 /*-----------------------------------------------------------.
 610 | read in a %type declaration and record its information for |
 611 | get_type_name to access                                    |
 612 `-----------------------------------------------------------*/
 613
 614 static void
 615 parse_type_decl (void)
 616 {
 617   char *name;
 618
 619   if (lex () != tok_typename)
 620     {
 621       complain ("%s", _("%type declaration has no <typename>"));
 622       skip_to_char ('%');
 623       return;
 624     }
 625
 626   name = xstrdup (token_buffer);
 627
 628   for (;;)
 629     {
 630       token_t t;
 631       int tmp_char = ungetc (skip_white_space (), finput);
 632
 633       if (tmp_char == '%')
 634         return;
 635       if (tmp_char == EOF)
 636         fatal (_("Premature EOF after %s"), token_buffer);
 637
 638       t = lex ();
 639
 640       switch (t)
 641         {
 642
 643         case tok_comma:
 644         case tok_semicolon:
 645           break;
 646
 647         case tok_identifier:
 648           if (symval->type_name == NULL)
 649             symval->type_name = name;
 650           else if (strcmp (name, symval->type_name) != 0)
 651             complain (_("type redeclaration for %s"), symval->tag);
 652
 653           break;
 654
 655         default:
 656           complain (_("invalid %%type declaration due to item: %s"),
 657                     token_buffer);
 658           skip_to_char ('%');
 659         }
 660     }
 661 }
 662
 663
 664
 665 /*----------------------------------------------------------------.
 666 | Read in a %left, %right or %nonassoc declaration and record its |
 667 | information.                                                    |
 668 `----------------------------------------------------------------*/
 669
 670 static void
 671 parse_assoc_decl (associativity assoc)
 672 {
 673   char *name = NULL;
 674   int prev = 0;
 675
 676   lastprec++;                   /* Assign a new precedence level, never 0.  */
 677
 678   for (;;)
 679     {
 680       token_t t;
 681       int tmp_char = ungetc (skip_white_space (), finput);
 682
 683       if (tmp_char == '%')
 684         return;
 685       if (tmp_char == EOF)
 686         fatal (_("Premature EOF after %s"), token_buffer);
 687
 688       t = lex ();
 689
 690       switch (t)
 691         {
 692         case tok_typename:
 693           name = xstrdup (token_buffer);
 694           break;
 695
 696         case tok_comma:
 697           break;
 698
 699         case tok_identifier:
 700           if (symval->prec != 0)
 701             complain (_("redefining precedence of %s"), symval->tag);
 702           symval->prec = lastprec;
 703           symval->assoc = assoc;
 704           if (symval->class == nterm_sym)
 705             complain (_("symbol %s redefined"), symval->tag);
 706           symval->class = token_sym;
 707           if (name)
 708             {                   /* record the type, if one is specified */
 709               if (symval->type_name == NULL)
 710                 symval->type_name = name;
 711               else if (strcmp (name, symval->type_name) != 0)
 712                 complain (_("type redeclaration for %s"), symval->tag);
 713             }
 714           break;
 715
 716         case tok_number:
 717           if (prev == tok_identifier)
 718             {
 719               symval->user_token_number = numval;
 720             }
 721           else
 722             {
 723               complain (_
 724                         ("invalid text (%s) - number should be after identifier"),
 725 token_buffer);
 726               skip_to_char ('%');
 727             }
 728           break;
 729
 730         case tok_semicolon:
 731           return;
 732
 733         default:
 734           complain (_("unexpected item: %s"), token_buffer);
 735           skip_to_char ('%');
 736         }
 737
 738       prev = t;
 739     }
 740 }
 741
 742
 743
 744 /*--------------------------------------------------------------.
 745 | Copy the union declaration into the stype muscle              |
 746 | (and fdefines),  where it is made into the definition of      |
 747 | YYSTYPE, the type of elements of the parser value stack.      |
 748 `--------------------------------------------------------------*/
 749
 750 static void
 751 parse_union_decl (void)
 752 {
 753   int c;
 754   int count = 0;
 755   bool done = FALSE;
 756   struct obstack union_obstack;
 757   if (typed)
 758     complain (_("multiple %s declarations"), "%union");
 759
 760   typed = 1;
 761
 762   obstack_init (&union_obstack);
 763   obstack_sgrow (&union_obstack, "union");
 764
 765   while (!done)
 766     {
 767       c = xgetc (finput);
 768
 769       /* If C contains '/', it is output by copy_comment ().  */
 770       if (c != '/')
 771         obstack_1grow (&union_obstack, c);
 772
 773       switch (c)
 774         {
 775         case '\n':
 776           lineno++;
 777           break;
 778
 779         case '/':
 780           copy_comment (finput, &union_obstack);
 781           break;
 782
 783         case '{':
 784           count++;
 785           break;
 786
 787         case '}':
 788           /* FIXME: Errr.  How could this happen???. --akim */
 789           if (count == 0)
 790             complain (_("unmatched %s"), "`}'");
 791           count--;
 792           if (!count)
 793             done = TRUE;
 794           break;
 795         }
 796     }
 797
 798   /* JF don't choke on trailing semi */
 799   c = skip_white_space ();
 800   if (c != ';')
 801     ungetc (c, finput);
 802   obstack_1grow (&union_obstack, 0);
 803   muscle_insert ("stype", obstack_finish (&union_obstack));
 804 }
 805
 806
 807 /*-------------------------------------------------------.
 808 | Parse the declaration %expect N which says to expect N |
 809 | shift-reduce conflicts.                                |
 810 `-------------------------------------------------------*/
 811
 812 static void
 813 parse_expect_decl (void)
 814 {
 815   int c = skip_white_space ();
 816   ungetc (c, finput);
 817
 818   if (!isdigit (c))
 819     complain (_("argument of %%expect is not an integer"));
 820   else
 821     expected_conflicts = read_signed_integer (finput);
 822 }
 823
 824
 825 /*-------------------------------------------------------------------.
 826 | Parse what comes after %thong.  the full syntax is                 |
 827 |                                                                    |
 828 |                %thong <type> token number literal                  |
 829 |                                                                    |
 830 | the <type> or number may be omitted.  The number specifies the     |
 831 | user_token_number.                                                 |
 832 |                                                                    |
 833 | Two symbols are entered in the table, one for the token symbol and |
 834 | one for the literal.  Both are given the <type>, if any, from the  |
 835 | declaration.  The ->user_token_number of the first is SALIAS and   |
 836 | the ->user_token_number of the second is set to the number, if     |
 837 | any, from the declaration.  The two symbols are linked via         |
 838 | pointers in their ->alias fields.                                  |
 839 |                                                                    |
 840 | During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter,    |
 841 | only the literal string is retained it is the literal string that  |
 842 | is output to yytname                                               |
 843 `-------------------------------------------------------------------*/
 844
 845 static void
 846 parse_thong_decl (void)
 847 {
 848   token_t token;
 849   struct bucket *symbol;
 850   char *typename = 0;
 851   int usrtoknum = SUNDEF;
 852
 853   token = lex ();               /* fetch typename or first token */
 854   if (token == tok_typename)
 855     {
 856       typename = xstrdup (token_buffer);
 857       value_components_used = 1;
 858       token = lex ();           /* fetch first token */
 859     }
 860
 861   /* process first token */
 862
 863   if (token != tok_identifier)
 864     {
 865       complain (_("unrecognized item %s, expected an identifier"),
 866                 token_buffer);
 867       skip_to_char ('%');
 868       return;
 869     }
 870   symval->class = token_sym;
 871   symval->type_name = typename;
 872   symval->user_token_number = SALIAS;
 873   symbol = symval;
 874
 875   token = lex ();               /* get number or literal string */
 876
 877   if (token == tok_number)
 878     {
 879       usrtoknum = numval;
 880       token = lex ();           /* okay, did number, now get literal */
 881     }
 882
 883   /* process literal string token */
 884
 885   if (token != tok_identifier || *symval->tag != '\"')
 886     {
 887       complain (_("expected string constant instead of %s"), token_buffer);
 888       skip_to_char ('%');
 889       return;
 890     }
 891   symval->class = token_sym;
 892   symval->type_name = typename;
 893   symval->user_token_number = usrtoknum;
 894
 895   symval->alias = symbol;
 896   symbol->alias = symval;
 897
 898   /* symbol and symval combined are only one symbol.  */
 899   nsyms--;
 900 }
 901
 902
 903 static void
 904 parse_muscle_decl (void)
 905 {
 906   int ch = ungetc (skip_white_space (), finput);
 907   char *muscle_key;
 908   char *muscle_value;
 909
 910   /* Read key. */
 911   if (!isalpha (ch) && ch != '_')
 912     {
 913       complain (_("invalid %s declaration"), "%define");
 914       skip_to_char ('%');
 915       return;
 916     }
 917   copy_identifier (finput, &muscle_obstack);
 918   obstack_1grow (&muscle_obstack, 0);
 919   muscle_key = obstack_finish (&muscle_obstack);
 920
 921   /* Read value. */
 922   ch = skip_white_space ();
 923   if (ch != '"')
 924     {
 925       ungetc (ch, finput);
 926       if (ch != EOF)
 927         {
 928           complain (_("invalid %s declaration"), "%define");
 929           skip_to_char ('%');
 930           return;
 931         }
 932       else
 933         fatal (_("Premature EOF after %s"), "\"");
 934     }
 935   copy_string2 (finput, &muscle_obstack, '"', 0);
 936   obstack_1grow (&muscle_obstack, 0);
 937   muscle_value = obstack_finish (&muscle_obstack);
 938
 939   /* Store the (key, value) pair in the environment. */
 940   muscle_insert (muscle_key, muscle_value);
 941 }
 942
 943
 944
 945 /*---------------------------------.
 946 | Parse a double quoted parameter. |
 947 `---------------------------------*/
 948
 949 static const char *
 950 parse_dquoted_param (const char *from)
 951 {
 952   struct obstack param_obstack;
 953   const char *param = NULL;
 954   int c;
 955
 956   obstack_init (&param_obstack);
 957   c = skip_white_space ();
 958
 959   if (c != '"')
 960     {
 961       complain (_("invalid %s declaration"), from);
 962       ungetc (c, finput);
 963       skip_to_char ('%');
 964       return NULL;
 965     }
 966
 967   while ((c = literalchar ()) != '"')
 968     obstack_1grow (&param_obstack, c);
 969
 970   obstack_1grow (&param_obstack, '\0');
 971   param = obstack_finish (&param_obstack);
 972
 973   if (c != '"' || strlen (param) == 0)
 974     {
 975       complain (_("invalid %s declaration"), from);
 976       if (c != '"')
 977         ungetc (c, finput);
 978       skip_to_char ('%');
 979       return NULL;
 980     }
 981
 982   return param;
 983 }
 984
 985 /*----------------------------------.
 986 | Parse what comes after %skeleton. |
 987 `----------------------------------*/
 988
 989 static void
 990 parse_skel_decl (void)
 991 {
 992   skeleton = parse_dquoted_param ("%skeleton");
 993 }
 994
 995 /*----------------------------------------------------------------.
 996 | Read from finput until `%%' is seen.  Discard the `%%'.  Handle |
 997 | any `%' declarations, and copy the contents of any `%{ ... %}'  |
 998 | groups to ATTRS_OBSTACK.                                        |
 999 `----------------------------------------------------------------*/
1000
1001 static void
1002 read_declarations (void)
1003 {
       /* Takes no argument and returns nothing.  Each iteration handles
          what follows the next run of white space: a `%' directive is
          dispatched to its handler, end of file is a fatal error, and
          any other character is reported and skipped.  Returns when
          parse_percent_token yields tok_two_percents.  */
1004   for (;;)
1005     {
1006       int c = skip_white_space ();
1007
1008       if (c == '%')
1009         {
1010           token_t tok = parse_percent_token ();
1011
1012           switch (tok)
1013             {
                 /* End of the declarations: hand control back.  */
1014             case tok_two_percents:
1015               return;
1016
1017             case tok_percent_left_curly:
1018               copy_definition ();
1019               break;
1020
                 /* tok_token and tok_nterm share one handler, called with
                    the two symbol classes swapped; presumably the first
                    is the class to declare -- confirm against
                    parse_token_decl.  */
1021             case tok_token:
1022               parse_token_decl (token_sym, nterm_sym);
1023               break;
1024
1025             case tok_nterm:
1026               parse_token_decl (nterm_sym, token_sym);
1027               break;
1028
1029             case tok_type:
1030               parse_type_decl ();
1031               break;
1032
1033             case tok_start:
1034               parse_start_decl ();
1035               break;
1036
1037             case tok_union:
1038               parse_union_decl ();
1039               break;
1040
1041             case tok_expect:
1042               parse_expect_decl ();
1043               break;
1044
1045             case tok_thong:
1046               parse_thong_decl ();
1047               break;
1048
                 /* The three associativity directives share one handler,
                    told apart by its argument.  */
1049             case tok_left:
1050               parse_assoc_decl (left_assoc);
1051               break;
1052
1053             case tok_right:
1054               parse_assoc_decl (right_assoc);
1055               break;
1056
1057             case tok_nonassoc:
1058               parse_assoc_decl (non_assoc);
1059               break;
1060
1061             case tok_define:
1062               parse_muscle_decl ();
1063               break;
1064
1065             case tok_skel:
1066               parse_skel_decl ();
1067               break;
1068
                 /* Nothing is left to do here for this kind.  */
1069             case tok_noop:
1070               break;
1071
                 /* NOTE(review): these kinds are presumably dealt with
                    inside parse_percent_token and never returned; if one
                    does get here the program aborts.  */
1072             case tok_stropt:
1073             case tok_intopt:
1074             case tok_obsolete:
1075               abort ();
1076               break;
1077
                 /* Report the directive, then resume at the next `%'.
                    Last case of the switch, hence no break.  */
1078             case tok_illegal:
1079             default:
1080               complain (_("unrecognized: %s"), token_buffer);
1081               skip_to_char ('%');
1082             }
1083         }
1084       else if (c == EOF)
1085         fatal (_("no input grammar"));
1086       else
1087         {
               /* Build a one-character string holding C: the initializer
                  only provides the room and the terminating NUL.  */
1088           char buf[] = "c";
1089           buf[0] = c;
1090           complain (_("unknown character: %s"), quote (buf));
1091           skip_to_char ('%');
1092         }
1093     }
1094 }
1095 \f
1096 /*-------------------------------------------------------------------.
1097 | Assuming that a `{' has just been seen, copy everything up to the  |
1098 | matching `}' into the actions file.  STACK_OFFSET is the number of |
1099 | values in the current rule so far, which says where to find `$0'   |
1100 | with respect to the top of the stack.                              |
1101 |                                                                    |
1102 | This routine is used both for actions and guards.  Only            |
1103 | ACTION_OBSTACK is used, but this is fine, since we use only        |
1104 | pointers to relevant portions inside this obstack.                 |
1105 `-------------------------------------------------------------------*/
1106
1107 static void
1108 parse_braces (symbol_list *rule, int stack_offset)
1109 {
       /* RULE and STACK_OFFSET are only handed over to copy_dollar, and
          STACK_OFFSET to copy_at.  The text read from FINPUT, the final
          `}' excluded, is appended to ACTION_OBSTACK and NUL-terminated;
          the callers collect it with obstack_finish.  Returns nothing;
          hitting end of file before the matching `}' is a fatal
          error.  */
1110   int c;
       /* Number of `{' still open, the one seen by the caller included.  */
1111   int count;
1112
1113   count = 1;
1114   while (count > 0)
1115     {
           /* Every character is fetched by this loop condition, and by
              nothing else in this function.  */
1116       while ((c = getc (finput)) != '}')
1117         switch (c)
1118           {
1119           case '\n':
1120             obstack_1grow (&action_obstack, c);
1121             lineno++;
1122             break;
1123
1124           case '{':
1125             obstack_1grow (&action_obstack, c);
1126             count++;
1127             break;
1128
1129           case '\'':
1130           case '"':
1131             copy_string (finput, &action_obstack, c);
1132             break;
1133
1134           case '/':
1135             copy_comment (finput, &action_obstack);
1136             break;
1137
1138           case '$':
1139             copy_dollar (finput, &action_obstack,
1140                          rule, stack_offset);
1141             break;
1142
1143           case '@':
1144             copy_at (finput, &action_obstack,
1145                      stack_offset);
1146             break;
1147
               /* fatal is expected not to return; if it did, control
                  would fall into the default case -- TODO confirm.  */
1148           case EOF:
1149             fatal (_("unmatched %s"), "`{'");
1150
1151           default:
1152             obstack_1grow (&action_obstack, c);
1153           }
1154
1155       /* Above loop exits when C is '}'.  If it closes a nested block,
1156          copy it and go on.  Nothing is read ahead here: the loop above
1157          fetches every character itself, so an extra getc would
1158          silently drop whatever follows a nested `}'.  */
1159       if (--count)
1160         obstack_1grow (&action_obstack, c);
1161     }
1162
1163   obstack_1grow (&action_obstack, '\0');
1164 }
1165
1166
1167 static void
1168 parse_action (symbol_list *rule, int stack_offset)
1169 {
       /* Read one action with parse_braces and attach it to RULE.
          STACK_OFFSET is passed on unchanged.  Returns nothing.  */
       /* Record the line first: parse_braces bumps LINENO at each
          newline it copies.  */
1170   rule->action_line = lineno;
1171   parse_braces (rule, stack_offset);
       /* parse_braces left the NUL-terminated text growing in
          ACTION_OBSTACK; close that object and keep its address.  */
1172   rule->action = obstack_finish (&action_obstack);
1173 }
1174
1175
1176 static void
1177 parse_guard (symbol_list *rule, int stack_offset)
1178 {
       /* Read one guard and attach it to RULE.  STACK_OFFSET is passed
          on unchanged to parse_braces.  Returns nothing.  */
1179   token_t t = lex ();
       /* Only complain when the next token is not tok_left_curly:
          parse_braces is run all the same.  */
1180   if (t != tok_left_curly)
1181     complain (_("invalid %s declaration"), "%guard");
       /* Record the line first: parse_braces bumps LINENO at each
          newline it copies.  */
1182   rule->guard_line = lineno;
1183   parse_braces (rule, stack_offset);
       /* The guard text is accumulated in ACTION_OBSTACK too.  */
1184   rule->guard = obstack_finish (&action_obstack);
1185 }
1186
1187 \f
1188
1189 /*-------------------------------------------------------------------.
1190 | Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1191 | with the user's names.                                             |
1192 `-------------------------------------------------------------------*/
1193
1194 static bucket *
1195 gensym (void)
1196 {
1197   /* The serial number makes every generated name unique.  NAME is
1198      static: TOKEN_BUFFER still points to it after the call.  */
1199   static int ndummies = 0;
1200   static char name[256];
1201   bucket *dummy;
1202
1203   sprintf (name, "@%d", ++ndummies);
1204   token_buffer = name;
1205   dummy = getsym (token_buffer);
1206   dummy->class = nterm_sym;
1207   dummy->value = nvars++;
1208   return dummy;
1209 }
1210 \f
1211 /*-------------------------------------------------------------------.
1212 | Parse the input grammar into one symbol_list structure.  Each      |
1213 | rule is represented by a sequence of symbols: the left hand side   |
1214 | followed by the contents of the right hand side, followed by a     |
1215 | null pointer instead of a symbol to terminate the rule.  The next  |
1216 | symbol is the lhs of the following rule.                           |
1217 |                                                                    |
1218 | All guards and actions are copied out to the appropriate files,    |
1219 | labelled by the rule number they apply to.                         |
1220 |                                                                    |
1221 | Bison used to allow some %directives in the rules sections, but    |
1222 | this is no longer considered appropriate: (i) the documented       |
1223 | grammar doesn't claim it, (ii) it would promote bad style, (iii)   |
1224 | error recovery for %directives consists in skipping the junk until |
1225 | a `%' is seen, which helps resynchronizing.  That scheme is        |
1226 | definitely wrong in the rules section.                             |
1227 `-------------------------------------------------------------------*/
1228
1229 static void
1230 readgram (void)
1231 {
       /* Takes no argument and returns nothing.  Reads tokens with lex
          until tok_two_percents or tok_eof, chaining the symbol_list
          links of every rule onto GRAMMAR and counting rules (NRULES),
          items (NITEMS) and nonterminals (NVARS).  It then turns the
          still unknown symbols into nonterminals, puts the axiom rule
          in front of the list and sets NTOKENS.  A grammar without any
          rule, or with more than MAXSHORT symbols, is a fatal error.  */
1232   token_t t;
1233   bucket *lhs = NULL;
       /* P is the link being built, P1 the last link chained so far.  */
1234   symbol_list *p = NULL;
1235   symbol_list *p1 = NULL;
1236   bucket *bp;
1237
1238   /* Points to first symbol_list of current rule. its symbol is the
1239      lhs of the rule.  */
1240   symbol_list *crule = NULL;
1241   /* Points to the symbol_list preceding crule.  */
1242   symbol_list *crule1 = NULL;
1243
1244   t = lex ();
1245
1246   while (t != tok_two_percents && t != tok_eof)
1247     if (t == tok_identifier || t == tok_bar)
1248       {
             /* Nonzero while the last thing read in this rule is an
                action.  */
1249         int action_flag = 0;
1250         /* Number of symbols in rhs of this rule so far */
1251         int rulelength = 0;
1252         int xactions = 0;       /* JF for error checking */
1253         bucket *first_rhs = 0;
1254
             /* An identifier opens the rules of a new lhs; a bar opens
                another rule for the lhs already in LHS.  */
1255         if (t == tok_identifier)
1256           {
1257             lhs = symval;
1258
                 /* With START_FLAG still zero, the first lhs met becomes
                    the start symbol.  */
1259             if (!start_flag)
1260               {
1261                 startval = lhs;
1262                 start_flag = 1;
1263               }
1264
1265             t = lex ();
1266             if (t != tok_colon)
1267               {
1268                 complain (_("ill-formed rule: initial symbol not followed by colon"));
1269                 unlex (t);
1270               }
1271           }
1272
1273         if (nrules == 0 && t == tok_bar)
1274           {
1275             complain (_("grammar starts with vertical bar"));
1276             lhs = symval;       /* BOGUS: use a random symval */
1277           }
1278         /* start a new rule and record its lhs.  */
1279
             /* The lhs link counts for one item: packgram ends each rule
                with one extra entry in RITEM.  */
1280         nrules++;
1281         nitems++;
1282
1283         p = symbol_list_new (lhs);
1284
             /* P1 is still the last link of what precedes this rule (or
                NULL): keep it, a dummy rule may have to be spliced in
                right before this one.  */
1285         crule1 = p1;
1286         if (p1)
1287           p1->next = p;
1288         else
1289           grammar = p;
1290
1291         p1 = p;
1292         crule = p;
1293
1294         /* mark the rule's lhs as a nonterminal if not already so.  */
1295
1296         if (lhs->class == unknown_sym)
1297           {
1298             lhs->class = nterm_sym;
1299             lhs->value = nvars;
1300             nvars++;
1301           }
1302         else if (lhs->class == token_sym)
1303           complain (_("rule given for %s, which is a token"), lhs->tag);
1304
1305         /* read the rhs of the rule.  */
1306
1307         for (;;)
1308           {
1309             t = lex ();
                 /* tok_prec: the token that follows is read and whatever
                    it left in SYMVAL becomes the precedence symbol of the
                    rule; its kind is not checked.  */
1310             if (t == tok_prec)
1311               {
1312                 t = lex ();
1313                 crule->ruleprec = symval;
1314                 t = lex ();
1315               }
1316
                 /* Only symbols and actions belong to a rhs.  */
1317             if (!(t == tok_identifier || t == tok_left_curly))
1318               break;
1319
1320             /* If next token is an identifier, see if a colon follows it.
1321                If one does, exit this rule now.  */
1322             if (t == tok_identifier)
1323               {
1324                 bucket *ssave;
1325                 token_t t1;
1326
                     /* Peek at the next token and push it back.  SYMVAL
                        is saved around the peek, presumably because lex
                        may overwrite it.  */
1327                 ssave = symval;
1328                 t1 = lex ();
1329                 unlex (t1);
1330                 symval = ssave;
1331                 if (t1 == tok_colon)
1332                   break;
1333
1334                 if (!first_rhs) /* JF */
1335                   first_rhs = symval;
1336                 /* Not followed by colon =>
1337                    process as part of this rule's rhs.  */
1338               }
1339
1340             /* If we just passed an action, that action was in the middle
1341                of a rule, so make a dummy rule to reduce it to a
1342                non-terminal.  */
1343             if (action_flag)
1344               {
1345                 /* Since the action was written out with this rule's
1346                    number, we must give the new rule this number by
1347                    inserting the new rule before it.  */
1348
1349                 /* Make a dummy nonterminal, a gensym.  */
1350                 bucket *sdummy = gensym ();
1351
1352                 /* Make a new rule, whose body is empty, before the
1353                    current one, so that the action just read can
1354                    belong to it.  */
1355                 nrules++;
1356                 nitems++;
1357                 p = symbol_list_new (sdummy);
1358                 /* Attach its lineno to that of the host rule. */
1359                 p->line = crule->line;
1360                 /* Move the action from the host rule to this one. */
1361                 p->action = crule->action;
1362                 p->action_line = crule->action_line;
1363                 crule->action = NULL;
1364
1365                 if (crule1)
1366                   crule1->next = p;
1367                 else
1368                   grammar = p;
1369                 /* End of the rule. */
                     /* CRULE1 now designates this end marker, which is
                        the link preceding CRULE from now on.  */
1370                 crule1 = symbol_list_new (NULL);
1371                 crule1->next = crule;
1372
1373                 p->next = crule1;
1374
1375                 /* Insert the dummy generated by that rule into this
1376                    rule.  */
1377                 nitems++;
1378                 p = symbol_list_new (sdummy);
1379                 p1->next = p;
1380                 p1 = p;
1381
1382                 action_flag = 0;
1383               }
1384
1385             if (t == tok_identifier)
1386               {
1387                 nitems++;
1388                 p = symbol_list_new (symval);
1389                 p1->next = p;
1390                 p1 = p;
1391               }
1392             else                /* handle an action.  */
1393               {
1394                 parse_action (crule, rulelength);
1395                 action_flag = 1;
1396                 xactions++;     /* JF */
1397               }
                 /* Symbols and actions alike count in RULELENGTH.  */
1398             rulelength++;
1399           }                     /* end of  read rhs of rule */
1400
1401         /* Put an empty link in the list to mark the end of this rule  */
1402         p = symbol_list_new (NULL);
1403         p1->next = p;
1404         p1 = p;
1405
             /* The loop above consumes a tok_prec and the two tokens
                after it, so T can only be tok_prec here when the second
                of those was a tok_prec again.  */
1406         if (t == tok_prec)
1407           {
1408             complain (_("two @prec's in a row"));
1409             t = lex ();
1410             crule->ruleprec = symval;
1411             t = lex ();
1412           }
1413
1414         if (t == tok_guard)
1415           {
1416             if (!semantic_parser)
1417               complain (_("%%guard present but %%semantic_parser not specified"));
1418
1419             parse_guard (crule, rulelength);
1420             t = lex ();
1421           }
1422
1423         if (t == tok_left_curly)
1424           {
1425             /* This case never occurs -wjh */
1426             if (action_flag)
1427               complain (_("two actions at end of one rule"));
1428             parse_action (crule, rulelength);
1429             action_flag = 1;
1430             xactions++; /* -wjh */
1431             t = lex ();
1432           }
1433         /* If $$ is being set in default way, report if any type
1434            mismatch.  */
1435         else if (!xactions
1436                  && first_rhs && lhs->type_name != first_rhs->type_name)
1437           {
                 /* The pointers differ: the types clash unless both
                    names are set and spell the same string.  */
1438             if (lhs->type_name == 0
1439                 || first_rhs->type_name == 0
1440                 || strcmp (lhs->type_name, first_rhs->type_name))
1441               complain (_("type clash (`%s' `%s') on default action"),
1442                         lhs->type_name ? lhs->type_name : "",
1443                         first_rhs->type_name ? first_rhs->type_name : "");
1444           }
1445         /* Warn if there is no default for $$ but we need one.  */
1446         else if (!xactions && !first_rhs && lhs->type_name != 0)
1447           complain (_("empty rule for typed nonterminal, and no action"));
             /* The semicolon that ends a rule is optional.  */
1448         if (t == tok_semicolon)
1449           t = lex ();
1450       }
1451     else
1452       {
1453         complain (_("invalid input: %s"), quote (token_buffer));
1454         t = lex ();
1455       }
1456
1457   /* grammar has been read.  Do some checking */
1458
1459   if (nrules == 0)
1460     fatal (_("no rules in the input grammar"));
1461
1462   /* Report any undefined symbols and consider them nonterminals.  */
1463
1464   for (bp = firstsymbol; bp; bp = bp->next)
1465     if (bp->class == unknown_sym)
1466       {
1467         complain (_
1468                   ("symbol %s is used, but is not defined as a token and has no rules"),
1469                   bp->tag);
1470         bp->class = nterm_sym;
1471         bp->value = nvars++;
1472       }
1473
1474   /* Insert the initial rule, which line is that of the first rule
1475      (not that of the start symbol):
1476
1477      axiom: %start EOF.  */
1478   p = symbol_list_new (axiom);
1479   p->line = grammar->line;
1480   p->next = symbol_list_new (startval);
1481   p->next->next = symbol_list_new (eoftoken);
1482   p->next->next->next = symbol_list_new (NULL);
1483   p->next->next->next->next = grammar;
       /* One more rule; three more items: its two rhs symbols and, as
          for every rule, one for its lhs link.  */
1484   nrules += 1;
1485   nitems += 3;
1486   grammar = p;
       /* From now on STARTVAL is the axiom, not the user's start
          symbol.  */
1487   startval = axiom;
1488
1489   if (nsyms > MAXSHORT)
1490     fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1491            MAXSHORT);
1492
1493   ntokens = nsyms - nvars;
1494 }
1495
1496 /* At the end of the grammar file, some C source code must
1497    be stored. It is going to be associated to the epilogue
1498    directive.  */
1499 static void
1500 read_additionnal_code (void)
1501 {
       /* Takes no argument and returns nothing.  Stores all that is left
          to read in FINPUT as the value of the muscle "epilogue".
          Unless NO_LINES_FLAG is set, that text is preceded by a line
          formatted from the "linef" muscle with LINENO and the quoted
          "filename" muscle.  */
       /* C must be an int: getc returns an int so that EOF differs from
          every character.  Stored in a char, the byte 0xFF compares
          equal to EOF where char is signed, which cuts the epilogue
          short, and nothing compares equal to EOF where char is
          unsigned, which makes the loop endless.  */
1502   int c;
1503   struct obstack el_obstack;
1504
1505   obstack_init (&el_obstack);
1506
1507   if (!no_lines_flag)
1508     {
1509       obstack_fgrow2 (&el_obstack, muscle_find ("linef"),
1510                       lineno, quotearg_style (c_quoting_style,
1511                                               muscle_find ("filename")));
1512     }
1513
1514   while ((c = getc (finput)) != EOF)
1515     obstack_1grow (&el_obstack, c);
1516
       /* NUL-terminate the text before handing it over.  */
1517   obstack_1grow (&el_obstack, 0);
1518   muscle_insert ("epilogue", obstack_finish (&el_obstack));
1519 }
1520
1521 \f
1522 /*------------------------------------------------------------------.
1523 | Set TOKEN_TRANSLATIONS.  Check that no two symbols share the same |
1524 | number.                                                           |
1525 `------------------------------------------------------------------*/
1526
1527 static void
1528 token_translations_init (void)
1529 {
1530   /* Takes no argument and returns nothing: fills the global
1531      TOKEN_TRANSLATIONS, indexed by user token number, and complains
1532      about every user number claimed by two tokens.  */
1533   bucket *sym;
1534   int num;
1535
1536   token_translations = XCALLOC (short, max_user_token_number + 1);
1537
1538   /* Until proven otherwise, every user number maps to 2, the
1539      internal number of $undefined., i.e., of any invalid input.  */
1540   num = 0;
1541   while (num <= max_user_token_number)
1542     token_translations[num++] = 2;
1543
1544   /* Record the internal number of each genuine token.  Symbols whose
1545      value is not below NTOKENS (the nonterminals) and token string
1546      aliases are left out.  */
1547   for (sym = firstsymbol; sym; sym = sym->next)
1548     if (sym->value < ntokens && sym->user_token_number != SALIAS)
1549       {
1550         assert (sym->user_token_number != SUNDEF);
1551
1552         /* Slot already filled: two tokens share this user number.  */
1553         if (token_translations[sym->user_token_number] != 2)
1554           complain (_("tokens %s and %s both assigned number %d"),
1555                     symbols[token_translations[sym->user_token_number]]->tag,
1556                     sym->tag, sym->user_token_number);
1557         token_translations[sym->user_token_number] = sym->value;
1558       }
1559 }
1560
1561
1562 /*----------------------------------------------------------------.
1563 | Assign symbol numbers, and write definition of token names into |
1564 | FDEFINES.  Set up vectors SYMBOL_TABLE, TAGS of symbols.        |
1565 `----------------------------------------------------------------*/
1566
1567 static void
1568 packsymbols (void)
1569 {
       /* Takes no argument and returns nothing.  Walks the symbols from
          FIRSTSYMBOL on: gives each token its internal number, shifts
          the nonterminal numbers by NTOKENS, reconciles each token with
          its alias, numbers the tokens the user left unnumbered and
          fills SYMBOLS, indexed by internal number.  It then builds
          TOKEN_TRANSLATIONS and sets ERROR_TOKEN_NUMBER and
          START_SYMBOL.  */
1570   bucket *bp = NULL;
       /* Next internal token number to hand out; 0 is kept for
          EOFTOKEN.  */
1571   int tokno = 1;
1572   int last_user_token_number;
1573
1574   symbols = XCALLOC (bucket *, nsyms);
1575
       /* User numbers handed out below start right after 256.  */
1576   max_user_token_number = 256;
1577   last_user_token_number = 256;
1578
1579   for (bp = firstsymbol; bp; bp = bp->next)
1580     {
1581       if (bp->class == nterm_sym)
1582         {
               /* Nonterminals were numbered from 0 on as they were met;
                  shift them by NTOKENS.  */
1583           bp->value += ntokens;
1584         }
1585       else if (bp->alias)
1586         {
1587           /* This symbol and its alias are a single token defn.
1588              Allocate a tokno, and assign to both check agreement of
1589              prec and assoc fields and make both the same */
               /* -1 is presumably the value of a token not numbered yet
                  -- confirm against getsym.  Numbering both members at
                  once keeps the second visit from taking a new number.  */
1590           if (bp->value == -1)
1591             {
1592               if (bp == eoftoken || bp->alias == eoftoken)
1593                 bp->value = bp->alias->value = 0;
1594               else
1595                 {
1596                   bp->value = bp->alias->value = tokno++;
1597                 }
1598             }
1599
               /* A conflict is only reported while visiting the SALIAS
                  member of the pair, hence once; the value that is set
                  (nonzero) is then copied to the other member.  */
1600           if (bp->prec != bp->alias->prec)
1601             {
1602               if (bp->prec != 0 && bp->alias->prec != 0
1603                   && bp->user_token_number == SALIAS)
1604                 complain (_("conflicting precedences for %s and %s"),
1605                           bp->tag, bp->alias->tag);
1606               if (bp->prec != 0)
1607                 bp->alias->prec = bp->prec;
1608               else
1609                 bp->prec = bp->alias->prec;
1610             }
1611
1612           if (bp->assoc != bp->alias->assoc)
1613             {
1614               if (bp->assoc != 0 && bp->alias->assoc != 0
1615                   && bp->user_token_number == SALIAS)
1616                 complain (_("conflicting assoc values for %s and %s"),
1617                           bp->tag, bp->alias->tag);
1618               if (bp->assoc != 0)
1619                 bp->alias->assoc = bp->assoc;
1620               else
1621                 bp->assoc = bp->alias->assoc;
1622             }
1623
1624           /* Do not do processing below for SALIASs.  */
1625           if (bp->user_token_number == SALIAS)
1626             continue;
1627
1628         }
1629       else /* bp->class == token_sym */
1630         {
1631           if (bp == eoftoken)
1632             bp->value = 0;
1633           else
1634             bp->value = tokno++;
1635         }
1636
1637       if (bp->class == token_sym)
1638         {
               /* No user number was given: take the next one after the
                  last handed out here.  */
1639           if (bp->user_token_number == SUNDEF)
1640             bp->user_token_number = ++last_user_token_number;
1641           if (bp->user_token_number > max_user_token_number)
1642             max_user_token_number = bp->user_token_number;
1643         }
1644
1645       symbols[bp->value] = bp;
1646     }
1647
       /* Needs MAX_USER_TOKEN_NUMBER and SYMBOLS, both complete only
          now.  */
1648   token_translations_init ();
1649
1650   error_token_number = errtoken->value;
1651
       /* NOTE(review): readgram leaves STARTVAL set to AXIOM, whose
          class reader sets to nterm_sym; when called from reader,
          neither test below can then fire -- confirm whether the user's
          start symbol was meant to be checked here.  */
1652   if (startval->class == unknown_sym)
1653     fatal (_("the start symbol %s is undefined"), startval->tag);
1654   else if (startval->class == token_sym)
1655     fatal (_("the start symbol %s is a token"), startval->tag);
1656
1657   start_symbol = startval->value;
1658 }
1659
1660
1661 /*---------------------------------------------------------------.
1662 | Convert the rules into the representation using RRHS, RLHS and |
1663 | RITEMS.                                                        |
1664 `---------------------------------------------------------------*/
1665
1666 static void
1667 packgram (void)
1668 {
       /* Takes no argument and returns nothing.  Walks the GRAMMAR list
          once and fills RULES, one entry per rule, and RITEM, which
          gets for each rule the values of its rhs symbols followed by
          the negated rule number.  Sets NRITEMS.  Having MAXSHORT items
          or more is a fatal error.  */
       /* Next free index in RITEM.  */
1669   int itemno;
       /* Number of the rule being packed; rules are numbered from 1.  */
1670   int ruleno;
1671   symbol_list *p;
1672
1673   /* We use short to index items.  */
1674   if (nitems >= MAXSHORT)
1675     fatal (_("too many items (max %d)"), MAXSHORT);
1676
       /* One more entry than there are items: see the 0 stored after
          the last item below.  */
1677   ritem = XCALLOC (short, nitems + 1);
       /* Offset by one so that RULES is indexed from 1 to NRULES.  */
1678   rules = XCALLOC (rule_t, nrules) - 1;
1679
1680   itemno = 0;
1681   ruleno = 1;
1682
1683   p = grammar;
1684   while (p)
1685     {
           /* P is the lhs link of a rule; what is attached to the rule
              lives there.  RULEPREC is read now since P moves on.  */
1686       bucket *ruleprec = p->ruleprec;
1687       rules[ruleno].lhs = p->sym->value;
1688       rules[ruleno].rhs = itemno;
1689       rules[ruleno].line = p->line;
1690       rules[ruleno].useful = TRUE;
1691       rules[ruleno].action = p->action;
1692       rules[ruleno].action_line = p->action_line;
1693       rules[ruleno].guard = p->guard;
1694       rules[ruleno].guard_line = p->guard_line;
1695
           /* Copy the rhs, up to the link with a null symbol that ends
              the rule.  */
1696       p = p->next;
1697       while (p && p->sym)
1698         {
1699           ritem[itemno++] = p->sym->value;
1700           /* A rule gets by default the precedence and associativity
1701              of the last token in it.  */
1702           if (p->sym->class == token_sym)
1703             {
1704               rules[ruleno].prec = p->sym->prec;
1705               rules[ruleno].assoc = p->sym->assoc;
1706             }
               /* P cannot be null here, the loop condition checked it.  */
1707           if (p)
1708             p = p->next;
1709         }
1710
1711       /* If this rule has a %prec,
1712          the specified symbol's precedence replaces the default.  */
1713       if (ruleprec)
1714         {
1715           rules[ruleno].prec = ruleprec->prec;
1716           rules[ruleno].assoc = ruleprec->assoc;
1717           rules[ruleno].precsym = ruleprec->value;
1718         }
1719
           /* End of the rule in RITEM: the negated rule number.  */
1720       ritem[itemno++] = -ruleno;
1721       ruleno++;
1722
           /* Step over the end-of-rule link, to the next lhs.  */
1723       if (p)
1724         p = p->next;
1725     }
1726
1727   ritem[itemno] = 0;
1728   nritems = itemno;
       /* readgram counted one item per rhs symbol and one per rule,
          exactly what was stored above.  */
1729   assert (nritems == nitems);
1730
1731   if (trace_flag)
1732     ritem_print (stderr);
1733 }
1734 \f
1735 /*-------------------------------------------------------------------.
1736 | Read in the grammar specification and record it in the format      |
1737 | described in gram.h.  All guards are copied into the GUARD_OBSTACK |
1738 | and all actions into ACTION_OBSTACK, in each case forming the body |
1739 | of a C function (YYGUARD or YYACTION) which contains a switch      |
1740 | statement to decide which guard or action to execute.              |
1741 `-------------------------------------------------------------------*/
1742
1743 void
1744 reader (void)
1745 {
       /* Takes no argument and returns nothing.  Resets the counters and
          flags below, creates the predefined symbols, opens INFILE and
          reads its three parts in turn -- declarations, rules, trailing
          code -- then packs the symbols and the rules, and frees the
          list form of the grammar.  */
1746   start_flag = 0;
1747   startval = NULL;              /* start symbol not specified yet. */
1748
1749   nsyms = 0;
1750   nvars = 0;
1751   nrules = 0;
1752   nitems = 0;
1753
1754   typed = 0;
1755   lastprec = 0;
1756
1757   semantic_parser = 0;
1758   pure_parser = 0;
1759
1760   grammar = NULL;
1761
1762   lex_init ();
1763   lineno = 1;
1764
1765   /* Initialize the muscle obstack.  */
1766   obstack_init (&muscle_obstack);
1767
1768   /* Initialize the symbol table.  */
1769   tabinit ();
1770
1771   /* Construct the axiom symbol. */
       /* NVARS was reset to 0 above, so the axiom is given value 0.  */
1772   axiom = getsym ("$axiom");
1773   axiom->class = nterm_sym;
1774   axiom->value = nvars++;
1775
1776   /* Construct the error token */
1777   errtoken = getsym ("error");
1778   errtoken->class = token_sym;
1779   errtoken->user_token_number = 256;    /* Value specified by POSIX.  */
1780
1781   /* Construct a token that represents all undefined literal tokens.
1782      It is always token number 2.  */
1783   undeftoken = getsym ("$undefined.");
1784   undeftoken->class = token_sym;
1785   undeftoken->user_token_number = 2;
1786
1787   /* Initialize the obstacks. */
1788   obstack_init (&action_obstack);
1789   obstack_init (&attrs_obstack);
1790   obstack_init (&output_obstack);
1791
1792   finput = xfopen (infile, "r");
1793
1794   /* Read the declaration section.  Copy %{ ... %} groups to
1795      TABLE_OBSTACK and FDEFINES file.  Also notice any %token, %left,
1796      etc. found there.  */
1797   read_declarations ();
1798
1799   /* If the user did not define her EOFTOKEN, do it now. */
       /* This must come before readgram, which puts EOFTOKEN in the
          axiom rule.  */
1800   if (!eoftoken)
1801     {
1802       eoftoken = getsym ("$");
1803       eoftoken->class = token_sym;
1804       /* Value specified by POSIX.  */
1805       eoftoken->user_token_number = 0;
1806     }
1807
1808   /* Read in the grammar, build grammar in list form.  Write out
1809      guards and actions.  */
1810   readgram ();
1811   /* Some C code is given at the end of the grammar file. */
1812   read_additionnal_code ();
1813
       /* Nothing is read from the input past this point.  */
1814   lex_free ();
1815   xfclose (finput);
1816
1817   /* Assign the symbols their symbol numbers.  Write #defines for the
1818      token symbols into FDEFINES if requested.  */
       /* Uses NTOKENS, which readgram has set.  */
1819   packsymbols ();
1820
1821   /* Convert the grammar into the format described in gram.h.  */
       /* Reads the symbol values, final only once packsymbols has
          run.  */
1822   packgram ();
1823
1824   /* The grammar as a symbol_list is no longer needed. */
1825   LIST_FREE (symbol_list, grammar);
1826 }