src/reader.c

   1 /* Input parser for bison
   2    Copyright 1984, 1986, 1989, 1992, 1998, 2000, 2001
   3    Free Software Foundation, Inc.
   4
   5    This file is part of Bison, the GNU Compiler Compiler.
   6
   7    Bison is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 2, or (at your option)
  10    any later version.
  11
  12    Bison is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with Bison; see the file COPYING.  If not, write to
  19    the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20    Boston, MA 02111-1307, USA.  */
  21
  22
  23 #include "system.h"
  24 #include "quotearg.h"
  25 #include "quote.h"
  26 #include "getargs.h"
  27 #include "files.h"
  28 #include "symtab.h"
  29 #include "options.h"
  30 #include "lex.h"
  31 #include "gram.h"
  32 #include "complain.h"
  33 #include "output.h"
  34 #include "reader.h"
  35 #include "conflicts.h"
  36 #include "muscle_tab.h"
  37
  38 typedef struct symbol_list
  39 {
  40   struct symbol_list *next;
  41   bucket *sym;
  42   int line;
  43   /* The action is attached to the LHS of a rule. */
  44   const char *action;
  45   int action_line;
  46   bucket *ruleprec;
  47 }
  48 symbol_list;
  49
  50 int lineno;
  51 char **tags;
  52 short *user_toknums;
  53 static symbol_list *grammar;
  54 static int start_flag;
  55 static bucket *startval;
  56
  57 /* Nonzero if components of semantic values are used, implying
  58    they must be unions.  */
  59 static int value_components_used;
  60
  61 /* Nonzero if %union has been seen.  */
  62 static int typed;
  63
  64 /* Incremented for each %left, %right or %nonassoc seen */
  65 static int lastprec;
  66
  67 static bucket *errtoken;
  68 static bucket *undeftoken;
  69
  70
  71 static symbol_list *
  72 symbol_list_new (bucket *sym)
  73 {
  74   symbol_list *res = XMALLOC (symbol_list, 1);
  75   res->next = NULL;
  76   res->sym = sym;
  77   res->line = lineno;
  78   res->ruleprec = NULL;
  79   return res;
  80 }
  81
  82 \f
  83
  84 /*===================\
  85 | Low level lexing.  |
  86 \===================*/
  87
  88 static void
  89 skip_to_char (int target)
  90 {
  91   int c;
  92   if (target == '\n')
  93     complain (_("   Skipping to next \\n"));
  94   else
  95     complain (_("   Skipping to next %c"), target);
  96
  97   do
  98     c = skip_white_space ();
  99   while (c != target && c != EOF);
 100   if (c != EOF)
 101     ungetc (c, finput);
 102 }
 103
 104
 105 /*---------------------------------------------------------.
 106 | Read a signed integer from STREAM and return its value.  |
 107 `---------------------------------------------------------*/
 108
 109 static inline int
 110 read_signed_integer (FILE *stream)
 111 {
 112   int c = getc (stream);
 113   int sign = 1;
 114   int n = 0;
 115
 116   if (c == '-')
 117     {
 118       c = getc (stream);
 119       sign = -1;
 120     }
 121
 122   while (isdigit (c))
 123     {
 124       n = 10 * n + (c - '0');
 125       c = getc (stream);
 126     }
 127
 128   ungetc (c, stream);
 129
 130   return sign * n;
 131 }
 132 \f
 133 /*--------------------------------------------------------------.
 134 | Get the data type (alternative in the union) of the value for |
 135 | symbol N in rule RULE.                                        |
 136 `--------------------------------------------------------------*/
 137
 138 static char *
 139 get_type_name (int n, symbol_list *rule)
 140 {
 141   int i;
 142   symbol_list *rp;
 143
 144   if (n < 0)
 145     {
 146       complain (_("invalid $ value"));
 147       return NULL;
 148     }
 149
 150   rp = rule;
 151   i = 0;
 152
 153   while (i < n)
 154     {
 155       rp = rp->next;
 156       if (rp == NULL || rp->sym == NULL)
 157         {
 158           complain (_("invalid $ value"));
 159           return NULL;
 160         }
 161       i++;
 162     }
 163
 164   return rp->sym->type_name;
 165 }
 166 \f
 167 /*------------------------------------------------------------.
 168 | Dump the string from FIN to OOUT if non null.  MATCH is the |
 169 | delimiter of the string (either ' or ").                    |
 170 `------------------------------------------------------------*/
 171
 172 static inline void
 173 copy_string2 (FILE *fin, struct obstack *oout, int match, int store)
 174 {
 175   int c;
 176
 177   if (store)
 178     obstack_1grow (oout, match);
 179
 180   c = getc (fin);
 181
 182   while (c != match)
 183     {
 184       if (c == EOF)
 185         fatal (_("unterminated string at end of file"));
 186       if (c == '\n')
 187         {
 188           complain (_("unterminated string"));
 189           ungetc (c, fin);
 190           c = match;            /* invent terminator */
 191           continue;
 192         }
 193
 194       obstack_1grow (oout, c);
 195
 196       if (c == '\\')
 197         {
 198           c = getc (fin);
 199           if (c == EOF)
 200             fatal (_("unterminated string at end of file"));
 201           obstack_1grow (oout, c);
 202
 203           if (c == '\n')
 204             lineno++;
 205         }
 206
 207       c = getc (fin);
 208     }
 209
 210   if (store)
 211     obstack_1grow (oout, c);
 212 }
 213
 214 /* FIXME. */
 215
 216 static inline void
 217 copy_string (FILE *fin, struct obstack *oout, int match)
 218 {
 219   copy_string2 (fin, oout, match, 1);
 220 }
 221
 222 /* FIXME. */
 223
 224 static inline void
 225 copy_identifier (FILE *fin, struct obstack *oout)
 226 {
 227   int c;
 228
 229   while (isalnum (c = getc (fin)) || c == '_')
 230     obstack_1grow (oout, c);
 231
 232   ungetc (c, fin);
 233 }
 234
 235
 236 /*------------------------------------------------------------------.
 237 | Dump the wannabee comment from IN to OOUT.  In fact we just saw a |
 238 | `/', which might or might not be a comment.  In any case, copy    |
 239 | what we saw.                                                      |
 240 `------------------------------------------------------------------*/
 241
 242 static inline void
 243 copy_comment (FILE *fin, struct obstack *oout)
 244 {
 245   int cplus_comment;
 246   int ended;
 247   int c;
 248
 249   /* We read a `/', output it. */
 250   obstack_1grow (oout, '/');
 251
 252   switch ((c = getc (fin)))
 253     {
 254     case '/':
 255       cplus_comment = 1;
 256       break;
 257     case '*':
 258       cplus_comment = 0;
 259       break;
 260     default:
 261       ungetc (c, fin);
 262       return;
 263     }
 264
 265   obstack_1grow (oout, c);
 266   c = getc (fin);
 267
 268   ended = 0;
 269   while (!ended)
 270     {
 271       if (!cplus_comment && c == '*')
 272         {
 273           while (c == '*')
 274             {
 275               obstack_1grow (oout, c);
 276               c = getc (fin);
 277             }
 278
 279           if (c == '/')
 280             {
 281               obstack_1grow (oout, c);
 282               ended = 1;
 283             }
 284         }
 285       else if (c == '\n')
 286         {
 287           lineno++;
 288           obstack_1grow (oout, c);
 289           if (cplus_comment)
 290             ended = 1;
 291           else
 292             c = getc (fin);
 293         }
 294       else if (c == EOF)
 295         fatal (_("unterminated comment"));
 296       else
 297         {
 298           obstack_1grow (oout, c);
 299           c = getc (fin);
 300         }
 301     }
 302 }
 303
 304
 305 /*-----------------------------------------------------------------.
 306 | FIN is pointing to a location (i.e., a `@').  Output to OOUT a   |
 307 | reference to this location. STACK_OFFSET is the number of values |
 308 | in the current rule so far, which says where to find `$0' with   |
 309 | respect to the top of the stack.                                 |
 310 `-----------------------------------------------------------------*/
 311
 312 static inline void
 313 copy_at (FILE *fin, struct obstack *oout, int stack_offset)
 314 {
 315   int c;
 316
 317   c = getc (fin);
 318   if (c == '$')
 319     {
 320       obstack_sgrow (oout, "yyloc");
 321       locations_flag = 1;
 322     }
 323   else if (isdigit (c) || c == '-')
 324     {
 325       int n;
 326
 327       ungetc (c, fin);
 328       n = read_signed_integer (fin);
 329
 330       obstack_fgrow1 (oout, "yylsp[%d]", n - stack_offset);
 331       locations_flag = 1;
 332     }
 333   else
 334     {
 335       char buf[] = "@c";
 336       buf[1] = c;
 337       complain (_("%s is invalid"), quote (buf));
 338     }
 339 }
 340
 341
 342 /*-------------------------------------------------------------------.
 343 | FIN is pointing to a wannabee semantic value (i.e., a `$').        |
 344 |                                                                    |
 345 | Possible inputs: $[<TYPENAME>]($|integer)                          |
 346 |                                                                    |
 347 | Output to OOUT a reference to this semantic value. STACK_OFFSET is |
 348 | the number of values in the current rule so far, which says where  |
 349 | to find `$0' with respect to the top of the stack.                 |
 350 `-------------------------------------------------------------------*/
 351
 352 static inline void
 353 copy_dollar (FILE *fin, struct obstack *oout,
 354              symbol_list *rule, int stack_offset)
 355 {
 356   int c = getc (fin);
 357   const char *type_name = NULL;
 358
 359   /* Get the type name if explicit. */
 360   if (c == '<')
 361     {
 362       read_type_name (fin);
 363       type_name = token_buffer;
 364       value_components_used = 1;
 365       c = getc (fin);
 366     }
 367
 368   if (c == '$')
 369     {
 370       obstack_sgrow (oout, "yyval");
 371
 372       if (!type_name)
 373         type_name = get_type_name (0, rule);
 374       if (type_name)
 375         obstack_fgrow1 (oout, ".%s", type_name);
 376       if (!type_name && typed)
 377         complain (_("$$ of `%s' has no declared type"),
 378                   rule->sym->tag);
 379     }
 380   else if (isdigit (c) || c == '-')
 381     {
 382       int n;
 383       ungetc (c, fin);
 384       n = read_signed_integer (fin);
 385
 386       if (!type_name && n > 0)
 387         type_name = get_type_name (n, rule);
 388
 389       obstack_fgrow1 (oout, "yyvsp[%d]", n - stack_offset);
 390
 391       if (type_name)
 392         obstack_fgrow1 (oout, ".%s", type_name);
 393       if (!type_name && typed)
 394         complain (_("$%d of `%s' has no declared type"),
 395                   n, rule->sym->tag);
 396     }
 397   else
 398     {
 399       char buf[] = "$c";
 400       buf[1] = c;
 401       complain (_("%s is invalid"), quote (buf));
 402     }
 403 }
 404 \f
 405 /*-------------------------------------------------------------------.
 406 | Copy the contents of a `%{ ... %}' into the definitions file.  The |
 407 | `%{' has already been read.  Return after reading the `%}'.        |
 408 `-------------------------------------------------------------------*/
 409
 410 static void
 411 copy_definition (void)
 412 {
 413   int c;
 414   /* -1 while reading a character if prev char was %. */
 415   int after_percent;
 416
 417   if (!no_lines_flag)
 418     {
 419       obstack_fgrow2 (&attrs_obstack, muscle_find ("linef"),
 420                       lineno, quotearg_style (c_quoting_style,
 421                                               muscle_find("filename")));
 422     }
 423
 424   after_percent = 0;
 425
 426   c = getc (finput);
 427
 428   for (;;)
 429     {
 430       switch (c)
 431         {
 432         case '\n':
 433           obstack_1grow (&attrs_obstack, c);
 434           lineno++;
 435           break;
 436
 437         case '%':
 438           after_percent = -1;
 439           break;
 440
 441         case '\'':
 442         case '"':
 443           copy_string (finput, &attrs_obstack, c);
 444           break;
 445
 446         case '/':
 447           copy_comment (finput, &attrs_obstack);
 448           break;
 449
 450         case EOF:
 451           fatal ("%s", _("unterminated `%{' definition"));
 452
 453         default:
 454           obstack_1grow (&attrs_obstack, c);
 455         }
 456
 457       c = getc (finput);
 458
 459       if (after_percent)
 460         {
 461           if (c == '}')
 462             return;
 463           obstack_1grow (&attrs_obstack, '%');
 464         }
 465       after_percent = 0;
 466     }
 467 }
 468
 469
 470 /*-------------------------------------------------------------------.
 471 | Parse what comes after %token or %nterm.  For %token, WHAT_IS is   |
 472 | token_sym and WHAT_IS_NOT is nterm_sym.  For %nterm, the arguments |
 473 | are reversed.                                                      |
 474 `-------------------------------------------------------------------*/
 475
 476 static void
 477 parse_token_decl (symbol_class what_is, symbol_class what_is_not)
 478 {
 479   token_t token = tok_undef;
 480   char *typename = NULL;
 481
 482   /* The symbol being defined.  */
 483   struct bucket *symbol = NULL;
 484
 485   /* After `%token' and `%nterm', any number of symbols maybe be
 486      defined.  */
 487   for (;;)
 488     {
 489       int tmp_char = ungetc (skip_white_space (), finput);
 490
 491       /* `%' (for instance from `%token', or from `%%' etc.) is the
 492          only valid means to end this declaration.  */
 493       if (tmp_char == '%')
 494         return;
 495       if (tmp_char == EOF)
 496         fatal (_("Premature EOF after %s"), token_buffer);
 497
 498       token = lex ();
 499       if (token == tok_comma)
 500         {
 501           symbol = NULL;
 502           continue;
 503         }
 504       if (token == tok_typename)
 505         {
 506           typename = xstrdup (token_buffer);
 507           value_components_used = 1;
 508           symbol = NULL;
 509         }
 510       else if (token == tok_identifier && *symval->tag == '\"' && symbol)
 511         {
 512           if (symval->alias)
 513             warn (_("symbol `%s' used more than once as a literal string"),
 514                   symval->tag);
 515           else if (symbol->alias)
 516             warn (_("symbol `%s' given more than one literal string"),
 517                   symbol->tag);
 518           else
 519             {
 520               symval->class = token_sym;
 521               symval->type_name = typename;
 522               symval->user_token_number = symbol->user_token_number;
 523               symbol->user_token_number = SALIAS;
 524               symval->alias = symbol;
 525               symbol->alias = symval;
 526               /* symbol and symval combined are only one symbol */
 527               nsyms--;
 528             }
 529           symbol = NULL;
 530         }
 531       else if (token == tok_identifier)
 532         {
 533           int oldclass = symval->class;
 534           symbol = symval;
 535
 536           if (symbol->class == what_is_not)
 537             complain (_("symbol %s redefined"), symbol->tag);
 538           symbol->class = what_is;
 539           if (what_is == nterm_sym && oldclass != nterm_sym)
 540             symbol->value = nvars++;
 541
 542           if (typename)
 543             {
 544               if (symbol->type_name == NULL)
 545                 symbol->type_name = typename;
 546               else if (strcmp (typename, symbol->type_name) != 0)
 547                 complain (_("type redeclaration for %s"), symbol->tag);
 548             }
 549         }
 550       else if (symbol && token == tok_number)
 551         {
 552           symbol->user_token_number = numval;
 553         }
 554       else
 555         {
 556           complain (_("`%s' is invalid in %s"),
 557                     token_buffer,
 558                     (what_is == token_sym) ? "%token" : "%nterm");
 559           skip_to_char ('%');
 560         }
 561     }
 562
 563 }
 564
 565
 566 /*------------------------------.
 567 | Parse what comes after %start |
 568 `------------------------------*/
 569
 570 static void
 571 parse_start_decl (void)
 572 {
 573   if (start_flag)
 574     complain (_("multiple %s declarations"), "%start");
 575   if (lex () != tok_identifier)
 576     complain (_("invalid %s declaration"), "%start");
 577   else
 578     {
 579       start_flag = 1;
 580       startval = symval;
 581     }
 582 }
 583
 584 /*-----------------------------------------------------------.
 585 | read in a %type declaration and record its information for |
 586 | get_type_name to access                                    |
 587 `-----------------------------------------------------------*/
 588
 589 static void
 590 parse_type_decl (void)
 591 {
 592   char *name;
 593
 594   if (lex () != tok_typename)
 595     {
 596       complain ("%s", _("%type declaration has no <typename>"));
 597       skip_to_char ('%');
 598       return;
 599     }
 600
 601   name = xstrdup (token_buffer);
 602
 603   for (;;)
 604     {
 605       token_t t;
 606       int tmp_char = ungetc (skip_white_space (), finput);
 607
 608       if (tmp_char == '%')
 609         return;
 610       if (tmp_char == EOF)
 611         fatal (_("Premature EOF after %s"), token_buffer);
 612
 613       t = lex ();
 614
 615       switch (t)
 616         {
 617
 618         case tok_comma:
 619         case tok_semicolon:
 620           break;
 621
 622         case tok_identifier:
 623           if (symval->type_name == NULL)
 624             symval->type_name = name;
 625           else if (strcmp (name, symval->type_name) != 0)
 626             complain (_("type redeclaration for %s"), symval->tag);
 627
 628           break;
 629
 630         default:
 631           complain (_("invalid %%type declaration due to item: %s"),
 632                     token_buffer);
 633           skip_to_char ('%');
 634         }
 635     }
 636 }
 637
 638
 639
 640 /*----------------------------------------------------------------.
 641 | Read in a %left, %right or %nonassoc declaration and record its |
 642 | information.                                                    |
 643 `----------------------------------------------------------------*/
 644
 645 static void
 646 parse_assoc_decl (associativity assoc)
 647 {
 648   char *name = NULL;
 649   int prev = 0;
 650
 651   lastprec++;                   /* Assign a new precedence level, never 0.  */
 652
 653   for (;;)
 654     {
 655       token_t t;
 656       int tmp_char = ungetc (skip_white_space (), finput);
 657
 658       if (tmp_char == '%')
 659         return;
 660       if (tmp_char == EOF)
 661         fatal (_("Premature EOF after %s"), token_buffer);
 662
 663       t = lex ();
 664
 665       switch (t)
 666         {
 667         case tok_typename:
 668           name = xstrdup (token_buffer);
 669           break;
 670
 671         case tok_comma:
 672           break;
 673
 674         case tok_identifier:
 675           if (symval->prec != 0)
 676             complain (_("redefining precedence of %s"), symval->tag);
 677           symval->prec = lastprec;
 678           symval->assoc = assoc;
 679           if (symval->class == nterm_sym)
 680             complain (_("symbol %s redefined"), symval->tag);
 681           symval->class = token_sym;
 682           if (name)
 683             {                   /* record the type, if one is specified */
 684               if (symval->type_name == NULL)
 685                 symval->type_name = name;
 686               else if (strcmp (name, symval->type_name) != 0)
 687                 complain (_("type redeclaration for %s"), symval->tag);
 688             }
 689           break;
 690
 691         case tok_number:
 692           if (prev == tok_identifier)
 693             {
 694               symval->user_token_number = numval;
 695             }
 696           else
 697             {
 698               complain (_
 699                         ("invalid text (%s) - number should be after identifier"),
 700 token_buffer);
 701               skip_to_char ('%');
 702             }
 703           break;
 704
 705         case tok_semicolon:
 706           return;
 707
 708         default:
 709           complain (_("unexpected item: %s"), token_buffer);
 710           skip_to_char ('%');
 711         }
 712
 713       prev = t;
 714     }
 715 }
 716
 717
 718
 719 /*--------------------------------------------------------------.
 720 | Copy the union declaration into the stype muscle              |
 721 | (and fdefines),  where it is made into the definition of      |
 722 | YYSTYPE, the type of elements of the parser value stack.      |
 723 `--------------------------------------------------------------*/
 724
 725 static void
 726 parse_union_decl (void)
 727 {
 728   int c;
 729   int count = 0;
 730   bool done = FALSE;
 731   struct obstack union_obstack;
 732   if (typed)
 733     complain (_("multiple %s declarations"), "%union");
 734
 735   typed = 1;
 736
 737   obstack_init (&union_obstack);
 738   obstack_sgrow (&union_obstack, "union");
 739
 740   while (!done)
 741     {
 742       c = xgetc (finput);
 743
 744       /* If C contains '/', it is output by copy_comment ().  */
 745       if (c != '/')
 746         obstack_1grow (&union_obstack, c);
 747
 748       switch (c)
 749         {
 750         case '\n':
 751           lineno++;
 752           break;
 753
 754         case '/':
 755           copy_comment (finput, &union_obstack);
 756           break;
 757
 758         case '{':
 759           count++;
 760           break;
 761
 762         case '}':
 763           /* FIXME: Errr.  How could this happen???. --akim */
 764           if (count == 0)
 765             complain (_("unmatched %s"), "`}'");
 766           count--;
 767           if (!count)
 768             done = TRUE;
 769           break;
 770         }
 771     }
 772
 773   /* JF don't choke on trailing semi */
 774   c = skip_white_space ();
 775   if (c != ';')
 776     ungetc (c, finput);
 777   obstack_1grow (&union_obstack, 0);
 778   muscle_insert ("stype", obstack_finish (&union_obstack));
 779 }
 780
 781
 782 /*-------------------------------------------------------.
 783 | Parse the declaration %expect N which says to expect N |
 784 | shift-reduce conflicts.                                |
 785 `-------------------------------------------------------*/
 786
 787 static void
 788 parse_expect_decl (void)
 789 {
 790   int c = skip_white_space ();
 791   ungetc (c, finput);
 792
 793   if (!isdigit (c))
 794     complain (_("argument of %%expect is not an integer"));
 795   else
 796     expected_conflicts = read_signed_integer (finput);
 797 }
 798
 799
 800 /*-------------------------------------------------------------------.
 801 | Parse what comes after %thong.  the full syntax is                 |
 802 |                                                                    |
 803 |                %thong <type> token number literal                  |
 804 |                                                                    |
 805 | the <type> or number may be omitted.  The number specifies the     |
 806 | user_token_number.                                                 |
 807 |                                                                    |
 808 | Two symbols are entered in the table, one for the token symbol and |
 809 | one for the literal.  Both are given the <type>, if any, from the  |
 810 | declaration.  The ->user_token_number of the first is SALIAS and   |
 811 | the ->user_token_number of the second is set to the number, if     |
 812 | any, from the declaration.  The two symbols are linked via         |
 813 | pointers in their ->alias fields.                                  |
 814 |                                                                    |
 815 | During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter,    |
 816 | only the literal string is retained it is the literal string that  |
 817 | is output to yytname                                               |
 818 `-------------------------------------------------------------------*/
 819
 820 static void
 821 parse_thong_decl (void)
 822 {
 823   token_t token;
 824   struct bucket *symbol;
 825   char *typename = 0;
 826   int usrtoknum = SUNDEF;
 827
 828   token = lex ();               /* fetch typename or first token */
 829   if (token == tok_typename)
 830     {
 831       typename = xstrdup (token_buffer);
 832       value_components_used = 1;
 833       token = lex ();           /* fetch first token */
 834     }
 835
 836   /* process first token */
 837
 838   if (token != tok_identifier)
 839     {
 840       complain (_("unrecognized item %s, expected an identifier"),
 841                 token_buffer);
 842       skip_to_char ('%');
 843       return;
 844     }
 845   symval->class = token_sym;
 846   symval->type_name = typename;
 847   symval->user_token_number = SALIAS;
 848   symbol = symval;
 849
 850   token = lex ();               /* get number or literal string */
 851
 852   if (token == tok_number)
 853     {
 854       usrtoknum = numval;
 855       token = lex ();           /* okay, did number, now get literal */
 856     }
 857
 858   /* process literal string token */
 859
 860   if (token != tok_identifier || *symval->tag != '\"')
 861     {
 862       complain (_("expected string constant instead of %s"), token_buffer);
 863       skip_to_char ('%');
 864       return;
 865     }
 866   symval->class = token_sym;
 867   symval->type_name = typename;
 868   symval->user_token_number = usrtoknum;
 869
 870   symval->alias = symbol;
 871   symbol->alias = symval;
 872
 873   /* symbol and symval combined are only one symbol.  */
 874   nsyms--;
 875 }
 876
 877 static void
 878 parse_muscle_decl (void)
 879 {
 880   int ch = ungetc (skip_white_space (), finput);
 881   char* muscle_key;
 882   char* muscle_value;
 883
 884   /* Read key. */
 885   if (!isalpha (ch) && ch != '_')
 886     {
 887       complain (_("invalid %s declaration"), "%define");
 888       skip_to_char ('%');
 889       return;
 890     }
 891   copy_identifier (finput, &muscle_obstack);
 892   obstack_1grow (&muscle_obstack, 0);
 893   muscle_key = obstack_finish (&muscle_obstack);
 894
 895   /* Read value. */
 896   ch = skip_white_space ();
 897   if (ch != '"')
 898     {
 899       ungetc (ch, finput);
 900       if (ch != EOF)
 901         {
 902           complain (_("invalid %s declaration"), "%define");
 903           skip_to_char ('%');
 904           return;
 905         }
 906       else
 907         fatal (_("Premature EOF after %s"), "\"");
 908     }
 909   copy_string2 (finput, &muscle_obstack, '"', 0);
 910   obstack_1grow (&muscle_obstack, 0);
 911   muscle_value = obstack_finish (&muscle_obstack);
 912
 913   /* Store the (key, value) pair in the environment. */
 914   muscle_insert (muscle_key, muscle_value);
 915 }
 916
 917
 918
 919 /*---------------------------------.
 920 | Parse a double quoted parameter. |
 921 `---------------------------------*/
 922
 923 static const char *
 924 parse_dquoted_param (const char *from)
 925 {
 926   struct obstack param_obstack;
 927   const char *param = NULL;
 928   int c;
 929
 930   obstack_init (&param_obstack);
 931   c = skip_white_space ();
 932
 933   if (c != '"')
 934     {
 935       complain (_("invalid %s declaration"), from);
 936       ungetc (c, finput);
 937       skip_to_char ('%');
 938       return NULL;
 939     }
 940
 941   while ((c = literalchar ()) != '"')
 942     obstack_1grow (&param_obstack, c);
 943
 944   obstack_1grow (&param_obstack, '\0');
 945   param = obstack_finish (&param_obstack);
 946
 947   if (c != '"' || strlen (param) == 0)
 948     {
 949       complain (_("invalid %s declaration"), from);
 950       if (c != '"')
 951         ungetc (c, finput);
 952       skip_to_char ('%');
 953       return NULL;
 954     }
 955
 956   return param;
 957 }
 958
 959 /*----------------------------------.
 960 | Parse what comes after %skeleton. |
 961 `----------------------------------*/
 962
 963 static void
 964 parse_skel_decl (void)
 965 {
 966   skeleton = parse_dquoted_param ("%skeleton");
 967 }
 968
 969 /*----------------------------------------------------------------.
 970 | Read from finput until `%%' is seen.  Discard the `%%'.  Handle |
 971 | any `%' declarations, and copy the contents of any `%{ ... %}'  |
 972 | groups to ATTRS_OBSTACK.                                        |
 973 `----------------------------------------------------------------*/
 974
 975 static void
 976 read_declarations (void)
 977 {
 978   for (;;)
 979     {
 980       int c = skip_white_space ();
 981
 982       if (c == '%')
 983         {
 984           token_t tok = parse_percent_token ();
 985
 986           switch (tok)
 987             {
 988             case tok_two_percents:
 989               return;
 990
 991             case tok_percent_left_curly:
 992               copy_definition ();
 993               break;
 994
 995             case tok_token:
 996               parse_token_decl (token_sym, nterm_sym);
 997               break;
 998
 999             case tok_nterm:
1000               parse_token_decl (nterm_sym, token_sym);
1001               break;
1002
1003             case tok_type:
1004               parse_type_decl ();
1005               break;
1006
1007             case tok_start:
1008               parse_start_decl ();
1009               break;
1010
1011             case tok_union:
1012               parse_union_decl ();
1013               break;
1014
1015             case tok_expect:
1016               parse_expect_decl ();
1017               break;
1018
1019             case tok_thong:
1020               parse_thong_decl ();
1021               break;
1022
1023             case tok_left:
1024               parse_assoc_decl (left_assoc);
1025               break;
1026
1027             case tok_right:
1028               parse_assoc_decl (right_assoc);
1029               break;
1030
1031             case tok_nonassoc:
1032               parse_assoc_decl (non_assoc);
1033               break;
1034
1035             case tok_define:
1036               parse_muscle_decl ();
1037               break;
1038
1039             case tok_skel:
1040               parse_skel_decl ();
1041               break;
1042
1043             case tok_noop:
1044               break;
1045
1046             case tok_stropt:
1047             case tok_intopt:
1048             case tok_obsolete:
1049               abort ();
1050               break;
1051
1052             case tok_illegal:
1053             default:
1054               complain (_("unrecognized: %s"), token_buffer);
1055               skip_to_char ('%');
1056             }
1057         }
1058       else if (c == EOF)
1059         fatal (_("no input grammar"));
1060       else
1061         {
1062           char buf[] = "c";
1063           buf[0] = c;
1064           complain (_("unknown character: %s"), quote (buf));
1065           skip_to_char ('%');
1066         }
1067     }
1068 }
1069 \f
1070 /*-------------------------------------------------------------------.
1071 | Assuming that a `{' has just been seen, copy everything up to the  |
1072 | matching `}' into the actions file.  STACK_OFFSET is the number of |
1073 | values in the current rule so far, which says where to find `$0'   |
1074 | with respect to the top of the stack.                              |
1075 `-------------------------------------------------------------------*/
1076
1077 static void
1078 copy_action (symbol_list *rule, int stack_offset)
1079 {
1080   int c;
1081   int count;
1082
1083   /* offset is always 0 if parser has already popped the stack pointer */
1084   if (semantic_parser)
1085     stack_offset = 0;
1086
1087   count = 1;
1088   c = getc (finput);
1089
1090   while (count > 0)
1091     {
1092       while (c != '}')
1093         {
1094           switch (c)
1095             {
1096             case '\n':
1097               obstack_1grow (&action_obstack, c);
1098               lineno++;
1099               break;
1100
1101             case '{':
1102               obstack_1grow (&action_obstack, c);
1103               count++;
1104               break;
1105
1106             case '\'':
1107             case '"':
1108               copy_string (finput, &action_obstack, c);
1109               break;
1110
1111             case '/':
1112               copy_comment (finput, &action_obstack);
1113               break;
1114
1115             case '$':
1116               copy_dollar (finput, &action_obstack,
1117                            rule, stack_offset);
1118               break;
1119
1120             case '@':
1121               copy_at (finput, &action_obstack,
1122                        stack_offset);
1123               break;
1124
1125             case EOF:
1126               fatal (_("unmatched %s"), "`{'");
1127
1128             default:
1129               obstack_1grow (&action_obstack, c);
1130             }
1131
1132           c = getc (finput);
1133         }
1134
1135       /* above loop exits when c is '}' */
1136
1137       if (--count)
1138         {
1139           obstack_1grow (&action_obstack, c);
1140           c = getc (finput);
1141         }
1142     }
1143
1144   obstack_1grow (&action_obstack, '\0');
1145   rule->action = obstack_finish (&action_obstack);
1146   rule->action_line = lineno;
1147 }
1148 \f
1149 /*-------------------------------------------------------------------.
1150 | After `%guard' is seen in the input file, copy the actual guard    |
1151 | into the guards file.  If the guard is followed by an action, copy |
1152 | that into the actions file.  STACK_OFFSET is the number of values  |
1153 | in the current rule so far, which says where to find `$0' with     |
1154 | respect to the top of the stack, for the simple parser in which    |
1155 | the stack is not popped until after the guard is run.              |
1156 `-------------------------------------------------------------------*/
1157
1158 static void
1159 copy_guard (symbol_list *rule, int stack_offset)
1160 {
1161   int c;
1162   int count;
1163   int brace_flag = 0;
1164
1165   /* offset is always 0 if parser has already popped the stack pointer */
1166   if (semantic_parser)
1167     stack_offset = 0;
1168
1169   obstack_fgrow1 (&guard_obstack, "\ncase %d:\n", nrules);
1170   if (!no_lines_flag)
1171     obstack_fgrow2 (&guard_obstack, muscle_find ("linef"),
1172                     lineno, quotearg_style (c_quoting_style,
1173                                             muscle_find ("filename")));
1174   obstack_1grow (&guard_obstack, '{');
1175
1176   count = 0;
1177   c = getc (finput);
1178
1179   while (brace_flag ? (count > 0) : (c != ';'))
1180     {
1181       switch (c)
1182         {
1183         case '\n':
1184           obstack_1grow (&guard_obstack, c);
1185           lineno++;
1186           break;
1187
1188         case '{':
1189           obstack_1grow (&guard_obstack, c);
1190           brace_flag = 1;
1191           count++;
1192           break;
1193
1194         case '}':
1195           obstack_1grow (&guard_obstack, c);
1196           if (count > 0)
1197             count--;
1198           else
1199             {
1200               complain (_("unmatched %s"), "`}'");
1201               c = getc (finput);        /* skip it */
1202             }
1203           break;
1204
1205         case '\'':
1206         case '"':
1207           copy_string (finput, &guard_obstack, c);
1208           break;
1209
1210         case '/':
1211           copy_comment (finput, &guard_obstack);
1212           break;
1213
1214         case '$':
1215           copy_dollar (finput, &guard_obstack, rule, stack_offset);
1216           break;
1217
1218         case '@':
1219           copy_at (finput, &guard_obstack, stack_offset);
1220           break;
1221
1222         case EOF:
1223           fatal ("%s", _("unterminated %guard clause"));
1224
1225         default:
1226           obstack_1grow (&guard_obstack, c);
1227         }
1228
1229       if (c != '}' || count != 0)
1230         c = getc (finput);
1231     }
1232
1233   c = skip_white_space ();
1234
1235   obstack_sgrow (&guard_obstack, ";\n    break;}");
1236   if (c == '{')
1237     copy_action (rule, stack_offset);
1238   else if (c == '=')
1239     {
1240       c = getc (finput);        /* why not skip_white_space -wjh */
1241       if (c == '{')
1242         copy_action (rule, stack_offset);
1243     }
1244   else
1245     ungetc (c, finput);
1246 }
1247 \f
1248
1249 /*-------------------------------------------------------------------.
1250 | Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1251 | with the user's names.                                             |
1252 `-------------------------------------------------------------------*/
1253
1254 static bucket *
1255 gensym (void)
1256 {
1257   /* Incremented for each generated symbol */
1258   static int gensym_count = 0;
1259   static char buf[256];
1260
1261   bucket *sym;
1262
1263   sprintf (buf, "@%d", ++gensym_count);
1264   token_buffer = buf;
1265   sym = getsym (token_buffer);
1266   sym->class = nterm_sym;
1267   sym->value = nvars++;
1268   return sym;
1269 }
1270 \f
1271 /*-------------------------------------------------------------------.
1272 | Parse the input grammar into a one symbol_list structure.  Each    |
1273 | rule is represented by a sequence of symbols: the left hand side   |
1274 | followed by the contents of the right hand side, followed by a     |
1275 | null pointer instead of a symbol to terminate the rule.  The next  |
1276 | symbol is the lhs of the following rule.                           |
1277 |                                                                    |
1278 | All guards and actions are copied out to the appropriate files,    |
1279 | labelled by the rule number they apply to.                         |
1280 |                                                                    |
1281 | Bison used to allow some %directives in the rules sections, but    |
1282 | this is no longer consider appropriate: (i) the documented grammar |
1283 | doesn't claim it, (ii), it would promote bad style, (iii), error   |
1284 | recovery for %directives consists in skipping the junk until a `%' |
1285 | is seen and helrp synchronizing.  This scheme is definitely wrong  |
1286 | in the rules section.                                              |
1287 `-------------------------------------------------------------------*/
1288
1289 static void
1290 readgram (void)
1291 {
1292   token_t t;
1293   bucket *lhs = NULL;
1294   symbol_list *p = NULL;
1295   symbol_list *p1 = NULL;
1296   bucket *bp;
1297
1298   /* Points to first symbol_list of current rule. its symbol is the
1299      lhs of the rule.  */
1300   symbol_list *crule = NULL;
1301   /* Points to the symbol_list preceding crule.  */
1302   symbol_list *crule1 = NULL;
1303
1304   t = lex ();
1305
1306   while (t != tok_two_percents && t != tok_eof)
1307     if (t == tok_identifier || t == tok_bar)
1308       {
1309         int action_flag = 0;
1310         /* Number of symbols in rhs of this rule so far */
1311         int rulelength = 0;
1312         int xactions = 0;       /* JF for error checking */
1313         bucket *first_rhs = 0;
1314
1315         if (t == tok_identifier)
1316           {
1317             lhs = symval;
1318
1319             if (!start_flag)
1320               {
1321                 startval = lhs;
1322                 start_flag = 1;
1323               }
1324
1325             t = lex ();
1326             if (t != tok_colon)
1327               {
1328                 complain (_("ill-formed rule: initial symbol not followed by colon"));
1329                 unlex (t);
1330               }
1331           }
1332
1333         if (nrules == 0 && t == tok_bar)
1334           {
1335             complain (_("grammar starts with vertical bar"));
1336             lhs = symval;       /* BOGUS: use a random symval */
1337           }
1338         /* start a new rule and record its lhs.  */
1339
1340         nrules++;
1341         nitems++;
1342
1343         p = symbol_list_new (lhs);
1344
1345         crule1 = p1;
1346         if (p1)
1347           p1->next = p;
1348         else
1349           grammar = p;
1350
1351         p1 = p;
1352         crule = p;
1353
1354         /* mark the rule's lhs as a nonterminal if not already so.  */
1355
1356         if (lhs->class == unknown_sym)
1357           {
1358             lhs->class = nterm_sym;
1359             lhs->value = nvars;
1360             nvars++;
1361           }
1362         else if (lhs->class == token_sym)
1363           complain (_("rule given for %s, which is a token"), lhs->tag);
1364
1365         /* read the rhs of the rule.  */
1366
1367         for (;;)
1368           {
1369             t = lex ();
1370             if (t == tok_prec)
1371               {
1372                 t = lex ();
1373                 crule->ruleprec = symval;
1374                 t = lex ();
1375               }
1376
1377             if (!(t == tok_identifier || t == tok_left_curly))
1378               break;
1379
1380             /* If next token is an identifier, see if a colon follows it.
1381                If one does, exit this rule now.  */
1382             if (t == tok_identifier)
1383               {
1384                 bucket *ssave;
1385                 token_t t1;
1386
1387                 ssave = symval;
1388                 t1 = lex ();
1389                 unlex (t1);
1390                 symval = ssave;
1391                 if (t1 == tok_colon)
1392                   break;
1393
1394                 if (!first_rhs) /* JF */
1395                   first_rhs = symval;
1396                 /* Not followed by colon =>
1397                    process as part of this rule's rhs.  */
1398               }
1399
1400             /* If we just passed an action, that action was in the middle
1401                of a rule, so make a dummy rule to reduce it to a
1402                non-terminal.  */
1403             if (action_flag)
1404               {
1405                 /* Since the action was written out with this rule's
1406                    number, we must give the new rule this number by
1407                    inserting the new rule before it.  */
1408
1409                 /* Make a dummy nonterminal, a gensym.  */
1410                 bucket *sdummy = gensym ();
1411
1412                 /* Make a new rule, whose body is empty, before the
1413                    current one, so that the action just read can
1414                    belong to it.  */
1415                 nrules++;
1416                 nitems++;
1417                 p = symbol_list_new (sdummy);
1418                 /* Attach its lineno to that of the host rule. */
1419                 p->line = crule->line;
1420                 if (crule1)
1421                   crule1->next = p;
1422                 else
1423                   grammar = p;
1424                 /* End of the rule. */
1425                 crule1 = symbol_list_new (NULL);
1426                 crule1->next = crule;
1427
1428                 p->next = crule1;
1429
1430                 /* Insert the dummy generated by that rule into this
1431                    rule.  */
1432                 nitems++;
1433                 p = symbol_list_new (sdummy);
1434                 p1->next = p;
1435                 p1 = p;
1436
1437                 action_flag = 0;
1438               }
1439
1440             if (t == tok_identifier)
1441               {
1442                 nitems++;
1443                 p = symbol_list_new (symval);
1444                 p1->next = p;
1445                 p1 = p;
1446               }
1447             else                /* handle an action.  */
1448               {
1449                 copy_action (crule, rulelength);
1450                 action_flag = 1;
1451                 xactions++;     /* JF */
1452               }
1453             rulelength++;
1454           }                     /* end of  read rhs of rule */
1455
1456         /* Put an empty link in the list to mark the end of this rule  */
1457         p = symbol_list_new (NULL);
1458         p1->next = p;
1459         p1 = p;
1460
1461         if (t == tok_prec)
1462           {
1463             complain (_("two @prec's in a row"));
1464             t = lex ();
1465             crule->ruleprec = symval;
1466             t = lex ();
1467           }
1468         if (t == tok_guard)
1469           {
1470             if (!semantic_parser)
1471               complain (_("%%guard present but %%semantic_parser not specified"));
1472
1473             copy_guard (crule, rulelength);
1474             t = lex ();
1475           }
1476         else if (t == tok_left_curly)
1477           {
1478             /* This case never occurs -wjh */
1479             if (action_flag)
1480               complain (_("two actions at end of one rule"));
1481             copy_action (crule, rulelength);
1482             action_flag = 1;
1483             xactions++; /* -wjh */
1484             t = lex ();
1485           }
1486         /* If $$ is being set in default way, report if any type
1487            mismatch.  */
1488         else if (!xactions
1489                  && first_rhs && lhs->type_name != first_rhs->type_name)
1490           {
1491             if (lhs->type_name == 0
1492                 || first_rhs->type_name == 0
1493                 || strcmp (lhs->type_name, first_rhs->type_name))
1494               complain (_("type clash (`%s' `%s') on default action"),
1495                         lhs->type_name ? lhs->type_name : "",
1496                         first_rhs->type_name ? first_rhs->type_name : "");
1497           }
1498         /* Warn if there is no default for $$ but we need one.  */
1499         else if (!xactions && !first_rhs && lhs->type_name != 0)
1500           complain (_("empty rule for typed nonterminal, and no action"));
1501         if (t == tok_semicolon)
1502           t = lex ();
1503       }
1504     else
1505       {
1506         complain (_("invalid input: %s"), quote (token_buffer));
1507         t = lex ();
1508       }
1509
1510
1511   /* grammar has been read.  Do some checking */
1512
1513   if (nsyms > MAXSHORT)
1514     fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1515            MAXSHORT);
1516   if (nrules == 0)
1517     fatal (_("no rules in the input grammar"));
1518
1519   /* Report any undefined symbols and consider them nonterminals.  */
1520
1521   for (bp = firstsymbol; bp; bp = bp->next)
1522     if (bp->class == unknown_sym)
1523       {
1524         complain (_
1525                   ("symbol %s is used, but is not defined as a token and has no rules"),
1526                   bp->tag);
1527         bp->class = nterm_sym;
1528         bp->value = nvars++;
1529       }
1530
1531   ntokens = nsyms - nvars;
1532 }
1533
1534 /* At the end of the grammar file, some C source code must
1535    be stored. It is going to be associated to the epilogue
1536    directive.  */
1537 static void
1538 read_additionnal_code (void)
1539 {
1540   char c;
1541   struct obstack el_obstack;
1542
1543   obstack_init (&el_obstack);
1544
1545   if (!no_lines_flag)
1546     {
1547       obstack_fgrow2 (&el_obstack, muscle_find ("linef"),
1548                       lineno, quotearg_style (c_quoting_style,
1549                                               muscle_find("filename")));
1550     }
1551
1552   while ((c = getc (finput)) != EOF)
1553     obstack_1grow (&el_obstack, c);
1554
1555   obstack_1grow (&el_obstack, 0);
1556   muscle_insert ("epilogue", obstack_finish (&el_obstack));
1557 }
1558
1559 \f
1560 /*------------------------------------------------------------------.
1561 | Set TOKEN_TRANSLATIONS.  Check that no two symbols share the same |
1562 | number.                                                           |
1563 `------------------------------------------------------------------*/
1564
1565 static void
1566 token_translations_init (void)
1567 {
1568   bucket *bp = NULL;
1569   int i;
1570
1571   token_translations = XCALLOC (short, max_user_token_number + 1);
1572
1573   /* Initialize all entries for literal tokens to 2, the internal
1574      token number for $undefined., which represents all invalid
1575      inputs.  */
1576   for (i = 0; i <= max_user_token_number; i++)
1577     token_translations[i] = 2;
1578
1579   for (bp = firstsymbol; bp; bp = bp->next)
1580     {
1581       /* Non-terminal? */
1582       if (bp->value >= ntokens)
1583         continue;
1584       /* A token string alias? */
1585       if (bp->user_token_number == SALIAS)
1586         continue;
1587
1588       assert (bp->user_token_number != SUNDEF);
1589
1590       /* A token which translation has already been set? */
1591       if (token_translations[bp->user_token_number] != 2)
1592         complain (_("tokens %s and %s both assigned number %d"),
1593                   tags[token_translations[bp->user_token_number]],
1594                   bp->tag, bp->user_token_number);
1595       token_translations[bp->user_token_number] = bp->value;
1596     }
1597 }
1598
1599
1600 /*------------------------------------------------------------------.
1601 | Assign symbol numbers, and write definition of token names into   |
1602 | FDEFINES.  Set up vectors TAGS and SPREC of names and precedences |
1603 | of symbols.                                                       |
1604 `------------------------------------------------------------------*/
1605
1606 static void
1607 packsymbols (void)
1608 {
1609   bucket *bp = NULL;
1610   int tokno = 1;
1611   int last_user_token_number;
1612   static char DOLLAR[] = "$";
1613
1614   tags = XCALLOC (char *, nsyms + 1);
1615   user_toknums = XCALLOC (short, nsyms + 1);
1616
1617   sprec = XCALLOC (short, nsyms);
1618   sassoc = XCALLOC (short, nsyms);
1619
1620   /* The EOF token. */
1621   tags[0] = DOLLAR;
1622   user_toknums[0] = 0;
1623
1624   max_user_token_number = 256;
1625   last_user_token_number = 256;
1626
1627   for (bp = firstsymbol; bp; bp = bp->next)
1628     {
1629       if (bp->class == nterm_sym)
1630         {
1631           bp->value += ntokens;
1632         }
1633       else if (bp->alias)
1634         {
1635           /* this symbol and its alias are a single token defn.
1636              allocate a tokno, and assign to both check agreement of
1637              ->prec and ->assoc fields and make both the same */
1638           if (bp->value == 0)
1639             bp->value = bp->alias->value = tokno++;
1640
1641           if (bp->prec != bp->alias->prec)
1642             {
1643               if (bp->prec != 0 && bp->alias->prec != 0
1644                   && bp->user_token_number == SALIAS)
1645                 complain (_("conflicting precedences for %s and %s"),
1646                           bp->tag, bp->alias->tag);
1647               if (bp->prec != 0)
1648                 bp->alias->prec = bp->prec;
1649               else
1650                 bp->prec = bp->alias->prec;
1651             }
1652
1653           if (bp->assoc != bp->alias->assoc)
1654             {
1655               if (bp->assoc != 0 && bp->alias->assoc != 0
1656                   && bp->user_token_number == SALIAS)
1657                 complain (_("conflicting assoc values for %s and %s"),
1658                           bp->tag, bp->alias->tag);
1659               if (bp->assoc != 0)
1660                 bp->alias->assoc = bp->assoc;
1661               else
1662                 bp->assoc = bp->alias->assoc;
1663             }
1664
1665           if (bp->user_token_number == SALIAS)
1666             continue;           /* do not do processing below for SALIASs */
1667
1668         }
1669       else                      /* bp->class == token_sym */
1670         {
1671           bp->value = tokno++;
1672         }
1673
1674       if (bp->class == token_sym)
1675         {
1676           if (bp->user_token_number == SUNDEF)
1677             bp->user_token_number = ++last_user_token_number;
1678           if (bp->user_token_number > max_user_token_number)
1679             max_user_token_number = bp->user_token_number;
1680         }
1681
1682       tags[bp->value] = bp->tag;
1683       user_toknums[bp->value] = bp->user_token_number;
1684       sprec[bp->value] = bp->prec;
1685       sassoc[bp->value] = bp->assoc;
1686     }
1687
1688   token_translations_init ();
1689
1690   error_token_number = errtoken->value;
1691
1692   if (startval->class == unknown_sym)
1693     fatal (_("the start symbol %s is undefined"), startval->tag);
1694   else if (startval->class == token_sym)
1695     fatal (_("the start symbol %s is a token"), startval->tag);
1696
1697   start_symbol = startval->value;
1698 }
1699
1700
1701 /*---------------------------------------------------------------.
1702 | Save the definition of token names in the `TOKENDEFS' muscle.  |
1703 `---------------------------------------------------------------*/
1704
1705 static void
1706 symbols_save (void)
1707 {
1708   struct obstack tokendefs;
1709   bucket *bp;
1710   char *cp, *symbol;
1711   char c;
1712   obstack_init (&tokendefs);
1713
1714   for (bp = firstsymbol; bp; bp = bp->next)
1715     {
1716       symbol = bp->tag;                /* get symbol */
1717
1718       if (bp->value >= ntokens)
1719         continue;
1720       if (bp->user_token_number == SALIAS)
1721         continue;
1722       if ('\'' == *symbol)
1723         continue;               /* skip literal character */
1724       if (bp == errtoken)
1725         continue;               /* skip error token */
1726       if ('\"' == *symbol)
1727         {
1728           /* use literal string only if given a symbol with an alias */
1729           if (bp->alias)
1730             symbol = bp->alias->tag;
1731           else
1732             continue;
1733         }
1734
1735       /* Don't #define nonliteral tokens whose names contain periods.  */
1736       cp = symbol;
1737       while ((c = *cp++) && c != '.');
1738       if (c != '\0')
1739         continue;
1740
1741       obstack_fgrow2 (&tokendefs, "# define\t%s\t%d\n",
1742                       symbol, bp->user_token_number);
1743       if (semantic_parser)
1744         /* FIXME: This is probably wrong, and should be just as
1745            above. --akim.  */
1746         obstack_fgrow2 (&tokendefs, "# define\tT%s\t%d\n", symbol, bp->value);
1747     }
1748
1749   obstack_1grow (&tokendefs, 0);
1750   muscle_insert ("tokendef", xstrdup (obstack_finish (&tokendefs)));
1751   obstack_free (&tokendefs, NULL);
1752 }
1753
1754
1755 /*---------------------------------------------------------------.
1756 | Convert the rules into the representation using RRHS, RLHS and |
1757 | RITEMS.                                                        |
1758 `---------------------------------------------------------------*/
1759
1760 static void
1761 packgram (void)
1762 {
1763   int itemno;
1764   int ruleno;
1765   symbol_list *p;
1766
1767   ritem = XCALLOC (short, nitems + 1);
1768   rule_table = XCALLOC (rule_t, nrules) - 1;
1769
1770   itemno = 0;
1771   ruleno = 1;
1772
1773   p = grammar;
1774   while (p)
1775     {
1776       bucket *ruleprec = p->ruleprec;
1777       rule_table[ruleno].lhs = p->sym->value;
1778       rule_table[ruleno].rhs = itemno;
1779       rule_table[ruleno].line = p->line;
1780       rule_table[ruleno].useful = TRUE;
1781       rule_table[ruleno].action = p->action;
1782       rule_table[ruleno].action_line = p->action_line;
1783
1784       p = p->next;
1785       while (p && p->sym)
1786         {
1787           ritem[itemno++] = p->sym->value;
1788           /* A rule gets by default the precedence and associativity
1789              of the last token in it.  */
1790           if (p->sym->class == token_sym)
1791             {
1792               rule_table[ruleno].prec = p->sym->prec;
1793               rule_table[ruleno].assoc = p->sym->assoc;
1794             }
1795           if (p)
1796             p = p->next;
1797         }
1798
1799       /* If this rule has a %prec,
1800          the specified symbol's precedence replaces the default.  */
1801       if (ruleprec)
1802         {
1803           rule_table[ruleno].prec = ruleprec->prec;
1804           rule_table[ruleno].assoc = ruleprec->assoc;
1805           rule_table[ruleno].precsym = ruleprec->value;
1806         }
1807
1808       ritem[itemno++] = -ruleno;
1809       ruleno++;
1810
1811       if (p)
1812         p = p->next;
1813     }
1814
1815   ritem[itemno] = 0;
1816
1817   if (trace_flag)
1818     ritem_print (stderr);
1819 }
1820 \f
1821 /*-------------------------------------------------------------------.
1822 | Read in the grammar specification and record it in the format      |
1823 | described in gram.h.  All guards are copied into the GUARD_OBSTACK |
1824 | and all actions into ACTION_OBSTACK, in each case forming the body |
1825 | of a C function (YYGUARD or YYACTION) which contains a switch      |
1826 | statement to decide which guard or action to execute.              |
1827 `-------------------------------------------------------------------*/
1828
1829 void
1830 reader (void)
1831 {
1832   start_flag = 0;
1833   startval = NULL;              /* start symbol not specified yet. */
1834
1835   nsyms = 1;
1836   nvars = 0;
1837   nrules = 0;
1838   nitems = 0;
1839
1840   typed = 0;
1841   lastprec = 0;
1842
1843   semantic_parser = 0;
1844   pure_parser = 0;
1845
1846   grammar = NULL;
1847
1848   lex_init ();
1849   lineno = 1;
1850
1851   /* Initialize the muscle obstack.  */
1852   obstack_init (&muscle_obstack);
1853
1854   /* Initialize the symbol table.  */
1855   tabinit ();
1856
1857   /* Construct the error token */
1858   errtoken = getsym ("error");
1859   errtoken->class = token_sym;
1860   errtoken->user_token_number = 256;    /* Value specified by POSIX.  */
1861
1862   /* Construct a token that represents all undefined literal tokens.
1863      It is always token number 2.  */
1864   undeftoken = getsym ("$undefined.");
1865   undeftoken->class = token_sym;
1866   undeftoken->user_token_number = 2;
1867
1868   /* Read the declaration section.  Copy %{ ... %} groups to
1869      TABLE_OBSTACK and FDEFINES file.  Also notice any %token, %left,
1870      etc. found there.  */
1871   read_declarations ();
1872   /* Read in the grammar, build grammar in list form.  Write out
1873      guards and actions.  */
1874   readgram ();
1875   /* Some C code is given at the end of the grammar file. */
1876   read_additionnal_code ();
1877
1878   /* Assign the symbols their symbol numbers.  Write #defines for the
1879      token symbols into FDEFINES if requested.  */
1880   packsymbols ();
1881   /* Save them. */
1882   symbols_save ();
1883
1884   /* Convert the grammar into the format described in gram.h.  */
1885   packgram ();
1886 }