src/reader.c

   1 /* Input parser for bison
   2    Copyright 1984, 1986, 1989, 1992, 1998, 2000
   3    Free Software Foundation, Inc.
   4
   5    This file is part of Bison, the GNU Compiler Compiler.
   6
   7    Bison is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 2, or (at your option)
  10    any later version.
  11
  12    Bison is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with Bison; see the file COPYING.  If not, write to
  19    the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20    Boston, MA 02111-1307, USA.  */
  21
  22
#include "system.h"
#include <limits.h>
#include "obstack.h"
#include "quotearg.h"
#include "quote.h"
#include "getargs.h"
#include "files.h"
#include "xalloc.h"
#include "symtab.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
#include "macrotab.h"
  38
/* Number of slots allocated (but not necessarily used yet) in `rline'  */
static int rline_allocated;

/* One element of a singly linked list of symbols, chained through
   NEXT.  get_type_name walks such a list to find the N-th symbol of a
   rule and treats an element whose SYM is NULL as lying past the end
   of that rule.
   NOTE(review): RULEPREC is not used in this part of the file;
   presumably it records the symbol named by %prec -- confirm.  */
typedef struct symbol_list
{
  struct symbol_list *next;
  bucket *sym;
  bucket *ruleprec;
}
symbol_list;

/* Current line number in the grammar file.  Incremented by the copy
   and parse routines each time they consume a newline, and written
   into the `#line' directives they emit.  */
int lineno;
/* NOTE(review): TAGS and USER_TOKNUMS are not used in this part of
   the file; presumably they are per-symbol tables indexed by symbol
   number -- confirm against the code that fills them.  */
char **tags;
short *user_toknums;
/* NOTE(review): presumably the list of all rules read so far; it is
   not used in this part of the file -- confirm.  */
static symbol_list *grammar;
/* Set to 1 by parse_start_decl once a valid %start has been read;
   also used there to diagnose a repeated %start.  */
static int start_flag;
/* The symbol named by the %start declaration.  */
static bucket *startval;

/* Nonzero if components of semantic values are used, implying
   they must be unions.  */
static int value_components_used;

/* Nonzero if %union has been seen.  */
static int typed;

/* Incremented for each %left, %right or %nonassoc seen */
static int lastprec;

/* NOTE(review): presumably the buckets of the predefined error and
   undefined tokens; they are not used in this part of the file --
   confirm.  */
static bucket *errtoken;
static bucket *undeftoken;
  69 \f
  70
  71 /*===================\
  72 | Low level lexing.  |
  73 \===================*/
  74
  75 static void
  76 skip_to_char (int target)
  77 {
  78   int c;
  79   if (target == '\n')
  80     complain (_("   Skipping to next \\n"));
  81   else
  82     complain (_("   Skipping to next %c"), target);
  83
  84   do
  85     c = skip_white_space ();
  86   while (c != target && c != EOF);
  87   if (c != EOF)
  88     ungetc (c, finput);
  89 }
  90
  91
  92 /*---------------------------------------------------------.
  93 | Read a signed integer from STREAM and return its value.  |
  94 `---------------------------------------------------------*/
  95
  96 static inline int
  97 read_signed_integer (FILE *stream)
  98 {
  99   int c = getc (stream);
 100   int sign = 1;
 101   int n = 0;
 102
 103   if (c == '-')
 104     {
 105       c = getc (stream);
 106       sign = -1;
 107     }
 108
 109   while (isdigit (c))
 110     {
 111       n = 10 * n + (c - '0');
 112       c = getc (stream);
 113     }
 114
 115   ungetc (c, stream);
 116
 117   return sign * n;
 118 }
 119 \f
 120 /*--------------------------------------------------------------.
 121 | Get the data type (alternative in the union) of the value for |
 122 | symbol N in rule RULE.                                        |
 123 `--------------------------------------------------------------*/
 124
 125 static char *
 126 get_type_name (int n, symbol_list * rule)
 127 {
 128   int i;
 129   symbol_list *rp;
 130
 131   if (n < 0)
 132     {
 133       complain (_("invalid $ value"));
 134       return NULL;
 135     }
 136
 137   rp = rule;
 138   i = 0;
 139
 140   while (i < n)
 141     {
 142       rp = rp->next;
 143       if (rp == NULL || rp->sym == NULL)
 144         {
 145           complain (_("invalid $ value"));
 146           return NULL;
 147         }
 148       i++;
 149     }
 150
 151   return rp->sym->type_name;
 152 }
 153 \f
 154 /*------------------------------------------------------------.
 155 | Dump the string from FIN to OOUT if non null.  MATCH is the |
 156 | delimiter of the string (either ' or ").                    |
 157 `------------------------------------------------------------*/
 158
 159 static inline void
 160 copy_string2 (FILE *fin, struct obstack *oout, int match, int store)
 161 {
 162   int c;
 163
 164   if (store)
 165     obstack_1grow (oout, match);
 166
 167   c = getc (fin);
 168
 169   while (c != match)
 170     {
 171       if (c == EOF)
 172         fatal (_("unterminated string at end of file"));
 173       if (c == '\n')
 174         {
 175           complain (_("unterminated string"));
 176           ungetc (c, fin);
 177           c = match;            /* invent terminator */
 178           continue;
 179         }
 180
 181       obstack_1grow (oout, c);
 182
 183       if (c == '\\')
 184         {
 185           c = getc (fin);
 186           if (c == EOF)
 187             fatal (_("unterminated string at end of file"));
 188           obstack_1grow (oout, c);
 189
 190           if (c == '\n')
 191             lineno++;
 192         }
 193
 194       c = getc (fin);
 195     }
 196
 197   if (store)
 198     obstack_1grow (oout, c);
 199 }
 200
 201 /* FIXME. */
 202
 203 static inline void
 204 copy_string (FILE *fin, struct obstack *oout, int match)
 205 {
 206   copy_string2 (fin, oout, match, 1);
 207 }
 208
 209 /* FIXME. */
 210
 211 static inline void
 212 copy_identifier (FILE *fin, struct obstack *oout)
 213 {
 214   int c;
 215
 216   while (isalnum (c = getc (fin)) || c == '_')
 217     obstack_1grow (oout, c);
 218
 219   ungetc (c, fin);
 220 }
 221
 222 /*-----------------------------------------------------------------.
 223 | Dump the wannabee comment from IN to OUT1 and OUT2 (which can be |
 224 | NULL).  In fact we just saw a `/', which might or might not be a |
 225 | comment.  In any case, copy what we saw.                         |
 226 |                                                                  |
 227 | OUT2 might be NULL.                                              |
 228 `-----------------------------------------------------------------*/
 229
 230 static inline void
 231 copy_comment2 (FILE *fin, struct obstack *oout1, struct obstack *oout2)
 232 {
 233   int cplus_comment;
 234   int ended;
 235   int c;
 236
 237   /* We read a `/', output it. */
 238   obstack_1grow (oout1, '/');
 239   if (oout2)
 240     obstack_1grow (oout2, '/');
 241
 242   switch ((c = getc (fin)))
 243     {
 244     case '/':
 245       cplus_comment = 1;
 246       break;
 247     case '*':
 248       cplus_comment = 0;
 249       break;
 250     default:
 251       ungetc (c, fin);
 252       return;
 253     }
 254
 255   obstack_1grow (oout1, c);
 256   if (oout2)
 257     obstack_1grow (oout2, c);
 258   c = getc (fin);
 259
 260   ended = 0;
 261   while (!ended)
 262     {
 263       if (!cplus_comment && c == '*')
 264         {
 265           while (c == '*')
 266             {
 267               obstack_1grow (oout1, c);
 268               if (oout2)
 269                 obstack_1grow (oout2, c);
 270               c = getc (fin);
 271             }
 272
 273           if (c == '/')
 274             {
 275               obstack_1grow (oout1, c);
 276               if (oout2)
 277                 obstack_1grow (oout2, c);
 278               ended = 1;
 279             }
 280         }
 281       else if (c == '\n')
 282         {
 283           lineno++;
 284           obstack_1grow (oout1, c);
 285           if (oout2)
 286             obstack_1grow (oout2, c);
 287           if (cplus_comment)
 288             ended = 1;
 289           else
 290             c = getc (fin);
 291         }
 292       else if (c == EOF)
 293         fatal (_("unterminated comment"));
 294       else
 295         {
 296           obstack_1grow (oout1, c);
 297           if (oout2)
 298             obstack_1grow (oout2, c);
 299           c = getc (fin);
 300         }
 301     }
 302 }
 303
 304
 305 /*-------------------------------------------------------------------.
 306 | Dump the comment (actually the current string starting with a `/') |
 307 | from FIN to OOUT.                                                  |
 308 `-------------------------------------------------------------------*/
 309
 310 static inline void
 311 copy_comment (FILE *fin, struct obstack *oout)
 312 {
 313   copy_comment2 (fin, oout, NULL);
 314 }
 315
 316
 317 /*-----------------------------------------------------------------.
 318 | FIN is pointing to a location (i.e., a `@').  Output to OOUT a   |
 319 | reference to this location. STACK_OFFSET is the number of values |
 320 | in the current rule so far, which says where to find `$0' with   |
 321 | respect to the top of the stack.                                 |
 322 `-----------------------------------------------------------------*/
 323
 324 static inline void
 325 copy_at (FILE *fin, struct obstack *oout, int stack_offset)
 326 {
 327   int c;
 328
 329   c = getc (fin);
 330   if (c == '$')
 331     {
 332       obstack_sgrow (oout, "yyloc");
 333       locations_flag = 1;
 334     }
 335   else if (isdigit (c) || c == '-')
 336     {
 337       int n;
 338
 339       ungetc (c, fin);
 340       n = read_signed_integer (fin);
 341
 342       obstack_fgrow1 (oout, "yylsp[%d]", n - stack_offset);
 343       locations_flag = 1;
 344     }
 345   else
 346     {
 347       char buf[] = "@c";
 348       buf[1] = c;
 349       complain (_("%s is invalid"), quote (buf));
 350     }
 351 }
 352
 353
 354 /*-------------------------------------------------------------------.
 355 | FIN is pointing to a wannabee semantic value (i.e., a `$').        |
 356 |                                                                    |
 357 | Possible inputs: $[<TYPENAME>]($|integer)                          |
 358 |                                                                    |
 359 | Output to OOUT a reference to this semantic value. STACK_OFFSET is |
 360 | the number of values in the current rule so far, which says where  |
 361 | to find `$0' with respect to the top of the stack.                 |
 362 `-------------------------------------------------------------------*/
 363
 364 static inline void
 365 copy_dollar (FILE *fin, struct obstack *oout,
 366              symbol_list *rule, int stack_offset)
 367 {
 368   int c = getc (fin);
 369   const char *type_name = NULL;
 370
 371   /* Get the type name if explicit. */
 372   if (c == '<')
 373     {
 374       read_type_name (fin);
 375       type_name = token_buffer;
 376       value_components_used = 1;
 377       c = getc (fin);
 378     }
 379
 380   if (c == '$')
 381     {
 382       obstack_sgrow (oout, "yyval");
 383
 384       if (!type_name)
 385         type_name = get_type_name (0, rule);
 386       if (type_name)
 387         obstack_fgrow1 (oout, ".%s", type_name);
 388       if (!type_name && typed)
 389         complain (_("$$ of `%s' has no declared type"),
 390                   rule->sym->tag);
 391     }
 392   else if (isdigit (c) || c == '-')
 393     {
 394       int n;
 395       ungetc (c, fin);
 396       n = read_signed_integer (fin);
 397
 398       if (!type_name && n > 0)
 399         type_name = get_type_name (n, rule);
 400
 401       obstack_fgrow1 (oout, "yyvsp[%d]", n - stack_offset);
 402
 403       if (type_name)
 404         obstack_fgrow1 (oout, ".%s", type_name);
 405       if (!type_name && typed)
 406         complain (_("$%d of `%s' has no declared type"),
 407                   n, rule->sym->tag);
 408     }
 409   else
 410     {
 411       char buf[] = "$c";
 412       buf[1] = c;
 413       complain (_("%s is invalid"), quote (buf));
 414     }
 415 }
 416 \f
 417 /*-------------------------------------------------------------------.
 418 | Copy the contents of a `%{ ... %}' into the definitions file.  The |
 419 | `%{' has already been read.  Return after reading the `%}'.        |
 420 `-------------------------------------------------------------------*/
 421
 422 static void
 423 copy_definition (void)
 424 {
 425   int c;
 426   /* -1 while reading a character if prev char was %. */
 427   int after_percent;
 428
 429 #if 0
 430   if (!no_lines_flag)
 431     obstack_fgrow2 (&attrs_obstack, "#line %d %s\n",
 432                     lineno, quotearg_style (c_quoting_style, infile));
 433 #endif
 434
 435   after_percent = 0;
 436
 437   c = getc (finput);
 438
 439   for (;;)
 440     {
 441       switch (c)
 442         {
 443         case '\n':
 444           obstack_1grow (&attrs_obstack, c);
 445           lineno++;
 446           break;
 447
 448         case '%':
 449           after_percent = -1;
 450           break;
 451
 452         case '\'':
 453         case '"':
 454           copy_string (finput, &attrs_obstack, c);
 455           break;
 456
 457         case '/':
 458           copy_comment (finput, &attrs_obstack);
 459           break;
 460
 461         case EOF:
 462           fatal ("%s", _("unterminated `%{' definition"));
 463
 464         default:
 465           obstack_1grow (&attrs_obstack, c);
 466         }
 467
 468       c = getc (finput);
 469
 470       if (after_percent)
 471         {
 472           if (c == '}')
 473             return;
 474           obstack_1grow (&attrs_obstack, '%');
 475         }
 476       after_percent = 0;
 477     }
 478 }
 479
 480
 481 /*-------------------------------------------------------------------.
 482 | Parse what comes after %token or %nterm.  For %token, WHAT_IS is   |
 483 | token_sym and WHAT_IS_NOT is nterm_sym.  For %nterm, the arguments |
 484 | are reversed.                                                      |
 485 `-------------------------------------------------------------------*/
 486
 487 static void
 488 parse_token_decl (symbol_class what_is, symbol_class what_is_not)
 489 {
 490   token_t token = 0;
 491   char *typename = 0;
 492
 493   /* The symbol being defined.  */
 494   struct bucket *symbol = NULL;
 495
 496   /* After `%token' and `%nterm', any number of symbols maybe be
 497      defined.  */
 498   for (;;)
 499     {
 500       int tmp_char = ungetc (skip_white_space (), finput);
 501
 502       /* `%' (for instance from `%token', or from `%%' etc.) is the
 503          only valid means to end this declaration.  */
 504       if (tmp_char == '%')
 505         return;
 506       if (tmp_char == EOF)
 507         fatal (_("Premature EOF after %s"), token_buffer);
 508
 509       token = lex ();
 510       if (token == tok_comma)
 511         {
 512           symbol = NULL;
 513           continue;
 514         }
 515       if (token == tok_typename)
 516         {
 517           typename = xstrdup (token_buffer);
 518           value_components_used = 1;
 519           symbol = NULL;
 520         }
 521       else if (token == tok_identifier && *symval->tag == '\"' && symbol)
 522         {
 523           if (symval->alias)
 524             warn (_("symbol `%s' used more than once as a literal string"),
 525                   symval->tag);
 526           else if (symbol->alias)
 527             warn (_("symbol `%s' given more than one literal string"),
 528                   symbol->tag);
 529           else
 530             {
 531               symval->class = token_sym;
 532               symval->type_name = typename;
 533               symval->user_token_number = symbol->user_token_number;
 534               symbol->user_token_number = SALIAS;
 535               symval->alias = symbol;
 536               symbol->alias = symval;
 537               /* symbol and symval combined are only one symbol */
 538               nsyms--;
 539             }
 540           translations = 1;
 541           symbol = NULL;
 542         }
 543       else if (token == tok_identifier)
 544         {
 545           int oldclass = symval->class;
 546           symbol = symval;
 547
 548           if (symbol->class == what_is_not)
 549             complain (_("symbol %s redefined"), symbol->tag);
 550           symbol->class = what_is;
 551           if (what_is == nterm_sym && oldclass != nterm_sym)
 552             symbol->value = nvars++;
 553
 554           if (typename)
 555             {
 556               if (symbol->type_name == NULL)
 557                 symbol->type_name = typename;
 558               else if (strcmp (typename, symbol->type_name) != 0)
 559                 complain (_("type redeclaration for %s"), symbol->tag);
 560             }
 561         }
 562       else if (symbol && token == tok_number)
 563         {
 564           symbol->user_token_number = numval;
 565           translations = 1;
 566         }
 567       else
 568         {
 569           complain (_("`%s' is invalid in %s"),
 570                     token_buffer, (what_is == token_sym) ? "%token" : "%nterm");
 571           skip_to_char ('%');
 572         }
 573     }
 574
 575 }
 576
 577
 578 /*------------------------------.
 579 | Parse what comes after %start |
 580 `------------------------------*/
 581
 582 static void
 583 parse_start_decl (void)
 584 {
 585   if (start_flag)
 586     complain (_("multiple %s declarations"), "%start");
 587   if (lex () != tok_identifier)
 588     complain (_("invalid %s declaration"), "%start");
 589   else
 590     {
 591       start_flag = 1;
 592       startval = symval;
 593     }
 594 }
 595
 596 /*-----------------------------------------------------------.
 597 | read in a %type declaration and record its information for |
 598 | get_type_name to access                                    |
 599 `-----------------------------------------------------------*/
 600
 601 static void
 602 parse_type_decl (void)
 603 {
 604   char *name;
 605
 606   if (lex () != tok_typename)
 607     {
 608       complain ("%s", _("%type declaration has no <typename>"));
 609       skip_to_char ('%');
 610       return;
 611     }
 612
 613   name = xstrdup (token_buffer);
 614
 615   for (;;)
 616     {
 617       token_t t;
 618       int tmp_char = ungetc (skip_white_space (), finput);
 619
 620       if (tmp_char == '%')
 621         return;
 622       if (tmp_char == EOF)
 623         fatal (_("Premature EOF after %s"), token_buffer);
 624
 625       t = lex ();
 626
 627       switch (t)
 628         {
 629
 630         case tok_comma:
 631         case tok_semicolon:
 632           break;
 633
 634         case tok_identifier:
 635           if (symval->type_name == NULL)
 636             symval->type_name = name;
 637           else if (strcmp (name, symval->type_name) != 0)
 638             complain (_("type redeclaration for %s"), symval->tag);
 639
 640           break;
 641
 642         default:
 643           complain (_("invalid %%type declaration due to item: %s"),
 644                     token_buffer);
 645           skip_to_char ('%');
 646         }
 647     }
 648 }
 649
 650
 651
 652 /*----------------------------------------------------------------.
 653 | Read in a %left, %right or %nonassoc declaration and record its |
 654 | information.                                                    |
 655 `----------------------------------------------------------------*/
 656
 657 static void
 658 parse_assoc_decl (associativity assoc)
 659 {
 660   char *name = NULL;
 661   int prev = 0;
 662
 663   lastprec++;                   /* Assign a new precedence level, never 0.  */
 664
 665   for (;;)
 666     {
 667       token_t t;
 668       int tmp_char = ungetc (skip_white_space (), finput);
 669
 670       if (tmp_char == '%')
 671         return;
 672       if (tmp_char == EOF)
 673         fatal (_("Premature EOF after %s"), token_buffer);
 674
 675       t = lex ();
 676
 677       switch (t)
 678         {
 679         case tok_typename:
 680           name = xstrdup (token_buffer);
 681           break;
 682
 683         case tok_comma:
 684           break;
 685
 686         case tok_identifier:
 687           if (symval->prec != 0)
 688             complain (_("redefining precedence of %s"), symval->tag);
 689           symval->prec = lastprec;
 690           symval->assoc = assoc;
 691           if (symval->class == nterm_sym)
 692             complain (_("symbol %s redefined"), symval->tag);
 693           symval->class = token_sym;
 694           if (name)
 695             {                   /* record the type, if one is specified */
 696               if (symval->type_name == NULL)
 697                 symval->type_name = name;
 698               else if (strcmp (name, symval->type_name) != 0)
 699                 complain (_("type redeclaration for %s"), symval->tag);
 700             }
 701           break;
 702
 703         case tok_number:
 704           if (prev == tok_identifier)
 705             {
 706               symval->user_token_number = numval;
 707               translations = 1;
 708             }
 709           else
 710             {
 711               complain (_
 712                         ("invalid text (%s) - number should be after identifier"),
 713 token_buffer);
 714               skip_to_char ('%');
 715             }
 716           break;
 717
 718         case tok_semicolon:
 719           return;
 720
 721         default:
 722           complain (_("unexpected item: %s"), token_buffer);
 723           skip_to_char ('%');
 724         }
 725
 726       prev = t;
 727
 728     }
 729 }
 730
 731
 732
 733 /*--------------------------------------------------------------.
 734 | Copy the union declaration into ATTRS_OBSTACK (and fdefines), |
 735 | where it is made into the definition of YYSTYPE, the type of  |
 736 | elements of the parser value stack.                           |
 737 `--------------------------------------------------------------*/
 738
 739 static void
 740 parse_union_decl (void)
 741 {
 742   int c;
 743   int count = 0;
 744
 745   if (typed)
 746     complain (_("multiple %s declarations"), "%union");
 747
 748   typed = 1;
 749
 750   if (!no_lines_flag)
 751     obstack_fgrow2 (&attrs_obstack, "\n#line %d %s\n",
 752                     lineno, quotearg_style (c_quoting_style, infile));
 753   else
 754     obstack_1grow (&attrs_obstack, '\n');
 755
 756   obstack_sgrow (&attrs_obstack, "typedef union");
 757   if (defines_flag)
 758     obstack_sgrow (&defines_obstack, "typedef union");
 759
 760   c = getc (finput);
 761
 762   while (c != EOF)
 763     {
 764       obstack_1grow (&attrs_obstack, c);
 765       if (defines_flag)
 766         obstack_1grow (&defines_obstack, c);
 767
 768       switch (c)
 769         {
 770         case '\n':
 771           lineno++;
 772           break;
 773
 774         case '/':
 775           copy_comment2 (finput, &defines_obstack, &attrs_obstack);
 776           break;
 777
 778         case '{':
 779           count++;
 780           break;
 781
 782         case '}':
 783           if (count == 0)
 784             complain (_("unmatched %s"), "`}'");
 785           count--;
 786           if (count <= 0)
 787             {
 788               obstack_sgrow (&attrs_obstack, " YYSTYPE;\n");
 789               if (defines_flag)
 790                 obstack_sgrow (&defines_obstack, " YYSTYPE;\n");
 791               /* JF don't choke on trailing semi */
 792               c = skip_white_space ();
 793               if (c != ';')
 794                 ungetc (c, finput);
 795               return;
 796             }
 797         }
 798
 799       c = getc (finput);
 800     }
 801 }
 802
 803
 804 /*-------------------------------------------------------.
 805 | Parse the declaration %expect N which says to expect N |
 806 | shift-reduce conflicts.                                |
 807 `-------------------------------------------------------*/
 808
 809 static void
 810 parse_expect_decl (void)
 811 {
 812   int c = skip_white_space ();
 813   ungetc (c, finput);
 814
 815   if (!isdigit (c))
 816     complain (_("argument of %%expect is not an integer"));
 817   else
 818     expected_conflicts = read_signed_integer (finput);
 819 }
 820
 821
 822 /*-------------------------------------------------------------------.
 823 | Parse what comes after %thong.  the full syntax is                 |
 824 |                                                                    |
 825 |                %thong <type> token number literal                  |
 826 |                                                                    |
 827 | the <type> or number may be omitted.  The number specifies the     |
 828 | user_token_number.                                                 |
 829 |                                                                    |
 830 | Two symbols are entered in the table, one for the token symbol and |
 831 | one for the literal.  Both are given the <type>, if any, from the  |
 832 | declaration.  The ->user_token_number of the first is SALIAS and   |
 833 | the ->user_token_number of the second is set to the number, if     |
 834 | any, from the declaration.  The two symbols are linked via         |
 835 | pointers in their ->alias fields.                                  |
 836 |                                                                    |
 837 | During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter,    |
 838 | only the literal string is retained it is the literal string that  |
 839 | is output to yytname                                               |
 840 `-------------------------------------------------------------------*/
 841
 842 static void
 843 parse_thong_decl (void)
 844 {
 845   token_t token;
 846   struct bucket *symbol;
 847   char *typename = 0;
 848   int usrtoknum;
 849
 850   translations = 1;
 851   token = lex ();               /* fetch typename or first token */
 852   if (token == tok_typename)
 853     {
 854       typename = xstrdup (token_buffer);
 855       value_components_used = 1;
 856       token = lex ();           /* fetch first token */
 857     }
 858
 859   /* process first token */
 860
 861   if (token != tok_identifier)
 862     {
 863       complain (_("unrecognized item %s, expected an identifier"),
 864                 token_buffer);
 865       skip_to_char ('%');
 866       return;
 867     }
 868   symval->class = token_sym;
 869   symval->type_name = typename;
 870   symval->user_token_number = SALIAS;
 871   symbol = symval;
 872
 873   token = lex ();               /* get number or literal string */
 874
 875   if (token == tok_number)
 876     {
 877       usrtoknum = numval;
 878       token = lex ();           /* okay, did number, now get literal */
 879     }
 880   else
 881     usrtoknum = 0;
 882
 883   /* process literal string token */
 884
 885   if (token != tok_identifier || *symval->tag != '\"')
 886     {
 887       complain (_("expected string constant instead of %s"), token_buffer);
 888       skip_to_char ('%');
 889       return;
 890     }
 891   symval->class = token_sym;
 892   symval->type_name = typename;
 893   symval->user_token_number = usrtoknum;
 894
 895   symval->alias = symbol;
 896   symbol->alias = symval;
 897
 898   /* symbol and symval combined are only one symbol.  */
 899   nsyms--;
 900 }
 901
 902 /* FIXME. */
 903
 904 static void
 905 parse_macro_decl (void)
 906 {
 907   int ch = ungetc (skip_white_space (), finput);
 908   char* macro_key;
 909   char* macro_value;
 910
 911   /* Read key. */
 912   if (!isalpha (ch) && ch != '_')
 913     {
 914       complain (_("invalid %s declaration"), "%define");
 915       skip_to_char ('%');
 916       return;
 917     }
 918   copy_identifier (finput, &macro_obstack);
 919   obstack_1grow (&macro_obstack, 0);
 920   macro_key = obstack_finish (&macro_obstack);
 921
 922   /* Read value. */
 923   ch = skip_white_space ();
 924   if (ch != '"')
 925     {
 926       ungetc (ch, finput);
 927       if (ch != EOF)
 928         {
 929           complain (_("invalid %s declaration"), "%define");
 930           skip_to_char ('%');
 931           return;
 932         }
 933       else
 934         fatal (_("Premature EOF after %s"), "\"");
 935     }
 936   copy_string2 (finput, &macro_obstack, '"', 0);
 937   obstack_1grow (&macro_obstack, 0);
 938   macro_value = obstack_finish (&macro_obstack);
 939
 940   /* Store the (key, value) pair in the environment. */
 941   macro_insert (macro_key, macro_value);
 942 }
 943
 944 /*------------------------------------------.
 945 | Parse what comes after %header_extension. |
 946 `------------------------------------------*/
 947
 948 static void
 949 parse_header_extension_decl (void)
 950 {
 951   char buff[32];
 952
 953   if (header_extension)
 954     complain (_("multiple %%header_extension declarations"));
 955   fscanf (finput, "%s", buff);
 956   header_extension = xstrdup (buff);
 957 }
 958
 959 /*------------------------------------------.
 960 | Parse what comes after %source_extension. |
 961 `------------------------------------------*/
 962
 963 static void
 964 parse_source_extension_decl (void)
 965 {
 966   char buff[32];
 967
 968   if (src_extension)
 969     complain (_("multiple %%source_extension declarations"));
 970   fscanf (finput, "%s", buff);
 971   src_extension = xstrdup (buff);
 972 }
 973
 974 /*----------------------------------------------------------------.
 975 | Read from finput until `%%' is seen.  Discard the `%%'.  Handle |
 976 | any `%' declarations, and copy the contents of any `%{ ... %}'  |
 977 | groups to ATTRS_OBSTACK.                                        |
 978 `----------------------------------------------------------------*/
 979
 980 static void
 981 read_declarations (void)
 982 {
 983   int c;
 984   int tok;
 985
 986   for (;;)
 987     {
 988       c = skip_white_space ();
 989
 990       if (c == '%')
 991         {
 992           tok = parse_percent_token ();
 993
 994           switch (tok)
 995             {
 996             case tok_two_percents:
 997               return;
 998
 999             case tok_percent_left_curly:
1000               copy_definition ();
1001               break;
1002
1003             case tok_token:
1004               parse_token_decl (token_sym, nterm_sym);
1005               break;
1006
1007             case tok_nterm:
1008               parse_token_decl (nterm_sym, token_sym);
1009               break;
1010
1011             case tok_type:
1012               parse_type_decl ();
1013               break;
1014
1015             case tok_start:
1016               parse_start_decl ();
1017               break;
1018
1019             case tok_union:
1020               parse_union_decl ();
1021               break;
1022
1023             case tok_expect:
1024               parse_expect_decl ();
1025               break;
1026
1027             case tok_thong:
1028               parse_thong_decl ();
1029               break;
1030
1031             case tok_left:
1032               parse_assoc_decl (left_assoc);
1033               break;
1034
1035             case tok_right:
1036               parse_assoc_decl (right_assoc);
1037               break;
1038
1039             case tok_nonassoc:
1040               parse_assoc_decl (non_assoc);
1041               break;
1042
1043             case tok_hdrext:
1044               parse_header_extension_decl ();
1045               break;
1046
1047             case tok_srcext:
1048               parse_source_extension_decl ();
1049               break;
1050
1051             case tok_define:
1052               parse_macro_decl ();
1053               break;
1054
1055             case tok_noop:
1056               break;
1057
1058             default:
1059               complain (_("unrecognized: %s"), token_buffer);
1060               skip_to_char ('%');
1061             }
1062         }
1063       else if (c == EOF)
1064         fatal (_("no input grammar"));
1065       else
1066         {
1067           char buf[] = "c";
1068           buf[0] = c;
1069           complain (_("unknown character: %s"), quote (buf));
1070           skip_to_char ('%');
1071         }
1072     }
1073 }
1074 \f
1075 /*-------------------------------------------------------------------.
1076 | Assuming that a `{' has just been seen, copy everything up to the  |
1077 | matching `}' into the actions file.  STACK_OFFSET is the number of |
1078 | values in the current rule so far, which says where to find `$0'   |
1079 | with respect to the top of the stack.                              |
1080 `-------------------------------------------------------------------*/
1081
1082 static void
1083 copy_action (symbol_list *rule, int stack_offset)
1084 {
1085   int c;
1086   int count;
1087   char buf[4096];
1088
1089   /* offset is always 0 if parser has already popped the stack pointer */
1090   if (semantic_parser)
1091     stack_offset = 0;
1092
1093   sprintf (buf, "\ncase %d:\n", nrules);
1094   obstack_grow (&action_obstack, buf, strlen (buf));
1095
1096   if (!no_lines_flag)
1097     {
1098       sprintf (buf, "#line %d %s\n",
1099                lineno, quotearg_style (c_quoting_style, infile));
1100       obstack_grow (&action_obstack, buf, strlen (buf));
1101     }
1102   obstack_1grow (&action_obstack, '{');
1103
1104   count = 1;
1105   c = getc (finput);
1106
1107   while (count > 0)
1108     {
1109       while (c != '}')
1110         {
1111           switch (c)
1112             {
1113             case '\n':
1114               obstack_1grow (&action_obstack, c);
1115               lineno++;
1116               break;
1117
1118             case '{':
1119               obstack_1grow (&action_obstack, c);
1120               count++;
1121               break;
1122
1123             case '\'':
1124             case '"':
1125               copy_string (finput, &action_obstack, c);
1126               break;
1127
1128             case '/':
1129               copy_comment (finput, &action_obstack);
1130               break;
1131
1132             case '$':
1133               copy_dollar (finput, &action_obstack,
1134                            rule, stack_offset);
1135               break;
1136
1137             case '@':
1138               copy_at (finput, &action_obstack,
1139                        stack_offset);
1140               break;
1141
1142             case EOF:
1143               fatal (_("unmatched %s"), "`{'");
1144
1145             default:
1146               obstack_1grow (&action_obstack, c);
1147             }
1148
1149           c = getc (finput);
1150         }
1151
1152       /* above loop exits when c is '}' */
1153
1154       if (--count)
1155         {
1156           obstack_1grow (&action_obstack, c);
1157           c = getc (finput);
1158         }
1159     }
1160
1161   obstack_sgrow (&action_obstack, ";\n    break;}");
1162 }
1163 \f
1164 /*-------------------------------------------------------------------.
1165 | After `%guard' is seen in the input file, copy the actual guard    |
1166 | into the guards file.  If the guard is followed by an action, copy |
1167 | that into the actions file.  STACK_OFFSET is the number of values  |
1168 | in the current rule so far, which says where to find `$0' with     |
1169 | respect to the top of the stack, for the simple parser in which    |
1170 | the stack is not popped until after the guard is run.              |
1171 `-------------------------------------------------------------------*/
1172
1173 static void
1174 copy_guard (symbol_list *rule, int stack_offset)
1175 {
1176   int c;
1177   int count;
1178   int brace_flag = 0;
1179
1180   /* offset is always 0 if parser has already popped the stack pointer */
1181   if (semantic_parser)
1182     stack_offset = 0;
1183
1184   obstack_fgrow1 (&guard_obstack, "\ncase %d:\n", nrules);
1185   if (!no_lines_flag)
1186     obstack_fgrow2 (&guard_obstack, "#line %d %s\n",
1187                     lineno, quotearg_style (c_quoting_style, infile));
1188   obstack_1grow (&guard_obstack, '{');
1189
1190   count = 0;
1191   c = getc (finput);
1192
1193   while (brace_flag ? (count > 0) : (c != ';'))
1194     {
1195       switch (c)
1196         {
1197         case '\n':
1198           obstack_1grow (&guard_obstack, c);
1199           lineno++;
1200           break;
1201
1202         case '{':
1203           obstack_1grow (&guard_obstack, c);
1204           brace_flag = 1;
1205           count++;
1206           break;
1207
1208         case '}':
1209           obstack_1grow (&guard_obstack, c);
1210           if (count > 0)
1211             count--;
1212           else
1213             {
1214               complain (_("unmatched %s"), "`}'");
1215               c = getc (finput);        /* skip it */
1216             }
1217           break;
1218
1219         case '\'':
1220         case '"':
1221           copy_string (finput, &guard_obstack, c);
1222           break;
1223
1224         case '/':
1225           copy_comment (finput, &guard_obstack);
1226           break;
1227
1228         case '$':
1229           copy_dollar (finput, &guard_obstack, rule, stack_offset);
1230           break;
1231
1232         case '@':
1233           copy_at (finput, &guard_obstack, stack_offset);
1234           break;
1235
1236         case EOF:
1237           fatal ("%s", _("unterminated %guard clause"));
1238
1239         default:
1240           obstack_1grow (&guard_obstack, c);
1241         }
1242
1243       if (c != '}' || count != 0)
1244         c = getc (finput);
1245     }
1246
1247   c = skip_white_space ();
1248
1249   obstack_sgrow (&guard_obstack, ";\n    break;}");
1250   if (c == '{')
1251     copy_action (rule, stack_offset);
1252   else if (c == '=')
1253     {
1254       c = getc (finput);        /* why not skip_white_space -wjh */
1255       if (c == '{')
1256         copy_action (rule, stack_offset);
1257     }
1258   else
1259     ungetc (c, finput);
1260 }
1261 \f
1262
1263 static void
1264 record_rule_line (void)
1265 {
1266   /* Record each rule's source line number in rline table.  */
1267
1268   if (nrules >= rline_allocated)
1269     {
1270       rline_allocated = nrules * 2;
1271       rline = XREALLOC (rline, short, rline_allocated);
1272     }
1273   rline[nrules] = lineno;
1274 }
1275
1276
1277 /*-------------------------------------------------------------------.
1278 | Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1279 | with the user's names.                                             |
1280 `-------------------------------------------------------------------*/
1281
1282 static bucket *
1283 gensym (void)
1284 {
1285   /* Incremented for each generated symbol */
1286   static int gensym_count = 0;
1287   static char buf[256];
1288
1289   bucket *sym;
1290
1291   sprintf (buf, "@%d", ++gensym_count);
1292   token_buffer = buf;
1293   sym = getsym (token_buffer);
1294   sym->class = nterm_sym;
1295   sym->value = nvars++;
1296   return sym;
1297 }
1298
#if 0
/* Read in a %type declaration and record its information for
   get_type_name to access.  This is unused.  It is only called from
   the #if 0 part of readgram.

   The type name must come first; each identifier that follows gets
   that type name, with a complaint if it already has a different one.
   Commas are skipped.  Returns the token following the declaration:
   the one after a terminating `;', or the first token that is neither
   a comma nor an identifier.  If the declaration does not start with
   a type name, the offending token is returned.  */

static int
get_type (void)
{
  token_t token;
  char *name;

  token = lex ();

  if (token != tok_typename)
    {
      complain (_("invalid %s declaration"), "%type");
      /* Hand the unexpected token back to the caller.  */
      return token;
    }

  name = xstrdup (token_buffer);

  for (;;)
    {
      token = lex ();

      switch (token)
        {
        case tok_semicolon:
          return lex ();

        case tok_comma:
          break;

        case tok_identifier:
          if (symval->type_name == NULL)
            symval->type_name = name;
          else if (strcmp (name, symval->type_name) != 0)
            complain (_("type redeclaration for %s"), symval->tag);

          break;

        default:
          return token;
        }
    }
}

#endif
1350 \f
1351 /*------------------------------------------------------------------.
1352 | Parse the input grammar into one symbol_list structure.  Each     |
1353 | rule is represented by a sequence of symbols: the left hand side  |
1354 | followed by the contents of the right hand side, followed by a    |
1355 | null pointer instead of a symbol to terminate the rule.  The next |
1356 | symbol is the lhs of the following rule.                          |
1357 |                                                                   |
1358 | All guards and actions are copied out to the appropriate files,   |
1359 | labelled by the rule number they apply to.                        |
1360 `------------------------------------------------------------------*/
1361
1362 static void
1363 readgram (void)
1364 {
       /* Each pass of the main loop below consumes one rule -- either
          `lhs : rhs' or a `| rhs' alternative that reuses the previous
          LHS -- and appends it to the GRAMMAR list, bumping NRULES and
          NITEMS as it goes.  After the loop the symbol counts are
          checked, still-unknown symbols are turned into nonterminals,
          and NTOKENS is computed.  */
1365   token_t t;
1366   bucket *lhs = NULL;
1367   symbol_list *p;
       /* Last node appended to the list so far; NULL while it is empty.  */
1368   symbol_list *p1;
1369   bucket *bp;
1370
1371   /* Points to first symbol_list of current rule. its symbol is the
1372      lhs of the rule.  */
1373   symbol_list *crule;
1374   /* Points to the symbol_list preceding crule.  */
1375   symbol_list *crule1;
1376
1377   p1 = NULL;
1378
1379   t = lex ();
1380
1381   while (t != tok_two_percents && t != tok_eof)
1382     {
           /* A rule starts with its LHS identifier, or with `|' to add
              an alternative for the LHS already in effect.  */
1383       if (t == tok_identifier || t == tok_bar)
1384         {
               /* Nonzero while the last thing read in this rule was an
                  action that has not yet been followed by anything.  */
1385           int action_flag = 0;
1386           /* Number of symbols in rhs of this rule so far */
1387           int rulelength = 0;
1388           int xactions = 0;     /* JF for error checking */
1389           bucket *first_rhs = 0;
1390
1391           if (t == tok_identifier)
1392             {
1393               lhs = symval;
1394
                   /* Without an explicit start symbol, the LHS of the
                      first rule becomes the start symbol.  */
1395               if (!start_flag)
1396                 {
1397                   startval = lhs;
1398                   start_flag = 1;
1399                 }
1400
1401               t = lex ();
1402               if (t != tok_colon)
1403                 {
1404                   complain (_("ill-formed rule: initial symbol not followed by colon"));
                       /* Presumably pushes T back so that the rhs loop
                          below lexes it again.  */
1405                   unlex (t);
1406                 }
1407             }
1408
1409           if (nrules == 0 && t == tok_bar)
1410             {
1411               complain (_("grammar starts with vertical bar"));
                   /* NOTE(review): LHS is dereferenced unconditionally
                      below; confirm SYMVAL always designates a usable
                      bucket at this point.  */
1412               lhs = symval;     /* BOGUS: use a random symval */
1413             }
1414           /* start a new rule and record its lhs.  */
1415
1416           nrules++;
1417           nitems++;
1418
1419           record_rule_line ();
1420
1421           p = XCALLOC (symbol_list, 1);
1422           p->sym = lhs;
1423
1424           crule1 = p1;
1425           if (p1)
1426             p1->next = p;
1427           else
1428             grammar = p;
1429
1430           p1 = p;
1431           crule = p;
1432
1433           /* mark the rule's lhs as a nonterminal if not already so.  */
1434
1435           if (lhs->class == unknown_sym)
1436             {
1437               lhs->class = nterm_sym;
1438               lhs->value = nvars;
1439               nvars++;
1440             }
1441           else if (lhs->class == token_sym)
1442             complain (_("rule given for %s, which is a token"), lhs->tag);
1443
1444           /* read the rhs of the rule.  */
1445
1446           for (;;)
1447             {
1448               t = lex ();
                   /* `%prec SYMBOL': the token after tok_prec is lexed
                      only for the SYMVAL it leaves behind.
                      NOTE(review): its kind is not checked.  */
1449               if (t == tok_prec)
1450                 {
1451                   t = lex ();
1452                   crule->ruleprec = symval;
1453                   t = lex ();
1454                 }
1455
1456               if (!(t == tok_identifier || t == tok_left_curly))
1457                 break;
1458
1459               /* If next token is an identifier, see if a colon follows it.
1460                  If one does, exit this rule now.  */
1461               if (t == tok_identifier)
1462                 {
1463                   bucket *ssave;
1464                   token_t t1;
1465
                       /* Peek one token ahead; SYMVAL is saved and
                          restored around the peek, presumably because
                          lex may overwrite it.  */
1466                   ssave = symval;
1467                   t1 = lex ();
1468                   unlex (t1);
1469                   symval = ssave;
1470                   if (t1 == tok_colon)
1471                     break;
1472
1473                   if (!first_rhs)       /* JF */
1474                     first_rhs = symval;
1475                   /* Not followed by colon =>
1476                      process as part of this rule's rhs.  */
1477                 }
1478
1479               /* If we just passed an action, that action was in the middle
1480                  of a rule, so make a dummy rule to reduce it to a
1481                  non-terminal.  */
1482               if (action_flag)
1483                 {
1484                   bucket *sdummy;
1485
1486                   /* Since the action was written out with this rule's
1487                      number, we must give the new rule this number by
1488                      inserting the new rule before it.  */
1489
1490                   /* Make a dummy nonterminal, a gensym.  */
1491                   sdummy = gensym ();
1492
1493                   /* Make a new rule, whose body is empty,
1494                      before the current one, so that the action
1495                      just read can belong to it.  */
1496                   nrules++;
1497                   nitems++;
1498                   record_rule_line ();
1499                   p = XCALLOC (symbol_list, 1);
1500                   if (crule1)
1501                     crule1->next = p;
1502                   else
1503                     grammar = p;
1504                   p->sym = sdummy;
                       /* This node gets no symbol: it is the end-of-rule
                          marker of the dummy rule (assuming XCALLOC
                          zero-fills), and becomes the new predecessor of
                          CRULE.  */
1505                   crule1 = XCALLOC (symbol_list, 1);
1506                   p->next = crule1;
1507                   crule1->next = crule;
1508
1509                   /* Insert the dummy generated by that rule into this
1510                      rule.  */
1511                   nitems++;
1512                   p = XCALLOC (symbol_list, 1);
1513                   p->sym = sdummy;
1514                   p1->next = p;
1515                   p1 = p;
1516
1517                   action_flag = 0;
1518                 }
1519
1520               if (t == tok_identifier)
1521                 {
1522                   nitems++;
1523                   p = XCALLOC (symbol_list, 1);
1524                   p->sym = symval;
1525                   p1->next = p;
1526                   p1 = p;
1527                 }
1528               else              /* handle an action.  */
1529                 {
1530                   copy_action (crule, rulelength);
1531                   action_flag = 1;
1532                   xactions++;   /* JF */
1533                 }
                   /* Counted for symbols and actions alike.  */
1534               rulelength++;
1535             }                   /* end of  read rhs of rule */
1536
1537           /* Put an empty link in the list to mark the end of this rule  */
1538           p = XCALLOC (symbol_list, 1);
1539           p1->next = p;
1540           p1 = p;
1541
               /* T can only still be tok_prec here if the token that
                  followed a `%prec SYMBOL' pair in the loop above was
                  itself tok_prec.  */
1542           if (t == tok_prec)
1543             {
1544               complain (_("two @prec's in a row"));
1545               t = lex ();
1546               crule->ruleprec = symval;
1547               t = lex ();
1548             }
1549           if (t == tok_guard)
1550             {
1551               if (!semantic_parser)
1552                 complain (_("%%guard present but %%semantic_parser not specified"));
1553
1554               copy_guard (crule, rulelength);
1555               t = lex ();
1556             }
1557           else if (t == tok_left_curly)
1558             {
1559               /* This case never occurs -wjh */
1560               if (action_flag)
1561                 complain (_("two actions at end of one rule"));
1562               copy_action (crule, rulelength);
1563               action_flag = 1;
1564               xactions++;       /* -wjh */
1565               t = lex ();
1566             }
1567           /* If $$ is being set in default way, report if any type
1568              mismatch.  */
1569           else if (!xactions
1570                    && first_rhs && lhs->type_name != first_rhs->type_name)
1571             {
                   /* The pointers differ; the types still agree if both
                      names are present and compare equal.  */
1572               if (lhs->type_name == 0
1573                   || first_rhs->type_name == 0
1574                   || strcmp (lhs->type_name, first_rhs->type_name))
1575                 complain (_("type clash (`%s' `%s') on default action"),
1576                           lhs->type_name ? lhs->type_name : "",
1577                           first_rhs->type_name ? first_rhs->type_name : "");
1578             }
1579           /* Warn if there is no default for $$ but we need one.  */
1580           else if (!xactions && !first_rhs && lhs->type_name != 0)
1581             complain (_("empty rule for typed nonterminal, and no action"));
               /* The `;' ending a rule is optional.  */
1582           if (t == tok_semicolon)
1583             t = lex ();
1584         }
1585 #if 0
1586       /* these things can appear as alternatives to rules.  */
1587 /* NO, they cannot.
1588         a) none of the documentation allows them
1589         b) most of them scan forward until finding a next %
1590                 thus they may swallow lots of intervening rules
1591 */
1592       else if (t == tok_token)
1593         {
1594           parse_token_decl (token_sym, nterm_sym);
1595           t = lex ();
1596         }
1597       else if (t == tok_nterm)
1598         {
1599           parse_token_decl (nterm_sym, token_sym);
1600           t = lex ();
1601         }
1602       else if (t == tok_type)
1603         {
1604           t = get_type ();
1605         }
1606       else if (t == tok_union)
1607         {
1608           parse_union_decl ();
1609           t = lex ();
1610         }
1611       else if (t == tok_expect)
1612         {
1613           parse_expect_decl ();
1614           t = lex ();
1615         }
1616       else if (t == tok_start)
1617         {
1618           parse_start_decl ();
1619           t = lex ();
1620         }
1621 #endif
1622
1623       else
1624         {
               /* Anything else cannot start a rule: report it and move
                  on to the next token.  */
1625           complain (_("invalid input: %s"), quote (token_buffer));
1626           t = lex ();
1627         }
1628     }
1629
1630   /* grammar has been read.  Do some checking */
1631
1632   if (nsyms > MAXSHORT)
1633     fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1634            MAXSHORT);
1635   if (nrules == 0)
1636     fatal (_("no rules in the input grammar"));
1637
1638 #if 0 /* This code is in the skeleton now.  */
1639   /* JF put out same default YYSTYPE as YACC does */
1640   if (typed == 0
1641       && !value_components_used)
1642     {
1643       /* We used to use `unsigned long' as YYSTYPE on MSDOS,
1644          but it seems better to be consistent.
1645          Most programs should declare their own type anyway.  */
1646       obstack_sgrow (&attrs_obstack,
1647                            "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1648       if (defines_flag)
1649         obstack_sgrow (&defines_obstack, "\
1650 # ifndef YYSTYPE\n\
1651 #  define YYSTYPE int\n\
1652 # endif\n");
1653     }
1654 #endif
1655
1656   /* Report any undefined symbols and consider them nonterminals.  */
1657
1658   for (bp = firstsymbol; bp; bp = bp->next)
1659     if (bp->class == unknown_sym)
1660       {
1661         complain (_
1662                   ("symbol %s is used, but is not defined as a token and has no rules"),
1663                   bp->tag);
1664         bp->class = nterm_sym;
1665         bp->value = nvars++;
1666       }
1667
       /* Every symbol that is not a nonterminal counts as a token.  */
1668   ntokens = nsyms - nvars;
1669 }
1670 \f
1671 /*--------------------------------------------------------------.
1672 | For named tokens, but not literal ones, define the name.  The |
1673 | value is the user token number.                               |
1674 `--------------------------------------------------------------*/
1675
1676 static void
1677 output_token_defines (struct obstack *oout)
1678 {
1679   bucket *bp;
1680   char *cp, *symbol;
1681   char c;
1682
1683   for (bp = firstsymbol; bp; bp = bp->next)
1684     {
1685       symbol = bp->tag;         /* get symbol */
1686
1687       if (bp->value >= ntokens)
1688         continue;
1689       if (bp->user_token_number == SALIAS)
1690         continue;
1691       if ('\'' == *symbol)
1692         continue;               /* skip literal character */
1693       if (bp == errtoken)
1694         continue;               /* skip error token */
1695       if ('\"' == *symbol)
1696         {
1697           /* use literal string only if given a symbol with an alias */
1698           if (bp->alias)
1699             symbol = bp->alias->tag;
1700           else
1701             continue;
1702         }
1703
1704       /* Don't #define nonliteral tokens whose names contain periods.  */
1705       cp = symbol;
1706       while ((c = *cp++) && c != '.');
1707       if (c != '\0')
1708         continue;
1709
1710       obstack_fgrow2 (oout, "# define\t%s\t%d\n",
1711                       symbol,
1712                       (translations ? bp->user_token_number : bp->value));
1713       if (semantic_parser)
1714         obstack_fgrow2 (oout, "# define\tT%s\t%d\n", symbol, bp->value);
1715     }
1716
1717   /* obstack_1grow (oout, '\n'); */
1718 }
1719
1720
1721 /*------------------------------------------------------------------.
1722 | Assign symbol numbers, and write definition of token names into   |
1723 | FDEFINES.  Set up vectors TAGS and SPREC of names and precedences |
1724 | of symbols.                                                       |
1725 `------------------------------------------------------------------*/
1726
1727 static void
1728 packsymbols (void)
1729 {
1730   bucket *bp;
1731   int tokno = 1;
1732   int i;
1733   int last_user_token_number;
1734   static char DOLLAR[] = "$";
1735
1736   /* int lossage = 0; JF set but not used */
1737
1738   tags = XCALLOC (char *, nsyms + 1);
1739   tags[0] = DOLLAR;
1740   user_toknums = XCALLOC (short, nsyms + 1);
1741   user_toknums[0] = 0;
1742
1743   sprec = XCALLOC (short, nsyms);
1744   sassoc = XCALLOC (short, nsyms);
1745
1746   max_user_token_number = 256;
1747   last_user_token_number = 256;
1748
1749   for (bp = firstsymbol; bp; bp = bp->next)
1750     {
1751       if (bp->class == nterm_sym)
1752         {
1753           bp->value += ntokens;
1754         }
1755       else if (bp->alias)
1756         {
1757           /* this symbol and its alias are a single token defn.
1758              allocate a tokno, and assign to both check agreement of
1759              ->prec and ->assoc fields and make both the same */
1760           if (bp->value == 0)
1761             bp->value = bp->alias->value = tokno++;
1762
1763           if (bp->prec != bp->alias->prec)
1764             {
1765               if (bp->prec != 0 && bp->alias->prec != 0
1766                   && bp->user_token_number == SALIAS)
1767                 complain (_("conflicting precedences for %s and %s"),
1768                           bp->tag, bp->alias->tag);
1769               if (bp->prec != 0)
1770                 bp->alias->prec = bp->prec;
1771               else
1772                 bp->prec = bp->alias->prec;
1773             }
1774
1775           if (bp->assoc != bp->alias->assoc)
1776             {
1777               if (bp->assoc != 0 && bp->alias->assoc != 0
1778                   && bp->user_token_number == SALIAS)
1779                 complain (_("conflicting assoc values for %s and %s"),
1780                           bp->tag, bp->alias->tag);
1781               if (bp->assoc != 0)
1782                 bp->alias->assoc = bp->assoc;
1783               else
1784                 bp->assoc = bp->alias->assoc;
1785             }
1786
1787           if (bp->user_token_number == SALIAS)
1788             continue;           /* do not do processing below for SALIASs */
1789
1790         }
1791       else                      /* bp->class == token_sym */
1792         {
1793           bp->value = tokno++;
1794         }
1795
1796       if (bp->class == token_sym)
1797         {
1798           if (translations && !(bp->user_token_number))
1799             bp->user_token_number = ++last_user_token_number;
1800           if (bp->user_token_number > max_user_token_number)
1801             max_user_token_number = bp->user_token_number;
1802         }
1803
1804       tags[bp->value] = bp->tag;
1805       user_toknums[bp->value] = bp->user_token_number;
1806       sprec[bp->value] = bp->prec;
1807       sassoc[bp->value] = bp->assoc;
1808
1809     }
1810
1811   if (translations)
1812     {
1813       int j;
1814
1815       token_translations = XCALLOC (short, max_user_token_number + 1);
1816
1817       /* initialize all entries for literal tokens to 2, the internal
1818          token number for $undefined., which represents all invalid
1819          inputs.  */
1820       for (j = 0; j <= max_user_token_number; j++)
1821         token_translations[j] = 2;
1822
1823       for (bp = firstsymbol; bp; bp = bp->next)
1824         {
1825           if (bp->value >= ntokens)
1826             continue;           /* non-terminal */
1827           if (bp->user_token_number == SALIAS)
1828             continue;
1829           if (token_translations[bp->user_token_number] != 2)
1830             complain (_("tokens %s and %s both assigned number %d"),
1831                       tags[token_translations[bp->user_token_number]],
1832                       bp->tag, bp->user_token_number);
1833           token_translations[bp->user_token_number] = bp->value;
1834         }
1835     }
1836
1837   error_token_number = errtoken->value;
1838
1839   output_token_defines (&output_obstack);
1840   obstack_1grow (&output_obstack, 0);
1841   macro_insert ("tokendef", obstack_finish (&output_obstack));
1842
1843   /* if (!no_parser_flag)
1844      output_token_defines (&table_obstack); */
1845
1846   if (startval->class == unknown_sym)
1847     fatal (_("the start symbol %s is undefined"), startval->tag);
1848   else if (startval->class == token_sym)
1849     fatal (_("the start symbol %s is a token"), startval->tag);
1850
1851   start_symbol = startval->value;
1852
1853   if (defines_flag)
1854     {
1855       output_token_defines (&defines_obstack);
1856
1857       if (!pure_parser)
1858         {
1859           if (spec_name_prefix)
1860             obstack_fgrow1 (&defines_obstack, "\nextern YYSTYPE %slval;\n",
1861                             spec_name_prefix);
1862           else
1863             obstack_sgrow (&defines_obstack,
1864                                  "\nextern YYSTYPE yylval;\n");
1865         }
1866
1867       if (semantic_parser)
1868         for (i = ntokens; i < nsyms; i++)
1869           {
1870             /* don't make these for dummy nonterminals made by gensym.  */
1871             if (*tags[i] != '@')
1872                obstack_fgrow2 (&defines_obstack,
1873                                "# define\tNT%s\t%d\n", tags[i], i);
1874           }
1875 #if 0
1876       /* `fdefines' is now a temporary file, so we need to copy its
1877          contents in `done', so we can't close it here.  */
1878       fclose (fdefines);
1879       fdefines = NULL;
1880 #endif
1881     }
1882 }
1883
1884
1885 /*---------------------------------------------------------------.
1886 | Convert the rules into the representation using RRHS, RLHS and |
1887 | RITEMS.                                                        |
1888 `---------------------------------------------------------------*/
1889
1890 static void
1891 packgram (void)
1892 {
1893   int itemno;
1894   int ruleno;
1895   symbol_list *p;
1896
1897   bucket *ruleprec;
1898
1899   ritem = XCALLOC (short, nitems + 1);
1900   rlhs = XCALLOC (short, nrules) - 1;
1901   rrhs = XCALLOC (short, nrules) - 1;
1902   rprec = XCALLOC (short, nrules) - 1;
1903   rprecsym = XCALLOC (short, nrules) - 1;
1904   rassoc = XCALLOC (short, nrules) - 1;
1905
1906   itemno = 0;
1907   ruleno = 1;
1908
1909   p = grammar;
1910   while (p)
1911     {
1912       rlhs[ruleno] = p->sym->value;
1913       rrhs[ruleno] = itemno;
1914       ruleprec = p->ruleprec;
1915
1916       p = p->next;
1917       while (p && p->sym)
1918         {
1919           ritem[itemno++] = p->sym->value;
1920           /* A rule gets by default the precedence and associativity
1921              of the last token in it.  */
1922           if (p->sym->class == token_sym)
1923             {
1924               rprec[ruleno] = p->sym->prec;
1925               rassoc[ruleno] = p->sym->assoc;
1926             }
1927           if (p)
1928             p = p->next;
1929         }
1930
1931       /* If this rule has a %prec,
1932          the specified symbol's precedence replaces the default.  */
1933       if (ruleprec)
1934         {
1935           rprec[ruleno] = ruleprec->prec;
1936           rassoc[ruleno] = ruleprec->assoc;
1937           rprecsym[ruleno] = ruleprec->value;
1938         }
1939
1940       ritem[itemno++] = -ruleno;
1941       ruleno++;
1942
1943       if (p)
1944         p = p->next;
1945     }
1946
1947   ritem[itemno] = 0;
1948 }
1949 \f
1950 /*-------------------------------------------------------------------.
1951 | Read in the grammar specification and record it in the format      |
1952 | described in gram.h.  All guards are copied into the GUARD_OBSTACK |
1953 | and all actions into ACTION_OBSTACK, in each case forming the body |
1954 | of a C function (YYGUARD or YYACTION) which contains a switch      |
1955 | statement to decide which guard or action to execute.              |
1956 `-------------------------------------------------------------------*/
1957
1958 void
1959 reader (void)
1960 {
1961   start_flag = 0;
1962   startval = NULL;              /* start symbol not specified yet. */
1963
1964 #if 0
1965   /* initially assume token number translation not needed.  */
1966   translations = 0;
1967 #endif
1968   /* Nowadays translations is always set to 1, since we give `error' a
1969      user-token-number to satisfy the Posix demand for YYERRCODE==256.
1970    */
1971   translations = 1;
1972
1973   nsyms = 1;
1974   nvars = 0;
1975   nrules = 0;
1976   nitems = 0;
1977   rline_allocated = 10;
1978   rline = XCALLOC (short, rline_allocated);
1979
1980   typed = 0;
1981   lastprec = 0;
1982
1983   semantic_parser = 0;
1984   pure_parser = 0;
1985
1986   grammar = NULL;
1987
1988   init_lex ();
1989   lineno = 1;
1990
1991   /* Initialize the macro obstack.  */
1992   obstack_init (&macro_obstack);
1993
1994   /* Initialize the symbol table.  */
1995   tabinit ();
1996
1997   /* Construct the error token */
1998   errtoken = getsym ("error");
1999   errtoken->class = token_sym;
2000   errtoken->user_token_number = 256;    /* Value specified by POSIX.  */
2001
2002   /* Construct a token that represents all undefined literal tokens.
2003      It is always token number 2.  */
2004   undeftoken = getsym ("$undefined.");
2005   undeftoken->class = token_sym;
2006   undeftoken->user_token_number = 2;
2007
2008   /* Read the declaration section.  Copy %{ ... %} groups to
2009      TABLE_OBSTACK and FDEFINES file.  Also notice any %token, %left,
2010      etc. found there.  */
2011   /* obstack_1grow (&table_obstack, '\n'); */
2012
2013   read_declarations ();
2014   /* Start writing the guard and action files, if they are needed.  */
2015   /* output_headers (); */
2016   /* Read in the grammar, build grammar in list form.  Write out
2017      guards and actions.  */
2018   readgram ();
2019   /* Now we know whether we need the line-number stack.  If we do,
2020      write its type into the .tab.h file.  */
2021   /* if (defines_flag)
2022      reader_output_yylsp (&defines_obstack); */
2023   /* Write closing delimiters for actions and guards.  */
2024   /* output_trailers (); */
2025   /* if (locations_flag)
2026      obstack_sgrow (&table_obstack, "#define YYLSP_NEEDED 1\n\n"); */
2027   /* Assign the symbols their symbol numbers.  Write #defines for the
2028      token symbols into FDEFINES if requested.  */
2029   packsymbols ();
2030   /* Convert the grammar into the format described in gram.h.  */
2031   packgram ();
2032   /* Free the symbol table data structure since symbols are now all
2033      referred to by symbol number.  */
2034   free_symtab ();
2035 }