src/reader.c

   1 /* Input parser for bison
   2    Copyright 1984, 1986, 1989, 1992, 1998, 2000
   3    Free Software Foundation, Inc.
   4
   5    This file is part of Bison, the GNU Compiler Compiler.
   6
   7    Bison is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 2, or (at your option)
  10    any later version.
  11
  12    Bison is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with Bison; see the file COPYING.  If not, write to
  19    the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20    Boston, MA 02111-1307, USA.  */
  21
  22
#include "system.h"
#include <limits.h>
#include "obstack.h"
#include "quotearg.h"
#include "quote.h"
#include "getargs.h"
#include "files.h"
#include "xalloc.h"
#include "symtab.h"
#include "options.h"
#include "lex.h"
#include "gram.h"
#include "complain.h"
#include "output.h"
#include "reader.h"
#include "conflicts.h"
#include "muscle_tab.h"
  39
/* Number of slots allocated (but not necessarily used yet) in `rline'.
   NOTE(review): `rline' itself is declared outside this part of the
   file.  */
static int rline_allocated;

/* Singly linked list of symbols, chained through NEXT.  get_type_name
   walks such a list: the node it is handed is the one consulted for
   `$$', and the N-th node after it is the one consulted for `$N'.  A
   NULL node, or a node whose SYM is NULL, is treated there as being
   past the last usable item.  */
typedef struct symbol_list
{
  struct symbol_list *next;
  bucket *sym;
  /* NOTE(review): presumably the symbol named by a %prec modifier;
     it is not used in this part of the file -- confirm.  */
  bucket *ruleprec;
}
symbol_list;

/* Line counter for the input.  The copy routines below and
   parse_union_decl increment it for each newline they consume, and
   it is written into the `#line' directives they emit.  */
int lineno;
/* NOTE(review): TAGS and USER_TOKNUMS are not referenced in this part
   of the file; see their users for their meaning.  */
char **tags;
short *user_toknums;
/* NOTE(review): presumably the list of rules being read; not
   referenced in this part of the file -- confirm.  */
static symbol_list *grammar;
/* Nonzero once a valid %start declaration has been parsed.  */
static int start_flag;
/* The symbol named by the %start declaration (see parse_start_decl).  */
static bucket *startval;

/* Nonzero if components of semantic values are used, implying
   they must be unions.  */
static int value_components_used;

/* Nonzero if %union has been seen.  */
static int typed;

/* Incremented for each %left, %right or %nonassoc seen */
static int lastprec;

/* NOTE(review): ERRTOKEN and UNDEFTOKEN are not referenced in this
   part of the file; presumably the `error' and undefined-token
   symbols -- confirm.  */
static bucket *errtoken;
static bucket *undeftoken;
  70 \f
  71
  72 /*===================\
  73 | Low level lexing.  |
  74 \===================*/
  75
  76 static void
  77 skip_to_char (int target)
  78 {
  79   int c;
  80   if (target == '\n')
  81     complain (_("   Skipping to next \\n"));
  82   else
  83     complain (_("   Skipping to next %c"), target);
  84
  85   do
  86     c = skip_white_space ();
  87   while (c != target && c != EOF);
  88   if (c != EOF)
  89     ungetc (c, finput);
  90 }
  91
  92
  93 /*---------------------------------------------------------.
  94 | Read a signed integer from STREAM and return its value.  |
  95 `---------------------------------------------------------*/
  96
  97 static inline int
  98 read_signed_integer (FILE *stream)
  99 {
 100   int c = getc (stream);
 101   int sign = 1;
 102   int n = 0;
 103
 104   if (c == '-')
 105     {
 106       c = getc (stream);
 107       sign = -1;
 108     }
 109
 110   while (isdigit (c))
 111     {
 112       n = 10 * n + (c - '0');
 113       c = getc (stream);
 114     }
 115
 116   ungetc (c, stream);
 117
 118   return sign * n;
 119 }
 120 \f
 121 /*--------------------------------------------------------------.
 122 | Get the data type (alternative in the union) of the value for |
 123 | symbol N in rule RULE.                                        |
 124 `--------------------------------------------------------------*/
 125
 126 static char *
 127 get_type_name (int n, symbol_list * rule)
 128 {
 129   int i;
 130   symbol_list *rp;
 131
 132   if (n < 0)
 133     {
 134       complain (_("invalid $ value"));
 135       return NULL;
 136     }
 137
 138   rp = rule;
 139   i = 0;
 140
 141   while (i < n)
 142     {
 143       rp = rp->next;
 144       if (rp == NULL || rp->sym == NULL)
 145         {
 146           complain (_("invalid $ value"));
 147           return NULL;
 148         }
 149       i++;
 150     }
 151
 152   return rp->sym->type_name;
 153 }
 154 \f
 155 /*------------------------------------------------------------.
 156 | Dump the string from FIN to OOUT if non null.  MATCH is the |
 157 | delimiter of the string (either ' or ").                    |
 158 `------------------------------------------------------------*/
 159
 160 static inline void
 161 copy_string2 (FILE *fin, struct obstack *oout, int match, int store)
 162 {
 163   int c;
 164
 165   if (store)
 166     obstack_1grow (oout, match);
 167
 168   c = getc (fin);
 169
 170   while (c != match)
 171     {
 172       if (c == EOF)
 173         fatal (_("unterminated string at end of file"));
 174       if (c == '\n')
 175         {
 176           complain (_("unterminated string"));
 177           ungetc (c, fin);
 178           c = match;            /* invent terminator */
 179           continue;
 180         }
 181
 182       obstack_1grow (oout, c);
 183
 184       if (c == '\\')
 185         {
 186           c = getc (fin);
 187           if (c == EOF)
 188             fatal (_("unterminated string at end of file"));
 189           obstack_1grow (oout, c);
 190
 191           if (c == '\n')
 192             lineno++;
 193         }
 194
 195       c = getc (fin);
 196     }
 197
 198   if (store)
 199     obstack_1grow (oout, c);
 200 }
 201
/* Copy a string or character literal delimited by MATCH from FIN to
   OOUT, delimiters included.  The opening delimiter has already been
   read by the caller; this simply calls copy_string2 with STORE set
   to 1.  */

static inline void
copy_string (FILE *fin, struct obstack *oout, int match)
{
  copy_string2 (fin, oout, match, 1);
}
 209
 210 /* FIXME. */
 211
 212 static inline void
 213 copy_identifier (FILE *fin, struct obstack *oout)
 214 {
 215   int c;
 216
 217   while (isalnum (c = getc (fin)) || c == '_')
 218     obstack_1grow (oout, c);
 219
 220   ungetc (c, fin);
 221 }
 222
 223 /*-----------------------------------------------------------------.
 224 | Dump the wannabee comment from IN to OUT1 and OUT2 (which can be |
 225 | NULL).  In fact we just saw a `/', which might or might not be a |
 226 | comment.  In any case, copy what we saw.                         |
 227 |                                                                  |
 228 | OUT2 might be NULL.                                              |
 229 `-----------------------------------------------------------------*/
 230
 231 static inline void
 232 copy_comment2 (FILE *fin, struct obstack *oout1, struct obstack *oout2)
 233 {
 234   int cplus_comment;
 235   int ended;
 236   int c;
 237
 238   /* We read a `/', output it. */
 239   obstack_1grow (oout1, '/');
 240   if (oout2)
 241     obstack_1grow (oout2, '/');
 242
 243   switch ((c = getc (fin)))
 244     {
 245     case '/':
 246       cplus_comment = 1;
 247       break;
 248     case '*':
 249       cplus_comment = 0;
 250       break;
 251     default:
 252       ungetc (c, fin);
 253       return;
 254     }
 255
 256   obstack_1grow (oout1, c);
 257   if (oout2)
 258     obstack_1grow (oout2, c);
 259   c = getc (fin);
 260
 261   ended = 0;
 262   while (!ended)
 263     {
 264       if (!cplus_comment && c == '*')
 265         {
 266           while (c == '*')
 267             {
 268               obstack_1grow (oout1, c);
 269               if (oout2)
 270                 obstack_1grow (oout2, c);
 271               c = getc (fin);
 272             }
 273
 274           if (c == '/')
 275             {
 276               obstack_1grow (oout1, c);
 277               if (oout2)
 278                 obstack_1grow (oout2, c);
 279               ended = 1;
 280             }
 281         }
 282       else if (c == '\n')
 283         {
 284           lineno++;
 285           obstack_1grow (oout1, c);
 286           if (oout2)
 287             obstack_1grow (oout2, c);
 288           if (cplus_comment)
 289             ended = 1;
 290           else
 291             c = getc (fin);
 292         }
 293       else if (c == EOF)
 294         fatal (_("unterminated comment"));
 295       else
 296         {
 297           obstack_1grow (oout1, c);
 298           if (oout2)
 299             obstack_1grow (oout2, c);
 300           c = getc (fin);
 301         }
 302     }
 303 }
 304
 305
/* Copy a would-be comment (in fact, whatever starts with the `/' the
   caller has just read) from FIN to OOUT.  This is copy_comment2
   with no second output obstack.  */

static inline void
copy_comment (FILE *fin, struct obstack *oout)
{
  copy_comment2 (fin, oout, NULL);
}
 316
 317
 318 /*-----------------------------------------------------------------.
 319 | FIN is pointing to a location (i.e., a `@').  Output to OOUT a   |
 320 | reference to this location. STACK_OFFSET is the number of values |
 321 | in the current rule so far, which says where to find `$0' with   |
 322 | respect to the top of the stack.                                 |
 323 `-----------------------------------------------------------------*/
 324
 325 static inline void
 326 copy_at (FILE *fin, struct obstack *oout, int stack_offset)
 327 {
 328   int c;
 329
 330   c = getc (fin);
 331   if (c == '$')
 332     {
 333       obstack_sgrow (oout, "yyloc");
 334       locations_flag = 1;
 335     }
 336   else if (isdigit (c) || c == '-')
 337     {
 338       int n;
 339
 340       ungetc (c, fin);
 341       n = read_signed_integer (fin);
 342
 343       obstack_fgrow1 (oout, "yylsp[%d]", n - stack_offset);
 344       locations_flag = 1;
 345     }
 346   else
 347     {
 348       char buf[] = "@c";
 349       buf[1] = c;
 350       complain (_("%s is invalid"), quote (buf));
 351     }
 352 }
 353
 354
 355 /*-------------------------------------------------------------------.
 356 | FIN is pointing to a wannabee semantic value (i.e., a `$').        |
 357 |                                                                    |
 358 | Possible inputs: $[<TYPENAME>]($|integer)                          |
 359 |                                                                    |
 360 | Output to OOUT a reference to this semantic value. STACK_OFFSET is |
 361 | the number of values in the current rule so far, which says where  |
 362 | to find `$0' with respect to the top of the stack.                 |
 363 `-------------------------------------------------------------------*/
 364
 365 static inline void
 366 copy_dollar (FILE *fin, struct obstack *oout,
 367              symbol_list *rule, int stack_offset)
 368 {
 369   int c = getc (fin);
 370   const char *type_name = NULL;
 371
 372   /* Get the type name if explicit. */
 373   if (c == '<')
 374     {
 375       read_type_name (fin);
 376       type_name = token_buffer;
 377       value_components_used = 1;
 378       c = getc (fin);
 379     }
 380
 381   if (c == '$')
 382     {
 383       obstack_sgrow (oout, "yyval");
 384
 385       if (!type_name)
 386         type_name = get_type_name (0, rule);
 387       if (type_name)
 388         obstack_fgrow1 (oout, ".%s", type_name);
 389       if (!type_name && typed)
 390         complain (_("$$ of `%s' has no declared type"),
 391                   rule->sym->tag);
 392     }
 393   else if (isdigit (c) || c == '-')
 394     {
 395       int n;
 396       ungetc (c, fin);
 397       n = read_signed_integer (fin);
 398
 399       if (!type_name && n > 0)
 400         type_name = get_type_name (n, rule);
 401
 402       obstack_fgrow1 (oout, "yyvsp[%d]", n - stack_offset);
 403
 404       if (type_name)
 405         obstack_fgrow1 (oout, ".%s", type_name);
 406       if (!type_name && typed)
 407         complain (_("$%d of `%s' has no declared type"),
 408                   n, rule->sym->tag);
 409     }
 410   else
 411     {
 412       char buf[] = "$c";
 413       buf[1] = c;
 414       complain (_("%s is invalid"), quote (buf));
 415     }
 416 }
 417 \f
 418 /*-------------------------------------------------------------------.
 419 | Copy the contents of a `%{ ... %}' into the definitions file.  The |
 420 | `%{' has already been read.  Return after reading the `%}'.        |
 421 `-------------------------------------------------------------------*/
 422
 423 static void
 424 copy_definition (void)
 425 {
 426   int c;
 427   /* -1 while reading a character if prev char was %. */
 428   int after_percent;
 429
 430 #if 0
 431   if (!no_lines_flag)
 432     obstack_fgrow2 (&attrs_obstack, "#line %d %s\n",
 433                     lineno, quotearg_style (c_quoting_style, infile));
 434 #endif
 435
 436   after_percent = 0;
 437
 438   c = getc (finput);
 439
 440   for (;;)
 441     {
 442       switch (c)
 443         {
 444         case '\n':
 445           obstack_1grow (&attrs_obstack, c);
 446           lineno++;
 447           break;
 448
 449         case '%':
 450           after_percent = -1;
 451           break;
 452
 453         case '\'':
 454         case '"':
 455           copy_string (finput, &attrs_obstack, c);
 456           break;
 457
 458         case '/':
 459           copy_comment (finput, &attrs_obstack);
 460           break;
 461
 462         case EOF:
 463           fatal ("%s", _("unterminated `%{' definition"));
 464
 465         default:
 466           obstack_1grow (&attrs_obstack, c);
 467         }
 468
 469       c = getc (finput);
 470
 471       if (after_percent)
 472         {
 473           if (c == '}')
 474             return;
 475           obstack_1grow (&attrs_obstack, '%');
 476         }
 477       after_percent = 0;
 478     }
 479 }
 480
 481
 482 /*-------------------------------------------------------------------.
 483 | Parse what comes after %token or %nterm.  For %token, WHAT_IS is   |
 484 | token_sym and WHAT_IS_NOT is nterm_sym.  For %nterm, the arguments |
 485 | are reversed.                                                      |
 486 `-------------------------------------------------------------------*/
 487
 488 static void
 489 parse_token_decl (symbol_class what_is, symbol_class what_is_not)
 490 {
 491   token_t token = 0;
 492   char *typename = 0;
 493
 494   /* The symbol being defined.  */
 495   struct bucket *symbol = NULL;
 496
 497   /* After `%token' and `%nterm', any number of symbols maybe be
 498      defined.  */
 499   for (;;)
 500     {
 501       int tmp_char = ungetc (skip_white_space (), finput);
 502
 503       /* `%' (for instance from `%token', or from `%%' etc.) is the
 504          only valid means to end this declaration.  */
 505       if (tmp_char == '%')
 506         return;
 507       if (tmp_char == EOF)
 508         fatal (_("Premature EOF after %s"), token_buffer);
 509
 510       token = lex ();
 511       if (token == tok_comma)
 512         {
 513           symbol = NULL;
 514           continue;
 515         }
 516       if (token == tok_typename)
 517         {
 518           typename = xstrdup (token_buffer);
 519           value_components_used = 1;
 520           symbol = NULL;
 521         }
 522       else if (token == tok_identifier && *symval->tag == '\"' && symbol)
 523         {
 524           if (symval->alias)
 525             warn (_("symbol `%s' used more than once as a literal string"),
 526                   symval->tag);
 527           else if (symbol->alias)
 528             warn (_("symbol `%s' given more than one literal string"),
 529                   symbol->tag);
 530           else
 531             {
 532               symval->class = token_sym;
 533               symval->type_name = typename;
 534               symval->user_token_number = symbol->user_token_number;
 535               symbol->user_token_number = SALIAS;
 536               symval->alias = symbol;
 537               symbol->alias = symval;
 538               /* symbol and symval combined are only one symbol */
 539               nsyms--;
 540             }
 541           translations = 1;
 542           symbol = NULL;
 543         }
 544       else if (token == tok_identifier)
 545         {
 546           int oldclass = symval->class;
 547           symbol = symval;
 548
 549           if (symbol->class == what_is_not)
 550             complain (_("symbol %s redefined"), symbol->tag);
 551           symbol->class = what_is;
 552           if (what_is == nterm_sym && oldclass != nterm_sym)
 553             symbol->value = nvars++;
 554
 555           if (typename)
 556             {
 557               if (symbol->type_name == NULL)
 558                 symbol->type_name = typename;
 559               else if (strcmp (typename, symbol->type_name) != 0)
 560                 complain (_("type redeclaration for %s"), symbol->tag);
 561             }
 562         }
 563       else if (symbol && token == tok_number)
 564         {
 565           symbol->user_token_number = numval;
 566           translations = 1;
 567         }
 568       else
 569         {
 570           complain (_("`%s' is invalid in %s"),
 571                     token_buffer, (what_is == token_sym) ? "%token" : "%nterm");
 572           skip_to_char ('%');
 573         }
 574     }
 575
 576 }
 577
 578
 579 /*------------------------------.
 580 | Parse what comes after %start |
 581 `------------------------------*/
 582
 583 static void
 584 parse_start_decl (void)
 585 {
 586   if (start_flag)
 587     complain (_("multiple %s declarations"), "%start");
 588   if (lex () != tok_identifier)
 589     complain (_("invalid %s declaration"), "%start");
 590   else
 591     {
 592       start_flag = 1;
 593       startval = symval;
 594     }
 595 }
 596
 597 /*-----------------------------------------------------------.
 598 | read in a %type declaration and record its information for |
 599 | get_type_name to access                                    |
 600 `-----------------------------------------------------------*/
 601
 602 static void
 603 parse_type_decl (void)
 604 {
 605   char *name;
 606
 607   if (lex () != tok_typename)
 608     {
 609       complain ("%s", _("%type declaration has no <typename>"));
 610       skip_to_char ('%');
 611       return;
 612     }
 613
 614   name = xstrdup (token_buffer);
 615
 616   for (;;)
 617     {
 618       token_t t;
 619       int tmp_char = ungetc (skip_white_space (), finput);
 620
 621       if (tmp_char == '%')
 622         return;
 623       if (tmp_char == EOF)
 624         fatal (_("Premature EOF after %s"), token_buffer);
 625
 626       t = lex ();
 627
 628       switch (t)
 629         {
 630
 631         case tok_comma:
 632         case tok_semicolon:
 633           break;
 634
 635         case tok_identifier:
 636           if (symval->type_name == NULL)
 637             symval->type_name = name;
 638           else if (strcmp (name, symval->type_name) != 0)
 639             complain (_("type redeclaration for %s"), symval->tag);
 640
 641           break;
 642
 643         default:
 644           complain (_("invalid %%type declaration due to item: %s"),
 645                     token_buffer);
 646           skip_to_char ('%');
 647         }
 648     }
 649 }
 650
 651
 652
 653 /*----------------------------------------------------------------.
 654 | Read in a %left, %right or %nonassoc declaration and record its |
 655 | information.                                                    |
 656 `----------------------------------------------------------------*/
 657
 658 static void
 659 parse_assoc_decl (associativity assoc)
 660 {
 661   char *name = NULL;
 662   int prev = 0;
 663
 664   lastprec++;                   /* Assign a new precedence level, never 0.  */
 665
 666   for (;;)
 667     {
 668       token_t t;
 669       int tmp_char = ungetc (skip_white_space (), finput);
 670
 671       if (tmp_char == '%')
 672         return;
 673       if (tmp_char == EOF)
 674         fatal (_("Premature EOF after %s"), token_buffer);
 675
 676       t = lex ();
 677
 678       switch (t)
 679         {
 680         case tok_typename:
 681           name = xstrdup (token_buffer);
 682           break;
 683
 684         case tok_comma:
 685           break;
 686
 687         case tok_identifier:
 688           if (symval->prec != 0)
 689             complain (_("redefining precedence of %s"), symval->tag);
 690           symval->prec = lastprec;
 691           symval->assoc = assoc;
 692           if (symval->class == nterm_sym)
 693             complain (_("symbol %s redefined"), symval->tag);
 694           symval->class = token_sym;
 695           if (name)
 696             {                   /* record the type, if one is specified */
 697               if (symval->type_name == NULL)
 698                 symval->type_name = name;
 699               else if (strcmp (name, symval->type_name) != 0)
 700                 complain (_("type redeclaration for %s"), symval->tag);
 701             }
 702           break;
 703
 704         case tok_number:
 705           if (prev == tok_identifier)
 706             {
 707               symval->user_token_number = numval;
 708               translations = 1;
 709             }
 710           else
 711             {
 712               complain (_
 713                         ("invalid text (%s) - number should be after identifier"),
 714 token_buffer);
 715               skip_to_char ('%');
 716             }
 717           break;
 718
 719         case tok_semicolon:
 720           return;
 721
 722         default:
 723           complain (_("unexpected item: %s"), token_buffer);
 724           skip_to_char ('%');
 725         }
 726
 727       prev = t;
 728
 729     }
 730 }
 731
 732
 733
 734 /*--------------------------------------------------------------.
 735 | Copy the union declaration into ATTRS_OBSTACK (and fdefines), |
 736 | where it is made into the definition of YYSTYPE, the type of  |
 737 | elements of the parser value stack.                           |
 738 `--------------------------------------------------------------*/
 739
 740 static void
 741 parse_union_decl (void)
 742 {
 743   int c;
 744   int count = 0;
 745
 746   if (typed)
 747     complain (_("multiple %s declarations"), "%union");
 748
 749   typed = 1;
 750
 751   if (!no_lines_flag)
 752     obstack_fgrow2 (&attrs_obstack, "\n#line %d %s\n",
 753                     lineno, quotearg_style (c_quoting_style,
 754                                             muscle_find("filename")));
 755   else
 756     obstack_1grow (&attrs_obstack, '\n');
 757
 758   obstack_sgrow (&attrs_obstack, "typedef union");
 759   if (defines_flag)
 760     obstack_sgrow (&defines_obstack, "typedef union");
 761
 762   c = getc (finput);
 763
 764   while (c != EOF)
 765     {
 766       obstack_1grow (&attrs_obstack, c);
 767       if (defines_flag)
 768         obstack_1grow (&defines_obstack, c);
 769
 770       switch (c)
 771         {
 772         case '\n':
 773           lineno++;
 774           break;
 775
 776         case '/':
 777           copy_comment2 (finput, &defines_obstack, &attrs_obstack);
 778           break;
 779
 780         case '{':
 781           count++;
 782           break;
 783
 784         case '}':
 785           if (count == 0)
 786             complain (_("unmatched %s"), "`}'");
 787           count--;
 788           if (count <= 0)
 789             {
 790               obstack_sgrow (&attrs_obstack, " YYSTYPE;\n");
 791               if (defines_flag)
 792                 obstack_sgrow (&defines_obstack, " YYSTYPE;\n");
 793               /* JF don't choke on trailing semi */
 794               c = skip_white_space ();
 795               if (c != ';')
 796                 ungetc (c, finput);
 797               return;
 798             }
 799         }
 800
 801       c = getc (finput);
 802     }
 803 }
 804
 805
 806 /*-------------------------------------------------------.
 807 | Parse the declaration %expect N which says to expect N |
 808 | shift-reduce conflicts.                                |
 809 `-------------------------------------------------------*/
 810
 811 static void
 812 parse_expect_decl (void)
 813 {
 814   int c = skip_white_space ();
 815   ungetc (c, finput);
 816
 817   if (!isdigit (c))
 818     complain (_("argument of %%expect is not an integer"));
 819   else
 820     expected_conflicts = read_signed_integer (finput);
 821 }
 822
 823
 824 /*-------------------------------------------------------------------.
 825 | Parse what comes after %thong.  the full syntax is                 |
 826 |                                                                    |
 827 |                %thong <type> token number literal                  |
 828 |                                                                    |
 829 | the <type> or number may be omitted.  The number specifies the     |
 830 | user_token_number.                                                 |
 831 |                                                                    |
 832 | Two symbols are entered in the table, one for the token symbol and |
 833 | one for the literal.  Both are given the <type>, if any, from the  |
 834 | declaration.  The ->user_token_number of the first is SALIAS and   |
 835 | the ->user_token_number of the second is set to the number, if     |
 836 | any, from the declaration.  The two symbols are linked via         |
 837 | pointers in their ->alias fields.                                  |
 838 |                                                                    |
 839 | During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter,    |
 840 | only the literal string is retained it is the literal string that  |
 841 | is output to yytname                                               |
 842 `-------------------------------------------------------------------*/
 843
 844 static void
 845 parse_thong_decl (void)
 846 {
 847   token_t token;
 848   struct bucket *symbol;
 849   char *typename = 0;
 850   int usrtoknum;
 851
 852   translations = 1;
 853   token = lex ();               /* fetch typename or first token */
 854   if (token == tok_typename)
 855     {
 856       typename = xstrdup (token_buffer);
 857       value_components_used = 1;
 858       token = lex ();           /* fetch first token */
 859     }
 860
 861   /* process first token */
 862
 863   if (token != tok_identifier)
 864     {
 865       complain (_("unrecognized item %s, expected an identifier"),
 866                 token_buffer);
 867       skip_to_char ('%');
 868       return;
 869     }
 870   symval->class = token_sym;
 871   symval->type_name = typename;
 872   symval->user_token_number = SALIAS;
 873   symbol = symval;
 874
 875   token = lex ();               /* get number or literal string */
 876
 877   if (token == tok_number)
 878     {
 879       usrtoknum = numval;
 880       token = lex ();           /* okay, did number, now get literal */
 881     }
 882   else
 883     usrtoknum = 0;
 884
 885   /* process literal string token */
 886
 887   if (token != tok_identifier || *symval->tag != '\"')
 888     {
 889       complain (_("expected string constant instead of %s"), token_buffer);
 890       skip_to_char ('%');
 891       return;
 892     }
 893   symval->class = token_sym;
 894   symval->type_name = typename;
 895   symval->user_token_number = usrtoknum;
 896
 897   symval->alias = symbol;
 898   symbol->alias = symval;
 899
 900   /* symbol and symval combined are only one symbol.  */
 901   nsyms--;
 902 }
 903
 904 /* FIXME. */
 905
 906 static void
 907 parse_muscle_decl (void)
 908 {
 909   int ch = ungetc (skip_white_space (), finput);
 910   char* muscle_key;
 911   char* muscle_value;
 912
 913   /* Read key. */
 914   if (!isalpha (ch) && ch != '_')
 915     {
 916       complain (_("invalid %s declaration"), "%define");
 917       skip_to_char ('%');
 918       return;
 919     }
 920   copy_identifier (finput, &muscle_obstack);
 921   obstack_1grow (&muscle_obstack, 0);
 922   muscle_key = obstack_finish (&muscle_obstack);
 923
 924   /* Read value. */
 925   ch = skip_white_space ();
 926   if (ch != '"')
 927     {
 928       ungetc (ch, finput);
 929       if (ch != EOF)
 930         {
 931           complain (_("invalid %s declaration"), "%define");
 932           skip_to_char ('%');
 933           return;
 934         }
 935       else
 936         fatal (_("Premature EOF after %s"), "\"");
 937     }
 938   copy_string2 (finput, &muscle_obstack, '"', 0);
 939   obstack_1grow (&muscle_obstack, 0);
 940   muscle_value = obstack_finish (&muscle_obstack);
 941
 942   /* Store the (key, value) pair in the environment. */
 943   muscle_insert (muscle_key, muscle_value);
 944 }
 945
 946
/* Parse what comes after %skeleton.

   This is currently a stub: it reads nothing from the input and
   records nothing.  read_declarations calls it for tok_skel.  */

void
parse_skel_decl (void)
{
  /* Complete with parse_dquoted_param () on the CVS branch 1.29.  */
}
 956
 957 /*------------------------------------------.
 958 | Parse what comes after %header_extension. |
 959 `------------------------------------------*/
 960
 961 static void
 962 parse_header_extension_decl (void)
 963 {
 964   char buff[32];
 965
 966   if (header_extension)
 967     complain (_("multiple %%header_extension declarations"));
 968   fscanf (finput, "%s", buff);
 969   header_extension = xstrdup (buff);
 970 }
 971
 972 /*------------------------------------------.
 973 | Parse what comes after %source_extension. |
 974 `------------------------------------------*/
 975
 976 static void
 977 parse_source_extension_decl (void)
 978 {
 979   char buff[32];
 980
 981   if (src_extension)
 982     complain (_("multiple %%source_extension declarations"));
 983   fscanf (finput, "%s", buff);
 984   src_extension = xstrdup (buff);
 985 }
 986
 987 /*----------------------------------------------------------------.
 988 | Read from finput until `%%' is seen.  Discard the `%%'.  Handle |
 989 | any `%' declarations, and copy the contents of any `%{ ... %}'  |
 990 | groups to ATTRS_OBSTACK.                                        |
 991 `----------------------------------------------------------------*/
 992
 993 static void
 994 read_declarations (void)
 995 {
 996   int c;
 997   int tok;
 998
 999   for (;;)
1000     {
1001       c = skip_white_space ();
1002
1003       if (c == '%')
1004         {
1005           tok = parse_percent_token ();
1006
1007           switch (tok)
1008             {
1009             case tok_two_percents:
1010               return;
1011
1012             case tok_percent_left_curly:
1013               copy_definition ();
1014               break;
1015
1016             case tok_token:
1017               parse_token_decl (token_sym, nterm_sym);
1018               break;
1019
1020             case tok_nterm:
1021               parse_token_decl (nterm_sym, token_sym);
1022               break;
1023
1024             case tok_type:
1025               parse_type_decl ();
1026               break;
1027
1028             case tok_start:
1029               parse_start_decl ();
1030               break;
1031
1032             case tok_union:
1033               parse_union_decl ();
1034               break;
1035
1036             case tok_expect:
1037               parse_expect_decl ();
1038               break;
1039
1040             case tok_thong:
1041               parse_thong_decl ();
1042               break;
1043
1044             case tok_left:
1045               parse_assoc_decl (left_assoc);
1046               break;
1047
1048             case tok_right:
1049               parse_assoc_decl (right_assoc);
1050               break;
1051
1052             case tok_nonassoc:
1053               parse_assoc_decl (non_assoc);
1054               break;
1055
1056             case tok_hdrext:
1057               parse_header_extension_decl ();
1058               break;
1059
1060             case tok_srcext:
1061               parse_source_extension_decl ();
1062               break;
1063
1064             case tok_define:
1065               parse_muscle_decl ();
1066               break;
1067
1068             case tok_skel:
1069               parse_skel_decl ();
1070               break;
1071
1072             case tok_noop:
1073               break;
1074
1075             default:
1076               complain (_("unrecognized: %s"), token_buffer);
1077               skip_to_char ('%');
1078             }
1079         }
1080       else if (c == EOF)
1081         fatal (_("no input grammar"));
1082       else
1083         {
1084           char buf[] = "c";
1085           buf[0] = c;
1086           complain (_("unknown character: %s"), quote (buf));
1087           skip_to_char ('%');
1088         }
1089     }
1090 }
1091 \f
1092 /*-------------------------------------------------------------------.
1093 | Assuming that a `{' has just been seen, copy everything up to the  |
1094 | matching `}' into the actions file.  STACK_OFFSET is the number of |
1095 | values in the current rule so far, which says where to find `$0'   |
1096 | with respect to the top of the stack.                              |
1097 `-------------------------------------------------------------------*/
1098
1099 static void
1100 copy_action (symbol_list *rule, int stack_offset)
1101 {
1102   int c;
1103   int count;
1104   char buf[4096];
1105
1106   /* offset is always 0 if parser has already popped the stack pointer */
1107   if (semantic_parser)
1108     stack_offset = 0;
1109
1110   sprintf (buf, "\ncase %d:\n", nrules);
1111   obstack_grow (&action_obstack, buf, strlen (buf));
1112
1113   if (!no_lines_flag)
1114     {
1115       sprintf (buf, "#line %d %s\n",
1116                lineno, quotearg_style (c_quoting_style,
1117                                        muscle_find ("filename")));
1118       obstack_grow (&action_obstack, buf, strlen (buf));
1119     }
1120   obstack_1grow (&action_obstack, '{');
1121
1122   count = 1;
1123   c = getc (finput);
1124
1125   while (count > 0)
1126     {
1127       while (c != '}')
1128         {
1129           switch (c)
1130             {
1131             case '\n':
1132               obstack_1grow (&action_obstack, c);
1133               lineno++;
1134               break;
1135
1136             case '{':
1137               obstack_1grow (&action_obstack, c);
1138               count++;
1139               break;
1140
1141             case '\'':
1142             case '"':
1143               copy_string (finput, &action_obstack, c);
1144               break;
1145
1146             case '/':
1147               copy_comment (finput, &action_obstack);
1148               break;
1149
1150             case '$':
1151               copy_dollar (finput, &action_obstack,
1152                            rule, stack_offset);
1153               break;
1154
1155             case '@':
1156               copy_at (finput, &action_obstack,
1157                        stack_offset);
1158               break;
1159
1160             case EOF:
1161               fatal (_("unmatched %s"), "`{'");
1162
1163             default:
1164               obstack_1grow (&action_obstack, c);
1165             }
1166
1167           c = getc (finput);
1168         }
1169
1170       /* above loop exits when c is '}' */
1171
1172       if (--count)
1173         {
1174           obstack_1grow (&action_obstack, c);
1175           c = getc (finput);
1176         }
1177     }
1178
1179   obstack_sgrow (&action_obstack, ";\n    break;}");
1180 }
1181 \f
/*-------------------------------------------------------------------.
| After `%guard' is seen in the input file, copy the actual guard    |
| into GUARD_OBSTACK.  If the guard is followed by an action, copy   |
| that into ACTION_OBSTACK via copy_action.  RULE is the rule the    |
| guard belongs to.  STACK_OFFSET is the number of values in the     |
| current rule so far, which says where to find `$0' with respect to |
| the top of the stack, for the simple parser in which the stack is  |
| not popped until after the guard is run.                           |
`-------------------------------------------------------------------*/

static void
copy_guard (symbol_list *rule, int stack_offset)
{
  int c;
  /* Current brace nesting depth inside the guard.  */
  int count;
  /* Becomes nonzero once a `{' has been seen in the guard.  */
  int brace_flag = 0;

  /* offset is always 0 if parser has already popped the stack pointer */
  if (semantic_parser)
    stack_offset = 0;

  /* Open a `case' for this rule, optionally preceded by a `#line'
     directive pointing back at the grammar file.  */
  obstack_fgrow1 (&guard_obstack, "\ncase %d:\n", nrules);
  if (!no_lines_flag)
    obstack_fgrow2 (&guard_obstack, "#line %d %s\n",
                    lineno, quotearg_style (c_quoting_style,
                                            muscle_find ("filename")));
  obstack_1grow (&guard_obstack, '{');

  count = 0;
  c = getc (finput);

  /* Until a `{' has been seen the guard extends to the next `;'.
     Once a `{' has been seen it extends until the braces balance.  */
  while (brace_flag ? (count > 0) : (c != ';'))
    {
      switch (c)
        {
        case '\n':
          obstack_1grow (&guard_obstack, c);
          lineno++;
          break;

        case '{':
          obstack_1grow (&guard_obstack, c);
          brace_flag = 1;
          count++;
          break;

        case '}':
          obstack_1grow (&guard_obstack, c);
          if (count > 0)
            count--;
          else
            {
              complain (_("unmatched %s"), "`}'");
              /* NOTE(review): C now holds the character after the
                 stray `}'.  Unless that character is itself `}', the
                 read at the bottom of the loop replaces it before it
                 is examined, so it appears to be dropped -- confirm
                 whether that is intended.  */
              c = getc (finput);        /* skip it */
            }
          break;

        case '\'':
        case '"':
          copy_string (finput, &guard_obstack, c);
          break;

        case '/':
          copy_comment (finput, &guard_obstack);
          break;

        case '$':
          copy_dollar (finput, &guard_obstack, rule, stack_offset);
          break;

        case '@':
          copy_at (finput, &guard_obstack, stack_offset);
          break;

        case EOF:
          /* NOTE(review): relies on fatal () not returning; otherwise
             control falls into the default case.  */
          fatal ("%s", _("unterminated %guard clause"));

        default:
          obstack_1grow (&guard_obstack, c);
        }

      /* Do not read past the `}' that brings the depth back to zero;
         the loop condition then ends the scan on that brace.  */
      if (c != '}' || count != 0)
        c = getc (finput);
    }

  c = skip_white_space ();

  /* Close the `case' opened above.  */
  obstack_sgrow (&guard_obstack, ";\n    break;}");
  /* An action may follow the guard, either directly as `{' or
     introduced by `='.  */
  if (c == '{')
    copy_action (rule, stack_offset);
  else if (c == '=')
    {
      /* NOTE(review): if the character after `=' is not `{' it has
         been consumed and is not pushed back.  */
      c = getc (finput);        /* why not skip_white_space -wjh */
      if (c == '{')
        copy_action (rule, stack_offset);
    }
  else
    ungetc (c, finput);
}
1280 \f
1281
1282 static void
1283 record_rule_line (void)
1284 {
1285   /* Record each rule's source line number in rline table.  */
1286
1287   if (nrules >= rline_allocated)
1288     {
1289       rline_allocated = nrules * 2;
1290       rline = XREALLOC (rline, short, rline_allocated);
1291     }
1292   rline[nrules] = lineno;
1293 }
1294
1295
1296 /*-------------------------------------------------------------------.
1297 | Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1298 | with the user's names.                                             |
1299 `-------------------------------------------------------------------*/
1300
1301 static bucket *
1302 gensym (void)
1303 {
1304   /* Incremented for each generated symbol */
1305   static int gensym_count = 0;
1306   static char buf[256];
1307
1308   bucket *sym;
1309
1310   sprintf (buf, "@%d", ++gensym_count);
1311   token_buffer = buf;
1312   sym = getsym (token_buffer);
1313   sym->class = nterm_sym;
1314   sym->value = nvars++;
1315   return sym;
1316 }
1317
#if 0
/*------------------------------------------------------------------.
| Read in a %type declaration and record its information for        |
| get_type_name to access.  Return the first token that is not part |
| of the declaration, so the caller can continue from it.  This is  |
| unused.  It is only called from the #if 0 part of readgram.       |
`------------------------------------------------------------------*/

static int
get_type (void)
{
  int k;
  token_t token;
  char *name;

  token = lex ();

  if (token != tok_typename)
    {
      complain (_("invalid %s declaration"), "%type");
      /* Hand back the offending token, as the default case below
         does.  (This used to name an undeclared variable `t'.)  */
      return token;
    }

  name = xstrdup (token_buffer);

  for (;;)
    {
      token = lex ();

      switch (token)
        {
        case tok_semicolon:
          /* The declaration is complete; return the token after it.  */
          return lex ();

        case tok_comma:
          break;

        case tok_identifier:
          /* Give the symbol this type, or complain if it already has
             a different one.  */
          if (symval->type_name == NULL)
            symval->type_name = name;
          else if (strcmp (name, symval->type_name) != 0)
            complain (_("type redeclaration for %s"), symval->tag);

          break;

        default:
          return token;
        }
    }
}

#endif
1369 \f
/*------------------------------------------------------------------.
| Parse the input grammar into a one symbol_list structure.  Each   |
| rule is represented by a sequence of symbols: the left hand side  |
| followed by the contents of the right hand side, followed by a    |
| null pointer instead of a symbol to terminate the rule.  The next |
| symbol is the lhs of the following rule.                          |
|                                                                   |
| All guards and actions are copied out to the appropriate          |
| obstacks (see copy_guard and copy_action), labelled by the rule   |
| number they apply to.                                             |
|                                                                   |
| On return NRULES, NITEMS, NVARS and NTOKENS have been updated and |
| GRAMMAR points to the head of the list.                           |
`------------------------------------------------------------------*/

static void
readgram (void)
{
  /* The current token.  */
  token_t t;
  /* Left hand side of the current rule; kept across iterations so
     that a `|' alternative reuses the previous rule's lhs.  */
  bucket *lhs = NULL;
  /* Scratch pointer for newly allocated list cells.  */
  symbol_list *p;
  /* Last cell of the list built so far.  */
  symbol_list *p1;
  bucket *bp;

  /* Points to first symbol_list of current rule. its symbol is the
     lhs of the rule.  */
  symbol_list *crule;
  /* Points to the symbol_list preceding crule.  */
  symbol_list *crule1;

  p1 = NULL;

  t = lex ();

  while (t != tok_two_percents && t != tok_eof)
    {
      if (t == tok_identifier || t == tok_bar)
        {
          /* Nonzero if the last thing read in this rule was an
             action.  */
          int action_flag = 0;
          /* Number of symbols in rhs of this rule so far */
          int rulelength = 0;
          int xactions = 0;     /* JF for error checking */
          /* First identifier of the rhs, used for the default-action
             type check below.  */
          bucket *first_rhs = 0;

          if (t == tok_identifier)
            {
              lhs = symval;

              /* The lhs of the first rule is the start symbol unless
                 one has already been recorded.  */
              if (!start_flag)
                {
                  startval = lhs;
                  start_flag = 1;
                }

              t = lex ();
              if (t != tok_colon)
                {
                  complain (_("ill-formed rule: initial symbol not followed by colon"));
                  unlex (t);
                }
            }

          if (nrules == 0 && t == tok_bar)
            {
              complain (_("grammar starts with vertical bar"));
              /* NOTE(review): nothing visible here guarantees that
                 symval is a valid symbol at this point, yet LHS is
                 dereferenced below.  */
              lhs = symval;     /* BOGUS: use a random symval */
            }
          /* start a new rule and record its lhs.  */

          nrules++;
          nitems++;

          record_rule_line ();

          p = XCALLOC (symbol_list, 1);
          p->sym = lhs;

          /* Append the new cell to the list, remembering its
             predecessor so a dummy rule can be inserted before it.  */
          crule1 = p1;
          if (p1)
            p1->next = p;
          else
            grammar = p;

          p1 = p;
          crule = p;

          /* mark the rule's lhs as a nonterminal if not already so.  */

          if (lhs->class == unknown_sym)
            {
              lhs->class = nterm_sym;
              lhs->value = nvars;
              nvars++;
            }
          else if (lhs->class == token_sym)
            complain (_("rule given for %s, which is a token"), lhs->tag);

          /* read the rhs of the rule.  */

          for (;;)
            {
              t = lex ();
              if (t == tok_prec)
                {
                  /* The token after `%prec' names the symbol whose
                     precedence this rule takes.  */
                  t = lex ();
                  crule->ruleprec = symval;
                  t = lex ();
                }

              if (!(t == tok_identifier || t == tok_left_curly))
                break;

              /* If next token is an identifier, see if a colon follows it.
                 If one does, exit this rule now.  */
              if (t == tok_identifier)
                {
                  bucket *ssave;
                  token_t t1;

                  /* Peek one token ahead, then restore symval, which
                     is saved around the lookahead.  */
                  ssave = symval;
                  t1 = lex ();
                  unlex (t1);
                  symval = ssave;
                  if (t1 == tok_colon)
                    break;

                  if (!first_rhs)       /* JF */
                    first_rhs = symval;
                  /* Not followed by colon =>
                     process as part of this rule's rhs.  */
                }

              /* If we just passed an action, that action was in the middle
                 of a rule, so make a dummy rule to reduce it to a
                 non-terminal.  */
              if (action_flag)
                {
                  bucket *sdummy;

                  /* Since the action was written out with this rule's
                     number, we must give the new rule this number by
                     inserting the new rule before it.  */

                  /* Make a dummy nonterminal, a gensym.  */
                  sdummy = gensym ();

                  /* Make a new rule, whose body is empty,
                     before the current one, so that the action
                     just read can belong to it.  */
                  nrules++;
                  nitems++;
                  record_rule_line ();
                  p = XCALLOC (symbol_list, 1);
                  if (crule1)
                    crule1->next = p;
                  else
                    grammar = p;
                  p->sym = sdummy;
                  /* This second cell has a null symbol: it is the
                     end-of-rule marker of the dummy rule, and becomes
                     the new predecessor of the current rule.  */
                  crule1 = XCALLOC (symbol_list, 1);
                  p->next = crule1;
                  crule1->next = crule;

                  /* Insert the dummy generated by that rule into this
                     rule.  */
                  nitems++;
                  p = XCALLOC (symbol_list, 1);
                  p->sym = sdummy;
                  p1->next = p;
                  p1 = p;

                  action_flag = 0;
                }

              if (t == tok_identifier)
                {
                  /* Append the symbol to the rhs of the current
                     rule.  */
                  nitems++;
                  p = XCALLOC (symbol_list, 1);
                  p->sym = symval;
                  p1->next = p;
                  p1 = p;
                }
              else              /* handle an action.  */
                {
                  copy_action (crule, rulelength);
                  action_flag = 1;
                  xactions++;   /* JF */
                }
              rulelength++;
            }                   /* end of  read rhs of rule */

          /* Put an empty link in the list to mark the end of this rule  */
          p = XCALLOC (symbol_list, 1);
          p1->next = p;
          p1 = p;

          if (t == tok_prec)
            {
              complain (_("two @prec's in a row"));
              t = lex ();
              crule->ruleprec = symval;
              t = lex ();
            }
          if (t == tok_guard)
            {
              if (!semantic_parser)
                complain (_("%%guard present but %%semantic_parser not specified"));

              copy_guard (crule, rulelength);
              t = lex ();
            }
          else if (t == tok_left_curly)
            {
              /* This case never occurs -wjh */
              if (action_flag)
                complain (_("two actions at end of one rule"));
              copy_action (crule, rulelength);
              action_flag = 1;
              xactions++;       /* -wjh */
              t = lex ();
            }
          /* If $$ is being set in default way, report if any type
             mismatch.  */
          else if (!xactions
                   && first_rhs && lhs->type_name != first_rhs->type_name)
            {
              /* The pointers differ; it is a clash only if either
                 type is missing or the names really differ.  */
              if (lhs->type_name == 0
                  || first_rhs->type_name == 0
                  || strcmp (lhs->type_name, first_rhs->type_name))
                complain (_("type clash (`%s' `%s') on default action"),
                          lhs->type_name ? lhs->type_name : "",
                          first_rhs->type_name ? first_rhs->type_name : "");
            }
          /* Warn if there is no default for $$ but we need one.  */
          else if (!xactions && !first_rhs && lhs->type_name != 0)
            complain (_("empty rule for typed nonterminal, and no action"));
          /* The `;' terminating a rule is optional.  */
          if (t == tok_semicolon)
            t = lex ();
        }
#if 0
      /* these things can appear as alternatives to rules.  */
/* NO, they cannot.
        a) none of the documentation allows them
        b) most of them scan forward until finding a next %
                thus they may swallow lots of intervening rules
*/
      else if (t == tok_token)
        {
          parse_token_decl (token_sym, nterm_sym);
          t = lex ();
        }
      else if (t == tok_nterm)
        {
          parse_token_decl (nterm_sym, token_sym);
          t = lex ();
        }
      else if (t == tok_type)
        {
          t = get_type ();
        }
      else if (t == tok_union)
        {
          parse_union_decl ();
          t = lex ();
        }
      else if (t == tok_expect)
        {
          parse_expect_decl ();
          t = lex ();
        }
      else if (t == tok_start)
        {
          parse_start_decl ();
          t = lex ();
        }
#endif

      else
        {
          /* Anything else cannot start a rule: report it and go on
             with the next token.  */
          complain (_("invalid input: %s"), quote (token_buffer));
          t = lex ();
        }
    }

  /* grammar has been read.  Do some checking */

  if (nsyms > MAXSHORT)
    fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
           MAXSHORT);
  if (nrules == 0)
    fatal (_("no rules in the input grammar"));

  /* Report any undefined symbols and consider them nonterminals.  */

  for (bp = firstsymbol; bp; bp = bp->next)
    if (bp->class == unknown_sym)
      {
        complain (_
                  ("symbol %s is used, but is not defined as a token and has no rules"),
                  bp->tag);
        bp->class = nterm_sym;
        bp->value = nvars++;
      }

  /* Every symbol that is not a nonterminal is a token.  */
  ntokens = nsyms - nvars;
}
1671
1672 /* At the end of the grammar file, some C source code must
1673    be stored. It is going to be associated to the epilogue
1674    directive.  */
1675 static void
1676 read_additionnal_code (void)
1677 {
1678   char c;
1679   struct obstack el_obstack;
1680
1681   obstack_init (&el_obstack);
1682
1683   while ((c = getc (finput)) != EOF)
1684     obstack_1grow (&el_obstack, c);
1685
1686   obstack_1grow (&el_obstack, 0);
1687   muscle_insert ("epilogue", obstack_finish (&el_obstack));
1688 }
1689
1690 \f
1691 /*--------------------------------------------------------------.
1692 | For named tokens, but not literal ones, define the name.  The |
1693 | value is the user token number.                               |
1694 `--------------------------------------------------------------*/
1695
1696 static void
1697 output_token_defines (struct obstack *oout)
1698 {
1699   bucket *bp;
1700   char *cp, *symbol;
1701   char c;
1702
1703   for (bp = firstsymbol; bp; bp = bp->next)
1704     {
1705       symbol = bp->tag;         /* get symbol */
1706
1707       if (bp->value >= ntokens)
1708         continue;
1709       if (bp->user_token_number == SALIAS)
1710         continue;
1711       if ('\'' == *symbol)
1712         continue;               /* skip literal character */
1713       if (bp == errtoken)
1714         continue;               /* skip error token */
1715       if ('\"' == *symbol)
1716         {
1717           /* use literal string only if given a symbol with an alias */
1718           if (bp->alias)
1719             symbol = bp->alias->tag;
1720           else
1721             continue;
1722         }
1723
1724       /* Don't #define nonliteral tokens whose names contain periods.  */
1725       cp = symbol;
1726       while ((c = *cp++) && c != '.');
1727       if (c != '\0')
1728         continue;
1729
1730       obstack_fgrow2 (oout, "# define\t%s\t%d\n",
1731                       symbol,
1732                       (translations ? bp->user_token_number : bp->value));
1733       if (semantic_parser)
1734         obstack_fgrow2 (oout, "# define\tT%s\t%d\n", symbol, bp->value);
1735     }
1736 }
1737
1738
/*------------------------------------------------------------------.
| Assign symbol numbers, and write the definitions of token names   |
| into OUTPUT_OBSTACK (stored as the "tokendef" muscle) and, when   |
| DEFINES_FLAG is set, into DEFINES_OBSTACK.  Set up the vectors    |
| TAGS, USER_TOKNUMS, SPREC and SASSOC of names, user token         |
| numbers, precedences and associativities of symbols, and, when    |
| TRANSLATIONS is set, TOKEN_TRANSLATIONS.                          |
`------------------------------------------------------------------*/

static void
packsymbols (void)
{
  bucket *bp;
  /* Next internal token number to hand out.  */
  int tokno = 1;
  int i;
  /* Last user token number assigned automatically.  */
  int last_user_token_number;
  static char DOLLAR[] = "$";

  /* Slot 0 of TAGS and USER_TOKNUMS is filled in here with "$" and 0;
     the loop below hands out token numbers starting at 1.  */
  tags = XCALLOC (char *, nsyms + 1);
  tags[0] = DOLLAR;
  user_toknums = XCALLOC (short, nsyms + 1);
  user_toknums[0] = 0;

  sprec = XCALLOC (short, nsyms);
  sassoc = XCALLOC (short, nsyms);

  max_user_token_number = 256;
  last_user_token_number = 256;

  for (bp = firstsymbol; bp; bp = bp->next)
    {
      if (bp->class == nterm_sym)
        {
          /* Nonterminals are numbered after all the tokens.  */
          bp->value += ntokens;
        }
      else if (bp->alias)
        {
          /* this symbol and its alias are a single token defn.
             allocate a tokno, and assign to both check agreement of
             ->prec and ->assoc fields and make both the same */
          if (bp->value == 0)
            bp->value = bp->alias->value = tokno++;

          if (bp->prec != bp->alias->prec)
            {
              if (bp->prec != 0 && bp->alias->prec != 0
                  && bp->user_token_number == SALIAS)
                complain (_("conflicting precedences for %s and %s"),
                          bp->tag, bp->alias->tag);
              /* Whichever side has a precedence set wins, this
                 symbol's taking priority.  */
              if (bp->prec != 0)
                bp->alias->prec = bp->prec;
              else
                bp->prec = bp->alias->prec;
            }

          if (bp->assoc != bp->alias->assoc)
            {
              if (bp->assoc != 0 && bp->alias->assoc != 0
                  && bp->user_token_number == SALIAS)
                complain (_("conflicting assoc values for %s and %s"),
                          bp->tag, bp->alias->tag);
              /* Same rule as for precedences above.  */
              if (bp->assoc != 0)
                bp->alias->assoc = bp->assoc;
              else
                bp->assoc = bp->alias->assoc;
            }

          if (bp->user_token_number == SALIAS)
            continue;           /* do not do processing below for SALIASs */

        }
      else                      /* bp->class == token_sym */
        {
          bp->value = tokno++;
        }

      if (bp->class == token_sym)
        {
          /* Tokens without an explicit user number get the next free
             one; keep track of the largest number in use.  */
          if (translations && !(bp->user_token_number))
            bp->user_token_number = ++last_user_token_number;
          if (bp->user_token_number > max_user_token_number)
            max_user_token_number = bp->user_token_number;
        }

      /* Record the symbol's attributes under its final number.  */
      tags[bp->value] = bp->tag;
      user_toknums[bp->value] = bp->user_token_number;
      sprec[bp->value] = bp->prec;
      sassoc[bp->value] = bp->assoc;

    }

  if (translations)
    {
      int j;

      token_translations = XCALLOC (short, max_user_token_number + 1);

      /* initialize all entries for literal tokens to 2, the internal
         token number for $undefined., which represents all invalid
         inputs.  */
      for (j = 0; j <= max_user_token_number; j++)
        token_translations[j] = 2;

      for (bp = firstsymbol; bp; bp = bp->next)
        {
          if (bp->value >= ntokens)
            continue;           /* non-terminal */
          if (bp->user_token_number == SALIAS)
            continue;
          /* An entry that no longer holds 2 was already claimed by
             another token.  */
          if (token_translations[bp->user_token_number] != 2)
            complain (_("tokens %s and %s both assigned number %d"),
                      tags[token_translations[bp->user_token_number]],
                      bp->tag, bp->user_token_number);
          token_translations[bp->user_token_number] = bp->value;
        }
    }

  error_token_number = errtoken->value;

  /* Store the token definitions as the value of the "tokendef"
     muscle.  */
  output_token_defines (&output_obstack);
  obstack_1grow (&output_obstack, 0);
  muscle_insert ("tokendef", obstack_finish (&output_obstack));

#if 0
  if (!no_parser_flag)
    output_token_defines (&table_obstack);
#endif

  if (startval->class == unknown_sym)
    fatal (_("the start symbol %s is undefined"), startval->tag);
  else if (startval->class == token_sym)
    fatal (_("the start symbol %s is a token"), startval->tag);

  start_symbol = startval->value;

  if (defines_flag)
    {
      output_token_defines (&defines_obstack);

      /* Declare the semantic value variable, honoring the name prefix
         when one was specified.  */
      if (!pure_parser)
        {
          if (spec_name_prefix)
            obstack_fgrow1 (&defines_obstack, "\nextern YYSTYPE %slval;\n",
                            spec_name_prefix);
          else
            obstack_sgrow (&defines_obstack,
                                 "\nextern YYSTYPE yylval;\n");
        }

      if (semantic_parser)
        for (i = ntokens; i < nsyms; i++)
          {
            /* don't make these for dummy nonterminals made by gensym.  */
            if (*tags[i] != '@')
               obstack_fgrow2 (&defines_obstack,
                               "# define\tNT%s\t%d\n", tags[i], i);
          }
#if 0
      /* `fdefines' is now a temporary file, so we need to copy its
         contents in `done', so we can't close it here.  */
      fclose (fdefines);
      fdefines = NULL;
#endif
    }
}
1901
1902
1903 /*---------------------------------------------------------------.
1904 | Convert the rules into the representation using RRHS, RLHS and |
1905 | RITEMS.                                                        |
1906 `---------------------------------------------------------------*/
1907
1908 static void
1909 packgram (void)
1910 {
1911   int itemno;
1912   int ruleno;
1913   symbol_list *p;
1914
1915   bucket *ruleprec;
1916
1917   ritem = XCALLOC (short, nitems + 1);
1918   rlhs = XCALLOC (short, nrules) - 1;
1919   rrhs = XCALLOC (short, nrules) - 1;
1920   rprec = XCALLOC (short, nrules) - 1;
1921   rprecsym = XCALLOC (short, nrules) - 1;
1922   rassoc = XCALLOC (short, nrules) - 1;
1923
1924   itemno = 0;
1925   ruleno = 1;
1926
1927   p = grammar;
1928   while (p)
1929     {
1930       rlhs[ruleno] = p->sym->value;
1931       rrhs[ruleno] = itemno;
1932       ruleprec = p->ruleprec;
1933
1934       p = p->next;
1935       while (p && p->sym)
1936         {
1937           ritem[itemno++] = p->sym->value;
1938           /* A rule gets by default the precedence and associativity
1939              of the last token in it.  */
1940           if (p->sym->class == token_sym)
1941             {
1942               rprec[ruleno] = p->sym->prec;
1943               rassoc[ruleno] = p->sym->assoc;
1944             }
1945           if (p)
1946             p = p->next;
1947         }
1948
1949       /* If this rule has a %prec,
1950          the specified symbol's precedence replaces the default.  */
1951       if (ruleprec)
1952         {
1953           rprec[ruleno] = ruleprec->prec;
1954           rassoc[ruleno] = ruleprec->assoc;
1955           rprecsym[ruleno] = ruleprec->value;
1956         }
1957
1958       ritem[itemno++] = -ruleno;
1959       ruleno++;
1960
1961       if (p)
1962         p = p->next;
1963     }
1964
1965   ritem[itemno] = 0;
1966 }
1967 \f
1968 /*-------------------------------------------------------------------.
1969 | Read in the grammar specification and record it in the format      |
1970 | described in gram.h.  All guards are copied into the GUARD_OBSTACK |
1971 | and all actions into ACTION_OBSTACK, in each case forming the body |
1972 | of a C function (YYGUARD or YYACTION) which contains a switch      |
1973 | statement to decide which guard or action to execute.              |
1974 `-------------------------------------------------------------------*/
1975
1976 void
1977 reader (void)
1978 {
1979   start_flag = 0;
1980   startval = NULL;              /* start symbol not specified yet. */
1981
1982 #if 0
1983   /* initially assume token number translation not needed.  */
1984   translations = 0;
1985 #endif
1986   /* Nowadays translations is always set to 1, since we give `error' a
1987      user-token-number to satisfy the Posix demand for YYERRCODE==256.
1988    */
1989   translations = 1;
1990
1991   nsyms = 1;
1992   nvars = 0;
1993   nrules = 0;
1994   nitems = 0;
1995   rline_allocated = 10;
1996   rline = XCALLOC (short, rline_allocated);
1997
1998   typed = 0;
1999   lastprec = 0;
2000
2001   semantic_parser = 0;
2002   pure_parser = 0;
2003
2004   grammar = NULL;
2005
2006   init_lex ();
2007   lineno = 1;
2008
2009   /* Initialize the muscle obstack.  */
2010   obstack_init (&muscle_obstack);
2011
2012   /* Initialize the symbol table.  */
2013   tabinit ();
2014
2015   /* Construct the error token */
2016   errtoken = getsym ("error");
2017   errtoken->class = token_sym;
2018   errtoken->user_token_number = 256;    /* Value specified by POSIX.  */
2019
2020   /* Construct a token that represents all undefined literal tokens.
2021      It is always token number 2.  */
2022   undeftoken = getsym ("$undefined.");
2023   undeftoken->class = token_sym;
2024   undeftoken->user_token_number = 2;
2025
2026   /* Read the declaration section.  Copy %{ ... %} groups to
2027      TABLE_OBSTACK and FDEFINES file.  Also notice any %token, %left,
2028      etc. found there.  */
2029   read_declarations ();
2030   /* Read in the grammar, build grammar in list form.  Write out
2031      guards and actions.  */
2032   readgram ();
2033   /* Some C code is given at the end of the grammar file. */
2034   read_additionnal_code ();
2035
2036   /* Now we know whether we need the line-number stack.  If we do,
2037      write its type into the .tab.h file.
2038      This is no longer need with header skeleton.  */
2039
2040   /* Assign the symbols their symbol numbers.  Write #defines for the
2041      token symbols into FDEFINES if requested.  */
2042   packsymbols ();
2043   /* Convert the grammar into the format described in gram.h.  */
2044   packgram ();
2045   /* Free the symbol table data structure since symbols are now all
2046      referred to by symbol number.  */
2047   free_symtab ();
2048 }