src/reader.c

   1 /* Input parser for bison
   2    Copyright 1984, 1986, 1989, 1992, 1998, 2000, 2001
   3    Free Software Foundation, Inc.
   4
   5    This file is part of Bison, the GNU Compiler Compiler.
   6
   7    Bison is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 2, or (at your option)
  10    any later version.
  11
  12    Bison is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with Bison; see the file COPYING.  If not, write to
  19    the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20    Boston, MA 02111-1307, USA.  */
  21
  22
   #include "system.h"
   #include <limits.h>
   #include "obstack.h"
   #include "quotearg.h"
   #include "quote.h"
   #include "getargs.h"
   #include "files.h"
   #include "symtab.h"
   #include "lex.h"
   #include "gram.h"
   #include "complain.h"
   #include "output.h"
   #include "reader.h"
   #include "conflicts.h"
  36
  37 typedef struct symbol_list
  38 {
  39   struct symbol_list *next;
  40   bucket *sym;
  41   int line;
  42   bucket *ruleprec;
  43 }
  44 symbol_list;
  45
  46 int lineno;
  47 char **tags;
  48 short *user_toknums;
  49 static symbol_list *grammar;
  50 static int start_flag;
  51 static bucket *startval;
  52
  53 /* Nonzero if components of semantic values are used, implying
  54    they must be unions.  */
  55 static int value_components_used;
  56
  57 /* Nonzero if %union has been seen.  */
  58 static int typed;
  59
  60 /* Incremented for each %left, %right or %nonassoc seen */
  61 static int lastprec;
  62
  63 static bucket *errtoken;
  64 static bucket *undeftoken;
  65
  66
  67 static symbol_list *
  68 symbol_list_new (bucket *sym)
  69 {
  70   symbol_list *res = XMALLOC (symbol_list, 1);
  71   res->next = NULL;
  72   res->sym = sym;
  73   res->line = lineno;
  74   res->ruleprec = NULL;
  75   return res;
  76 }
  77
  78 \f
  79
  80 /*===================\
  81 | Low level lexing.  |
  82 \===================*/
  83
  84 static void
  85 skip_to_char (int target)
  86 {
  87   int c;
  88   if (target == '\n')
  89     complain (_("   Skipping to next \\n"));
  90   else
  91     complain (_("   Skipping to next %c"), target);
  92
  93   do
  94     c = skip_white_space ();
  95   while (c != target && c != EOF);
  96   if (c != EOF)
  97     ungetc (c, finput);
  98 }
  99
 100
 101 /*---------------------------------------------------------.
 102 | Read a signed integer from STREAM and return its value.  |
 103 `---------------------------------------------------------*/
 104
 105 static inline int
 106 read_signed_integer (FILE *stream)
 107 {
 108   int c = getc (stream);
 109   int sign = 1;
 110   int n = 0;
 111
 112   if (c == '-')
 113     {
 114       c = getc (stream);
 115       sign = -1;
 116     }
 117
 118   while (isdigit (c))
 119     {
 120       n = 10 * n + (c - '0');
 121       c = getc (stream);
 122     }
 123
 124   ungetc (c, stream);
 125
 126   return sign * n;
 127 }
 128 \f
 129 /*--------------------------------------------------------------.
 130 | Get the data type (alternative in the union) of the value for |
 131 | symbol N in rule RULE.                                        |
 132 `--------------------------------------------------------------*/
 133
 134 static char *
 135 get_type_name (int n, symbol_list *rule)
 136 {
 137   int i;
 138   symbol_list *rp;
 139
 140   if (n < 0)
 141     {
 142       complain (_("invalid $ value"));
 143       return NULL;
 144     }
 145
 146   rp = rule;
 147   i = 0;
 148
 149   while (i < n)
 150     {
 151       rp = rp->next;
 152       if (rp == NULL || rp->sym == NULL)
 153         {
 154           complain (_("invalid $ value"));
 155           return NULL;
 156         }
 157       i++;
 158     }
 159
 160   return rp->sym->type_name;
 161 }
 162 \f
 163 /*------------------------------------------------------------.
 164 | Dump the string from FIN to OOUT if non null.  MATCH is the |
 165 | delimiter of the string (either ' or ").                    |
 166 `------------------------------------------------------------*/
 167
 168 static inline void
 169 copy_string (FILE *fin, struct obstack *oout, int match)
 170 {
 171   int c;
 172
 173   obstack_1grow (oout, match);
 174
 175   c = getc (fin);
 176
 177   while (c != match)
 178     {
 179       if (c == EOF)
 180         fatal (_("unterminated string at end of file"));
 181       if (c == '\n')
 182         {
 183           complain (_("unterminated string"));
 184           ungetc (c, fin);
 185           c = match;            /* invent terminator */
 186           continue;
 187         }
 188
 189       obstack_1grow (oout, c);
 190
 191       if (c == '\\')
 192         {
 193           c = getc (fin);
 194           if (c == EOF)
 195             fatal (_("unterminated string at end of file"));
 196           obstack_1grow (oout, c);
 197
 198           if (c == '\n')
 199             lineno++;
 200         }
 201
 202       c = getc (fin);
 203     }
 204
 205   obstack_1grow (oout, c);
 206 }
 207
 208
 209 /*-----------------------------------------------------------------.
 210 | Dump the wannabee comment from IN to OUT1 and OUT2 (which can be |
 211 | NULL).  In fact we just saw a `/', which might or might not be a |
 212 | comment.  In any case, copy what we saw.                         |
 213 |                                                                  |
 214 | OUT2 might be NULL.                                              |
 215 `-----------------------------------------------------------------*/
 216
 217 static inline void
 218 copy_comment2 (FILE *fin, struct obstack *oout1, struct obstack *oout2)
 219 {
 220   int cplus_comment;
 221   int ended;
 222   int c;
 223
 224   /* We read a `/', output it. */
 225   obstack_1grow (oout1, '/');
 226   if (oout2)
 227     obstack_1grow (oout2, '/');
 228
 229   switch ((c = getc (fin)))
 230     {
 231     case '/':
 232       cplus_comment = 1;
 233       break;
 234     case '*':
 235       cplus_comment = 0;
 236       break;
 237     default:
 238       ungetc (c, fin);
 239       return;
 240     }
 241
 242   obstack_1grow (oout1, c);
 243   if (oout2)
 244     obstack_1grow (oout2, c);
 245   c = getc (fin);
 246
 247   ended = 0;
 248   while (!ended)
 249     {
 250       if (!cplus_comment && c == '*')
 251         {
 252           while (c == '*')
 253             {
 254               obstack_1grow (oout1, c);
 255               if (oout2)
 256                 obstack_1grow (oout2, c);
 257               c = getc (fin);
 258             }
 259
 260           if (c == '/')
 261             {
 262               obstack_1grow (oout1, c);
 263               if (oout2)
 264                 obstack_1grow (oout2, c);
 265               ended = 1;
 266             }
 267         }
 268       else if (c == '\n')
 269         {
 270           lineno++;
 271           obstack_1grow (oout1, c);
 272           if (oout2)
 273             obstack_1grow (oout2, c);
 274           if (cplus_comment)
 275             ended = 1;
 276           else
 277             c = getc (fin);
 278         }
 279       else if (c == EOF)
 280         fatal (_("unterminated comment"));
 281       else
 282         {
 283           obstack_1grow (oout1, c);
 284           if (oout2)
 285             obstack_1grow (oout2, c);
 286           c = getc (fin);
 287         }
 288     }
 289 }
 290
 291
 292 /*-------------------------------------------------------------------.
 293 | Dump the comment (actually the current string starting with a `/') |
 294 | from FIN to OOUT.                                                  |
 295 `-------------------------------------------------------------------*/
 296
 297 static inline void
 298 copy_comment (FILE *fin, struct obstack *oout)
 299 {
 300   copy_comment2 (fin, oout, NULL);
 301 }
 302
 303
 304 /*-----------------------------------------------------------------.
 305 | FIN is pointing to a location (i.e., a `@').  Output to OOUT a   |
 306 | reference to this location. STACK_OFFSET is the number of values |
 307 | in the current rule so far, which says where to find `@0' with   |
 308 | respect to the top of the stack.                                 |
 309 `-----------------------------------------------------------------*/
 310
 311 static inline void
 312 copy_at (FILE *fin, struct obstack *oout,
 313          struct symbol_list *rule, int stack_offset)
 314 {
 315   symbol_list *rp;
 316   int c;
 317
 318   c = getc (fin);
 319   if (c == '$')
 320     {
 321       obstack_sgrow (oout, "yyloc");
 322       locations_flag = 1;
 323     }
 324   else if (isdigit (c) || c == '-')
 325     {
 326       int n, i;
 327
 328       ungetc (c, fin);
 329       n = read_signed_integer (fin);
 330
 331       rp = rule;
 332       i = 0;
 333
 334       while (i < n)
 335         {
 336           rp = rp->next;
 337           if (rp == NULL)
 338             {
 339               complain (_("invalid @ value"));
 340               return;
 341             }
 342           i++;
 343         }
 344
 345       obstack_fgrow1 (oout, "yylsp[%d]", n - stack_offset);
 346       locations_flag = 1;
 347     }
 348   else
 349     {
 350       char buf[] = "@c";
 351       buf[1] = c;
 352       complain (_("%s is invalid"), quote (buf));
 353     }
 354 }
 355
 356
 357 /*-------------------------------------------------------------------.
 358 | FIN is pointing to a wannabee semantic value (i.e., a `$').        |
 359 |                                                                    |
 360 | Possible inputs: $[<TYPENAME>]($|integer)                          |
 361 |                                                                    |
 362 | Output to OOUT a reference to this semantic value. STACK_OFFSET is |
 363 | the number of values in the current rule so far, which says where  |
 364 | to find `$0' with respect to the top of the stack.                 |
 365 `-------------------------------------------------------------------*/
 366
 367 static inline void
 368 copy_dollar (FILE *fin, struct obstack *oout,
 369              symbol_list *rule, int stack_offset)
 370 {
 371   int c = getc (fin);
 372   const char *type_name = NULL;
 373
 374   /* Get the type name if explicit. */
 375   if (c == '<')
 376     {
 377       read_type_name (fin);
 378       type_name = token_buffer;
 379       value_components_used = 1;
 380       c = getc (fin);
 381     }
 382
 383   if (c == '$')
 384     {
 385       obstack_sgrow (oout, "yyval");
 386
 387       if (!type_name)
 388         type_name = get_type_name (0, rule);
 389       if (type_name)
 390         obstack_fgrow1 (oout, ".%s", type_name);
 391       if (!type_name && typed)
 392         complain (_("$$ of `%s' has no declared type"),
 393                   rule->sym->tag);
 394     }
 395   else if (isdigit (c) || c == '-')
 396     {
 397       int n;
 398       ungetc (c, fin);
 399       n = read_signed_integer (fin);
 400
 401       if (!type_name && n > 0)
 402         type_name = get_type_name (n, rule);
 403
 404       obstack_fgrow1 (oout, "yyvsp[%d]", n - stack_offset);
 405
 406       if (type_name)
 407         obstack_fgrow1 (oout, ".%s", type_name);
 408       if (!type_name && typed)
 409         complain (_("$%d of `%s' has no declared type"),
 410                   n, rule->sym->tag);
 411     }
 412   else
 413     {
 414       char buf[] = "$c";
 415       buf[1] = c;
 416       complain (_("%s is invalid"), quote (buf));
 417     }
 418 }
 419 \f
 420 /*-------------------------------------------------------------------.
 421 | Copy the contents of a `%{ ... %}' into the definitions file.  The |
 422 | `%{' has already been read.  Return after reading the `%}'.        |
 423 `-------------------------------------------------------------------*/
 424
 425 static void
 426 copy_definition (void)
 427 {
 428   int c;
 429   /* -1 while reading a character if prev char was %. */
 430   int after_percent;
 431
 432   if (!no_lines_flag)
 433     obstack_fgrow2 (&attrs_obstack, "#line %d %s\n",
 434                     lineno, quotearg_style (c_quoting_style, infile));
 435
 436   after_percent = 0;
 437
 438   c = getc (finput);
 439
 440   for (;;)
 441     {
 442       switch (c)
 443         {
 444         case '\n':
 445           obstack_1grow (&attrs_obstack, c);
 446           lineno++;
 447           break;
 448
 449         case '%':
 450           after_percent = -1;
 451           break;
 452
 453         case '\'':
 454         case '"':
 455           copy_string (finput, &attrs_obstack, c);
 456           break;
 457
 458         case '/':
 459           copy_comment (finput, &attrs_obstack);
 460           break;
 461
 462         case EOF:
 463           fatal ("%s", _("unterminated `%{' definition"));
 464
 465         default:
 466           obstack_1grow (&attrs_obstack, c);
 467         }
 468
 469       c = getc (finput);
 470
 471       if (after_percent)
 472         {
 473           if (c == '}')
 474             return;
 475           obstack_1grow (&attrs_obstack, '%');
 476         }
 477       after_percent = 0;
 478     }
 479 }
 480
 481
 482 /*-------------------------------------------------------------------.
 483 | Parse what comes after %token or %nterm.  For %token, WHAT_IS is   |
 484 | token_sym and WHAT_IS_NOT is nterm_sym.  For %nterm, the arguments |
 485 | are reversed.                                                      |
 486 `-------------------------------------------------------------------*/
 487
 488 static void
 489 parse_token_decl (symbol_class what_is, symbol_class what_is_not)
 490 {
 491   token_t token = tok_undef;
 492   char *typename = NULL;
 493
 494   /* The symbol being defined.  */
 495   struct bucket *symbol = NULL;
 496
 497   /* After `%token' and `%nterm', any number of symbols maybe be
 498      defined.  */
 499   for (;;)
 500     {
 501       int tmp_char = ungetc (skip_white_space (), finput);
 502
 503       /* `%' (for instance from `%token', or from `%%' etc.) is the
 504          only valid means to end this declaration.  */
 505       if (tmp_char == '%')
 506         return;
 507       if (tmp_char == EOF)
 508         fatal (_("Premature EOF after %s"), token_buffer);
 509
 510       token = lex ();
 511       if (token == tok_comma)
 512         {
 513           symbol = NULL;
 514           continue;
 515         }
 516       if (token == tok_typename)
 517         {
 518           typename = xstrdup (token_buffer);
 519           value_components_used = 1;
 520           symbol = NULL;
 521         }
 522       else if (token == tok_identifier && *symval->tag == '\"' && symbol)
 523         {
 524           if (symval->alias)
 525             warn (_("symbol `%s' used more than once as a literal string"),
 526                   symval->tag);
 527           else if (symbol->alias)
 528             warn (_("symbol `%s' given more than one literal string"),
 529                   symbol->tag);
 530           else
 531             {
 532               symval->class = token_sym;
 533               symval->type_name = typename;
 534               symval->user_token_number = symbol->user_token_number;
 535               symbol->user_token_number = SALIAS;
 536               symval->alias = symbol;
 537               symbol->alias = symval;
 538               /* symbol and symval combined are only one symbol */
 539               nsyms--;
 540             }
 541           symbol = NULL;
 542         }
 543       else if (token == tok_identifier)
 544         {
 545           int oldclass = symval->class;
 546           symbol = symval;
 547
 548           if (symbol->class == what_is_not)
 549             complain (_("symbol %s redefined"), symbol->tag);
 550           symbol->class = what_is;
 551           if (what_is == nterm_sym && oldclass != nterm_sym)
 552             symbol->value = nvars++;
 553
 554           if (typename)
 555             {
 556               if (symbol->type_name == NULL)
 557                 symbol->type_name = typename;
 558               else if (strcmp (typename, symbol->type_name) != 0)
 559                 complain (_("type redeclaration for %s"), symbol->tag);
 560             }
 561         }
 562       else if (symbol && token == tok_number)
 563         {
 564           symbol->user_token_number = numval;
 565         }
 566       else
 567         {
 568           complain (_("`%s' is invalid in %s"),
 569                     token_buffer,
 570                     (what_is == token_sym) ? "%token" : "%nterm");
 571           skip_to_char ('%');
 572         }
 573     }
 574
 575 }
 576
 577
 578 /*------------------------------.
 579 | Parse what comes after %start |
 580 `------------------------------*/
 581
 582 static void
 583 parse_start_decl (void)
 584 {
 585   if (start_flag)
 586     complain (_("multiple %s declarations"), "%start");
 587   if (lex () != tok_identifier)
 588     complain (_("invalid %s declaration"), "%start");
 589   else
 590     {
 591       start_flag = 1;
 592       startval = symval;
 593     }
 594 }
 595
 596 /*-----------------------------------------------------------.
 597 | read in a %type declaration and record its information for |
 598 | get_type_name to access                                    |
 599 `-----------------------------------------------------------*/
 600
 601 static void
 602 parse_type_decl (void)
 603 {
 604   char *name;
 605
 606   if (lex () != tok_typename)
 607     {
 608       complain ("%s", _("%type declaration has no <typename>"));
 609       skip_to_char ('%');
 610       return;
 611     }
 612
 613   name = xstrdup (token_buffer);
 614
 615   for (;;)
 616     {
 617       token_t t;
 618       int tmp_char = ungetc (skip_white_space (), finput);
 619
 620       if (tmp_char == '%')
 621         return;
 622       if (tmp_char == EOF)
 623         fatal (_("Premature EOF after %s"), token_buffer);
 624
 625       t = lex ();
 626
 627       switch (t)
 628         {
 629
 630         case tok_comma:
 631         case tok_semicolon:
 632           break;
 633
 634         case tok_identifier:
 635           if (symval->type_name == NULL)
 636             symval->type_name = name;
 637           else if (strcmp (name, symval->type_name) != 0)
 638             complain (_("type redeclaration for %s"), symval->tag);
 639
 640           break;
 641
 642         default:
 643           complain (_("invalid %%type declaration due to item: %s"),
 644                     token_buffer);
 645           skip_to_char ('%');
 646         }
 647     }
 648 }
 649
 650
 651
 652 /*----------------------------------------------------------------.
 653 | Read in a %left, %right or %nonassoc declaration and record its |
 654 | information.                                                    |
 655 `----------------------------------------------------------------*/
 656
 657 static void
 658 parse_assoc_decl (associativity assoc)
 659 {
 660   char *name = NULL;
 661   int prev = 0;
 662
 663   lastprec++;                   /* Assign a new precedence level, never 0.  */
 664
 665   for (;;)
 666     {
 667       token_t t;
 668       int tmp_char = ungetc (skip_white_space (), finput);
 669
 670       if (tmp_char == '%')
 671         return;
 672       if (tmp_char == EOF)
 673         fatal (_("Premature EOF after %s"), token_buffer);
 674
 675       t = lex ();
 676
 677       switch (t)
 678         {
 679         case tok_typename:
 680           name = xstrdup (token_buffer);
 681           break;
 682
 683         case tok_comma:
 684           break;
 685
 686         case tok_identifier:
 687           if (symval->prec != 0)
 688             complain (_("redefining precedence of %s"), symval->tag);
 689           symval->prec = lastprec;
 690           symval->assoc = assoc;
 691           if (symval->class == nterm_sym)
 692             complain (_("symbol %s redefined"), symval->tag);
 693           symval->class = token_sym;
 694           if (name)
 695             {                   /* record the type, if one is specified */
 696               if (symval->type_name == NULL)
 697                 symval->type_name = name;
 698               else if (strcmp (name, symval->type_name) != 0)
 699                 complain (_("type redeclaration for %s"), symval->tag);
 700             }
 701           break;
 702
 703         case tok_number:
 704           if (prev == tok_identifier)
 705             {
 706               symval->user_token_number = numval;
 707             }
 708           else
 709             {
 710               complain (_
 711                         ("invalid text (%s) - number should be after identifier"),
 712 token_buffer);
 713               skip_to_char ('%');
 714             }
 715           break;
 716
 717         case tok_semicolon:
 718           return;
 719
 720         default:
 721           complain (_("unexpected item: %s"), token_buffer);
 722           skip_to_char ('%');
 723         }
 724
 725       prev = t;
 726     }
 727 }
 728
 729
 730
 731 /*--------------------------------------------------------------.
 732 | Copy the union declaration into ATTRS_OBSTACK (and fdefines), |
 733 | where it is made into the definition of YYSTYPE, the type of  |
 734 | elements of the parser value stack.                           |
 735 `--------------------------------------------------------------*/
 736
 737 static void
 738 parse_union_decl (void)
 739 {
 740   int c;
 741   int count = 0;
 742   const char *prologue = "\
 743 #ifndef YYSTYPE\n\
 744 typedef union";
 745   const char *epilogue = "\
 746  yystype;\n\
 747 # define YYSTYPE yystype\n\
 748 #endif\n";
 749
 750   if (typed)
 751     complain (_("multiple %s declarations"), "%union");
 752
 753   typed = 1;
 754
 755   if (!no_lines_flag)
 756     obstack_fgrow2 (&attrs_obstack, "\n#line %d %s\n",
 757                     lineno, quotearg_style (c_quoting_style, infile));
 758   else
 759     obstack_1grow (&attrs_obstack, '\n');
 760
 761   obstack_sgrow (&attrs_obstack, prologue);
 762   if (defines_flag)
 763     obstack_sgrow (&defines_obstack, prologue);
 764
 765   c = getc (finput);
 766
 767   while (c != EOF)
 768     {
 769
 770       /* If C contains '/', it is output by copy_comment ().  */
 771       if (c != '/')
 772         {
 773           obstack_1grow (&attrs_obstack, c);
 774           if (defines_flag)
 775             obstack_1grow (&defines_obstack, c);
 776         }
 777
 778       switch (c)
 779         {
 780         case '\n':
 781           lineno++;
 782           break;
 783
 784         case '/':
 785           copy_comment2 (finput, &defines_obstack, &attrs_obstack);
 786           break;
 787
 788         case '{':
 789           count++;
 790           break;
 791
 792         case '}':
 793           if (count == 0)
 794             complain (_("unmatched %s"), "`}'");
 795           count--;
 796           if (count <= 0)
 797             {
 798               obstack_sgrow (&attrs_obstack, epilogue);
 799               if (defines_flag)
 800                 obstack_sgrow (&defines_obstack, epilogue);
 801               /* JF don't choke on trailing semi */
 802               c = skip_white_space ();
 803               if (c != ';')
 804                 ungetc (c, finput);
 805               return;
 806             }
 807         }
 808
 809       c = getc (finput);
 810     }
 811 }
 812
 813
 814 /*-------------------------------------------------------.
 815 | Parse the declaration %expect N which says to expect N |
 816 | shift-reduce conflicts.                                |
 817 `-------------------------------------------------------*/
 818
 819 static void
 820 parse_expect_decl (void)
 821 {
 822   int c = skip_white_space ();
 823   ungetc (c, finput);
 824
 825   if (!isdigit (c))
 826     complain (_("argument of %%expect is not an integer"));
 827   else
 828     expected_conflicts = read_signed_integer (finput);
 829 }
 830
 831
 832 /*-------------------------------------------------------------------.
 833 | Parse what comes after %thong.  the full syntax is                 |
 834 |                                                                    |
 835 |                %thong <type> token number literal                  |
 836 |                                                                    |
 837 | the <type> or number may be omitted.  The number specifies the     |
 838 | user_token_number.                                                 |
 839 |                                                                    |
 840 | Two symbols are entered in the table, one for the token symbol and |
 841 | one for the literal.  Both are given the <type>, if any, from the  |
 842 | declaration.  The ->user_token_number of the first is SALIAS and   |
 843 | the ->user_token_number of the second is set to the number, if     |
 844 | any, from the declaration.  The two symbols are linked via         |
 845 | pointers in their ->alias fields.                                  |
 846 |                                                                    |
 847 | During OUTPUT_DEFINES_TABLE, the symbol is reported thereafter,    |
 848 | only the literal string is retained it is the literal string that  |
 849 | is output to yytname                                               |
 850 `-------------------------------------------------------------------*/
 851
 852 static void
 853 parse_thong_decl (void)
 854 {
 855   token_t token;
 856   struct bucket *symbol;
 857   char *typename = 0;
 858   int usrtoknum = SUNDEF;
 859
 860   token = lex ();               /* fetch typename or first token */
 861   if (token == tok_typename)
 862     {
 863       typename = xstrdup (token_buffer);
 864       value_components_used = 1;
 865       token = lex ();           /* fetch first token */
 866     }
 867
 868   /* process first token */
 869
 870   if (token != tok_identifier)
 871     {
 872       complain (_("unrecognized item %s, expected an identifier"),
 873                 token_buffer);
 874       skip_to_char ('%');
 875       return;
 876     }
 877   symval->class = token_sym;
 878   symval->type_name = typename;
 879   symval->user_token_number = SALIAS;
 880   symbol = symval;
 881
 882   token = lex ();               /* get number or literal string */
 883
 884   if (token == tok_number)
 885     {
 886       usrtoknum = numval;
 887       token = lex ();           /* okay, did number, now get literal */
 888     }
 889
 890   /* process literal string token */
 891
 892   if (token != tok_identifier || *symval->tag != '\"')
 893     {
 894       complain (_("expected string constant instead of %s"), token_buffer);
 895       skip_to_char ('%');
 896       return;
 897     }
 898   symval->class = token_sym;
 899   symval->type_name = typename;
 900   symval->user_token_number = usrtoknum;
 901
 902   symval->alias = symbol;
 903   symbol->alias = symval;
 904
 905   /* symbol and symval combined are only one symbol.  */
 906   nsyms--;
 907 }
 908
 909
 910 /*------------------------------------------------------------------.
 911 | Parse a double quoted parameter. It was used for                  |
 912 | %{source,header}_extension.  For the moment, It is not used since |
 913 | extension features have been removed.                             |
 914 `------------------------------------------------------------------*/
 915
 916 #if 0
 917
 918 static const char *
 919 parse_dquoted_param (const char *from)
 920 {
 921   char buff[32];
 922   int c;
 923   int i;
 924
 925   c = skip_white_space ();
 926
 927   if (c != '"')
 928     {
 929       ungetc (c, finput);
 930       complain (_("invalid %s declaration"), from);
 931       return NULL;
 932     }
 933
 934   c = getc (finput);
 935   for (i = 0; (c >= '!') && (c <= '~'); i++)
 936     {
 937       if (c == '"')
 938         break;
 939
 940       if (c == '\\')
 941         {
 942           c = getc (finput);
 943           if ((c < '!') && (c > '~'))
 944             break;
 945         }
 946
 947       buff[i] = c;
 948       c = getc (finput);
 949     }
 950   buff[i] = '\0';
 951
 952   if (c != '"')
 953     {
 954       ungetc (c, finput);
 955       complain (_("invalid %s declaration"), from);
 956       return NULL;
 957     }
 958
 959   return xstrdup (buff);
 960 }
 961
 962 #endif
 963
 964
 965 /*----------------------------------------------------------------.
 966 | Read from finput until `%%' is seen.  Discard the `%%'.  Handle |
 967 | any `%' declarations, and copy the contents of any `%{ ... %}'  |
 968 | groups to ATTRS_OBSTACK.                                        |
 969 `----------------------------------------------------------------*/
 970
 971 static void
 972 read_declarations (void)
 973 {
 974   for (;;)
 975     {
 976       int c = skip_white_space ();
 977
 978       if (c == '%')
 979         {
 980           token_t tok = parse_percent_token ();
 981
 982           switch (tok)
 983             {
 984             case tok_two_percents:
 985               return;
 986
 987             case tok_percent_left_curly:
 988               copy_definition ();
 989               break;
 990
 991             case tok_token:
 992               parse_token_decl (token_sym, nterm_sym);
 993               break;
 994
 995             case tok_nterm:
 996               parse_token_decl (nterm_sym, token_sym);
 997               break;
 998
 999             case tok_type:
1000               parse_type_decl ();
1001               break;
1002
1003             case tok_start:
1004               parse_start_decl ();
1005               break;
1006
1007             case tok_union:
1008               parse_union_decl ();
1009               break;
1010
1011             case tok_expect:
1012               parse_expect_decl ();
1013               break;
1014
1015             case tok_thong:
1016               parse_thong_decl ();
1017               break;
1018
1019             case tok_left:
1020               parse_assoc_decl (left_assoc);
1021               break;
1022
1023             case tok_right:
1024               parse_assoc_decl (right_assoc);
1025               break;
1026
1027             case tok_nonassoc:
1028               parse_assoc_decl (non_assoc);
1029               break;
1030
1031             case tok_noop:
1032               break;
1033
1034             case tok_stropt:
1035             case tok_intopt:
1036             case tok_obsolete:
1037               abort ();
1038               break;
1039
1040             case tok_illegal:
1041             default:
1042               complain (_("unrecognized: %s"), token_buffer);
1043               skip_to_char ('%');
1044             }
1045         }
1046       else if (c == EOF)
1047         fatal (_("no input grammar"));
1048       else
1049         {
1050           char buf[] = "c";
1051           buf[0] = c;
1052           complain (_("unknown character: %s"), quote (buf));
1053           skip_to_char ('%');
1054         }
1055     }
1056 }
1057 \f
1058 /*-------------------------------------------------------------------.
1059 | Assuming that a `{' has just been seen, copy everything up to the  |
1060 | matching `}' into the actions file.  STACK_OFFSET is the number of |
1061 | values in the current rule so far, which says where to find `$0'   |
1062 | with respect to the top of the stack.                              |
1063 `-------------------------------------------------------------------*/
1064
1065 static void
1066 copy_action (symbol_list *rule, int stack_offset)
1067 {
1068   int c;
1069   int count;
1070   char buf[4096];
1071
1072   /* offset is always 0 if parser has already popped the stack pointer */
1073   if (semantic_parser)
1074     stack_offset = 0;
1075
1076   sprintf (buf, "\ncase %d:\n", nrules);
1077   obstack_grow (&action_obstack, buf, strlen (buf));
1078
1079   if (!no_lines_flag)
1080     {
1081       sprintf (buf, "#line %d %s\n",
1082                lineno, quotearg_style (c_quoting_style, infile));
1083       obstack_grow (&action_obstack, buf, strlen (buf));
1084     }
1085   obstack_1grow (&action_obstack, '{');
1086
1087   count = 1;
1088   c = getc (finput);
1089
1090   while (count > 0)
1091     {
1092       while (c != '}')
1093         {
1094           switch (c)
1095             {
1096             case '\n':
1097               obstack_1grow (&action_obstack, c);
1098               lineno++;
1099               break;
1100
1101             case '{':
1102               obstack_1grow (&action_obstack, c);
1103               count++;
1104               break;
1105
1106             case '\'':
1107             case '"':
1108               copy_string (finput, &action_obstack, c);
1109               break;
1110
1111             case '/':
1112               copy_comment (finput, &action_obstack);
1113               break;
1114
1115             case '$':
1116               copy_dollar (finput, &action_obstack,
1117                            rule, stack_offset);
1118               break;
1119
1120             case '@':
1121               copy_at (finput, &action_obstack,
1122                        rule, stack_offset);
1123               break;
1124
1125             case EOF:
1126               fatal (_("unmatched %s"), "`{'");
1127
1128             default:
1129               obstack_1grow (&action_obstack, c);
1130             }
1131
1132           c = getc (finput);
1133         }
1134
1135       /* above loop exits when c is '}' */
1136
1137       if (--count)
1138         {
1139           obstack_1grow (&action_obstack, c);
1140           c = getc (finput);
1141         }
1142     }
1143
1144   /* As a Bison extension, add the ending semicolon.  Since some Yacc
1145      don't do that, help people using bison as a Yacc finding their
1146      missing semicolons.  */
1147   if (yacc_flag)
1148     obstack_sgrow (&action_obstack, "}\n    break;");
1149   else
1150     obstack_sgrow (&action_obstack, ";\n    break;}");
1151 }
1152 \f
1153 /*-------------------------------------------------------------------.
1154 | After `%guard' is seen in the input file, copy the actual guard    |
1155 | into the guards file.  If the guard is followed by an action, copy |
1156 | that into the actions file.  STACK_OFFSET is the number of values  |
1157 | in the current rule so far, which says where to find `$0' with     |
1158 | respect to the top of the stack, for the simple parser in which    |
1159 | the stack is not popped until after the guard is run.              |
1160 `-------------------------------------------------------------------*/
1161
1162 static void
1163 copy_guard (symbol_list *rule, int stack_offset)
1164 {
1165   int c;
1166   int count;
1167   int brace_flag = 0;
1168
1169   /* offset is always 0 if parser has already popped the stack pointer */
1170   if (semantic_parser)
1171     stack_offset = 0;
1172
1173   obstack_fgrow1 (&guard_obstack, "\ncase %d:\n", nrules);
1174   if (!no_lines_flag)
1175     obstack_fgrow2 (&guard_obstack, "#line %d %s\n",
1176                     lineno, quotearg_style (c_quoting_style, infile));
1177   obstack_1grow (&guard_obstack, '{');
1178
1179   count = 0;
1180   c = getc (finput);
1181
1182   while (brace_flag ? (count > 0) : (c != ';'))
1183     {
1184       switch (c)
1185         {
1186         case '\n':
1187           obstack_1grow (&guard_obstack, c);
1188           lineno++;
1189           break;
1190
1191         case '{':
1192           obstack_1grow (&guard_obstack, c);
1193           brace_flag = 1;
1194           count++;
1195           break;
1196
1197         case '}':
1198           obstack_1grow (&guard_obstack, c);
1199           if (count > 0)
1200             count--;
1201           else
1202             {
1203               complain (_("unmatched %s"), "`}'");
1204               c = getc (finput);        /* skip it */
1205             }
1206           break;
1207
1208         case '\'':
1209         case '"':
1210           copy_string (finput, &guard_obstack, c);
1211           break;
1212
1213         case '/':
1214           copy_comment (finput, &guard_obstack);
1215           break;
1216
1217         case '$':
1218           copy_dollar (finput, &guard_obstack, rule, stack_offset);
1219           break;
1220
1221         case '@':
1222           copy_at (finput, &guard_obstack, rule, stack_offset);
1223           break;
1224
1225         case EOF:
1226           fatal ("%s", _("unterminated %guard clause"));
1227
1228         default:
1229           obstack_1grow (&guard_obstack, c);
1230         }
1231
1232       if (c != '}' || count != 0)
1233         c = getc (finput);
1234     }
1235
1236   c = skip_white_space ();
1237
1238   obstack_sgrow (&guard_obstack, ";\n    break;}");
1239   if (c == '{')
1240     copy_action (rule, stack_offset);
1241   else if (c == '=')
1242     {
1243       c = getc (finput);        /* why not skip_white_space -wjh */
1244       if (c == '{')
1245         copy_action (rule, stack_offset);
1246     }
1247   else
1248     ungetc (c, finput);
1249 }
1250 \f
1251
1252 /*-------------------------------------------------------------------.
1253 | Generate a dummy symbol, a nonterminal, whose name cannot conflict |
1254 | with the user's names.                                             |
1255 `-------------------------------------------------------------------*/
1256
1257 static bucket *
1258 gensym (void)
1259 {
1260   /* Incremented for each generated symbol */
1261   static int gensym_count = 0;
1262   static char buf[256];
1263
1264   bucket *sym;
1265
1266   sprintf (buf, "@%d", ++gensym_count);
1267   token_buffer = buf;
1268   sym = getsym (token_buffer);
1269   sym->class = nterm_sym;
1270   sym->value = nvars++;
1271   return sym;
1272 }
1273
#if 0
/*------------------------------------------------------------------.
| Read in a %type declaration and record its information for        |
| get_type_name to access.  This is unused.  It is only called from |
| the #if 0 part of readgram                                        |
|                                                                   |
| Return the first token that is not part of the declaration.       |
`------------------------------------------------------------------*/

static int
get_type (void)
{
  int k;
  token_t token;
  char *name;

  token = lex ();

  if (token != tok_typename)
    {
      complain (_("invalid %s declaration"), "%type");
      /* Hand the offending token back to the caller.  This used to
         return an undeclared `t'.  */
      return token;
    }

  name = xstrdup (token_buffer);

  for (;;)
    {
      token = lex ();

      switch (token)
        {
        case tok_semicolon:
          /* The declaration is over: return what follows it.  */
          return lex ();

        case tok_comma:
          break;

        case tok_identifier:
          /* Attach the type name to the symbol, or check that it
             agrees with the one recorded earlier.  */
          if (symval->type_name == NULL)
            symval->type_name = name;
          else if (strcmp (name, symval->type_name) != 0)
            complain (_("type redeclaration for %s"), symval->tag);

          break;

        default:
          return token;
        }
    }
}

#endif
1325 \f
/*------------------------------------------------------------------.
| Parse the input grammar into a single symbol_list structure.      |
| Each rule is represented by a sequence of symbols: the left hand  |
| side followed by the contents of the right hand side, followed    |
| by a null pointer instead of a symbol to terminate the rule.      |
| The next symbol is the lhs of the following rule.                 |
|                                                                   |
| All guards and actions are copied out to the appropriate files,   |
| labelled by the rule number they apply to.                        |
`------------------------------------------------------------------*/

static void
readgram (void)
{
  token_t t;
  bucket *lhs = NULL;
  /* Scratch pointer to the node being created.  */
  symbol_list *p;
  /* Last node appended to the list so far, NULL while it is empty.  */
  symbol_list *p1;
  bucket *bp;

  /* Points to first symbol_list of current rule. its symbol is the
     lhs of the rule.  */
  symbol_list *crule;
  /* Points to the symbol_list preceding crule.  */
  symbol_list *crule1;

  p1 = NULL;

  t = lex ();

  /* The rules section runs up to the second `%%' or the end of file.  */
  while (t != tok_two_percents && t != tok_eof)
    {
      if (t == tok_identifier || t == tok_bar)
        {
          /* Nonzero when the last thing read in this rule was an
             action.  */
          int action_flag = 0;
          /* Number of symbols in rhs of this rule so far */
          int rulelength = 0;
          /* Number of actions copied for this rule.  */
          int xactions = 0;     /* JF for error checking */
          /* First symbol of the rhs, used to check the type of the
             default action.  */
          bucket *first_rhs = 0;

          if (t == tok_identifier)
            {
              lhs = symval;

              /* Without an earlier start symbol, the lhs of the first
                 rule becomes the start symbol.  */
              if (!start_flag)
                {
                  startval = lhs;
                  start_flag = 1;
                }

              t = lex ();
              if (t != tok_colon)
                {
                  complain (_("ill-formed rule: initial symbol not followed by colon"));
                  unlex (t);
                }
            }

          /* A `|' keeps the lhs of the previous rule, so it cannot
             open the grammar.  */
          if (nrules == 0 && t == tok_bar)
            {
              complain (_("grammar starts with vertical bar"));
              lhs = symval;     /* BOGUS: use a random symval */
            }
          /* start a new rule and record its lhs.  */

          nrules++;
          nitems++;

          p = symbol_list_new (lhs);

          /* Append the new node, remembering its predecessor in case
             a dummy rule has to be inserted before this rule.  */
          crule1 = p1;
          if (p1)
            p1->next = p;
          else
            grammar = p;

          p1 = p;
          crule = p;

          /* mark the rule's lhs as a nonterminal if not already so.  */

          if (lhs->class == unknown_sym)
            {
              lhs->class = nterm_sym;
              lhs->value = nvars;
              nvars++;
            }
          else if (lhs->class == token_sym)
            complain (_("rule given for %s, which is a token"), lhs->tag);

          /* read the rhs of the rule.  */

          for (;;)
            {
              t = lex ();
              /* %prec: the token after it is taken as the symbol
                 giving the rule its precedence.  */
              if (t == tok_prec)
                {
                  t = lex ();
                  crule->ruleprec = symval;
                  t = lex ();
                }

              /* Anything but a symbol or an action ends the rhs.  */
              if (!(t == tok_identifier || t == tok_left_curly))
                break;

              /* If next token is an identifier, see if a colon follows it.
                 If one does, exit this rule now.  */
              if (t == tok_identifier)
                {
                  bucket *ssave;
                  token_t t1;

                  /* Peek one token ahead; symval is saved and restored
                     around the peek.  */
                  ssave = symval;
                  t1 = lex ();
                  unlex (t1);
                  symval = ssave;
                  if (t1 == tok_colon)
                    break;

                  if (!first_rhs)       /* JF */
                    first_rhs = symval;
                  /* Not followed by colon =>
                     process as part of this rule's rhs.  */
                }

              /* If we just passed an action, that action was in the middle
                 of a rule, so make a dummy rule to reduce it to a
                 non-terminal.  */
              if (action_flag)
                {
                  /* Since the action was written out with this rule's
                     number, we must give the new rule this number by
                     inserting the new rule before it.  */

                  /* Make a dummy nonterminal, a gensym.  */
                  bucket *sdummy = gensym ();

                  /* Make a new rule, whose body is empty, before the
                     current one, so that the action just read can
                     belong to it.  */
                  nrules++;
                  nitems++;
                  p = symbol_list_new (sdummy);
                  /* Attach its lineno to that of the host rule. */
                  p->line = crule->line;
                  if (crule1)
                    crule1->next = p;
                  else
                    grammar = p;
                  /* End of the rule. */
                  crule1 = symbol_list_new (NULL);
                  crule1->next = crule;

                  p->next = crule1;

                  /* Insert the dummy generated by that rule into this
                     rule.  */
                  nitems++;
                  p = symbol_list_new (sdummy);
                  p1->next = p;
                  p1 = p;

                  action_flag = 0;
                }

              if (t == tok_identifier)
                {
                  nitems++;
                  p = symbol_list_new (symval);
                  p1->next = p;
                  p1 = p;
                }
              else              /* handle an action.  */
                {
                  copy_action (crule, rulelength);
                  action_flag = 1;
                  xactions++;   /* JF */
                }
              rulelength++;
            }                   /* end of  read rhs of rule */

          /* Put an empty link in the list to mark the end of this rule  */
          p = symbol_list_new (NULL);
          p1->next = p;
          p1 = p;

          /* The rhs loop consumes one %prec per iteration, so a %prec
             seen here directly follows another one.  */
          if (t == tok_prec)
            {
              complain (_("two @prec's in a row"));
              t = lex ();
              crule->ruleprec = symval;
              t = lex ();
            }
          if (t == tok_guard)
            {
              if (!semantic_parser)
                complain (_("%%guard present but %%semantic_parser not specified"));

              copy_guard (crule, rulelength);
              t = lex ();
            }
          else if (t == tok_left_curly)
            {
              /* This case never occurs -wjh */
              if (action_flag)
                complain (_("two actions at end of one rule"));
              copy_action (crule, rulelength);
              action_flag = 1;
              xactions++;       /* -wjh */
              t = lex ();
            }
          /* If $$ is being set in default way, report if any type
             mismatch.  */
          else if (!xactions
                   && first_rhs && lhs->type_name != first_rhs->type_name)
            {
              /* The pointers differ: it is a clash unless both names
                 exist and compare equal.  */
              if (lhs->type_name == 0
                  || first_rhs->type_name == 0
                  || strcmp (lhs->type_name, first_rhs->type_name))
                complain (_("type clash (`%s' `%s') on default action"),
                          lhs->type_name ? lhs->type_name : "",
                          first_rhs->type_name ? first_rhs->type_name : "");
            }
          /* Warn if there is no default for $$ but we need one.  */
          else if (!xactions && !first_rhs && lhs->type_name != 0)
            complain (_("empty rule for typed nonterminal, and no action"));
          /* The `;' closing a rule is optional.  */
          if (t == tok_semicolon)
            t = lex ();
        }
#if 0
      /* these things can appear as alternatives to rules.  */
/* NO, they cannot.
        a) none of the documentation allows them
        b) most of them scan forward until finding a next %
                thus they may swallow lots of intervening rules
*/
      else if (t == tok_token)
        {
          parse_token_decl (token_sym, nterm_sym);
          t = lex ();
        }
      else if (t == tok_nterm)
        {
          parse_token_decl (nterm_sym, token_sym);
          t = lex ();
        }
      else if (t == tok_type)
        {
          t = get_type ();
        }
      else if (t == tok_union)
        {
          parse_union_decl ();
          t = lex ();
        }
      else if (t == tok_expect)
        {
          parse_expect_decl ();
          t = lex ();
        }
      else if (t == tok_start)
        {
          parse_start_decl ();
          t = lex ();
        }
#endif

      else
        {
          /* Neither a symbol nor a `|': report it and go on with the
             next token.  */
          complain (_("invalid input: %s"), quote (token_buffer));
          t = lex ();
        }
    }

  /* grammar has been read.  Do some checking */

  if (nsyms > MAXSHORT)
    fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
           MAXSHORT);
  if (nrules == 0)
    fatal (_("no rules in the input grammar"));

  /* JF put out same default YYSTYPE as YACC does */
  if (typed == 0
      && !value_components_used)
    {
      /* We used to use `unsigned long' as YYSTYPE on MSDOS,
         but it seems better to be consistent.
         Most programs should declare their own type anyway.  */
      obstack_sgrow (&attrs_obstack,
                           "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
      if (defines_flag)
        obstack_sgrow (&defines_obstack, "\
# ifndef YYSTYPE\n\
#  define YYSTYPE int\n\
# endif\n");
    }

  /* Report any undefined symbols and consider them nonterminals.  */

  for (bp = firstsymbol; bp; bp = bp->next)
    if (bp->class == unknown_sym)
      {
        complain (_
                  ("symbol %s is used, but is not defined as a token and has no rules"),
                  bp->tag);
        bp->class = nterm_sym;
        bp->value = nvars++;
      }

  /* Whatever is not a nonterminal is a token.  */
  ntokens = nsyms - nvars;
}
1638 \f
1639 /*--------------------------------------------------------------.
1640 | For named tokens, but not literal ones, define the name.  The |
1641 | value is the user token number.                               |
1642 `--------------------------------------------------------------*/
1643
1644 static void
1645 output_token_defines (struct obstack *oout)
1646 {
1647   bucket *bp;
1648   char *cp, *symbol;
1649   char c;
1650
1651   for (bp = firstsymbol; bp; bp = bp->next)
1652     {
1653       symbol = bp->tag;         /* get symbol */
1654
1655       if (bp->value >= ntokens)
1656         continue;
1657       if (bp->user_token_number == SALIAS)
1658         continue;
1659       if ('\'' == *symbol)
1660         continue;               /* skip literal character */
1661       if (bp == errtoken)
1662         continue;               /* skip error token */
1663       if ('\"' == *symbol)
1664         {
1665           /* use literal string only if given a symbol with an alias */
1666           if (bp->alias)
1667             symbol = bp->alias->tag;
1668           else
1669             continue;
1670         }
1671
1672       /* Don't #define nonliteral tokens whose names contain periods.  */
1673       cp = symbol;
1674       while ((c = *cp++) && c != '.');
1675       if (c != '\0')
1676         continue;
1677
1678       obstack_fgrow2 (oout, "# define\t%s\t%d\n",
1679                       symbol, bp->user_token_number);
1680       if (semantic_parser)
1681         /* FIXME: This is certainly dead wrong, and should be just as
1682            above. --akim.  */
1683         obstack_fgrow2 (oout, "# define\tT%s\t%d\n", symbol, bp->value);
1684     }
1685
1686   obstack_1grow (oout, '\n');
1687 }
1688
1689
1690 /*--------------------.
1691 | Output the header.  |
1692 `--------------------*/
1693
1694 static void
1695 symbols_output (void)
1696 {
1697   if (defines_flag)
1698     {
1699       output_token_defines (&defines_obstack);
1700
1701       if (!pure_parser)
1702         {
1703           if (spec_name_prefix)
1704             obstack_fgrow1 (&defines_obstack, "\nextern YYSTYPE %slval;\n",
1705                             spec_name_prefix);
1706           else
1707             obstack_sgrow (&defines_obstack,
1708                                  "\nextern YYSTYPE yylval;\n");
1709         }
1710
1711       if (semantic_parser)
1712         {
1713           int i;
1714
1715           for (i = ntokens; i < nsyms; i++)
1716             {
1717               /* don't make these for dummy nonterminals made by gensym.  */
1718               if (*tags[i] != '@')
1719                 obstack_fgrow2 (&defines_obstack,
1720                                 "# define\tNT%s\t%d\n", tags[i], i);
1721             }
1722 #if 0
1723           /* `fdefines' is now a temporary file, so we need to copy its
1724              contents in `done', so we can't close it here.  */
1725           fclose (fdefines);
1726           fdefines = NULL;
1727 #endif
1728         }
1729     }
1730 }
1731
1732
1733 /*------------------------------------------------------------------.
1734 | Set TOKEN_TRANSLATIONS.  Check that no two symbols share the same |
1735 | number.                                                           |
1736 `------------------------------------------------------------------*/
1737
1738 static void
1739 token_translations_init (void)
1740 {
1741   bucket *bp = NULL;
1742   int i;
1743
1744   token_translations = XCALLOC (short, max_user_token_number + 1);
1745
1746   /* Initialize all entries for literal tokens to 2, the internal
1747      token number for $undefined., which represents all invalid
1748      inputs.  */
1749   for (i = 0; i <= max_user_token_number; i++)
1750     token_translations[i] = 2;
1751
1752   for (bp = firstsymbol; bp; bp = bp->next)
1753     {
1754       /* Non-terminal? */
1755       if (bp->value >= ntokens)
1756         continue;
1757       /* A token string alias? */
1758       if (bp->user_token_number == SALIAS)
1759         continue;
1760
1761       assert (bp->user_token_number != SUNDEF);
1762
1763       /* A token which translation has already been set? */
1764       if (token_translations[bp->user_token_number] != 2)
1765         complain (_("tokens %s and %s both assigned number %d"),
1766                   tags[token_translations[bp->user_token_number]],
1767                   bp->tag, bp->user_token_number);
1768       token_translations[bp->user_token_number] = bp->value;
1769     }
1770 }
1771
1772
1773 /*------------------------------------------------------------------.
1774 | Assign symbol numbers, and write definition of token names into   |
1775 | FDEFINES.  Set up vectors TAGS and SPREC of names and precedences |
1776 | of symbols.                                                       |
1777 `------------------------------------------------------------------*/
1778
1779 static void
1780 packsymbols (void)
1781 {
1782   bucket *bp = NULL;
1783   int tokno = 1;
1784   int last_user_token_number;
1785   static char DOLLAR[] = "$";
1786
1787   tags = XCALLOC (char *, nsyms + 1);
1788   user_toknums = XCALLOC (short, nsyms + 1);
1789
1790   sprec = XCALLOC (short, nsyms);
1791   sassoc = XCALLOC (short, nsyms);
1792
1793   /* The EOF token. */
1794   tags[0] = DOLLAR;
1795   user_toknums[0] = 0;
1796
1797   max_user_token_number = 256;
1798   last_user_token_number = 256;
1799
1800   for (bp = firstsymbol; bp; bp = bp->next)
1801     {
1802       if (bp->class == nterm_sym)
1803         {
1804           bp->value += ntokens;
1805         }
1806       else if (bp->alias)
1807         {
1808           /* this symbol and its alias are a single token defn.
1809              allocate a tokno, and assign to both check agreement of
1810              ->prec and ->assoc fields and make both the same */
1811           if (bp->value == 0)
1812             bp->value = bp->alias->value = tokno++;
1813
1814           if (bp->prec != bp->alias->prec)
1815             {
1816               if (bp->prec != 0 && bp->alias->prec != 0
1817                   && bp->user_token_number == SALIAS)
1818                 complain (_("conflicting precedences for %s and %s"),
1819                           bp->tag, bp->alias->tag);
1820               if (bp->prec != 0)
1821                 bp->alias->prec = bp->prec;
1822               else
1823                 bp->prec = bp->alias->prec;
1824             }
1825
1826           if (bp->assoc != bp->alias->assoc)
1827             {
1828               if (bp->assoc != 0 && bp->alias->assoc != 0
1829                   && bp->user_token_number == SALIAS)
1830                 complain (_("conflicting assoc values for %s and %s"),
1831                           bp->tag, bp->alias->tag);
1832               if (bp->assoc != 0)
1833                 bp->alias->assoc = bp->assoc;
1834               else
1835                 bp->assoc = bp->alias->assoc;
1836             }
1837
1838           if (bp->user_token_number == SALIAS)
1839             continue;           /* do not do processing below for SALIASs */
1840
1841         }
1842       else                      /* bp->class == token_sym */
1843         {
1844           bp->value = tokno++;
1845         }
1846
1847       if (bp->class == token_sym)
1848         {
1849           if (bp->user_token_number == SUNDEF)
1850             bp->user_token_number = ++last_user_token_number;
1851           if (bp->user_token_number > max_user_token_number)
1852             max_user_token_number = bp->user_token_number;
1853         }
1854
1855       tags[bp->value] = bp->tag;
1856       user_toknums[bp->value] = bp->user_token_number;
1857       sprec[bp->value] = bp->prec;
1858       sassoc[bp->value] = bp->assoc;
1859     }
1860
1861   token_translations_init ();
1862
1863   error_token_number = errtoken->value;
1864
1865   if (!no_parser_flag)
1866     output_token_defines (&table_obstack);
1867
1868   if (startval->class == unknown_sym)
1869     fatal (_("the start symbol %s is undefined"), startval->tag);
1870   else if (startval->class == token_sym)
1871     fatal (_("the start symbol %s is a token"), startval->tag);
1872
1873   start_symbol = startval->value;
1874 }
1875
1876
1877 /*---------------------------------------------------------------.
1878 | Convert the rules into the representation using RRHS, RLHS and |
1879 | RITEMS.                                                        |
1880 `---------------------------------------------------------------*/
1881
1882 static void
1883 packgram (void)
1884 {
1885   int itemno;
1886   int ruleno;
1887   symbol_list *p;
1888
1889   ritem = XCALLOC (short, nitems + 1);
1890   rule_table = XCALLOC (rule_t, nrules) - 1;
1891
1892   itemno = 0;
1893   ruleno = 1;
1894
1895   p = grammar;
1896   while (p)
1897     {
1898       bucket *ruleprec = p->ruleprec;
1899       rule_table[ruleno].lhs = p->sym->value;
1900       rule_table[ruleno].rhs = itemno;
1901       rule_table[ruleno].line = p->line;
1902       rule_table[ruleno].useful = TRUE;
1903
1904       p = p->next;
1905       while (p && p->sym)
1906         {
1907           ritem[itemno++] = p->sym->value;
1908           /* A rule gets by default the precedence and associativity
1909              of the last token in it.  */
1910           if (p->sym->class == token_sym)
1911             {
1912               rule_table[ruleno].prec = p->sym->prec;
1913               rule_table[ruleno].assoc = p->sym->assoc;
1914             }
1915           if (p)
1916             p = p->next;
1917         }
1918
1919       /* If this rule has a %prec,
1920          the specified symbol's precedence replaces the default.  */
1921       if (ruleprec)
1922         {
1923           rule_table[ruleno].prec = ruleprec->prec;
1924           rule_table[ruleno].assoc = ruleprec->assoc;
1925           rule_table[ruleno].precsym = ruleprec->value;
1926         }
1927
1928       ritem[itemno++] = -ruleno;
1929       ruleno++;
1930
1931       if (p)
1932         p = p->next;
1933     }
1934
1935   ritem[itemno] = 0;
1936
1937   if (trace_flag)
1938     ritem_print (stderr);
1939 }
1940 \f
/*-------------------------------------------------------------------.
| Read in the grammar specification and record it in the format      |
| described in gram.h.  All guards are copied into the GUARD_OBSTACK |
| and all actions into ACTION_OBSTACK, in each case forming the body |
| of a C function (YYGUARD or YYACTION) which contains a switch      |
| statement to decide which guard or action to execute.              |
`-------------------------------------------------------------------*/

void
reader (void)
{
  /* Reset the state this file accumulates while reading.  */
  start_flag = 0;
  startval = NULL;              /* start symbol not specified yet. */

  /* NSYMS starts at 1; packsymbols reserves symbol number 0 for the
     EOF token.  */
  nsyms = 1;
  nvars = 0;
  nrules = 0;
  nitems = 0;

  typed = 0;
  lastprec = 0;

  semantic_parser = 0;
  pure_parser = 0;

  grammar = NULL;

  lex_init ();
  lineno = 1;

  /* Initialize the symbol table.  */
  tabinit ();
  /* Construct the error token */
  errtoken = getsym ("error");
  errtoken->class = token_sym;
  errtoken->user_token_number = 256;    /* Value specified by POSIX.  */
  /* Construct a token that represents all undefined literal tokens.
     It is always token number 2.  */
  undeftoken = getsym ("$undefined.");
  undeftoken->class = token_sym;
  undeftoken->user_token_number = 2;

  /* Start TABLE_OBSTACK with a banner naming the input file and the
     Bison version, followed by the YYBISON marker.  */
  obstack_fgrow3 (&table_obstack, "\
/* %s, made from %s\n\
   by GNU bison %s.  */\n\
\n",
                  no_parser_flag ? "Bison-generated parse tables" : "A Bison parser",
                  infile, VERSION);

  obstack_sgrow (&table_obstack,
                       "#define YYBISON 1  /* Identify Bison output.  */\n\n");
  /* Read the declaration section.  Copy %{ ... %} groups to
     TABLE_OBSTACK and FDEFINES file.  Also notice any %token, %left,
     etc. found there.  */
  read_declarations ();
  /* Start writing the guard and action files, if they are needed.  */
  output_headers ();
  /* Read in the grammar, build grammar in list form.  Write out
     guards and actions.  */
  readgram ();
  /* Now we know whether we need the line-number stack.  If we do,
     write its type into the .tab.h file.  */
  if (defines_flag)
    reader_output_yylsp (&defines_obstack);
  /* Write closing delimiters for actions and guards.  */
  output_trailers ();
  if (locations_flag)
    obstack_sgrow (&table_obstack, "#define YYLSP_NEEDED 1\n\n");
  /* Assign the symbols their symbol numbers.  Write #defines for the
     token symbols into FDEFINES if requested.  */
  packsymbols ();
  /* Convert the grammar into the format described in gram.h.  */
  packgram ();
  /* Output the headers. */
  symbols_output ();
}
2017
2018
2019 /*------------------------------------------------------------------.
2020 | Define YYLTYPE.  Cannot be in the skeleton since we might have to |
2021 | output it in the headers if --defines is used.                    |
2022 `------------------------------------------------------------------*/
2023
2024 void
2025 reader_output_yylsp (struct obstack *oout)
2026 {
2027   if (locations_flag)
2028     obstack_sgrow (oout, "\
2029 \n\
2030 #ifndef YYLTYPE\n\
2031 typedef struct yyltype\n\
2032 {\n\
2033   int first_line;\n\
2034   int first_column;\n\
2035 \n\
2036   int last_line;\n\
2037   int last_column;\n\
2038 } yyltype;\n\
2039 \n\
2040 # define YYLTYPE yyltype\n\
2041 #endif\n\
2042 \n");
2043 }