src/reader.c

   1 /* Input parser for bison
   2    Copyright (C) 1984, 86, 89, 92, 98, 2000 Free Software Foundation, Inc.
   3
   4    This file is part of Bison, the GNU Compiler Compiler.
   5
   6    Bison is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 2, or (at your option)
   9    any later version.
  10
  11    Bison is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with Bison; see the file COPYING.  If not, write to
  18    the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  19    Boston, MA 02111-1307, USA.  */
  20
  21
  22 /* Read in the grammar specification and record it in the format
  23    described in gram.h.  All guards are copied into the fguard file
  24    and all actions into faction, in each case forming the body of a C
  25    function (yyguard or yyaction) which contains a switch statement to
  26    decide which guard or action to execute.
  27
  28    The entry point is reader ().  */
  29
#include <stdio.h>
#include <limits.h>
#include "system.h"
#include "files.h"
#include "alloc.h"
#include "symtab.h"
#include "lex.h"
#include "gram.h"
#include "machine.h"
#include "complain.h"
  39
  40 #define LTYPESTR        "\
  41 \n\
  42 #ifndef YYLTYPE\n\
  43 typedef\n\
  44   struct yyltype\n\
  45 \
  46     {\n\
  47       int timestamp;\n\
  48       int first_line;\n\
  49       int first_column;\
  50 \n\
  51       int last_line;\n\
  52       int last_column;\n\
  53       char *text;\n\
  54    }\n\
  55 \
  56   yyltype;\n\
  57 \n\
  58 #define YYLTYPE yyltype\n\
  59 #endif\n\
  60 \n"
  61
  62 /* Number of slots allocated (but not necessarily used yet) in `rline'  */
  63 int rline_allocated;
  64
  65 extern int definesflag;
  66 extern int nolinesflag;
  67 extern int noparserflag;
  68 extern int rawtoknumflag;
  69 extern bucket *symval;
  70 extern int numval;
  71 extern int expected_conflicts;
  72 extern char *token_buffer;
  73 extern int maxtoken;
  74
  75 extern void init_lex PARAMS((void));
  76 extern char *grow_token_buffer PARAMS((char *));
  77 extern void tabinit PARAMS((void));
  78 extern void output_headers PARAMS((void));
  79 extern void output_trailers PARAMS((void));
  80 extern void free_symtab PARAMS((void));
  81 extern void open_extra_files PARAMS((void));
  82 extern char *printable_version PARAMS((int));
  83 extern void unlex PARAMS((int));
  84
  85 extern int skip_white_space PARAMS((void));
  86 extern int parse_percent_token PARAMS((void));
  87 extern int lex PARAMS((void));
  88
  89 typedef
  90   struct symbol_list
  91     {
  92       struct symbol_list *next;
  93       bucket *sym;
  94       bucket *ruleprec;
  95     }
  96   symbol_list;
  97
  98
  99 void reader PARAMS((void));
 100 void reader_output_yylsp PARAMS((FILE *));
 101 void read_declarations PARAMS((void));
 102 void copy_definition PARAMS((void));
 103 void parse_token_decl PARAMS((int, int));
 104 void parse_start_decl PARAMS((void));
 105 void parse_type_decl PARAMS((void));
 106 void parse_assoc_decl PARAMS((int));
 107 void parse_union_decl PARAMS((void));
 108 void parse_expect_decl PARAMS((void));
 109 char *get_type_name PARAMS((int, symbol_list *));
 110 void copy_guard PARAMS((symbol_list *, int));
 111 void parse_thong_decl PARAMS((void));
 112 void copy_action PARAMS((symbol_list *, int));
 113 bucket *gensym PARAMS((void));
 114 void readgram PARAMS((void));
 115 void record_rule_line PARAMS((void));
 116 void packsymbols PARAMS((void));
 117 void output_token_defines PARAMS((FILE *));
 118 void packgram PARAMS((void));
 119 int read_signed_integer PARAMS((FILE *));
 120
 121 #if 0
 122 static int get_type PARAMS((void));
 123 #endif
 124
 125 int lineno;
 126 symbol_list *grammar;
 127 int start_flag;
 128 bucket *startval;
 129 char **tags;
 130 int *user_toknums;
 131
 132 /* Nonzero if components of semantic values are used, implying
 133    they must be unions.  */
 134 static int value_components_used;
 135
 136 static int typed;  /* nonzero if %union has been seen.  */
 137
 138 static int lastprec;  /* incremented for each %left, %right or %nonassoc seen */
 139
 140 static int gensym_count;  /* incremented for each generated symbol */
 141
 142 static bucket *errtoken;
 143 static bucket *undeftoken;
 144
 145 /* Nonzero if any action or guard uses the @n construct.  */
 146 static int yylsp_needed;
 147
 148
 149 static void
 150 skip_to_char (int target)
 151 {
 152   int c;
 153   if (target == '\n')
 154     complain (_("   Skipping to next \\n"));
 155   else
 156     complain (_("   Skipping to next %c"), target);
 157
 158   do
 159     c = skip_white_space();
 160   while (c != target && c != EOF);
 161   if (c != EOF)
 162     ungetc(c, finput);
 163 }
 164
 165
 166 /* Dump the string from FINPUT to FOUTPUT.  MATCH is the delimiter of
 167    the string (either ' or ").  */
 168
 169 static inline void
 170 copy_string (FILE *finput, FILE *foutput, int match)
 171 {
 172   int c;
 173
 174   putc (match, foutput);
 175   c = getc (finput);
 176
 177   while (c != match)
 178     {
 179       if (c == EOF)
 180         fatal (_("unterminated string at end of file"));
 181       if (c == '\n')
 182         {
 183           complain (_("unterminated string"));
 184           ungetc (c, finput);
 185           c = match;            /* invent terminator */
 186           continue;
 187         }
 188
 189       putc(c, foutput);
 190
 191       if (c == '\\')
 192         {
 193           c = getc (finput);
 194           if (c == EOF)
 195             fatal (_("unterminated string at end of file"));
 196           putc (c, foutput);
 197           if (c == '\n')
 198             lineno++;
 199         }
 200
 201       c = getc(finput);
 202     }
 203
 204   putc(c, foutput);
 205 }
 206
 207
 208 /* Dump the comment from IN to OUT1 and OUT2.  C is either `*' or `/',
 209    depending upon the type of comments used.  OUT2 might be NULL.  */
 210
 211 static inline void
 212 copy_comment2 (FILE *in, FILE *out1, FILE* out2, int c)
 213 {
 214   int cplus_comment;
 215   register int ended;
 216
 217   cplus_comment = (c == '/');
 218   putc (c, out1);
 219   if (out2)
 220     putc (c, out2);
 221   c = getc (in);
 222
 223   ended = 0;
 224   while (!ended)
 225     {
 226       if (!cplus_comment && c == '*')
 227         {
 228           while (c == '*')
 229             {
 230               putc (c, out1);
 231               if (out2)
 232                 putc (c, out2);
 233               c = getc (in);
 234             }
 235
 236           if (c == '/')
 237             {
 238               putc(c, out1);
 239               if (out2)
 240                 putc(c, out2);
 241               ended = 1;
 242             }
 243         }
 244       else if (c == '\n')
 245         {
 246           lineno++;
 247           putc (c, out1);
 248           if (out2)
 249             putc (c, out2);
 250           if (cplus_comment)
 251             ended = 1;
 252           else
 253             c = getc (in);
 254         }
 255       else if (c == EOF)
 256         fatal (_("unterminated comment"));
 257       else
 258         {
 259           putc (c, out1);
 260           if (out2)
 261             putc (c, out2);
 262           c = getc (in);
 263         }
 264     }
 265 }
 266
 267
 268 /* Dump the comment from FINPUT to FOUTPUT.  C is either `*' or `/',
 269    depending upon the type of comments used.  */
 270
 271 static inline void
 272 copy_comment (FILE *finput, FILE *foutput, int c)
 273 {
 274   copy_comment2 (finput, foutput, NULL, c);
 275 }
 276
 277
 278 void
 279 reader (void)
 280 {
 281   start_flag = 0;
 282   startval = NULL;  /* start symbol not specified yet. */
 283
 284 #if 0
 285   translations = 0;  /* initially assume token number translation not needed.  */
 286 #endif
 287   /* Nowadays translations is always set to 1,
 288      since we give `error' a user-token-number
 289      to satisfy the Posix demand for YYERRCODE==256.  */
 290   translations = 1;
 291
 292   nsyms = 1;
 293   nvars = 0;
 294   nrules = 0;
 295   nitems = 0;
 296   rline_allocated = 10;
 297   rline = NEW2(rline_allocated, short);
 298
 299   typed = 0;
 300   lastprec = 0;
 301
 302   gensym_count = 0;
 303
 304   semantic_parser = 0;
 305   pure_parser = 0;
 306   yylsp_needed = 0;
 307
 308   grammar = NULL;
 309
 310   init_lex();
 311   lineno = 1;
 312
 313   /* initialize the symbol table.  */
 314   tabinit();
 315   /* construct the error token */
 316   errtoken = getsym("error");
 317   errtoken->class = STOKEN;
 318   errtoken->user_token_number = 256; /* Value specified by posix.  */
 319   /* construct a token that represents all undefined literal tokens. */
 320   /* it is always token number 2.  */
 321   undeftoken = getsym("$undefined.");
 322   undeftoken->class = STOKEN;
 323   undeftoken->user_token_number = 2;
 324   /* Read the declaration section.  Copy %{ ... %} groups to ftable and fdefines file.
 325      Also notice any %token, %left, etc. found there.  */
 326   if (noparserflag)
 327     fprintf(ftable, "\n/*  Bison-generated parse tables, made from %s\n",
 328                 infile);
 329   else
 330     fprintf(ftable, "\n/*  A Bison parser, made from %s\n", infile);
 331   fprintf(ftable, "    by %s  */\n\n", VERSION_STRING);
 332   fprintf(ftable, "#define YYBISON 1  /* Identify Bison output.  */\n\n");
 333   read_declarations();
 334   /* start writing the guard and action files, if they are needed.  */
 335   output_headers();
 336   /* read in the grammar, build grammar in list form.  write out guards and actions.  */
 337   readgram();
 338   /* Now we know whether we need the line-number stack.
 339      If we do, write its type into the .tab.h file.  */
 340   if (fdefines)
 341     reader_output_yylsp(fdefines);
 342   /* write closing delimiters for actions and guards.  */
 343   output_trailers();
 344   if (yylsp_needed)
 345     fprintf(ftable, "#define YYLSP_NEEDED\n\n");
 346   /* assign the symbols their symbol numbers.
 347      Write #defines for the token symbols into fdefines if requested.  */
 348   packsymbols();
 349   /* convert the grammar into the format described in gram.h.  */
 350   packgram();
 351   /* free the symbol table data structure
 352      since symbols are now all referred to by symbol number.  */
 353   free_symtab();
 354 }
 355
 356 void
 357 reader_output_yylsp (FILE *f)
 358 {
 359   if (yylsp_needed)
 360     fprintf(f, LTYPESTR);
 361 }
 362
 363 /* Read from finput until `%%' is seen.  Discard the `%%'.  Handle any
 364    `%' declarations, and copy the contents of any `%{ ... %}' groups
 365    to fattrs.  */
 366
 367 void
 368 read_declarations (void)
 369 {
 370   register int c;
 371   register int tok;
 372
 373   for (;;)
 374     {
 375       c = skip_white_space();
 376
 377       if (c == '%')
 378         {
 379           tok = parse_percent_token();
 380
 381           switch (tok)
 382             {
 383             case TWO_PERCENTS:
 384               return;
 385
 386             case PERCENT_LEFT_CURLY:
 387               copy_definition();
 388               break;
 389
 390             case TOKEN:
 391               parse_token_decl (STOKEN, SNTERM);
 392               break;
 393
 394             case NTERM:
 395               parse_token_decl (SNTERM, STOKEN);
 396               break;
 397
 398             case TYPE:
 399               parse_type_decl();
 400               break;
 401
 402             case START:
 403               parse_start_decl();
 404               break;
 405
 406             case UNION:
 407               parse_union_decl ();
 408               break;
 409
 410             case EXPECT:
 411               parse_expect_decl();
 412               break;
 413             case THONG:
 414               parse_thong_decl();
 415               break;
 416             case LEFT:
 417               parse_assoc_decl(LEFT_ASSOC);
 418               break;
 419
 420             case RIGHT:
 421               parse_assoc_decl(RIGHT_ASSOC);
 422               break;
 423
 424             case NONASSOC:
 425               parse_assoc_decl(NON_ASSOC);
 426               break;
 427
 428             case SEMANTIC_PARSER:
 429               if (semantic_parser == 0)
 430                 {
 431                   semantic_parser = 1;
 432                   open_extra_files();
 433                 }
 434               break;
 435
 436             case PURE_PARSER:
 437               pure_parser = 1;
 438               break;
 439
 440             case NOOP:
 441               break;
 442
 443             default:
 444               complain (_("unrecognized: %s"), token_buffer);
 445               skip_to_char('%');
 446           }
 447         }
 448       else if (c == EOF)
 449         fatal (_("no input grammar"));
 450       else
 451         {
 452           complain (_("unknown character: %s"), printable_version(c));
 453           skip_to_char('%');
 454         }
 455     }
 456 }
 457
 458
 459 /* Copy the contents of a `%{ ... %}' into the definitions file.  The
 460    `%{' has already been read.  Return after reading the `%}'.  */
 461
 462 void
 463 copy_definition (void)
 464 {
 465   register int c;
 466   /* -1 while reading a character if prev char was %. */
 467   register int after_percent;
 468
 469   if (!nolinesflag)
 470     fprintf(fattrs, "#line %d \"%s\"\n", lineno, infile);
 471
 472   after_percent = 0;
 473
 474   c = getc (finput);
 475
 476   for (;;)
 477     {
 478       switch (c)
 479         {
 480         case '\n':
 481           putc(c, fattrs);
 482           lineno++;
 483           break;
 484
 485         case '%':
 486           after_percent = -1;
 487           break;
 488
 489         case '\'':
 490         case '"':
 491           copy_string (finput, fattrs, c);
 492           break;
 493
 494         case '/':
 495           putc (c, fattrs);
 496           c = getc (finput);
 497           if (c != '*' && c != '/')
 498             continue;
 499           copy_comment (finput, fattrs, c);
 500           break;
 501
 502         case EOF:
 503           fatal ("%s",
 504                  _("unterminated `%{' definition"));
 505
 506         default:
 507           putc(c, fattrs);
 508         }
 509
 510       c = getc(finput);
 511
 512       if (after_percent)
 513         {
 514           if (c == '}')
 515             return;
 516           putc('%', fattrs);
 517         }
 518       after_percent = 0;
 519
 520     }
 521
 522 }
 523
 524
 525
 526 /* parse what comes after %token or %nterm.
 527 For %token, what_is is STOKEN and what_is_not is SNTERM.
 528 For %nterm, the arguments are reversed.  */
 529
 530 void
 531 parse_token_decl (int what_is, int what_is_not)
 532 {
 533   register int token = 0;
 534   register char *typename = 0;
 535   register struct bucket *symbol = NULL;  /* pts to symbol being defined */
 536   int k;
 537
 538   for (;;)
 539     {
 540       int tmp_char = ungetc (skip_white_space (), finput);
 541
 542       if (tmp_char == '%')
 543         return;
 544       if (tmp_char == EOF)
 545         fatal (_("Premature EOF after %s"), token_buffer);
 546
 547       token = lex();
 548       if (token == COMMA)
 549         {
 550           symbol = NULL;
 551           continue;
 552         }
 553       if (token == TYPENAME)
 554         {
 555           k = strlen(token_buffer);
 556           typename = NEW2(k + 1, char);
 557           strcpy(typename, token_buffer);
 558           value_components_used = 1;
 559           symbol = NULL;
 560         }
 561       else if (token == IDENTIFIER && *symval->tag == '\"'
 562                 && symbol)
 563         {
 564           translations = 1;
 565           symval->class = STOKEN;
 566           symval->type_name = typename;
 567           symval->user_token_number = symbol->user_token_number;
 568           symbol->user_token_number = SALIAS;
 569
 570           symval->alias = symbol;
 571           symbol->alias = symval;
 572           symbol = NULL;
 573
 574           nsyms--;   /* symbol and symval combined are only one symbol */
 575         }
 576       else if (token == IDENTIFIER)
 577         {
 578           int oldclass = symval->class;
 579           symbol = symval;
 580
 581           if (symbol->class == what_is_not)
 582             complain (_("symbol %s redefined"), symbol->tag);
 583           symbol->class = what_is;
 584           if (what_is == SNTERM && oldclass != SNTERM)
 585             symbol->value = nvars++;
 586
 587           if (typename)
 588             {
 589               if (symbol->type_name == NULL)
 590                 symbol->type_name = typename;
 591               else if (strcmp(typename, symbol->type_name) != 0)
 592                 complain (_("type redeclaration for %s"), symbol->tag);
 593             }
 594         }
 595       else if (symbol && token == NUMBER)
 596         {
 597           symbol->user_token_number = numval;
 598           translations = 1;
 599         }
 600       else
 601         {
 602           complain (_("`%s' is invalid in %s"),
 603                     token_buffer,
 604                 (what_is == STOKEN) ? "%token" : "%nterm");
 605           skip_to_char('%');
 606         }
 607     }
 608
 609 }
 610
 611 /* parse what comes after %thong
 612         the full syntax is
 613                 %thong <type> token number literal
 614  the <type> or number may be omitted.  The number specifies the
 615  user_token_number.
 616
 617  Two symbols are entered in the table, one for the token symbol and
 618  one for the literal.  Both are given the <type>, if any, from the declaration.
 619  The ->user_token_number of the first is SALIAS and the ->user_token_number
 620  of the second is set to the number, if any, from the declaration.
 621  The two symbols are linked via pointers in their ->alias fields.
 622
 623  during output_defines_table, the symbol is reported
 624  thereafter, only the literal string is retained
 625  it is the literal string that is output to yytname
 626 */
 627
 628 void
 629 parse_thong_decl (void)
 630 {
 631   register int token;
 632   register struct bucket *symbol;
 633   register char *typename = 0;
 634   int k, usrtoknum;
 635
 636   translations = 1;
 637   token = lex();                /* fetch typename or first token */
 638   if (token == TYPENAME) {
 639     k = strlen(token_buffer);
 640     typename = NEW2(k + 1, char);
 641     strcpy(typename, token_buffer);
 642     value_components_used = 1;
 643     token = lex();              /* fetch first token */
 644   }
 645
 646   /* process first token */
 647
 648   if (token != IDENTIFIER)
 649     {
 650       complain (_("unrecognized item %s, expected an identifier"),
 651                 token_buffer);
 652       skip_to_char('%');
 653       return;
 654     }
 655   symval->class = STOKEN;
 656   symval->type_name = typename;
 657   symval->user_token_number = SALIAS;
 658   symbol = symval;
 659
 660   token = lex();                /* get number or literal string */
 661
 662   if (token == NUMBER) {
 663     usrtoknum = numval;
 664     token = lex();              /* okay, did number, now get literal */
 665   }
 666   else usrtoknum = 0;
 667
 668   /* process literal string token */
 669
 670   if (token != IDENTIFIER || *symval->tag != '\"')
 671     {
 672       complain (_("expected string constant instead of %s"),
 673                 token_buffer);
 674       skip_to_char('%');
 675       return;
 676     }
 677   symval->class = STOKEN;
 678   symval->type_name = typename;
 679   symval->user_token_number = usrtoknum;
 680
 681   symval->alias = symbol;
 682   symbol->alias = symval;
 683
 684   nsyms--;                      /* symbol and symval combined are only one symbol */
 685 }
 686
 687
 688 /* Parse what comes after %start */
 689
 690 void
 691 parse_start_decl (void)
 692 {
 693   if (start_flag)
 694     complain (_("multiple %s declarations"), "%start");
 695   if (lex () != IDENTIFIER)
 696     complain (_("invalid %s declaration"), "%start");
 697   else
 698     {
 699       start_flag = 1;
 700       startval = symval;
 701     }
 702 }
 703
 704
 705
 706 /* read in a %type declaration and record its information for get_type_name to access */
 707
 708 void
 709 parse_type_decl (void)
 710 {
 711   register int k;
 712   register char *name;
 713
 714   if (lex() != TYPENAME)
 715     {
 716       complain ("%s", _("%type declaration has no <typename>"));
 717       skip_to_char('%');
 718       return;
 719     }
 720
 721   k = strlen(token_buffer);
 722   name = NEW2(k + 1, char);
 723   strcpy(name, token_buffer);
 724
 725   for (;;)
 726     {
 727       register int t;
 728       int tmp_char = ungetc (skip_white_space (), finput);
 729
 730       if (tmp_char == '%')
 731         return;
 732       if (tmp_char == EOF)
 733         fatal (_("Premature EOF after %s"), token_buffer);
 734
 735       t = lex();
 736
 737       switch (t)
 738         {
 739
 740         case COMMA:
 741         case SEMICOLON:
 742           break;
 743
 744         case IDENTIFIER:
 745           if (symval->type_name == NULL)
 746             symval->type_name = name;
 747           else if (strcmp(name, symval->type_name) != 0)
 748             complain (_("type redeclaration for %s"), symval->tag);
 749
 750           break;
 751
 752         default:
 753           complain (_("invalid %%type declaration due to item: %s"),
 754                     token_buffer);
 755           skip_to_char('%');
 756         }
 757     }
 758 }
 759
 760
 761
 762 /* read in a %left, %right or %nonassoc declaration and record its information.  */
 763 /* assoc is either LEFT_ASSOC, RIGHT_ASSOC or NON_ASSOC.  */
 764
 765 void
 766 parse_assoc_decl (int assoc)
 767 {
 768   register int k;
 769   register char *name = NULL;
 770   register int prev = 0;
 771
 772   lastprec++;  /* Assign a new precedence level, never 0.  */
 773
 774   for (;;)
 775     {
 776       register int t;
 777       int tmp_char = ungetc (skip_white_space (), finput);
 778
 779       if (tmp_char == '%')
 780         return;
 781       if (tmp_char == EOF)
 782         fatal (_("Premature EOF after %s"), token_buffer);
 783
 784       t = lex();
 785
 786       switch (t)
 787         {
 788
 789         case TYPENAME:
 790           k = strlen(token_buffer);
 791           name = NEW2(k + 1, char);
 792           strcpy(name, token_buffer);
 793           break;
 794
 795         case COMMA:
 796           break;
 797
 798         case IDENTIFIER:
 799           if (symval->prec != 0)
 800             complain (_("redefining precedence of %s"), symval->tag);
 801           symval->prec = lastprec;
 802           symval->assoc = assoc;
 803           if (symval->class == SNTERM)
 804             complain (_("symbol %s redefined"), symval->tag);
 805           symval->class = STOKEN;
 806           if (name)
 807             { /* record the type, if one is specified */
 808               if (symval->type_name == NULL)
 809                 symval->type_name = name;
 810               else if (strcmp(name, symval->type_name) != 0)
 811                 complain (_("type redeclaration for %s"), symval->tag);
 812             }
 813           break;
 814
 815         case NUMBER:
 816           if (prev == IDENTIFIER)
 817             {
 818               symval->user_token_number = numval;
 819               translations = 1;
 820             }
 821           else
 822             {
 823               complain (_("invalid text (%s) - number should be after identifier"),
 824                         token_buffer);
 825               skip_to_char('%');
 826             }
 827           break;
 828
 829         case SEMICOLON:
 830           return;
 831
 832         default:
 833           complain (_("unexpected item: %s"), token_buffer);
 834           skip_to_char('%');
 835         }
 836
 837       prev = t;
 838
 839     }
 840 }
 841
 842
 843
 844 /* copy the union declaration into fattrs (and fdefines),
 845    where it is made into the
 846    definition of YYSTYPE, the type of elements of the parser value stack.  */
 847
 848 void
 849 parse_union_decl (void)
 850 {
 851   register int c;
 852   register int count = 0;
 853
 854   if (typed)
 855     complain (_("multiple %s declarations"), "%union");
 856
 857   typed = 1;
 858
 859   if (!nolinesflag)
 860     fprintf (fattrs, "\n#line %d \"%s\"\n", lineno, infile);
 861   else
 862     fprintf (fattrs, "\n");
 863
 864   fprintf (fattrs, "typedef union");
 865   if (fdefines)
 866     fprintf (fdefines, "typedef union");
 867
 868   c = getc (finput);
 869
 870   while (c != EOF)
 871     {
 872       putc (c, fattrs);
 873       if (fdefines)
 874         putc (c, fdefines);
 875
 876       switch (c)
 877         {
 878         case '\n':
 879           lineno++;
 880           break;
 881
 882         case '/':
 883           c = getc (finput);
 884           if (c != '*' && c != '/')
 885             continue;
 886           copy_comment2 (finput, fattrs, fdefines, c);
 887           break;
 888
 889
 890         case '{':
 891           count++;
 892           break;
 893
 894         case '}':
 895           if (count == 0)
 896             complain (_("unmatched %s"), "`}'");
 897           count--;
 898           if (count <= 0)
 899             {
 900               fprintf (fattrs, " YYSTYPE;\n");
 901               if (fdefines)
 902                 fprintf (fdefines, " YYSTYPE;\n");
 903               /* JF don't choke on trailing semi */
 904               c = skip_white_space ();
 905               if (c != ';')
 906                 ungetc (c,finput);
 907               return;
 908             }
 909         }
 910
 911       c = getc (finput);
 912     }
 913 }
 914
 915 /* parse the declaration %expect N which says to expect N
 916    shift-reduce conflicts.  */
 917
 918 void
 919 parse_expect_decl (void)
 920 {
 921   register int c;
 922   register int count;
 923   char buffer[20];
 924
 925   c = getc(finput);
 926   while (c == ' ' || c == '\t')
 927     c = getc(finput);
 928
 929   count = 0;
 930   while (c >= '0' && c <= '9')
 931     {
 932       if (count < 20)
 933         buffer[count++] = c;
 934       c = getc(finput);
 935     }
 936   buffer[count] = 0;
 937
 938   ungetc (c, finput);
 939
 940   if (count <= 0 || count > 10)
 941     complain ("%s", _("argument of %expect is not an integer"));
 942   expected_conflicts = atoi (buffer);
 943 }
 944
 945 /* that's all of parsing the declaration section */
 946 \f
 947 /* FINPUT is pointing to a location (i.e., a `@').  Output to FOUTPUT
 948    a reference to this location. STACK_OFFSET is the number of values
 949    in the current rule so far, which says where to find `$0' with
 950    respect to the top of the stack.  */
 951 static inline void
 952 copy_at (FILE *finput, FILE *foutput, int stack_offset)
 953 {
 954   int c;
 955
 956   c = getc (finput);
 957   if (c == '$')
 958     {
 959       fprintf (foutput, "yyloc");
 960       yylsp_needed = 1;
 961     }
 962   else if (isdigit(c) || c == '-')
 963     {
 964       int n;
 965
 966       ungetc (c, finput);
 967       n = read_signed_integer (finput);
 968
 969       fprintf (foutput, "yylsp[%d]", n - stack_offset);
 970       yylsp_needed = 1;
 971     }
 972   else
 973     complain (_("@%s is invalid"), printable_version (c));
 974 }
 975
 976
 977 /* Get the data type (alternative in the union) of the value for
 978    symbol n in rule rule.  */
 979
 980 char *
 981 get_type_name (int n, symbol_list *rule)
 982 {
 983   register int i;
 984   register symbol_list *rp;
 985
 986   if (n < 0)
 987     {
 988       complain (_("invalid $ value"));
 989       return NULL;
 990     }
 991
 992   rp = rule;
 993   i = 0;
 994
 995   while (i < n)
 996     {
 997       rp = rp->next;
 998       if (rp == NULL || rp->sym == NULL)
 999         {
1000           complain (_("invalid $ value"));
1001           return NULL;
1002         }
1003       i++;
1004     }
1005
1006   return rp->sym->type_name;
1007 }
1008
1009
1010
1011 /* After `%guard' is seen in the input file, copy the actual guard
1012    into the guards file.  If the guard is followed by an action, copy
1013    that into the actions file.  STACK_OFFSET is the number of values
1014    in the current rule so far, which says where to find `$0' with
1015    respect to the top of the stack, for the simple parser in which the
1016    stack is not popped until after the guard is run.  */
1017
1018 void
1019 copy_guard (symbol_list *rule, int stack_offset)
1020 {
1021   register int c;
1022   register int n;
1023   register int count;
1024   register char *type_name;
1025   int brace_flag = 0;
1026
1027   /* offset is always 0 if parser has already popped the stack pointer */
1028   if (semantic_parser) stack_offset = 0;
1029
1030   fprintf(fguard, "\ncase %d:\n", nrules);
1031   if (!nolinesflag)
1032     fprintf (fguard, "#line %d \"%s\"\n", lineno, infile);
1033   putc('{', fguard);
1034
1035   count = 0;
1036   c = getc(finput);
1037
1038   while (brace_flag ? (count > 0) : (c != ';'))
1039     {
1040       switch (c)
1041         {
1042         case '\n':
1043           putc(c, fguard);
1044           lineno++;
1045           break;
1046
1047         case '{':
1048           putc(c, fguard);
1049           brace_flag = 1;
1050           count++;
1051           break;
1052
1053         case '}':
1054           putc(c, fguard);
1055           if (count > 0)
1056             count--;
1057           else
1058             {
1059               complain (_("unmatched %s"), "`}'");
1060               c = getc(finput); /* skip it */
1061             }
1062           break;
1063
1064         case '\'':
1065         case '"':
1066           copy_string (finput, fguard, c);
1067           break;
1068
1069         case '/':
1070           putc (c, fguard);
1071           c = getc (finput);
1072           if (c != '*' && c != '/')
1073             continue;
1074           copy_comment (finput, fguard, c);
1075           break;
1076
1077         case '$':
1078           c = getc(finput);
1079           type_name = NULL;
1080
1081           if (c == '<')
1082             {
1083               register char *cp = token_buffer;
1084
1085               while ((c = getc(finput)) != '>' && c > 0)
1086                 {
1087                   if (cp == token_buffer + maxtoken)
1088                     cp = grow_token_buffer(cp);
1089
1090                   *cp++ = c;
1091                 }
1092               *cp = 0;
1093               type_name = token_buffer;
1094
1095               c = getc(finput);
1096             }
1097
1098           if (c == '$')
1099             {
1100               fprintf(fguard, "yyval");
1101               if (!type_name)
1102                 type_name = rule->sym->type_name;
1103               if (type_name)
1104                 fprintf(fguard, ".%s", type_name);
1105               if(!type_name && typed)
1106                 complain (_("$$ of `%s' has no declared type"),
1107                           rule->sym->tag);
1108             }
1109           else if (isdigit(c) || c == '-')
1110             {
1111               ungetc (c, finput);
1112               n = read_signed_integer (finput);
1113               c = getc (finput);
1114
1115               if (!type_name && n > 0)
1116                 type_name = get_type_name(n, rule);
1117
1118               fprintf(fguard, "yyvsp[%d]", n - stack_offset);
1119               if (type_name)
1120                 fprintf(fguard, ".%s", type_name);
1121               if (!type_name && typed)
1122                 complain (_("$%d of `%s' has no declared type"),
1123                           n, rule->sym->tag);
1124               continue;
1125             }
1126           else
1127             complain (_("$%s is invalid"), printable_version (c));
1128           break;
1129
1130         case '@':
1131           copy_at (finput, fguard, stack_offset);
1132           break;
1133
1134         case EOF:
1135           fatal ("%s",
1136                  _("unterminated %guard clause"));
1137
1138         default:
1139           putc (c, fguard);
1140         }
1141
1142       if (c != '}' || count != 0)
1143         c = getc(finput);
1144     }
1145
1146   c = skip_white_space();
1147
1148   fprintf(fguard, ";\n    break;}");
1149   if (c == '{')
1150     copy_action (rule, stack_offset);
1151   else if (c == '=')
1152     {
1153       c = getc(finput); /* why not skip_white_space -wjh */
1154       if (c == '{')
1155         copy_action (rule, stack_offset);
1156     }
1157   else
1158     ungetc(c, finput);
1159 }
1160
1161
1162
1163 /* Assuming that a `{' has just been seen, copy everything up to the
1164    matching `}' into the actions file.  STACK_OFFSET is the number of
1165    values in the current rule so far, which says where to find `$0'
1166    with respect to the top of the stack.  */
1167
1168 void
1169 copy_action (symbol_list *rule, int stack_offset)
1170 {
1171   register int c;
1172   register int n;
1173   register int count;
1174   register char *type_name;
1175
1176   /* offset is always 0 if parser has already popped the stack pointer */
1177   if (semantic_parser)
1178     stack_offset = 0;
1179
1180   fprintf (faction, "\ncase %d:\n", nrules);
1181   if (!nolinesflag)
1182     fprintf (faction, "#line %d \"%s\"\n", lineno, infile);
1183   putc ('{', faction);
1184
1185   count = 1;
1186   c = getc(finput);
1187
1188   while (count > 0)
1189     {
1190       while (c != '}')
1191         {
1192           switch (c)
1193             {
1194             case '\n':
1195               putc(c, faction);
1196               lineno++;
1197               break;
1198
1199             case '{':
1200               putc(c, faction);
1201               count++;
1202               break;
1203
1204             case '\'':
1205             case '"':
1206               copy_string (finput, faction, c);
1207               break;
1208
1209             case '/':
1210               putc (c, faction);
1211               c = getc (finput);
1212               if (c != '*' && c != '/')
1213                 continue;
1214               copy_comment (finput, faction, c);
1215               break;
1216
1217             case '$':
1218               c = getc(finput);
1219               type_name = NULL;
1220
1221               if (c == '<')
1222                 {
1223                   register char *cp = token_buffer;
1224
1225                   while ((c = getc(finput)) != '>' && c > 0)
1226                     {
1227                       if (cp == token_buffer + maxtoken)
1228                         cp = grow_token_buffer(cp);
1229
1230                       *cp++ = c;
1231                     }
1232                   *cp = 0;
1233                   type_name = token_buffer;
1234                   value_components_used = 1;
1235
1236                   c = getc(finput);
1237                 }
1238               if (c == '$')
1239                 {
1240                   fprintf(faction, "yyval");
1241                   if (!type_name)
1242                     type_name = get_type_name(0, rule);
1243                   if (type_name)
1244                     fprintf(faction, ".%s", type_name);
1245                   if(!type_name && typed)
1246                     complain (_("$$ of `%s' has no declared type"),
1247                               rule->sym->tag);
1248                 }
1249               else if (isdigit(c) || c == '-')
1250                 {
1251                   ungetc (c, finput);
1252                   n = read_signed_integer(finput);
1253                   c = getc(finput);
1254
1255                   if (!type_name && n > 0)
1256                     type_name = get_type_name(n, rule);
1257
1258                   fprintf(faction, "yyvsp[%d]", n - stack_offset);
1259                   if (type_name)
1260                     fprintf(faction, ".%s", type_name);
1261                   if(!type_name && typed)
1262                     complain (_("$%d of `%s' has no declared type"),
1263                               n, rule->sym->tag);
1264                   continue;
1265                 }
1266               else
1267                 complain (_("$%s is invalid"), printable_version (c));
1268
1269               break;
1270
1271             case '@':
1272               copy_at (finput, faction, stack_offset);
1273               break;
1274
1275             case EOF:
1276               fatal (_("unmatched %s"), "`{'");
1277
1278             default:
1279               putc(c, faction);
1280             }
1281
1282           c = getc(finput);
1283         }
1284
1285       /* above loop exits when c is '}' */
1286
1287       if (--count)
1288         {
1289           putc(c, faction);
1290           c = getc(finput);
1291         }
1292     }
1293
1294   fprintf(faction, ";\n    break;}");
1295 }
1296
1297
1298
1299 /* generate a dummy symbol, a nonterminal,
1300 whose name cannot conflict with the user's names. */
1301
1302 bucket *
1303 gensym (void)
1304 {
1305   register bucket *sym;
1306
1307   sprintf (token_buffer, "@%d", ++gensym_count);
1308   sym = getsym(token_buffer);
1309   sym->class = SNTERM;
1310   sym->value = nvars++;
1311   return sym;
1312 }
1313
/* Parse the input grammar into a single symbol_list structure.
   Each rule is represented by a sequence of symbols: the left hand side
   followed by the contents of the right hand side, followed by a null pointer
   instead of a symbol to terminate the rule.
   The next symbol is the lhs of the following rule.

   All guards and actions are copied out to the appropriate files,
   labelled by the rule number they apply to.

   Reading stops at the second `%%' (TWO_PERCENTS) or at end of file.
   Afterwards the function checks the symbol and rule counts, emits a
   default YYSTYPE when no types were used, turns every still-unknown
   symbol into a nonterminal, and computes ntokens.  */

void
readgram (void)
{
  register int t;
  register bucket *lhs = NULL;
  register symbol_list *p;
  register symbol_list *p1;
  register bucket *bp;

  symbol_list *crule;   /* points to first symbol_list of current rule.  */
                        /* its symbol is the lhs of the rule.   */
  symbol_list *crule1;  /* points to the symbol_list preceding crule.  */

  /* p1 is the last link appended to the grammar list so far.  */
  p1 = NULL;

  t = lex();

  while (t != TWO_PERCENTS && t != ENDFILE)
    {
      /* An identifier starts a rule with a new lhs; a bar starts
         another alternative for the most recent lhs.  */
      if (t == IDENTIFIER || t == BAR)
        {
          register int actionflag = 0;
          int rulelength = 0;  /* number of symbols in rhs of this rule so far  */
          int xactions = 0;     /* JF for error checking */
          bucket *first_rhs = 0;

          if (t == IDENTIFIER)
            {
              lhs = symval;

              /* The lhs of the first rule seen becomes the start
                 symbol unless start_flag was already set.  */
              if (!start_flag)
                {
                  startval = lhs;
                  start_flag = 1;
                }

              t = lex();
              if (t != COLON)
                {
                  complain (_("ill-formed rule: initial symbol not followed by colon"));
                  unlex(t);
                }
            }

          if (nrules == 0 && t == BAR)
            {
              complain (_("grammar starts with vertical bar"));
              /* NOTE(review): symval is whatever the lexer set last;
                 it is dereferenced through lhs below, so confirm it
                 cannot be null at this point.  */
              lhs = symval;     /* BOGUS: use a random symval */
            }
          /* start a new rule and record its lhs.  */

          nrules++;
          nitems++;

          record_rule_line ();

          p = NEW(symbol_list);
          p->sym = lhs;

          /* Append the new lhs link; remember the link before it in
             crule1 so a dummy rule can later be spliced in front.  */
          crule1 = p1;
          if (p1)
            p1->next = p;
          else
            grammar = p;

          p1 = p;
          crule = p;

          /* mark the rule's lhs as a nonterminal if not already so.  */

          if (lhs->class == SUNKNOWN)
            {
              lhs->class = SNTERM;
              lhs->value = nvars;
              nvars++;
            }
          else if (lhs->class == STOKEN)
            complain (_("rule given for %s, which is a token"), lhs->tag);

          /* read the rhs of the rule.  */

          for (;;)
            {
              t = lex();
              if (t == PREC)
                {
                  /* Read the token after the precedence marker and
                     record the symval it left behind as this rule's
                     precedence symbol, then move on.  */
                  t = lex();
                  crule->ruleprec = symval;
                  t = lex();
                }

              /* Only identifiers and actions belong to the rhs.  */
              if (! (t == IDENTIFIER || t == LEFT_CURLY)) break;

              /* If next token is an identifier, see if a colon follows it.
                 If one does, exit this rule now.  */
              if (t == IDENTIFIER)
                {
                  register bucket *ssave;
                  register int t1;

                  /* Peek one token ahead and push it back; symval is
                     saved and restored around the peek.  */
                  ssave = symval;
                  t1 = lex();
                  unlex(t1);
                  symval = ssave;
                  if (t1 == COLON) break;

                  if(!first_rhs)        /* JF */
                    first_rhs = symval;
                  /* Not followed by colon =>
                     process as part of this rule's rhs.  */
                }

              /* If we just passed an action, that action was in the middle
                 of a rule, so make a dummy rule to reduce it to a
                 non-terminal.  */
              if (actionflag)
                {
                  register bucket *sdummy;

                  /* Since the action was written out with this rule's */
                  /* number, we must give the new rule this number */
                  /* by inserting the new rule before it.  */

                  /* Make a dummy nonterminal, a gensym.  */
                  sdummy = gensym();

                  /* Make a new rule, whose body is empty,
                     before the current one, so that the action
                     just read can belong to it.  */
                  nrules++;
                  nitems++;
                  record_rule_line ();
                  p = NEW(symbol_list);
                  if (crule1)
                    crule1->next = p;
                  else grammar = p;
                  p->sym = sdummy;
                  /* The second new link ends the dummy rule and becomes
                     the link preceding crule (its sym is left as NEW
                     returned it -- presumably null).  */
                  crule1 = NEW(symbol_list);
                  p->next = crule1;
                  crule1->next = crule;

                  /* insert the dummy generated by that rule into this rule.  */
                  nitems++;
                  p = NEW(symbol_list);
                  p->sym = sdummy;
                  p1->next = p;
                  p1 = p;

                  actionflag = 0;
                }

              if (t == IDENTIFIER)
                {
                  nitems++;
                  p = NEW(symbol_list);
                  p->sym = symval;
                  p1->next = p;
                  p1 = p;
                }
              else /* handle an action.  */
                {
                  copy_action(crule, rulelength);
                  actionflag = 1;
                  xactions++;   /* JF */
                }
              /* Counted for identifiers and actions alike.  */
              rulelength++;
            }   /* end of  read rhs of rule */

          /* Put an empty link in the list to mark the end of this rule  */
          p = NEW(symbol_list);
          p1->next = p;
          p1 = p;

          /* A PREC token left over here directly followed the one
             consumed inside the loop above.  */
          if (t == PREC)
            {
              complain (_("two @prec's in a row"));
              t = lex();
              crule->ruleprec = symval;
              t = lex();
            }
          if (t == GUARD)
            {
              if (! semantic_parser)
                complain ("%s",
                          _("%guard present but %semantic_parser not specified"));

              copy_guard(crule, rulelength);
              t = lex();
            }
          else if (t == LEFT_CURLY)
            {
                /* This case never occurs -wjh */
              if (actionflag)
                complain (_("two actions at end of one rule"));
              copy_action(crule, rulelength);
              actionflag = 1;
              xactions++;       /* -wjh */
              t = lex();
            }
          /* If $$ is being set in default way, report if any type
             mismatch.  */
          else if (!xactions
                   && first_rhs
                   && lhs->type_name != first_rhs->type_name)
            {
              /* The pointers differ; it is a clash unless both names
                 exist and compare equal as strings.  */
              if (lhs->type_name == 0
                  || first_rhs->type_name == 0
                  || strcmp(lhs->type_name,first_rhs->type_name))
                complain (_("type clash (`%s' `%s') on default action"),
                          lhs->type_name ? lhs->type_name : "",
                             first_rhs->type_name ? first_rhs->type_name : "");
            }
          /* Warn if there is no default for $$ but we need one.  */
          else if (!xactions && !first_rhs && lhs->type_name != 0)
            complain (_("empty rule for typed nonterminal, and no action"));
          /* The semicolon ending a rule is optional.  */
          if (t == SEMICOLON)
            t = lex();
        }
#if 0
  /* these things can appear as alternatives to rules.  */
/* NO, they cannot.
        a) none of the documentation allows them
        b) most of them scan forward until finding a next %
                thus they may swallow lots of intervening rules
*/
      else if (t == TOKEN)
        {
          parse_token_decl(STOKEN, SNTERM);
          t = lex();
        }
      else if (t == NTERM)
        {
          parse_token_decl(SNTERM, STOKEN);
          t = lex();
        }
      else if (t == TYPE)
        {
          t = get_type();
        }
      else if (t == UNION)
        {
          parse_union_decl();
          t = lex();
        }
      else if (t == EXPECT)
        {
          parse_expect_decl();
          t = lex();
        }
      else if (t == START)
        {
          parse_start_decl();
          t = lex();
        }
#endif

      else
        {
          /* Anything else cannot start a rule: report it and skip to
             the next token.  */
          complain (_("invalid input: %s"), token_buffer);
          t = lex();
        }
    }

  /* grammar has been read.  Do some checking */

  if (nsyms > MAXSHORT)
    fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
           MAXSHORT);
  if (nrules == 0)
    fatal (_("no rules in the input grammar"));

  if (typed == 0        /* JF put out same default YYSTYPE as YACC does */
      && !value_components_used)
    {
      /* We used to use `unsigned long' as YYSTYPE on MSDOS,
         but it seems better to be consistent.
         Most programs should declare their own type anyway.  */
      fprintf(fattrs, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
      if (fdefines)
        fprintf(fdefines, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
    }

  /* Report any undefined symbols and consider them nonterminals.  */

  for (bp = firstsymbol; bp; bp = bp->next)
    if (bp->class == SUNKNOWN)
      {
        complain (_("symbol %s is used, but is not defined as a token and has no rules"),
                  bp->tag);
        bp->class = SNTERM;
        bp->value = nvars++;
      }

  /* Every symbol that is not a nonterminal is a token.  */
  ntokens = nsyms - nvars;
}
1618
1619
1620 void
1621 record_rule_line (void)
1622 {
1623   /* Record each rule's source line number in rline table.  */
1624
1625   if (nrules >= rline_allocated)
1626     {
1627       rline_allocated = nrules * 2;
1628       rline = (short *) xrealloc ((char *) rline,
1629                                   rline_allocated * sizeof (short));
1630     }
1631   rline[nrules] = lineno;
1632 }
1633
1634
#if 0
/* Read a %type declaration and record its type name on every symbol
   it lists, for get_type_name to access.  Returns the first token
   that does not belong to the declaration.
   This is unused: it is only called from the `#if 0' part of
   readgram.  */
static int
get_type (void)
{
  register int tok;
  register int len;
  register char *type;

  tok = lex();
  if (tok != TYPENAME)
    {
      complain (_("invalid %s declaration"), "%type");
      return tok;
    }

  /* Keep a private copy of the type name; token_buffer is reused.  */
  len = strlen(token_buffer);
  type = NEW2(len + 1, char);
  strcpy(type, token_buffer);

  for (tok = lex(); ; tok = lex())
    {
      /* A semicolon ends the declaration; hand back what follows.  */
      if (tok == SEMICOLON)
        return lex();

      if (tok == IDENTIFIER)
        {
          if (symval->type_name == NULL)
            symval->type_name = type;
          else if (strcmp(type, symval->type_name) != 0)
            complain (_("type redeclaration for %s"), symval->tag);
        }
      else if (tok != COMMA)
        /* Neither separator nor symbol: the declaration is over.  */
        return tok;
    }
}
#endif
1683
1684
1685 /* Assign symbol numbers, and write definition of token names into
1686    fdefines.  Set up vectors tags and sprec of names and precedences
1687    of symbols.  */
1688
1689 void
1690 packsymbols (void)
1691 {
1692   register bucket *bp;
1693   register int tokno = 1;
1694   register int i;
1695   register int last_user_token_number;
1696
1697   /* int lossage = 0; JF set but not used */
1698
1699   tags = NEW2(nsyms + 1, char *);
1700   tags[0] = "$";
1701   user_toknums = NEW2(nsyms + 1, int);
1702   user_toknums[0] = 0;
1703
1704   sprec = NEW2(nsyms, short);
1705   sassoc = NEW2(nsyms, short);
1706
1707   max_user_token_number = 256;
1708   last_user_token_number = 256;
1709
1710   for (bp = firstsymbol; bp; bp = bp->next)
1711     {
1712       if (bp->class == SNTERM)
1713         {
1714           bp->value += ntokens;
1715         }
1716       else if (bp->alias)
1717         {
1718           /* this symbol and its alias are a single token defn.
1719              allocate a tokno, and assign to both check agreement of
1720              ->prec and ->assoc fields and make both the same */
1721           if (bp->value == 0)
1722             bp->value = bp->alias->value = tokno++;
1723
1724           if (bp->prec != bp->alias->prec)
1725             {
1726               if (bp->prec != 0 && bp->alias->prec != 0
1727                   && bp->user_token_number == SALIAS)
1728                 complain (_("conflicting precedences for %s and %s"),
1729                           bp->tag, bp->alias->tag);
1730               if (bp->prec != 0)
1731                 bp->alias->prec = bp->prec;
1732               else
1733                 bp->prec = bp->alias->prec;
1734             }
1735
1736           if (bp->assoc != bp->alias->assoc)
1737             {
1738               if (bp->assoc != 0 && bp->alias->assoc != 0
1739                   && bp->user_token_number == SALIAS)
1740                 complain (_("conflicting assoc values for %s and %s"),
1741                           bp->tag, bp->alias->tag);
1742               if (bp->assoc != 0)
1743                 bp->alias->assoc = bp->assoc;
1744               else
1745                 bp->assoc = bp->alias->assoc;
1746             }
1747
1748           if (bp->user_token_number == SALIAS)
1749             continue;  /* do not do processing below for SALIASs */
1750
1751         }
1752       else  /* bp->class == STOKEN */
1753         {
1754           bp->value = tokno++;
1755         }
1756
1757       if (bp->class == STOKEN)
1758         {
1759           if (translations && !(bp->user_token_number))
1760             bp->user_token_number = ++last_user_token_number;
1761           if (bp->user_token_number > max_user_token_number)
1762             max_user_token_number = bp->user_token_number;
1763         }
1764
1765       tags[bp->value] = bp->tag;
1766       user_toknums[bp->value] = bp->user_token_number;
1767       sprec[bp->value] = bp->prec;
1768       sassoc[bp->value] = bp->assoc;
1769
1770     }
1771
1772   if (translations)
1773     {
1774       register int i;
1775
1776       token_translations = NEW2(max_user_token_number+1, short);
1777
1778       /* initialize all entries for literal tokens to 2, the internal
1779          token number for $undefined., which represents all invalid
1780          inputs.  */
1781       for (i = 0; i <= max_user_token_number; i++)
1782         token_translations[i] = 2;
1783
1784       for (bp = firstsymbol; bp; bp = bp->next)
1785         {
1786           if (bp->value >= ntokens)
1787             continue;             /* non-terminal */
1788           if (bp->user_token_number == SALIAS)
1789             continue;
1790           if (token_translations[bp->user_token_number] != 2)
1791             complain (_("tokens %s and %s both assigned number %d"),
1792                       tags[token_translations[bp->user_token_number]],
1793                   bp->tag,
1794                   bp->user_token_number);
1795           token_translations[bp->user_token_number] = bp->value;
1796         }
1797     }
1798
1799   error_token_number = errtoken->value;
1800
1801   if (! noparserflag)
1802     output_token_defines(ftable);
1803
1804   if (startval->class == SUNKNOWN)
1805     fatal (_("the start symbol %s is undefined"), startval->tag);
1806   else if (startval->class == STOKEN)
1807     fatal (_("the start symbol %s is a token"), startval->tag);
1808
1809   start_symbol = startval->value;
1810
1811   if (definesflag)
1812     {
1813       output_token_defines(fdefines);
1814
1815       if (!pure_parser)
1816         {
1817           if (spec_name_prefix)
1818             fprintf(fdefines, "\nextern YYSTYPE %slval;\n", spec_name_prefix);
1819           else
1820             fprintf(fdefines, "\nextern YYSTYPE yylval;\n");
1821         }
1822
1823       if (semantic_parser)
1824         for (i = ntokens; i < nsyms; i++)
1825           {
1826             /* don't make these for dummy nonterminals made by gensym.  */
1827             if (*tags[i] != '@')
1828               fprintf(fdefines, "#define\tNT%s\t%d\n", tags[i], i);
1829           }
1830 #if 0
1831       /* `fdefines' is now a temporary file, so we need to copy its
1832          contents in `done', so we can't close it here.  */
1833       fclose(fdefines);
1834       fdefines = NULL;
1835 #endif
1836     }
1837 }
1838
1839 /* For named tokens, but not literal ones, define the name.  The value
1840    is the user token number.  */
1841
1842 void
1843 output_token_defines (FILE *file)
1844 {
1845   bucket *bp;
1846   register char *cp, *symbol;
1847   register char c;
1848
1849   for (bp = firstsymbol; bp; bp = bp->next)
1850     {
1851       symbol = bp->tag;                         /* get symbol */
1852
1853       if (bp->value >= ntokens) continue;
1854       if (bp->user_token_number == SALIAS) continue;
1855       if ('\'' == *symbol) continue;    /* skip literal character */
1856       if (bp == errtoken) continue;     /* skip error token */
1857       if ('\"' == *symbol)
1858         {
1859                 /* use literal string only if given a symbol with an alias */
1860                 if (bp->alias)
1861                         symbol = bp->alias->tag;
1862                 else
1863                         continue;
1864         }
1865
1866       /* Don't #define nonliteral tokens whose names contain periods.  */
1867       cp = symbol;
1868       while ((c = *cp++) && c != '.');
1869       if (c != '\0')  continue;
1870
1871       fprintf (file, "#define\t%s\t%d\n", symbol,
1872                ((translations && ! rawtoknumflag)
1873                 ? bp->user_token_number
1874                 : bp->value));
1875       if (semantic_parser)
1876         fprintf (file, "#define\tT%s\t%d\n", symbol, bp->value);
1877     }
1878
1879   putc('\n', file);
1880 }
1881
1882
1883
1884 /* convert the rules into the representation using rrhs, rlhs and ritems.  */
1885
1886 void
1887 packgram (void)
1888 {
1889   register int itemno;
1890   register int ruleno;
1891   register symbol_list *p;
1892 /*  register bucket *bp; JF unused */
1893
1894   bucket *ruleprec;
1895
1896   ritem = NEW2(nitems + 1, short);
1897   rlhs = NEW2(nrules, short) - 1;
1898   rrhs = NEW2(nrules, short) - 1;
1899   rprec = NEW2(nrules, short) - 1;
1900   rprecsym = NEW2(nrules, short) - 1;
1901   rassoc = NEW2(nrules, short) - 1;
1902
1903   itemno = 0;
1904   ruleno = 1;
1905
1906   p = grammar;
1907   while (p)
1908     {
1909       rlhs[ruleno] = p->sym->value;
1910       rrhs[ruleno] = itemno;
1911       ruleprec = p->ruleprec;
1912
1913       p = p->next;
1914       while (p && p->sym)
1915         {
1916           ritem[itemno++] = p->sym->value;
1917           /* A rule gets by default the precedence and associativity
1918              of the last token in it.  */
1919           if (p->sym->class == STOKEN)
1920             {
1921               rprec[ruleno] = p->sym->prec;
1922               rassoc[ruleno] = p->sym->assoc;
1923             }
1924           if (p) p = p->next;
1925         }
1926
1927       /* If this rule has a %prec,
1928          the specified symbol's precedence replaces the default.  */
1929       if (ruleprec)
1930         {
1931           rprec[ruleno] = ruleprec->prec;
1932           rassoc[ruleno] = ruleprec->assoc;
1933           rprecsym[ruleno] = ruleprec->value;
1934         }
1935
1936       ritem[itemno++] = -ruleno;
1937       ruleno++;
1938
1939       if (p) p = p->next;
1940     }
1941
1942   ritem[itemno] = 0;
1943 }
1944 \f
/* Read an optionally `-'-prefixed run of decimal digits from STREAM
   and return its value (0 when no digit is present).  The first
   character after the number is pushed back onto STREAM.  */

int
read_signed_integer (FILE *stream)
{
  int negative = 0;
  int value = 0;
  int ch = getc (stream);

  if (ch == '-')
    {
      negative = 1;
      ch = getc (stream);
    }

  for (; isdigit (ch); ch = getc (stream))
    value = value * 10 + (ch - '0');

  /* Leave the terminating character for the caller.  */
  ungetc (ch, stream);

  return negative ? -value : value;
}