src/reader.c

   1 /* Input parser for bison
   2    Copyright (C) 1984, 86, 89, 92, 98, 2000 Free Software Foundation, Inc.
   3
   4    This file is part of Bison, the GNU Compiler Compiler.
   5
   6    Bison is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 2, or (at your option)
   9    any later version.
  10
  11    Bison is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with Bison; see the file COPYING.  If not, write to
  18    the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  19    Boston, MA 02111-1307, USA.  */
  20
  21
  22 /* Read in the grammar specification and record it in the format
  23    described in gram.h.  All guards are copied into the fguard file
  24    and all actions into faction, in each case forming the body of a C
  25    function (yyguard or yyaction) which contains a switch statement to
  26    decide which guard or action to execute.
  27
  28    The entry point is reader ().  */
  29
  30 #include <stdio.h>
  31 #include "system.h"
  32 #include "files.h"
  33 #include "alloc.h"
  34 #include "symtab.h"
  35 #include "lex.h"
  36 #include "gram.h"
  37 #include "machine.h"
  38
  39 #define LTYPESTR        "\
  40 \n\
  41 #ifndef YYLTYPE\n\
  42 typedef\n\
  43   struct yyltype\n\
  44 \
  45     {\n\
  46       int timestamp;\n\
  47       int first_line;\n\
  48       int first_column;\
  49 \n\
  50       int last_line;\n\
  51       int last_column;\n\
  52       char *text;\n\
  53    }\n\
  54 \
  55   yyltype;\n\
  56 \n\
  57 #define YYLTYPE yyltype\n\
  58 #endif\n\
  59 \n"
  60
  61 /* Number of slots allocated (but not necessarily used yet) in `rline'  */
  62 int rline_allocated;
  63
  64 extern char *program_name;
  65 extern int definesflag;
  66 extern int nolinesflag;
  67 extern int noparserflag;
  68 extern int rawtoknumflag;
  69 extern bucket *symval;
  70 extern int numval;
  71 extern int expected_conflicts;
  72 extern char *token_buffer;
  73 extern int maxtoken;
  74
  75 extern void init_lex PARAMS((void));
  76 extern char *grow_token_buffer PARAMS((char *));
  77 extern void tabinit PARAMS((void));
  78 extern void output_headers PARAMS((void));
  79 extern void output_trailers PARAMS((void));
  80 extern void free_symtab PARAMS((void));
  81 extern void open_extra_files PARAMS((void));
  82 extern char *int_to_string PARAMS((int));
  83 extern char *printable_version PARAMS((int));
  84 extern void fatal PARAMS((char *));
  85 extern void fatals PARAMS((char *, char *));
  86 extern void warn PARAMS((char *));
  87 extern void warni PARAMS((char *, int));
  88 extern void warns PARAMS((char *, char *));
  89 extern void warnss PARAMS((char *, char *, char *));
  90 extern void warnsss PARAMS((char *, char *, char *, char *));
  91 extern void unlex PARAMS((int));
  92 extern void done PARAMS((int));
  93
  94 extern int skip_white_space PARAMS((void));
  95 extern int parse_percent_token PARAMS((void));
  96 extern int lex PARAMS((void));
  97
  98 typedef
  99   struct symbol_list
 100     {
 101       struct symbol_list *next;
 102       bucket *sym;
 103       bucket *ruleprec;
 104     }
 105   symbol_list;
 106
 107
 108 void reader PARAMS((void));
 109 void reader_output_yylsp PARAMS((FILE *));
 110 void read_declarations PARAMS((void));
 111 void copy_definition PARAMS((void));
 112 void parse_token_decl PARAMS((int, int));
 113 void parse_start_decl PARAMS((void));
 114 void parse_type_decl PARAMS((void));
 115 void parse_assoc_decl PARAMS((int));
 116 void parse_union_decl PARAMS((void));
 117 void parse_expect_decl PARAMS((void));
 118 char *get_type_name PARAMS((int, symbol_list *));
 119 void copy_guard PARAMS((symbol_list *, int));
 120 void parse_thong_decl PARAMS((void));
 121 void copy_action PARAMS((symbol_list *, int));
 122 bucket *gensym PARAMS((void));
 123 void readgram PARAMS((void));
 124 void record_rule_line PARAMS((void));
 125 void packsymbols PARAMS((void));
 126 void output_token_defines PARAMS((FILE *));
 127 void packgram PARAMS((void));
 128 int read_signed_integer PARAMS((FILE *));
 129
 130 #if 0
 131 static int get_type PARAMS((void));
 132 #endif
 133
 134 int lineno;
 135 symbol_list *grammar;
 136 int start_flag;
 137 bucket *startval;
 138 char **tags;
 139 int *user_toknums;
 140
 141 /* Nonzero if components of semantic values are used, implying
 142    they must be unions.  */
 143 static int value_components_used;
 144
 145 static int typed;  /* nonzero if %union has been seen.  */
 146
 147 static int lastprec;  /* incremented for each %left, %right or %nonassoc seen */
 148
 149 static int gensym_count;  /* incremented for each generated symbol */
 150
 151 static bucket *errtoken;
 152 static bucket *undeftoken;
 153
 154 /* Nonzero if any action or guard uses the @n construct.  */
 155 static int yylsp_needed;
 156
 157
 158 static void
 159 skip_to_char (int target)
 160 {
 161   int c;
 162   if (target == '\n')
 163     warn(_("   Skipping to next \\n"));
 164   else
 165     warni(_("   Skipping to next %c"), target);
 166
 167   do
 168     c = skip_white_space();
 169   while (c != target && c != EOF);
 170   if (c != EOF)
 171     ungetc(c, finput);
 172 }
 173
 174
 175 /* Dump the string from FINPUT to FOUTPUT.  MATCH is the delimiter of
 176    the string (either ' or ").  */
 177
 178 static inline void
 179 copy_string (FILE *finput, FILE *foutput, int match)
 180 {
 181   int c;
 182
 183   putc (match, foutput);
 184   c = getc (finput);
 185
 186   while (c != match)
 187     {
 188       if (c == EOF)
 189         fatal (_("unterminated string at end of file"));
 190       if (c == '\n')
 191         {
 192           warn (_("unterminated string"));
 193           ungetc (c, finput);
 194           c = match;            /* invent terminator */
 195           continue;
 196         }
 197
 198       putc(c, foutput);
 199
 200       if (c == '\\')
 201         {
 202           c = getc (finput);
 203           if (c == EOF)
 204             fatal (_("unterminated string at end of file"));
 205           putc (c, foutput);
 206           if (c == '\n')
 207             lineno++;
 208         }
 209
 210       c = getc(finput);
 211     }
 212
 213   putc(c, foutput);
 214 }
 215
 216
 217 /* Dump the comment from FINPUT to FOUTPUT.  C is either `*' or `/',
 218    depending upon the type of comments used.  */
 219
 220 static inline void
 221 copy_comment (FILE *finput, FILE *foutput, int c)
 222 {
 223   int cplus_comment;
 224   register int match;
 225   register int ended;
 226
 227   cplus_comment = (c == '/');
 228   putc (c, foutput);
 229   c = getc (finput);
 230
 231   ended = 0;
 232   while (!ended)
 233     {
 234       if (!cplus_comment && c == '*')
 235         {
 236           while (c == '*')
 237             {
 238               putc(c, foutput);
 239               c = getc(finput);
 240             }
 241
 242           if (c == '/')
 243             {
 244               putc(c, foutput);
 245               ended = 1;
 246             }
 247         }
 248       else if (c == '\n')
 249         {
 250           lineno++;
 251           putc (c, foutput);
 252           if (cplus_comment)
 253             ended = 1;
 254           else
 255             c = getc(finput);
 256         }
 257       else if (c == EOF)
 258         fatal (_("unterminated comment"));
 259       else
 260         {
 261           putc (c, foutput);
 262           c = getc (finput);
 263         }
 264     }
 265 }
 266
 267
 268 void
 269 reader (void)
 270 {
 271   start_flag = 0;
 272   startval = NULL;  /* start symbol not specified yet. */
 273
 274 #if 0
 275   translations = 0;  /* initially assume token number translation not needed.  */
 276 #endif
 277   /* Nowadays translations is always set to 1,
 278      since we give `error' a user-token-number
 279      to satisfy the Posix demand for YYERRCODE==256.  */
 280   translations = 1;
 281
 282   nsyms = 1;
 283   nvars = 0;
 284   nrules = 0;
 285   nitems = 0;
 286   rline_allocated = 10;
 287   rline = NEW2(rline_allocated, short);
 288
 289   typed = 0;
 290   lastprec = 0;
 291
 292   gensym_count = 0;
 293
 294   semantic_parser = 0;
 295   pure_parser = 0;
 296   yylsp_needed = 0;
 297
 298   grammar = NULL;
 299
 300   init_lex();
 301   lineno = 1;
 302
 303   /* initialize the symbol table.  */
 304   tabinit();
 305   /* construct the error token */
 306   errtoken = getsym("error");
 307   errtoken->class = STOKEN;
 308   errtoken->user_token_number = 256; /* Value specified by posix.  */
 309   /* construct a token that represents all undefined literal tokens. */
 310   /* it is always token number 2.  */
 311   undeftoken = getsym("$undefined.");
 312   undeftoken->class = STOKEN;
 313   undeftoken->user_token_number = 2;
 314   /* Read the declaration section.  Copy %{ ... %} groups to ftable and fdefines file.
 315      Also notice any %token, %left, etc. found there.  */
 316   if (noparserflag)
 317     fprintf(ftable, "\n/*  Bison-generated parse tables, made from %s\n",
 318                 infile);
 319   else
 320     fprintf(ftable, "\n/*  A Bison parser, made from %s\n", infile);
 321   fprintf(ftable, "    by %s  */\n\n", VERSION_STRING);
 322   fprintf(ftable, "#define YYBISON 1  /* Identify Bison output.  */\n\n");
 323   read_declarations();
 324   /* start writing the guard and action files, if they are needed.  */
 325   output_headers();
 326   /* read in the grammar, build grammar in list form.  write out guards and actions.  */
 327   readgram();
 328   /* Now we know whether we need the line-number stack.
 329      If we do, write its type into the .tab.h file.  */
 330   if (fdefines)
 331     reader_output_yylsp(fdefines);
 332   /* write closing delimiters for actions and guards.  */
 333   output_trailers();
 334   if (yylsp_needed)
 335     fprintf(ftable, "#define YYLSP_NEEDED\n\n");
 336   /* assign the symbols their symbol numbers.
 337      Write #defines for the token symbols into fdefines if requested.  */
 338   packsymbols();
 339   /* convert the grammar into the format described in gram.h.  */
 340   packgram();
 341   /* free the symbol table data structure
 342      since symbols are now all referred to by symbol number.  */
 343   free_symtab();
 344 }
 345
 346 void
 347 reader_output_yylsp (FILE *f)
 348 {
 349   if (yylsp_needed)
 350     fprintf(f, LTYPESTR);
 351 }
 352
 353 /* Read from finput until `%%' is seen.  Discard the `%%'.  Handle any
 354    `%' declarations, and copy the contents of any `%{ ... %}' groups
 355    to fattrs.  */
 356
 357 void
 358 read_declarations (void)
 359 {
 360   register int c;
 361   register int tok;
 362
 363   for (;;)
 364     {
 365       c = skip_white_space();
 366
 367       if (c == '%')
 368         {
 369           tok = parse_percent_token();
 370
 371           switch (tok)
 372             {
 373             case TWO_PERCENTS:
 374               return;
 375
 376             case PERCENT_LEFT_CURLY:
 377               copy_definition();
 378               break;
 379
 380             case TOKEN:
 381               parse_token_decl (STOKEN, SNTERM);
 382               break;
 383
 384             case NTERM:
 385               parse_token_decl (SNTERM, STOKEN);
 386               break;
 387
 388             case TYPE:
 389               parse_type_decl();
 390               break;
 391
 392             case START:
 393               parse_start_decl();
 394               break;
 395
 396             case UNION:
 397               parse_union_decl();
 398               break;
 399
 400             case EXPECT:
 401               parse_expect_decl();
 402               break;
 403             case THONG:
 404               parse_thong_decl();
 405               break;
 406             case LEFT:
 407               parse_assoc_decl(LEFT_ASSOC);
 408               break;
 409
 410             case RIGHT:
 411               parse_assoc_decl(RIGHT_ASSOC);
 412               break;
 413
 414             case NONASSOC:
 415               parse_assoc_decl(NON_ASSOC);
 416               break;
 417
 418             case SEMANTIC_PARSER:
 419               if (semantic_parser == 0)
 420                 {
 421                   semantic_parser = 1;
 422                   open_extra_files();
 423                 }
 424               break;
 425
 426             case PURE_PARSER:
 427               pure_parser = 1;
 428               break;
 429
 430             case NOOP:
 431               break;
 432
 433             default:
 434               warns(_("unrecognized: %s"), token_buffer);
 435               skip_to_char('%');
 436           }
 437         }
 438       else if (c == EOF)
 439         fatal(_("no input grammar"));
 440       else
 441         {
 442           warns (_("unknown character: %s"), printable_version(c));
 443           skip_to_char('%');
 444         }
 445     }
 446 }
 447
 448
 449 /* Copy the contents of a `%{ ... %}' into the definitions file.  The
 450    `%{' has already been read.  Return after reading the `%}'.  */
 451
 452 void
 453 copy_definition (void)
 454 {
 455   register int c;
 456   /* -1 while reading a character if prev char was %. */
 457   register int after_percent;
 458
 459   if (!nolinesflag)
 460     fprintf(fattrs, "#line %d \"%s\"\n", lineno, infile);
 461
 462   after_percent = 0;
 463
 464   c = getc (finput);
 465
 466   for (;;)
 467     {
 468       switch (c)
 469         {
 470         case '\n':
 471           putc(c, fattrs);
 472           lineno++;
 473           break;
 474
 475         case '%':
 476           after_percent = -1;
 477           break;
 478
 479         case '\'':
 480         case '"':
 481           copy_string (finput, fattrs, c);
 482           break;
 483
 484         case '/':
 485           putc (c, fattrs);
 486           c = getc (finput);
 487           if (c != '*' && c != '/')
 488             continue;
 489           copy_comment (finput, fattrs, c);
 490           break;
 491
 492         case EOF:
 493           fatal(_("unterminated `%{' definition"));
 494
 495         default:
 496           putc(c, fattrs);
 497         }
 498
 499       c = getc(finput);
 500
 501       if (after_percent)
 502         {
 503           if (c == '}')
 504             return;
 505           putc('%', fattrs);
 506         }
 507       after_percent = 0;
 508
 509     }
 510
 511 }
 512
 513
 514
 515 /* parse what comes after %token or %nterm.
 516 For %token, what_is is STOKEN and what_is_not is SNTERM.
 517 For %nterm, the arguments are reversed.  */
 518
 519 void
 520 parse_token_decl (int what_is, int what_is_not)
 521 {
 522   register int token = 0;
 523   register char *typename = 0;
 524   register struct bucket *symbol = NULL;  /* pts to symbol being defined */
 525   int k;
 526
 527   for (;;)
 528     {
 529       int tmp_char = ungetc (skip_white_space (), finput);
 530
 531       if (tmp_char == '%')
 532         return;
 533       if (tmp_char == EOF)
 534         fatals ("Premature EOF after %s", token_buffer);
 535
 536       token = lex();
 537       if (token == COMMA)
 538         {
 539           symbol = NULL;
 540           continue;
 541         }
 542       if (token == TYPENAME)
 543         {
 544           k = strlen(token_buffer);
 545           typename = NEW2(k + 1, char);
 546           strcpy(typename, token_buffer);
 547           value_components_used = 1;
 548           symbol = NULL;
 549         }
 550       else if (token == IDENTIFIER && *symval->tag == '\"'
 551                 && symbol)
 552         {
 553           translations = 1;
 554           symval->class = STOKEN;
 555           symval->type_name = typename;
 556           symval->user_token_number = symbol->user_token_number;
 557           symbol->user_token_number = SALIAS;
 558
 559           symval->alias = symbol;
 560           symbol->alias = symval;
 561           symbol = NULL;
 562
 563           nsyms--;   /* symbol and symval combined are only one symbol */
 564         }
 565       else if (token == IDENTIFIER)
 566         {
 567           int oldclass = symval->class;
 568           symbol = symval;
 569
 570           if (symbol->class == what_is_not)
 571             warns(_("symbol %s redefined"), symbol->tag);
 572           symbol->class = what_is;
 573           if (what_is == SNTERM && oldclass != SNTERM)
 574             symbol->value = nvars++;
 575
 576           if (typename)
 577             {
 578               if (symbol->type_name == NULL)
 579                 symbol->type_name = typename;
 580               else if (strcmp(typename, symbol->type_name) != 0)
 581                 warns(_("type redeclaration for %s"), symbol->tag);
 582             }
 583         }
 584       else if (symbol && token == NUMBER)
 585         {
 586           symbol->user_token_number = numval;
 587           translations = 1;
 588         }
 589       else
 590         {
 591           warnss(_("`%s' is invalid in %s"),
 592                 token_buffer,
 593                 (what_is == STOKEN) ? "%token" : "%nterm");
 594           skip_to_char('%');
 595         }
 596     }
 597
 598 }
 599
 600 /* parse what comes after %thong
 601         the full syntax is
 602                 %thong <type> token number literal
 603  the <type> or number may be omitted.  The number specifies the
 604  user_token_number.
 605
 606  Two symbols are entered in the table, one for the token symbol and
 607  one for the literal.  Both are given the <type>, if any, from the declaration.
 608  The ->user_token_number of the first is SALIAS and the ->user_token_number
 609  of the second is set to the number, if any, from the declaration.
 610  The two symbols are linked via pointers in their ->alias fields.
 611
 612  during output_defines_table, the symbol is reported
 613  thereafter, only the literal string is retained
 614  it is the literal string that is output to yytname
 615 */
 616
 617 void
 618 parse_thong_decl (void)
 619 {
 620   register int token;
 621   register struct bucket *symbol;
 622   register char *typename = 0;
 623   int k, usrtoknum;
 624
 625   translations = 1;
 626   token = lex();                /* fetch typename or first token */
 627   if (token == TYPENAME) {
 628     k = strlen(token_buffer);
 629     typename = NEW2(k + 1, char);
 630     strcpy(typename, token_buffer);
 631     value_components_used = 1;
 632     token = lex();              /* fetch first token */
 633   }
 634
 635   /* process first token */
 636
 637   if (token != IDENTIFIER)
 638     {
 639       warns(_("unrecognized item %s, expected an identifier"),
 640             token_buffer);
 641       skip_to_char('%');
 642       return;
 643     }
 644   symval->class = STOKEN;
 645   symval->type_name = typename;
 646   symval->user_token_number = SALIAS;
 647   symbol = symval;
 648
 649   token = lex();                /* get number or literal string */
 650
 651   if (token == NUMBER) {
 652     usrtoknum = numval;
 653     token = lex();              /* okay, did number, now get literal */
 654   }
 655   else usrtoknum = 0;
 656
 657   /* process literal string token */
 658
 659   if (token != IDENTIFIER || *symval->tag != '\"')
 660     {
 661       warns(_("expected string constant instead of %s"),
 662             token_buffer);
 663       skip_to_char('%');
 664       return;
 665     }
 666   symval->class = STOKEN;
 667   symval->type_name = typename;
 668   symval->user_token_number = usrtoknum;
 669
 670   symval->alias = symbol;
 671   symbol->alias = symval;
 672
 673   nsyms--;                      /* symbol and symval combined are only one symbol */
 674 }
 675
 676
 677 /* parse what comes after %start */
 678
 679 void
 680 parse_start_decl (void)
 681 {
 682   if (start_flag)
 683     warn(_("multiple %start declarations"));
 684   if (lex() != IDENTIFIER)
 685     warn(_("invalid %start declaration"));
 686   else
 687     {
 688       start_flag = 1;
 689       startval = symval;
 690     }
 691 }
 692
 693
 694
 695 /* read in a %type declaration and record its information for get_type_name to access */
 696
 697 void
 698 parse_type_decl (void)
 699 {
 700   register int k;
 701   register char *name;
 702
 703   if (lex() != TYPENAME)
 704     {
 705       warn(_("%type declaration has no <typename>"));
 706       skip_to_char('%');
 707       return;
 708     }
 709
 710   k = strlen(token_buffer);
 711   name = NEW2(k + 1, char);
 712   strcpy(name, token_buffer);
 713
 714   for (;;)
 715     {
 716       register int t;
 717       int tmp_char = ungetc (skip_white_space (), finput);
 718
 719       if (tmp_char == '%')
 720         return;
 721       if (tmp_char == EOF)
 722         fatals ("Premature EOF after %s", token_buffer);
 723
 724       t = lex();
 725
 726       switch (t)
 727         {
 728
 729         case COMMA:
 730         case SEMICOLON:
 731           break;
 732
 733         case IDENTIFIER:
 734           if (symval->type_name == NULL)
 735             symval->type_name = name;
 736           else if (strcmp(name, symval->type_name) != 0)
 737             warns(_("type redeclaration for %s"), symval->tag);
 738
 739           break;
 740
 741         default:
 742           warns(_("invalid %%type declaration due to item: `%s'"), token_buffer);
 743           skip_to_char('%');
 744         }
 745     }
 746 }
 747
 748
 749
 750 /* read in a %left, %right or %nonassoc declaration and record its information.  */
 751 /* assoc is either LEFT_ASSOC, RIGHT_ASSOC or NON_ASSOC.  */
 752
 753 void
 754 parse_assoc_decl (int assoc)
 755 {
 756   register int k;
 757   register char *name = NULL;
 758   register int prev = 0;
 759
 760   lastprec++;  /* Assign a new precedence level, never 0.  */
 761
 762   for (;;)
 763     {
 764       register int t;
 765       int tmp_char = ungetc (skip_white_space (), finput);
 766
 767       if (tmp_char == '%')
 768         return;
 769       if (tmp_char == EOF)
 770         fatals ("Premature EOF after %s", token_buffer);
 771
 772       t = lex();
 773
 774       switch (t)
 775         {
 776
 777         case TYPENAME:
 778           k = strlen(token_buffer);
 779           name = NEW2(k + 1, char);
 780           strcpy(name, token_buffer);
 781           break;
 782
 783         case COMMA:
 784           break;
 785
 786         case IDENTIFIER:
 787           if (symval->prec != 0)
 788             warns(_("redefining precedence of %s"), symval->tag);
 789           symval->prec = lastprec;
 790           symval->assoc = assoc;
 791           if (symval->class == SNTERM)
 792             warns(_("symbol %s redefined"), symval->tag);
 793           symval->class = STOKEN;
 794           if (name)
 795             { /* record the type, if one is specified */
 796               if (symval->type_name == NULL)
 797                 symval->type_name = name;
 798               else if (strcmp(name, symval->type_name) != 0)
 799                 warns(_("type redeclaration for %s"), symval->tag);
 800             }
 801           break;
 802
 803         case NUMBER:
 804           if (prev == IDENTIFIER)
 805             {
 806               symval->user_token_number = numval;
 807               translations = 1;
 808             }
 809           else
 810             {
 811               warns(_("invalid text (%s) - number should be after identifier"),
 812                         token_buffer);
 813               skip_to_char('%');
 814             }
 815           break;
 816
 817         case SEMICOLON:
 818           return;
 819
 820         default:
 821           warns(_("unexpected item: %s"), token_buffer);
 822           skip_to_char('%');
 823         }
 824
 825       prev = t;
 826
 827     }
 828 }
 829
 830
 831
 832 /* copy the union declaration into fattrs (and fdefines),
 833    where it is made into the
 834    definition of YYSTYPE, the type of elements of the parser value stack.  */
 835
 836 void
 837 parse_union_decl (void)
 838 {
 839   register int c;
 840   register int count;
 841   register int in_comment;
 842   int cplus_comment;
 843
 844   if (typed)
 845     warn(_("multiple %union declarations"));
 846
 847   typed = 1;
 848
 849   if (!nolinesflag)
 850     fprintf(fattrs, "\n#line %d \"%s\"\n", lineno, infile);
 851   else
 852     fprintf(fattrs, "\n");
 853
 854   fprintf(fattrs, "typedef union");
 855   if (fdefines)
 856     fprintf(fdefines, "typedef union");
 857
 858   count = 0;
 859   in_comment = 0;
 860
 861   c = getc(finput);
 862
 863   while (c != EOF)
 864     {
 865       putc(c, fattrs);
 866       if (fdefines)
 867         putc(c, fdefines);
 868
 869       switch (c)
 870         {
 871         case '\n':
 872           lineno++;
 873           break;
 874
 875         case '/':
 876           c = getc(finput);
 877           if (c != '*' && c != '/')
 878             ungetc(c, finput);
 879           else
 880             {
 881               putc(c, fattrs);
 882               if (fdefines)
 883                 putc(c, fdefines);
 884               cplus_comment = (c == '/');
 885               in_comment = 1;
 886               c = getc(finput);
 887               while (in_comment)
 888                 {
 889                   putc(c, fattrs);
 890                   if (fdefines)
 891                     putc(c, fdefines);
 892
 893                   if (c == '\n')
 894                     {
 895                       lineno++;
 896                       if (cplus_comment)
 897                         {
 898                           in_comment = 0;
 899                           break;
 900                         }
 901                     }
 902                   if (c == EOF)
 903                     fatal(_("unterminated comment at end of file"));
 904
 905                   if (!cplus_comment && c == '*')
 906                     {
 907                       c = getc(finput);
 908                       if (c == '/')
 909                         {
 910                           putc('/', fattrs);
 911                           if (fdefines)
 912                             putc('/', fdefines);
 913                           in_comment = 0;
 914                         }
 915                     }
 916                   else
 917                     c = getc(finput);
 918                 }
 919             }
 920           break;
 921
 922
 923         case '{':
 924           count++;
 925           break;
 926
 927         case '}':
 928           if (count == 0)
 929             warn (_("unmatched close-brace (`}')"));
 930           count--;
 931           if (count <= 0)
 932             {
 933               fprintf(fattrs, " YYSTYPE;\n");
 934               if (fdefines)
 935                 fprintf(fdefines, " YYSTYPE;\n");
 936               /* JF don't choke on trailing semi */
 937               c=skip_white_space();
 938               if(c!=';') ungetc(c,finput);
 939               return;
 940             }
 941         }
 942
 943       c = getc(finput);
 944     }
 945 }
 946
 947 /* parse the declaration %expect N which says to expect N
 948    shift-reduce conflicts.  */
 949
 950 void
 951 parse_expect_decl (void)
 952 {
 953   register int c;
 954   register int count;
 955   char buffer[20];
 956
 957   c = getc(finput);
 958   while (c == ' ' || c == '\t')
 959     c = getc(finput);
 960
 961   count = 0;
 962   while (c >= '0' && c <= '9')
 963     {
 964       if (count < 20)
 965         buffer[count++] = c;
 966       c = getc(finput);
 967     }
 968   buffer[count] = 0;
 969
 970   ungetc (c, finput);
 971
 972   if (count <= 0 || count > 10)
 973         warn(_("argument of %expect is not an integer"));
 974   expected_conflicts = atoi (buffer);
 975 }
 976
 977 /* that's all of parsing the declaration section */
 978 \f
 979 /* FINPUT is pointing to a location (i.e., a `@').  Output to FOUTPUT
 980    a reference to this location. STACK_OFFSET is the number of values
 981    in the current rule so far, which says where to find `$0' with
 982    respect to the top of the stack.  */
 983 static inline void
 984 copy_at (FILE *finput, FILE *foutput, int stack_offset)
 985 {
 986   int c;
 987
 988   c = getc (finput);
 989   if (c == '$')
 990     {
 991       fprintf (foutput, "yyloc");
 992       yylsp_needed = 1;
 993     }
 994   else if (isdigit(c) || c == '-')
 995     {
 996       int n;
 997
 998       ungetc (c, finput);
 999       n = read_signed_integer (finput);
1000
1001       fprintf (foutput, "yylsp[%d]", n - stack_offset);
1002       yylsp_needed = 1;
1003     }
1004   else
1005     warns (_("@%s is invalid"), printable_version (c));
1006 }
1007
1008
1009 /* Get the data type (alternative in the union) of the value for symbol n in rule rule.  */
1010
1011 char *
1012 get_type_name (int n, symbol_list *rule)
1013 {
1014   static char *msg = N_("invalid $ value");
1015
1016   register int i;
1017   register symbol_list *rp;
1018
1019   if (n < 0)
1020     {
1021       warn(_(msg));
1022       return NULL;
1023     }
1024
1025   rp = rule;
1026   i = 0;
1027
1028   while (i < n)
1029     {
1030       rp = rp->next;
1031       if (rp == NULL || rp->sym == NULL)
1032         {
1033           warn(_(msg));
1034           return NULL;
1035         }
1036       i++;
1037     }
1038
1039   return (rp->sym->type_name);
1040 }
1041
1042
1043
1044 /* After `%guard' is seen in the input file, copy the actual guard
1045    into the guards file.  If the guard is followed by an action, copy
1046    that into the actions file.  STACK_OFFSET is the number of values
1047    in the current rule so far, which says where to find `$0' with
1048    respect to the top of the stack, for the simple parser in which the
1049    stack is not popped until after the guard is run.  */
1050
1051 void
1052 copy_guard (symbol_list *rule, int stack_offset)
1053 {
1054   register int c;
1055   register int n;
1056   register int count;
1057   register char *type_name;
1058   int brace_flag = 0;
1059
1060   /* offset is always 0 if parser has already popped the stack pointer */
1061   if (semantic_parser) stack_offset = 0;
1062
1063   fprintf(fguard, "\ncase %d:\n", nrules);
1064   if (!nolinesflag)
1065     fprintf (fguard, "#line %d \"%s\"\n", lineno, infile);
1066   putc('{', fguard);
1067
1068   count = 0;
1069   c = getc(finput);
1070
1071   while (brace_flag ? (count > 0) : (c != ';'))
1072     {
1073       switch (c)
1074         {
1075         case '\n':
1076           putc(c, fguard);
1077           lineno++;
1078           break;
1079
1080         case '{':
1081           putc(c, fguard);
1082           brace_flag = 1;
1083           count++;
1084           break;
1085
1086         case '}':
1087           putc(c, fguard);
1088           if (count > 0)
1089             count--;
1090           else
1091             {
1092               warn(_("unmatched right brace (`}')"));
1093               c = getc(finput); /* skip it */
1094             }
1095           break;
1096
1097         case '\'':
1098         case '"':
1099           copy_string (finput, fguard, c);
1100           break;
1101
1102         case '/':
1103           putc (c, fguard);
1104           c = getc (finput);
1105           if (c != '*' && c != '/')
1106             continue;
1107           copy_comment (finput, fguard, c);
1108           break;
1109
1110         case '$':
1111           c = getc(finput);
1112           type_name = NULL;
1113
1114           if (c == '<')
1115             {
1116               register char *cp = token_buffer;
1117
1118               while ((c = getc(finput)) != '>' && c > 0)
1119                 {
1120                   if (cp == token_buffer + maxtoken)
1121                     cp = grow_token_buffer(cp);
1122
1123                   *cp++ = c;
1124                 }
1125               *cp = 0;
1126               type_name = token_buffer;
1127
1128               c = getc(finput);
1129             }
1130
1131           if (c == '$')
1132             {
1133               fprintf(fguard, "yyval");
1134               if (!type_name)
1135                 type_name = rule->sym->type_name;
1136               if (type_name)
1137                 fprintf(fguard, ".%s", type_name);
1138               if(!type_name && typed)
1139                 warns(_("$$ of `%s' has no declared type"), rule->sym->tag);
1140             }
1141           else if (isdigit(c) || c == '-')
1142             {
1143               ungetc (c, finput);
1144               n = read_signed_integer (finput);
1145               c = getc (finput);
1146
1147               if (!type_name && n > 0)
1148                 type_name = get_type_name(n, rule);
1149
1150               fprintf(fguard, "yyvsp[%d]", n - stack_offset);
1151               if (type_name)
1152                 fprintf(fguard, ".%s", type_name);
1153               if (!type_name && typed)
1154                 warnss (_("$%s of `%s' has no declared type"),
1155                         int_to_string(n), rule->sym->tag);
1156               continue;
1157             }
1158           else
1159             warns(_("$%s is invalid"), printable_version(c));
1160           break;
1161
1162         case '@':
1163           copy_at (finput, fguard, stack_offset);
1164           break;
1165
1166         case EOF:
1167           fatal (_("unterminated %%guard clause"));
1168
1169         default:
1170           putc (c, fguard);
1171         }
1172
1173       if (c != '}' || count != 0)
1174         c = getc(finput);
1175     }
1176
1177   c = skip_white_space();
1178
1179   fprintf(fguard, ";\n    break;}");
1180   if (c == '{')
1181     copy_action (rule, stack_offset);
1182   else if (c == '=')
1183     {
1184       c = getc(finput); /* why not skip_white_space -wjh */
1185       if (c == '{')
1186         copy_action (rule, stack_offset);
1187     }
1188   else
1189     ungetc(c, finput);
1190 }
1191
1192
1193
1194 /* Assuming that a `{' has just been seen, copy everything up to the
1195    matching `}' into the actions file.  STACK_OFFSET is the number of
1196    values in the current rule so far, which says where to find `$0'
1197    with respect to the top of the stack.  */
1198
1199 void
1200 copy_action (symbol_list *rule, int stack_offset)
1201 {
1202   register int c;
1203   register int n;
1204   register int count;
1205   register char *type_name;
1206
1207   /* offset is always 0 if parser has already popped the stack pointer */
1208   if (semantic_parser)
1209     stack_offset = 0;
1210
1211   fprintf (faction, "\ncase %d:\n", nrules);
1212   if (!nolinesflag)
1213     fprintf (faction, "#line %d \"%s\"\n", lineno, infile);
1214   putc ('{', faction);
1215
1216   count = 1;
1217   c = getc(finput);
1218
1219   while (count > 0)
1220     {
1221       while (c != '}')
1222         {
1223           switch (c)
1224             {
1225             case '\n':
1226               putc(c, faction);
1227               lineno++;
1228               break;
1229
1230             case '{':
1231               putc(c, faction);
1232               count++;
1233               break;
1234
1235             case '\'':
1236             case '"':
1237               copy_string (finput, faction, c);
1238               break;
1239
1240             case '/':
1241               putc(c, faction);
1242               c = getc(finput);
1243               if (c != '*' && c != '/')
1244                 continue;
1245               copy_comment (finput, faction, c);
1246               break;
1247
1248             case '$':
1249               c = getc(finput);
1250               type_name = NULL;
1251
1252               if (c == '<')
1253                 {
1254                   register char *cp = token_buffer;
1255
1256                   while ((c = getc(finput)) != '>' && c > 0)
1257                     {
1258                       if (cp == token_buffer + maxtoken)
1259                         cp = grow_token_buffer(cp);
1260
1261                       *cp++ = c;
1262                     }
1263                   *cp = 0;
1264                   type_name = token_buffer;
1265                   value_components_used = 1;
1266
1267                   c = getc(finput);
1268                 }
1269               if (c == '$')
1270                 {
1271                   fprintf(faction, "yyval");
1272                   if (!type_name)
1273                     type_name = get_type_name(0, rule);
1274                   if (type_name)
1275                     fprintf(faction, ".%s", type_name);
1276                   if(!type_name && typed)
1277                     warns(_("$$ of `%s' has no declared type"),
1278                           rule->sym->tag);
1279                 }
1280               else if (isdigit(c) || c == '-')
1281                 {
1282                   ungetc (c, finput);
1283                   n = read_signed_integer(finput);
1284                   c = getc(finput);
1285
1286                   if (!type_name && n > 0)
1287                     type_name = get_type_name(n, rule);
1288
1289                   fprintf(faction, "yyvsp[%d]", n - stack_offset);
1290                   if (type_name)
1291                     fprintf(faction, ".%s", type_name);
1292                   if(!type_name && typed)
1293                     warnss(_("$%s of `%s' has no declared type"),
1294                                 int_to_string(n), rule->sym->tag);
1295                   continue;
1296                 }
1297               else
1298                 warns(_("$%s is invalid"), printable_version(c));
1299
1300               break;
1301
1302             case '@':
1303               copy_at (finput, faction, stack_offset);
1304               break;
1305
1306             case EOF:
1307               fatal(_("unmatched `{'"));
1308
1309             default:
1310               putc(c, faction);
1311             }
1312
1313           c = getc(finput);
1314         }
1315
1316       /* above loop exits when c is '}' */
1317
1318       if (--count)
1319         {
1320           putc(c, faction);
1321           c = getc(finput);
1322         }
1323     }
1324
1325   fprintf(faction, ";\n    break;}");
1326 }
1327
1328
1329
1330 /* generate a dummy symbol, a nonterminal,
1331 whose name cannot conflict with the user's names. */
1332
1333 bucket *
1334 gensym (void)
1335 {
1336   register bucket *sym;
1337
1338   sprintf (token_buffer, "@%d", ++gensym_count);
1339   sym = getsym(token_buffer);
1340   sym->class = SNTERM;
1341   sym->value = nvars++;
1342   return (sym);
1343 }
1344
1345 /* Parse the input grammar into a one symbol_list structure.
1346 Each rule is represented by a sequence of symbols: the left hand side
1347 followed by the contents of the right hand side, followed by a null pointer
1348 instead of a symbol to terminate the rule.
1349 The next symbol is the lhs of the following rule.
1350
1351 All guards and actions are copied out to the appropriate files,
1352 labelled by the rule number they apply to.  */
1353
1354 void
1355 readgram (void)
1356 {
1357   register int t;
1358   register bucket *lhs = NULL;
1359   register symbol_list *p;
1360   register symbol_list *p1;
1361   register bucket *bp;
1362
1363   symbol_list *crule;   /* points to first symbol_list of current rule.  */
1364                         /* its symbol is the lhs of the rule.   */
1365   symbol_list *crule1;  /* points to the symbol_list preceding crule.  */
1366
1367   p1 = NULL;
1368
1369   t = lex();
1370
1371   while (t != TWO_PERCENTS && t != ENDFILE)
1372     {
1373       if (t == IDENTIFIER || t == BAR)
1374         {
1375           register int actionflag = 0;
1376           int rulelength = 0;  /* number of symbols in rhs of this rule so far  */
1377           int xactions = 0;     /* JF for error checking */
1378           bucket *first_rhs = 0;
1379
1380           if (t == IDENTIFIER)
1381             {
1382               lhs = symval;
1383
1384               if (!start_flag)
1385                 {
1386                   startval = lhs;
1387                   start_flag = 1;
1388                 }
1389
1390               t = lex();
1391               if (t != COLON)
1392                 {
1393                   warn(_("ill-formed rule: initial symbol not followed by colon"));
1394                   unlex(t);
1395                 }
1396             }
1397
1398           if (nrules == 0 && t == BAR)
1399             {
1400               warn(_("grammar starts with vertical bar"));
1401               lhs = symval;     /* BOGUS: use a random symval */
1402             }
1403           /* start a new rule and record its lhs.  */
1404
1405           nrules++;
1406           nitems++;
1407
1408           record_rule_line ();
1409
1410           p = NEW(symbol_list);
1411           p->sym = lhs;
1412
1413           crule1 = p1;
1414           if (p1)
1415             p1->next = p;
1416           else
1417             grammar = p;
1418
1419           p1 = p;
1420           crule = p;
1421
1422           /* mark the rule's lhs as a nonterminal if not already so.  */
1423
1424           if (lhs->class == SUNKNOWN)
1425             {
1426               lhs->class = SNTERM;
1427               lhs->value = nvars;
1428               nvars++;
1429             }
1430           else if (lhs->class == STOKEN)
1431             warns(_("rule given for %s, which is a token"), lhs->tag);
1432
1433           /* read the rhs of the rule.  */
1434
1435           for (;;)
1436             {
1437               t = lex();
1438               if (t == PREC)
1439                 {
1440                   t = lex();
1441                   crule->ruleprec = symval;
1442                   t = lex();
1443                 }
1444
1445               if (! (t == IDENTIFIER || t == LEFT_CURLY)) break;
1446
1447               /* If next token is an identifier, see if a colon follows it.
1448                  If one does, exit this rule now.  */
1449               if (t == IDENTIFIER)
1450                 {
1451                   register bucket *ssave;
1452                   register int t1;
1453
1454                   ssave = symval;
1455                   t1 = lex();
1456                   unlex(t1);
1457                   symval = ssave;
1458                   if (t1 == COLON) break;
1459
1460                   if(!first_rhs)        /* JF */
1461                     first_rhs = symval;
1462                   /* Not followed by colon =>
1463                      process as part of this rule's rhs.  */
1464                 }
1465
1466               /* If we just passed an action, that action was in the middle
1467                  of a rule, so make a dummy rule to reduce it to a
1468                  non-terminal.  */
1469               if (actionflag)
1470                 {
1471                   register bucket *sdummy;
1472
1473                   /* Since the action was written out with this rule's */
1474                   /* number, we must give the new rule this number */
1475                   /* by inserting the new rule before it.  */
1476
1477                   /* Make a dummy nonterminal, a gensym.  */
1478                   sdummy = gensym();
1479
1480                   /* Make a new rule, whose body is empty,
1481                      before the current one, so that the action
1482                      just read can belong to it.  */
1483                   nrules++;
1484                   nitems++;
1485                   record_rule_line ();
1486                   p = NEW(symbol_list);
1487                   if (crule1)
1488                     crule1->next = p;
1489                   else grammar = p;
1490                   p->sym = sdummy;
1491                   crule1 = NEW(symbol_list);
1492                   p->next = crule1;
1493                   crule1->next = crule;
1494
1495                   /* insert the dummy generated by that rule into this rule.  */
1496                   nitems++;
1497                   p = NEW(symbol_list);
1498                   p->sym = sdummy;
1499                   p1->next = p;
1500                   p1 = p;
1501
1502                   actionflag = 0;
1503                 }
1504
1505               if (t == IDENTIFIER)
1506                 {
1507                   nitems++;
1508                   p = NEW(symbol_list);
1509                   p->sym = symval;
1510                   p1->next = p;
1511                   p1 = p;
1512                 }
1513               else /* handle an action.  */
1514                 {
1515                   copy_action(crule, rulelength);
1516                   actionflag = 1;
1517                   xactions++;   /* JF */
1518                 }
1519               rulelength++;
1520             }   /* end of  read rhs of rule */
1521
1522           /* Put an empty link in the list to mark the end of this rule  */
1523           p = NEW(symbol_list);
1524           p1->next = p;
1525           p1 = p;
1526
1527           if (t == PREC)
1528             {
1529               warn(_("two @prec's in a row"));
1530               t = lex();
1531               crule->ruleprec = symval;
1532               t = lex();
1533             }
1534           if (t == GUARD)
1535             {
1536               if (! semantic_parser)
1537                 warn(_("%%guard present but %%semantic_parser not specified"));
1538
1539               copy_guard(crule, rulelength);
1540               t = lex();
1541             }
1542           else if (t == LEFT_CURLY)
1543             {
1544                 /* This case never occurs -wjh */
1545               if (actionflag)
1546                 warn(_("two actions at end of one rule"));
1547               copy_action(crule, rulelength);
1548               actionflag = 1;
1549               xactions++;       /* -wjh */
1550               t = lex();
1551             }
1552           /* If $$ is being set in default way, warn if any type
1553              mismatch.  */
1554           else if (!xactions
1555                    && first_rhs
1556                    && lhs->type_name != first_rhs->type_name)
1557             {
1558               if (lhs->type_name == 0
1559                   || first_rhs->type_name == 0
1560                   || strcmp(lhs->type_name,first_rhs->type_name))
1561                 warnss(_("type clash (`%s' `%s') on default action"),
1562                        lhs->type_name ? lhs->type_name : "",
1563                        first_rhs->type_name ? first_rhs->type_name : "");
1564             }
1565           /* Warn if there is no default for $$ but we need one.  */
1566           else if (!xactions && !first_rhs && lhs->type_name != 0)
1567             warn(_("empty rule for typed nonterminal, and no action"));
1568           if (t == SEMICOLON)
1569             t = lex();
1570         }
1571 #if 0
1572   /* these things can appear as alternatives to rules.  */
1573 /* NO, they cannot.
1574         a) none of the documentation allows them
1575         b) most of them scan forward until finding a next %
1576                 thus they may swallow lots of intervening rules
1577 */
1578       else if (t == TOKEN)
1579         {
1580           parse_token_decl(STOKEN, SNTERM);
1581           t = lex();
1582         }
1583       else if (t == NTERM)
1584         {
1585           parse_token_decl(SNTERM, STOKEN);
1586           t = lex();
1587         }
1588       else if (t == TYPE)
1589         {
1590           t = get_type();
1591         }
1592       else if (t == UNION)
1593         {
1594           parse_union_decl();
1595           t = lex();
1596         }
1597       else if (t == EXPECT)
1598         {
1599           parse_expect_decl();
1600           t = lex();
1601         }
1602       else if (t == START)
1603         {
1604           parse_start_decl();
1605           t = lex();
1606         }
1607 #endif
1608
1609       else
1610         {
1611           warns(_("invalid input: %s"), token_buffer);
1612           t = lex();
1613         }
1614     }
1615
1616   /* grammar has been read.  Do some checking */
1617
1618   if (nsyms > MAXSHORT)
1619     fatals(_("too many symbols (tokens plus nonterminals); maximum %s"),
1620            int_to_string(MAXSHORT));
1621   if (nrules == 0)
1622     fatal(_("no rules in the input grammar"));
1623
1624   if (typed == 0        /* JF put out same default YYSTYPE as YACC does */
1625       && !value_components_used)
1626     {
1627       /* We used to use `unsigned long' as YYSTYPE on MSDOS,
1628          but it seems better to be consistent.
1629          Most programs should declare their own type anyway.  */
1630       fprintf(fattrs, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1631       if (fdefines)
1632         fprintf(fdefines, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1633     }
1634
1635   /* Report any undefined symbols and consider them nonterminals.  */
1636
1637   for (bp = firstsymbol; bp; bp = bp->next)
1638     if (bp->class == SUNKNOWN)
1639       {
1640         warns(_("symbol %s is used, but is not defined as a token and has no rules"),
1641                         bp->tag);
1642         bp->class = SNTERM;
1643         bp->value = nvars++;
1644       }
1645
1646   ntokens = nsyms - nvars;
1647 }
1648
1649
1650 void
1651 record_rule_line (void)
1652 {
1653   /* Record each rule's source line number in rline table.  */
1654
1655   if (nrules >= rline_allocated)
1656     {
1657       rline_allocated = nrules * 2;
1658       rline = (short *) xrealloc ((char *) rline,
1659                                   rline_allocated * sizeof (short));
1660     }
1661   rline[nrules] = lineno;
1662 }
1663
1664
1665 #if 0
1666 /* read in a %type declaration and record its information for get_type_name to access */
1667 /* this is unused.  it is only called from the #if 0 part of readgram */
1668 static int
1669 get_type (void)
1670 {
1671   register int k;
1672   register int t;
1673   register char *name;
1674
1675   t = lex();
1676
1677   if (t != TYPENAME)
1678     {
1679       warn(_("ill-formed %type declaration"));
1680       return t;
1681     }
1682
1683   k = strlen(token_buffer);
1684   name = NEW2(k + 1, char);
1685   strcpy(name, token_buffer);
1686
1687   for (;;)
1688     {
1689       t = lex();
1690
1691       switch (t)
1692         {
1693         case SEMICOLON:
1694           return (lex());
1695
1696         case COMMA:
1697           break;
1698
1699         case IDENTIFIER:
1700           if (symval->type_name == NULL)
1701             symval->type_name = name;
1702           else if (strcmp(name, symval->type_name) != 0)
1703             warns(_("type redeclaration for %s"), symval->tag);
1704
1705           break;
1706
1707         default:
1708           return (t);
1709         }
1710     }
1711 }
1712 #endif
1713
1714
1715 /* Assign symbol numbers, and write definition of token names into
1716    fdefines.  Set up vectors tags and sprec of names and precedences
1717    of symbols.  */
1718
1719 void
1720 packsymbols (void)
1721 {
1722   register bucket *bp;
1723   register int tokno = 1;
1724   register int i;
1725   register int last_user_token_number;
1726
1727   /* int lossage = 0; JF set but not used */
1728
1729   tags = NEW2(nsyms + 1, char *);
1730   tags[0] = "$";
1731   user_toknums = NEW2(nsyms + 1, int);
1732   user_toknums[0] = 0;
1733
1734   sprec = NEW2(nsyms, short);
1735   sassoc = NEW2(nsyms, short);
1736
1737   max_user_token_number = 256;
1738   last_user_token_number = 256;
1739
1740   for (bp = firstsymbol; bp; bp = bp->next)
1741     {
1742       if (bp->class == SNTERM)
1743         {
1744           bp->value += ntokens;
1745         }
1746       else if (bp->alias)
1747         {
1748           /* this symbol and its alias are a single token defn.
1749              allocate a tokno, and assign to both check agreement of
1750              ->prec and ->assoc fields and make both the same */
1751           if (bp->value == 0)
1752             bp->value = bp->alias->value = tokno++;
1753
1754           if (bp->prec != bp->alias->prec)
1755             {
1756               if (bp->prec != 0 && bp->alias->prec != 0
1757                   && bp->user_token_number == SALIAS)
1758                 warnss(_("conflicting precedences for %s and %s"),
1759                        bp->tag, bp->alias->tag);
1760               if (bp->prec != 0)
1761                 bp->alias->prec = bp->prec;
1762               else
1763                 bp->prec = bp->alias->prec;
1764             }
1765
1766           if (bp->assoc != bp->alias->assoc)
1767             {
1768             if (bp->assoc != 0 && bp->alias->assoc != 0
1769                 && bp->user_token_number == SALIAS)
1770               warnss(_("conflicting assoc values for %s and %s"),
1771                      bp->tag, bp->alias->tag);
1772             if (bp->assoc != 0)
1773               bp->alias->assoc = bp->assoc;
1774             else
1775               bp->assoc = bp->alias->assoc;
1776           }
1777
1778           if (bp->user_token_number == SALIAS)
1779             continue;  /* do not do processing below for SALIASs */
1780
1781         }
1782       else  /* bp->class == STOKEN */
1783         {
1784           bp->value = tokno++;
1785         }
1786
1787       if (bp->class == STOKEN)
1788         {
1789           if (translations && !(bp->user_token_number))
1790             bp->user_token_number = ++last_user_token_number;
1791           if (bp->user_token_number > max_user_token_number)
1792             max_user_token_number = bp->user_token_number;
1793         }
1794
1795       tags[bp->value] = bp->tag;
1796       user_toknums[bp->value] = bp->user_token_number;
1797       sprec[bp->value] = bp->prec;
1798       sassoc[bp->value] = bp->assoc;
1799
1800     }
1801
1802   if (translations)
1803     {
1804       register int i;
1805
1806       token_translations = NEW2(max_user_token_number+1, short);
1807
1808       /* initialize all entries for literal tokens to 2, the internal
1809          token number for $undefined., which represents all invalid
1810          inputs.  */
1811       for (i = 0; i <= max_user_token_number; i++)
1812         token_translations[i] = 2;
1813
1814       for (bp = firstsymbol; bp; bp = bp->next)
1815         {
1816           if (bp->value >= ntokens)
1817             continue;             /* non-terminal */
1818           if (bp->user_token_number == SALIAS)
1819             continue;
1820           if (token_translations[bp->user_token_number] != 2)
1821             warnsss(_("tokens %s and %s both assigned number %s"),
1822                     tags[token_translations[bp->user_token_number]],
1823                     bp->tag,
1824                     int_to_string(bp->user_token_number));
1825           token_translations[bp->user_token_number] = bp->value;
1826         }
1827     }
1828
1829   error_token_number = errtoken->value;
1830
1831   if (! noparserflag)
1832     output_token_defines(ftable);
1833
1834   if (startval->class == SUNKNOWN)
1835     fatals(_("the start symbol %s is undefined"), startval->tag);
1836   else if (startval->class == STOKEN)
1837     fatals(_("the start symbol %s is a token"), startval->tag);
1838
1839   start_symbol = startval->value;
1840
1841   if (definesflag)
1842     {
1843       output_token_defines(fdefines);
1844
1845       if (!pure_parser)
1846         {
1847           if (spec_name_prefix)
1848             fprintf(fdefines, "\nextern YYSTYPE %slval;\n", spec_name_prefix);
1849           else
1850             fprintf(fdefines, "\nextern YYSTYPE yylval;\n");
1851         }
1852
1853       if (semantic_parser)
1854         for (i = ntokens; i < nsyms; i++)
1855           {
1856             /* don't make these for dummy nonterminals made by gensym.  */
1857             if (*tags[i] != '@')
1858               fprintf(fdefines, "#define\tNT%s\t%d\n", tags[i], i);
1859           }
1860 #if 0
1861       /* `fdefines' is now a temporary file, so we need to copy its
1862          contents in `done', so we can't close it here.  */
1863       fclose(fdefines);
1864       fdefines = NULL;
1865 #endif
1866     }
1867 }
1868
1869 /* For named tokens, but not literal ones, define the name.  The value
1870    is the user token number.  */
1871
1872 void
1873 output_token_defines (FILE *file)
1874 {
1875   bucket *bp;
1876   register char *cp, *symbol;
1877   register char c;
1878
1879   for (bp = firstsymbol; bp; bp = bp->next)
1880     {
1881       symbol = bp->tag;                         /* get symbol */
1882
1883       if (bp->value >= ntokens) continue;
1884       if (bp->user_token_number == SALIAS) continue;
1885       if ('\'' == *symbol) continue;    /* skip literal character */
1886       if (bp == errtoken) continue;     /* skip error token */
1887       if ('\"' == *symbol)
1888         {
1889                 /* use literal string only if given a symbol with an alias */
1890                 if (bp->alias)
1891                         symbol = bp->alias->tag;
1892                 else
1893                         continue;
1894         }
1895
1896       /* Don't #define nonliteral tokens whose names contain periods.  */
1897       cp = symbol;
1898       while ((c = *cp++) && c != '.');
1899       if (c != '\0')  continue;
1900
1901       fprintf(file, "#define\t%s\t%d\n", symbol,
1902                 ((translations && ! rawtoknumflag)
1903                         ? bp->user_token_number
1904                         : bp->value));
1905       if (semantic_parser)
1906         fprintf(file, "#define\tT%s\t%d\n", symbol, bp->value);
1907     }
1908
1909   putc('\n', file);
1910 }
1911
1912
1913
1914 /* convert the rules into the representation using rrhs, rlhs and ritems.  */
1915
1916 void
1917 packgram (void)
1918 {
1919   register int itemno;
1920   register int ruleno;
1921   register symbol_list *p;
1922 /*  register bucket *bp; JF unused */
1923
1924   bucket *ruleprec;
1925
1926   ritem = NEW2(nitems + 1, short);
1927   rlhs = NEW2(nrules, short) - 1;
1928   rrhs = NEW2(nrules, short) - 1;
1929   rprec = NEW2(nrules, short) - 1;
1930   rprecsym = NEW2(nrules, short) - 1;
1931   rassoc = NEW2(nrules, short) - 1;
1932
1933   itemno = 0;
1934   ruleno = 1;
1935
1936   p = grammar;
1937   while (p)
1938     {
1939       rlhs[ruleno] = p->sym->value;
1940       rrhs[ruleno] = itemno;
1941       ruleprec = p->ruleprec;
1942
1943       p = p->next;
1944       while (p && p->sym)
1945         {
1946           ritem[itemno++] = p->sym->value;
1947           /* A rule gets by default the precedence and associativity
1948              of the last token in it.  */
1949           if (p->sym->class == STOKEN)
1950             {
1951               rprec[ruleno] = p->sym->prec;
1952               rassoc[ruleno] = p->sym->assoc;
1953             }
1954           if (p) p = p->next;
1955         }
1956
1957       /* If this rule has a %prec,
1958          the specified symbol's precedence replaces the default.  */
1959       if (ruleprec)
1960         {
1961           rprec[ruleno] = ruleprec->prec;
1962           rassoc[ruleno] = ruleprec->assoc;
1963           rprecsym[ruleno] = ruleprec->value;
1964         }
1965
1966       ritem[itemno++] = -ruleno;
1967       ruleno++;
1968
1969       if (p) p = p->next;
1970     }
1971
1972   ritem[itemno] = 0;
1973 }
1974 \f
1975 /* Read a signed integer from STREAM and return its value.  */
1976
1977 int
1978 read_signed_integer (FILE *stream)
1979 {
1980   register int c = getc(stream);
1981   register int sign = 1;
1982   register int n;
1983
1984   if (c == '-')
1985     {
1986       c = getc(stream);
1987       sign = -1;
1988     }
1989   n = 0;
1990   while (isdigit(c))
1991     {
1992       n = 10*n + (c - '0');
1993       c = getc(stream);
1994     }
1995
1996   ungetc(c, stream);
1997
1998   return n * sign;
1999 }