src/reader.c

   1 /* Input parser for bison
   2    Copyright (C) 1984, 1986, 1989, 1992, 1998 Free Software Foundation, Inc.
   3
   4 This file is part of Bison, the GNU Compiler Compiler.
   5
   6 Bison is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2, or (at your option)
   9 any later version.
  10
  11 Bison is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with Bison; see the file COPYING.  If not, write to
  18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  19 Boston, MA 02111-1307, USA.  */
  20
  21
  22 /* read in the grammar specification and record it in the format described in gram.h.
  23   All guards are copied into the fguard file and all actions into faction,
  24   in each case forming the body of a C function (yyguard or yyaction)
  25   which contains a switch statement to decide which guard or action to execute.
  26
  27 The entry point is reader().  */
  28
  29 #include <stdio.h>
  30 #include "system.h"
  31 #include "files.h"
  32 #include "alloc.h"
  33 #include "symtab.h"
  34 #include "lex.h"
  35 #include "gram.h"
  36 #include "machine.h"
  37
  38 #define LTYPESTR        "\n#ifndef YYLTYPE\ntypedef\n  struct yyltype\n\
  39     {\n      int timestamp;\n      int first_line;\n      int first_column;\
  40 \n      int last_line;\n      int last_column;\n      char *text;\n   }\n\
  41   yyltype;\n\n#define YYLTYPE yyltype\n#endif\n\n"
  42
  43 /* Number of slots allocated (but not necessarily used yet) in `rline'  */
  44 int rline_allocated;
  45
  46 extern char *program_name;
  47 extern int definesflag;
  48 extern int nolinesflag;
  49 extern int noparserflag;
  50 extern int rawtoknumflag;
  51 extern bucket *symval;
  52 extern int numval;
  53 extern int expected_conflicts;
  54 extern char *token_buffer;
  55 extern int maxtoken;
  56
  57 extern void init_lex PARAMS((void));
  58 extern char *grow_token_buffer PARAMS((char *));
  59 extern void tabinit PARAMS((void));
  60 extern void output_headers PARAMS((void));
  61 extern void output_trailers PARAMS((void));
  62 extern void free_symtab PARAMS((void));
  63 extern void open_extra_files PARAMS((void));
  64 extern char *int_to_string PARAMS((int));
  65 extern char *printable_version PARAMS((int));
  66 extern void fatal PARAMS((char *));
  67 extern void fatals PARAMS((char *, char *));
  68 extern void warn PARAMS((char *));
  69 extern void warni PARAMS((char *, int));
  70 extern void warns PARAMS((char *, char *));
  71 extern void warnss PARAMS((char *, char *, char *));
  72 extern void warnsss PARAMS((char *, char *, char *, char *));
  73 extern void unlex PARAMS((int));
  74 extern void done PARAMS((int));
  75
  76 extern int skip_white_space PARAMS((void));
  77 extern int parse_percent_token PARAMS((void));
  78 extern int lex PARAMS((void));
  79
  80 typedef
  81   struct symbol_list
  82     {
  83       struct symbol_list *next;
  84       bucket *sym;
  85       bucket *ruleprec;
  86     }
  87   symbol_list;
  88
  89
  90 void reader PARAMS((void));
  91 void reader_output_yylsp PARAMS((FILE *));
  92 void read_declarations PARAMS((void));
  93 void copy_definition PARAMS((void));
  94 void parse_token_decl PARAMS((int, int));
  95 void parse_start_decl PARAMS((void));
  96 void parse_type_decl PARAMS((void));
  97 void parse_assoc_decl PARAMS((int));
  98 void parse_union_decl PARAMS((void));
  99 void parse_expect_decl PARAMS((void));
 100 char *get_type_name PARAMS((int, symbol_list *));
 101 void copy_guard PARAMS((symbol_list *, int));
 102 void parse_thong_decl PARAMS((void));
 103 void copy_action PARAMS((symbol_list *, int));
 104 bucket *gensym PARAMS((void));
 105 void readgram PARAMS((void));
 106 void record_rule_line PARAMS((void));
 107 void packsymbols PARAMS((void));
 108 void output_token_defines PARAMS((FILE *));
 109 void packgram PARAMS((void));
 110 int read_signed_integer PARAMS((FILE *));
 111
 112 #if 0
 113 static int get_type PARAMS((void));
 114 #endif
 115
 116 int lineno;  /* current line of the grammar file; reader starts it at 1 */
 117 symbol_list *grammar;  /* the grammar in list form; reader starts it as NULL */
 118 int start_flag;  /* set to 1 once a valid %start declaration has been read */
 119 bucket *startval;  /* symbol named by %start; NULL until one is given */
 120 char **tags;  /* NOTE(review): not referenced in this part of the file -- confirm meaning */
 121 int *user_toknums;  /* NOTE(review): not referenced in this part of the file -- confirm meaning */
 122
 123 /* Nonzero if components of semantic values are used, implying
 124    they must be unions.  */
 125 static int value_components_used;
 126
 127 static int typed;  /* nonzero if %union has been seen.  */
 128
 129 static int lastprec;  /* incremented for each %left, %right or %nonassoc seen */
 130
 131 static int gensym_count;  /* incremented for each generated symbol */
 132
 133 static bucket *errtoken;  /* the `error' token; reader gives it user token number 256 */
 134 static bucket *undeftoken;  /* `$undefined.', standing for all undefined literal tokens */
 135
 136 /* Nonzero if any action or guard uses the @n construct.  */
 137 static int yylsp_needed;
 138
 139 extern char *version_string;
 140
 141
 142 static void
 143 skip_to_char (int target)
 144 {
 145   int c;
 146   if (target == '\n')
 147     warn(_("   Skipping to next \\n"));
 148   else
 149     warni(_("   Skipping to next %c"), target);
 150
 151   do
 152     c = skip_white_space();
 153   while (c != target && c != EOF);
 154   if (c != EOF)
 155     ungetc(c, finput);
 156 }
 157
 158
 159 void
 160 reader (void)
 161 {
 162   start_flag = 0;
 163   startval = NULL;  /* start symbol not specified yet. */
 164
 165 #if 0
 166   translations = 0;  /* initially assume token number translation not needed.  */
 167 #endif
 168   /* Nowadays translations is always set to 1,
 169      since we give `error' a user-token-number
 170      to satisfy the Posix demand for YYERRCODE==256.  */
 171   translations = 1;
 172
 173   nsyms = 1;
 174   nvars = 0;
 175   nrules = 0;
 176   nitems = 0;
 177   rline_allocated = 10;
 178   rline = NEW2(rline_allocated, short);
 179
 180   typed = 0;
 181   lastprec = 0;
 182
 183   gensym_count = 0;
 184
 185   semantic_parser = 0;
 186   pure_parser = 0;
 187   yylsp_needed = 0;
 188
 189   grammar = NULL;
 190
 191   init_lex();
 192   lineno = 1;
 193
 194   /* initialize the symbol table.  */
 195   tabinit();
 196   /* construct the error token */
 197   errtoken = getsym("error");
 198   errtoken->class = STOKEN;
 199   errtoken->user_token_number = 256; /* Value specified by posix.  */
 200   /* construct a token that represents all undefined literal tokens. */
 201   /* it is always token number 2.  */
 202   undeftoken = getsym("$undefined.");
 203   undeftoken->class = STOKEN;
 204   undeftoken->user_token_number = 2;
 205   /* Read the declaration section.  Copy %{ ... %} groups to ftable and fdefines file.
 206      Also notice any %token, %left, etc. found there.  */
 207   if (noparserflag)
 208     fprintf(ftable, "\n/*  Bison-generated parse tables, made from %s\n",
 209                 infile);
 210   else
 211     fprintf(ftable, "\n/*  A Bison parser, made from %s\n", infile);
 212   fprintf(ftable, " by  %s  */\n\n", version_string);
 213   fprintf(ftable, "#define YYBISON 1  /* Identify Bison output.  */\n\n");
 214   read_declarations();
 215   /* start writing the guard and action files, if they are needed.  */
 216   output_headers();
 217   /* read in the grammar, build grammar in list form.  write out guards and actions.  */
 218   readgram();
 219   /* Now we know whether we need the line-number stack.
 220      If we do, write its type into the .tab.h file.  */
 221   if (fdefines)
 222     reader_output_yylsp(fdefines);
 223   /* write closing delimiters for actions and guards.  */
 224   output_trailers();
 225   if (yylsp_needed)
 226     fprintf(ftable, "#define YYLSP_NEEDED\n\n");
 227   /* assign the symbols their symbol numbers.
 228      Write #defines for the token symbols into fdefines if requested.  */
 229   packsymbols();
 230   /* convert the grammar into the format described in gram.h.  */
 231   packgram();
 232   /* free the symbol table data structure
 233      since symbols are now all referred to by symbol number.  */
 234   free_symtab();
 235 }
 236
 237 void
 238 reader_output_yylsp (FILE *f)
 239 {
 240   if (yylsp_needed)
 241     fprintf(f, LTYPESTR);
 242 }
 243
 244 /* read from finput until %% is seen.  Discard the %%.
 245 Handle any % declarations,
 246 and copy the contents of any %{ ... %} groups to fattrs.  */
 247
 248 void
 249 read_declarations (void)
 250 {
 251   register int c;
 252   register int tok;
 253
 254   for (;;)
 255     {
 256       c = skip_white_space();
 257
 258       if (c == '%')
 259         {
 260           tok = parse_percent_token();
 261
 262           switch (tok)
 263             {
 264             case TWO_PERCENTS:
 265               return;
 266
 267             case PERCENT_LEFT_CURLY:
 268               copy_definition();
 269               break;
 270
 271             case TOKEN:
 272               parse_token_decl (STOKEN, SNTERM);
 273               break;
 274
 275             case NTERM:
 276               parse_token_decl (SNTERM, STOKEN);
 277               break;
 278
 279             case TYPE:
 280               parse_type_decl();
 281               break;
 282
 283             case START:
 284               parse_start_decl();
 285               break;
 286
 287             case UNION:
 288               parse_union_decl();
 289               break;
 290
 291             case EXPECT:
 292               parse_expect_decl();
 293               break;
 294             case THONG:
 295               parse_thong_decl();
 296               break;
 297             case LEFT:
 298               parse_assoc_decl(LEFT_ASSOC);
 299               break;
 300
 301             case RIGHT:
 302               parse_assoc_decl(RIGHT_ASSOC);
 303               break;
 304
 305             case NONASSOC:
 306               parse_assoc_decl(NON_ASSOC);
 307               break;
 308
 309             case SEMANTIC_PARSER:
 310               if (semantic_parser == 0)
 311                 {
 312                   semantic_parser = 1;
 313                   open_extra_files();
 314                 }
 315               break;
 316
 317             case PURE_PARSER:
 318               pure_parser = 1;
 319               break;
 320
 321             case NOOP:
 322               break;
 323
 324             default:
 325               warns(_("unrecognized: %s"), token_buffer);
 326               skip_to_char('%');
 327           }
 328         }
 329       else if (c == EOF)
 330         fatal(_("no input grammar"));
 331       else
 332         {
 333                 char buff[100];
 334                 sprintf(buff, _("unknown character: %s"), printable_version(c));
 335                 warn(buff);
 336                 skip_to_char('%');
 337         }
 338     }
 339 }
 340
 341
  342 /* Copy the contents of a %{ ... %} group to fattrs.
  343 The %{ has already been read.  Return after reading the %}.
      Unless nolinesflag is set, a #line directive giving the current
      lineno and infile is written first.  String and character
      literals and both comment styles are copied through as units, so
      that a %} inside them does not end the group.  lineno is advanced
      for each newline that is copied.  End of file anywhere inside the
      group is reported through fatal.  */
  344
  345 void
  346 copy_definition (void)
  347 {
  348   register int c;
  349   register int match;  /* quote character that closes the literal being copied */
  350   register int ended;  /* nonzero once the end of the current comment has been copied */
  351   register int after_percent;  /* -1 while reading a character if prev char was % */
  352   int cplus_comment;  /* nonzero for a // comment, zero for a slash-star comment */
  353
  354   if (!nolinesflag)
  355     fprintf(fattrs, "#line %d \"%s\"\n", lineno, infile);
  356
  357   after_percent = 0;
  358
  359   c = getc(finput);
  360
  361   for (;;)
  362     {
  363       switch (c)
  364         {
  365         case '\n':
  366           putc(c, fattrs);
  367           lineno++;
  368           break;
  369
  370         case '%':
  371           after_percent = -1;  /* hold the % back: it is written below only if no } follows */
  372           break;
  373
  374         case '\'':
  375         case '"':
  376           match = c;
  377           putc(c, fattrs);
  378           c = getc(finput);
  379
  380           while (c != match)
  381             {
  382               if (c == EOF)
  383                 fatal(_("unterminated string at end of file"));
  384               if (c == '\n')
  385                 {
  386                   warn(_("unterminated string"));
  387                   ungetc(c, finput);  /* the newline is re-read and counted by the outer loop */
  388                   c = match;  /* pretend the closing quote was seen; it is written after the loop */
  389                   continue;
  390                 }
  391
  392               putc(c, fattrs);
  393
  394               if (c == '\\')
  395                 {
  396                   c = getc(finput);  /* the escaped character is copied without being examined */
  397                   if (c == EOF)
  398                     fatal(_("unterminated string at end of file"));
  399                   putc(c, fattrs);
  400                   if (c == '\n')
  401                     lineno++;
  402                 }
  403
  404               c = getc(finput);
  405             }
  406
  407           putc(c, fattrs);  /* the closing quote */
  408           break;
  409
  410         case '/':
  411           putc(c, fattrs);
  412           c = getc(finput);
  413           if (c != '*' && c != '/')
  414             continue;  /* not a comment: let the switch handle the character just read */
  415
  416           cplus_comment = (c == '/');
  417           putc(c, fattrs);
  418           c = getc(finput);
  419
  420           ended = 0;
  421           while (!ended)
  422             {
  423               if (!cplus_comment && c == '*')
  424                 {
  425                   while (c == '*')  /* copy a whole run of stars */
  426                     {
  427                       putc(c, fattrs);
  428                       c = getc(finput);
  429                     }
  430
  431                   if (c == '/')  /* a slash right after the stars closes the comment */
  432                     {
  433                       putc(c, fattrs);
  434                       ended = 1;
  435                     }
  436                 }
  437               else if (c == '\n')
  438                 {
  439                   lineno++;
  440                   putc(c, fattrs);
  441                   if (cplus_comment)
  442                     ended = 1;  /* a // comment stops at the end of the line */
  443                   else
  444                     c = getc(finput);
  445                 }
  446               else if (c == EOF)
  447                 fatal(_("unterminated comment in `%{' definition"));
  448               else
  449                 {
  450                   putc(c, fattrs);
  451                   c = getc(finput);
  452                 }
  453             }
  454
  455           break;
  456
  457         case EOF:
  458           fatal(_("unterminated `%{' definition"));  /* NOTE(review): no break follows; presumably fatal does not return -- confirm */
  459
  460         default:
  461           putc(c, fattrs);
  462         }
  463
  464       c = getc(finput);
  465
  466       if (after_percent)
  467         {
  468           if (c == '}')
  469             return;  /* %} seen: neither character is copied */
  470           putc('%', fattrs);  /* an ordinary %: write it now; c is handled on the next pass */
  471         }
  472       after_percent = 0;
  473
  474     }
  475
  476 }
 477
 478
 479
 480 /* parse what comes after %token or %nterm.
 481 For %token, what_is is STOKEN and what_is_not is SNTERM.
 482 For %nterm, the arguments are reversed.  */
 483
 484 void
 485 parse_token_decl (int what_is, int what_is_not)
 486 {
 487   register int token = 0;
 488   register char *typename = 0;
 489   register struct bucket *symbol = NULL;  /* pts to symbol being defined */
 490   int k;
 491
 492   for (;;)
 493     {
 494       int tmp_char = ungetc (skip_white_space (), finput);
 495
 496       if (tmp_char == '%')
 497         return;
 498       if (tmp_char == EOF)
 499         fatals ("Premature EOF after %s", token_buffer);
 500
 501       token = lex();
 502       if (token == COMMA)
 503         {
 504           symbol = NULL;
 505           continue;
 506         }
 507       if (token == TYPENAME)
 508         {
 509           k = strlen(token_buffer);
 510           typename = NEW2(k + 1, char);
 511           strcpy(typename, token_buffer);
 512           value_components_used = 1;
 513           symbol = NULL;
 514         }
 515       else if (token == IDENTIFIER && *symval->tag == '\"'
 516                 && symbol)
 517         {
 518           translations = 1;
 519           symval->class = STOKEN;
 520           symval->type_name = typename;
 521           symval->user_token_number = symbol->user_token_number;
 522           symbol->user_token_number = SALIAS;
 523
 524           symval->alias = symbol;
 525           symbol->alias = symval;
 526           symbol = NULL;
 527
 528           nsyms--;   /* symbol and symval combined are only one symbol */
 529         }
 530       else if (token == IDENTIFIER)
 531         {
 532           int oldclass = symval->class;
 533           symbol = symval;
 534
 535           if (symbol->class == what_is_not)
 536             warns(_("symbol %s redefined"), symbol->tag);
 537           symbol->class = what_is;
 538           if (what_is == SNTERM && oldclass != SNTERM)
 539             symbol->value = nvars++;
 540
 541           if (typename)
 542             {
 543               if (symbol->type_name == NULL)
 544                 symbol->type_name = typename;
 545               else if (strcmp(typename, symbol->type_name) != 0)
 546                 warns(_("type redeclaration for %s"), symbol->tag);
 547             }
 548         }
 549       else if (symbol && token == NUMBER)
 550         {
 551           symbol->user_token_number = numval;
 552           translations = 1;
 553         }
 554       else
 555         {
 556           warnss(_("`%s' is invalid in %s"),
 557                 token_buffer,
 558                 (what_is == STOKEN) ? "%token" : "%nterm");
 559           skip_to_char('%');
 560         }
 561     }
 562
 563 }
 564
 565 /* parse what comes after %thong
 566         the full syntax is
 567                 %thong <type> token number literal
 568  the <type> or number may be omitted.  The number specifies the
 569  user_token_number.
 570
 571  Two symbols are entered in the table, one for the token symbol and
 572  one for the literal.  Both are given the <type>, if any, from the declaration.
 573  The ->user_token_number of the first is SALIAS and the ->user_token_number
 574  of the second is set to the number, if any, from the declaration.
 575  The two symbols are linked via pointers in their ->alias fields.
 576
 577  during output_defines_table, the symbol is reported
 578  thereafter, only the literal string is retained
 579  it is the literal string that is output to yytname
 580 */
 581
 582 void
 583 parse_thong_decl (void)
 584 {
 585   register int token;
 586   register struct bucket *symbol;
 587   register char *typename = 0;
 588   int k, usrtoknum;
 589
 590   translations = 1;
 591   token = lex();                /* fetch typename or first token */
 592   if (token == TYPENAME) {
 593     k = strlen(token_buffer);
 594     typename = NEW2(k + 1, char);
 595     strcpy(typename, token_buffer);
 596     value_components_used = 1;
 597     token = lex();              /* fetch first token */
 598   }
 599
 600   /* process first token */
 601
 602   if (token != IDENTIFIER)
 603     {
 604       warns(_("unrecognized item %s, expected an identifier"),
 605             token_buffer);
 606       skip_to_char('%');
 607       return;
 608     }
 609   symval->class = STOKEN;
 610   symval->type_name = typename;
 611   symval->user_token_number = SALIAS;
 612   symbol = symval;
 613
 614   token = lex();                /* get number or literal string */
 615
 616   if (token == NUMBER) {
 617     usrtoknum = numval;
 618     token = lex();              /* okay, did number, now get literal */
 619   }
 620   else usrtoknum = 0;
 621
 622   /* process literal string token */
 623
 624   if (token != IDENTIFIER || *symval->tag != '\"')
 625     {
 626       warns(_("expected string constant instead of %s"),
 627             token_buffer);
 628       skip_to_char('%');
 629       return;
 630     }
 631   symval->class = STOKEN;
 632   symval->type_name = typename;
 633   symval->user_token_number = usrtoknum;
 634
 635   symval->alias = symbol;
 636   symbol->alias = symval;
 637
 638   nsyms--;                      /* symbol and symval combined are only one symbol */
 639 }
 640
 641
 642 /* parse what comes after %start */
 643
 644 void
 645 parse_start_decl (void)
 646 {
 647   if (start_flag)
 648     warn(_("multiple %start declarations"));
 649   if (lex() != IDENTIFIER)
 650     warn(_("invalid %start declaration"));
 651   else
 652     {
 653       start_flag = 1;
 654       startval = symval;
 655     }
 656 }
 657
 658
 659
 660 /* read in a %type declaration and record its information for get_type_name to access */
 661
 662 void
 663 parse_type_decl (void)
 664 {
 665   register int k;
 666   register char *name;
 667
 668   if (lex() != TYPENAME)
 669     {
 670       warn(_("%type declaration has no <typename>"));
 671       skip_to_char('%');
 672       return;
 673     }
 674
 675   k = strlen(token_buffer);
 676   name = NEW2(k + 1, char);
 677   strcpy(name, token_buffer);
 678
 679   for (;;)
 680     {
 681       register int t;
 682       int tmp_char = ungetc (skip_white_space (), finput);
 683
 684       if (tmp_char == '%')
 685         return;
 686       if (tmp_char == EOF)
 687         fatals ("Premature EOF after %s", token_buffer);
 688
 689       t = lex();
 690
 691       switch (t)
 692         {
 693
 694         case COMMA:
 695         case SEMICOLON:
 696           break;
 697
 698         case IDENTIFIER:
 699           if (symval->type_name == NULL)
 700             symval->type_name = name;
 701           else if (strcmp(name, symval->type_name) != 0)
 702             warns(_("type redeclaration for %s"), symval->tag);
 703
 704           break;
 705
 706         default:
 707           warns(_("invalid %%type declaration due to item: `%s'"), token_buffer);
 708           skip_to_char('%');
 709         }
 710     }
 711 }
 712
 713
 714
 715 /* read in a %left, %right or %nonassoc declaration and record its information.  */
 716 /* assoc is either LEFT_ASSOC, RIGHT_ASSOC or NON_ASSOC.  */
 717
 718 void
 719 parse_assoc_decl (int assoc)
 720 {
 721   register int k;
 722   register char *name = NULL;
 723   register int prev = 0;
 724
 725   lastprec++;  /* Assign a new precedence level, never 0.  */
 726
 727   for (;;)
 728     {
 729       register int t;
 730       int tmp_char = ungetc (skip_white_space (), finput);
 731
 732       if (tmp_char == '%')
 733         return;
 734       if (tmp_char == EOF)
 735         fatals ("Premature EOF after %s", token_buffer);
 736
 737       t = lex();
 738
 739       switch (t)
 740         {
 741
 742         case TYPENAME:
 743           k = strlen(token_buffer);
 744           name = NEW2(k + 1, char);
 745           strcpy(name, token_buffer);
 746           break;
 747
 748         case COMMA:
 749           break;
 750
 751         case IDENTIFIER:
 752           if (symval->prec != 0)
 753             warns(_("redefining precedence of %s"), symval->tag);
 754           symval->prec = lastprec;
 755           symval->assoc = assoc;
 756           if (symval->class == SNTERM)
 757             warns(_("symbol %s redefined"), symval->tag);
 758           symval->class = STOKEN;
 759           if (name)
 760             { /* record the type, if one is specified */
 761               if (symval->type_name == NULL)
 762                 symval->type_name = name;
 763               else if (strcmp(name, symval->type_name) != 0)
 764                 warns(_("type redeclaration for %s"), symval->tag);
 765             }
 766           break;
 767
 768         case NUMBER:
 769           if (prev == IDENTIFIER)
 770             {
 771               symval->user_token_number = numval;
 772               translations = 1;
 773             }
 774           else
 775             {
 776               warns(_("invalid text (%s) - number should be after identifier"),
 777                         token_buffer);
 778               skip_to_char('%');
 779             }
 780           break;
 781
 782         case SEMICOLON:
 783           return;
 784
 785         default:
 786           warns(_("unexpected item: %s"), token_buffer);
 787           skip_to_char('%');
 788         }
 789
 790       prev = t;
 791
 792     }
 793 }
 794
 795
 796
  797 /* Copy the union declaration into fattrs (and fdefines, when that
  798    file is open), where it is made into the
  799    definition of YYSTYPE, the type of elements of the parser value stack.
       The text is copied from just after %union up to the brace that
       balances the first open brace, with "typedef union" written in
       front and " YYSTYPE;" behind.  A semicolon following the closing
       brace is swallowed.  A second %union draws a warning but is still
       copied.  Comments are copied as units, so braces inside them are
       not counted.
       NOTE(review): string and character literals get no such
       treatment, so a brace inside one is counted; and if end of file
       is reached outside a comment the loop simply ends without
       writing " YYSTYPE;" or reporting anything.  */
  800
  801 void
  802 parse_union_decl (void)
  803 {
  804   register int c;
  805   register int count;  /* nesting depth of open braces */
  806   register int in_comment;
  807   int cplus_comment;  /* nonzero for a // comment, zero for a slash-star comment */
  808
  809   if (typed)
  810     warn(_("multiple %union declarations"));
  811
  812   typed = 1;
  813
  814   if (!nolinesflag)
  815     fprintf(fattrs, "\n#line %d \"%s\"\n", lineno, infile);
  816   else
  817     fprintf(fattrs, "\n");
  818
  819   fprintf(fattrs, "typedef union");
  820   if (fdefines)
  821     fprintf(fdefines, "typedef union");
  822
  823   count = 0;
  824   in_comment = 0;
  825
  826   c = getc(finput);
  827
  828   while (c != EOF)
  829     {
  830       putc(c, fattrs);  /* every character is copied before it is examined */
  831       if (fdefines)
  832         putc(c, fdefines);
  833
  834       switch (c)
  835         {
  836         case '\n':
  837           lineno++;
  838           break;
  839
  840         case '/':
  841           c = getc(finput);
  842           if (c != '*' && c != '/')
  843             ungetc(c, finput);  /* not a comment: re-read the character at the bottom of the loop */
  844           else
  845             {
  846               putc(c, fattrs);
  847               if (fdefines)
  848                 putc(c, fdefines);
  849               cplus_comment = (c == '/');
  850               in_comment = 1;
  851               c = getc(finput);
  852               while (in_comment)
  853                 {
  854                   putc(c, fattrs);
  855                   if (fdefines)
  856                     putc(c, fdefines);
  857
  858                   if (c == '\n')
  859                     {
  860                       lineno++;
  861                       if (cplus_comment)
  862                         {
  863                           in_comment = 0;  /* a // comment stops at the end of the line */
  864                           break;
  865                         }
  866                     }
  867                   if (c == EOF)
  868                     fatal(_("unterminated comment at end of file"));
  869
  870                   if (!cplus_comment && c == '*')
  871                     {
  872                       c = getc(finput);
  873                       if (c == '/')  /* star then slash closes the comment */
  874                         {
  875                           putc('/', fattrs);
  876                           if (fdefines)
  877                             putc('/', fdefines);
  878                           in_comment = 0;
  879                         }
  880                     }
  881                   else
  882                     c = getc(finput);
  883                 }
  884             }
  885           break;
  886
  887
  888         case '{':
  889           count++;
  890           break;
  891
  892         case '}':
  893           if (count == 0)
  894             warn (_("unmatched close-brace (`}')"));
  895           count--;
  896           if (count <= 0)  /* the outermost brace has closed (or there never was one) */
  897             {
  898               fprintf(fattrs, " YYSTYPE;\n");
  899               if (fdefines)
  900                 fprintf(fdefines, " YYSTYPE;\n");
  901               /* JF don't choke on trailing semi */
  902               c=skip_white_space();
  903               if(c!=';') ungetc(c,finput);
  904               return;
  905             }
  906         }
  907
  908       c = getc(finput);
  909     }
  910 }
 911
 912 /* parse the declaration %expect N which says to expect N
 913    shift-reduce conflicts.  */
 914
 915 void
 916 parse_expect_decl (void)
 917 {
 918   register int c;
 919   register int count;
 920   char buffer[20];
 921
 922   c = getc(finput);
 923   while (c == ' ' || c == '\t')
 924     c = getc(finput);
 925
 926   count = 0;
 927   while (c >= '0' && c <= '9')
 928     {
 929       if (count < 20)
 930         buffer[count++] = c;
 931       c = getc(finput);
 932     }
 933   buffer[count] = 0;
 934
 935   ungetc (c, finput);
 936
 937   if (count <= 0 || count > 10)
 938         warn(_("argument of %expect is not an integer"));
 939   expected_conflicts = atoi (buffer);
 940 }
 941
 942 /* that's all of parsing the declaration section */
 943 \f
 944 /* Get the data type (alternative in the union) of the value for symbol n in rule rule.  */
 945
 946 char *
 947 get_type_name (int n, symbol_list *rule)
 948 {
 949   static char *msg = N_("invalid $ value");
 950
 951   register int i;
 952   register symbol_list *rp;
 953
 954   if (n < 0)
 955     {
 956       warn(_(msg));
 957       return NULL;
 958     }
 959
 960   rp = rule;
 961   i = 0;
 962
 963   while (i < n)
 964     {
 965       rp = rp->next;
 966       if (rp == NULL || rp->sym == NULL)
 967         {
 968           warn(_(msg));
 969           return NULL;
 970         }
 971       i++;
 972     }
 973
 974   return (rp->sym->type_name);
 975 }
 976
 977
 978 /* after %guard is seen in the input file,
 979 copy the actual guard into the guards file.
 980 If the guard is followed by an action, copy that into the actions file.
 981 stack_offset is the number of values in the current rule so far,
 982 which says where to find $0 with respect to the top of the stack,
 983 for the simple parser in which the stack is not popped until after the guard is run.  */
 984
 985 void
 986 copy_guard (symbol_list *rule, int stack_offset)
 987 {
 988   register int c;
 989   register int n;
 990   register int count;
 991   register int match;
 992   register int ended;
 993   register char *type_name;
 994   int brace_flag = 0;
 995   int cplus_comment;
 996
 997   /* offset is always 0 if parser has already popped the stack pointer */
 998   if (semantic_parser) stack_offset = 0;
 999
1000   fprintf(fguard, "\ncase %d:\n", nrules);
1001   if (!nolinesflag)
1002     fprintf(fguard, "#line %d \"%s\"\n", lineno, infile);
1003   putc('{', fguard);
1004
1005   count = 0;
1006   c = getc(finput);
1007
1008   while (brace_flag ? (count > 0) : (c != ';'))
1009     {
1010       switch (c)
1011         {
1012         case '\n':
1013           putc(c, fguard);
1014           lineno++;
1015           break;
1016
1017         case '{':
1018           putc(c, fguard);
1019           brace_flag = 1;
1020           count++;
1021           break;
1022
1023         case '}':
1024           putc(c, fguard);
1025           if (count > 0)
1026             count--;
1027           else
1028             {
1029               warn(_("unmatched right brace (`}')"));
1030               c = getc(finput); /* skip it */
1031             }
1032           break;
1033
1034         case '\'':
1035         case '"':
1036           match = c;
1037           putc(c, fguard);
1038           c = getc(finput);
1039
1040           while (c != match)
1041             {
1042               if (c == EOF)
1043                 fatal(_("unterminated string at end of file"));
1044               if (c == '\n')
1045                 {
1046                   warn(_("unterminated string"));
1047                   ungetc(c, finput);
1048                   c = match;            /* invent terminator */
1049                   continue;
1050                 }
1051
1052               putc(c, fguard);
1053
1054               if (c == '\\')
1055                 {
1056                   c = getc(finput);
1057                   if (c == EOF)
1058                     fatal(_("unterminated string"));
1059                   putc(c, fguard);
1060                   if (c == '\n')
1061                     lineno++;
1062                 }
1063
1064               c = getc(finput);
1065             }
1066
1067           putc(c, fguard);
1068           break;
1069
1070         case '/':
1071           putc(c, fguard);
1072           c = getc(finput);
1073           if (c != '*' && c != '/')
1074             continue;
1075
1076           cplus_comment = (c == '/');
1077           putc(c, fguard);
1078           c = getc(finput);
1079
1080           ended = 0;
1081           while (!ended)
1082             {
1083               if (!cplus_comment && c == '*')
1084                 {
1085                   while (c == '*')
1086                     {
1087                       putc(c, fguard);
1088                       c = getc(finput);
1089                     }
1090
1091                   if (c == '/')
1092                     {
1093                       putc(c, fguard);
1094                       ended = 1;
1095                     }
1096                 }
1097               else if (c == '\n')
1098                 {
1099                   lineno++;
1100                   putc(c, fguard);
1101                   if (cplus_comment)
1102                     ended = 1;
1103                   else
1104                     c = getc(finput);
1105                 }
1106               else if (c == EOF)
1107                 fatal(_("unterminated comment"));
1108               else
1109                 {
1110                   putc(c, fguard);
1111                   c = getc(finput);
1112                 }
1113             }
1114
1115           break;
1116
1117         case '$':
1118           c = getc(finput);
1119           type_name = NULL;
1120
1121           if (c == '<')
1122             {
1123               register char *cp = token_buffer;
1124
1125               while ((c = getc(finput)) != '>' && c > 0)
1126                 {
1127                   if (cp == token_buffer + maxtoken)
1128                     cp = grow_token_buffer(cp);
1129
1130                   *cp++ = c;
1131                 }
1132               *cp = 0;
1133               type_name = token_buffer;
1134
1135               c = getc(finput);
1136             }
1137
1138           if (c == '$')
1139             {
1140               fprintf(fguard, "yyval");
1141               if (!type_name) type_name = rule->sym->type_name;
1142               if (type_name)
1143                 fprintf(fguard, ".%s", type_name);
1144               if(!type_name && typed)
1145                 warns(_("$$ of `%s' has no declared type"), rule->sym->tag);
1146             }
1147
1148           else if (isdigit(c) || c == '-')
1149             {
1150               ungetc (c, finput);
1151               n = read_signed_integer(finput);
1152               c = getc(finput);
1153
1154               if (!type_name && n > 0)
1155                 type_name = get_type_name(n, rule);
1156
1157               fprintf(fguard, "yyvsp[%d]", n - stack_offset);
1158               if (type_name)
1159                 fprintf(fguard, ".%s", type_name);
1160               if(!type_name && typed)
1161                 warnss(_("$%s of `%s' has no declared type"), int_to_string(n), rule->sym->tag);
1162               continue;
1163             }
1164           else
1165             warns(_("$%s is invalid"), printable_version(c));
1166
1167           break;
1168
1169         case '@':
1170           c = getc(finput);
1171           if (isdigit(c) || c == '-')
1172             {
1173               ungetc (c, finput);
1174               n = read_signed_integer(finput);
1175               c = getc(finput);
1176             }
1177           else
1178             {
1179               warns(_("@%s is invalid"), printable_version(c));
1180               n = 1;
1181             }
1182
1183           fprintf(fguard, "yylsp[%d]", n - stack_offset);
1184           yylsp_needed = 1;
1185
1186           continue;
1187
1188         case EOF:
1189           fatal(_("unterminated %%guard clause"));
1190
1191         default:
1192           putc(c, fguard);
1193         }
1194
1195       if (c != '}' || count != 0)
1196         c = getc(finput);
1197     }
1198
1199   c = skip_white_space();
1200
1201   fprintf(fguard, ";\n    break;}");
1202   if (c == '{')
1203     copy_action(rule, stack_offset);
1204   else if (c == '=')
1205     {
1206       c = getc(finput); /* why not skip_white_space -wjh */
1207       if (c == '{')
1208         copy_action(rule, stack_offset);
1209     }
1210   else
1211     ungetc(c, finput);
1212 }
1213
1214
1215
1216 /* Assuming that a { has just been seen, copy everything up to the matching }
1217 into the actions file.
1218 stack_offset is the number of values in the current rule so far,
1219 which says where to find $0 with respect to the top of the stack.  */
1220
1221 void
1222 copy_action (symbol_list *rule, int stack_offset)
1223 {
1224   register int c;
1225   register int n;
1226   register int count;
1227   register int match;
1228   register int ended;
1229   register char *type_name;
1230   int cplus_comment;
1231
1232   /* offset is always 0 if parser has already popped the stack pointer */
1233   if (semantic_parser) stack_offset = 0;
1234
1235   fprintf(faction, "\ncase %d:\n", nrules);
1236   if (!nolinesflag)
1237     fprintf(faction, "#line %d \"%s\"\n", lineno, infile);
1238   putc('{', faction);
1239
1240   count = 1;
1241   c = getc(finput);
1242
1243   while (count > 0)
1244     {
1245       while (c != '}')
1246         {
1247           switch (c)
1248             {
1249             case '\n':
1250               putc(c, faction);
1251               lineno++;
1252               break;
1253
1254             case '{':
1255               putc(c, faction);
1256               count++;
1257               break;
1258
1259             case '\'':
1260             case '"':
1261               match = c;
1262               putc(c, faction);
1263               c = getc(finput);
1264
1265               while (c != match)
1266                 {
1267                   if (c == '\n')
1268                     {
1269                       warn(_("unterminated string"));
1270                       ungetc(c, finput);
1271                       c = match;
1272                       continue;
1273                     }
1274                   else if (c == EOF)
1275                     fatal(_("unterminated string at end of file"));
1276
1277                   putc(c, faction);
1278
1279                   if (c == '\\')
1280                     {
1281                       c = getc(finput);
1282                       if (c == EOF)
1283                         fatal(_("unterminated string"));
1284                       putc(c, faction);
1285                       if (c == '\n')
1286                         lineno++;
1287                     }
1288
1289                   c = getc(finput);
1290                 }
1291
1292               putc(c, faction);
1293               break;
1294
1295             case '/':
1296               putc(c, faction);
1297               c = getc(finput);
1298               if (c != '*' && c != '/')
1299                 continue;
1300
1301               cplus_comment = (c == '/');
1302               putc(c, faction);
1303               c = getc(finput);
1304
1305               ended = 0;
1306               while (!ended)
1307                 {
1308                   if (!cplus_comment && c == '*')
1309                     {
1310                       while (c == '*')
1311                         {
1312                           putc(c, faction);
1313                           c = getc(finput);
1314                         }
1315
1316                       if (c == '/')
1317                         {
1318                           putc(c, faction);
1319                           ended = 1;
1320                         }
1321                     }
1322                   else if (c == '\n')
1323                     {
1324                       lineno++;
1325                       putc(c, faction);
1326                       if (cplus_comment)
1327                         ended = 1;
1328                       else
1329                         c = getc(finput);
1330                     }
1331                   else if (c == EOF)
1332                     fatal(_("unterminated comment"));
1333                   else
1334                     {
1335                       putc(c, faction);
1336                       c = getc(finput);
1337                     }
1338                 }
1339
1340               break;
1341
1342             case '$':
1343               c = getc(finput);
1344               type_name = NULL;
1345
1346               if (c == '<')
1347                 {
1348                   register char *cp = token_buffer;
1349
1350                   while ((c = getc(finput)) != '>' && c > 0)
1351                     {
1352                       if (cp == token_buffer + maxtoken)
1353                         cp = grow_token_buffer(cp);
1354
1355                       *cp++ = c;
1356                     }
1357                   *cp = 0;
1358                   type_name = token_buffer;
1359                   value_components_used = 1;
1360
1361                   c = getc(finput);
1362                 }
1363               if (c == '$')
1364                 {
1365                   fprintf(faction, "yyval");
1366                   if (!type_name) type_name = get_type_name(0, rule);
1367                   if (type_name)
1368                     fprintf(faction, ".%s", type_name);
1369                   if(!type_name && typed)
1370                     warns(_("$$ of `%s' has no declared type"), rule->sym->tag);
1371                 }
1372               else if (isdigit(c) || c == '-')
1373                 {
1374                   ungetc (c, finput);
1375                   n = read_signed_integer(finput);
1376                   c = getc(finput);
1377
1378                   if (!type_name && n > 0)
1379                     type_name = get_type_name(n, rule);
1380
1381                   fprintf(faction, "yyvsp[%d]", n - stack_offset);
1382                   if (type_name)
1383                     fprintf(faction, ".%s", type_name);
1384                   if(!type_name && typed)
1385                     warnss(_("$%s of `%s' has no declared type"),
1386                                 int_to_string(n), rule->sym->tag);
1387                   continue;
1388                 }
1389               else
1390                 warns(_("$%s is invalid"), printable_version(c));
1391
1392               break;
1393
1394             case '@':
1395               c = getc(finput);
1396               if (isdigit(c) || c == '-')
1397                 {
1398                   ungetc (c, finput);
1399                   n = read_signed_integer(finput);
1400                   c = getc(finput);
1401                 }
1402               else
1403                 {
1404                   warn(_("invalid @-construct"));
1405                   n = 1;
1406                 }
1407
1408               fprintf(faction, "yylsp[%d]", n - stack_offset);
1409               yylsp_needed = 1;
1410
1411               continue;
1412
1413             case EOF:
1414               fatal(_("unmatched `{'"));
1415
1416             default:
1417               putc(c, faction);
1418             }
1419
1420           c = getc(finput);
1421         }
1422
1423       /* above loop exits when c is '}' */
1424
1425       if (--count)
1426         {
1427           putc(c, faction);
1428           c = getc(finput);
1429         }
1430     }
1431
1432   fprintf(faction, ";\n    break;}");
1433 }
1434
1435
1436
1437 /* generate a dummy symbol, a nonterminal,
1438 whose name cannot conflict with the user's names. */
1439
1440 bucket *
1441 gensym (void)
1442 {
1443   register bucket *sym;
1444
1445   sprintf (token_buffer, "@%d", ++gensym_count);
1446   sym = getsym(token_buffer);
1447   sym->class = SNTERM;
1448   sym->value = nvars++;
1449   return (sym);
1450 }
1451
/* Parse the rules section of the input grammar into one chain of
symbol_list nodes, whose head is stored in `grammar'.
Each rule is represented by a sequence of symbols: the left hand side
followed by the contents of the right hand side, followed by a null pointer
instead of a symbol to terminate the rule.
The next symbol is the lhs of the following rule.

All guards and actions are copied out to the appropriate files,
labelled by the rule number they apply to.

Reading stops at a second `%%' or at end of file.  Along the way this
counts rules (nrules) and items (nitems), records each rule's line,
turns an action in the middle of a rule into a separate empty rule for
a generated dummy nonterminal, and afterwards emits the default YYSTYPE
when no types were used, classes every still-unknown symbol as a
nonterminal, and computes ntokens.  */

void
readgram (void)
{
  register int t;
  register bucket *lhs = NULL;
  register symbol_list *p;
  register symbol_list *p1;
  register bucket *bp;

  symbol_list *crule;   /* points to first symbol_list of current rule.  */
                        /* its symbol is the lhs of the rule.   */
  symbol_list *crule1;  /* points to the symbol_list preceding crule.  */

  /* p1 always points to the last node appended to the chain so far.  */
  p1 = NULL;

  t = lex();

  while (t != TWO_PERCENTS && t != ENDFILE)
    {
      if (t == IDENTIFIER || t == BAR)
        {
          register int actionflag = 0;
          int rulelength = 0;  /* number of symbols in rhs of this rule so far  */
          int xactions = 0;     /* JF for error checking */
          bucket *first_rhs = 0;

          /* An identifier starts a rule with a new lhs; a bar starts
             another alternative that reuses the previous lhs.  */
          if (t == IDENTIFIER)
            {
              lhs = symval;

              /* The lhs of the first rule becomes the start symbol
                 unless start_flag is already set.  */
              if (!start_flag)
                {
                  startval = lhs;
                  start_flag = 1;
                }

              t = lex();
              if (t != COLON)
                {
                  warn(_("ill-formed rule: initial symbol not followed by colon"));
                  unlex(t);
                }
            }

          /* NOTE(review): here lhs is taken from whatever symval last
             held, and it is dereferenced below; confirm that symval is
             never null at this point.  */
          if (nrules == 0 && t == BAR)
            {
              warn(_("grammar starts with vertical bar"));
              lhs = symval;     /* BOGUS: use a random symval */
            }
          /* start a new rule and record its lhs.  */

          nrules++;
          nitems++;

          record_rule_line ();

          p = NEW(symbol_list);
          p->sym = lhs;

          /* Remember the node before this rule so that a dummy rule can
             later be spliced in ahead of it.  */
          crule1 = p1;
          if (p1)
            p1->next = p;
          else
            grammar = p;

          p1 = p;
          crule = p;

          /* mark the rule's lhs as a nonterminal if not already so.  */

          if (lhs->class == SUNKNOWN)
            {
              lhs->class = SNTERM;
              lhs->value = nvars;
              nvars++;
            }
          else if (lhs->class == STOKEN)
            warns(_("rule given for %s, which is a token"), lhs->tag);

          /* read the rhs of the rule.  */

          for (;;)
            {
              t = lex();
              /* On a PREC token, the next token's symbol is recorded as
                 this rule's precedence symbol, then the token after it
                 is fetched.  */
              if (t == PREC)
                {
                  t = lex();
                  crule->ruleprec = symval;
                  t = lex();
                }

              /* Anything other than a symbol or an action ends the rhs.  */
              if (! (t == IDENTIFIER || t == LEFT_CURLY)) break;

              /* If next token is an identifier, see if a colon follows it.
                 If one does, exit this rule now.  */
              if (t == IDENTIFIER)
                {
                  register bucket *ssave;
                  register int t1;

                  /* Peek one token ahead and push it back; symval is
                     saved and restored around the peek.  */
                  ssave = symval;
                  t1 = lex();
                  unlex(t1);
                  symval = ssave;
                  if (t1 == COLON) break;

                  if(!first_rhs)        /* JF */
                    first_rhs = symval;
                  /* Not followed by colon =>
                     process as part of this rule's rhs.  */
                }

              /* If we just passed an action, that action was in the middle
                 of a rule, so make a dummy rule to reduce it to a
                 non-terminal.  */
              if (actionflag)
                {
                  register bucket *sdummy;

                  /* Since the action was written out with this rule's */
                  /* number, we must give the new rule this number */
                  /* by inserting the new rule before it.  */

                  /* Make a dummy nonterminal, a gensym.  */
                  sdummy = gensym();

                  /* Make a new rule, whose body is empty,
                     before the current one, so that the action
                     just read can belong to it.  */
                  nrules++;
                  nitems++;
                  record_rule_line ();
                  p = NEW(symbol_list);
                  if (crule1)
                    crule1->next = p;
                  else grammar = p;
                  p->sym = sdummy;
                  /* This second node has no symbol set here: it is the
                     end marker of the dummy rule, and it becomes the
                     node preceding the current rule.  */
                  crule1 = NEW(symbol_list);
                  p->next = crule1;
                  crule1->next = crule;

                  /* insert the dummy generated by that rule into this rule.  */
                  nitems++;
                  p = NEW(symbol_list);
                  p->sym = sdummy;
                  p1->next = p;
                  p1 = p;

                  actionflag = 0;
                }

              if (t == IDENTIFIER)
                {
                  nitems++;
                  p = NEW(symbol_list);
                  p->sym = symval;
                  p1->next = p;
                  p1 = p;
                }
              else /* handle an action.  */
                {
                  copy_action(crule, rulelength);
                  actionflag = 1;
                  xactions++;   /* JF */
                }
              /* Incremented for symbols and for actions alike.  */
              rulelength++;
            }   /* end of  read rhs of rule */

          /* Put an empty link in the list to mark the end of this rule  */
          p = NEW(symbol_list);
          p1->next = p;
          p1 = p;

          /* t can only be PREC here when the token following a PREC
             clause handled in the loop above was itself PREC.  */
          if (t == PREC)
            {
              warn(_("two @prec's in a row"));
              t = lex();
              crule->ruleprec = symval;
              t = lex();
            }
          if (t == GUARD)
            {
              if (! semantic_parser)
                warn(_("%%guard present but %%semantic_parser not specified"));

              copy_guard(crule, rulelength);
              t = lex();
            }
          else if (t == LEFT_CURLY)
            {
                /* This case never occurs -wjh */
              if (actionflag)  warn(_("two actions at end of one rule"));
              copy_action(crule, rulelength);
              actionflag = 1;
              xactions++;       /* -wjh */
              t = lex();
            }
          /* If $$ is being set in default way,
             warn if any type mismatch.  */
          else if (!xactions && first_rhs && lhs->type_name != first_rhs->type_name)
            {
              /* The pointers differ; warn unless both names exist and
                 compare equal as strings.  */
              if (lhs->type_name == 0 || first_rhs->type_name == 0
                  || strcmp(lhs->type_name,first_rhs->type_name))
                warnss(_("type clash (`%s' `%s') on default action"),
                        lhs->type_name ? lhs->type_name : "",
                        first_rhs->type_name ? first_rhs->type_name : "");
            }
          /* Warn if there is no default for $$ but we need one.  */
          else if (!xactions && !first_rhs && lhs->type_name != 0)
            warn(_("empty rule for typed nonterminal, and no action"));
          /* The semicolon that ends a rule is optional.  */
          if (t == SEMICOLON)
            t = lex();
        }
#if 0
  /* these things can appear as alternatives to rules.  */
/* NO, they cannot.
        a) none of the documentation allows them
        b) most of them scan forward until finding a next %
                thus they may swallow lots of intervening rules
*/
      else if (t == TOKEN)
        {
          parse_token_decl(STOKEN, SNTERM);
          t = lex();
        }
      else if (t == NTERM)
        {
          parse_token_decl(SNTERM, STOKEN);
          t = lex();
        }
      else if (t == TYPE)
        {
          t = get_type();
        }
      else if (t == UNION)
        {
          parse_union_decl();
          t = lex();
        }
      else if (t == EXPECT)
        {
          parse_expect_decl();
          t = lex();
        }
      else if (t == START)
        {
          parse_start_decl();
          t = lex();
        }
#endif

      else
        {
          /* Not the start of a rule: report the token and skip it.  */
          warns(_("invalid input: %s"), token_buffer);
          t = lex();
        }
    }

  /* grammar has been read.  Do some checking */

  if (nsyms > MAXSHORT)
    fatals(_("too many symbols (tokens plus nonterminals); maximum %s"),
           int_to_string(MAXSHORT));
  if (nrules == 0)
    fatal(_("no rules in the input grammar"));

  if (typed == 0        /* JF put out same default YYSTYPE as YACC does */
      && !value_components_used)
    {
      /* We used to use `unsigned long' as YYSTYPE on MSDOS,
         but it seems better to be consistent.
         Most programs should declare their own type anyway.  */
      fprintf(fattrs, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
      if (fdefines)
        fprintf(fdefines, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
    }

  /* Report any undefined symbols and consider them nonterminals.  */

  for (bp = firstsymbol; bp; bp = bp->next)
    if (bp->class == SUNKNOWN)
      {
        warns(_("symbol %s is used, but is not defined as a token and has no rules"),
                        bp->tag);
        bp->class = SNTERM;
        bp->value = nvars++;
      }

  /* Whatever is not a nonterminal is a token.  */
  ntokens = nsyms - nvars;
}
1751
1752
1753 void
1754 record_rule_line (void)
1755 {
1756   /* Record each rule's source line number in rline table.  */
1757
1758   if (nrules >= rline_allocated)
1759     {
1760       rline_allocated = nrules * 2;
1761       rline = (short *) xrealloc ((char *) rline,
1762                                   rline_allocated * sizeof (short));
1763     }
1764   rline[nrules] = lineno;
1765 }
1766
1767
#if 0
/* Read a %type declaration and store its type name in each symbol it
   lists, for get_type_name to find later.  Returns the first token that
   is not part of the declaration: the token after a closing semicolon,
   or the unexpected token itself.
   This is unused; it is only called from the #if 0 part of readgram.  */
static int
get_type (void)
{
  register int len;
  register int tok;
  register char *type;

  tok = lex();
  if (tok != TYPENAME)
    {
      /* NOTE(review): other messages in this file spell a literal
         percent sign as `%%'; confirm which form warn expects.  */
      warn(_("ill-formed %type declaration"));
      return tok;
    }

  /* Keep a private copy: token_buffer is overwritten by later tokens.  */
  len = strlen(token_buffer);
  type = NEW2(len + 1, char);
  strcpy(type, token_buffer);

  while (1)
    {
      tok = lex();

      if (tok == SEMICOLON)
        return (lex());

      /* Commas between the symbols are simply skipped.  */
      if (tok == COMMA)
        continue;

      if (tok != IDENTIFIER)
        return (tok);

      if (symval->type_name == NULL)
        symval->type_name = type;
      else if (strcmp(type, symval->type_name) != 0)
        warns(_("type redeclaration for %s"), symval->tag);
    }
}
#endif
1816
1817
1818 /* assign symbol numbers, and write definition of token names into fdefines.
1819 Set up vectors tags and sprec of names and precedences of symbols.  */
1820
1821 void
1822 packsymbols (void)
1823 {
1824   register bucket *bp;
1825   register int tokno = 1;
1826   register int i;
1827   register int last_user_token_number;
1828
1829   /* int lossage = 0; JF set but not used */
1830
1831   tags = NEW2(nsyms + 1, char *);
1832   tags[0] = "$";
1833   user_toknums = NEW2(nsyms + 1, int);
1834   user_toknums[0] = 0;
1835
1836   sprec = NEW2(nsyms, short);
1837   sassoc = NEW2(nsyms, short);
1838
1839   max_user_token_number = 256;
1840   last_user_token_number = 256;
1841
1842   for (bp = firstsymbol; bp; bp = bp->next)
1843     {
1844       if (bp->class == SNTERM)
1845         {
1846           bp->value += ntokens;
1847         }
1848       else if (bp->alias)
1849         {
1850                 /* this symbol and its alias are a single token defn.
1851                   allocate a tokno, and assign to both
1852                   check agreement of ->prec and ->assoc fields
1853                         and make both the same
1854                 */
1855                 if (bp->value == 0)
1856                         bp->value = bp->alias->value = tokno++;
1857
1858                 if (bp->prec != bp->alias->prec) {
1859                         if (bp->prec != 0 && bp->alias->prec != 0
1860                                         && bp->user_token_number == SALIAS)
1861                                 warnss(_("conflicting precedences for %s and %s"),
1862                                         bp->tag, bp->alias->tag);
1863                         if (bp->prec != 0) bp->alias->prec = bp->prec;
1864                         else bp->prec = bp->alias->prec;
1865                 }
1866
1867                 if (bp->assoc != bp->alias->assoc) {
1868                         if (bp->assoc != 0 && bp->alias->assoc != 0
1869                                         && bp->user_token_number == SALIAS)
1870                                 warnss(_("conflicting assoc values for %s and %s"),
1871                                         bp->tag, bp->alias->tag);
1872                         if (bp->assoc != 0) bp->alias->assoc = bp->assoc;
1873                         else bp->assoc = bp->alias->assoc;
1874                 }
1875
1876                 if (bp->user_token_number == SALIAS)
1877                         continue;  /* do not do processing below for SALIASs */
1878
1879         }
1880       else  /* bp->class == STOKEN */
1881         {
1882           bp->value = tokno++;
1883         }
1884
1885       if (bp->class == STOKEN)
1886         {
1887           if (translations && !(bp->user_token_number))
1888             bp->user_token_number = ++last_user_token_number;
1889           if (bp->user_token_number > max_user_token_number)
1890             max_user_token_number = bp->user_token_number;
1891         }
1892
1893       tags[bp->value] = bp->tag;
1894       user_toknums[bp->value] = bp->user_token_number;
1895       sprec[bp->value] = bp->prec;
1896       sassoc[bp->value] = bp->assoc;
1897
1898     }
1899
1900   if (translations)
1901     {
1902       register int i;
1903
1904       token_translations = NEW2(max_user_token_number+1, short);
1905
1906       /* initialize all entries for literal tokens to 2,
1907          the internal token number for $undefined.,
1908          which represents all invalid inputs.  */
1909       for (i = 0; i <= max_user_token_number; i++)
1910         token_translations[i] = 2;
1911
1912       for (bp = firstsymbol; bp; bp = bp->next)
1913         {
1914           if (bp->value >= ntokens) continue;             /* non-terminal */
1915           if (bp->user_token_number == SALIAS) continue;
1916           if (token_translations[bp->user_token_number] != 2)
1917             warnsss(_("tokens %s and %s both assigned number %s"),
1918                               tags[token_translations[bp->user_token_number]],
1919                               bp->tag,
1920                               int_to_string(bp->user_token_number));
1921           token_translations[bp->user_token_number] = bp->value;
1922         }
1923     }
1924
1925   error_token_number = errtoken->value;
1926
1927   if (! noparserflag)
1928     output_token_defines(ftable);
1929
1930   if (startval->class == SUNKNOWN)
1931     fatals(_("the start symbol %s is undefined"), startval->tag);
1932   else if (startval->class == STOKEN)
1933     fatals(_("the start symbol %s is a token"), startval->tag);
1934
1935   start_symbol = startval->value;
1936
1937   if (definesflag)
1938     {
1939       output_token_defines(fdefines);
1940
1941       if (!pure_parser)
1942         {
1943           if (spec_name_prefix)
1944             fprintf(fdefines, "\nextern YYSTYPE %slval;\n", spec_name_prefix);
1945           else
1946             fprintf(fdefines, "\nextern YYSTYPE yylval;\n");
1947         }
1948
1949       if (semantic_parser)
1950         for (i = ntokens; i < nsyms; i++)
1951           {
1952             /* don't make these for dummy nonterminals made by gensym.  */
1953             if (*tags[i] != '@')
1954               fprintf(fdefines, "#define\tNT%s\t%d\n", tags[i], i);
1955           }
1956 #if 0
1957       /* `fdefines' is now a temporary file, so we need to copy its
1958          contents in `done', so we can't close it here.  */
1959       fclose(fdefines);
1960       fdefines = NULL;
1961 #endif
1962     }
1963 }
1964
1965 /* For named tokens, but not literal ones, define the name.
1966    The value is the user token number.
1967 */
1968 void
1969 output_token_defines (FILE *file)
1970 {
1971   bucket *bp;
1972   register char *cp, *symbol;
1973   register char c;
1974
1975   for (bp = firstsymbol; bp; bp = bp->next)
1976     {
1977       symbol = bp->tag;                         /* get symbol */
1978
1979       if (bp->value >= ntokens) continue;
1980       if (bp->user_token_number == SALIAS) continue;
1981       if ('\'' == *symbol) continue;    /* skip literal character */
1982       if (bp == errtoken) continue;     /* skip error token */
1983       if ('\"' == *symbol)
1984         {
1985                 /* use literal string only if given a symbol with an alias */
1986                 if (bp->alias)
1987                         symbol = bp->alias->tag;
1988                 else
1989                         continue;
1990         }
1991
1992       /* Don't #define nonliteral tokens whose names contain periods.  */
1993       cp = symbol;
1994       while ((c = *cp++) && c != '.');
1995       if (c != '\0')  continue;
1996
1997       fprintf(file, "#define\t%s\t%d\n", symbol,
1998                 ((translations && ! rawtoknumflag)
1999                         ? bp->user_token_number
2000                         : bp->value));
2001       if (semantic_parser)
2002         fprintf(file, "#define\tT%s\t%d\n", symbol, bp->value);
2003     }
2004
2005   putc('\n', file);
2006 }
2007
2008
2009
2010 /* convert the rules into the representation using rrhs, rlhs and ritems.  */
2011
2012 void
2013 packgram (void)
2014 {
2015   register int itemno;
2016   register int ruleno;
2017   register symbol_list *p;
2018 /*  register bucket *bp; JF unused */
2019
2020   bucket *ruleprec;
2021
2022   ritem = NEW2(nitems + 1, short);
2023   rlhs = NEW2(nrules, short) - 1;
2024   rrhs = NEW2(nrules, short) - 1;
2025   rprec = NEW2(nrules, short) - 1;
2026   rprecsym = NEW2(nrules, short) - 1;
2027   rassoc = NEW2(nrules, short) - 1;
2028
2029   itemno = 0;
2030   ruleno = 1;
2031
2032   p = grammar;
2033   while (p)
2034     {
2035       rlhs[ruleno] = p->sym->value;
2036       rrhs[ruleno] = itemno;
2037       ruleprec = p->ruleprec;
2038
2039       p = p->next;
2040       while (p && p->sym)
2041         {
2042           ritem[itemno++] = p->sym->value;
2043           /* A rule gets by default the precedence and associativity
2044              of the last token in it.  */
2045           if (p->sym->class == STOKEN)
2046             {
2047               rprec[ruleno] = p->sym->prec;
2048               rassoc[ruleno] = p->sym->assoc;
2049             }
2050           if (p) p = p->next;
2051         }
2052
2053       /* If this rule has a %prec,
2054          the specified symbol's precedence replaces the default.  */
2055       if (ruleprec)
2056         {
2057           rprec[ruleno] = ruleprec->prec;
2058           rassoc[ruleno] = ruleprec->assoc;
2059           rprecsym[ruleno] = ruleprec->value;
2060         }
2061
2062       ritem[itemno++] = -ruleno;
2063       ruleno++;
2064
2065       if (p) p = p->next;
2066     }
2067
2068   ritem[itemno] = 0;
2069 }
2070 \f
/* Read a signed integer from STREAM and return its value.

   An optional leading `-' is accepted, followed by decimal digits.  The
   first character that is not a digit is pushed back onto STREAM.  If no
   digits are present the result is 0.  A magnitude too large for an int
   is clamped to the largest int value instead of overflowing, since the
   digits come straight from the grammar file.  */

int
read_signed_integer (FILE *stream)
{
  /* The largest int value, computed from the unsigned range because this
     file does not include <limits.h> itself.  */
  const int largest = (int) (~0u >> 1);
  register int c = getc(stream);
  register int sign = 1;
  register int n;

  if (c == '-')
    {
      c = getc(stream);
      sign = -1;
    }
  n = 0;
  while (isdigit(c))
    {
      int digit = c - '0';

      /* True exactly when 10*n + digit would exceed LARGEST.  */
      if (n > (largest - digit) / 10)
        n = largest;
      else
        n = 10*n + digit;
      c = getc(stream);
    }

  ungetc(c, stream);

  return n * sign;
}