src/reader.c

   1 /* Input parser for bison
   2    Copyright (C) 1984, 86, 89, 92, 98, 2000 Free Software Foundation, Inc.
   3
   4    This file is part of Bison, the GNU Compiler Compiler.
   5
   6    Bison is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 2, or (at your option)
   9    any later version.
  10
  11    Bison is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with Bison; see the file COPYING.  If not, write to
  18    the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  19    Boston, MA 02111-1307, USA.  */
  20
  21
  22 /* Read in the grammar specification and record it in the format
  23    described in gram.h.  All guards are copied into the fguard file
  24    and all actions into faction, in each case forming the body of a C
  25    function (yyguard or yyaction) which contains a switch statement to
  26    decide which guard or action to execute.
  27
  28    The entry point is reader ().  */
  29
  30 #include <stdio.h>
  31 #include "system.h"
  32 #include "files.h"
  33 #include "alloc.h"
  34 #include "symtab.h"
  35 #include "lex.h"
  36 #include "gram.h"
  37 #include "machine.h"
  38
/* Text of a fallback declaration of YYLTYPE, the location record with
   line/column bounds.  reader_output_yylsp writes it verbatim when some
   action or guard has used the @n construct.  The #ifndef in the text
   means a YYLTYPE already defined by the user takes precedence.  The
   string must not contain `%', since it is handed to fprintf as a
   format.  */
#define LTYPESTR        "\n#ifndef YYLTYPE\ntypedef\n  struct yyltype\n\
    {\n      int timestamp;\n      int first_line;\n      int first_column;\
\n      int last_line;\n      int last_column;\n      char *text;\n   }\n\
  yyltype;\n\n#define YYLTYPE yyltype\n#endif\n\n"
  43
  44 /* Number of slots allocated (but not necessarily used yet) in `rline'  */
  45 int rline_allocated;
  46
  47 extern char *program_name;
  48 extern int definesflag;
  49 extern int nolinesflag;
  50 extern int noparserflag;
  51 extern int rawtoknumflag;
  52 extern bucket *symval;
  53 extern int numval;
  54 extern int expected_conflicts;
  55 extern char *token_buffer;
  56 extern int maxtoken;
  57
  58 extern void init_lex PARAMS((void));
  59 extern char *grow_token_buffer PARAMS((char *));
  60 extern void tabinit PARAMS((void));
  61 extern void output_headers PARAMS((void));
  62 extern void output_trailers PARAMS((void));
  63 extern void free_symtab PARAMS((void));
  64 extern void open_extra_files PARAMS((void));
  65 extern char *int_to_string PARAMS((int));
  66 extern char *printable_version PARAMS((int));
  67 extern void fatal PARAMS((char *));
  68 extern void fatals PARAMS((char *, char *));
  69 extern void warn PARAMS((char *));
  70 extern void warni PARAMS((char *, int));
  71 extern void warns PARAMS((char *, char *));
  72 extern void warnss PARAMS((char *, char *, char *));
  73 extern void warnsss PARAMS((char *, char *, char *, char *));
  74 extern void unlex PARAMS((int));
  75 extern void done PARAMS((int));
  76
  77 extern int skip_white_space PARAMS((void));
  78 extern int parse_percent_token PARAMS((void));
  79 extern int lex PARAMS((void));
  80
/* One node of a singly linked list of symbols, used while the grammar
   is still held in list form (see `grammar').  */
typedef
  struct symbol_list
    {
      /* Following node, or NULL at the end of the list.  */
      struct symbol_list *next;
      /* Symbol stored in this node.  get_type_name treats a node whose
         sym is NULL as lying beyond the last component of the rule.  */
      bucket *sym;
      /* NOTE(review): not referenced in this part of the file;
         presumably the symbol named by %prec for the rule -- confirm.  */
      bucket *ruleprec;
    }
  symbol_list;
  89
  90
  91 void reader PARAMS((void));
  92 void reader_output_yylsp PARAMS((FILE *));
  93 void read_declarations PARAMS((void));
  94 void copy_definition PARAMS((void));
  95 void parse_token_decl PARAMS((int, int));
  96 void parse_start_decl PARAMS((void));
  97 void parse_type_decl PARAMS((void));
  98 void parse_assoc_decl PARAMS((int));
  99 void parse_union_decl PARAMS((void));
 100 void parse_expect_decl PARAMS((void));
 101 char *get_type_name PARAMS((int, symbol_list *));
 102 void copy_guard PARAMS((symbol_list *, int));
 103 void parse_thong_decl PARAMS((void));
 104 void copy_action PARAMS((symbol_list *, int));
 105 bucket *gensym PARAMS((void));
 106 void readgram PARAMS((void));
 107 void record_rule_line PARAMS((void));
 108 void packsymbols PARAMS((void));
 109 void output_token_defines PARAMS((FILE *));
 110 void packgram PARAMS((void));
 111 int read_signed_integer PARAMS((FILE *));
 112
 113 #if 0
 114 static int get_type PARAMS((void));
 115 #endif
 116
/* Current line of the input file.  reader sets it to 1; the copying
   routines increment it for every newline they consume.  */
int lineno;
/* The grammar in list form; reset to NULL by reader.  */
symbol_list *grammar;
/* Nonzero once a valid %start declaration has been parsed.  */
int start_flag;
/* Symbol named by %start, or NULL if none has been given yet.  */
bucket *startval;
/* NOTE(review): tags and user_toknums are not assigned in this part of
   the file; presumably per-symbol tables filled in by packsymbols --
   confirm.  */
char **tags;
int *user_toknums;

/* Nonzero if components of semantic values are used, implying
   they must be unions.  */
static int value_components_used;

static int typed;  /* nonzero if %union has been seen.  */

static int lastprec;  /* incremented for each %left, %right or %nonassoc seen */

static int gensym_count;  /* incremented for each generated symbol */

/* The `error' token (user token number 256) and the `$undefined.'
   token (user token number 2); both are created by reader.  */
static bucket *errtoken;
static bucket *undeftoken;

/* Nonzero if any action or guard uses the @n construct.  */
static int yylsp_needed;
 139
 140
 141 static void
 142 skip_to_char (int target)
 143 {
 144   int c;
 145   if (target == '\n')
 146     warn(_("   Skipping to next \\n"));
 147   else
 148     warni(_("   Skipping to next %c"), target);
 149
 150   do
 151     c = skip_white_space();
 152   while (c != target && c != EOF);
 153   if (c != EOF)
 154     ungetc(c, finput);
 155 }
 156
 157
 158 void
 159 reader (void)
 160 {
 161   start_flag = 0;
 162   startval = NULL;  /* start symbol not specified yet. */
 163
 164 #if 0
 165   translations = 0;  /* initially assume token number translation not needed.  */
 166 #endif
 167   /* Nowadays translations is always set to 1,
 168      since we give `error' a user-token-number
 169      to satisfy the Posix demand for YYERRCODE==256.  */
 170   translations = 1;
 171
 172   nsyms = 1;
 173   nvars = 0;
 174   nrules = 0;
 175   nitems = 0;
 176   rline_allocated = 10;
 177   rline = NEW2(rline_allocated, short);
 178
 179   typed = 0;
 180   lastprec = 0;
 181
 182   gensym_count = 0;
 183
 184   semantic_parser = 0;
 185   pure_parser = 0;
 186   yylsp_needed = 0;
 187
 188   grammar = NULL;
 189
 190   init_lex();
 191   lineno = 1;
 192
 193   /* initialize the symbol table.  */
 194   tabinit();
 195   /* construct the error token */
 196   errtoken = getsym("error");
 197   errtoken->class = STOKEN;
 198   errtoken->user_token_number = 256; /* Value specified by posix.  */
 199   /* construct a token that represents all undefined literal tokens. */
 200   /* it is always token number 2.  */
 201   undeftoken = getsym("$undefined.");
 202   undeftoken->class = STOKEN;
 203   undeftoken->user_token_number = 2;
 204   /* Read the declaration section.  Copy %{ ... %} groups to ftable and fdefines file.
 205      Also notice any %token, %left, etc. found there.  */
 206   if (noparserflag)
 207     fprintf(ftable, "\n/*  Bison-generated parse tables, made from %s\n",
 208                 infile);
 209   else
 210     fprintf(ftable, "\n/*  A Bison parser, made from %s\n", infile);
 211   fprintf(ftable, "    by %s  */\n\n", VERSION_STRING);
 212   fprintf(ftable, "#define YYBISON 1  /* Identify Bison output.  */\n\n");
 213   read_declarations();
 214   /* start writing the guard and action files, if they are needed.  */
 215   output_headers();
 216   /* read in the grammar, build grammar in list form.  write out guards and actions.  */
 217   readgram();
 218   /* Now we know whether we need the line-number stack.
 219      If we do, write its type into the .tab.h file.  */
 220   if (fdefines)
 221     reader_output_yylsp(fdefines);
 222   /* write closing delimiters for actions and guards.  */
 223   output_trailers();
 224   if (yylsp_needed)
 225     fprintf(ftable, "#define YYLSP_NEEDED\n\n");
 226   /* assign the symbols their symbol numbers.
 227      Write #defines for the token symbols into fdefines if requested.  */
 228   packsymbols();
 229   /* convert the grammar into the format described in gram.h.  */
 230   packgram();
 231   /* free the symbol table data structure
 232      since symbols are now all referred to by symbol number.  */
 233   free_symtab();
 234 }
 235
 236 void
 237 reader_output_yylsp (FILE *f)
 238 {
 239   if (yylsp_needed)
 240     fprintf(f, LTYPESTR);
 241 }
 242
 243 /* Read from finput until `%%' is seen.  Discard the `%%'.  Handle any
 244    `%' declarations, and copy the contents of any `%{ ... %}' groups
 245    to fattrs.  */
 246
 247 void
 248 read_declarations (void)
 249 {
 250   register int c;
 251   register int tok;
 252
 253   for (;;)
 254     {
 255       c = skip_white_space();
 256
 257       if (c == '%')
 258         {
 259           tok = parse_percent_token();
 260
 261           switch (tok)
 262             {
 263             case TWO_PERCENTS:
 264               return;
 265
 266             case PERCENT_LEFT_CURLY:
 267               copy_definition();
 268               break;
 269
 270             case TOKEN:
 271               parse_token_decl (STOKEN, SNTERM);
 272               break;
 273
 274             case NTERM:
 275               parse_token_decl (SNTERM, STOKEN);
 276               break;
 277
 278             case TYPE:
 279               parse_type_decl();
 280               break;
 281
 282             case START:
 283               parse_start_decl();
 284               break;
 285
 286             case UNION:
 287               parse_union_decl();
 288               break;
 289
 290             case EXPECT:
 291               parse_expect_decl();
 292               break;
 293             case THONG:
 294               parse_thong_decl();
 295               break;
 296             case LEFT:
 297               parse_assoc_decl(LEFT_ASSOC);
 298               break;
 299
 300             case RIGHT:
 301               parse_assoc_decl(RIGHT_ASSOC);
 302               break;
 303
 304             case NONASSOC:
 305               parse_assoc_decl(NON_ASSOC);
 306               break;
 307
 308             case SEMANTIC_PARSER:
 309               if (semantic_parser == 0)
 310                 {
 311                   semantic_parser = 1;
 312                   open_extra_files();
 313                 }
 314               break;
 315
 316             case PURE_PARSER:
 317               pure_parser = 1;
 318               break;
 319
 320             case NOOP:
 321               break;
 322
 323             default:
 324               warns(_("unrecognized: %s"), token_buffer);
 325               skip_to_char('%');
 326           }
 327         }
 328       else if (c == EOF)
 329         fatal(_("no input grammar"));
 330       else
 331         {
 332                 char buff[100];
 333                 sprintf(buff, _("unknown character: %s"), printable_version(c));
 334                 warn(buff);
 335                 skip_to_char('%');
 336         }
 337     }
 338 }
 339
 340
/* Copy the contents of a %{ ... %} group to fattrs.
   The %{ has already been read.  Return after reading the %}.

   String and character literals and both comment styles are copied
   through without being examined for `%}'.  lineno is kept up to date
   for every newline that is consumed.  */

void
copy_definition (void)
{
  register int c;
  register int match;           /* quote character that closes the current literal */
  register int ended;           /* nonzero once the end of the current comment was copied */
  register int after_percent;  /* -1 while reading a character if prev char was % */
  int cplus_comment;            /* nonzero for a `//' comment, zero for a C comment */

  /* Make diagnostics in the copied code refer to the grammar file.  */
  if (!nolinesflag)
    fprintf(fattrs, "#line %d \"%s\"\n", lineno, infile);

  after_percent = 0;

  c = getc(finput);

  for (;;)
    {
      switch (c)
        {
        case '\n':
          putc(c, fattrs);
          lineno++;
          break;

        case '%':
          /* Hold the `%' back: it is either the start of `%}' or an
             ordinary character, decided at the bottom of the loop.  */
          after_percent = -1;
          break;

        case '\'':
        case '"':
          match = c;
          putc(c, fattrs);
          c = getc(finput);

          while (c != match)
            {
              if (c == EOF)
                fatal(_("unterminated string at end of file"));
              if (c == '\n')
                {
                  /* Recover: push the newline back for the outer loop
                     and pretend the closing quote was seen.  */
                  warn(_("unterminated string"));
                  ungetc(c, finput);
                  c = match;
                  continue;
                }

              putc(c, fattrs);

              if (c == '\\')
                {
                  /* Copy the escaped character unexamined, so an
                     escaped quote does not end the literal.  */
                  c = getc(finput);
                  if (c == EOF)
                    fatal(_("unterminated string at end of file"));
                  putc(c, fattrs);
                  if (c == '\n')
                    lineno++;
                }

              c = getc(finput);
            }

          /* Emit the closing quote (real or invented).  */
          putc(c, fattrs);
          break;

        case '/':
          putc(c, fattrs);
          c = getc(finput);
          /* Not a comment: re-run the switch on the character just
             read, without fetching another one.  */
          if (c != '*' && c != '/')
            continue;

          cplus_comment = (c == '/');
          putc(c, fattrs);
          c = getc(finput);

          ended = 0;
          while (!ended)
            {
              if (!cplus_comment && c == '*')
                {
                  /* Copy a run of stars; a `/' right after it closes
                     the C comment.  */
                  while (c == '*')
                    {
                      putc(c, fattrs);
                      c = getc(finput);
                    }

                  if (c == '/')
                    {
                      putc(c, fattrs);
                      ended = 1;
                    }
                }
              else if (c == '\n')
                {
                  lineno++;
                  putc(c, fattrs);
                  /* A newline ends a `//' comment.  */
                  if (cplus_comment)
                    ended = 1;
                  else
                    c = getc(finput);
                }
              else if (c == EOF)
                fatal(_("unterminated comment in `%{' definition"));
              else
                {
                  putc(c, fattrs);
                  c = getc(finput);
                }
            }

          break;

        case EOF:
          fatal(_("unterminated `%{' definition"));
          /* NOTE(review): no break here; this relies on fatal not
             returning -- confirm.  */

        default:
          putc(c, fattrs);
        }

      c = getc(finput);

      if (after_percent)
        {
          /* `%}' ends the group; any other character means the held
             back `%' was ordinary text and is emitted now.  */
          if (c == '}')
            return;
          putc('%', fattrs);
        }
      after_percent = 0;

    }

}
 476
 477
 478
 479 /* parse what comes after %token or %nterm.
 480 For %token, what_is is STOKEN and what_is_not is SNTERM.
 481 For %nterm, the arguments are reversed.  */
 482
 483 void
 484 parse_token_decl (int what_is, int what_is_not)
 485 {
 486   register int token = 0;
 487   register char *typename = 0;
 488   register struct bucket *symbol = NULL;  /* pts to symbol being defined */
 489   int k;
 490
 491   for (;;)
 492     {
 493       int tmp_char = ungetc (skip_white_space (), finput);
 494
 495       if (tmp_char == '%')
 496         return;
 497       if (tmp_char == EOF)
 498         fatals ("Premature EOF after %s", token_buffer);
 499
 500       token = lex();
 501       if (token == COMMA)
 502         {
 503           symbol = NULL;
 504           continue;
 505         }
 506       if (token == TYPENAME)
 507         {
 508           k = strlen(token_buffer);
 509           typename = NEW2(k + 1, char);
 510           strcpy(typename, token_buffer);
 511           value_components_used = 1;
 512           symbol = NULL;
 513         }
 514       else if (token == IDENTIFIER && *symval->tag == '\"'
 515                 && symbol)
 516         {
 517           translations = 1;
 518           symval->class = STOKEN;
 519           symval->type_name = typename;
 520           symval->user_token_number = symbol->user_token_number;
 521           symbol->user_token_number = SALIAS;
 522
 523           symval->alias = symbol;
 524           symbol->alias = symval;
 525           symbol = NULL;
 526
 527           nsyms--;   /* symbol and symval combined are only one symbol */
 528         }
 529       else if (token == IDENTIFIER)
 530         {
 531           int oldclass = symval->class;
 532           symbol = symval;
 533
 534           if (symbol->class == what_is_not)
 535             warns(_("symbol %s redefined"), symbol->tag);
 536           symbol->class = what_is;
 537           if (what_is == SNTERM && oldclass != SNTERM)
 538             symbol->value = nvars++;
 539
 540           if (typename)
 541             {
 542               if (symbol->type_name == NULL)
 543                 symbol->type_name = typename;
 544               else if (strcmp(typename, symbol->type_name) != 0)
 545                 warns(_("type redeclaration for %s"), symbol->tag);
 546             }
 547         }
 548       else if (symbol && token == NUMBER)
 549         {
 550           symbol->user_token_number = numval;
 551           translations = 1;
 552         }
 553       else
 554         {
 555           warnss(_("`%s' is invalid in %s"),
 556                 token_buffer,
 557                 (what_is == STOKEN) ? "%token" : "%nterm");
 558           skip_to_char('%');
 559         }
 560     }
 561
 562 }
 563
 564 /* parse what comes after %thong
 565         the full syntax is
 566                 %thong <type> token number literal
 567  the <type> or number may be omitted.  The number specifies the
 568  user_token_number.
 569
 570  Two symbols are entered in the table, one for the token symbol and
 571  one for the literal.  Both are given the <type>, if any, from the declaration.
 572  The ->user_token_number of the first is SALIAS and the ->user_token_number
 573  of the second is set to the number, if any, from the declaration.
 574  The two symbols are linked via pointers in their ->alias fields.
 575
 576  during output_defines_table, the symbol is reported
 577  thereafter, only the literal string is retained
 578  it is the literal string that is output to yytname
 579 */
 580
 581 void
 582 parse_thong_decl (void)
 583 {
 584   register int token;
 585   register struct bucket *symbol;
 586   register char *typename = 0;
 587   int k, usrtoknum;
 588
 589   translations = 1;
 590   token = lex();                /* fetch typename or first token */
 591   if (token == TYPENAME) {
 592     k = strlen(token_buffer);
 593     typename = NEW2(k + 1, char);
 594     strcpy(typename, token_buffer);
 595     value_components_used = 1;
 596     token = lex();              /* fetch first token */
 597   }
 598
 599   /* process first token */
 600
 601   if (token != IDENTIFIER)
 602     {
 603       warns(_("unrecognized item %s, expected an identifier"),
 604             token_buffer);
 605       skip_to_char('%');
 606       return;
 607     }
 608   symval->class = STOKEN;
 609   symval->type_name = typename;
 610   symval->user_token_number = SALIAS;
 611   symbol = symval;
 612
 613   token = lex();                /* get number or literal string */
 614
 615   if (token == NUMBER) {
 616     usrtoknum = numval;
 617     token = lex();              /* okay, did number, now get literal */
 618   }
 619   else usrtoknum = 0;
 620
 621   /* process literal string token */
 622
 623   if (token != IDENTIFIER || *symval->tag != '\"')
 624     {
 625       warns(_("expected string constant instead of %s"),
 626             token_buffer);
 627       skip_to_char('%');
 628       return;
 629     }
 630   symval->class = STOKEN;
 631   symval->type_name = typename;
 632   symval->user_token_number = usrtoknum;
 633
 634   symval->alias = symbol;
 635   symbol->alias = symval;
 636
 637   nsyms--;                      /* symbol and symval combined are only one symbol */
 638 }
 639
 640
 641 /* parse what comes after %start */
 642
 643 void
 644 parse_start_decl (void)
 645 {
 646   if (start_flag)
 647     warn(_("multiple %start declarations"));
 648   if (lex() != IDENTIFIER)
 649     warn(_("invalid %start declaration"));
 650   else
 651     {
 652       start_flag = 1;
 653       startval = symval;
 654     }
 655 }
 656
 657
 658
 659 /* read in a %type declaration and record its information for get_type_name to access */
 660
 661 void
 662 parse_type_decl (void)
 663 {
 664   register int k;
 665   register char *name;
 666
 667   if (lex() != TYPENAME)
 668     {
 669       warn(_("%type declaration has no <typename>"));
 670       skip_to_char('%');
 671       return;
 672     }
 673
 674   k = strlen(token_buffer);
 675   name = NEW2(k + 1, char);
 676   strcpy(name, token_buffer);
 677
 678   for (;;)
 679     {
 680       register int t;
 681       int tmp_char = ungetc (skip_white_space (), finput);
 682
 683       if (tmp_char == '%')
 684         return;
 685       if (tmp_char == EOF)
 686         fatals ("Premature EOF after %s", token_buffer);
 687
 688       t = lex();
 689
 690       switch (t)
 691         {
 692
 693         case COMMA:
 694         case SEMICOLON:
 695           break;
 696
 697         case IDENTIFIER:
 698           if (symval->type_name == NULL)
 699             symval->type_name = name;
 700           else if (strcmp(name, symval->type_name) != 0)
 701             warns(_("type redeclaration for %s"), symval->tag);
 702
 703           break;
 704
 705         default:
 706           warns(_("invalid %%type declaration due to item: `%s'"), token_buffer);
 707           skip_to_char('%');
 708         }
 709     }
 710 }
 711
 712
 713
 714 /* read in a %left, %right or %nonassoc declaration and record its information.  */
 715 /* assoc is either LEFT_ASSOC, RIGHT_ASSOC or NON_ASSOC.  */
 716
 717 void
 718 parse_assoc_decl (int assoc)
 719 {
 720   register int k;
 721   register char *name = NULL;
 722   register int prev = 0;
 723
 724   lastprec++;  /* Assign a new precedence level, never 0.  */
 725
 726   for (;;)
 727     {
 728       register int t;
 729       int tmp_char = ungetc (skip_white_space (), finput);
 730
 731       if (tmp_char == '%')
 732         return;
 733       if (tmp_char == EOF)
 734         fatals ("Premature EOF after %s", token_buffer);
 735
 736       t = lex();
 737
 738       switch (t)
 739         {
 740
 741         case TYPENAME:
 742           k = strlen(token_buffer);
 743           name = NEW2(k + 1, char);
 744           strcpy(name, token_buffer);
 745           break;
 746
 747         case COMMA:
 748           break;
 749
 750         case IDENTIFIER:
 751           if (symval->prec != 0)
 752             warns(_("redefining precedence of %s"), symval->tag);
 753           symval->prec = lastprec;
 754           symval->assoc = assoc;
 755           if (symval->class == SNTERM)
 756             warns(_("symbol %s redefined"), symval->tag);
 757           symval->class = STOKEN;
 758           if (name)
 759             { /* record the type, if one is specified */
 760               if (symval->type_name == NULL)
 761                 symval->type_name = name;
 762               else if (strcmp(name, symval->type_name) != 0)
 763                 warns(_("type redeclaration for %s"), symval->tag);
 764             }
 765           break;
 766
 767         case NUMBER:
 768           if (prev == IDENTIFIER)
 769             {
 770               symval->user_token_number = numval;
 771               translations = 1;
 772             }
 773           else
 774             {
 775               warns(_("invalid text (%s) - number should be after identifier"),
 776                         token_buffer);
 777               skip_to_char('%');
 778             }
 779           break;
 780
 781         case SEMICOLON:
 782           return;
 783
 784         default:
 785           warns(_("unexpected item: %s"), token_buffer);
 786           skip_to_char('%');
 787         }
 788
 789       prev = t;
 790
 791     }
 792 }
 793
 794
 795
/* Copy the union declaration into fattrs (and fdefines, when that file
   is open), where it is made into the definition of YYSTYPE, the type
   of elements of the parser value stack.

   The text following `%union' is copied after the words `typedef
   union' up to and including the brace that balances the first open
   brace; then ` YYSTYPE;' is appended.  Comments are copied without
   counting the braces inside them.  */

void
parse_union_decl (void)
{
  register int c;
  register int count;           /* current brace nesting depth */
  register int in_comment;      /* nonzero while copying a comment */
  int cplus_comment;            /* nonzero for a `//' comment, zero for a C comment */

  if (typed)
    warn(_("multiple %union declarations"));

  typed = 1;

  /* Make diagnostics in the copied code refer to the grammar file.  */
  if (!nolinesflag)
    fprintf(fattrs, "\n#line %d \"%s\"\n", lineno, infile);
  else
    fprintf(fattrs, "\n");

  fprintf(fattrs, "typedef union");
  if (fdefines)
    fprintf(fdefines, "typedef union");

  count = 0;
  in_comment = 0;

  c = getc(finput);

  /* NOTE(review): if EOF is reached before the closing brace, the loop
     just ends and the function returns without a diagnostic.  */
  while (c != EOF)
    {
      /* Every character is echoed first and classified afterwards.  */
      putc(c, fattrs);
      if (fdefines)
        putc(c, fdefines);

      switch (c)
        {
        case '\n':
          lineno++;
          break;

        case '/':
          c = getc(finput);
          /* Not a comment: give the character back so the main loop
             handles it.  */
          if (c != '*' && c != '/')
            ungetc(c, finput);
          else
            {
              putc(c, fattrs);
              if (fdefines)
                putc(c, fdefines);
              cplus_comment = (c == '/');
              in_comment = 1;
              c = getc(finput);
              while (in_comment)
                {
                  /* NOTE(review): the character is written before the
                     EOF test below.  */
                  putc(c, fattrs);
                  if (fdefines)
                    putc(c, fdefines);

                  if (c == '\n')
                    {
                      lineno++;
                      /* A newline ends a `//' comment.  */
                      if (cplus_comment)
                        {
                          in_comment = 0;
                          break;
                        }
                    }
                  if (c == EOF)
                    fatal(_("unterminated comment at end of file"));

                  if (!cplus_comment && c == '*')
                    {
                      /* `*' followed by `/' closes a C comment.  Any
                         other character is left in c and written by the
                         next iteration.  */
                      c = getc(finput);
                      if (c == '/')
                        {
                          putc('/', fattrs);
                          if (fdefines)
                            putc('/', fdefines);
                          in_comment = 0;
                        }
                    }
                  else
                    c = getc(finput);
                }
            }
          break;


        case '{':
          count++;
          break;

        case '}':
          if (count == 0)
            warn (_("unmatched close-brace (`}')"));
          count--;
          /* The brace closing the outermost level ends the union.  */
          if (count <= 0)
            {
              fprintf(fattrs, " YYSTYPE;\n");
              if (fdefines)
                fprintf(fdefines, " YYSTYPE;\n");
              /* JF don't choke on trailing semi */
              c=skip_white_space();
              if(c!=';') ungetc(c,finput);
              return;
            }
        }

      c = getc(finput);
    }
}
 910
 911 /* parse the declaration %expect N which says to expect N
 912    shift-reduce conflicts.  */
 913
 914 void
 915 parse_expect_decl (void)
 916 {
 917   register int c;
 918   register int count;
 919   char buffer[20];
 920
 921   c = getc(finput);
 922   while (c == ' ' || c == '\t')
 923     c = getc(finput);
 924
 925   count = 0;
 926   while (c >= '0' && c <= '9')
 927     {
 928       if (count < 20)
 929         buffer[count++] = c;
 930       c = getc(finput);
 931     }
 932   buffer[count] = 0;
 933
 934   ungetc (c, finput);
 935
 936   if (count <= 0 || count > 10)
 937         warn(_("argument of %expect is not an integer"));
 938   expected_conflicts = atoi (buffer);
 939 }
 940
 941 /* that's all of parsing the declaration section */
 942 \f
 943 /* Get the data type (alternative in the union) of the value for symbol n in rule rule.  */
 944
 945 char *
 946 get_type_name (int n, symbol_list *rule)
 947 {
 948   static char *msg = N_("invalid $ value");
 949
 950   register int i;
 951   register symbol_list *rp;
 952
 953   if (n < 0)
 954     {
 955       warn(_(msg));
 956       return NULL;
 957     }
 958
 959   rp = rule;
 960   i = 0;
 961
 962   while (i < n)
 963     {
 964       rp = rp->next;
 965       if (rp == NULL || rp->sym == NULL)
 966         {
 967           warn(_(msg));
 968           return NULL;
 969         }
 970       i++;
 971     }
 972
 973   return (rp->sym->type_name);
 974 }
 975
 976
 977 /* After `%guard' is seen in the input file, copy the actual guard
 978    into the guards file.  If the guard is followed by an action, copy
 979    that into the actions file.  STACK_OFFSET is the number of values
 980    in the current rule so far, which says where to find `$0' with
 981    respect to the top of the stack, for the simple parser in which the
 982    stack is not popped until after the guard is run.  */
 983
 984 void
 985 copy_guard (symbol_list *rule, int stack_offset)
 986 {
 987   register int c;
 988   register int n;
 989   register int count;
 990   register int match;
 991   register int ended;
 992   register char *type_name;
 993   int brace_flag = 0;
 994   int cplus_comment;
 995
 996   /* offset is always 0 if parser has already popped the stack pointer */
 997   if (semantic_parser) stack_offset = 0;
 998
 999   fprintf(fguard, "\ncase %d:\n", nrules);
1000   if (!nolinesflag)
1001     fprintf (fguard, "#line %d \"%s\"\n", lineno, infile);
1002   putc('{', fguard);
1003
1004   count = 0;
1005   c = getc(finput);
1006
1007   while (brace_flag ? (count > 0) : (c != ';'))
1008     {
1009       switch (c)
1010         {
1011         case '\n':
1012           putc(c, fguard);
1013           lineno++;
1014           break;
1015
1016         case '{':
1017           putc(c, fguard);
1018           brace_flag = 1;
1019           count++;
1020           break;
1021
1022         case '}':
1023           putc(c, fguard);
1024           if (count > 0)
1025             count--;
1026           else
1027             {
1028               warn(_("unmatched right brace (`}')"));
1029               c = getc(finput); /* skip it */
1030             }
1031           break;
1032
1033         case '\'':
1034         case '"':
1035           match = c;
1036           putc(c, fguard);
1037           c = getc(finput);
1038
1039           while (c != match)
1040             {
1041               if (c == EOF)
1042                 fatal(_("unterminated string at end of file"));
1043               if (c == '\n')
1044                 {
1045                   warn(_("unterminated string"));
1046                   ungetc(c, finput);
1047                   c = match;            /* invent terminator */
1048                   continue;
1049                 }
1050
1051               putc(c, fguard);
1052
1053               if (c == '\\')
1054                 {
1055                   c = getc(finput);
1056                   if (c == EOF)
1057                     fatal(_("unterminated string"));
1058                   putc(c, fguard);
1059                   if (c == '\n')
1060                     lineno++;
1061                 }
1062
1063               c = getc(finput);
1064             }
1065
1066           putc(c, fguard);
1067           break;
1068
1069         case '/':
1070           putc(c, fguard);
1071           c = getc(finput);
1072           if (c != '*' && c != '/')
1073             continue;
1074
1075           cplus_comment = (c == '/');
1076           putc(c, fguard);
1077           c = getc(finput);
1078
1079           ended = 0;
1080           while (!ended)
1081             {
1082               if (!cplus_comment && c == '*')
1083                 {
1084                   while (c == '*')
1085                     {
1086                       putc(c, fguard);
1087                       c = getc(finput);
1088                     }
1089
1090                   if (c == '/')
1091                     {
1092                       putc(c, fguard);
1093                       ended = 1;
1094                     }
1095                 }
1096               else if (c == '\n')
1097                 {
1098                   lineno++;
1099                   putc(c, fguard);
1100                   if (cplus_comment)
1101                     ended = 1;
1102                   else
1103                     c = getc(finput);
1104                 }
1105               else if (c == EOF)
1106                 fatal(_("unterminated comment"));
1107               else
1108                 {
1109                   putc(c, fguard);
1110                   c = getc(finput);
1111                 }
1112             }
1113
1114           break;
1115
1116         case '$':
1117           c = getc(finput);
1118           type_name = NULL;
1119
1120           if (c == '<')
1121             {
1122               register char *cp = token_buffer;
1123
1124               while ((c = getc(finput)) != '>' && c > 0)
1125                 {
1126                   if (cp == token_buffer + maxtoken)
1127                     cp = grow_token_buffer(cp);
1128
1129                   *cp++ = c;
1130                 }
1131               *cp = 0;
1132               type_name = token_buffer;
1133
1134               c = getc(finput);
1135             }
1136
1137           if (c == '$')
1138             {
1139               fprintf(fguard, "yyval");
1140               if (!type_name)
1141                 type_name = rule->sym->type_name;
1142               if (type_name)
1143                 fprintf(fguard, ".%s", type_name);
1144               if(!type_name && typed)
1145                 warns(_("$$ of `%s' has no declared type"), rule->sym->tag);
1146             }
1147           else if (isdigit(c) || c == '-')
1148             {
1149               ungetc (c, finput);
1150               n = read_signed_integer (finput);
1151               c = getc (finput);
1152
1153               if (!type_name && n > 0)
1154                 type_name = get_type_name(n, rule);
1155
1156               fprintf(fguard, "yyvsp[%d]", n - stack_offset);
1157               if (type_name)
1158                 fprintf(fguard, ".%s", type_name);
1159               if(!type_name && typed)
1160                 warnss(_("$%s of `%s' has no declared type"), int_to_string(n), rule->sym->tag);
1161               continue;
1162             }
1163           else
1164             warns(_("$%s is invalid"), printable_version(c));
1165
1166           break;
1167
1168         case '@':
1169           c = getc(finput);
1170           if (isdigit(c) || c == '-')
1171             {
1172               ungetc (c, finput);
1173               n = read_signed_integer(finput);
1174               c = getc(finput);
1175             }
1176           else
1177             {
1178               warns(_("@%s is invalid"), printable_version(c));
1179               n = 1;
1180             }
1181
1182           fprintf(fguard, "yylsp[%d]", n - stack_offset);
1183           yylsp_needed = 1;
1184
1185           continue;
1186
1187         case EOF:
1188           fatal(_("unterminated %%guard clause"));
1189
1190         default:
1191           putc(c, fguard);
1192         }
1193
1194       if (c != '}' || count != 0)
1195         c = getc(finput);
1196     }
1197
1198   c = skip_white_space();
1199
1200   fprintf(fguard, ";\n    break;}");
1201   if (c == '{')
1202     copy_action(rule, stack_offset);
1203   else if (c == '=')
1204     {
1205       c = getc(finput); /* why not skip_white_space -wjh */
1206       if (c == '{')
1207         copy_action(rule, stack_offset);
1208     }
1209   else
1210     ungetc(c, finput);
1211 }
1212
1213
1214
/* Assuming that a `{' has just been seen, copy everything up to the
   matching `}' into the actions file (faction), emitted as a
   `case NRULES:' arm that ends with `break;'.

   RULE is the symbol list of the rule the action belongs to; it is
   used to look up the type names appended after `$$' and `$N'.
   STACK_OFFSET is the number of values in the current rule so far,
   which says where to find `$0' with respect to the top of the
   stack.  It is forced to 0 when semantic_parser is set.

   While copying:
     `$$'  is written as `yyval',
     `$N'  is written as `yyvsp[N - STACK_OFFSET]',
     `@N'  is written as `yylsp[N - STACK_OFFSET]' (and yylsp_needed
           is set),
   and an explicit `$<type>' takes the place of the declared type.
   String and character literals and comments are copied through
   unchanged, so braces, `$' and `@' inside them are not interpreted.
   lineno is advanced for every newline copied.  */

void
copy_action (symbol_list *rule, int stack_offset)
{
  register int c;               /* current input character */
  register int n;               /* N of a `$N' or `@N' reference */
  register int count;           /* depth of unclosed `{' */
  register int match;           /* closing quote being looked for */
  register int ended;           /* nonzero once a comment has ended */
  register char *type_name;     /* type to append to `$$' or `$N' */
  int cplus_comment;            /* nonzero inside a `//' comment */

  /* offset is always 0 if parser has already popped the stack pointer */
  if (semantic_parser)
    stack_offset = 0;

  /* Open the switch arm for this rule, with a #line directive pointing
     back at the grammar file unless nolinesflag is set.  */
  fprintf (faction, "\ncase %d:\n", nrules);
  if (!nolinesflag)
    fprintf (faction, "#line %d \"%s\"\n", lineno, infile);
  putc ('{', faction);

  /* The caller has already consumed the opening brace.  */
  count = 1;
  c = getc(finput);

  while (count > 0)
    {
      /* Copy up to the next `}' that is outside any string or comment.
         Inside this loop `break' leaves the switch and reads a fresh
         character at the bottom, whereas `continue' re-tests and
         re-dispatches on the character already held in c.  */
      while (c != '}')
        {
          switch (c)
            {
            case '\n':
              putc(c, faction);
              lineno++;
              break;

            case '{':
              putc(c, faction);
              count++;
              break;

            case '\'':
            case '"':
              /* Copy a character or string literal up to its closing
                 quote.  */
              match = c;
              putc(c, faction);
              c = getc(finput);

              while (c != match)
                {
                  if (c == '\n')
                    {
                      /* Literal runs off the end of the line: warn,
                         push the newline back so the outer loop sees
                         it, and pretend the closing quote was read.  */
                      warn(_("unterminated string"));
                      ungetc(c, finput);
                      c = match;
                      continue;
                    }
                  else if (c == EOF)
                    fatal(_("unterminated string at end of file"));

                  putc(c, faction);

                  /* Copy the character after a backslash without
                     examining it, so an escaped quote does not end the
                     literal.  */
                  if (c == '\\')
                    {
                      c = getc(finput);
                      if (c == EOF)
                        fatal(_("unterminated string"));
                      putc(c, faction);
                      if (c == '\n')
                        lineno++;
                    }

                  c = getc(finput);
                }

              /* Emit the closing quote (real or invented).  */
              putc(c, faction);
              break;

            case '/':
              putc(c, faction);
              c = getc(finput);
              /* A `/' not followed by `*' or `/' does not start a
                 comment; dispatch again on the character just read.  */
              if (c != '*' && c != '/')
                continue;

              cplus_comment = (c == '/');
              putc(c, faction);
              c = getc(finput);

              ended = 0;
              while (!ended)
                {
                  if (!cplus_comment && c == '*')
                    {
                      /* Copy a run of `*'; the comment ends only if
                         the run is immediately followed by `/'.
                         Otherwise the following character is examined
                         by the next iteration.  */
                      while (c == '*')
                        {
                          putc(c, faction);
                          c = getc(finput);
                        }

                      if (c == '/')
                        {
                          putc(c, faction);
                          ended = 1;
                        }
                    }
                  else if (c == '\n')
                    {
                      lineno++;
                      putc(c, faction);
                      /* A newline ends a `//' comment but is ordinary
                         text inside a C comment.  */
                      if (cplus_comment)
                        ended = 1;
                      else
                        c = getc(finput);
                    }
                  else if (c == EOF)
                    fatal(_("unterminated comment"));
                  else
                    {
                      putc(c, faction);
                      c = getc(finput);
                    }
                }

              break;

            case '$':
              c = getc(finput);
              type_name = NULL;

              /* Optional explicit type: `$<type>$' or `$<type>N'.  */
              if (c == '<')
                {
                  register char *cp = token_buffer;

                  /* Collect the name up to `>' into token_buffer,
                     growing the buffer when it is full.  The `c > 0'
                     test also stops the loop on EOF or a NUL byte.  */
                  while ((c = getc(finput)) != '>' && c > 0)
                    {
                      if (cp == token_buffer + maxtoken)
                        cp = grow_token_buffer(cp);

                      *cp++ = c;
                    }
                  *cp = 0;
                  type_name = token_buffer;
                  value_components_used = 1;

                  c = getc(finput);
                }
              if (c == '$')
                {
                  /* `$$': the value of the rule's left-hand side.  */
                  fprintf(faction, "yyval");
                  if (!type_name)
                    type_name = get_type_name(0, rule);
                  if (type_name)
                    fprintf(faction, ".%s", type_name);
                  if(!type_name && typed)
                    warns(_("$$ of `%s' has no declared type"),
                          rule->sym->tag);
                }
              else if (isdigit(c) || c == '-')
                {
                  /* `$N': push the first character back so that
                     read_signed_integer sees the whole number.  */
                  ungetc (c, finput);
                  n = read_signed_integer(finput);
                  c = getc(finput);

                  /* A declared type is looked up only for positive N.  */
                  if (!type_name && n > 0)
                    type_name = get_type_name(n, rule);

                  fprintf(faction, "yyvsp[%d]", n - stack_offset);
                  if (type_name)
                    fprintf(faction, ".%s", type_name);
                  if(!type_name && typed)
                    warnss(_("$%s of `%s' has no declared type"),
                                int_to_string(n), rule->sym->tag);
                  /* c already holds the character after the number.  */
                  continue;
                }
              else
                warns(_("$%s is invalid"), printable_version(c));

              break;

            case '@':
              c = getc(finput);
              if (isdigit(c) || c == '-')
                {
                  ungetc (c, finput);
                  n = read_signed_integer(finput);
                  c = getc(finput);
                }
              else
                {
                  /* Not a number: warn and emit the reference as if
                     `@1' had been written.  */
                  warn(_("invalid @-construct"));
                  n = 1;
                }

              fprintf(faction, "yylsp[%d]", n - stack_offset);
              yylsp_needed = 1;

              /* c holds the character following the construct, which
                 has not been handled yet.  */
              continue;

            case EOF:
              /* NOTE(review): fatal presumably does not return;
                 otherwise control would fall through into `default'.  */
              fatal(_("unmatched `{'"));

            default:
              putc(c, faction);
            }

          c = getc(finput);
        }

      /* above loop exits when c is '}' */

      /* Copy the `}' only when it closes a nested block; the brace
         that closes the whole action is written by the fprintf
         below.  */
      if (--count)
        {
          putc(c, faction);
          c = getc(finput);
        }
    }

  fprintf(faction, ";\n    break;}");
}
1436
1437
1438
1439 /* generate a dummy symbol, a nonterminal,
1440 whose name cannot conflict with the user's names. */
1441
1442 bucket *
1443 gensym (void)
1444 {
1445   register bucket *sym;
1446
1447   sprintf (token_buffer, "@%d", ++gensym_count);
1448   sym = getsym(token_buffer);
1449   sym->class = SNTERM;
1450   sym->value = nvars++;
1451   return (sym);
1452 }
1453
/* Parse the input grammar into one symbol_list structure (`grammar').
Each rule is represented by a sequence of symbols: the left hand side
followed by the contents of the right hand side, followed by a null pointer
instead of a symbol to terminate the rule.
The next symbol is the lhs of the following rule.

All guards and actions are copied out to the appropriate files,
labelled by the rule number they apply to.

An action that appears in the middle of a rule is given a rule of its
own: a dummy nonterminal from gensym () with an empty right hand side
is inserted before the current rule, and the dummy takes the action's
place in the current rule.

Reading stops at TWO_PERCENTS or ENDFILE.  Afterwards the function
checks the symbol and rule counts, writes a default YYSTYPE if no type
information was seen, turns every still-undefined symbol into a
nonterminal (with a warning), and sets ntokens.  */

void
readgram (void)
{
  register int t;               /* current token */
  register bucket *lhs = NULL;  /* lhs of the current rule; kept across
                                   BAR alternatives */
  register symbol_list *p;      /* link being created */
  register symbol_list *p1;     /* last link appended to the list */
  register bucket *bp;

  symbol_list *crule;   /* points to first symbol_list of current rule.  */
                        /* its symbol is the lhs of the rule.   */
  symbol_list *crule1;  /* points to the symbol_list preceding crule.  */

  p1 = NULL;

  t = lex();

  while (t != TWO_PERCENTS && t != ENDFILE)
    {
      if (t == IDENTIFIER || t == BAR)
        {
          register int actionflag = 0; /* nonzero right after an action */
          int rulelength = 0;  /* number of symbols in rhs of this rule so far  */
          int xactions = 0;     /* JF for error checking */
          bucket *first_rhs = 0; /* first symbol of the rhs, if any */

          /* An IDENTIFIER starts a rule for a new lhs; a BAR starts
             another alternative for the lhs already in effect.  */
          if (t == IDENTIFIER)
            {
              lhs = symval;

              /* The lhs of the first rule becomes the start symbol
                 unless start_flag is already set.  */
              if (!start_flag)
                {
                  startval = lhs;
                  start_flag = 1;
                }

              t = lex();
              if (t != COLON)
                {
                  warn(_("ill-formed rule: initial symbol not followed by colon"));
                  unlex(t);
                }
            }

          /* NOTE(review): if no symbol has been read yet, symval (and
             therefore lhs) may be null here, and lhs is dereferenced
             below -- confirm against lex ().  */
          if (nrules == 0 && t == BAR)
            {
              warn(_("grammar starts with vertical bar"));
              lhs = symval;     /* BOGUS: use a random symval */
            }
          /* start a new rule and record its lhs.  */

          nrules++;
          nitems++;

          record_rule_line ();

          p = NEW(symbol_list);
          p->sym = lhs;

          /* Append the new link to the list, remembering the link that
             precedes it so that a dummy rule can be spliced in front of
             this rule later.  */
          crule1 = p1;
          if (p1)
            p1->next = p;
          else
            grammar = p;

          p1 = p;
          crule = p;

          /* mark the rule's lhs as a nonterminal if not already so.  */

          if (lhs->class == SUNKNOWN)
            {
              lhs->class = SNTERM;
              lhs->value = nvars;
              nvars++;
            }
          else if (lhs->class == STOKEN)
            warns(_("rule given for %s, which is a token"), lhs->tag);

          /* read the rhs of the rule.  */

          for (;;)
            {
              t = lex();
              /* A PREC token: the next token supplies the precedence
                 symbol for this rule (taken from symval without
                 checking the token type), then reading continues.  */
              if (t == PREC)
                {
                  t = lex();
                  crule->ruleprec = symval;
                  t = lex();
                }

              /* Anything other than a symbol or an action ends the
                 rhs.  */
              if (! (t == IDENTIFIER || t == LEFT_CURLY)) break;

              /* If next token is an identifier, see if a colon follows it.
                 If one does, exit this rule now.  */
              if (t == IDENTIFIER)
                {
                  register bucket *ssave;
                  register int t1;

                  /* Look one token ahead, then push it back and restore
                     symval, which is saved around the lex () call.  */
                  ssave = symval;
                  t1 = lex();
                  unlex(t1);
                  symval = ssave;
                  if (t1 == COLON) break;

                  if(!first_rhs)        /* JF */
                    first_rhs = symval;
                  /* Not followed by colon =>
                     process as part of this rule's rhs.  */
                }

              /* If we just passed an action, that action was in the middle
                 of a rule, so make a dummy rule to reduce it to a
                 non-terminal.  */
              if (actionflag)
                {
                  register bucket *sdummy;

                  /* Since the action was written out with this rule's */
                  /* number, we must give the new rule this number */
                  /* by inserting the new rule before it.  */

                  /* Make a dummy nonterminal, a gensym.  */
                  sdummy = gensym();

                  /* Make a new rule, whose body is empty,
                     before the current one, so that the action
                     just read can belong to it.  */
                  nrules++;
                  nitems++;
                  record_rule_line ();
                  p = NEW(symbol_list);
                  if (crule1)
                    crule1->next = p;
                  else grammar = p;
                  p->sym = sdummy;
                  /* This second link gets no symbol, so it serves as the
                     end-of-rule marker of the dummy rule (assuming NEW
                     returns zeroed storage -- TODO confirm).  It is now
                     the link that precedes crule.  */
                  crule1 = NEW(symbol_list);
                  p->next = crule1;
                  crule1->next = crule;

                  /* insert the dummy generated by that rule into this rule.  */
                  nitems++;
                  p = NEW(symbol_list);
                  p->sym = sdummy;
                  p1->next = p;
                  p1 = p;

                  actionflag = 0;
                }

              if (t == IDENTIFIER)
                {
                  /* Append the symbol to the rhs.  */
                  nitems++;
                  p = NEW(symbol_list);
                  p->sym = symval;
                  p1->next = p;
                  p1 = p;
                }
              else /* handle an action.  */
                {
                  copy_action(crule, rulelength);
                  actionflag = 1;
                  xactions++;   /* JF */
                }
              /* Counted for actions as well as symbols: a mid-rule
                 action is replaced by a dummy symbol above if anything
                 follows it.  */
              rulelength++;
            }   /* end of  read rhs of rule */

          /* Put an empty link in the list to mark the end of this rule  */
          p = NEW(symbol_list);
          p1->next = p;
          p1 = p;

          /* The rhs loop already consumes one PREC clause per pass, so
             a PREC token seen here directly follows another one.  */
          if (t == PREC)
            {
              warn(_("two @prec's in a row"));
              t = lex();
              crule->ruleprec = symval;
              t = lex();
            }
          if (t == GUARD)
            {
              if (! semantic_parser)
                warn(_("%%guard present but %%semantic_parser not specified"));

              copy_guard(crule, rulelength);
              t = lex();
            }
          else if (t == LEFT_CURLY)
            {
                /* This case never occurs -wjh */
              if (actionflag)  warn(_("two actions at end of one rule"));
              copy_action(crule, rulelength);
              actionflag = 1;
              xactions++;       /* -wjh */
              t = lex();
            }
          /* If $$ is being set in default way,
             warn if any type mismatch.  */
          else if (!xactions && first_rhs && lhs->type_name != first_rhs->type_name)
            {
              /* The pointers differ; it is a clash unless both names
                 exist and compare equal as strings.  */
              if (lhs->type_name == 0 || first_rhs->type_name == 0
                  || strcmp(lhs->type_name,first_rhs->type_name))
                warnss(_("type clash (`%s' `%s') on default action"),
                        lhs->type_name ? lhs->type_name : "",
                        first_rhs->type_name ? first_rhs->type_name : "");
            }
          /* Warn if there is no default for $$ but we need one.  */
          else if (!xactions && !first_rhs && lhs->type_name != 0)
            warn(_("empty rule for typed nonterminal, and no action"));
          /* A semicolon after the rule is optional.  */
          if (t == SEMICOLON)
            t = lex();
        }
#if 0
  /* these things can appear as alternatives to rules.  */
/* NO, they cannot.
        a) none of the documentation allows them
        b) most of them scan forward until finding a next %
                thus they may swallow lots of intervening rules
*/
      else if (t == TOKEN)
        {
          parse_token_decl(STOKEN, SNTERM);
          t = lex();
        }
      else if (t == NTERM)
        {
          parse_token_decl(SNTERM, STOKEN);
          t = lex();
        }
      else if (t == TYPE)
        {
          t = get_type();
        }
      else if (t == UNION)
        {
          parse_union_decl();
          t = lex();
        }
      else if (t == EXPECT)
        {
          parse_expect_decl();
          t = lex();
        }
      else if (t == START)
        {
          parse_start_decl();
          t = lex();
        }
#endif

      else
        {
          /* Not the start of a rule: report the token and skip it.  */
          warns(_("invalid input: %s"), token_buffer);
          t = lex();
        }
    }

  /* grammar has been read.  Do some checking */

  if (nsyms > MAXSHORT)
    fatals(_("too many symbols (tokens plus nonterminals); maximum %s"),
           int_to_string(MAXSHORT));
  if (nrules == 0)
    fatal(_("no rules in the input grammar"));

  if (typed == 0        /* JF put out same default YYSTYPE as YACC does */
      && !value_components_used)
    {
      /* We used to use `unsigned long' as YYSTYPE on MSDOS,
         but it seems better to be consistent.
         Most programs should declare their own type anyway.  */
      fprintf(fattrs, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
      if (fdefines)
        fprintf(fdefines, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
    }

  /* Report any undefined symbols and consider them nonterminals.  */

  for (bp = firstsymbol; bp; bp = bp->next)
    if (bp->class == SUNKNOWN)
      {
        warns(_("symbol %s is used, but is not defined as a token and has no rules"),
                        bp->tag);
        bp->class = SNTERM;
        bp->value = nvars++;
      }

  /* Every symbol that is not a nonterminal is a token.  */
  ntokens = nsyms - nvars;
}
1753
1754
1755 void
1756 record_rule_line (void)
1757 {
1758   /* Record each rule's source line number in rline table.  */
1759
1760   if (nrules >= rline_allocated)
1761     {
1762       rline_allocated = nrules * 2;
1763       rline = (short *) xrealloc ((char *) rline,
1764                                   rline_allocated * sizeof (short));
1765     }
1766   rline[nrules] = lineno;
1767 }
1768
1769
#if 0
/* Read in a %type declaration and record its information for
   get_type_name to access: the type name is attached to every
   identifier that follows it.  Returns the first token that does not
   belong to the declaration (the token after a terminating
   semicolon, or the unexpected token itself).

   This is unused.  It is only called from the #if 0 part of readgram.

   NOTE(review): the message below contains an unescaped `%type',
   whereas other messages in this file write `%%guard'; confirm how
   warn treats `%' before re-enabling this code.  */
static int
get_type (void)
{
  int tok;
  int len;
  char *type;

  tok = lex();
  if (tok != TYPENAME)
    {
      warn(_("ill-formed %type declaration"));
      return tok;
    }

  /* Copy the type name out of token_buffer.  */
  len = strlen(token_buffer);
  type = NEW2(len + 1, char);
  strcpy(type, token_buffer);

  for (;;)
    {
      tok = lex();

      if (tok == SEMICOLON)
        return (lex());

      /* Commas between the symbols are simply skipped.  */
      if (tok == COMMA)
        continue;

      if (tok != IDENTIFIER)
        return (tok);

      /* Give the symbol this type, or complain if it already has a
         different one.  */
      if (symval->type_name == NULL)
        symval->type_name = type;
      else if (strcmp(type, symval->type_name) != 0)
        warns(_("type redeclaration for %s"), symval->tag);
    }
}
#endif
1818
1819
1820 /* assign symbol numbers, and write definition of token names into fdefines.
1821 Set up vectors tags and sprec of names and precedences of symbols.  */
1822
1823 void
1824 packsymbols (void)
1825 {
1826   register bucket *bp;
1827   register int tokno = 1;
1828   register int i;
1829   register int last_user_token_number;
1830
1831   /* int lossage = 0; JF set but not used */
1832
1833   tags = NEW2(nsyms + 1, char *);
1834   tags[0] = "$";
1835   user_toknums = NEW2(nsyms + 1, int);
1836   user_toknums[0] = 0;
1837
1838   sprec = NEW2(nsyms, short);
1839   sassoc = NEW2(nsyms, short);
1840
1841   max_user_token_number = 256;
1842   last_user_token_number = 256;
1843
1844   for (bp = firstsymbol; bp; bp = bp->next)
1845     {
1846       if (bp->class == SNTERM)
1847         {
1848           bp->value += ntokens;
1849         }
1850       else if (bp->alias)
1851         {
1852                 /* this symbol and its alias are a single token defn.
1853                   allocate a tokno, and assign to both
1854                   check agreement of ->prec and ->assoc fields
1855                         and make both the same
1856                 */
1857                 if (bp->value == 0)
1858                         bp->value = bp->alias->value = tokno++;
1859
1860                 if (bp->prec != bp->alias->prec) {
1861                         if (bp->prec != 0 && bp->alias->prec != 0
1862                                         && bp->user_token_number == SALIAS)
1863                                 warnss(_("conflicting precedences for %s and %s"),
1864                                         bp->tag, bp->alias->tag);
1865                         if (bp->prec != 0) bp->alias->prec = bp->prec;
1866                         else bp->prec = bp->alias->prec;
1867                 }
1868
1869                 if (bp->assoc != bp->alias->assoc) {
1870                         if (bp->assoc != 0 && bp->alias->assoc != 0
1871                                         && bp->user_token_number == SALIAS)
1872                                 warnss(_("conflicting assoc values for %s and %s"),
1873                                         bp->tag, bp->alias->tag);
1874                         if (bp->assoc != 0) bp->alias->assoc = bp->assoc;
1875                         else bp->assoc = bp->alias->assoc;
1876                 }
1877
1878                 if (bp->user_token_number == SALIAS)
1879                         continue;  /* do not do processing below for SALIASs */
1880
1881         }
1882       else  /* bp->class == STOKEN */
1883         {
1884           bp->value = tokno++;
1885         }
1886
1887       if (bp->class == STOKEN)
1888         {
1889           if (translations && !(bp->user_token_number))
1890             bp->user_token_number = ++last_user_token_number;
1891           if (bp->user_token_number > max_user_token_number)
1892             max_user_token_number = bp->user_token_number;
1893         }
1894
1895       tags[bp->value] = bp->tag;
1896       user_toknums[bp->value] = bp->user_token_number;
1897       sprec[bp->value] = bp->prec;
1898       sassoc[bp->value] = bp->assoc;
1899
1900     }
1901
1902   if (translations)
1903     {
1904       register int i;
1905
1906       token_translations = NEW2(max_user_token_number+1, short);
1907
1908       /* initialize all entries for literal tokens to 2,
1909          the internal token number for $undefined.,
1910          which represents all invalid inputs.  */
1911       for (i = 0; i <= max_user_token_number; i++)
1912         token_translations[i] = 2;
1913
1914       for (bp = firstsymbol; bp; bp = bp->next)
1915         {
1916           if (bp->value >= ntokens) continue;             /* non-terminal */
1917           if (bp->user_token_number == SALIAS) continue;
1918           if (token_translations[bp->user_token_number] != 2)
1919             warnsss(_("tokens %s and %s both assigned number %s"),
1920                               tags[token_translations[bp->user_token_number]],
1921                               bp->tag,
1922                               int_to_string(bp->user_token_number));
1923           token_translations[bp->user_token_number] = bp->value;
1924         }
1925     }
1926
1927   error_token_number = errtoken->value;
1928
1929   if (! noparserflag)
1930     output_token_defines(ftable);
1931
1932   if (startval->class == SUNKNOWN)
1933     fatals(_("the start symbol %s is undefined"), startval->tag);
1934   else if (startval->class == STOKEN)
1935     fatals(_("the start symbol %s is a token"), startval->tag);
1936
1937   start_symbol = startval->value;
1938
1939   if (definesflag)
1940     {
1941       output_token_defines(fdefines);
1942
1943       if (!pure_parser)
1944         {
1945           if (spec_name_prefix)
1946             fprintf(fdefines, "\nextern YYSTYPE %slval;\n", spec_name_prefix);
1947           else
1948             fprintf(fdefines, "\nextern YYSTYPE yylval;\n");
1949         }
1950
1951       if (semantic_parser)
1952         for (i = ntokens; i < nsyms; i++)
1953           {
1954             /* don't make these for dummy nonterminals made by gensym.  */
1955             if (*tags[i] != '@')
1956               fprintf(fdefines, "#define\tNT%s\t%d\n", tags[i], i);
1957           }
1958 #if 0
1959       /* `fdefines' is now a temporary file, so we need to copy its
1960          contents in `done', so we can't close it here.  */
1961       fclose(fdefines);
1962       fdefines = NULL;
1963 #endif
1964     }
1965 }
1966
1967 /* For named tokens, but not literal ones, define the name.
1968    The value is the user token number.
1969 */
1970 void
1971 output_token_defines (FILE *file)
1972 {
1973   bucket *bp;
1974   register char *cp, *symbol;
1975   register char c;
1976
1977   for (bp = firstsymbol; bp; bp = bp->next)
1978     {
1979       symbol = bp->tag;                         /* get symbol */
1980
1981       if (bp->value >= ntokens) continue;
1982       if (bp->user_token_number == SALIAS) continue;
1983       if ('\'' == *symbol) continue;    /* skip literal character */
1984       if (bp == errtoken) continue;     /* skip error token */
1985       if ('\"' == *symbol)
1986         {
1987                 /* use literal string only if given a symbol with an alias */
1988                 if (bp->alias)
1989                         symbol = bp->alias->tag;
1990                 else
1991                         continue;
1992         }
1993
1994       /* Don't #define nonliteral tokens whose names contain periods.  */
1995       cp = symbol;
1996       while ((c = *cp++) && c != '.');
1997       if (c != '\0')  continue;
1998
1999       fprintf(file, "#define\t%s\t%d\n", symbol,
2000                 ((translations && ! rawtoknumflag)
2001                         ? bp->user_token_number
2002                         : bp->value));
2003       if (semantic_parser)
2004         fprintf(file, "#define\tT%s\t%d\n", symbol, bp->value);
2005     }
2006
2007   putc('\n', file);
2008 }
2009
2010
2011
2012 /* convert the rules into the representation using rrhs, rlhs and ritems.  */
2013
2014 void
2015 packgram (void)
2016 {
2017   register int itemno;
2018   register int ruleno;
2019   register symbol_list *p;
2020 /*  register bucket *bp; JF unused */
2021
2022   bucket *ruleprec;
2023
2024   ritem = NEW2(nitems + 1, short);
2025   rlhs = NEW2(nrules, short) - 1;
2026   rrhs = NEW2(nrules, short) - 1;
2027   rprec = NEW2(nrules, short) - 1;
2028   rprecsym = NEW2(nrules, short) - 1;
2029   rassoc = NEW2(nrules, short) - 1;
2030
2031   itemno = 0;
2032   ruleno = 1;
2033
2034   p = grammar;
2035   while (p)
2036     {
2037       rlhs[ruleno] = p->sym->value;
2038       rrhs[ruleno] = itemno;
2039       ruleprec = p->ruleprec;
2040
2041       p = p->next;
2042       while (p && p->sym)
2043         {
2044           ritem[itemno++] = p->sym->value;
2045           /* A rule gets by default the precedence and associativity
2046              of the last token in it.  */
2047           if (p->sym->class == STOKEN)
2048             {
2049               rprec[ruleno] = p->sym->prec;
2050               rassoc[ruleno] = p->sym->assoc;
2051             }
2052           if (p) p = p->next;
2053         }
2054
2055       /* If this rule has a %prec,
2056          the specified symbol's precedence replaces the default.  */
2057       if (ruleprec)
2058         {
2059           rprec[ruleno] = ruleprec->prec;
2060           rassoc[ruleno] = ruleprec->assoc;
2061           rprecsym[ruleno] = ruleprec->value;
2062         }
2063
2064       ritem[itemno++] = -ruleno;
2065       ruleno++;
2066
2067       if (p) p = p->next;
2068     }
2069
2070   ritem[itemno] = 0;
2071 }
2072 \f
/* Read a signed integer from STREAM and return its value.

   An optional leading `-' is followed by zero or more decimal digits;
   with no digits the result is 0.  The first character that is not
   part of the number is pushed back onto STREAM.  */

int
read_signed_integer (FILE *stream)
{
  int negative = 0;
  int magnitude = 0;
  int ch;

  ch = getc (stream);
  if (ch == '-')
    {
      negative = 1;
      ch = getc (stream);
    }

  /* Accumulate the decimal digits.  */
  for (; isdigit (ch); ch = getc (stream))
    magnitude = 10 * magnitude + (ch - '0');

  /* Leave the terminating character for the caller.  */
  ungetc (ch, stream);

  return negative ? -magnitude : magnitude;
}