src/parse-gram.y

   1 %{/* Bison Grammar Parser                             -*- C -*-
   2
   3    Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
   4
   5    This file is part of Bison, the GNU Compiler Compiler.
   6
   7    This program is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 2 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program; if not, write to the Free Software
  19    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  20    02110-1301  USA
  21 */
  22
  23 #include <config.h>
  24 #include "system.h"
  25
  26 #include "complain.h"
  27 #include "conflicts.h"
  28 #include "files.h"
  29 #include "getargs.h"
  30 #include "gram.h"
  31 #include "muscle_tab.h"
  32 #include "output.h"
  33 #include "quotearg.h"
  34 #include "reader.h"
  35 #include "symlist.h"
  36 #include "strverscmp.h"
  37
  38 #define YYLLOC_DEFAULT(Current, Rhs, N)  (Current) = lloc_default (Rhs, N)
  39 static YYLTYPE lloc_default (YYLTYPE const *, int);
  40
  41 #define YY_LOCATION_PRINT(File, Loc) \
  42           location_print (File, Loc)
  43
  44 static void version_check (location const *loc, char const *version);
  45
  46 /* Request detailed syntax error messages, and pass them to GRAM_ERROR.
  47    FIXME: depends on the undocumented availability of YYLLOC.  */
  48 #undef  yyerror
  49 #define yyerror(Msg) \
  50         gram_error (&yylloc, Msg)
  51 static void gram_error (location const *, char const *);
  52
  53 static void add_param (char const *, char *, location);
  54
  55 static symbol_class current_class = unknown_sym;
  56 static uniqstr current_type = 0;
  57 static symbol *current_lhs;
  58 static location current_lhs_location;
  59 static int current_prec = 0;
  60
  61 #ifdef UINT_FAST8_MAX
  62 # define YYTYPE_UINT8 uint_fast8_t
  63 #endif
  64 #ifdef INT_FAST8_MAX
  65 # define YYTYPE_INT8 int_fast8_t
  66 #endif
  67 #ifdef UINT_FAST16_MAX
  68 # define YYTYPE_UINT16 uint_fast16_t
  69 #endif
  70 #ifdef INT_FAST16_MAX
  71 # define YYTYPE_INT16 int_fast16_t
  72 #endif
  73 %}
  74
  75 %debug
  76 %verbose
  77 %defines
  78 %locations
  79 %pure-parser
  80 %error-verbose
  81 %defines
  82 %name-prefix="gram_"
  83
  84 %initial-action
  85 {
  86   /* Bison's grammar can have empty initial locations, hence a default
  87      location is needed. */
  88   @$.start.file   = @$.end.file   = current_file;
  89   @$.start.line   = @$.end.line   = 1;
  90   @$.start.column = @$.end.column = 0;
  91 }
  92
  93 /* The types a semantic value may have.  */
  94 %union
  95 {
  96   symbol *symbol;
  97   symbol_list *list;
  98   int integer;
  99   char *chars;
 100   assoc assoc;
 101   uniqstr uniqstr;
 102 };
 103
 104 /* Define the tokens together with their human representation.  */
 105 %token GRAM_EOF 0 "end of file"
 106 %token STRING     "string"
 107 %token INT        "integer"
 108
 109 %token PERCENT_TOKEN       "%token"
 110 %token PERCENT_NTERM       "%nterm"
 111
 112 %token PERCENT_TYPE        "%type"
 113 %token PERCENT_DESTRUCTOR  "%destructor {...}"
 114 %token PERCENT_PRINTER     "%printer {...}"
 115
 116 %token PERCENT_UNION       "%union {...}"
 117
 118 %token PERCENT_LEFT        "%left"
 119 %token PERCENT_RIGHT       "%right"
 120 %token PERCENT_NONASSOC    "%nonassoc"
 121
 122 %token PERCENT_PREC          "%prec"
 123 %token PERCENT_DPREC         "%dprec"
 124 %token PERCENT_MERGE         "%merge"
 125
 126
 127 /*----------------------.
 128 | Global Declarations.  |
 129 `----------------------*/
 130
 131 %token
 132   PERCENT_DEBUG           "%debug"
 133   PERCENT_DEFAULT_PREC    "%default-prec"
 134   PERCENT_DEFINE          "%define"
 135   PERCENT_DEFINES         "%defines"
 136   PERCENT_ERROR_VERBOSE   "%error-verbose"
 137   PERCENT_EXPECT          "%expect"
 138   PERCENT_EXPECT_RR       "%expect-rr"
 139   PERCENT_FILE_PREFIX     "%file-prefix"
 140   PERCENT_GLR_PARSER      "%glr-parser"
 141   PERCENT_INITIAL_ACTION  "%initial-action {...}"
 142   PERCENT_LEX_PARAM       "%lex-param {...}"
 143   PERCENT_LOCATIONS       "%locations"
 144   PERCENT_NAME_PREFIX     "%name-prefix"
 145   PERCENT_NO_DEFAULT_PREC "%no-default-prec"
 146   PERCENT_NO_LINES        "%no-lines"
 147   PERCENT_NONDETERMINISTIC_PARSER
 148                           "%nondeterministic-parser"
 149   PERCENT_OUTPUT          "%output"
 150   PERCENT_PARSE_PARAM     "%parse-param {...}"
 151   PERCENT_PURE_PARSER     "%pure-parser"
 152   PERCENT_REQUIRE         "%require"
 153   PERCENT_SKELETON        "%skeleton"
 154   PERCENT_START           "%start"
 155   PERCENT_TOKEN_TABLE     "%token-table"
 156   PERCENT_VERBOSE         "%verbose"
 157   PERCENT_YACC            "%yacc"
 158 ;
 159
 160 %token TYPE            "type"
 161 %token EQUAL           "="
 162 %token SEMICOLON       ";"
 163 %token PIPE            "|"
 164 %token ID              "identifier"
 165 %token ID_COLON        "identifier:"
 166 %token PERCENT_PERCENT "%%"
 167 %token PROLOGUE        "%{...%}"
 168 %token EPILOGUE        "epilogue"
 169 %token BRACED_CODE     "{...}"
 170
 171
 172 %type <chars> STRING string_content
 173               "%destructor {...}"
 174               "%initial-action {...}"
 175               "%lex-param {...}"
 176               "%parse-param {...}"
 177               "%printer {...}"
 178               "%union {...}"
 179               PROLOGUE EPILOGUE
 180 %printer { fprintf (stderr, "\"%s\"", $$); }
 181               STRING string_content
 182 %printer { fprintf (stderr, "{\n%s\n}", $$); }
 183               "%destructor {...}"
 184               "%initial-action {...}"
 185               "%lex-param {...}"
 186               "%parse-param {...}"
 187               "%printer {...}"
 188               "%union {...}"
 189               PROLOGUE EPILOGUE
 190 %type <uniqstr> TYPE
 191 %printer { fprintf (stderr, "<%s>", $$); } TYPE
 192 %type <integer> INT
 193 %printer { fprintf (stderr, "%d", $$); } INT
 194 %type <symbol> ID symbol string_as_id
 195 %printer { fprintf (stderr, "%s", $$->tag); } ID symbol string_as_id
 196 %type <symbol> ID_COLON
 197 %printer { fprintf (stderr, "%s:", $$->tag); } ID_COLON
 198 %type <assoc> precedence_declarator
 199 %type <list>  symbols.1
 200 %%
 201
 202 input:
 203   declarations "%%" grammar epilogue.opt
 204 ;
 205
 206
 207         /*------------------------------------.
 208         | Declarations: before the first %%.  |
 209         `------------------------------------*/
 210
 211 declarations:
 212   /* Nothing */
 213 | declarations declaration
 214 ;
 215
 216 declaration:
 217   grammar_declaration
 218 | PROLOGUE                                 { prologue_augment ($1, @1); }
 219 | "%debug"                                 { debug_flag = true; }
 220 | "%define" string_content
 221     {
 222       static char one[] = "1";
 223       muscle_insert ($2, one);
 224     }
 225 | "%define" string_content string_content  { muscle_insert ($2, $3); }
 226 | "%defines"                               { defines_flag = true; }
 227 | "%error-verbose"                         { error_verbose = true; }
 228 | "%expect" INT                            { expected_sr_conflicts = $2; }
 229 | "%expect-rr" INT                         { expected_rr_conflicts = $2; }
 230 | "%file-prefix" "=" string_content        { spec_file_prefix = $3; }
 231 | "%glr-parser"
 232     {
 233       nondeterministic_parser = true;
 234       glr_parser = true;
 235     }
 236 | "%initial-action {...}"
 237     {
 238       muscle_code_grow ("initial_action", $1, @1);
 239     }
 240 | "%lex-param {...}"                       { add_param ("lex_param", $1, @1); }
 241 | "%locations"                             { locations_flag = true; }
 242 | "%name-prefix" "=" string_content        { spec_name_prefix = $3; }
 243 | "%no-lines"                              { no_lines_flag = true; }
 244 | "%nondeterministic-parser"               { nondeterministic_parser = true; }
 245 | "%output" "=" string_content             { spec_outfile = $3; }
 246 | "%parse-param {...}"                     { add_param ("parse_param", $1, @1); }
 247 | "%pure-parser"                           { pure_parser = true; }
 248 | "%require" string_content                { version_check (&@2, $2); }
 249 | "%skeleton" string_content               { skeleton = $2; }
 250 | "%token-table"                           { token_table_flag = true; }
 251 | "%verbose"                               { report_flag = report_states; }
 252 | "%yacc"                                  { yacc_flag = true; }
 253 | /*FIXME: Err?  What is this horror doing here? */ ";"
 254 ;
 255
 256 grammar_declaration:
 257   precedence_declaration
 258 | symbol_declaration
 259 | "%start" symbol
 260     {
 261       grammar_start_symbol_set ($2, @2);
 262     }
 263 | "%union {...}"
 264     {
 265       char const *body = $1;
 266
 267       if (typed)
 268         {
 269           /* Concatenate the union bodies, turning the first one's
 270              trailing '}' into '\n', and omitting the second one's '{'.  */
 271           char *code = muscle_find ("stype");
 272           code[strlen (code) - 1] = '\n';
 273           body++;
 274         }
 275
 276       typed = true;
 277       muscle_code_grow ("stype", body, @1);
 278     }
 279 | "%destructor {...}" symbols.1
 280     {
 281       symbol_list *list;
 282       for (list = $2; list; list = list->next)
 283         symbol_destructor_set (list->sym, $1, @1);
 284       symbol_list_free ($2);
 285     }
 286 | "%printer {...}" symbols.1
 287     {
 288       symbol_list *list;
 289       for (list = $2; list; list = list->next)
 290         symbol_printer_set (list->sym, $1, @1);
 291       symbol_list_free ($2);
 292     }
 293 | "%default-prec"
 294     {
 295       default_prec = true;
 296     }
 297 | "%no-default-prec"
 298     {
 299       default_prec = false;
 300     }
 301 ;
 302
 303 symbol_declaration:
 304   "%nterm" { current_class = nterm_sym; } symbol_defs.1
 305     {
 306       current_class = unknown_sym;
 307       current_type = NULL;
 308     }
 309 | "%token" { current_class = token_sym; } symbol_defs.1
 310     {
 311       current_class = unknown_sym;
 312       current_type = NULL;
 313     }
 314 | "%type" TYPE symbols.1
 315     {
 316       symbol_list *list;
 317       for (list = $3; list; list = list->next)
 318         symbol_type_set (list->sym, $2, @2);
 319       symbol_list_free ($3);
 320     }
 321 ;
 322
 323 precedence_declaration:
 324   precedence_declarator type.opt symbols.1
 325     {
 326       symbol_list *list;
 327       ++current_prec;
 328       for (list = $3; list; list = list->next)
 329         {
 330           symbol_type_set (list->sym, current_type, @2);
 331           symbol_precedence_set (list->sym, current_prec, $1, @1);
 332         }
 333       symbol_list_free ($3);
 334       current_type = NULL;
 335     }
 336 ;
 337
 338 precedence_declarator:
 339   "%left"     { $$ = left_assoc; }
 340 | "%right"    { $$ = right_assoc; }
 341 | "%nonassoc" { $$ = non_assoc; }
 342 ;
 343
 344 type.opt:
 345   /* Nothing. */ { current_type = NULL; }
 346 | TYPE           { current_type = $1; }
 347 ;
 348
 349 /* One or more symbols (for %type, %destructor, %printer, %left...).  */
 350
 351 symbols.1:
 352   symbol            { $$ = symbol_list_new ($1, @1); }
 353 | symbols.1 symbol  { $$ = symbol_list_prepend ($1, $2, @2); }
 354 ;
 355
 356 /* One token definition.  */
 357 symbol_def:
 358   TYPE
 359      {
 360        current_type = $1;
 361      }
 362 | ID
 363      {
 364        symbol_class_set ($1, current_class, @1, true);
 365        symbol_type_set ($1, current_type, @1);
 366      }
 367 | ID INT
 368     {
 369       symbol_class_set ($1, current_class, @1, true);
 370       symbol_type_set ($1, current_type, @1);
 371       symbol_user_token_number_set ($1, $2, @2);
 372     }
 373 | ID string_as_id
 374     {
 375       symbol_class_set ($1, current_class, @1, true);
 376       symbol_type_set ($1, current_type, @1);
 377       symbol_make_alias ($1, $2, @$);
 378     }
 379 | ID INT string_as_id
 380     {
 381       symbol_class_set ($1, current_class, @1, true);
 382       symbol_type_set ($1, current_type, @1);
 383       symbol_user_token_number_set ($1, $2, @2);
 384       symbol_make_alias ($1, $3, @$);
 385     }
 386 ;
 387
 388 /* One or more symbol definitions. */
 389 symbol_defs.1:
 390   symbol_def
 391 | symbol_defs.1 symbol_def
 392 ;
 393
 394
 395         /*------------------------------------------.
 396         | The grammar section: between the two %%.  |
 397         `------------------------------------------*/
 398
 399 grammar:
 400   rules_or_grammar_declaration
 401 | grammar rules_or_grammar_declaration
 402 ;
 403
 404 /* As a Bison extension, one can use the grammar declarations in the
 405    body of the grammar.  */
 406 rules_or_grammar_declaration:
 407   rules
 408 | grammar_declaration ";"
 409 | error ";"
 410     {
 411       yyerrok;
 412     }
 413 ;
 414
 415 rules:
 416   ID_COLON { current_lhs = $1; current_lhs_location = @1; } rhses.1
 417 ;
 418
 419 rhses.1:
 420   rhs                { grammar_current_rule_end (@1); }
 421 | rhses.1 "|" rhs    { grammar_current_rule_end (@3); }
 422 | rhses.1 ";"
 423 ;
 424
 425 rhs:
 426   /* Nothing.  */
 427     { grammar_current_rule_begin (current_lhs, current_lhs_location); }
 428 | rhs symbol
 429     { grammar_current_rule_symbol_append ($2, @2); }
 430 | rhs action
 431 | rhs "%prec" symbol
 432     { grammar_current_rule_prec_set ($3, @3); }
 433 | rhs "%dprec" INT
 434     { grammar_current_rule_dprec_set ($3, @3); }
 435 | rhs "%merge" TYPE
 436     { grammar_current_rule_merge_set ($3, @3); }
 437 ;
 438
 439 symbol:
 440   ID              { $$ = $1; }
 441 | string_as_id    { $$ = $1; }
 442 ;
 443
 444 /* Handle the semantics of an action specially, with a mid-rule
 445    action, so that grammar_current_rule_action_append is invoked
 446    immediately after the braced code is read by the scanner.
 447
 448    This implementation relies on the LALR(1) parsing algorithm.
 449    If grammar_current_rule_action_append were executed in a normal
 450    action for this rule, then when the input grammar contains two
 451    successive actions, the scanner would have to read both actions
 452    before reducing this rule.  That wouldn't work, since the scanner
 453    relies on all preceding input actions being processed by
 454    grammar_current_rule_action_append before it scans the next
 455    action.  */
 456 action:
 457     { grammar_current_rule_action_append (last_string, last_braced_code_loc); }
 458   BRACED_CODE
 459 ;
 460
 461 /* A string used as an ID: quote it.  */
 462 string_as_id:
 463   STRING
 464     {
 465       $$ = symbol_get (quotearg_style (c_quoting_style, $1), @1);
 466       symbol_class_set ($$, token_sym, @1, false);
 467     }
 468 ;
 469
 470 /* A string used for its contents.  Don't quote it.  */
 471 string_content:
 472   STRING
 473     { $$ = $1; }
 474 ;
 475
 476
 477 epilogue.opt:
 478   /* Nothing.  */
 479 | "%%" EPILOGUE
 480     {
 481       muscle_code_grow ("epilogue", $2, @2);
 482       scanner_last_string_free ();
 483     }
 484 ;
 485
 486 %%
 487
 488
 489 /* Return the location of the left-hand side of a rule whose
 490    right-hand side is RHS[1] ... RHS[N].  Ignore empty nonterminals in
 491    the right-hand side, and return an empty location equal to the end
 492    boundary of RHS[0] if the right-hand side is empty.  */
 493
 494 static YYLTYPE
 495 lloc_default (YYLTYPE const *rhs, int n)
 496 {
 497   int i;
 498   YYLTYPE loc;
 499
 500   /* SGI MIPSpro 7.4.1m miscompiles "loc.start = loc.end = rhs[n].end;".
 501      The bug is fixed in 7.4.2m, but play it safe for now.  */
 502   loc.start = rhs[n].end;
 503   loc.end = rhs[n].end;
 504
 505   /* Ignore empty nonterminals the start of the the right-hand side.
 506      Do not bother to ignore them at the end of the right-hand side,
 507      since empty nonterminals have the same end as their predecessors.  */
 508   for (i = 1; i <= n; i++)
 509     if (! equal_boundaries (rhs[i].start, rhs[i].end))
 510       {
 511         loc.start = rhs[i].start;
 512         break;
 513       }
 514
 515   return loc;
 516 }
 517
 518
 519 /* Add a lex-param or a parse-param (depending on TYPE) with
 520    declaration DECL and location LOC.  */
 521
 522 static void
 523 add_param (char const *type, char *decl, location loc)
 524 {
 525   static char const alphanum[26 + 26 + 1 + 10] =
 526     "abcdefghijklmnopqrstuvwxyz"
 527     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 528     "_"
 529     "0123456789";
 530   char const *name_start = NULL;
 531   char *p;
 532
 533   /* Stop on last actual character.  */
 534   for (p = decl; p[1]; p++)
 535     if ((p == decl
 536          || ! memchr (alphanum, p[-1], sizeof alphanum))
 537         && memchr (alphanum, p[0], sizeof alphanum - 10))
 538       name_start = p;
 539
 540   /* Strip the surrounding '{' and '}', and any blanks just inside
 541      the braces.  */
 542   while (*--p == ' ' || *p == '\t')
 543     continue;
 544   p[1] = '\0';
 545   while (*++decl == ' ' || *decl == '\t')
 546     continue;
 547
 548   if (! name_start)
 549     complain_at (loc, _("missing identifier in parameter declaration"));
 550   else
 551     {
 552       char *name;
 553       size_t name_len;
 554
 555       for (name_len = 1;
 556            memchr (alphanum, name_start[name_len], sizeof alphanum);
 557            name_len++)
 558         continue;
 559
 560       name = xmalloc (name_len + 1);
 561       memcpy (name, name_start, name_len);
 562       name[name_len] = '\0';
 563       muscle_pair_list_grow (type, decl, name);
 564       free (name);
 565     }
 566
 567   scanner_last_string_free ();
 568 }
 569
 570 static void
 571 version_check (location const *loc, char const *version)
 572 {
 573   if (strverscmp (version, PACKAGE_VERSION) > 0)
 574     {
 575       complain_at (*loc, "require bison %s, but have %s",
 576                    version, PACKAGE_VERSION);
 577       exit (63);
 578     }
 579 }
 580
 581 static void
 582 gram_error (location const *loc, char const *msg)
 583 {
 584   complain_at (*loc, "%s", msg);
 585 }
 586
 587 char const *
 588 token_name (int type)
 589 {
 590   return yytname[YYTRANSLATE (type)];
 591 }