1 /* Input parser for Bison 
   3    Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000-2003, 2005-2007, 
   4    2009-2011 Free Software Foundation, Inc. 
   6    This file is part of Bison, the GNU Compiler Compiler. 
   8    This program is free software: you can redistribute it and/or modify 
   9    it under the terms of the GNU General Public License as published by 
  10    the Free Software Foundation, either version 3 of the License, or 
  11    (at your option) any later version. 
  13    This program is distributed in the hope that it will be useful, 
  14    but WITHOUT ANY WARRANTY; without even the implied warranty of 
  15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  16    GNU General Public License for more details. 
  18    You should have received a copy of the GNU General Public License 
  19    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */ 
  28 #include "conflicts.h" 
  32 #include "muscle-tab.h" 
  36 #include "scan-gram.h" 
  37 #include "scan-code.h" 
  39 static void prepare_percent_define_front_end_variables (void); 
  40 static void check_and_convert_grammar (void); 
  42 static symbol_list 
*grammar 
= NULL
; 
  43 static bool start_flag 
= false; 
  44 merger_list 
*merge_functions
; 
  46 /* Was %union seen?  */ 
  47 bool union_seen 
= false; 
  50 bool tag_seen 
= false; 
  52 /* Should rules have a default precedence?  */ 
  53 bool default_prec 
= true; 
  55 /*-----------------------. 
  56 | Set the start symbol.  | 
  57 `-----------------------*/ 
  60 grammar_start_symbol_set (symbol 
*sym
, location loc
) 
  63     complain_at (loc
, _("multiple %s declarations"), "%start"); 
  68       startsymbol_location 
= loc
; 
  74 /*------------------------------------------------------------------------. 
  75 | Return the merger index for a merging function named NAME.  Records the | 
  76 | function, if new, in MERGER_LIST.                                       | 
  77 `------------------------------------------------------------------------*/ 
  80 get_merge_function (uniqstr name
) 
  89   head
.next 
= merge_functions
; 
  90   for (syms 
= &head
, n 
= 1; syms
->next
; syms 
= syms
->next
, n 
+= 1) 
  91     if (UNIQSTR_EQ (name
, syms
->next
->name
)) 
  93   if (syms
->next 
== NULL
) 
  95       syms
->next 
= xmalloc (sizeof syms
->next
[0]); 
  96       syms
->next
->name 
= uniqstr_new (name
); 
  97       /* After all symbol type declarations have been parsed, packgram invokes 
  98          record_merge_function_type to set the type.  */ 
  99       syms
->next
->type 
= NULL
; 
 100       syms
->next
->next 
= NULL
; 
 101       merge_functions 
= head
.next
; 
 106 /*-------------------------------------------------------------------------. 
 107 | For the existing merging function with index MERGER, record the result   | 
 108 | type as TYPE as required by the lhs of the rule whose %merge declaration | 
 109 | is at DECLARATION_LOC.                                                   | 
 110 `-------------------------------------------------------------------------*/ 
 113 record_merge_function_type (int merger
, uniqstr type
, location declaration_loc
) 
 116   merger_list 
*merge_function
; 
 122     type 
= uniqstr_new (""); 
 125   for (merge_function 
= merge_functions
; 
 126        merge_function 
!= NULL 
&& merger_find 
!= merger
; 
 127        merge_function 
= merge_function
->next
) 
 129   aver (merge_function 
!= NULL 
&& merger_find 
== merger
); 
 130   if (merge_function
->type 
!= NULL 
&& !UNIQSTR_EQ (merge_function
->type
, type
)) 
 132       complain_at (declaration_loc
, 
 133                    _("result type clash on merge function `%s': <%s> != <%s>"), 
 134                    merge_function
->name
, type
, merge_function
->type
); 
 135       complain_at (merge_function
->type_declaration_location
, 
 136                    _("previous declaration")); 
 138   merge_function
->type 
= uniqstr_new (type
); 
 139   merge_function
->type_declaration_location 
= declaration_loc
; 
 142 /*--------------------------------------. 
 143 | Free all merge-function definitions.  | 
 144 `--------------------------------------*/ 
 147 free_merger_functions (void) 
 149   merger_list 
*L0 
= merge_functions
; 
 152       merger_list 
*L1 
= L0
->next
; 
 159 /*-------------------------------------------------------------------. 
 160 | Parse the input grammar into a one symbol_list structure.  Each    | 
 161 | rule is represented by a sequence of symbols: the left hand side   | 
 162 | followed by the contents of the right hand side, followed by a     | 
 163 | null pointer instead of a symbol to terminate the rule.  The next  | 
 164 | symbol is the lhs of the following rule.                           | 
 166 | All actions are copied out, labelled by the rule number they apply | 
 168 `-------------------------------------------------------------------*/ 
 170 /* The (currently) last symbol of GRAMMAR. */ 
 171 static symbol_list 
*grammar_end 
= NULL
; 
 173 /* Append SYM to the grammar.  */ 
 175 grammar_symbol_append (symbol 
*sym
, location loc
) 
 177   symbol_list 
*p 
= symbol_list_sym_new (sym
, loc
); 
 180     grammar_end
->next 
= p
; 
 186   /* A null SYM stands for an end of rule; it is not an actual 
 195 assign_named_ref (symbol_list 
*p
, named_ref 
*name
) 
 197   symbol 
*sym 
= p
->content
.sym
; 
 199   if (name
->id 
== sym
->tag
) 
 202                _("duplicated symbol name for %s ignored"), 
 204       named_ref_free (name
); 
 211 /* The rule currently being defined, and the previous rule. 
 212    CURRENT_RULE points to the first LHS of the current rule, while 
 213    PREVIOUS_RULE_END points to the *end* of the previous rule (NULL).  */ 
 214 static symbol_list 
*current_rule 
= NULL
; 
 215 static symbol_list 
*previous_rule_end 
= NULL
; 
 218 /*----------------------------------------------. 
 219 | Create a new rule for LHS in to the GRAMMAR.  | 
 220 `----------------------------------------------*/ 
 223 grammar_current_rule_begin (symbol 
*lhs
, location loc
, 
 228   /* Start a new rule and record its lhs.  */ 
 230   previous_rule_end 
= grammar_end
; 
 232   p 
= grammar_symbol_append (lhs
, loc
); 
 234     assign_named_ref(p
, lhs_name
); 
 236   current_rule 
= grammar_end
; 
 238   /* Mark the rule's lhs as a nonterminal if not already so.  */ 
 239   if (lhs
->class == unknown_sym
) 
 241       lhs
->class = nterm_sym
; 
 245   else if (lhs
->class == token_sym
) 
 246     complain_at (loc
, _("rule given for %s, which is a token"), lhs
->tag
); 
 250 /*----------------------------------------------------------------------. 
 251 | A symbol should be used if either:                                    | 
 252 |   1. It has a destructor.                                             | 
 253 |   2. --warnings=midrule-values and the symbol is a mid-rule symbol    | 
 254 |      (i.e., the generated LHS replacing a mid-rule action) that was   | 
 255 |      assigned to or used, as in "exp: { $$ = 1; } { $$ = $1; }".      | 
 256 `----------------------------------------------------------------------*/ 
 259 symbol_should_be_used (symbol_list 
const *s
) 
 261   if (symbol_destructor_get (s
->content
.sym
)->code
) 
 263   if (warnings_flag 
& warnings_midrule_values
) 
 264     return ((s
->midrule 
&& s
->midrule
->action_props
.is_value_used
) 
 265             || (s
->midrule_parent_rule
 
 266                 && symbol_list_n_get (s
->midrule_parent_rule
, 
 267                                       s
->midrule_parent_rhs_index
) 
 268                      ->action_props
.is_value_used
)); 
 272 /*----------------------------------------------------------------. 
 273 | Check that the rule R is properly defined.  For instance, there | 
 274 | should be no type clash on the default action.                  | 
 275 `----------------------------------------------------------------*/ 
 278 grammar_rule_check (const symbol_list 
*r
) 
 282      If there is an action, then there is nothing we can do: the user 
 283      is allowed to shoot herself in the foot. 
 285      Don't worry about the default action if $$ is untyped, since $$'s 
 286      value can't be used.  */ 
 287   if (!r
->action_props
.code 
&& r
->content
.sym
->type_name
) 
 289       symbol 
*first_rhs 
= r
->next
->content
.sym
; 
 290       /* If $$ is being set in default way, report if any type mismatch.  */ 
 293           char const *lhs_type 
= r
->content
.sym
->type_name
; 
 294           const char *rhs_type 
= 
 295             first_rhs
->type_name 
? first_rhs
->type_name 
: ""; 
 296           if (!UNIQSTR_EQ (lhs_type
, rhs_type
)) 
 297             warn_at (r
->location
, 
 298                      _("type clash on default action: <%s> != <%s>"), 
 301       /* Warn if there is no default for $$ but we need one.  */ 
 303         warn_at (r
->location
, 
 304                  _("empty rule for typed nonterminal, and no action")); 
 307   /* Check that symbol values that should be used are in fact used.  */ 
 309     symbol_list 
const *l 
= r
; 
 311     for (; l 
&& l
->content
.sym
; l 
= l
->next
, ++n
) 
 312       if (! (l
->action_props
.is_value_used
 
 313              || !symbol_should_be_used (l
) 
 314              /* The default action, $$ = $1, `uses' both.  */ 
 315              || (!r
->action_props
.code 
&& (n 
== 0 || n 
== 1)))) 
 318             warn_at (r
->location
, _("unused value: $%d"), n
); 
 320             warn_at (r
->location
, _("unset value: $$")); 
 324   /* See comments in grammar_current_rule_prec_set for how POSIX 
 325      mandates this complaint.  It's only for identifiers, so skip 
 326      it for char literals and strings, which are always tokens.  */ 
 328       && r
->ruleprec
->tag
[0] != '\'' && r
->ruleprec
->tag
[0] != '"' 
 329       && !r
->ruleprec
->declared 
&& !r
->ruleprec
->prec
) 
 330     warn_at (r
->location
, _("token for %%prec is not defined: %s"), 
 335 /*-------------------------------------. 
 336 | End the currently being grown rule.  | 
 337 `-------------------------------------*/ 
 340 grammar_current_rule_end (location loc
) 
 342   /* Put an empty link in the list to mark the end of this rule  */ 
 343   grammar_symbol_append (NULL
, grammar_end
->location
); 
 344   current_rule
->location 
= loc
; 
 348 /*-------------------------------------------------------------------. 
 349 | The previous action turns out the be a mid-rule action.  Attach it | 
 350 | to the current rule, i.e., create a dummy symbol, attach it this   | 
 351 | mid-rule action, and append this dummy nonterminal to the current  | 
 353 `-------------------------------------------------------------------*/ 
 356 grammar_midrule_action (void) 
 358   /* Since the action was written out with this rule's number, we must 
 359      give the new rule this number by inserting the new rule before 
 362   /* Make a DUMMY nonterminal, whose location is that of the midrule 
 363      action.  Create the MIDRULE.  */ 
 364   location dummy_location 
= current_rule
->action_props
.location
; 
 365   symbol 
*dummy 
= dummy_symbol_get (dummy_location
); 
 366   symbol_list 
*midrule 
= symbol_list_sym_new (dummy
, dummy_location
); 
 368   /* Remember named_ref of previous action. */ 
 369   named_ref 
*action_name 
= current_rule
->action_props
.named_ref
; 
 371   /* Make a new rule, whose body is empty, before the current one, so 
 372      that the action just read can belong to it.  */ 
 375   /* Attach its location and actions to that of the DUMMY.  */ 
 376   midrule
->location 
= dummy_location
; 
 377   code_props_rule_action_init (&midrule
->action_props
, 
 378                                current_rule
->action_props
.code
, 
 379                                current_rule
->action_props
.location
, 
 381                                current_rule
->action_props
.is_predicate
); 
 382   code_props_none_init (¤t_rule
->action_props
); 
 384   if (previous_rule_end
) 
 385     previous_rule_end
->next 
= midrule
; 
 389   /* End the dummy's rule.  */ 
 390   midrule
->next 
= symbol_list_sym_new (NULL
, dummy_location
); 
 391   midrule
->next
->next 
= current_rule
; 
 393   previous_rule_end 
= midrule
->next
; 
 395   /* Insert the dummy nonterminal replacing the midrule action into 
 396      the current rule.  Bind it to its dedicated rule.  */ 
 397   grammar_current_rule_symbol_append (dummy
, dummy_location
, 
 399   grammar_end
->midrule 
= midrule
; 
 400   midrule
->midrule_parent_rule 
= current_rule
; 
 401   midrule
->midrule_parent_rhs_index 
= symbol_list_length (current_rule
->next
); 
 404 /* Set the precedence symbol of the current rule to PRECSYM. */ 
 407 grammar_current_rule_prec_set (symbol 
*precsym
, location loc
) 
 409   /* POSIX says that any identifier is a nonterminal if it does not 
 410      appear on the LHS of a grammar rule and is not defined by %token 
 411      or by one of the directives that assigns precedence to a token.  We 
 412      ignore this here because the only kind of identifier that POSIX 
 413      allows to follow a %prec is a token and because assuming it's a 
 414      token now can produce more logical error messages.  Nevertheless, 
 415      grammar_rule_check does obey what we believe is the real intent of 
 416      POSIX here: that an error be reported for any identifier that 
 417      appears after %prec but that is not defined separately as a 
 419   symbol_class_set (precsym
, token_sym
, loc
, false); 
 420   if (current_rule
->ruleprec
) 
 421     complain_at (loc
, _("only one %s allowed per rule"), "%prec"); 
 422   current_rule
->ruleprec 
= precsym
; 
 425 /* Attach dynamic precedence DPREC to the current rule. */ 
 428 grammar_current_rule_dprec_set (int dprec
, location loc
) 
 431     warn_at (loc
, _("%s affects only GLR parsers"), "%dprec"); 
 433     complain_at (loc
, _("%s must be followed by positive number"), "%dprec"); 
 434   else if (current_rule
->dprec 
!= 0) 
 435     complain_at (loc
, _("only one %s allowed per rule"), "%dprec"); 
 436   current_rule
->dprec 
= dprec
; 
 439 /* Attach a merge function NAME with argument type TYPE to current 
 443 grammar_current_rule_merge_set (uniqstr name
, location loc
) 
 446     warn_at (loc
, _("%s affects only GLR parsers"), "%merge"); 
 447   if (current_rule
->merger 
!= 0) 
 448     complain_at (loc
, _("only one %s allowed per rule"), "%merge"); 
 449   current_rule
->merger 
= get_merge_function (name
); 
 450   current_rule
->merger_declaration_location 
= loc
; 
 453 /* Attach SYM to the current rule.  If needed, move the previous 
 454    action as a mid-rule action.  */ 
 457 grammar_current_rule_symbol_append (symbol 
*sym
, location loc
, 
 461   if (current_rule
->action_props
.code
) 
 462     grammar_midrule_action (); 
 463   p 
= grammar_symbol_append (sym
, loc
); 
 465     assign_named_ref(p
, name
); 
 468 /* Attach an ACTION to the current rule.  */ 
 471 grammar_current_rule_action_append (const char *action
, location loc
, 
 472                                     named_ref 
*name
, bool is_predicate
) 
 474   if (current_rule
->action_props
.code
) 
 475     grammar_midrule_action (); 
 476   /* After all symbol declarations have been parsed, packgram invokes 
 477      code_props_translate_code.  */ 
 478   code_props_rule_action_init (¤t_rule
->action_props
, action
, loc
, 
 479                                current_rule
, name
, is_predicate
); 
 483 /*---------------------------------------------------------------. 
 484 | Convert the rules into the representation using RRHS, RLHS and | 
 486 `---------------------------------------------------------------*/ 
 491   unsigned int itemno 
= 0; 
 492   rule_number ruleno 
= 0; 
 493   symbol_list 
*p 
= grammar
; 
 495   ritem 
= xnmalloc (nritems 
+ 1, sizeof *ritem
); 
 497   /* This sentinel is used by build_relations in gram.c.  */ 
 500   rules 
= xnmalloc (nrules
, sizeof *rules
); 
 505       symbol 
*ruleprec 
= p
->ruleprec
; 
 506       record_merge_function_type (p
->merger
, p
->content
.sym
->type_name
, 
 507                                   p
->merger_declaration_location
); 
 508       rules
[ruleno
].user_number 
= ruleno
; 
 509       rules
[ruleno
].number 
= ruleno
; 
 510       rules
[ruleno
].lhs 
= p
->content
.sym
; 
 511       rules
[ruleno
].rhs 
= ritem 
+ itemno
; 
 512       rules
[ruleno
].prec 
= NULL
; 
 513       rules
[ruleno
].dprec 
= p
->dprec
; 
 514       rules
[ruleno
].merger 
= p
->merger
; 
 515       rules
[ruleno
].precsym 
= NULL
; 
 516       rules
[ruleno
].location 
= p
->location
; 
 517       rules
[ruleno
].useful 
= true; 
 518       rules
[ruleno
].action 
= p
->action_props
.code
; 
 519       rules
[ruleno
].action_location 
= p
->action_props
.location
; 
 520       rules
[ruleno
].is_predicate 
= p
->action_props
.is_predicate
; 
 522       /* If the midrule's $$ is set or its $n is used, remove the `$' from the 
 523          symbol name so that it's a user-defined symbol so that the default 
 524          %destructor and %printer apply.  */ 
 525       if (p
->midrule_parent_rule
 
 526           && (p
->action_props
.is_value_used
 
 527               || symbol_list_n_get (p
->midrule_parent_rule
, 
 528                                     p
->midrule_parent_rhs_index
) 
 529                    ->action_props
.is_value_used
)) 
 530         p
->content
.sym
->tag 
+= 1; 
 532       /* Don't check the generated rule 0.  It has no action, so some rhs 
 533          symbols may appear unused, but the parsing algorithm ensures that 
 534          %destructor's are invoked appropriately.  */ 
 536         grammar_rule_check (p
); 
 538       for (p 
= p
->next
; p 
&& p
->content
.sym
; p 
= p
->next
) 
 542           /* Don't allow rule_length == INT_MAX, since that might 
 543              cause confusion with strtol if INT_MAX == LONG_MAX.  */ 
 544           if (rule_length 
== INT_MAX
) 
 545               fatal_at (rules
[ruleno
].location
, _("rule is too long")); 
 547           /* item_number = symbol_number. 
 548              But the former needs to contain more: negative rule numbers. */ 
 550             symbol_number_as_item_number (p
->content
.sym
->number
); 
 551           /* A rule gets by default the precedence and associativity 
 552              of its last token.  */ 
 553           if (p
->content
.sym
->class == token_sym 
&& default_prec
) 
 554             rules
[ruleno
].prec 
= p
->content
.sym
; 
 557       /* If this rule has a %prec, 
 558          the specified symbol's precedence replaces the default.  */ 
 561           rules
[ruleno
].precsym 
= ruleprec
; 
 562           rules
[ruleno
].prec 
= ruleprec
; 
 564       /* An item ends by the rule number (negated).  */ 
 565       ritem
[itemno
++] = rule_number_as_item_number (ruleno
); 
 566       aver (itemno 
< ITEM_NUMBER_MAX
); 
 568       aver (ruleno 
< RULE_NUMBER_MAX
); 
 574   aver (itemno 
== nritems
); 
 576   if (trace_flag 
& trace_sets
) 
 577     ritem_print (stderr
); 
 580 /*------------------------------------------------------------------. 
 581 | Read in the grammar specification and record it in the format     | 
 582 | described in gram.h.  All actions are copied into ACTION_OBSTACK, | 
 583 | in each case forming the body of a C function (YYACTION) which    | 
 584 | contains a switch statement to decide which action to execute.    | 
 585 `------------------------------------------------------------------*/ 
 590   /* Initialize the symbol table.  */ 
 593   /* Construct the accept symbol. */ 
 594   accept 
= symbol_get ("$accept", empty_location
); 
 595   accept
->class = nterm_sym
; 
 596   accept
->number 
= nvars
++; 
 598   /* Construct the error token */ 
 599   errtoken 
= symbol_get ("error", empty_location
); 
 600   errtoken
->class = token_sym
; 
 601   errtoken
->number 
= ntokens
++; 
 603   /* Construct a token that represents all undefined literal tokens. 
 604      It is always token number 2.  */ 
 605   undeftoken 
= symbol_get ("$undefined", empty_location
); 
 606   undeftoken
->class = token_sym
; 
 607   undeftoken
->number 
= ntokens
++; 
 609   gram_in 
= xfopen (grammar_file
, "r"); 
 611   gram__flex_debug 
= trace_flag 
& trace_scan
; 
 612   gram_debug 
= trace_flag 
& trace_parse
; 
 613   gram_scanner_initialize (); 
 615   prepare_percent_define_front_end_variables (); 
 617   if (! complaint_issued
) 
 618     check_and_convert_grammar (); 
 624 prepare_percent_define_front_end_variables (void) 
 626   /* Set %define front-end variable defaults.  */ 
 627   muscle_percent_define_default ("lr.keep-unreachable-states", "false"); 
 630     /* IELR would be a better default, but LALR is historically the 
 632     muscle_percent_define_default ("lr.type", "lalr"); 
 633     lr_type 
= muscle_percent_define_get ("lr.type"); 
 634     if (0 != strcmp (lr_type
, "canonical-lr")) 
 635       muscle_percent_define_default ("lr.default-reductions", "all"); 
 637       muscle_percent_define_default ("lr.default-reductions", "accepting"); 
 641   /* Check %define front-end variables.  */ 
 643     static char const * const values
[] = { 
 644       "lr.type", "lalr", "ielr", "canonical-lr", NULL
, 
 645       "lr.default-reductions", "all", "consistent", "accepting", NULL
, 
 648     muscle_percent_define_check_values (values
); 
 653 /*-------------------------------------------------------------. 
 654 | Check the grammar that has just been read, and convert it to | 
 656 `-------------------------------------------------------------*/ 
 659 check_and_convert_grammar (void) 
 661   /* Grammar has been read.  Do some checking.  */ 
 663     fatal (_("no rules in the input grammar")); 
 665   /* If the user did not define her ENDTOKEN, do it now. */ 
 668       endtoken 
= symbol_get ("$end", empty_location
); 
 669       endtoken
->class = token_sym
; 
 670       endtoken
->number 
= 0; 
 671       /* Value specified by POSIX.  */ 
 672       endtoken
->user_token_number 
= 0; 
 675   /* Report any undefined symbols and consider them nonterminals.  */ 
 676   symbols_check_defined (); 
 678   /* Find the start symbol if no %start.  */ 
 683            node 
!= NULL 
&& symbol_is_dummy (node
->content
.sym
); 
 686           for (node 
= node
->next
; 
 687                node 
!= NULL 
&& node
->content
.sym 
!= NULL
; 
 692       grammar_start_symbol_set (node
->content
.sym
, 
 693                                 node
->content
.sym
->location
); 
 696   /* Insert the initial rule, whose line is that of the first rule 
 697      (not that of the start symbol): 
 699      accept: %start EOF.  */ 
 701     symbol_list 
*p 
= symbol_list_sym_new (accept
, empty_location
); 
 702     p
->location 
= grammar
->location
; 
 703     p
->next 
= symbol_list_sym_new (startsymbol
, empty_location
); 
 704     p
->next
->next 
= symbol_list_sym_new (endtoken
, empty_location
); 
 705     p
->next
->next
->next 
= symbol_list_sym_new (NULL
, empty_location
); 
 706     p
->next
->next
->next
->next 
= grammar
; 
 712   aver (nsyms 
<= SYMBOL_NUMBER_MAXIMUM 
&& nsyms 
== ntokens 
+ nvars
); 
 714   /* Assign the symbols their symbol numbers.  Write #defines for the 
 715      token symbols into FDEFINES if requested.  */ 
 718   /* Scan rule actions after invoking symbol_check_alias_consistency (in 
 719      symbols_pack above) so that token types are set correctly before the rule 
 720      action type checking. 
 722      Before invoking grammar_rule_check (in packgram below) on any rule, make 
 723      sure all actions have already been scanned in order to set `used' flags. 
 724      Otherwise, checking that a midrule's $$ should be set will not always work 
 725      properly because the check must forward-reference the midrule's parent 
 726      rule.  For the same reason, all the `used' flags must be set before 
 727      checking whether to remove `$' from any midrule symbol name (also in 
 731     for (sym 
= grammar
; sym
; sym 
= sym
->next
) 
 732       code_props_translate_code (&sym
->action_props
); 
 735   /* Convert the grammar into the format described in gram.h.  */ 
 738   /* The grammar as a symbol_list is no longer needed. */ 
 739   symbol_list_free (grammar
);