1 /* Input parser for Bison 
   3    Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000-2003, 2005-2007, 
   4    2009-2010 Free Software Foundation, Inc. 
   6    This file is part of Bison, the GNU Compiler Compiler. 
   8    This program is free software: you can redistribute it and/or modify 
   9    it under the terms of the GNU General Public License as published by 
  10    the Free Software Foundation, either version 3 of the License, or 
  11    (at your option) any later version. 
  13    This program is distributed in the hope that it will be useful, 
  14    but WITHOUT ANY WARRANTY; without even the implied warranty of 
  15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  16    GNU General Public License for more details. 
  18    You should have received a copy of the GNU General Public License 
  19    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */ 
  28 #include "conflicts.h" 
  32 #include "muscle-tab.h" 
  36 #include "scan-gram.h" 
  37 #include "scan-code.h" 
  39 static void prepare_percent_define_front_end_variables (void); 
  40 static void check_and_convert_grammar (void); 
  42 static symbol_list 
*grammar 
= NULL
; 
  43 static bool start_flag 
= false; 
  44 merger_list 
*merge_functions
; 
  46 /* Was %union seen?  */ 
  47 bool union_seen 
= false; 
  50 bool tag_seen 
= false; 
  52 /* Should rules have a default precedence?  */ 
  53 bool default_prec 
= true; 
  55 /*-----------------------. 
  56 | Set the start symbol.  | 
  57 `-----------------------*/ 
  60 grammar_start_symbol_set (symbol 
*sym
, location loc
) 
  63     complain_at (loc
, _("multiple %s declarations"), "%start"); 
  68       startsymbol_location 
= loc
; 
  74 /*------------------------------------------------------------------------. 
  75 | Return the merger index for a merging function named NAME.  Records the | 
  76 | function, if new, in MERGER_LIST.                                       | 
  77 `------------------------------------------------------------------------*/ 
  80 get_merge_function (uniqstr name
) 
  89   head
.next 
= merge_functions
; 
  90   for (syms 
= &head
, n 
= 1; syms
->next
; syms 
= syms
->next
, n 
+= 1) 
  91     if (UNIQSTR_EQ (name
, syms
->next
->name
)) 
  93   if (syms
->next 
== NULL
) 
  95       syms
->next 
= xmalloc (sizeof syms
->next
[0]); 
  96       syms
->next
->name 
= uniqstr_new (name
); 
  97       /* After all symbol type declarations have been parsed, packgram invokes 
  98          record_merge_function_type to set the type.  */ 
  99       syms
->next
->type 
= NULL
; 
 100       syms
->next
->next 
= NULL
; 
 101       merge_functions 
= head
.next
; 
 106 /*-------------------------------------------------------------------------. 
 107 | For the existing merging function with index MERGER, record the result   | 
 108 | type as TYPE as required by the lhs of the rule whose %merge declaration | 
 109 | is at DECLARATION_LOC.                                                   | 
 110 `-------------------------------------------------------------------------*/ 
 113 record_merge_function_type (int merger
, uniqstr type
, location declaration_loc
) 
 116   merger_list 
*merge_function
; 
 122     type 
= uniqstr_new (""); 
 125   for (merge_function 
= merge_functions
; 
 126        merge_function 
!= NULL 
&& merger_find 
!= merger
; 
 127        merge_function 
= merge_function
->next
) 
 129   aver (merge_function 
!= NULL 
&& merger_find 
== merger
); 
 130   if (merge_function
->type 
!= NULL 
&& !UNIQSTR_EQ (merge_function
->type
, type
)) 
 132       complain_at (declaration_loc
, 
 133                    _("result type clash on merge function `%s': <%s> != <%s>"), 
 134                    merge_function
->name
, type
, merge_function
->type
); 
 135       complain_at (merge_function
->type_declaration_location
, 
 136                    _("previous declaration")); 
 138   merge_function
->type 
= uniqstr_new (type
); 
 139   merge_function
->type_declaration_location 
= declaration_loc
; 
 142 /*--------------------------------------. 
 143 | Free all merge-function definitions.  | 
 144 `--------------------------------------*/ 
 147 free_merger_functions (void) 
 149   merger_list 
*L0 
= merge_functions
; 
 152       merger_list 
*L1 
= L0
->next
; 
 159 /*-------------------------------------------------------------------. 
 160 | Parse the input grammar into a one symbol_list structure.  Each    | 
 161 | rule is represented by a sequence of symbols: the left hand side   | 
 162 | followed by the contents of the right hand side, followed by a     | 
 163 | null pointer instead of a symbol to terminate the rule.  The next  | 
 164 | symbol is the lhs of the following rule.                           | 
 166 | All actions are copied out, labelled by the rule number they apply | 
 168 `-------------------------------------------------------------------*/ 
 170 /* The (currently) last symbol of GRAMMAR. */ 
 171 static symbol_list 
*grammar_end 
= NULL
; 
 173 /* Append SYM to the grammar.  */ 
 175 grammar_symbol_append (symbol 
*sym
, location loc
) 
 177   symbol_list 
*p 
= symbol_list_sym_new (sym
, loc
); 
 180     grammar_end
->next 
= p
; 
 186   /* A null SYM stands for an end of rule; it is not an actual 
 195 assign_named_ref (symbol_list 
*p
, named_ref 
*name
) 
 197   symbol 
*sym 
= p
->content
.sym
; 
 199   if (name
->id 
== sym
->tag
) 
 202                _("duplicated symbol name for %s ignored"), 
 204       named_ref_free (name
); 
 211 /* The rule currently being defined, and the previous rule. 
 212    CURRENT_RULE points to the first LHS of the current rule, while 
 213    PREVIOUS_RULE_END points to the *end* of the previous rule (NULL).  */ 
 214 static symbol_list 
*current_rule 
= NULL
; 
 215 static symbol_list 
*previous_rule_end 
= NULL
; 
 218 /*----------------------------------------------. 
 219 | Create a new rule for LHS in to the GRAMMAR.  | 
 220 `----------------------------------------------*/ 
 223 grammar_current_rule_begin (symbol 
*lhs
, location loc
, 
 228   /* Start a new rule and record its lhs.  */ 
 230   previous_rule_end 
= grammar_end
; 
 232   p 
= grammar_symbol_append (lhs
, loc
); 
 234     assign_named_ref(p
, lhs_name
); 
 236   current_rule 
= grammar_end
; 
 238   /* Mark the rule's lhs as a nonterminal if not already so.  */ 
 239   if (lhs
->class == unknown_sym
) 
 241       lhs
->class = nterm_sym
; 
 245   else if (lhs
->class == token_sym
) 
 246     complain_at (loc
, _("rule given for %s, which is a token"), lhs
->tag
); 
 250 /*----------------------------------------------------------------------. 
 251 | A symbol should be used if either:                                    | 
 252 |   1. It has a destructor.                                             | 
 253 |   2. --warnings=midrule-values and the symbol is a mid-rule symbol    | 
 254 |      (i.e., the generated LHS replacing a mid-rule action) that was   | 
 255 |      assigned to or used, as in "exp: { $$ = 1; } { $$ = $1; }".      | 
 256 `----------------------------------------------------------------------*/ 
 259 symbol_should_be_used (symbol_list 
const *s
) 
 261   if (symbol_destructor_get (s
->content
.sym
)->code
) 
 263   if (warnings_flag 
& warnings_midrule_values
) 
 264     return ((s
->midrule 
&& s
->midrule
->action_props
.is_value_used
) 
 265             || (s
->midrule_parent_rule
 
 266                 && symbol_list_n_get (s
->midrule_parent_rule
, 
 267                                       s
->midrule_parent_rhs_index
) 
 268                      ->action_props
.is_value_used
)); 
 272 /*----------------------------------------------------------------. 
 273 | Check that the rule R is properly defined.  For instance, there | 
 274 | should be no type clash on the default action.                  | 
 275 `----------------------------------------------------------------*/ 
 278 grammar_rule_check (const symbol_list 
*r
) 
 282      If there is an action, then there is nothing we can do: the user 
 283      is allowed to shoot herself in the foot. 
 285      Don't worry about the default action if $$ is untyped, since $$'s 
 286      value can't be used.  */ 
 287   if (!r
->action_props
.code 
&& r
->content
.sym
->type_name
) 
 289       symbol 
*first_rhs 
= r
->next
->content
.sym
; 
 290       /* If $$ is being set in default way, report if any type mismatch.  */ 
 293           char const *lhs_type 
= r
->content
.sym
->type_name
; 
 294           const char *rhs_type 
= 
 295             first_rhs
->type_name 
? first_rhs
->type_name 
: ""; 
 296           if (!UNIQSTR_EQ (lhs_type
, rhs_type
)) 
 297             warn_at (r
->location
, 
 298                      _("type clash on default action: <%s> != <%s>"), 
 301       /* Warn if there is no default for $$ but we need one.  */ 
 303         warn_at (r
->location
, 
 304                  _("empty rule for typed nonterminal, and no action")); 
 307   /* Check that symbol values that should be used are in fact used.  */ 
 309     symbol_list 
const *l 
= r
; 
 311     for (; l 
&& l
->content
.sym
; l 
= l
->next
, ++n
) 
 312       if (! (l
->action_props
.is_value_used
 
 313              || !symbol_should_be_used (l
) 
 314              /* The default action, $$ = $1, `uses' both.  */ 
 315              || (!r
->action_props
.code 
&& (n 
== 0 || n 
== 1)))) 
 318             warn_at (r
->location
, _("unused value: $%d"), n
); 
 320             warn_at (r
->location
, _("unset value: $$")); 
 324   /* See comments in grammar_current_rule_prec_set for how POSIX 
 325      mandates this complaint.  It's only for identifiers, so skip 
 326      it for char literals and strings, which are always tokens.  */ 
 328       && r
->ruleprec
->tag
[0] != '\'' && r
->ruleprec
->tag
[0] != '"' 
 329       && !r
->ruleprec
->declared 
&& !r
->ruleprec
->prec
) 
 330     complain_at (r
->location
, _("token for %%prec is not defined: %s"), 
 335 /*-------------------------------------. 
 336 | End the currently being grown rule.  | 
 337 `-------------------------------------*/ 
 340 grammar_current_rule_end (location loc
) 
 342   /* Put an empty link in the list to mark the end of this rule  */ 
 343   grammar_symbol_append (NULL
, grammar_end
->location
); 
 344   current_rule
->location 
= loc
; 
 348 /*-------------------------------------------------------------------. 
 349 | The previous action turns out the be a mid-rule action.  Attach it | 
 350 | to the current rule, i.e., create a dummy symbol, attach it this   | 
 351 | mid-rule action, and append this dummy nonterminal to the current  | 
 353 `-------------------------------------------------------------------*/ 
 356 grammar_midrule_action (void) 
 358   /* Since the action was written out with this rule's number, we must 
 359      give the new rule this number by inserting the new rule before 
 362   /* Make a DUMMY nonterminal, whose location is that of the midrule 
 363      action.  Create the MIDRULE.  */ 
 364   location dummy_location 
= current_rule
->action_props
.location
; 
 365   symbol 
*dummy 
= dummy_symbol_get (dummy_location
); 
 366   symbol_list 
*midrule 
= symbol_list_sym_new (dummy
, dummy_location
); 
 368   /* Remember named_ref of previous action. */ 
 369   named_ref 
*action_name 
= current_rule
->action_props
.named_ref
; 
 371   /* Make a new rule, whose body is empty, before the current one, so 
 372      that the action just read can belong to it.  */ 
 375   /* Attach its location and actions to that of the DUMMY.  */ 
 376   midrule
->location 
= dummy_location
; 
 377   code_props_rule_action_init (&midrule
->action_props
, 
 378                                current_rule
->action_props
.code
, 
 379                                current_rule
->action_props
.location
, 
 381   code_props_none_init (¤t_rule
->action_props
); 
 383   if (previous_rule_end
) 
 384     previous_rule_end
->next 
= midrule
; 
 388   /* End the dummy's rule.  */ 
 389   midrule
->next 
= symbol_list_sym_new (NULL
, dummy_location
); 
 390   midrule
->next
->next 
= current_rule
; 
 392   previous_rule_end 
= midrule
->next
; 
 394   /* Insert the dummy nonterminal replacing the midrule action into 
 395      the current rule.  Bind it to its dedicated rule.  */ 
 396   grammar_current_rule_symbol_append (dummy
, dummy_location
, 
 398   grammar_end
->midrule 
= midrule
; 
 399   midrule
->midrule_parent_rule 
= current_rule
; 
 400   midrule
->midrule_parent_rhs_index 
= symbol_list_length (current_rule
->next
); 
 403 /* Set the precedence symbol of the current rule to PRECSYM. */ 
 406 grammar_current_rule_prec_set (symbol 
*precsym
, location loc
) 
 408   /* POSIX says that any identifier is a nonterminal if it does not 
 409      appear on the LHS of a grammar rule and is not defined by %token 
 410      or by one of the directives that assigns precedence to a token.  We 
 411      ignore this here because the only kind of identifier that POSIX 
 412      allows to follow a %prec is a token and because assuming it's a 
 413      token now can produce more logical error messages.  Nevertheless, 
 414      grammar_rule_check does obey what we believe is the real intent of 
 415      POSIX here: that an error be reported for any identifier that 
 416      appears after %prec but that is not defined separately as a 
 418   symbol_class_set (precsym
, token_sym
, loc
, false); 
 419   if (current_rule
->ruleprec
) 
 420     complain_at (loc
, _("only one %s allowed per rule"), "%prec"); 
 421   current_rule
->ruleprec 
= precsym
; 
 424 /* Attach dynamic precedence DPREC to the current rule. */ 
 427 grammar_current_rule_dprec_set (int dprec
, location loc
) 
 430     warn_at (loc
, _("%s affects only GLR parsers"), "%dprec"); 
 432     complain_at (loc
, _("%s must be followed by positive number"), "%dprec"); 
 433   else if (current_rule
->dprec 
!= 0) 
 434     complain_at (loc
, _("only one %s allowed per rule"), "%dprec"); 
 435   current_rule
->dprec 
= dprec
; 
 438 /* Attach a merge function NAME with argument type TYPE to current 
 442 grammar_current_rule_merge_set (uniqstr name
, location loc
) 
 445     warn_at (loc
, _("%s affects only GLR parsers"), "%merge"); 
 446   if (current_rule
->merger 
!= 0) 
 447     complain_at (loc
, _("only one %s allowed per rule"), "%merge"); 
 448   current_rule
->merger 
= get_merge_function (name
); 
 449   current_rule
->merger_declaration_location 
= loc
; 
 452 /* Attach SYM to the current rule.  If needed, move the previous 
 453    action as a mid-rule action.  */ 
 456 grammar_current_rule_symbol_append (symbol 
*sym
, location loc
, 
 460   if (current_rule
->action_props
.code
) 
 461     grammar_midrule_action (); 
 462   p 
= grammar_symbol_append (sym
, loc
); 
 464     assign_named_ref(p
, name
); 
 467 /* Attach an ACTION to the current rule.  */ 
 470 grammar_current_rule_action_append (const char *action
, location loc
, 
 473   if (current_rule
->action_props
.code
) 
 474     grammar_midrule_action (); 
 475   /* After all symbol declarations have been parsed, packgram invokes 
 476      code_props_translate_code.  */ 
 477   code_props_rule_action_init (¤t_rule
->action_props
, action
, loc
, 
 482 /*---------------------------------------------------------------. 
 483 | Convert the rules into the representation using RRHS, RLHS and | 
 485 `---------------------------------------------------------------*/ 
 490   unsigned int itemno 
= 0; 
 491   rule_number ruleno 
= 0; 
 492   symbol_list 
*p 
= grammar
; 
 494   ritem 
= xnmalloc (nritems 
+ 1, sizeof *ritem
); 
 496   /* This sentinel is used by build_relations in gram.c.  */ 
 499   rules 
= xnmalloc (nrules
, sizeof *rules
); 
 504       symbol 
*ruleprec 
= p
->ruleprec
; 
 505       record_merge_function_type (p
->merger
, p
->content
.sym
->type_name
, 
 506                                   p
->merger_declaration_location
); 
 507       rules
[ruleno
].user_number 
= ruleno
; 
 508       rules
[ruleno
].number 
= ruleno
; 
 509       rules
[ruleno
].lhs 
= p
->content
.sym
; 
 510       rules
[ruleno
].rhs 
= ritem 
+ itemno
; 
 511       rules
[ruleno
].prec 
= NULL
; 
 512       rules
[ruleno
].dprec 
= p
->dprec
; 
 513       rules
[ruleno
].merger 
= p
->merger
; 
 514       rules
[ruleno
].precsym 
= NULL
; 
 515       rules
[ruleno
].location 
= p
->location
; 
 516       rules
[ruleno
].useful 
= true; 
 517       rules
[ruleno
].action 
= p
->action_props
.code
; 
 518       rules
[ruleno
].action_location 
= p
->action_props
.location
; 
 520       /* If the midrule's $$ is set or its $n is used, remove the `$' from the 
 521          symbol name so that it's a user-defined symbol so that the default 
 522          %destructor and %printer apply.  */ 
 523       if (p
->midrule_parent_rule
 
 524           && (p
->action_props
.is_value_used
 
 525               || symbol_list_n_get (p
->midrule_parent_rule
, 
 526                                     p
->midrule_parent_rhs_index
) 
 527                    ->action_props
.is_value_used
)) 
 528         p
->content
.sym
->tag 
+= 1; 
 530       /* Don't check the generated rule 0.  It has no action, so some rhs 
 531          symbols may appear unused, but the parsing algorithm ensures that 
 532          %destructor's are invoked appropriately.  */ 
 534         grammar_rule_check (p
); 
 536       for (p 
= p
->next
; p 
&& p
->content
.sym
; p 
= p
->next
) 
 540           /* Don't allow rule_length == INT_MAX, since that might 
 541              cause confusion with strtol if INT_MAX == LONG_MAX.  */ 
 542           if (rule_length 
== INT_MAX
) 
 543               fatal_at (rules
[ruleno
].location
, _("rule is too long")); 
 545           /* item_number = symbol_number. 
 546              But the former needs to contain more: negative rule numbers. */ 
 548             symbol_number_as_item_number (p
->content
.sym
->number
); 
 549           /* A rule gets by default the precedence and associativity 
 550              of its last token.  */ 
 551           if (p
->content
.sym
->class == token_sym 
&& default_prec
) 
 552             rules
[ruleno
].prec 
= p
->content
.sym
; 
 555       /* If this rule has a %prec, 
 556          the specified symbol's precedence replaces the default.  */ 
 559           rules
[ruleno
].precsym 
= ruleprec
; 
 560           rules
[ruleno
].prec 
= ruleprec
; 
 562       /* An item ends by the rule number (negated).  */ 
 563       ritem
[itemno
++] = rule_number_as_item_number (ruleno
); 
 564       aver (itemno 
< ITEM_NUMBER_MAX
); 
 566       aver (ruleno 
< RULE_NUMBER_MAX
); 
 572   aver (itemno 
== nritems
); 
 574   if (trace_flag 
& trace_sets
) 
 575     ritem_print (stderr
); 
 578 /*------------------------------------------------------------------. 
 579 | Read in the grammar specification and record it in the format     | 
 580 | described in gram.h.  All actions are copied into ACTION_OBSTACK, | 
 581 | in each case forming the body of a C function (YYACTION) which    | 
 582 | contains a switch statement to decide which action to execute.    | 
 583 `------------------------------------------------------------------*/ 
 588   /* Initialize the symbol table.  */ 
 591   /* Construct the accept symbol. */ 
 592   accept 
= symbol_get ("$accept", empty_location
); 
 593   accept
->class = nterm_sym
; 
 594   accept
->number 
= nvars
++; 
 596   /* Construct the error token */ 
 597   errtoken 
= symbol_get ("error", empty_location
); 
 598   errtoken
->class = token_sym
; 
 599   errtoken
->number 
= ntokens
++; 
 601   /* Construct a token that represents all undefined literal tokens. 
 602      It is always token number 2.  */ 
 603   undeftoken 
= symbol_get ("$undefined", empty_location
); 
 604   undeftoken
->class = token_sym
; 
 605   undeftoken
->number 
= ntokens
++; 
 607   gram_in 
= xfopen (grammar_file
, "r"); 
 609   gram__flex_debug 
= trace_flag 
& trace_scan
; 
 610   gram_debug 
= trace_flag 
& trace_parse
; 
 611   gram_scanner_initialize (); 
 613   prepare_percent_define_front_end_variables (); 
 615   if (! complaint_issued
) 
 616     check_and_convert_grammar (); 
 622 prepare_percent_define_front_end_variables (void) 
 624   /* Set %define front-end variable defaults.  */ 
 625   muscle_percent_define_default ("lr.keep-unreachable-states", "false"); 
 628     /* IELR would be a better default, but LALR is historically the 
 630     muscle_percent_define_default ("lr.type", "lalr"); 
 631     lr_type 
= muscle_percent_define_get ("lr.type"); 
 632     if (0 != strcmp (lr_type
, "canonical-lr")) 
 633       muscle_percent_define_default ("lr.default-reductions", "all"); 
 635       muscle_percent_define_default ("lr.default-reductions", "accepting"); 
 639   /* Check %define front-end variables.  */ 
 641     static char const * const values
[] = { 
 642       "lr.type", "lalr", "ielr", "canonical-lr", NULL
, 
 643       "lr.default-reductions", "all", "consistent", "accepting", NULL
, 
 646     muscle_percent_define_check_values (values
); 
 651 /*-------------------------------------------------------------. 
 652 | Check the grammar that has just been read, and convert it to | 
 654 `-------------------------------------------------------------*/ 
 657 check_and_convert_grammar (void) 
 659   /* Grammar has been read.  Do some checking.  */ 
 661     fatal (_("no rules in the input grammar")); 
 663   /* If the user did not define her ENDTOKEN, do it now. */ 
 666       endtoken 
= symbol_get ("$end", empty_location
); 
 667       endtoken
->class = token_sym
; 
 668       endtoken
->number 
= 0; 
 669       /* Value specified by POSIX.  */ 
 670       endtoken
->user_token_number 
= 0; 
 673   /* Report any undefined symbols and consider them nonterminals.  */ 
 674   symbols_check_defined (); 
 676   /* Find the start symbol if no %start.  */ 
 681            node 
!= NULL 
&& symbol_is_dummy (node
->content
.sym
); 
 684           for (node 
= node
->next
; 
 685                node 
!= NULL 
&& node
->content
.sym 
!= NULL
; 
 690       grammar_start_symbol_set (node
->content
.sym
, 
 691                                 node
->content
.sym
->location
); 
 694   /* Insert the initial rule, whose line is that of the first rule 
 695      (not that of the start symbol): 
 697      accept: %start EOF.  */ 
 699     symbol_list 
*p 
= symbol_list_sym_new (accept
, empty_location
); 
 700     p
->location 
= grammar
->location
; 
 701     p
->next 
= symbol_list_sym_new (startsymbol
, empty_location
); 
 702     p
->next
->next 
= symbol_list_sym_new (endtoken
, empty_location
); 
 703     p
->next
->next
->next 
= symbol_list_sym_new (NULL
, empty_location
); 
 704     p
->next
->next
->next
->next 
= grammar
; 
 710   aver (nsyms 
<= SYMBOL_NUMBER_MAXIMUM 
&& nsyms 
== ntokens 
+ nvars
); 
 712   /* Assign the symbols their symbol numbers.  Write #defines for the 
 713      token symbols into FDEFINES if requested.  */ 
 716   /* Scan rule actions after invoking symbol_check_alias_consistency (in 
 717      symbols_pack above) so that token types are set correctly before the rule 
 718      action type checking. 
 720      Before invoking grammar_rule_check (in packgram below) on any rule, make 
 721      sure all actions have already been scanned in order to set `used' flags. 
 722      Otherwise, checking that a midrule's $$ should be set will not always work 
 723      properly because the check must forward-reference the midrule's parent 
 724      rule.  For the same reason, all the `used' flags must be set before 
 725      checking whether to remove `$' from any midrule symbol name (also in 
 729     for (sym 
= grammar
; sym
; sym 
= sym
->next
) 
 730       code_props_translate_code (&sym
->action_props
); 
 733   /* Convert the grammar into the format described in gram.h.  */ 
 736   /* The grammar as a symbol_list is no longer needed. */ 
 737   symbol_list_free (grammar
);