1 /* Input parser for Bison 
   3    Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000-2003, 2005-2007, 
   4    2009-2012 Free Software Foundation, Inc. 
   6    This file is part of Bison, the GNU Compiler Compiler. 
   8    This program is free software: you can redistribute it and/or modify 
   9    it under the terms of the GNU General Public License as published by 
  10    the Free Software Foundation, either version 3 of the License, or 
  11    (at your option) any later version. 
  13    This program is distributed in the hope that it will be useful, 
  14    but WITHOUT ANY WARRANTY; without even the implied warranty of 
  15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  16    GNU General Public License for more details. 
  18    You should have received a copy of the GNU General Public License 
  19    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */ 
  28 #include "conflicts.h" 
  32 #include "muscle-tab.h" 
  36 #include "scan-gram.h" 
  37 #include "scan-code.h" 
  39 static void prepare_percent_define_front_end_variables (void); 
  40 static void check_and_convert_grammar (void); 
  42 static symbol_list 
*grammar 
= NULL
; 
  43 static bool start_flag 
= false; 
  44 merger_list 
*merge_functions
; 
  46 /* Was %union seen?  */ 
  47 bool union_seen 
= false; 
  50 bool tag_seen 
= false; 
  52 /* Should rules have a default precedence?  */ 
  53 bool default_prec 
= true; 
  55 /*-----------------------. 
  56 | Set the start symbol.  | 
  57 `-----------------------*/ 
  60 grammar_start_symbol_set (symbol 
*sym
, location loc
) 
  63     complain_at (loc
, _("multiple %s declarations"), "%start"); 
  68       startsymbol_location 
= loc
; 
  74 /*------------------------------------------------------------------------. 
  75 | Return the merger index for a merging function named NAME.  Records the | 
  76 | function, if new, in MERGER_LIST.                                       | 
  77 `------------------------------------------------------------------------*/ 
  80 get_merge_function (uniqstr name
) 
  89   head
.next 
= merge_functions
; 
  90   for (syms 
= &head
, n 
= 1; syms
->next
; syms 
= syms
->next
, n 
+= 1) 
  91     if (UNIQSTR_EQ (name
, syms
->next
->name
)) 
  93   if (syms
->next 
== NULL
) 
  95       syms
->next 
= xmalloc (sizeof syms
->next
[0]); 
  96       syms
->next
->name 
= uniqstr_new (name
); 
  97       /* After all symbol type declarations have been parsed, packgram invokes 
  98          record_merge_function_type to set the type.  */ 
  99       syms
->next
->type 
= NULL
; 
 100       syms
->next
->next 
= NULL
; 
 101       merge_functions 
= head
.next
; 
 106 /*-------------------------------------------------------------------------. 
 107 | For the existing merging function with index MERGER, record the result   | 
 108 | type as TYPE as required by the lhs of the rule whose %merge declaration | 
 109 | is at DECLARATION_LOC.                                                   | 
 110 `-------------------------------------------------------------------------*/ 
 113 record_merge_function_type (int merger
, uniqstr type
, location declaration_loc
) 
 116   merger_list 
*merge_function
; 
 122     type 
= uniqstr_new (""); 
 125   for (merge_function 
= merge_functions
; 
 126        merge_function 
!= NULL 
&& merger_find 
!= merger
; 
 127        merge_function 
= merge_function
->next
) 
 129   aver (merge_function 
!= NULL 
&& merger_find 
== merger
); 
 130   if (merge_function
->type 
!= NULL 
&& !UNIQSTR_EQ (merge_function
->type
, type
)) 
 132       complain_at (declaration_loc
, 
 133                    _("result type clash on merge function `%s': <%s> != <%s>"), 
 134                    merge_function
->name
, type
, merge_function
->type
); 
 135       complain_at (merge_function
->type_declaration_location
, 
 136                    _("previous declaration")); 
 138   merge_function
->type 
= uniqstr_new (type
); 
 139   merge_function
->type_declaration_location 
= declaration_loc
; 
 142 /*--------------------------------------. 
 143 | Free all merge-function definitions.  | 
 144 `--------------------------------------*/ 
 147 free_merger_functions (void) 
 149   merger_list 
*L0 
= merge_functions
; 
 152       merger_list 
*L1 
= L0
->next
; 
 159 /*-------------------------------------------------------------------. 
 160 | Parse the input grammar into a one symbol_list structure.  Each    | 
 161 | rule is represented by a sequence of symbols: the left hand side   | 
 162 | followed by the contents of the right hand side, followed by a     | 
 163 | null pointer instead of a symbol to terminate the rule.  The next  | 
 164 | symbol is the lhs of the following rule.                           | 
 166 | All actions are copied out, labelled by the rule number they apply | 
 168 `-------------------------------------------------------------------*/ 
 170 /* The (currently) last symbol of GRAMMAR. */ 
 171 static symbol_list 
*grammar_end 
= NULL
; 
 173 /* Append SYM to the grammar.  */ 
 175 grammar_symbol_append (symbol 
*sym
, location loc
) 
 177   symbol_list 
*p 
= symbol_list_sym_new (sym
, loc
); 
 180     grammar_end
->next 
= p
; 
 186   /* A null SYM stands for an end of rule; it is not an actual 
 195 assign_named_ref (symbol_list 
*p
, named_ref 
*name
) 
 197   symbol 
*sym 
= p
->content
.sym
; 
 199   if (name
->id 
== sym
->tag
) 
 202                _("duplicated symbol name for %s ignored"), 
 204       named_ref_free (name
); 
 211 /* The rule currently being defined, and the previous rule. 
 212    CURRENT_RULE points to the first LHS of the current rule, while 
 213    PREVIOUS_RULE_END points to the *end* of the previous rule (NULL).  */ 
 214 static symbol_list 
*current_rule 
= NULL
; 
 215 static symbol_list 
*previous_rule_end 
= NULL
; 
 218 /*----------------------------------------------. 
 219 | Create a new rule for LHS in to the GRAMMAR.  | 
 220 `----------------------------------------------*/ 
 223 grammar_current_rule_begin (symbol 
*lhs
, location loc
, 
 228   /* Start a new rule and record its lhs.  */ 
 230   previous_rule_end 
= grammar_end
; 
 232   p 
= grammar_symbol_append (lhs
, loc
); 
 234     assign_named_ref (p
, named_ref_copy (lhs_name
)); 
 236   current_rule 
= grammar_end
; 
 238   /* Mark the rule's lhs as a nonterminal if not already so.  */ 
 239   if (lhs
->class == unknown_sym
) 
 241       lhs
->class = nterm_sym
; 
 245   else if (lhs
->class == token_sym
) 
 246     complain_at (loc
, _("rule given for %s, which is a token"), lhs
->tag
); 
 250 /*----------------------------------------------------------------------. 
 251 | A symbol should be used if either:                                    | 
 252 |   1. It has a destructor.                                             | 
 253 |   2. The symbol is a mid-rule symbol (i.e., the generated LHS         | 
 254 |      replacing a mid-rule action) that was assigned to or used, as in | 
 255 |      "exp: { $$ = 1; } { $$ = $1; }".                                 | 
 256 `----------------------------------------------------------------------*/ 
 259 symbol_should_be_used (symbol_list 
const *s
, bool *midrule_warning
) 
 261   if (symbol_destructor_get (s
->content
.sym
)->code
) 
 263   if ((s
->midrule 
&& s
->midrule
->action_props
.is_value_used
) 
 264       || (s
->midrule_parent_rule
 
 265           && symbol_list_n_get (s
->midrule_parent_rule
, 
 266                                 s
->midrule_parent_rhs_index
) 
 267                ->action_props
.is_value_used
)) 
 269       *midrule_warning 
= true; 
 275 /*----------------------------------------------------------------. 
 276 | Check that the rule R is properly defined.  For instance, there | 
 277 | should be no type clash on the default action.                  | 
 278 `----------------------------------------------------------------*/ 
 281 grammar_rule_check (const symbol_list 
*r
) 
 285      If there is an action, then there is nothing we can do: the user 
 286      is allowed to shoot herself in the foot. 
 288      Don't worry about the default action if $$ is untyped, since $$'s 
 289      value can't be used.  */ 
 290   if (!r
->action_props
.code 
&& r
->content
.sym
->type_name
) 
 292       symbol 
*first_rhs 
= r
->next
->content
.sym
; 
 293       /* If $$ is being set in default way, report if any type mismatch.  */ 
 296           char const *lhs_type 
= r
->content
.sym
->type_name
; 
 297           const char *rhs_type 
= 
 298             first_rhs
->type_name 
? first_rhs
->type_name 
: ""; 
 299           if (!UNIQSTR_EQ (lhs_type
, rhs_type
)) 
 300             warn_at (r
->location
, 
 301                      _("type clash on default action: <%s> != <%s>"), 
 304       /* Warn if there is no default for $$ but we need one.  */ 
 306         warn_at (r
->location
, 
 307                  _("empty rule for typed nonterminal, and no action")); 
 310   /* Check that symbol values that should be used are in fact used.  */ 
 312     symbol_list 
const *l 
= r
; 
 314     for (; l 
&& l
->content
.sym
; l 
= l
->next
, ++n
) 
 316         bool midrule_warning 
= false; 
 317         if (!l
->action_props
.is_value_used
 
 318             && symbol_should_be_used (l
, &midrule_warning
) 
 319             /* The default action, $$ = $1, `uses' both.  */ 
 320             && (r
->action_props
.code 
|| (n 
!= 0 && n 
!= 1))) 
 322             void (*warn_at_ptr
)(location
, char const*, ...) = 
 323               midrule_warning 
? midrule_value_at 
: warn_at
; 
 325               warn_at_ptr (r
->location
, _("unused value: $%d"), n
); 
 327               warn_at_ptr (r
->location
, _("unset value: $$")); 
 332   /* See comments in grammar_current_rule_prec_set for how POSIX 
 333      mandates this complaint.  It's only for identifiers, so skip 
 334      it for char literals and strings, which are always tokens.  */ 
 336       && r
->ruleprec
->tag
[0] != '\'' && r
->ruleprec
->tag
[0] != '"' 
 337       && !r
->ruleprec
->declared 
&& !r
->ruleprec
->prec
) 
 338     warn_at (r
->location
, _("token for %%prec is not defined: %s"), 
 343 /*-------------------------------------. 
 344 | End the currently being grown rule.  | 
 345 `-------------------------------------*/ 
 348 grammar_current_rule_end (location loc
) 
 350   /* Put an empty link in the list to mark the end of this rule  */ 
 351   grammar_symbol_append (NULL
, grammar_end
->location
); 
 352   current_rule
->location 
= loc
; 
 356 /*-------------------------------------------------------------------. 
 357 | The previous action turns out the be a mid-rule action.  Attach it | 
 358 | to the current rule, i.e., create a dummy symbol, attach it this   | 
 359 | mid-rule action, and append this dummy nonterminal to the current  | 
 361 `-------------------------------------------------------------------*/ 
 364 grammar_midrule_action (void) 
 366   /* Since the action was written out with this rule's number, we must 
 367      give the new rule this number by inserting the new rule before 
 370   /* Make a DUMMY nonterminal, whose location is that of the midrule 
 371      action.  Create the MIDRULE.  */ 
 372   location dummy_location 
= current_rule
->action_props
.location
; 
 373   symbol 
*dummy 
= dummy_symbol_get (dummy_location
); 
 374   symbol_list 
*midrule 
= symbol_list_sym_new (dummy
, dummy_location
); 
 376   /* Remember named_ref of previous action. */ 
 377   named_ref 
*action_name 
= current_rule
->action_props
.named_ref
; 
 379   /* Make a new rule, whose body is empty, before the current one, so 
 380      that the action just read can belong to it.  */ 
 383   /* Attach its location and actions to that of the DUMMY.  */ 
 384   midrule
->location 
= dummy_location
; 
 385   code_props_rule_action_init (&midrule
->action_props
, 
 386                                current_rule
->action_props
.code
, 
 387                                current_rule
->action_props
.location
, 
 389   code_props_none_init (¤t_rule
->action_props
); 
 391   if (previous_rule_end
) 
 392     previous_rule_end
->next 
= midrule
; 
 396   /* End the dummy's rule.  */ 
 397   midrule
->next 
= symbol_list_sym_new (NULL
, dummy_location
); 
 398   midrule
->next
->next 
= current_rule
; 
 400   previous_rule_end 
= midrule
->next
; 
 402   /* Insert the dummy nonterminal replacing the midrule action into 
 403      the current rule.  Bind it to its dedicated rule.  */ 
 404   grammar_current_rule_symbol_append (dummy
, dummy_location
, 
 406   grammar_end
->midrule 
= midrule
; 
 407   midrule
->midrule_parent_rule 
= current_rule
; 
 408   midrule
->midrule_parent_rhs_index 
= symbol_list_length (current_rule
->next
); 
 411 /* Set the precedence symbol of the current rule to PRECSYM. */ 
 414 grammar_current_rule_prec_set (symbol 
*precsym
, location loc
) 
 416   /* POSIX says that any identifier is a nonterminal if it does not 
 417      appear on the LHS of a grammar rule and is not defined by %token 
 418      or by one of the directives that assigns precedence to a token.  We 
 419      ignore this here because the only kind of identifier that POSIX 
 420      allows to follow a %prec is a token and because assuming it's a 
 421      token now can produce more logical error messages.  Nevertheless, 
 422      grammar_rule_check does obey what we believe is the real intent of 
 423      POSIX here: that an error be reported for any identifier that 
 424      appears after %prec but that is not defined separately as a 
 426   symbol_class_set (precsym
, token_sym
, loc
, false); 
 427   if (current_rule
->ruleprec
) 
 428     complain_at (loc
, _("only one %s allowed per rule"), "%prec"); 
 429   current_rule
->ruleprec 
= precsym
; 
 432 /* Attach dynamic precedence DPREC to the current rule. */ 
 435 grammar_current_rule_dprec_set (int dprec
, location loc
) 
 438     warn_at (loc
, _("%s affects only GLR parsers"), "%dprec"); 
 440     complain_at (loc
, _("%s must be followed by positive number"), "%dprec"); 
 441   else if (current_rule
->dprec 
!= 0) 
 442     complain_at (loc
, _("only one %s allowed per rule"), "%dprec"); 
 443   current_rule
->dprec 
= dprec
; 
 446 /* Attach a merge function NAME with argument type TYPE to current 
 450 grammar_current_rule_merge_set (uniqstr name
, location loc
) 
 453     warn_at (loc
, _("%s affects only GLR parsers"), "%merge"); 
 454   if (current_rule
->merger 
!= 0) 
 455     complain_at (loc
, _("only one %s allowed per rule"), "%merge"); 
 456   current_rule
->merger 
= get_merge_function (name
); 
 457   current_rule
->merger_declaration_location 
= loc
; 
 460 /* Attach SYM to the current rule.  If needed, move the previous 
 461    action as a mid-rule action.  */ 
 464 grammar_current_rule_symbol_append (symbol 
*sym
, location loc
, 
 468   if (current_rule
->action_props
.code
) 
 469     grammar_midrule_action (); 
 470   p 
= grammar_symbol_append (sym
, loc
); 
 472     assign_named_ref(p
, name
); 
 475 /* Attach an ACTION to the current rule.  */ 
 478 grammar_current_rule_action_append (const char *action
, location loc
, 
 481   if (current_rule
->action_props
.code
) 
 482     grammar_midrule_action (); 
 483   /* After all symbol declarations have been parsed, packgram invokes 
 484      code_props_translate_code.  */ 
 485   code_props_rule_action_init (¤t_rule
->action_props
, action
, loc
, 
 490 /*---------------------------------------------------------------. 
 491 | Convert the rules into the representation using RRHS, RLHS and | 
 493 `---------------------------------------------------------------*/ 
 498   unsigned int itemno 
= 0; 
 499   rule_number ruleno 
= 0; 
 500   symbol_list 
*p 
= grammar
; 
 502   ritem 
= xnmalloc (nritems 
+ 1, sizeof *ritem
); 
 504   /* This sentinel is used by build_relations in gram.c.  */ 
 507   rules 
= xnmalloc (nrules
, sizeof *rules
); 
 512       symbol 
*ruleprec 
= p
->ruleprec
; 
 513       record_merge_function_type (p
->merger
, p
->content
.sym
->type_name
, 
 514                                   p
->merger_declaration_location
); 
 515       rules
[ruleno
].user_number 
= ruleno
; 
 516       rules
[ruleno
].number 
= ruleno
; 
 517       rules
[ruleno
].lhs 
= p
->content
.sym
; 
 518       rules
[ruleno
].rhs 
= ritem 
+ itemno
; 
 519       rules
[ruleno
].prec 
= NULL
; 
 520       rules
[ruleno
].dprec 
= p
->dprec
; 
 521       rules
[ruleno
].merger 
= p
->merger
; 
 522       rules
[ruleno
].precsym 
= NULL
; 
 523       rules
[ruleno
].location 
= p
->location
; 
 524       rules
[ruleno
].useful 
= true; 
 525       rules
[ruleno
].action 
= p
->action_props
.code
; 
 526       rules
[ruleno
].action_location 
= p
->action_props
.location
; 
 528       /* If the midrule's $$ is set or its $n is used, remove the `$' from the 
 529          symbol name so that it's a user-defined symbol so that the default 
 530          %destructor and %printer apply.  */ 
 531       if (p
->midrule_parent_rule
 
 532           && (p
->action_props
.is_value_used
 
 533               || symbol_list_n_get (p
->midrule_parent_rule
, 
 534                                     p
->midrule_parent_rhs_index
) 
 535                    ->action_props
.is_value_used
)) 
 536         p
->content
.sym
->tag 
+= 1; 
 538       /* Don't check the generated rule 0.  It has no action, so some rhs 
 539          symbols may appear unused, but the parsing algorithm ensures that 
 540          %destructor's are invoked appropriately.  */ 
 542         grammar_rule_check (p
); 
 544       for (p 
= p
->next
; p 
&& p
->content
.sym
; p 
= p
->next
) 
 548           /* Don't allow rule_length == INT_MAX, since that might 
 549              cause confusion with strtol if INT_MAX == LONG_MAX.  */ 
 550           if (rule_length 
== INT_MAX
) 
 551               fatal_at (rules
[ruleno
].location
, _("rule is too long")); 
 553           /* item_number = symbol_number. 
 554              But the former needs to contain more: negative rule numbers. */ 
 556             symbol_number_as_item_number (p
->content
.sym
->number
); 
 557           /* A rule gets by default the precedence and associativity 
 558              of its last token.  */ 
 559           if (p
->content
.sym
->class == token_sym 
&& default_prec
) 
 560             rules
[ruleno
].prec 
= p
->content
.sym
; 
 563       /* If this rule has a %prec, 
 564          the specified symbol's precedence replaces the default.  */ 
 567           rules
[ruleno
].precsym 
= ruleprec
; 
 568           rules
[ruleno
].prec 
= ruleprec
; 
 570       /* An item ends by the rule number (negated).  */ 
 571       ritem
[itemno
++] = rule_number_as_item_number (ruleno
); 
 572       aver (itemno 
< ITEM_NUMBER_MAX
); 
 574       aver (ruleno 
< RULE_NUMBER_MAX
); 
 580   aver (itemno 
== nritems
); 
 582   if (trace_flag 
& trace_sets
) 
 583     ritem_print (stderr
); 
 586 /*------------------------------------------------------------------. 
 587 | Read in the grammar specification and record it in the format     | 
 588 | described in gram.h.  All actions are copied into ACTION_OBSTACK, | 
 589 | in each case forming the body of a C function (YYACTION) which    | 
 590 | contains a switch statement to decide which action to execute.    | 
 591 `------------------------------------------------------------------*/ 
 596   /* Initialize the symbol table.  */ 
 599   /* Construct the accept symbol. */ 
 600   accept 
= symbol_get ("$accept", empty_location
); 
 601   accept
->class = nterm_sym
; 
 602   accept
->number 
= nvars
++; 
 604   /* Construct the error token */ 
 605   errtoken 
= symbol_get ("error", empty_location
); 
 606   errtoken
->class = token_sym
; 
 607   errtoken
->number 
= ntokens
++; 
 609   /* Construct a token that represents all undefined literal tokens. 
 610      It is always token number 2.  */ 
 611   undeftoken 
= symbol_get ("$undefined", empty_location
); 
 612   undeftoken
->class = token_sym
; 
 613   undeftoken
->number 
= ntokens
++; 
 615   gram_in 
= xfopen (grammar_file
, "r"); 
 617   gram__flex_debug 
= trace_flag 
& trace_scan
; 
 618   gram_debug 
= trace_flag 
& trace_parse
; 
 619   gram_scanner_initialize (); 
 621   prepare_percent_define_front_end_variables (); 
 623   if (! complaint_issued
) 
 624     check_and_convert_grammar (); 
 630 prepare_percent_define_front_end_variables (void) 
 632   /* Set %define front-end variable defaults.  */ 
 633   muscle_percent_define_default ("lr.keep-unreachable-states", "false"); 
 636     /* IELR would be a better default, but LALR is historically the 
 638     muscle_percent_define_default ("lr.type", "lalr"); 
 639     lr_type 
= muscle_percent_define_get ("lr.type"); 
 640     if (0 != strcmp (lr_type
, "canonical-lr")) 
 641       muscle_percent_define_default ("lr.default-reductions", "most"); 
 643       muscle_percent_define_default ("lr.default-reductions", "accepting"); 
 647   /* Check %define front-end variables.  */ 
 649     static char const * const values
[] = { 
 650       "lr.type", "lalr", "ielr", "canonical-lr", NULL
, 
 651       "lr.default-reductions", "most", "consistent", "accepting", NULL
, 
 654     muscle_percent_define_check_values (values
); 
 659 /*-------------------------------------------------------------. 
 660 | Check the grammar that has just been read, and convert it to | 
 662 `-------------------------------------------------------------*/ 
 665 check_and_convert_grammar (void) 
 667   /* Grammar has been read.  Do some checking.  */ 
 669     fatal (_("no rules in the input grammar")); 
 671   /* If the user did not define her ENDTOKEN, do it now. */ 
 674       endtoken 
= symbol_get ("$end", empty_location
); 
 675       endtoken
->class = token_sym
; 
 676       endtoken
->number 
= 0; 
 677       /* Value specified by POSIX.  */ 
 678       endtoken
->user_token_number 
= 0; 
 681   /* Report any undefined symbols and consider them nonterminals.  */ 
 682   symbols_check_defined (); 
 684   /* Find the start symbol if no %start.  */ 
 689            node 
!= NULL 
&& symbol_is_dummy (node
->content
.sym
); 
 692           for (node 
= node
->next
; 
 693                node 
!= NULL 
&& node
->content
.sym 
!= NULL
; 
 698       grammar_start_symbol_set (node
->content
.sym
, 
 699                                 node
->content
.sym
->location
); 
 702   /* Insert the initial rule, whose line is that of the first rule 
 703      (not that of the start symbol): 
 705      accept: %start EOF.  */ 
 707     symbol_list 
*p 
= symbol_list_sym_new (accept
, empty_location
); 
 708     p
->location 
= grammar
->location
; 
 709     p
->next 
= symbol_list_sym_new (startsymbol
, empty_location
); 
 710     p
->next
->next 
= symbol_list_sym_new (endtoken
, empty_location
); 
 711     p
->next
->next
->next 
= symbol_list_sym_new (NULL
, empty_location
); 
 712     p
->next
->next
->next
->next 
= grammar
; 
 718   aver (nsyms 
<= SYMBOL_NUMBER_MAXIMUM 
&& nsyms 
== ntokens 
+ nvars
); 
 720   /* Assign the symbols their symbol numbers.  Write #defines for the 
 721      token symbols into FDEFINES if requested.  */ 
 724   /* Scan rule actions after invoking symbol_check_alias_consistency (in 
 725      symbols_pack above) so that token types are set correctly before the rule 
 726      action type checking. 
 728      Before invoking grammar_rule_check (in packgram below) on any rule, make 
 729      sure all actions have already been scanned in order to set `used' flags. 
 730      Otherwise, checking that a midrule's $$ should be set will not always work 
 731      properly because the check must forward-reference the midrule's parent 
 732      rule.  For the same reason, all the `used' flags must be set before 
 733      checking whether to remove `$' from any midrule symbol name (also in 
 737     for (sym 
= grammar
; sym
; sym 
= sym
->next
) 
 738       code_props_translate_code (&sym
->action_props
); 
 741   /* Convert the grammar into the format described in gram.h.  */ 
 744   /* The grammar as a symbol_list is no longer needed. */ 
 745   symbol_list_free (grammar
);