1 /* Input parser for Bison 
   3    Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000-2003, 2005-2007, 
   4    2009-2012 Free Software Foundation, Inc. 
   6    This file is part of Bison, the GNU Compiler Compiler. 
   8    This program is free software: you can redistribute it and/or modify 
   9    it under the terms of the GNU General Public License as published by 
  10    the Free Software Foundation, either version 3 of the License, or 
  11    (at your option) any later version. 
  13    This program is distributed in the hope that it will be useful, 
  14    but WITHOUT ANY WARRANTY; without even the implied warranty of 
  15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  16    GNU General Public License for more details. 
  18    You should have received a copy of the GNU General Public License 
  19    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */ 
  27 #include "conflicts.h" 
  31 #include "muscle-tab.h" 
  35 #include "scan-gram.h" 
  36 #include "scan-code.h" 
  38 static void prepare_percent_define_front_end_variables (void); 
  39 static void check_and_convert_grammar (void); 
  41 static symbol_list 
*grammar 
= NULL
; 
  42 static bool start_flag 
= false; 
  43 merger_list 
*merge_functions
; 
  45 /* Was %union seen?  */ 
  46 bool union_seen 
= false; 
  49 bool tag_seen 
= false; 
  51 /* Should rules have a default precedence?  */ 
  52 bool default_prec 
= true; 
  54 /*-----------------------. 
  55 | Set the start symbol.  | 
  56 `-----------------------*/ 
  59 grammar_start_symbol_set (symbol 
*sym
, location loc
) 
  62     complain_at (loc
, _("multiple %s declarations"), "%start"); 
  67       startsymbol_location 
= loc
; 
  73 /*------------------------------------------------------------------------. 
  74 | Return the merger index for a merging function named NAME.  Records the | 
  75 | function, if new, in MERGER_LIST.                                       | 
  76 `------------------------------------------------------------------------*/ 
  79 get_merge_function (uniqstr name
) 
  88   head
.next 
= merge_functions
; 
  89   for (syms 
= &head
, n 
= 1; syms
->next
; syms 
= syms
->next
, n 
+= 1) 
  90     if (UNIQSTR_EQ (name
, syms
->next
->name
)) 
  92   if (syms
->next 
== NULL
) 
  94       syms
->next 
= xmalloc (sizeof syms
->next
[0]); 
  95       syms
->next
->name 
= uniqstr_new (name
); 
  96       /* After all symbol type declarations have been parsed, packgram invokes 
  97          record_merge_function_type to set the type.  */ 
  98       syms
->next
->type 
= NULL
; 
  99       syms
->next
->next 
= NULL
; 
 100       merge_functions 
= head
.next
; 
 105 /*-------------------------------------------------------------------------. 
 106 | For the existing merging function with index MERGER, record the result   | 
 107 | type as TYPE as required by the lhs of the rule whose %merge declaration | 
 108 | is at DECLARATION_LOC.                                                   | 
 109 `-------------------------------------------------------------------------*/ 
 112 record_merge_function_type (int merger
, uniqstr type
, location declaration_loc
) 
 115   merger_list 
*merge_function
; 
 121     type 
= uniqstr_new (""); 
 124   for (merge_function 
= merge_functions
; 
 125        merge_function 
!= NULL 
&& merger_find 
!= merger
; 
 126        merge_function 
= merge_function
->next
) 
 128   aver (merge_function 
!= NULL 
&& merger_find 
== merger
); 
 129   if (merge_function
->type 
!= NULL 
&& !UNIQSTR_EQ (merge_function
->type
, type
)) 
 132       complain_at_indent (declaration_loc
, &indent
, 
 133                           _("result type clash on merge function %s: " 
 135                           quote (merge_function
->name
), type
, 
 136                           merge_function
->type
); 
 137       indent 
+= SUB_INDENT
; 
 138       complain_at_indent (merge_function
->type_declaration_location
, &indent
, 
 139                           _("previous declaration")); 
 141   merge_function
->type 
= uniqstr_new (type
); 
 142   merge_function
->type_declaration_location 
= declaration_loc
; 
 145 /*--------------------------------------. 
 146 | Free all merge-function definitions.  | 
 147 `--------------------------------------*/ 
 150 free_merger_functions (void) 
 152   merger_list 
*L0 
= merge_functions
; 
 155       merger_list 
*L1 
= L0
->next
; 
 162 /*-------------------------------------------------------------------. 
 163 | Parse the input grammar into a one symbol_list structure.  Each    | 
 164 | rule is represented by a sequence of symbols: the left hand side   | 
 165 | followed by the contents of the right hand side, followed by a     | 
 166 | null pointer instead of a symbol to terminate the rule.  The next  | 
 167 | symbol is the lhs of the following rule.                           | 
 169 | All actions are copied out, labelled by the rule number they apply | 
 171 `-------------------------------------------------------------------*/ 
 173 /* The (currently) last symbol of GRAMMAR. */ 
 174 static symbol_list 
*grammar_end 
= NULL
; 
 176 /* Append SYM to the grammar.  */ 
 178 grammar_symbol_append (symbol 
*sym
, location loc
) 
 180   symbol_list 
*p 
= symbol_list_sym_new (sym
, loc
); 
 183     grammar_end
->next 
= p
; 
 189   /* A null SYM stands for an end of rule; it is not an actual 
 198 assign_named_ref (symbol_list 
*p
, named_ref 
*name
) 
 200   symbol 
*sym 
= p
->content
.sym
; 
 202   if (name
->id 
== sym
->tag
) 
 205                _("duplicated symbol name for %s ignored"), 
 207       named_ref_free (name
); 
 214 /* The rule currently being defined, and the previous rule. 
 215    CURRENT_RULE points to the first LHS of the current rule, while 
 216    PREVIOUS_RULE_END points to the *end* of the previous rule (NULL).  */ 
 217 static symbol_list 
*current_rule 
= NULL
; 
 218 static symbol_list 
*previous_rule_end 
= NULL
; 
 221 /*----------------------------------------------. 
 222 | Create a new rule for LHS in to the GRAMMAR.  | 
 223 `----------------------------------------------*/ 
 226 grammar_current_rule_begin (symbol 
*lhs
, location loc
, 
 231   /* Start a new rule and record its lhs.  */ 
 233   previous_rule_end 
= grammar_end
; 
 235   p 
= grammar_symbol_append (lhs
, loc
); 
 237     assign_named_ref (p
, named_ref_copy (lhs_name
)); 
 239   current_rule 
= grammar_end
; 
 241   /* Mark the rule's lhs as a nonterminal if not already so.  */ 
 242   if (lhs
->class == unknown_sym
) 
 244       lhs
->class = nterm_sym
; 
 248   else if (lhs
->class == token_sym
) 
 249     complain_at (loc
, _("rule given for %s, which is a token"), lhs
->tag
); 
 253 /*----------------------------------------------------------------------. 
 254 | A symbol should be used if either:                                    | 
 255 |   1. It has a destructor.                                             | 
 256 |   2. The symbol is a mid-rule symbol (i.e., the generated LHS         | 
 257 |      replacing a mid-rule action) that was assigned to or used, as in | 
 258 |      "exp: { $$ = 1; } { $$ = $1; }".                                 | 
 259 `----------------------------------------------------------------------*/ 
 262 symbol_should_be_used (symbol_list 
const *s
, bool *midrule_warning
) 
 264   if (symbol_destructor_get (s
->content
.sym
)->code
) 
 266   if ((s
->midrule 
&& s
->midrule
->action_props
.is_value_used
) 
 267       || (s
->midrule_parent_rule
 
 268           && symbol_list_n_get (s
->midrule_parent_rule
, 
 269                                 s
->midrule_parent_rhs_index
) 
 270                ->action_props
.is_value_used
)) 
 272       *midrule_warning 
= true; 
 278 /*----------------------------------------------------------------. 
 279 | Check that the rule R is properly defined.  For instance, there | 
 280 | should be no type clash on the default action.                  | 
 281 `----------------------------------------------------------------*/ 
 284 grammar_rule_check (const symbol_list 
*r
) 
 288      If there is an action, then there is nothing we can do: the user 
 289      is allowed to shoot herself in the foot. 
 291      Don't worry about the default action if $$ is untyped, since $$'s 
 292      value can't be used.  */ 
 293   if (!r
->action_props
.code 
&& r
->content
.sym
->type_name
) 
 295       symbol 
*first_rhs 
= r
->next
->content
.sym
; 
 296       /* If $$ is being set in default way, report if any type mismatch.  */ 
 299           char const *lhs_type 
= r
->content
.sym
->type_name
; 
 300           const char *rhs_type 
= 
 301             first_rhs
->type_name 
? first_rhs
->type_name 
: ""; 
 302           if (!UNIQSTR_EQ (lhs_type
, rhs_type
)) 
 303             warn_at (r
->location
, 
 304                      _("type clash on default action: <%s> != <%s>"), 
 307       /* Warn if there is no default for $$ but we need one.  */ 
 309         warn_at (r
->location
, 
 310                  _("empty rule for typed nonterminal, and no action")); 
 313   /* Check that symbol values that should be used are in fact used.  */ 
 315     symbol_list 
const *l 
= r
; 
 317     for (; l 
&& l
->content
.sym
; l 
= l
->next
, ++n
) 
 319         bool midrule_warning 
= false; 
 320         if (!l
->action_props
.is_value_used
 
 321             && symbol_should_be_used (l
, &midrule_warning
) 
 322             /* The default action, $$ = $1, `uses' both.  */ 
 323             && (r
->action_props
.code 
|| (n 
!= 0 && n 
!= 1))) 
 325             void (*warn_at_ptr
)(location
, char const*, ...) = 
 326               midrule_warning 
? midrule_value_at 
: warn_at
; 
 328               warn_at_ptr (l
->location
, _("unused value: $%d"), n
); 
 330               warn_at_ptr (l
->location
, _("unset value: $$")); 
 335   /* See comments in grammar_current_rule_prec_set for how POSIX 
 336      mandates this complaint.  It's only for identifiers, so skip 
 337      it for char literals and strings, which are always tokens.  */ 
 339       && r
->ruleprec
->tag
[0] != '\'' && r
->ruleprec
->tag
[0] != '"' 
 340       && !r
->ruleprec
->declared 
&& !r
->ruleprec
->prec
) 
 341     warn_at (r
->location
, _("token for %%prec is not defined: %s"), 
 346 /*-------------------------------------. 
 347 | End the currently being grown rule.  | 
 348 `-------------------------------------*/ 
 351 grammar_current_rule_end (location loc
) 
 353   /* Put an empty link in the list to mark the end of this rule  */ 
 354   grammar_symbol_append (NULL
, grammar_end
->location
); 
 355   current_rule
->location 
= loc
; 
 359 /*-------------------------------------------------------------------. 
 360 | The previous action turns out the be a mid-rule action.  Attach it | 
 361 | to the current rule, i.e., create a dummy symbol, attach it this   | 
 362 | mid-rule action, and append this dummy nonterminal to the current  | 
 364 `-------------------------------------------------------------------*/ 
 367 grammar_midrule_action (void) 
 369   /* Since the action was written out with this rule's number, we must 
 370      give the new rule this number by inserting the new rule before 
 373   /* Make a DUMMY nonterminal, whose location is that of the midrule 
 374      action.  Create the MIDRULE.  */ 
 375   location dummy_location 
= current_rule
->action_props
.location
; 
 376   symbol 
*dummy 
= dummy_symbol_get (dummy_location
); 
 377   symbol_list 
*midrule 
= symbol_list_sym_new (dummy
, dummy_location
); 
 379   /* Remember named_ref of previous action. */ 
 380   named_ref 
*action_name 
= current_rule
->action_props
.named_ref
; 
 382   /* Make a new rule, whose body is empty, before the current one, so 
 383      that the action just read can belong to it.  */ 
 386   /* Attach its location and actions to that of the DUMMY.  */ 
 387   midrule
->location 
= dummy_location
; 
 388   code_props_rule_action_init (&midrule
->action_props
, 
 389                                current_rule
->action_props
.code
, 
 390                                current_rule
->action_props
.location
, 
 392   code_props_none_init (¤t_rule
->action_props
); 
 394   if (previous_rule_end
) 
 395     previous_rule_end
->next 
= midrule
; 
 399   /* End the dummy's rule.  */ 
 400   midrule
->next 
= symbol_list_sym_new (NULL
, dummy_location
); 
 401   midrule
->next
->next 
= current_rule
; 
 403   previous_rule_end 
= midrule
->next
; 
 405   /* Insert the dummy nonterminal replacing the midrule action into 
 406      the current rule.  Bind it to its dedicated rule.  */ 
 407   grammar_current_rule_symbol_append (dummy
, dummy_location
, 
 409   grammar_end
->midrule 
= midrule
; 
 410   midrule
->midrule_parent_rule 
= current_rule
; 
 411   midrule
->midrule_parent_rhs_index 
= symbol_list_length (current_rule
->next
); 
 414 /* Set the precedence symbol of the current rule to PRECSYM. */ 
 417 grammar_current_rule_prec_set (symbol 
*precsym
, location loc
) 
 419   /* POSIX says that any identifier is a nonterminal if it does not 
 420      appear on the LHS of a grammar rule and is not defined by %token 
 421      or by one of the directives that assigns precedence to a token.  We 
 422      ignore this here because the only kind of identifier that POSIX 
 423      allows to follow a %prec is a token and because assuming it's a 
 424      token now can produce more logical error messages.  Nevertheless, 
 425      grammar_rule_check does obey what we believe is the real intent of 
 426      POSIX here: that an error be reported for any identifier that 
 427      appears after %prec but that is not defined separately as a 
 429   symbol_class_set (precsym
, token_sym
, loc
, false); 
 430   if (current_rule
->ruleprec
) 
 431     complain_at (loc
, _("only one %s allowed per rule"), "%prec"); 
 432   current_rule
->ruleprec 
= precsym
; 
 435 /* Attach dynamic precedence DPREC to the current rule. */ 
 438 grammar_current_rule_dprec_set (int dprec
, location loc
) 
 441     warn_at (loc
, _("%s affects only GLR parsers"), "%dprec"); 
 443     complain_at (loc
, _("%s must be followed by positive number"), "%dprec"); 
 444   else if (current_rule
->dprec 
!= 0) 
 445     complain_at (loc
, _("only one %s allowed per rule"), "%dprec"); 
 446   current_rule
->dprec 
= dprec
; 
 449 /* Attach a merge function NAME with argument type TYPE to current 
 453 grammar_current_rule_merge_set (uniqstr name
, location loc
) 
 456     warn_at (loc
, _("%s affects only GLR parsers"), "%merge"); 
 457   if (current_rule
->merger 
!= 0) 
 458     complain_at (loc
, _("only one %s allowed per rule"), "%merge"); 
 459   current_rule
->merger 
= get_merge_function (name
); 
 460   current_rule
->merger_declaration_location 
= loc
; 
 463 /* Attach SYM to the current rule.  If needed, move the previous 
 464    action as a mid-rule action.  */ 
 467 grammar_current_rule_symbol_append (symbol 
*sym
, location loc
, 
 471   if (current_rule
->action_props
.code
) 
 472     grammar_midrule_action (); 
 473   p 
= grammar_symbol_append (sym
, loc
); 
 475     assign_named_ref(p
, name
); 
 478 /* Attach an ACTION to the current rule.  */ 
 481 grammar_current_rule_action_append (const char *action
, location loc
, 
 484   if (current_rule
->action_props
.code
) 
 485     grammar_midrule_action (); 
 486   /* After all symbol declarations have been parsed, packgram invokes 
 487      code_props_translate_code.  */ 
 488   code_props_rule_action_init (¤t_rule
->action_props
, action
, loc
, 
 493 /*---------------------------------------------------------------. 
 494 | Convert the rules into the representation using RRHS, RLHS and | 
 496 `---------------------------------------------------------------*/ 
 501   unsigned int itemno 
= 0; 
 502   rule_number ruleno 
= 0; 
 503   symbol_list 
*p 
= grammar
; 
 505   ritem 
= xnmalloc (nritems 
+ 1, sizeof *ritem
); 
 507   /* This sentinel is used by build_relations in gram.c.  */ 
 510   rules 
= xnmalloc (nrules
, sizeof *rules
); 
 515       symbol 
*ruleprec 
= p
->ruleprec
; 
 516       record_merge_function_type (p
->merger
, p
->content
.sym
->type_name
, 
 517                                   p
->merger_declaration_location
); 
 518       rules
[ruleno
].user_number 
= ruleno
; 
 519       rules
[ruleno
].number 
= ruleno
; 
 520       rules
[ruleno
].lhs 
= p
->content
.sym
; 
 521       rules
[ruleno
].rhs 
= ritem 
+ itemno
; 
 522       rules
[ruleno
].prec 
= NULL
; 
 523       rules
[ruleno
].dprec 
= p
->dprec
; 
 524       rules
[ruleno
].merger 
= p
->merger
; 
 525       rules
[ruleno
].precsym 
= NULL
; 
 526       rules
[ruleno
].location 
= p
->location
; 
 527       rules
[ruleno
].useful 
= true; 
 528       rules
[ruleno
].action 
= p
->action_props
.code
; 
 529       rules
[ruleno
].action_location 
= p
->action_props
.location
; 
 531       /* If the midrule's $$ is set or its $n is used, remove the `$' from the 
 532          symbol name so that it's a user-defined symbol so that the default 
 533          %destructor and %printer apply.  */ 
 534       if (p
->midrule_parent_rule
 
 535           && (p
->action_props
.is_value_used
 
 536               || symbol_list_n_get (p
->midrule_parent_rule
, 
 537                                     p
->midrule_parent_rhs_index
) 
 538                    ->action_props
.is_value_used
)) 
 539         p
->content
.sym
->tag 
+= 1; 
 541       /* Don't check the generated rule 0.  It has no action, so some rhs 
 542          symbols may appear unused, but the parsing algorithm ensures that 
 543          %destructor's are invoked appropriately.  */ 
 545         grammar_rule_check (p
); 
 547       for (p 
= p
->next
; p 
&& p
->content
.sym
; p 
= p
->next
) 
 551           /* Don't allow rule_length == INT_MAX, since that might 
 552              cause confusion with strtol if INT_MAX == LONG_MAX.  */ 
 553           if (rule_length 
== INT_MAX
) 
 554               fatal_at (rules
[ruleno
].location
, _("rule is too long")); 
 556           /* item_number = symbol_number. 
 557              But the former needs to contain more: negative rule numbers. */ 
 559             symbol_number_as_item_number (p
->content
.sym
->number
); 
 560           /* A rule gets by default the precedence and associativity 
 561              of its last token.  */ 
 562           if (p
->content
.sym
->class == token_sym 
&& default_prec
) 
 563             rules
[ruleno
].prec 
= p
->content
.sym
; 
 566       /* If this rule has a %prec, 
 567          the specified symbol's precedence replaces the default.  */ 
 570           rules
[ruleno
].precsym 
= ruleprec
; 
 571           rules
[ruleno
].prec 
= ruleprec
; 
 573       /* An item ends by the rule number (negated).  */ 
 574       ritem
[itemno
++] = rule_number_as_item_number (ruleno
); 
 575       aver (itemno 
< ITEM_NUMBER_MAX
); 
 577       aver (ruleno 
< RULE_NUMBER_MAX
); 
 583   aver (itemno 
== nritems
); 
 585   if (trace_flag 
& trace_sets
) 
 586     ritem_print (stderr
); 
 589 /*------------------------------------------------------------------. 
 590 | Read in the grammar specification and record it in the format     | 
 591 | described in gram.h.  All actions are copied into ACTION_OBSTACK, | 
 592 | in each case forming the body of a C function (YYACTION) which    | 
 593 | contains a switch statement to decide which action to execute.    | 
 594 `------------------------------------------------------------------*/ 
 599   /* Initialize the symbol table.  */ 
 602   /* Construct the accept symbol. */ 
 603   accept 
= symbol_get ("$accept", empty_location
); 
 604   accept
->class = nterm_sym
; 
 605   accept
->number 
= nvars
++; 
 607   /* Construct the error token */ 
 608   errtoken 
= symbol_get ("error", empty_location
); 
 609   errtoken
->class = token_sym
; 
 610   errtoken
->number 
= ntokens
++; 
 612   /* Construct a token that represents all undefined literal tokens. 
 613      It is always token number 2.  */ 
 614   undeftoken 
= symbol_get ("$undefined", empty_location
); 
 615   undeftoken
->class = token_sym
; 
 616   undeftoken
->number 
= ntokens
++; 
 618   gram_in 
= xfopen (grammar_file
, "r"); 
 620   gram__flex_debug 
= trace_flag 
& trace_scan
; 
 621   gram_debug 
= trace_flag 
& trace_parse
; 
 622   gram_scanner_initialize (); 
 624   prepare_percent_define_front_end_variables (); 
 626   if (! complaint_issued
) 
 627     check_and_convert_grammar (); 
 633 prepare_percent_define_front_end_variables (void) 
 635   /* Set %define front-end variable defaults.  */ 
 636   muscle_percent_define_default ("lr.keep-unreachable-states", "false"); 
 639     /* IELR would be a better default, but LALR is historically the 
 641     muscle_percent_define_default ("lr.type", "lalr"); 
 642     lr_type 
= muscle_percent_define_get ("lr.type"); 
 643     if (0 != strcmp (lr_type
, "canonical-lr")) 
 644       muscle_percent_define_default ("lr.default-reductions", "most"); 
 646       muscle_percent_define_default ("lr.default-reductions", "accepting"); 
 650   /* Check %define front-end variables.  */ 
 652     static char const * const values
[] = { 
 653       "lr.type", "lalr", "ielr", "canonical-lr", NULL
, 
 654       "lr.default-reductions", "most", "consistent", "accepting", NULL
, 
 657     muscle_percent_define_check_values (values
); 
 662 /*-------------------------------------------------------------. 
 663 | Check the grammar that has just been read, and convert it to | 
 665 `-------------------------------------------------------------*/ 
 668 check_and_convert_grammar (void) 
 670   /* Grammar has been read.  Do some checking.  */ 
 672     fatal (_("no rules in the input grammar")); 
 674   /* If the user did not define her ENDTOKEN, do it now. */ 
 677       endtoken 
= symbol_get ("$end", empty_location
); 
 678       endtoken
->class = token_sym
; 
 679       endtoken
->number 
= 0; 
 680       /* Value specified by POSIX.  */ 
 681       endtoken
->user_token_number 
= 0; 
 684   /* Report any undefined symbols and consider them nonterminals.  */ 
 685   symbols_check_defined (); 
 687   /* Find the start symbol if no %start.  */ 
 692            node 
!= NULL 
&& symbol_is_dummy (node
->content
.sym
); 
 695           for (node 
= node
->next
; 
 696                node 
!= NULL 
&& node
->content
.sym 
!= NULL
; 
 701       grammar_start_symbol_set (node
->content
.sym
, 
 702                                 node
->content
.sym
->location
); 
 705   /* Insert the initial rule, whose line is that of the first rule 
 706      (not that of the start symbol): 
 708      accept: %start EOF.  */ 
 710     symbol_list 
*p 
= symbol_list_sym_new (accept
, empty_location
); 
 711     p
->location 
= grammar
->location
; 
 712     p
->next 
= symbol_list_sym_new (startsymbol
, empty_location
); 
 713     p
->next
->next 
= symbol_list_sym_new (endtoken
, empty_location
); 
 714     p
->next
->next
->next 
= symbol_list_sym_new (NULL
, empty_location
); 
 715     p
->next
->next
->next
->next 
= grammar
; 
 721   aver (nsyms 
<= SYMBOL_NUMBER_MAXIMUM 
&& nsyms 
== ntokens 
+ nvars
); 
 723   /* Assign the symbols their symbol numbers.  Write #defines for the 
 724      token symbols into FDEFINES if requested.  */ 
 727   /* Scan rule actions after invoking symbol_check_alias_consistency (in 
 728      symbols_pack above) so that token types are set correctly before the rule 
 729      action type checking. 
 731      Before invoking grammar_rule_check (in packgram below) on any rule, make 
 732      sure all actions have already been scanned in order to set `used' flags. 
 733      Otherwise, checking that a midrule's $$ should be set will not always work 
 734      properly because the check must forward-reference the midrule's parent 
 735      rule.  For the same reason, all the `used' flags must be set before 
 736      checking whether to remove `$' from any midrule symbol name (also in 
 740     for (sym 
= grammar
; sym
; sym 
= sym
->next
) 
 741       code_props_translate_code (&sym
->action_props
); 
 744   /* Convert the grammar into the format described in gram.h.  */ 
 747   /* The grammar as a symbol_list is no longer needed. */ 
 748   symbol_list_free (grammar
);