1 /* Input parser for Bison 
   3    Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002, 2003, 
   4    2005, 2006, 2007, 2009 Free Software Foundation, Inc. 
   6    This file is part of Bison, the GNU Compiler Compiler. 
   8    This program is free software: you can redistribute it and/or modify 
   9    it under the terms of the GNU General Public License as published by 
  10    the Free Software Foundation, either version 3 of the License, or 
  11    (at your option) any later version. 
  13    This program is distributed in the hope that it will be useful, 
  14    but WITHOUT ANY WARRANTY; without even the implied warranty of 
  15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  16    GNU General Public License for more details. 
  18    You should have received a copy of the GNU General Public License 
  19    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */ 
  27 #include "conflicts.h" 
  31 #include "muscle-tab.h" 
  35 #include "scan-gram.h" 
  36 #include "scan-code.h" 
  38 static void check_and_convert_grammar (void); 
  40 static symbol_list 
*grammar 
= NULL
; 
  41 static bool start_flag 
= false; 
  42 merger_list 
*merge_functions
; 
  44 /* Was %union seen?  */ 
  45 bool union_seen 
= false; 
  48 bool tag_seen 
= false; 
  50 /* Should rules have a default precedence?  */ 
  51 bool default_prec 
= true; 
  53 /*-----------------------. 
  54 | Set the start symbol.  | 
  55 `-----------------------*/ 
  58 grammar_start_symbol_set (symbol 
*sym
, location loc
) 
  61     complain_at (loc
, _("multiple %s declarations"), "%start"); 
  66       startsymbol_location 
= loc
; 
  72 /*------------------------------------------------------------------------. 
  73 | Return the merger index for a merging function named NAME.  Records the | 
  74 | function, if new, in MERGER_LIST.                                       | 
  75 `------------------------------------------------------------------------*/ 
  78 get_merge_function (uniqstr name
) 
  87   head
.next 
= merge_functions
; 
  88   for (syms 
= &head
, n 
= 1; syms
->next
; syms 
= syms
->next
, n 
+= 1) 
  89     if (UNIQSTR_EQ (name
, syms
->next
->name
)) 
  91   if (syms
->next 
== NULL
) 
  93       syms
->next 
= xmalloc (sizeof syms
->next
[0]); 
  94       syms
->next
->name 
= uniqstr_new (name
); 
  95       /* After all symbol type declarations have been parsed, packgram invokes 
  96          record_merge_function_type to set the type.  */ 
  97       syms
->next
->type 
= NULL
; 
  98       syms
->next
->next 
= NULL
; 
  99       merge_functions 
= head
.next
; 
 104 /*-------------------------------------------------------------------------. 
 105 | For the existing merging function with index MERGER, record the result   | 
 106 | type as TYPE as required by the lhs of the rule whose %merge declaration | 
 107 | is at DECLARATION_LOC.                                                   | 
 108 `-------------------------------------------------------------------------*/ 
 111 record_merge_function_type (int merger
, uniqstr type
, location declaration_loc
) 
 114   merger_list 
*merge_function
; 
 120     type 
= uniqstr_new (""); 
 123   for (merge_function 
= merge_functions
; 
 124        merge_function 
!= NULL 
&& merger_find 
!= merger
; 
 125        merge_function 
= merge_function
->next
) 
 127   aver (merge_function 
!= NULL 
&& merger_find 
== merger
); 
 128   if (merge_function
->type 
!= NULL 
&& !UNIQSTR_EQ (merge_function
->type
, type
)) 
 130       complain_at (declaration_loc
, 
 131                    _("result type clash on merge function `%s': <%s> != <%s>"), 
 132                    merge_function
->name
, type
, merge_function
->type
); 
 133       complain_at (merge_function
->type_declaration_location
, 
 134                    _("previous declaration")); 
 136   merge_function
->type 
= uniqstr_new (type
); 
 137   merge_function
->type_declaration_location 
= declaration_loc
; 
 140 /*--------------------------------------. 
 141 | Free all merge-function definitions.  | 
 142 `--------------------------------------*/ 
 145 free_merger_functions (void) 
 147   merger_list 
*L0 
= merge_functions
; 
 150       merger_list 
*L1 
= L0
->next
; 
 157 /*-------------------------------------------------------------------. 
 158 | Parse the input grammar into a one symbol_list structure.  Each    | 
 159 | rule is represented by a sequence of symbols: the left hand side   | 
 160 | followed by the contents of the right hand side, followed by a     | 
 161 | null pointer instead of a symbol to terminate the rule.  The next  | 
 162 | symbol is the lhs of the following rule.                           | 
 164 | All actions are copied out, labelled by the rule number they apply | 
 166 `-------------------------------------------------------------------*/ 
 168 /* The (currently) last symbol of GRAMMAR. */ 
 169 static symbol_list 
*grammar_end 
= NULL
; 
 171 /* Append SYM to the grammar.  */ 
 173 grammar_symbol_append (symbol 
*sym
, location loc
) 
 175   symbol_list 
*p 
= symbol_list_sym_new (sym
, loc
); 
 178     grammar_end
->next 
= p
; 
 184   /* A null SYM stands for an end of rule; it is not an actual 
 190 /* The rule currently being defined, and the previous rule. 
 191    CURRENT_RULE points to the first LHS of the current rule, while 
 192    PREVIOUS_RULE_END points to the *end* of the previous rule (NULL).  */ 
 193 static symbol_list 
*current_rule 
= NULL
; 
 194 static symbol_list 
*previous_rule_end 
= NULL
; 
 197 /*----------------------------------------------. 
 198 | Create a new rule for LHS in to the GRAMMAR.  | 
 199 `----------------------------------------------*/ 
 202 grammar_current_rule_begin (symbol 
*lhs
, location loc
) 
 204   /* Start a new rule and record its lhs.  */ 
 206   previous_rule_end 
= grammar_end
; 
 207   grammar_symbol_append (lhs
, loc
); 
 208   current_rule 
= grammar_end
; 
 210   /* Mark the rule's lhs as a nonterminal if not already so.  */ 
 211   if (lhs
->class == unknown_sym
) 
 213       lhs
->class = nterm_sym
; 
 217   else if (lhs
->class == token_sym
) 
 218     complain_at (loc
, _("rule given for %s, which is a token"), lhs
->tag
); 
 222 /*----------------------------------------------------------------------. 
 223 | A symbol should be used if either:                                    | 
 224 |   1. It has a destructor.                                             | 
 225 |   2. --warnings=midrule-values and the symbol is a mid-rule symbol    | 
 226 |      (i.e., the generated LHS replacing a mid-rule action) that was   | 
 227 |      assigned to or used, as in "exp: { $$ = 1; } { $$ = $1; }".      | 
 228 `----------------------------------------------------------------------*/ 
 231 symbol_should_be_used (symbol_list 
const *s
) 
 233   if (symbol_destructor_get (s
->content
.sym
)->code
) 
 235   if (warnings_flag 
& warnings_midrule_values
) 
 236     return ((s
->midrule 
&& s
->midrule
->action_props
.is_value_used
) 
 237             || (s
->midrule_parent_rule
 
 238                 && symbol_list_n_get (s
->midrule_parent_rule
, 
 239                                       s
->midrule_parent_rhs_index
) 
 240                      ->action_props
.is_value_used
)); 
 244 /*----------------------------------------------------------------. 
 245 | Check that the rule R is properly defined.  For instance, there | 
 246 | should be no type clash on the default action.                  | 
 247 `----------------------------------------------------------------*/ 
 250 grammar_rule_check (const symbol_list 
*r
) 
 254      If there is an action, then there is nothing we can do: the user 
 255      is allowed to shoot herself in the foot. 
 257      Don't worry about the default action if $$ is untyped, since $$'s 
 258      value can't be used.  */ 
 259   if (!r
->action_props
.code 
&& r
->content
.sym
->type_name
) 
 261       symbol 
*first_rhs 
= r
->next
->content
.sym
; 
 262       /* If $$ is being set in default way, report if any type mismatch.  */ 
 265           char const *lhs_type 
= r
->content
.sym
->type_name
; 
 266           const char *rhs_type 
= 
 267             first_rhs
->type_name 
? first_rhs
->type_name 
: ""; 
 268           if (!UNIQSTR_EQ (lhs_type
, rhs_type
)) 
 269             warn_at (r
->location
, 
 270                      _("type clash on default action: <%s> != <%s>"), 
 273       /* Warn if there is no default for $$ but we need one.  */ 
 275         warn_at (r
->location
, 
 276                  _("empty rule for typed nonterminal, and no action")); 
 279   /* Check that symbol values that should be used are in fact used.  */ 
 281     symbol_list 
const *l 
= r
; 
 283     for (; l 
&& l
->content
.sym
; l 
= l
->next
, ++n
) 
 284       if (! (l
->action_props
.is_value_used
 
 285              || !symbol_should_be_used (l
) 
 286              /* The default action, $$ = $1, `uses' both.  */ 
 287              || (!r
->action_props
.code 
&& (n 
== 0 || n 
== 1)))) 
 290             warn_at (r
->location
, _("unused value: $%d"), n
); 
 292             warn_at (r
->location
, _("unset value: $$")); 
 298 /*-------------------------------------. 
 299 | End the currently being grown rule.  | 
 300 `-------------------------------------*/ 
 303 grammar_current_rule_end (location loc
) 
 305   /* Put an empty link in the list to mark the end of this rule  */ 
 306   grammar_symbol_append (NULL
, grammar_end
->location
); 
 307   current_rule
->location 
= loc
; 
 311 /*-------------------------------------------------------------------. 
 312 | The previous action turns out the be a mid-rule action.  Attach it | 
 313 | to the current rule, i.e., create a dummy symbol, attach it this   | 
 314 | mid-rule action, and append this dummy nonterminal to the current  | 
 316 `-------------------------------------------------------------------*/ 
 319 grammar_midrule_action (void) 
 321   /* Since the action was written out with this rule's number, we must 
 322      give the new rule this number by inserting the new rule before 
 325   /* Make a DUMMY nonterminal, whose location is that of the midrule 
 326      action.  Create the MIDRULE.  */ 
 327   location dummy_location 
= current_rule
->action_props
.location
; 
 328   symbol 
*dummy 
= dummy_symbol_get (dummy_location
); 
 329   symbol_list 
*midrule 
= symbol_list_sym_new (dummy
, dummy_location
); 
 331   /* Make a new rule, whose body is empty, before the current one, so 
 332      that the action just read can belong to it.  */ 
 335   /* Attach its location and actions to that of the DUMMY.  */ 
 336   midrule
->location 
= dummy_location
; 
 337   code_props_rule_action_init (&midrule
->action_props
, 
 338                                current_rule
->action_props
.code
, 
 339                                current_rule
->action_props
.location
, 
 341   code_props_none_init (¤t_rule
->action_props
); 
 343   if (previous_rule_end
) 
 344     previous_rule_end
->next 
= midrule
; 
 348   /* End the dummy's rule.  */ 
 349   midrule
->next 
= symbol_list_sym_new (NULL
, dummy_location
); 
 350   midrule
->next
->next 
= current_rule
; 
 352   previous_rule_end 
= midrule
->next
; 
 354   /* Insert the dummy nonterminal replacing the midrule action into 
 355      the current rule.  Bind it to its dedicated rule.  */ 
 356   grammar_current_rule_symbol_append (dummy
, dummy_location
); 
 357   grammar_end
->midrule 
= midrule
; 
 358   midrule
->midrule_parent_rule 
= current_rule
; 
 359   midrule
->midrule_parent_rhs_index 
= symbol_list_length (current_rule
->next
); 
 362 /* Set the precedence symbol of the current rule to PRECSYM. */ 
 365 grammar_current_rule_prec_set (symbol 
*precsym
, location loc
) 
 367   symbol_class_set (precsym
, token_sym
, loc
, false); 
 368   if (current_rule
->ruleprec
) 
 369     complain_at (loc
, _("only one %s allowed per rule"), "%prec"); 
 370   current_rule
->ruleprec 
= precsym
; 
 373 /* Attach dynamic precedence DPREC to the current rule. */ 
 376 grammar_current_rule_dprec_set (int dprec
, location loc
) 
 379     warn_at (loc
, _("%s affects only GLR parsers"), "%dprec"); 
 381     complain_at (loc
, _("%s must be followed by positive number"), "%dprec"); 
 382   else if (current_rule
->dprec 
!= 0) 
 383     complain_at (loc
, _("only one %s allowed per rule"), "%dprec"); 
 384   current_rule
->dprec 
= dprec
; 
 387 /* Attach a merge function NAME with argument type TYPE to current 
 391 grammar_current_rule_merge_set (uniqstr name
, location loc
) 
 394     warn_at (loc
, _("%s affects only GLR parsers"), "%merge"); 
 395   if (current_rule
->merger 
!= 0) 
 396     complain_at (loc
, _("only one %s allowed per rule"), "%merge"); 
 397   current_rule
->merger 
= get_merge_function (name
); 
 398   current_rule
->merger_declaration_location 
= loc
; 
 401 /* Attach SYM to the current rule.  If needed, move the previous 
 402    action as a mid-rule action.  */ 
 405 grammar_current_rule_symbol_append (symbol 
*sym
, location loc
) 
 407   if (current_rule
->action_props
.code
) 
 408     grammar_midrule_action (); 
 409   grammar_symbol_append (sym
, loc
); 
 412 /* Attach an ACTION to the current rule.  */ 
 415 grammar_current_rule_action_append (const char *action
, location loc
) 
 417   if (current_rule
->action_props
.code
) 
 418     grammar_midrule_action (); 
 419   /* After all symbol declarations have been parsed, packgram invokes 
 420      code_props_translate_code.  */ 
 421   code_props_rule_action_init (¤t_rule
->action_props
, action
, loc
, 
 426 /*---------------------------------------------------------------. 
 427 | Convert the rules into the representation using RRHS, RLHS and | 
 429 `---------------------------------------------------------------*/ 
 434   unsigned int itemno 
= 0; 
 435   rule_number ruleno 
= 0; 
 436   symbol_list 
*p 
= grammar
; 
 438   ritem 
= xnmalloc (nritems 
+ 1, sizeof *ritem
); 
 440   /* This sentinel is used by build_relations in gram.c.  */ 
 443   rules 
= xnmalloc (nrules
, sizeof *rules
); 
 448       symbol 
*ruleprec 
= p
->ruleprec
; 
 449       record_merge_function_type (p
->merger
, p
->content
.sym
->type_name
, 
 450                                   p
->merger_declaration_location
); 
 451       rules
[ruleno
].user_number 
= ruleno
; 
 452       rules
[ruleno
].number 
= ruleno
; 
 453       rules
[ruleno
].lhs 
= p
->content
.sym
; 
 454       rules
[ruleno
].rhs 
= ritem 
+ itemno
; 
 455       rules
[ruleno
].prec 
= NULL
; 
 456       rules
[ruleno
].dprec 
= p
->dprec
; 
 457       rules
[ruleno
].merger 
= p
->merger
; 
 458       rules
[ruleno
].precsym 
= NULL
; 
 459       rules
[ruleno
].location 
= p
->location
; 
 460       rules
[ruleno
].useful 
= true; 
 461       rules
[ruleno
].action 
= p
->action_props
.code
; 
 462       rules
[ruleno
].action_location 
= p
->action_props
.location
; 
 464       /* If the midrule's $$ is set or its $n is used, remove the `$' from the 
 465          symbol name so that it's a user-defined symbol so that the default 
 466          %destructor and %printer apply.  */ 
 467       if (p
->midrule_parent_rule
 
 468           && (p
->action_props
.is_value_used
 
 469               || symbol_list_n_get (p
->midrule_parent_rule
, 
 470                                     p
->midrule_parent_rhs_index
) 
 471                    ->action_props
.is_value_used
)) 
 472         p
->content
.sym
->tag 
+= 1; 
 474       /* Don't check the generated rule 0.  It has no action, so some rhs 
 475          symbols may appear unused, but the parsing algorithm ensures that 
 476          %destructor's are invoked appropriately.  */ 
 478         grammar_rule_check (p
); 
 480       for (p 
= p
->next
; p 
&& p
->content
.sym
; p 
= p
->next
) 
 484           /* Don't allow rule_length == INT_MAX, since that might 
 485              cause confusion with strtol if INT_MAX == LONG_MAX.  */ 
 486           if (rule_length 
== INT_MAX
) 
 487               fatal_at (rules
[ruleno
].location
, _("rule is too long")); 
 489           /* item_number = symbol_number. 
 490              But the former needs to contain more: negative rule numbers. */ 
 492             symbol_number_as_item_number (p
->content
.sym
->number
); 
 493           /* A rule gets by default the precedence and associativity 
 494              of its last token.  */ 
 495           if (p
->content
.sym
->class == token_sym 
&& default_prec
) 
 496             rules
[ruleno
].prec 
= p
->content
.sym
; 
 499       /* If this rule has a %prec, 
 500          the specified symbol's precedence replaces the default.  */ 
 503           rules
[ruleno
].precsym 
= ruleprec
; 
 504           rules
[ruleno
].prec 
= ruleprec
; 
 506       /* An item ends by the rule number (negated).  */ 
 507       ritem
[itemno
++] = rule_number_as_item_number (ruleno
); 
 508       aver (itemno 
< ITEM_NUMBER_MAX
); 
 510       aver (ruleno 
< RULE_NUMBER_MAX
); 
 516   aver (itemno 
== nritems
); 
 518   if (trace_flag 
& trace_sets
) 
 519     ritem_print (stderr
); 
 522 /*------------------------------------------------------------------. 
 523 | Read in the grammar specification and record it in the format     | 
 524 | described in gram.h.  All actions are copied into ACTION_OBSTACK, | 
 525 | in each case forming the body of a C function (YYACTION) which    | 
 526 | contains a switch statement to decide which action to execute.    | 
 527 `------------------------------------------------------------------*/ 
 532   /* Initialize the symbol table.  */ 
 535   /* Construct the accept symbol. */ 
 536   accept 
= symbol_get ("$accept", empty_location
); 
 537   accept
->class = nterm_sym
; 
 538   accept
->number 
= nvars
++; 
 540   /* Construct the error token */ 
 541   errtoken 
= symbol_get ("error", empty_location
); 
 542   errtoken
->class = token_sym
; 
 543   errtoken
->number 
= ntokens
++; 
 545   /* Construct a token that represents all undefined literal tokens. 
 546      It is always token number 2.  */ 
 547   undeftoken 
= symbol_get ("$undefined", empty_location
); 
 548   undeftoken
->class = token_sym
; 
 549   undeftoken
->number 
= ntokens
++; 
 551   gram_in 
= xfopen (grammar_file
, "r"); 
 553   gram__flex_debug 
= trace_flag 
& trace_scan
; 
 554   gram_debug 
= trace_flag 
& trace_parse
; 
 555   gram_scanner_initialize (); 
 558   if (! complaint_issued
) 
 559     check_and_convert_grammar (); 
 565 /*-------------------------------------------------------------. 
 566 | Check the grammar that has just been read, and convert it to | 
 568 `-------------------------------------------------------------*/ 
 571 check_and_convert_grammar (void) 
 573   /* Grammar has been read.  Do some checking.  */ 
 575     fatal (_("no rules in the input grammar")); 
 577   /* Report any undefined symbols and consider them nonterminals.  */ 
 578   symbols_check_defined (); 
 580   /* If the user did not define her ENDTOKEN, do it now. */ 
 583       endtoken 
= symbol_get ("$end", empty_location
); 
 584       endtoken
->class = token_sym
; 
 585       endtoken
->number 
= 0; 
 586       /* Value specified by POSIX.  */ 
 587       endtoken
->user_token_number 
= 0; 
 590   /* Find the start symbol if no %start.  */ 
 595            node 
!= NULL 
&& symbol_is_dummy (node
->content
.sym
); 
 598           for (node 
= node
->next
; 
 599                node 
!= NULL 
&& node
->content
.sym 
!= NULL
; 
 604       grammar_start_symbol_set (node
->content
.sym
, 
 605                                 node
->content
.sym
->location
); 
 608   /* Insert the initial rule, whose line is that of the first rule 
 609      (not that of the start symbol): 
 611      accept: %start EOF.  */ 
 613     symbol_list 
*p 
= symbol_list_sym_new (accept
, empty_location
); 
 614     p
->location 
= grammar
->location
; 
 615     p
->next 
= symbol_list_sym_new (startsymbol
, empty_location
); 
 616     p
->next
->next 
= symbol_list_sym_new (endtoken
, empty_location
); 
 617     p
->next
->next
->next 
= symbol_list_sym_new (NULL
, empty_location
); 
 618     p
->next
->next
->next
->next 
= grammar
; 
 624   aver (nsyms 
<= SYMBOL_NUMBER_MAXIMUM 
&& nsyms 
== ntokens 
+ nvars
); 
 626   /* Assign the symbols their symbol numbers.  Write #defines for the 
 627      token symbols into FDEFINES if requested.  */ 
 630   /* Scan rule actions after invoking symbol_check_alias_consistency (in 
 631      symbols_pack above) so that token types are set correctly before the rule 
 632      action type checking. 
 634      Before invoking grammar_rule_check (in packgram below) on any rule, make 
 635      sure all actions have already been scanned in order to set `used' flags. 
 636      Otherwise, checking that a midrule's $$ should be set will not always work 
 637      properly because the check must forward-reference the midrule's parent 
 638      rule.  For the same reason, all the `used' flags must be set before 
 639      checking whether to remove `$' from any midrule symbol name (also in 
 643     for (sym 
= grammar
; sym
; sym 
= sym
->next
) 
 644       code_props_translate_code (&sym
->action_props
); 
 647   /* Convert the grammar into the format described in gram.h.  */ 
 650   /* The grammar as a symbol_list is no longer needed. */ 
 651   symbol_list_free (grammar
);