]>
git.saurik.com Git - bison.git/blob - src/reader.c
b6200c91dd57448d1acf7aba067b3880449a5c4b
   1 /* Input parser for Bison 
   3    Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002, 2003, 
   4    2005 Free Software Foundation, Inc. 
   6    This file is part of Bison, the GNU Compiler Compiler. 
   8    Bison is free software; you can redistribute it and/or modify 
   9    it under the terms of the GNU General Public License as published by 
  10    the Free Software Foundation; either version 2, or (at your option) 
  13    Bison is distributed in the hope that it will be useful, 
  14    but WITHOUT ANY WARRANTY; without even the implied warranty of 
  15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  16    GNU General Public License for more details. 
  18    You should have received a copy of the GNU General Public License 
  19    along with Bison; see the file COPYING.  If not, write to 
  20    the Free Software Foundation, Inc., 59 Temple Place - Suite 330, 
  21    Boston, MA 02111-1307, USA.  */ 
  28 #include "conflicts.h" 
  32 #include "muscle_tab.h" 
  38 static symbol_list 
*grammar 
= NULL
; 
  39 static bool start_flag 
= false; 
  40 merger_list 
*merge_functions
; 
  42 /* Has %union been seen?  */ 
  45 /* Should rules have a default precedence?  */ 
  46 bool default_prec 
= true; 
  48 /*-----------------------. 
  49 | Set the start symbol.  | 
  50 `-----------------------*/ 
  53 grammar_start_symbol_set (symbol 
*sym
, location loc
) 
  56     complain_at (loc
, _("multiple %s declarations"), "%start"); 
  61       startsymbol_location 
= loc
; 
  66 /*----------------------------------------------------------------. 
  67 | There are two prologues: one before %union, one after.  Augment | 
  69 `----------------------------------------------------------------*/ 
  72 prologue_augment (const char *prologue
, location loc
) 
  74   struct obstack 
*oout 
= 
  75     !typed 
? &pre_prologue_obstack 
: &post_prologue_obstack
; 
  77   obstack_fgrow1 (oout
, "]b4_syncline(%d, [[", loc
.start
.line
); 
  78   MUSCLE_OBSTACK_SGROW (oout
, 
  79                         quotearg_style (c_quoting_style
, loc
.start
.file
)); 
  80   obstack_sgrow (oout
, "]])[\n"); 
  81   obstack_sgrow (oout
, prologue
); 
  86 /*-------------------------------------------------------------------. 
  87 | Return the merger index for a merging function named NAME, whose   | 
  88 | arguments have type TYPE.  Records the function, if new, in        | 
  90 `-------------------------------------------------------------------*/ 
  93 get_merge_function (uniqstr name
, uniqstr type
, location loc
) 
 103     type 
= uniqstr_new (""); 
 105   head
.next 
= merge_functions
; 
 106   for (syms 
= &head
, n 
= 1; syms
->next 
!= NULL
; syms 
= syms
->next
, n 
+= 1) 
 107     if (UNIQSTR_EQ (name
, syms
->next
->name
)) 
 109   if (syms
->next 
== NULL
) 
 111       syms
->next 
= xmalloc (sizeof syms
->next
[0]); 
 112       syms
->next
->name 
= uniqstr_new (name
); 
 113       syms
->next
->type 
= uniqstr_new (type
); 
 114       syms
->next
->next 
= NULL
; 
 115       merge_functions 
= head
.next
; 
 117   else if (!UNIQSTR_EQ (type
, syms
->next
->type
)) 
 118     warn_at (loc
, _("result type clash on merge function %s: <%s> != <%s>"), 
 119              name
, type
, syms
->next
->type
); 
 123 /*--------------------------------------. 
 124 | Free all merge-function definitions.  | 
 125 `--------------------------------------*/ 
 128 free_merger_functions (void) 
 133   L0 
= merge_functions
; 
 136       merger_list 
*L1 
= L0
->next
; 
 143 /*-------------------------------------------------------------------. 
 144 | Parse the input grammar into a one symbol_list structure.  Each    | 
 145 | rule is represented by a sequence of symbols: the left hand side   | 
 146 | followed by the contents of the right hand side, followed by a     | 
 147 | null pointer instead of a symbol to terminate the rule.  The next  | 
 148 | symbol is the lhs of the following rule.                           | 
 150 | All actions are copied out, labelled by the rule number they apply | 
 153 | Bison used to allow some %directives in the rules sections, but    | 
 154 | this is no longer consider appropriate: (i) the documented grammar | 
 155 | doesn't claim it, (ii), it would promote bad style, (iii), error   | 
 156 | recovery for %directives consists in skipping the junk until a `%' | 
 157 | is seen and helrp synchronizing.  This scheme is definitely wrong  | 
 158 | in the rules section.                                              | 
 159 `-------------------------------------------------------------------*/ 
 161 /* The (currently) last symbol of GRAMMAR. */ 
 162 symbol_list 
*grammar_end 
= NULL
; 
 164 /* Append SYM to the grammar.  */ 
 166 grammar_symbol_append (symbol 
*sym
, location loc
) 
 168   symbol_list 
*p 
= symbol_list_new (sym
, loc
); 
 171     grammar_end
->next 
= p
; 
 178 /* The rule currently being defined, and the previous rule. 
 179    CURRENT_RULE points to the first LHS of the current rule, while 
 180    PREVIOUS_RULE_END points to the *end* of the previous rule (NULL).  */ 
 181 symbol_list 
*current_rule 
= NULL
; 
 182 symbol_list 
*previous_rule_end 
= NULL
; 
 185 /*----------------------------------------------. 
 186 | Create a new rule for LHS in to the GRAMMAR.  | 
 187 `----------------------------------------------*/ 
 190 grammar_rule_begin (symbol 
*lhs
, location loc
) 
 195       startsymbol_location 
= loc
; 
 199   /* Start a new rule and record its lhs.  */ 
 203   previous_rule_end 
= grammar_end
; 
 204   grammar_symbol_append (lhs
, loc
); 
 205   current_rule 
= grammar_end
; 
 207   /* Mark the rule's lhs as a nonterminal if not already so.  */ 
 209   if (lhs
->class == unknown_sym
) 
 211       lhs
->class = nterm_sym
; 
 215   else if (lhs
->class == token_sym
) 
 216     complain_at (loc
, _("rule given for %s, which is a token"), lhs
->tag
); 
 219 /* Check that the last rule (CURRENT_RULE) is properly defined.  For 
 220    instance, there should be no type clash on the default action.  */ 
 223 grammar_current_rule_check (void) 
 225   symbol 
*lhs 
= current_rule
->sym
; 
 226   char const *lhs_type 
= lhs
->type_name
; 
 227   symbol 
*first_rhs 
= current_rule
->next
->sym
; 
 229   /* If there is an action, then there is nothing we can do: the user 
 230      is allowed to shoot herself in the foot.  */ 
 231   if (current_rule
->action
) 
 234   /* Don't worry about the default action if $$ is untyped, since $$'s 
 235      value can't be used.  */ 
 239   /* If $$ is being set in default way, report if any type mismatch.  */ 
 242       const char *rhs_type 
= first_rhs
->type_name 
? first_rhs
->type_name 
: ""; 
 243       if (!UNIQSTR_EQ (lhs_type
, rhs_type
)) 
 244         warn_at (current_rule
->location
, 
 245                  _("type clash on default action: <%s> != <%s>"), 
 248   /* Warn if there is no default for $$ but we need one.  */ 
 250     warn_at (current_rule
->location
, 
 251              _("empty rule for typed nonterminal, and no action")); 
 255 /*-------------------------------------. 
 256 | End the currently being grown rule.  | 
 257 `-------------------------------------*/ 
 260 grammar_rule_end (location loc
) 
 262   /* Put an empty link in the list to mark the end of this rule  */ 
 263   grammar_symbol_append (NULL
, grammar_end
->location
); 
 264   current_rule
->location 
= loc
; 
 265   grammar_current_rule_check (); 
 269 /*-------------------------------------------------------------------. 
 270 | The previous action turns out the be a mid-rule action.  Attach it | 
 271 | to the current rule, i.e., create a dummy symbol, attach it this   | 
 272 | mid-rule action, and append this dummy nonterminal to the current  | 
 274 `-------------------------------------------------------------------*/ 
 277 grammar_midrule_action (void) 
 279   /* Since the action was written out with this rule's number, we must 
 280      give the new rule this number by inserting the new rule before 
 283   /* Make a DUMMY nonterminal, whose location is that of the midrule 
 284      action.  Create the MIDRULE.  */ 
 285   location dummy_location 
= current_rule
->action_location
; 
 286   symbol 
*dummy 
= dummy_symbol_get (dummy_location
); 
 287   symbol_list 
*midrule 
= symbol_list_new (dummy
, dummy_location
); 
 289   /* Make a new rule, whose body is empty, before the current one, so 
 290      that the action just read can belong to it.  */ 
 293   /* Attach its location and actions to that of the DUMMY.  */ 
 294   midrule
->location 
= dummy_location
; 
 295   midrule
->action 
= current_rule
->action
; 
 296   midrule
->action_location 
= dummy_location
; 
 297   current_rule
->action 
= NULL
; 
 299   if (previous_rule_end
) 
 300     previous_rule_end
->next 
= midrule
; 
 304   /* End the dummy's rule.  */ 
 305   previous_rule_end 
= symbol_list_new (NULL
, dummy_location
); 
 306   previous_rule_end
->next 
= current_rule
; 
 308   midrule
->next 
= previous_rule_end
; 
 310   /* Insert the dummy nonterminal replacing the midrule action into 
 312   grammar_current_rule_symbol_append (dummy
, dummy_location
); 
 315 /* Set the precedence symbol of the current rule to PRECSYM. */ 
 318 grammar_current_rule_prec_set (symbol 
*precsym
, location loc
) 
 320   if (current_rule
->ruleprec
) 
 321     complain_at (loc
, _("only one %s allowed per rule"), "%prec"); 
 322   current_rule
->ruleprec 
= precsym
; 
 325 /* Attach dynamic precedence DPREC to the current rule. */ 
 328 grammar_current_rule_dprec_set (int dprec
, location loc
) 
 331     warn_at (loc
, _("%s affects only GLR parsers"), "%dprec"); 
 333     complain_at (loc
, _("%s must be followed by positive number"), "%dprec"); 
 334   else if (current_rule
->dprec 
!= 0) 
 335     complain_at (loc
, _("only one %s allowed per rule"), "%dprec"); 
 336   current_rule
->dprec 
= dprec
; 
 339 /* Attach a merge function NAME with argument type TYPE to current 
 343 grammar_current_rule_merge_set (uniqstr name
, location loc
) 
 346     warn_at (loc
, _("%s affects only GLR parsers"), "%merge"); 
 347   if (current_rule
->merger 
!= 0) 
 348     complain_at (loc
, _("only one %s allowed per rule"), "%merge"); 
 349   current_rule
->merger 
= 
 350     get_merge_function (name
, current_rule
->sym
->type_name
, loc
); 
 353 /* Attach SYM to the current rule.  If needed, move the previous 
 354    action as a mid-rule action.  */ 
 357 grammar_current_rule_symbol_append (symbol 
*sym
, location loc
) 
 359   if (current_rule
->action
) 
 360     grammar_midrule_action (); 
 362   grammar_symbol_append (sym
, loc
); 
 365 /* Attach an ACTION to the current rule.  If needed, move the previous 
 366    action as a mid-rule action.  */ 
 369 grammar_current_rule_action_append (const char *action
, location loc
) 
 371   if (current_rule
->action
) 
 372     grammar_midrule_action (); 
 373   current_rule
->action 
= action
; 
 374   current_rule
->action_location 
= loc
; 
 378 /*---------------------------------------------------------------. 
 379 | Convert the rules into the representation using RRHS, RLHS and | 
 381 `---------------------------------------------------------------*/ 
 386   unsigned int itemno 
= 0; 
 387   rule_number ruleno 
= 0; 
 388   symbol_list 
*p 
= grammar
; 
 390   ritem 
= xnmalloc (nritems
, sizeof *ritem
); 
 391   rules 
= xnmalloc (nrules
, sizeof *rules
); 
 395       symbol 
*ruleprec 
= p
->ruleprec
; 
 396       rules
[ruleno
].user_number 
= ruleno
; 
 397       rules
[ruleno
].number 
= ruleno
; 
 398       rules
[ruleno
].lhs 
= p
->sym
; 
 399       rules
[ruleno
].rhs 
= ritem 
+ itemno
; 
 400       rules
[ruleno
].prec 
= NULL
; 
 401       rules
[ruleno
].dprec 
= p
->dprec
; 
 402       rules
[ruleno
].merger 
= p
->merger
; 
 403       rules
[ruleno
].precsym 
= NULL
; 
 404       rules
[ruleno
].location 
= p
->location
; 
 405       rules
[ruleno
].useful 
= true; 
 406       rules
[ruleno
].action 
= p
->action
; 
 407       rules
[ruleno
].action_location 
= p
->action_location
; 
 412           /* item_number = symbol_number. 
 413              But the former needs to contain more: negative rule numbers. */ 
 414           ritem
[itemno
++] = symbol_number_as_item_number (p
->sym
->number
); 
 415           /* A rule gets by default the precedence and associativity 
 416              of the last token in it.  */ 
 417           if (p
->sym
->class == token_sym 
&& default_prec
) 
 418             rules
[ruleno
].prec 
= p
->sym
; 
 423       /* If this rule has a %prec, 
 424          the specified symbol's precedence replaces the default.  */ 
 427           rules
[ruleno
].precsym 
= ruleprec
; 
 428           rules
[ruleno
].prec 
= ruleprec
; 
 430       ritem
[itemno
++] = rule_number_as_item_number (ruleno
); 
 437   if (itemno 
!= nritems
) 
 440   if (trace_flag 
& trace_sets
) 
 441     ritem_print (stderr
); 
 444 /*------------------------------------------------------------------. 
 445 | Read in the grammar specification and record it in the format     | 
 446 | described in gram.h.  All actions are copied into ACTION_OBSTACK, | 
 447 | in each case forming the body of a C function (YYACTION) which    | 
 448 | contains a switch statement to decide which action to execute.    | 
 449 `------------------------------------------------------------------*/ 
 454   /* Initialize the symbol table.  */ 
 457   /* Construct the accept symbol. */ 
 458   accept 
= symbol_get ("$accept", empty_location
); 
 459   accept
->class = nterm_sym
; 
 460   accept
->number 
= nvars
++; 
 462   /* Construct the error token */ 
 463   errtoken 
= symbol_get ("error", empty_location
); 
 464   errtoken
->class = token_sym
; 
 465   errtoken
->number 
= ntokens
++; 
 467   /* Construct a token that represents all undefined literal tokens. 
 468      It is always token number 2.  */ 
 469   undeftoken 
= symbol_get ("$undefined", empty_location
); 
 470   undeftoken
->class = token_sym
; 
 471   undeftoken
->number 
= ntokens
++; 
 473   /* Initialize the obstacks. */ 
 474   obstack_init (&pre_prologue_obstack
); 
 475   obstack_init (&post_prologue_obstack
); 
 477   finput 
= xfopen (grammar_file
, "r"); 
 480   gram__flex_debug 
= trace_flag 
& trace_scan
; 
 481   gram_debug 
= trace_flag 
& trace_parse
; 
 482   scanner_initialize (); 
 485   /* If something went wrong during the parsing, don't try to 
 487   if (complaint_issued
) 
 490   /* Grammar has been read.  Do some checking */ 
 492     fatal (_("no rules in the input grammar")); 
 494   /* Report any undefined symbols and consider them nonterminals.  */ 
 495   symbols_check_defined (); 
 497   /* If the user did not define her ENDTOKEN, do it now. */ 
 500       endtoken 
= symbol_get ("$end", empty_location
); 
 501       endtoken
->class = token_sym
; 
 502       endtoken
->number 
= 0; 
 503       /* Value specified by POSIX.  */ 
 504       endtoken
->user_token_number 
= 0; 
 507   /* Insert the initial rule, which line is that of the first rule 
 508      (not that of the start symbol): 
 510      accept: %start EOF.  */ 
 512     symbol_list 
*p 
= symbol_list_new (accept
, empty_location
); 
 513     p
->location 
= grammar
->location
; 
 514     p
->next 
= symbol_list_new (startsymbol
, empty_location
); 
 515     p
->next
->next 
= symbol_list_new (endtoken
, empty_location
); 
 516     p
->next
->next
->next 
= symbol_list_new (NULL
, empty_location
); 
 517     p
->next
->next
->next
->next 
= grammar
; 
 523   if (! (nsyms 
<= SYMBOL_NUMBER_MAXIMUM 
&& nsyms 
== ntokens 
+ nvars
)) 
 528   /* Assign the symbols their symbol numbers.  Write #defines for the 
 529      token symbols into FDEFINES if requested.  */ 
 532   /* Convert the grammar into the format described in gram.h.  */ 
 535   /* The grammar as a symbol_list is no longer needed. */ 
 536   LIST_FREE (symbol_list
, grammar
);