1 /* Grammar reduction for Bison. 
   2    Copyright (C) 1988, 1989, 2000, 2001, 2002 Free Software Foundation, Inc. 
   4    This file is part of Bison, the GNU Compiler Compiler. 
   6    Bison is free software; you can redistribute it and/or modify 
   7    it under the terms of the GNU General Public License as published by 
   8    the Free Software Foundation; either version 2, or (at your option) 
  11    Bison is distributed in the hope that it will be useful, 
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of 
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  14    GNU General Public License for more details. 
  16    You should have received a copy of the GNU General Public License 
  17    along with Bison; see the file COPYING.  If not, write to 
  18    the Free Software Foundation, Inc., 59 Temple Place - Suite 330, 
  19    Boston, MA 02111-1307, USA.  */ 
  22 /* Reduce the grammar: Find and eliminate unreachable terminals, 
  23    nonterminals, and productions.  David S. Bakin.  */ 
  25 /* Don't eliminate unreachable terminals: They may be used by the 
  41 /* Set of all nonterminals which are not useless.  */ 
  44 /* Set of all rules which have no useless nonterminals in their RHS.  */ 
  47 /* Set of all accessible symbols.  */ 
  50 /* Set of symbols used to define rule precedence (so they are 
  51    `useless', but no warning should be issued).  */ 
  54 static rule_number nuseful_productions
; 
  55 rule_number nuseless_productions
; 
  56 static int nuseful_nonterminals
; 
  57 symbol_number nuseless_nonterminals
; 
  59 /*-------------------------------------------------------------------. 
  60 | Another way to do this would be with a set for each production and | 
  61 | then do subset tests against N0, but even for the C grammar the    | 
  62 | whole reducing process takes only 2 seconds on my 8Mhz AT.         | 
  63 `-------------------------------------------------------------------*/ 
  66 useful_production (rule_number r
, bitset N0
) 
  70   /* A production is useful if all of the nonterminals in its appear 
  71      in the set of useful nonterminals.  */ 
  73   for (rhsp 
= rules
[r
].rhs
; *rhsp 
>= 0; ++rhsp
) 
  74     if (ISVAR (*rhsp
) && !bitset_test (N0
, *rhsp 
- ntokens
)) 
  80 /*---------------------------------------------------------. 
  81 | Remember that rules are 1-origin, symbols are 0-origin.  | 
  82 `---------------------------------------------------------*/ 
  85 useless_nonterminals (void) 
  90   /* N is set as built.  Np is set being built this iteration. P is 
  91      set of all productions which have a RHS all in N.  */ 
  93   Np 
= bitset_create (nvars
, BITSET_FIXED
); 
  96   /* The set being computed is a set of nonterminals which can derive 
  97      the empty string or strings consisting of all terminals. At each 
  98      iteration a nonterminal is added to the set if there is a 
  99      production with that nonterminal as its LHS for which all the 
 100      nonterminals in its RHS are already in the set.  Iterate until 
 101      the set being computed remains unchanged.  Any nonterminals not 
 102      in the set at that point are useless in that they will never be 
 103      used in deriving a sentence of the language. 
 105      This iteration doesn't use any special traversal over the 
 106      productions.  A set is kept of all productions for which all the 
 107      nonterminals in the RHS are in useful.  Only productions not in 
 108      this set are scanned on each iteration.  At the end, this set is 
 109      saved to be used when finding useful productions: only 
 110      productions in this set will appear in the final grammar.  */ 
 115       for (r 
= 0; r 
< nrules
; r
++) 
 116         if (!bitset_test (P
, r
) 
 117             && useful_production (r
, N
)) 
 119             bitset_set (Np
, rules
[r
].lhs
->number 
- ntokens
); 
 122       if (bitset_equal_p (N
, Np
)) 
 134 inaccessable_symbols (void) 
 138   /* Find out which productions are reachable and which symbols are 
 139      used.  Starting with an empty set of productions and a set of 
 140      symbols which only has the start symbol in it, iterate over all 
 141      productions until the set of productions remains unchanged for an 
 142      iteration.  For each production which has a LHS in the set of 
 143      reachable symbols, add the production to the set of reachable 
 144      productions, and add all of the nonterminals in the RHS of the 
 145      production to the set of reachable symbols. 
 147      Consider only the (partially) reduced grammar which has only 
 148      nonterminals in N and productions in P. 
 150      The result is the set P of productions in the reduced grammar, 
 151      and the set V of symbols in the reduced grammar. 
 153      Although this algorithm also computes the set of terminals which 
 154      are reachable, no terminal will be deleted from the grammar. Some 
 155      terminals might not be in the grammar but might be generated by 
 156      semantic routines, and so the user might want them available with 
 157      specified numbers.  (Is this true?)  However, the nonreachable 
 158      terminals are printed (if running in verbose mode) so that the 
 161   Vp 
= bitset_create (nsyms
, BITSET_FIXED
); 
 162   Pp 
= bitset_create (nrules
, BITSET_FIXED
); 
 164   /* If the start symbol isn't useful, then nothing will be useful. */ 
 165   if (bitset_test (N
, accept
->number 
- ntokens
)) 
 167       bitset_set (V
, accept
->number
); 
 173           for (r 
= 0; r 
< nrules
; r
++) 
 175               if (!bitset_test (Pp
, r
) 
 176                   && bitset_test (P
, r
) 
 177                   && bitset_test (V
, rules
[r
].lhs
->number
)) 
 180                   for (rhsp 
= rules
[r
].rhs
; *rhsp 
>= 0; rhsp
++) 
 181                     if (ISTOKEN (*rhsp
) || bitset_test (N
, *rhsp 
- ntokens
)) 
 182                       bitset_set (Vp
, *rhsp
); 
 186           if (bitset_equal_p (V
, Vp
)) 
 197   /* Tokens 0, 1, and 2 are internal to Bison.  Consider them useful. */ 
 198   bitset_set (V
, endtoken
->number
);             /* end-of-input token */ 
 199   bitset_set (V
, errtoken
->number
);             /* error token */ 
 200   bitset_set (V
, undeftoken
->number
);           /* some undefined token */ 
 205   nuseful_productions 
= bitset_count (P
); 
 206   nuseless_productions 
= nrules 
- nuseful_productions
; 
 208   nuseful_nonterminals 
= 0; 
 211     for (i 
= ntokens
; i 
< nsyms
; i
++) 
 212       if (bitset_test (V
, i
)) 
 213         nuseful_nonterminals
++; 
 215   nuseless_nonterminals 
= nvars 
- nuseful_nonterminals
; 
 217   /* A token that was used in %prec should not be warned about.  */ 
 220     for (r 
= 0; r 
< nrules
; ++r
) 
 221       if (rules
[r
].precsym 
!= 0) 
 222         bitset_set (V1
, rules
[r
].precsym
->number
); 
 227 /*-------------------------------------------------------------------. 
 228 | Put the useless productions at the end of RULES, and adjust NRULES | 
 230 `-------------------------------------------------------------------*/ 
 233 reduce_grammar_tables (void) 
 235   /* Report and flag useless productions.  */ 
 238     for (r 
= 0; r 
< nrules
; r
++) 
 239       rules
[r
].useful 
= bitset_test (P
, r
); 
 240     grammar_rules_never_reduced_report (_("useless rule")); 
 243   /* Map the nonterminals to their new index: useful first, useless 
 244      afterwards.  Kept for later report.  */ 
 247     int useless 
= nrules 
- nuseless_productions
; 
 248     rule 
*rules_sorted 
= XMALLOC (rule
, nrules
); 
 250     for (r 
= 0; r 
< nrules
; ++r
) 
 251       rules_sorted
[rules
[r
].useful 
? useful
++ : useless
++] = rules
[r
]; 
 253     rules 
= rules_sorted
; 
 255     /* Renumber the rules markers in RITEMS.  */ 
 256     for (r 
= 0; r 
< nrules
; ++r
) 
 258         item_number 
*rhsp 
= rules
[r
].rhs
; 
 259         for (/* Nothing. */; *rhsp 
>= 0; ++rhsp
) 
 261         *rhsp 
= rule_number_as_item_number (r
); 
 264     nrules 
-= nuseless_productions
; 
 267   /* Adjust NRITEMS.  */ 
 271     for (r 
= nrules
; r 
< nrules 
+ nuseless_productions
; ++r
) 
 273         length 
= rule_rhs_length (&rules
[r
]); 
 274         nritems 
-= length 
+ 1; 
 280 /*------------------------------. 
 281 | Remove useless nonterminals.  | 
 282 `------------------------------*/ 
 285 nonterminals_reduce (void) 
 289   /* Map the nonterminals to their new index: useful first, useless 
 290      afterwards.  Kept for later report.  */ 
 292   symbol_number 
*nontermmap 
= XCALLOC (symbol_number
, nvars
) - ntokens
; 
 294   for (i 
= ntokens
; i 
< nsyms
; i
++) 
 295     if (bitset_test (V
, i
)) 
 297   for (i 
= ntokens
; i 
< nsyms
; i
++) 
 298     if (!bitset_test (V
, i
)) 
 301         warn_at (symbols
[i
]->location
, _("useless nonterminal: %s"), 
 306   /* Shuffle elements of tables indexed by symbol number.  */ 
 308     symbol 
**symbols_sorted 
= XMALLOC (symbol 
*, nvars
) - ntokens
; 
 310     for (i 
= ntokens
; i 
< nsyms
; i
++) 
 311       symbols
[i
]->number 
= nontermmap
[i
]; 
 312     for (i 
= ntokens
; i 
< nsyms
; i
++) 
 313       symbols_sorted
[nontermmap
[i
]] = symbols
[i
]; 
 314     for (i 
= ntokens
; i 
< nsyms
; i
++) 
 315       symbols
[i
] = symbols_sorted
[i
]; 
 316     free (symbols_sorted 
+ ntokens
); 
 321     for (r 
= 0; r 
< nrules
; ++r
) 
 324         for (rhsp 
= rules
[r
].rhs
; *rhsp 
>= 0; ++rhsp
) 
 326             *rhsp 
=  symbol_number_as_item_number (nontermmap
[*rhsp
]); 
 328     accept
->number 
= nontermmap
[accept
->number
]; 
 331   nsyms 
-= nuseless_nonterminals
; 
 332   nvars 
-= nuseless_nonterminals
; 
 334   free (nontermmap 
+ ntokens
); 
 338 /*------------------------------------------------------------------. 
 339 | Output the detailed results of the reductions.  For FILE.output.  | 
 340 `------------------------------------------------------------------*/ 
 343 reduce_output (FILE *out
) 
 345   if (nuseless_nonterminals 
> 0) 
 348       fprintf (out
, "%s\n\n", _("Useless nonterminals")); 
 349       for (i 
= 0; i 
< nuseless_nonterminals
; ++i
) 
 350         fprintf (out
, "   %s\n", symbols
[nsyms 
+ i
]->tag
); 
 357     for (i 
= 0; i 
< ntokens
; i
++) 
 358       if (!bitset_test (V
, i
) && !bitset_test (V1
, i
)) 
 361             fprintf (out
, "%s\n\n", _("Terminals which are not used")); 
 363           fprintf (out
, "   %s\n", symbols
[i
]->tag
); 
 369   if (nuseless_productions 
> 0) 
 370     grammar_rules_partial_print (out
, _("Useless rules"), 
 378 /*-------------------------------. 
 379 | Report the results to STDERR.  | 
 380 `-------------------------------*/ 
 385   if (yacc_flag 
&& nuseless_productions
) 
 386     fprintf (stderr
, ngettext ("%d rule never reduced\n", 
 387                                "%d rules never reduced\n", 
 388                                nuseless_productions
), 
 389              nuseless_productions
); 
 391   fprintf (stderr
, "%s: %s: ", grammar_file
, _("warning")); 
 393   if (nuseless_nonterminals 
> 0) 
 394     fprintf (stderr
, ngettext ("%d useless nonterminal", 
 395                                "%d useless nonterminals", 
 396                                nuseless_nonterminals
), 
 397              nuseless_nonterminals
); 
 399   if (nuseless_nonterminals 
> 0 && nuseless_productions 
> 0) 
 400     fprintf (stderr
, _(" and ")); 
 402   if (nuseless_productions 
> 0) 
 403     fprintf (stderr
, ngettext ("%d useless rule", 
 405                                nuseless_productions
), 
 406              nuseless_productions
); 
 407   fprintf (stderr
, "\n"); 
 412 reduce_grammar (void) 
 416   /* Allocate the global sets used to compute the reduced grammar */ 
 418   N 
= bitset_create (nvars
, BITSET_FIXED
); 
 419   P 
=  bitset_create (nrules
, BITSET_FIXED
); 
 420   V 
= bitset_create (nsyms
, BITSET_FIXED
); 
 421   V1 
= bitset_create (nsyms
, BITSET_FIXED
); 
 423   useless_nonterminals (); 
 424   inaccessable_symbols (); 
 426   reduced 
= (bool) (nuseless_nonterminals 
+ nuseless_productions 
> 0); 
 432   if (!bitset_test (N
, accept
->number 
- ntokens
)) 
 433     fatal_at (startsymbol_location
, 
 434               _("start symbol %s does not derive any sentence"), 
 437   /* First reduce the nonterminals, as they renumber themselves in the 
 438      whole grammar.  If you change the order, nonterms would be 
 439      renumbered only in the reduced grammar.  */ 
 440   if (nuseless_nonterminals 
> 0) 
 441     nonterminals_reduce (); 
 442   if (nuseless_productions 
> 0) 
 443     reduce_grammar_tables (); 
 445   if (trace_flag 
& trace_grammar
) 
 447       grammar_dump (stderr
, "Reduced Grammar"); 
 449       fprintf (stderr
, "reduced %s defines %d terminals, %d nonterminals\ 
 450 , and %d productions.\n", 
 451                grammar_file
, ntokens
, nvars
, nrules
); 
 456 /*-----------------------------------------------------------. 
 457 | Free the global sets used to compute the reduced grammar.  | 
 458 `-----------------------------------------------------------*/