1 /* Grammar reduction for Bison. 
   2    Copyright (C) 1988, 1989, 2000, 2001, 2002  Free Software Foundation, Inc. 
   4    This file is part of Bison, the GNU Compiler Compiler. 
   6    Bison is free software; you can redistribute it and/or modify 
   7    it under the terms of the GNU General Public License as published by 
   8    the Free Software Foundation; either version 2, or (at your option) 
  11    Bison is distributed in the hope that it will be useful, 
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of 
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  14    GNU General Public License for more details. 
  16    You should have received a copy of the GNU General Public License 
  17    along with Bison; see the file COPYING.  If not, write to 
  18    the Free Software Foundation, Inc., 59 Temple Place - Suite 330, 
  19    Boston, MA 02111-1307, USA.  */ 
  22 /* Reduce the grammar: Find and eliminate unreachable terminals, 
  23    nonterminals, and productions.  David S. Bakin.  */ 
  25 /* Don't eliminate unreachable terminals: They may be used by the 
  40 /* Set of all nonterminals which are not useless.  */ 
  43 /* Set of all rules which have no useless nonterminals in their RHS.  */ 
  46 /* Set of all accessible symbols.  */ 
  49 /* Set of symbols used to define rule precedence (so they are 
  50    `useless', but no warning should be issued).  */ 
  53 static rule_number_t nuseful_productions
; 
  54 rule_number_t nuseless_productions
; 
  55 static int nuseful_nonterminals
; 
  56 symbol_number_t nuseless_nonterminals
; 
  58 /*-------------------------------------------------------------------. 
  59 | Another way to do this would be with a set for each production and | 
  60 | then do subset tests against N0, but even for the C grammar the    | 
  61 | whole reducing process takes only 2 seconds on my 8Mhz AT.         | 
  62 `-------------------------------------------------------------------*/ 
  65 useful_production (rule_number_t r
, bitset N0
) 
  69   /* A production is useful if all of the nonterminals in its appear 
  70      in the set of useful nonterminals.  */ 
  72   for (rhsp 
= rules
[r
].rhs
; *rhsp 
>= 0; ++rhsp
) 
  73     if (ISVAR (*rhsp
) && !bitset_test (N0
, *rhsp 
- ntokens
)) 
  79 /*---------------------------------------------------------. 
  80 | Remember that rules are 1-origin, symbols are 0-origin.  | 
  81 `---------------------------------------------------------*/ 
  84 useless_nonterminals (void) 
  89   /* N is set as built.  Np is set being built this iteration. P is 
  90      set of all productions which have a RHS all in N.  */ 
  92   Np 
= bitset_create (nvars
, BITSET_FIXED
); 
  95   /* The set being computed is a set of nonterminals which can derive 
  96      the empty string or strings consisting of all terminals. At each 
  97      iteration a nonterminal is added to the set if there is a 
  98      production with that nonterminal as its LHS for which all the 
  99      nonterminals in its RHS are already in the set.  Iterate until 
 100      the set being computed remains unchanged.  Any nonterminals not 
 101      in the set at that point are useless in that they will never be 
 102      used in deriving a sentence of the language. 
 104      This iteration doesn't use any special traversal over the 
 105      productions.  A set is kept of all productions for which all the 
 106      nonterminals in the RHS are in useful.  Only productions not in 
 107      this set are scanned on each iteration.  At the end, this set is 
 108      saved to be used when finding useful productions: only 
 109      productions in this set will appear in the final grammar.  */ 
 114       for (r 
= 0; r 
< nrules
; r
++) 
 115         if (!bitset_test (P
, r
) 
 116             && useful_production (r
, N
)) 
 118             bitset_set (Np
, rules
[r
].lhs
->number 
- ntokens
); 
 121       if (bitset_equal_p (N
, Np
)) 
 133 inaccessable_symbols (void) 
 137   /* Find out which productions are reachable and which symbols are 
 138      used.  Starting with an empty set of productions and a set of 
 139      symbols which only has the start symbol in it, iterate over all 
 140      productions until the set of productions remains unchanged for an 
 141      iteration.  For each production which has a LHS in the set of 
 142      reachable symbols, add the production to the set of reachable 
 143      productions, and add all of the nonterminals in the RHS of the 
 144      production to the set of reachable symbols. 
 146      Consider only the (partially) reduced grammar which has only 
 147      nonterminals in N and productions in P. 
 149      The result is the set P of productions in the reduced grammar, 
 150      and the set V of symbols in the reduced grammar. 
 152      Although this algorithm also computes the set of terminals which 
 153      are reachable, no terminal will be deleted from the grammar. Some 
 154      terminals might not be in the grammar but might be generated by 
 155      semantic routines, and so the user might want them available with 
 156      specified numbers.  (Is this true?)  However, the nonreachable 
 157      terminals are printed (if running in verbose mode) so that the 
 160   Vp 
= bitset_create (nsyms
, BITSET_FIXED
); 
 161   Pp 
= bitset_create (nrules
, BITSET_FIXED
); 
 163   /* If the start symbol isn't useful, then nothing will be useful. */ 
 164   if (bitset_test (N
, accept
->number 
- ntokens
)) 
 166       bitset_set (V
, accept
->number
); 
 172           for (r 
= 0; r 
< nrules
; r
++) 
 174               if (!bitset_test (Pp
, r
) 
 175                   && bitset_test (P
, r
) 
 176                   && bitset_test (V
, rules
[r
].lhs
->number
)) 
 179                   for (rhsp 
= rules
[r
].rhs
; *rhsp 
>= 0; rhsp
++) 
 180                     if (ISTOKEN (*rhsp
) || bitset_test (N
, *rhsp 
- ntokens
)) 
 181                       bitset_set (Vp
, *rhsp
); 
 185           if (bitset_equal_p (V
, Vp
)) 
 196   /* Tokens 0, 1, and 2 are internal to Bison.  Consider them useful. */ 
 197   bitset_set (V
, endtoken
->number
);             /* end-of-input token */ 
 198   bitset_set (V
, errtoken
->number
);             /* error token */ 
 199   bitset_set (V
, undeftoken
->number
);           /* some undefined token */ 
 204   nuseful_productions 
= bitset_count (P
); 
 205   nuseless_productions 
= nrules 
- nuseful_productions
; 
 207   nuseful_nonterminals 
= 0; 
 210     for (i 
= ntokens
; i 
< nsyms
; i
++) 
 211       if (bitset_test (V
, i
)) 
 212         nuseful_nonterminals
++; 
 214   nuseless_nonterminals 
= nvars 
- nuseful_nonterminals
; 
 216   /* A token that was used in %prec should not be warned about.  */ 
 219     for (r 
= 0; r 
< nrules
; ++r
) 
 220       if (rules
[r
].precsym 
!= 0) 
 221         bitset_set (V1
, rules
[r
].precsym
->number
); 
 226 /*-------------------------------------------------------------------. 
 227 | Put the useless productions at the end of RULES, and adjust NRULES | 
 229 `-------------------------------------------------------------------*/ 
 232 reduce_grammar_tables (void) 
 234   /* Report and flag useless productions.  */ 
 237     for (r 
= 0; r 
< nrules
; r
++) 
 238       rules
[r
].useful 
= bitset_test (P
, r
); 
 239     grammar_rules_never_reduced_report (_("useless rule")); 
 242   /* Map the nonterminals to their new index: useful first, useless 
 243      afterwards.  Kept for later report.  */ 
 246     int useless 
= nrules 
- nuseless_productions
; 
 247     rule_t 
*rules_sorted 
= XMALLOC (rule_t
, nrules
); 
 249     for (r 
= 0; r 
< nrules
; ++r
) 
 250       rules_sorted
[rules
[r
].useful 
? useful
++ : useless
++] = rules
[r
]; 
 252     rules 
= rules_sorted
; 
 254     /* Renumber the rules markers in RITEMS.  */ 
 255     for (r 
= 0; r 
< nrules
; ++r
) 
 257         item_number_t 
*rhsp 
= rules
[r
].rhs
; 
 258         for (/* Nothing. */; *rhsp 
>= 0; ++rhsp
) 
 260         *rhsp 
= rule_number_as_item_number (r
); 
 263     nrules 
-= nuseless_productions
; 
 266   /* Adjust NRITEMS.  */ 
 270     for (r 
= nrules
; r 
< nrules 
+ nuseless_productions
; ++r
) 
 272         length 
= rule_rhs_length (&rules
[r
]); 
 273         nritems 
-= length 
+ 1; 
 279 /*------------------------------. 
 280 | Remove useless nonterminals.  | 
 281 `------------------------------*/ 
 284 nonterminals_reduce (void) 
 286   symbol_number_t i
, n
; 
 288   /* Map the nonterminals to their new index: useful first, useless 
 289      afterwards.  Kept for later report.  */ 
 291   symbol_number_t 
*nontermmap 
= XCALLOC (symbol_number_t
, nvars
) - ntokens
; 
 293   for (i 
= ntokens
; i 
< nsyms
; i
++) 
 294     if (bitset_test (V
, i
)) 
 296   for (i 
= ntokens
; i 
< nsyms
; i
++) 
 297     if (!bitset_test (V
, i
)) 
 300         LOCATION_PRINT (stderr
, symbols
[i
]->location
); 
 301         fprintf (stderr
, ": %s: %s: %s\n", 
 302                  _("warning"), _("useless nonterminal"), 
 307   /* Shuffle elements of tables indexed by symbol number.  */ 
 309     symbol_t 
**symbols_sorted 
= XMALLOC (symbol_t 
*, nvars
) - ntokens
; 
 311     for (i 
= ntokens
; i 
< nsyms
; i
++) 
 312       symbols
[i
]->number 
= nontermmap
[i
]; 
 313     for (i 
= ntokens
; i 
< nsyms
; i
++) 
 314       symbols_sorted
[nontermmap
[i
]] = symbols
[i
]; 
 315     for (i 
= ntokens
; i 
< nsyms
; i
++) 
 316       symbols
[i
] = symbols_sorted
[i
]; 
 317     free (symbols_sorted 
+ ntokens
); 
 322     for (r 
= 0; r 
< nrules
; ++r
) 
 325         for (rhsp 
= rules
[r
].rhs
; *rhsp 
>= 0; ++rhsp
) 
 327             *rhsp 
=  symbol_number_as_item_number (nontermmap
[*rhsp
]); 
 329     accept
->number 
= nontermmap
[accept
->number
]; 
 332   nsyms 
-= nuseless_nonterminals
; 
 333   nvars 
-= nuseless_nonterminals
; 
 335   free (nontermmap 
+ ntokens
); 
 339 /*------------------------------------------------------------------. 
 340 | Output the detailed results of the reductions.  For FILE.output.  | 
 341 `------------------------------------------------------------------*/ 
 344 reduce_output (FILE *out
) 
 346   if (nuseless_nonterminals 
> 0) 
 349       fprintf (out
, "%s\n\n", _("Useless nonterminals")); 
 350       for (i 
= 0; i 
< nuseless_nonterminals
; ++i
) 
 351         fprintf (out
, "   %s\n", symbols
[nsyms 
+ i
]->tag
); 
 358     for (i 
= 0; i 
< ntokens
; i
++) 
 359       if (!bitset_test (V
, i
) && !bitset_test (V1
, i
)) 
 362             fprintf (out
, "%s\n\n", _("Terminals which are not used")); 
 364           fprintf (out
, "   %s\n", symbols
[i
]->tag
); 
 370   if (nuseless_productions 
> 0) 
 371     grammar_rules_partial_print (out
, _("Useless rules"), 
 379 /*-------------------------------. 
 380 | Report the results to STDERR.  | 
 381 `-------------------------------*/ 
 386   if (yacc_flag 
&& nuseless_productions
) 
 387     fprintf (stderr
, ngettext ("%d rule never reduced\n", 
 388                                "%d rules never reduced\n", 
 389                                nuseless_productions
), 
 390              nuseless_productions
); 
 392   fprintf (stderr
, "%s: %s: ", infile
, _("warning")); 
 394   if (nuseless_nonterminals 
> 0) 
 395     fprintf (stderr
, ngettext ("%d useless nonterminal", 
 396                                "%d useless nonterminals", 
 397                                nuseless_nonterminals
), 
 398              nuseless_nonterminals
); 
 400   if (nuseless_nonterminals 
> 0 && nuseless_productions 
> 0) 
 401     fprintf (stderr
, _(" and ")); 
 403   if (nuseless_productions 
> 0) 
 404     fprintf (stderr
, ngettext ("%d useless rule", 
 406                                nuseless_productions
), 
 407              nuseless_productions
); 
 408   fprintf (stderr
, "\n"); 
 413 reduce_grammar (void) 
 417   /* Allocate the global sets used to compute the reduced grammar */ 
 419   N 
= bitset_create (nvars
, BITSET_FIXED
); 
 420   P 
=  bitset_create (nrules
, BITSET_FIXED
); 
 421   V 
= bitset_create (nsyms
, BITSET_FIXED
); 
 422   V1 
= bitset_create (nsyms
, BITSET_FIXED
); 
 424   useless_nonterminals (); 
 425   inaccessable_symbols (); 
 427   reduced 
= (bool) (nuseless_nonterminals 
+ nuseless_productions 
> 0); 
 433   if (!bitset_test (N
, accept
->number 
- ntokens
)) 
 434     fatal_at (startsymbol_location
, 
 435               _("start symbol %s does not derive any sentence"), 
 438   /* First reduce the nonterminals, as they renumber themselves in the 
 439      whole grammar.  If you change the order, nonterms would be 
 440      renumbered only in the reduced grammar.  */ 
 441   if (nuseless_nonterminals 
> 0) 
 442     nonterminals_reduce (); 
 443   if (nuseless_productions 
> 0) 
 444     reduce_grammar_tables (); 
 446   if (trace_flag 
& trace_grammar
) 
 448       grammar_dump (stderr
, "Reduced Grammar"); 
 450       fprintf (stderr
, "reduced %s defines %d terminals, %d nonterminals\ 
 451 , and %d productions.\n", 
 452                infile
, ntokens
, nvars
, nrules
); 
 457 /*-----------------------------------------------------------. 
 458 | Free the global sets used to compute the reduced grammar.  | 
 459 `-----------------------------------------------------------*/