1 /* Grammar reduction for Bison. 
   3    Copyright (C) 1988, 1989, 2000, 2001, 2002, 2003 Free Software 
   6    This file is part of Bison, the GNU Compiler Compiler. 
   8    Bison is free software; you can redistribute it and/or modify 
   9    it under the terms of the GNU General Public License as published by 
  10    the Free Software Foundation; either version 2, or (at your option) 
  13    Bison is distributed in the hope that it will be useful, 
  14    but WITHOUT ANY WARRANTY; without even the implied warranty of 
  15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  16    GNU General Public License for more details. 
  18    You should have received a copy of the GNU General Public License 
  19    along with Bison; see the file COPYING.  If not, write to 
  20    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 
  21    Boston, MA 02110-1301, USA.  */ 
  24 /* Reduce the grammar: Find and eliminate unreachable terminals, 
  25    nonterminals, and productions.  David S. Bakin.  */ 
  27 /* Don't eliminate unreachable terminals: They may be used by the 
  43 /* Set of all nonterminals which are not useless.  */ 
  46 /* Set of all rules which have no useless nonterminals in their RHS.  */ 
  49 /* Set of all accessible symbols.  */ 
  52 /* Set of symbols used to define rule precedence (so they are 
  53    `useless', but no warning should be issued).  */ 
  56 static rule_number nuseful_productions
; 
  57 rule_number nuseless_productions
; 
  58 static int nuseful_nonterminals
; 
  59 symbol_number nuseless_nonterminals
; 
  61 /*-------------------------------------------------------------------. 
  62 | Another way to do this would be with a set for each production and | 
  63 | then do subset tests against N0, but even for the C grammar the    | 
  64 | whole reducing process takes only 2 seconds on my 8Mhz AT.         | 
  65 `-------------------------------------------------------------------*/ 
  68 useful_production (rule_number r
, bitset N0
) 
  72   /* A production is useful if all of the nonterminals in its appear 
  73      in the set of useful nonterminals.  */ 
  75   for (rhsp 
= rules
[r
].rhs
; *rhsp 
>= 0; ++rhsp
) 
  76     if (ISVAR (*rhsp
) && !bitset_test (N0
, *rhsp 
- ntokens
)) 
  82 /*---------------------------------------------------------. 
  83 | Remember that rules are 1-origin, symbols are 0-origin.  | 
  84 `---------------------------------------------------------*/ 
  87 useless_nonterminals (void) 
  92   /* N is set as built.  Np is set being built this iteration. P is 
  93      set of all productions which have a RHS all in N.  */ 
  95   Np 
= bitset_create (nvars
, BITSET_FIXED
); 
  98   /* The set being computed is a set of nonterminals which can derive 
  99      the empty string or strings consisting of all terminals. At each 
 100      iteration a nonterminal is added to the set if there is a 
 101      production with that nonterminal as its LHS for which all the 
 102      nonterminals in its RHS are already in the set.  Iterate until 
 103      the set being computed remains unchanged.  Any nonterminals not 
 104      in the set at that point are useless in that they will never be 
 105      used in deriving a sentence of the language. 
 107      This iteration doesn't use any special traversal over the 
 108      productions.  A set is kept of all productions for which all the 
 109      nonterminals in the RHS are in useful.  Only productions not in 
 110      this set are scanned on each iteration.  At the end, this set is 
 111      saved to be used when finding useful productions: only 
 112      productions in this set will appear in the final grammar.  */ 
 117       for (r 
= 0; r 
< nrules
; r
++) 
 118         if (!bitset_test (P
, r
) 
 119             && useful_production (r
, N
)) 
 121             bitset_set (Np
, rules
[r
].lhs
->number 
- ntokens
); 
 124       if (bitset_equal_p (N
, Np
)) 
 136 inaccessable_symbols (void) 
 140   /* Find out which productions are reachable and which symbols are 
 141      used.  Starting with an empty set of productions and a set of 
 142      symbols which only has the start symbol in it, iterate over all 
 143      productions until the set of productions remains unchanged for an 
 144      iteration.  For each production which has a LHS in the set of 
 145      reachable symbols, add the production to the set of reachable 
 146      productions, and add all of the nonterminals in the RHS of the 
 147      production to the set of reachable symbols. 
 149      Consider only the (partially) reduced grammar which has only 
 150      nonterminals in N and productions in P. 
 152      The result is the set P of productions in the reduced grammar, 
 153      and the set V of symbols in the reduced grammar. 
 155      Although this algorithm also computes the set of terminals which 
 156      are reachable, no terminal will be deleted from the grammar. Some 
 157      terminals might not be in the grammar but might be generated by 
 158      semantic routines, and so the user might want them available with 
 159      specified numbers.  (Is this true?)  However, the nonreachable 
 160      terminals are printed (if running in verbose mode) so that the 
 163   Vp 
= bitset_create (nsyms
, BITSET_FIXED
); 
 164   Pp 
= bitset_create (nrules
, BITSET_FIXED
); 
 166   /* If the start symbol isn't useful, then nothing will be useful. */ 
 167   if (bitset_test (N
, accept
->number 
- ntokens
)) 
 169       bitset_set (V
, accept
->number
); 
 175           for (r 
= 0; r 
< nrules
; r
++) 
 177               if (!bitset_test (Pp
, r
) 
 178                   && bitset_test (P
, r
) 
 179                   && bitset_test (V
, rules
[r
].lhs
->number
)) 
 182                   for (rhsp 
= rules
[r
].rhs
; *rhsp 
>= 0; rhsp
++) 
 183                     if (ISTOKEN (*rhsp
) || bitset_test (N
, *rhsp 
- ntokens
)) 
 184                       bitset_set (Vp
, *rhsp
); 
 188           if (bitset_equal_p (V
, Vp
)) 
 199   /* Tokens 0, 1, and 2 are internal to Bison.  Consider them useful. */ 
 200   bitset_set (V
, endtoken
->number
);             /* end-of-input token */ 
 201   bitset_set (V
, errtoken
->number
);             /* error token */ 
 202   bitset_set (V
, undeftoken
->number
);           /* some undefined token */ 
 207   nuseful_productions 
= bitset_count (P
); 
 208   nuseless_productions 
= nrules 
- nuseful_productions
; 
 210   nuseful_nonterminals 
= 0; 
 213     for (i 
= ntokens
; i 
< nsyms
; i
++) 
 214       if (bitset_test (V
, i
)) 
 215         nuseful_nonterminals
++; 
 217   nuseless_nonterminals 
= nvars 
- nuseful_nonterminals
; 
 219   /* A token that was used in %prec should not be warned about.  */ 
 222     for (r 
= 0; r 
< nrules
; ++r
) 
 223       if (rules
[r
].precsym 
!= 0) 
 224         bitset_set (V1
, rules
[r
].precsym
->number
); 
 229 /*-------------------------------------------------------------------. 
 230 | Put the useless productions at the end of RULES, and adjust NRULES | 
 232 `-------------------------------------------------------------------*/ 
 235 reduce_grammar_tables (void) 
 237   /* Report and flag useless productions.  */ 
 240     for (r 
= 0; r 
< nrules
; r
++) 
 241       rules
[r
].useful 
= bitset_test (P
, r
); 
 242     grammar_rules_never_reduced_report (_("useless rule")); 
 245   /* Map the nonterminals to their new index: useful first, useless 
 246      afterwards.  Kept for later report.  */ 
 249     int useless 
= nrules 
- nuseless_productions
; 
 250     rule 
*rules_sorted 
= xnmalloc (nrules
, sizeof *rules_sorted
); 
 252     for (r 
= 0; r 
< nrules
; ++r
) 
 253       rules_sorted
[rules
[r
].useful 
? useful
++ : useless
++] = rules
[r
]; 
 255     rules 
= rules_sorted
; 
 257     /* Renumber the rules markers in RITEMS.  */ 
 258     for (r 
= 0; r 
< nrules
; ++r
) 
 260         item_number 
*rhsp 
= rules
[r
].rhs
; 
 261         for (/* Nothing. */; *rhsp 
>= 0; ++rhsp
) 
 263         *rhsp 
= rule_number_as_item_number (r
); 
 266     nrules 
-= nuseless_productions
; 
 269   /* Adjust NRITEMS.  */ 
 273     for (r 
= nrules
; r 
< nrules 
+ nuseless_productions
; ++r
) 
 275         length 
= rule_rhs_length (&rules
[r
]); 
 276         nritems 
-= length 
+ 1; 
 282 /*------------------------------. 
 283 | Remove useless nonterminals.  | 
 284 `------------------------------*/ 
 287 nonterminals_reduce (void) 
 291   /* Map the nonterminals to their new index: useful first, useless 
 292      afterwards.  Kept for later report.  */ 
 294   symbol_number 
*nontermmap 
= xnmalloc (nvars
, sizeof *nontermmap
); 
 296   for (i 
= ntokens
; i 
< nsyms
; i
++) 
 297     if (bitset_test (V
, i
)) 
 298       nontermmap
[i 
- ntokens
] = n
++; 
 299   for (i 
= ntokens
; i 
< nsyms
; i
++) 
 300     if (!bitset_test (V
, i
)) 
 302         nontermmap
[i 
- ntokens
] = n
++; 
 303         warn_at (symbols
[i
]->location
, _("useless nonterminal: %s"), 
 308   /* Shuffle elements of tables indexed by symbol number.  */ 
 310     symbol 
**symbols_sorted 
= xnmalloc (nvars
, sizeof *symbols_sorted
); 
 312     for (i 
= ntokens
; i 
< nsyms
; i
++) 
 313       symbols
[i
]->number 
= nontermmap
[i 
- ntokens
]; 
 314     for (i 
= ntokens
; i 
< nsyms
; i
++) 
 315       symbols_sorted
[nontermmap
[i 
- ntokens
] - ntokens
] = symbols
[i
]; 
 316     for (i 
= ntokens
; i 
< nsyms
; i
++) 
 317       symbols
[i
] = symbols_sorted
[i 
- ntokens
]; 
 318     free (symbols_sorted
); 
 323     for (r 
= 0; r 
< nrules
; ++r
) 
 326         for (rhsp 
= rules
[r
].rhs
; *rhsp 
>= 0; ++rhsp
) 
 328             *rhsp 
=  symbol_number_as_item_number (nontermmap
[*rhsp
 
 331     accept
->number 
= nontermmap
[accept
->number 
- ntokens
]; 
 334   nsyms 
-= nuseless_nonterminals
; 
 335   nvars 
-= nuseless_nonterminals
; 
 341 /*------------------------------------------------------------------. 
 342 | Output the detailed results of the reductions.  For FILE.output.  | 
 343 `------------------------------------------------------------------*/ 
 346 reduce_output (FILE *out
) 
 348   if (nuseless_nonterminals 
> 0) 
 351       fprintf (out
, "%s\n\n", _("Useless nonterminals")); 
 352       for (i 
= 0; i 
< nuseless_nonterminals
; ++i
) 
 353         fprintf (out
, "   %s\n", symbols
[nsyms 
+ i
]->tag
); 
 360     for (i 
= 0; i 
< ntokens
; i
++) 
 361       if (!bitset_test (V
, i
) && !bitset_test (V1
, i
)) 
 364             fprintf (out
, "%s\n\n", _("Terminals which are not used")); 
 366           fprintf (out
, "   %s\n", symbols
[i
]->tag
); 
 372   if (nuseless_productions 
> 0) 
 373     grammar_rules_partial_print (out
, _("Useless rules"), 
 381 /*-------------------------------. 
 382 | Report the results to STDERR.  | 
 383 `-------------------------------*/ 
 388   if (yacc_flag 
&& nuseless_productions
) 
 389     fprintf (stderr
, ngettext ("%d rule never reduced\n", 
 390                                "%d rules never reduced\n", 
 391                                nuseless_productions
), 
 392              nuseless_productions
); 
 394   fprintf (stderr
, "%s: %s: ", grammar_file
, _("warning")); 
 396   if (nuseless_nonterminals 
> 0) 
 397     fprintf (stderr
, ngettext ("%d useless nonterminal", 
 398                                "%d useless nonterminals", 
 399                                nuseless_nonterminals
), 
 400              nuseless_nonterminals
); 
 402   if (nuseless_nonterminals 
> 0 && nuseless_productions 
> 0) 
 403     fprintf (stderr
, _(" and ")); 
 405   if (nuseless_productions 
> 0) 
 406     fprintf (stderr
, ngettext ("%d useless rule", 
 408                                nuseless_productions
), 
 409              nuseless_productions
); 
 410   fprintf (stderr
, "\n"); 
 414 reduce_grammar (void) 
 418   /* Allocate the global sets used to compute the reduced grammar */ 
 420   N 
= bitset_create (nvars
, BITSET_FIXED
); 
 421   P 
=  bitset_create (nrules
, BITSET_FIXED
); 
 422   V 
= bitset_create (nsyms
, BITSET_FIXED
); 
 423   V1 
= bitset_create (nsyms
, BITSET_FIXED
); 
 425   useless_nonterminals (); 
 426   inaccessable_symbols (); 
 428   reduced 
= (bool) (nuseless_nonterminals 
+ nuseless_productions 
> 0); 
 434   if (!bitset_test (N
, accept
->number 
- ntokens
)) 
 435     fatal_at (startsymbol_location
, 
 436               _("start symbol %s does not derive any sentence"), 
 439   /* First reduce the nonterminals, as they renumber themselves in the 
 440      whole grammar.  If you change the order, nonterms would be 
 441      renumbered only in the reduced grammar.  */ 
 442   if (nuseless_nonterminals 
> 0) 
 443     nonterminals_reduce (); 
 444   if (nuseless_productions 
> 0) 
 445     reduce_grammar_tables (); 
 447   if (trace_flag 
& trace_grammar
) 
 449       grammar_dump (stderr
, "Reduced Grammar"); 
 451       fprintf (stderr
, "reduced %s defines %d terminals, %d nonterminals\ 
 452 , and %d productions.\n", 
 453                grammar_file
, ntokens
, nvars
, nrules
); 
 458 /*-----------------------------------------------------------. 
 459 | Free the global sets used to compute the reduced grammar.  | 
 460 `-----------------------------------------------------------*/