1 /* Grammar reduction for Bison. 
   3    Copyright (C) 1988-1989, 2000-2003, 2005-2010 Free Software 
   6    This file is part of Bison, the GNU Compiler Compiler. 
   8    This program is free software: you can redistribute it and/or modify 
   9    it under the terms of the GNU General Public License as published by 
  10    the Free Software Foundation, either version 3 of the License, or 
  11    (at your option) any later version. 
  13    This program is distributed in the hope that it will be useful, 
  14    but WITHOUT ANY WARRANTY; without even the implied warranty of 
  15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  16    GNU General Public License for more details. 
  18    You should have received a copy of the GNU General Public License 
  19    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */ 
  22 /* Reduce the grammar: Find and eliminate unreachable terminals, 
  23    nonterminals, and productions.  David S. Bakin.  */ 
  25 /* Don't eliminate unreachable terminals: They may be used by the 
  38 #include "print-xml.h" 
  43 /* Set of all nonterminals which are not useless.  */ 
  46 /* Set of all rules which have no useless nonterminals in their RHS.  */ 
  49 /* Set of all accessible symbols.  */ 
  52 /* Set of symbols used to define rule precedence (so they are 
  53    `useless', but no warning should be issued).  */ 
  56 static rule_number nuseful_productions
; 
  57 rule_number nuseless_productions
; 
  58 static int nuseful_nonterminals
; 
  59 symbol_number nuseless_nonterminals
; 
  61 /*-------------------------------------------------------------------. 
  62 | Another way to do this would be with a set for each production and | 
  63 | then do subset tests against N0, but even for the C grammar the    | 
  64 | whole reducing process takes only 2 seconds on my 8Mhz AT.         | 
  65 `-------------------------------------------------------------------*/ 
  68 useful_production (rule_number r
, bitset N0
) 
  72   /* A production is useful if all of the nonterminals in its appear 
  73      in the set of useful nonterminals.  */ 
  75   for (rhsp 
= rules
[r
].rhs
; *rhsp 
>= 0; ++rhsp
) 
  76     if (ISVAR (*rhsp
) && !bitset_test (N0
, *rhsp 
- ntokens
)) 
  82 /*---------------------------------------------------------. 
  83 | Remember that rules are 1-origin, symbols are 0-origin.  | 
  84 `---------------------------------------------------------*/ 
  87 useless_nonterminals (void) 
  92   /* N is set as built.  Np is set being built this iteration. P is 
  93      set of all productions which have a RHS all in N.  */ 
  95   Np 
= bitset_create (nvars
, BITSET_FIXED
); 
  98   /* The set being computed is a set of nonterminals which can derive 
  99      the empty string or strings consisting of all terminals. At each 
 100      iteration a nonterminal is added to the set if there is a 
 101      production with that nonterminal as its LHS for which all the 
 102      nonterminals in its RHS are already in the set.  Iterate until 
 103      the set being computed remains unchanged.  Any nonterminals not 
 104      in the set at that point are useless in that they will never be 
 105      used in deriving a sentence of the language. 
 107      This iteration doesn't use any special traversal over the 
 108      productions.  A set is kept of all productions for which all the 
 109      nonterminals in the RHS are in useful.  Only productions not in 
 110      this set are scanned on each iteration.  At the end, this set is 
 111      saved to be used when finding useful productions: only 
 112      productions in this set will appear in the final grammar.  */ 
 117       for (r 
= 0; r 
< nrules
; r
++) 
 118         if (!bitset_test (P
, r
) 
 119             && useful_production (r
, N
)) 
 121             bitset_set (Np
, rules
[r
].lhs
->number 
- ntokens
); 
 124       if (bitset_equal_p (N
, Np
)) 
 136 inaccessable_symbols (void) 
 140   /* Find out which productions are reachable and which symbols are 
 141      used.  Starting with an empty set of productions and a set of 
 142      symbols which only has the start symbol in it, iterate over all 
 143      productions until the set of productions remains unchanged for an 
 144      iteration.  For each production which has a LHS in the set of 
 145      reachable symbols, add the production to the set of reachable 
 146      productions, and add all of the nonterminals in the RHS of the 
 147      production to the set of reachable symbols. 
 149      Consider only the (partially) reduced grammar which has only 
 150      nonterminals in N and productions in P. 
 152      The result is the set P of productions in the reduced grammar, 
 153      and the set V of symbols in the reduced grammar. 
 155      Although this algorithm also computes the set of terminals which 
 156      are reachable, no terminal will be deleted from the grammar. Some 
 157      terminals might not be in the grammar but might be generated by 
 158      semantic routines, and so the user might want them available with 
 159      specified numbers.  (Is this true?)  However, the nonreachable 
 160      terminals are printed (if running in verbose mode) so that the 
 163   Vp 
= bitset_create (nsyms
, BITSET_FIXED
); 
 164   Pp 
= bitset_create (nrules
, BITSET_FIXED
); 
 166   /* If the start symbol isn't useful, then nothing will be useful. */ 
 167   if (bitset_test (N
, accept
->number 
- ntokens
)) 
 169       bitset_set (V
, accept
->number
); 
 175           for (r 
= 0; r 
< nrules
; r
++) 
 177               if (!bitset_test (Pp
, r
) 
 178                   && bitset_test (P
, r
) 
 179                   && bitset_test (V
, rules
[r
].lhs
->number
)) 
 182                   for (rhsp 
= rules
[r
].rhs
; *rhsp 
>= 0; rhsp
++) 
 183                     if (ISTOKEN (*rhsp
) || bitset_test (N
, *rhsp 
- ntokens
)) 
 184                       bitset_set (Vp
, *rhsp
); 
 188           if (bitset_equal_p (V
, Vp
)) 
 199   /* Tokens 0, 1, and 2 are internal to Bison.  Consider them useful. */ 
 200   bitset_set (V
, endtoken
->number
);             /* end-of-input token */ 
 201   bitset_set (V
, errtoken
->number
);             /* error token */ 
 202   bitset_set (V
, undeftoken
->number
);           /* some undefined token */ 
 207   nuseful_productions 
= bitset_count (P
); 
 208   nuseless_productions 
= nrules 
- nuseful_productions
; 
 210   nuseful_nonterminals 
= 0; 
 213     for (i 
= ntokens
; i 
< nsyms
; i
++) 
 214       if (bitset_test (V
, i
)) 
 215         nuseful_nonterminals
++; 
 217   nuseless_nonterminals 
= nvars 
- nuseful_nonterminals
; 
 219   /* A token that was used in %prec should not be warned about.  */ 
 222     for (r 
= 0; r 
< nrules
; ++r
) 
 223       if (rules
[r
].precsym 
!= 0) 
 224         bitset_set (V1
, rules
[r
].precsym
->number
); 
 229 /*-------------------------------------------------------------------. 
 230 | Put the useless productions at the end of RULES, and adjust NRULES | 
 232 `-------------------------------------------------------------------*/ 
 235 reduce_grammar_tables (void) 
 237   /* Report and flag useless productions.  */ 
 240     for (r 
= 0; r 
< nrules
; r
++) 
 241       rules
[r
].useful 
= bitset_test (P
, r
); 
 242     grammar_rules_useless_report (_("rule useless in grammar")); 
 245   /* Map the nonterminals to their new index: useful first, useless 
 246      afterwards.  Kept for later report.  */ 
 249     int useless 
= nrules 
- nuseless_productions
; 
 250     rule 
*rules_sorted 
= xnmalloc (nrules
, sizeof *rules_sorted
); 
 252     for (r 
= 0; r 
< nrules
; ++r
) 
 253       rules_sorted
[rules
[r
].useful 
? useful
++ : useless
++] = rules
[r
]; 
 255     rules 
= rules_sorted
; 
 257     /* Renumber the rules markers in RITEMS.  */ 
 258     for (r 
= 0; r 
< nrules
; ++r
) 
 260         item_number 
*rhsp 
= rules
[r
].rhs
; 
 261         for (/* Nothing. */; *rhsp 
>= 0; ++rhsp
) 
 263         *rhsp 
= rule_number_as_item_number (r
); 
 266     nrules 
-= nuseless_productions
; 
 269   /* Adjust NRITEMS.  */ 
 273     for (r 
= nrules
; r 
< nrules 
+ nuseless_productions
; ++r
) 
 275         length 
= rule_rhs_length (&rules
[r
]); 
 276         nritems 
-= length 
+ 1; 
 282 /*------------------------------. 
 283 | Remove useless nonterminals.  | 
 284 `------------------------------*/ 
 287 nonterminals_reduce (void) 
 291   /* Map the nonterminals to their new index: useful first, useless 
 292      afterwards.  Kept for later report.  */ 
 294   symbol_number 
*nontermmap 
= xnmalloc (nvars
, sizeof *nontermmap
); 
 296   for (i 
= ntokens
; i 
< nsyms
; i
++) 
 297     if (bitset_test (V
, i
)) 
 298       nontermmap
[i 
- ntokens
] = n
++; 
 299   for (i 
= ntokens
; i 
< nsyms
; i
++) 
 300     if (!bitset_test (V
, i
)) 
 302         nontermmap
[i 
- ntokens
] = n
++; 
 303         warn_at (symbols
[i
]->location
, _("nonterminal useless in grammar: %s"), 
 308   /* Shuffle elements of tables indexed by symbol number.  */ 
 310     symbol 
**symbols_sorted 
= xnmalloc (nvars
, sizeof *symbols_sorted
); 
 312     for (i 
= ntokens
; i 
< nsyms
; i
++) 
 313       symbols
[i
]->number 
= nontermmap
[i 
- ntokens
]; 
 314     for (i 
= ntokens
; i 
< nsyms
; i
++) 
 315       symbols_sorted
[nontermmap
[i 
- ntokens
] - ntokens
] = symbols
[i
]; 
 316     for (i 
= ntokens
; i 
< nsyms
; i
++) 
 317       symbols
[i
] = symbols_sorted
[i 
- ntokens
]; 
 318     free (symbols_sorted
); 
 323     for (r 
= 0; r 
< nrules
; ++r
) 
 326         for (rhsp 
= rules
[r
].rhs
; *rhsp 
>= 0; ++rhsp
) 
 328             *rhsp 
=  symbol_number_as_item_number (nontermmap
[*rhsp
 
 331     accept
->number 
= nontermmap
[accept
->number 
- ntokens
]; 
 334   nsyms 
-= nuseless_nonterminals
; 
 335   nvars 
-= nuseless_nonterminals
; 
 341 /*------------------------------------------------------------------. 
 342 | Output the detailed results of the reductions.  For FILE.output.  | 
 343 `------------------------------------------------------------------*/ 
 346 reduce_output (FILE *out
) 
 348   if (nuseless_nonterminals 
> 0) 
 351       fprintf (out
, "%s\n\n", _("Nonterminals useless in grammar")); 
 352       for (i 
= 0; i 
< nuseless_nonterminals
; ++i
) 
 353         fprintf (out
, "   %s\n", symbols
[nsyms 
+ i
]->tag
); 
 360     for (i 
= 0; i 
< ntokens
; i
++) 
 361       if (reduce_token_unused_in_grammar (i
)) 
 364             fprintf (out
, "%s\n\n", _("Terminals unused in grammar")); 
 366           fprintf (out
, "   %s\n", symbols
[i
]->tag
); 
 372   if (nuseless_productions 
> 0) 
 373     grammar_rules_partial_print (out
, _("Rules useless in grammar"), 
 374                                  rule_useless_in_grammar_p
); 
 378 /*-------------------------------. 
 379 | Report the results to STDERR.  | 
 380 `-------------------------------*/ 
 385   if (nuseless_nonterminals 
> 0) 
 387       fprintf (stderr
, "%s: %s: ", grammar_file
, _("warning")); 
 388       fprintf (stderr
, ngettext ("%d nonterminal useless in grammar", 
 389                                  "%d nonterminals useless in grammar", 
 390                                  nuseless_nonterminals
), 
 391                nuseless_nonterminals
); 
 392       fprintf (stderr
, "\n"); 
 394   if (nuseless_productions 
> 0) 
 396       fprintf (stderr
, "%s: %s: ", grammar_file
, _("warning")); 
 397       fprintf (stderr
, ngettext ("%d rule useless in grammar", 
 398                                  "%d rules useless in grammar", 
 399                                  nuseless_productions
), 
 400                nuseless_productions
); 
 401       fprintf (stderr
, "\n"); 
 406 reduce_grammar (void) 
 410   /* Allocate the global sets used to compute the reduced grammar */ 
 412   N 
= bitset_create (nvars
, BITSET_FIXED
); 
 413   P 
=  bitset_create (nrules
, BITSET_FIXED
); 
 414   V 
= bitset_create (nsyms
, BITSET_FIXED
); 
 415   V1 
= bitset_create (nsyms
, BITSET_FIXED
); 
 417   useless_nonterminals (); 
 418   inaccessable_symbols (); 
 420   reduced 
= (nuseless_nonterminals 
+ nuseless_productions 
> 0); 
 426   if (!bitset_test (N
, accept
->number 
- ntokens
)) 
 427     fatal_at (startsymbol_location
, 
 428               _("start symbol %s does not derive any sentence"), 
 431   /* First reduce the nonterminals, as they renumber themselves in the 
 432      whole grammar.  If you change the order, nonterms would be 
 433      renumbered only in the reduced grammar.  */ 
 434   if (nuseless_nonterminals 
> 0) 
 435     nonterminals_reduce (); 
 436   if (nuseless_productions 
> 0) 
 437     reduce_grammar_tables (); 
 439   if (trace_flag 
& trace_grammar
) 
 441       grammar_dump (stderr
, "Reduced Grammar"); 
 443       fprintf (stderr
, "reduced %s defines %d terminals, %d nonterminals\ 
 444 , and %d productions.\n", 
 445                grammar_file
, ntokens
, nvars
, nrules
); 
 450 reduce_token_unused_in_grammar (symbol_number i
) 
 453   return !bitset_test (V
, i
) && !bitset_test (V1
, i
); 
 457 reduce_nonterminal_useless_in_grammar (symbol_number i
) 
 459   aver (ntokens 
<= i 
&& i 
< nsyms 
+ nuseless_nonterminals
); 
 463 /*-----------------------------------------------------------. 
 464 | Free the global sets used to compute the reduced grammar.  | 
 465 `-----------------------------------------------------------*/