]>
git.saurik.com Git - bison.git/blob - src/reduce.c
7cdc76aba6c2b54edf33cca09696c5b8402d0d46
   1 /* Grammar reduction for Bison. 
   2    Copyright 1988, 1989, 2000, 2001  Free Software Foundation, Inc. 
   4    This file is part of Bison, the GNU Compiler Compiler. 
   6    Bison is free software; you can redistribute it and/or modify 
   7    it under the terms of the GNU General Public License as published by 
   8    the Free Software Foundation; either version 2, or (at your option) 
  11    Bison is distributed in the hope that it will be useful, 
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of 
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  14    GNU General Public License for more details. 
  16    You should have received a copy of the GNU General Public License 
  17    along with Bison; see the file COPYING.  If not, write to 
  18    the Free Software Foundation, Inc., 59 Temple Place - Suite 330, 
  19    Boston, MA 02111-1307, USA.  */ 
  22 /* Reduce the grammar: Find and eliminate unreachable terminals, 
  23    nonterminals, and productions.  David S. Bakin.  */ 
  25 /* Don't eliminate unreachable terminals: They may be used by the 
  42 /* Set of all nonterminals which are not useless.  */ 
  45 /* Set of all rules which have no useless nonterminals in their RHS.  */ 
  48 /* Set of all accessible symbols.  */ 
  51 /* Set of symbols used to define rule precedence (so they are 
  52    `useless', but no warning should be issued).  */ 
  55 static int nuseful_productions
; 
  56 static int nuseless_productions
; 
  57 static int nuseful_nonterminals
; 
  58 int nuseless_nonterminals
; 
  60 /*-------------------------------------------------------------------. 
  61 | Another way to do this would be with a set for each production and | 
  62 | then do subset tests against N0, but even for the C grammar the    | 
  63 | whole reducing process takes only 2 seconds on my 8Mhz AT.         | 
  64 `-------------------------------------------------------------------*/ 
  67 useful_production (int i
, bitset N0
) 
  72   /* A production is useful if all of the nonterminals in its appear 
  73      in the set of useful nonterminals.  */ 
  75   for (r 
= rules
[i
].rhs
; *r 
>= 0; r
++) 
  76     if (ISVAR (n 
= *r
) && !bitset_test (N0
, n 
- ntokens
)) 
  82 /*---------------------------------------------------------. 
  83 | Remember that rules are 1-origin, symbols are 0-origin.  | 
  84 `---------------------------------------------------------*/ 
  87 useless_nonterminals (void) 
  92   /* N is set as built.  Np is set being built this iteration. P is 
  93      set of all productions which have a RHS all in N.  */ 
  95   Np 
= bitset_create (nvars
, BITSET_FIXED
); 
  98   /* The set being computed is a set of nonterminals which can derive 
  99      the empty string or strings consisting of all terminals. At each 
 100      iteration a nonterminal is added to the set if there is a 
 101      production with that nonterminal as its LHS for which all the 
 102      nonterminals in its RHS are already in the set.  Iterate until 
 103      the set being computed remains unchanged.  Any nonterminals not 
 104      in the set at that point are useless in that they will never be 
 105      used in deriving a sentence of the language. 
 107      This iteration doesn't use any special traversal over the 
 108      productions.  A set is kept of all productions for which all the 
 109      nonterminals in the RHS are in useful.  Only productions not in 
 110      this set are scanned on each iteration.  At the end, this set is 
 111      saved to be used when finding useful productions: only 
 112      productions in this set will appear in the final grammar.  */ 
 117       for (i 
= 1; i 
< nrules 
+ 1; i
++) 
 118         if (!bitset_test (P
, i
) 
 119             && useful_production (i
, N
)) 
 121             bitset_set (Np
, rules
[i
].lhs
->number 
- ntokens
); 
 124       if (bitset_equal_p (N
, Np
)) 
 136 inaccessable_symbols (void) 
 143   /* Find out which productions are reachable and which symbols are 
 144      used.  Starting with an empty set of productions and a set of 
 145      symbols which only has the start symbol in it, iterate over all 
 146      productions until the set of productions remains unchanged for an 
 147      iteration.  For each production which has a LHS in the set of 
 148      reachable symbols, add the production to the set of reachable 
 149      productions, and add all of the nonterminals in the RHS of the 
 150      production to the set of reachable symbols. 
 152      Consider only the (partially) reduced grammar which has only 
 153      nonterminals in N and productions in P. 
 155      The result is the set P of productions in the reduced grammar, 
 156      and the set V of symbols in the reduced grammar. 
 158      Although this algorithm also computes the set of terminals which 
 159      are reachable, no terminal will be deleted from the grammar. Some 
 160      terminals might not be in the grammar but might be generated by 
 161      semantic routines, and so the user might want them available with 
 162      specified numbers.  (Is this true?)  However, the nonreachable 
 163      terminals are printed (if running in verbose mode) so that the 
 166   Vp 
= bitset_create (nsyms
, BITSET_FIXED
); 
 167   Pp 
= bitset_create (nrules 
+ 1, BITSET_FIXED
); 
 169   /* If the start symbol isn't useful, then nothing will be useful. */ 
 170   if (bitset_test (N
, start_symbol 
- ntokens
)) 
 172       bitset_set (V
, start_symbol
); 
 177           for (i 
= 1; i 
< nrules 
+ 1; i
++) 
 179               if (!bitset_test (Pp
, i
) 
 180                   && bitset_test (P
, i
) 
 181                   && bitset_test (V
, rules
[i
].lhs
->number
)) 
 183                   for (r 
= rules
[i
].rhs
; *r 
>= 0; r
++) 
 184                     if (ISTOKEN (t 
= *r
) || bitset_test (N
, t 
- ntokens
)) 
 189           if (bitset_equal_p (V
, Vp
)) 
 200   /* Tokens 0, 1, and 2 are internal to Bison.  Consider them useful. */ 
 201   bitset_set (V
, 0);            /* end-of-input token */ 
 202   bitset_set (V
, 1);            /* error token */ 
 203   bitset_set (V
, 2);            /* some undefined token */ 
 208   nuseful_productions 
= bitset_count (P
); 
 209   nuseless_productions 
= nrules 
- nuseful_productions
; 
 211   nuseful_nonterminals 
= 0; 
 212   for (i 
= ntokens
; i 
< nsyms
; i
++) 
 213     if (bitset_test (V
, i
)) 
 214       nuseful_nonterminals
++; 
 215   nuseless_nonterminals 
= nvars 
- nuseful_nonterminals
; 
 217   /* A token that was used in %prec should not be warned about.  */ 
 218   for (i 
= 1; i 
< nrules 
+ 1; i
++) 
 219     if (rules
[i
].precsym 
!= 0) 
 220       bitset_set (V1
, rules
[i
].precsym
); 
 224 /*-------------------------------------------------------------------. 
 225 | Put the useless productions at the end of RULES, and adjust NRULES | 
 227 `-------------------------------------------------------------------*/ 
 230 reduce_grammar_tables (void) 
 232   /* Flag useless productions.  */ 
 235     for (pn 
= 1; pn 
< nrules 
+ 1; pn
++) 
 236       rules
[pn
].useful 
= bitset_test (P
, pn
); 
 239   /* Map the nonterminals to their new index: useful first, useless 
 240      afterwards.  Kept for later report.  */ 
 243     int useless 
= nrules 
+ 1 - nuseless_productions
; 
 244     rule_t 
*rules_sorted 
= XMALLOC (rule_t
, nrules 
+ 1) - 1; 
 246     for (i 
= 1; i 
< nrules 
+ 1; ++i
) 
 247       rules_sorted
[rules
[i
].useful 
? useful
++ : useless
++] = rules
[i
]; 
 249     rules 
= rules_sorted
; 
 251     /* Renumber the rules markers in RITEMS.  */ 
 252     for (i 
= 1; i 
< nrules 
+ 1; ++i
) 
 254         short *rhsp 
= rules
[i
].rhs
; 
 255         for (/* Nothing. */; *rhsp 
>= 0; ++rhsp
) 
 259     nrules 
-= nuseless_productions
; 
 262   /* Adjust NRITEMS and NITEMS.  */ 
 266     for (r 
= nrules 
+ 1; r 
< nrules 
+ 1 + nuseless_productions
; ++r
) 
 268         length 
= rule_rhs_length (&rules
[r
]); 
 269         nritems 
-= length 
+ 1; 
 270         nitems 
-= length 
+ 1; 
 276 /*------------------------------. 
 277 | Remove useless nonterminals.  | 
 278 `------------------------------*/ 
 281 nonterminals_reduce (void) 
 285   /* Map the nonterminals to their new index: useful first, useless 
 286      afterwards.  Kept for later report.  */ 
 288   short *nontermmap 
= XCALLOC (short, nvars
) - ntokens
; 
 290   for (i 
= ntokens
; i 
< nsyms
; i
++) 
 291     if (bitset_test (V
, i
)) 
 293   for (i 
= ntokens
; i 
< nsyms
; i
++) 
 294     if (!bitset_test (V
, i
)) 
 298   /* Shuffle elements of tables indexed by symbol number.  */ 
 300     bucket 
**symbols_sorted 
= XMALLOC (bucket 
*, nvars
) - ntokens
; 
 302     for (i 
= ntokens
; i 
< nsyms
; i
++) 
 303       symbols
[i
]->number 
= nontermmap
[i
]; 
 304     for (i 
= ntokens
; i 
< nsyms
; i
++) 
 305       symbols_sorted
[nontermmap
[i
]] = symbols
[i
]; 
 306     for (i 
= ntokens
; i 
< nsyms
; i
++) 
 307       symbols
[i
] = symbols_sorted
[i
]; 
 308     free (symbols_sorted 
+ ntokens
); 
 311   /* Replace all symbol numbers in valid data structures.  */ 
 313   for (i 
= 1; i 
< nrules 
+ 1; i
++) 
 315       if (ISVAR (rules
[i
].precsym
)) 
 316         /* Can this happen?  */ 
 317         rules
[i
].precsym 
= nontermmap
[rules
[i
].precsym
]; 
 320   for (i 
= 0; i 
< nritems
; ++i
) 
 321     if (ISVAR (ritem
[i
])) 
 322       ritem
[i
] = nontermmap
[ritem
[i
]]; 
 324   start_symbol 
= nontermmap
[start_symbol
]; 
 326   nsyms 
-= nuseless_nonterminals
; 
 327   nvars 
-= nuseless_nonterminals
; 
 329   free (nontermmap 
+ ntokens
); 
 333 /*------------------------------------------------------------------. 
 334 | Output the detailed results of the reductions.  For FILE.output.  | 
 335 `------------------------------------------------------------------*/ 
 338 reduce_output (FILE *out
) 
 340   if (nuseless_nonterminals 
> 0) 
 343       fprintf (out
, "%s\n\n", _("Useless nonterminals:")); 
 344       for (i 
= 0; i 
< nuseless_nonterminals
; ++i
) 
 345         fprintf (out
, "   %s\n", symbols
[nsyms 
+ i
]->tag
); 
 352     for (i 
= 0; i 
< ntokens
; i
++) 
 353       if (!bitset_test (V
, i
) && !bitset_test (V1
, i
)) 
 356             fprintf (out
, "%s\n\n", _("Terminals which are not used:")); 
 358           fprintf (out
, "   %s\n", symbols
[i
]->tag
); 
 364   if (nuseless_productions 
> 0) 
 367       fprintf (out
, "%s\n\n", _("Useless rules:")); 
 368       for (i 
= nrules 
+ 1; i 
< nuseless_productions 
+ nrules 
+ 1; i
++) 
 371           fprintf (out
, "#%-4d  ", rules
[i
].number 
- 1); 
 372           fprintf (out
, "%s:", rules
[i
].lhs
->tag
); 
 373           for (r 
= rules
[i
].rhs
; *r 
>= 0; r
++) 
 374             fprintf (out
, " %s", symbols
[*r
]->tag
); 
 382 dump_grammar (FILE *out
) 
 387   fprintf (out
, "REDUCED GRAMMAR\n\n"); 
 389            "ntokens = %d, nvars = %d, nsyms = %d, nrules = %d, nitems = %d\n\n", 
 390            ntokens
, nvars
, nsyms
, nrules
, nitems
); 
 391   fprintf (out
, "Variables\n---------\n\n"); 
 392   fprintf (out
, "Value  Sprec  Sassoc  Tag\n"); 
 393   for (i 
= ntokens
; i 
< nsyms
; i
++) 
 394     fprintf (out
, "%5d  %5d   %5d  %s\n", 
 396              symbols
[i
]->prec
, symbols
[i
]->assoc
, symbols
[i
]->tag
); 
 397   fprintf (out
, "\n\n"); 
 398   fprintf (out
, "Rules\n-----\n\n"); 
 399   fprintf (out
, "Num (Prec, Assoc, Useful, Ritem Range) Lhs -> Rhs (Ritem range) [Num]\n"); 
 400   for (i 
= 1; i 
< nrules 
+ nuseless_productions 
+ 1; i
++) 
 403       /* Find the last RHS index in ritems. */ 
 404       for (r 
= rules
[i
].rhs
; *r 
>= 0; ++r
) 
 406       fprintf (out
, "%3d (%2d, %2d, %2d, %2d-%2d)   %2d ->", 
 408                rules
[i
].prec
, rules
[i
].assoc
, rules
[i
].useful
, 
 409                rules
[i
].rhs 
- ritem
, rules
[i
].rhs 
- ritem 
+ rhs_count 
- 1, 
 410                rules
[i
].lhs
->number
); 
 411       /* Dumped the RHS. */ 
 412       for (r 
= rules
[i
].rhs
; *r 
>= 0; r
++) 
 413         fprintf (out
, "%3d", *r
); 
 414       fprintf (out
, "  [%d]\n", -(*r
) - 1); 
 416   fprintf (out
, "\n\n"); 
 417   fprintf (out
, "Rules interpreted\n-----------------\n\n"); 
 418   for (i 
= 1; i 
< nrules 
+ nuseless_productions 
+ 1; i
++) 
 420       fprintf (out
, "%-5d  %s :", i
, rules
[i
].lhs
->tag
); 
 421       for (r 
= rules
[i
].rhs
; *r 
>= 0; r
++) 
 422         fprintf (out
, " %s", symbols
[*r
]->tag
); 
 425   fprintf (out
, "\n\n"); 
 430 /*-------------------------------. 
 431 | Report the results to STDERR.  | 
 432 `-------------------------------*/ 
 437   if (yacc_flag 
&& nuseless_productions
) 
 438     fprintf (stderr
, ngettext ("%d rule never reduced\n", 
 439                                "%d rules never reduced\n", 
 440                                nuseless_productions
), 
 441              nuseless_productions
); 
 443   fprintf (stderr
, _("%s contains "), infile
); 
 445   if (nuseless_nonterminals 
> 0) 
 446     fprintf (stderr
, ngettext ("%d useless nonterminal", 
 447                                "%d useless nonterminals", 
 448                                nuseless_nonterminals
), 
 449              nuseless_nonterminals
); 
 451   if (nuseless_nonterminals 
> 0 && nuseless_productions 
> 0) 
 452     fprintf (stderr
, _(" and ")); 
 454   if (nuseless_productions 
> 0) 
 455     fprintf (stderr
, ngettext ("%d useless rule", 
 457                                nuseless_productions
), 
 458              nuseless_productions
); 
 459   fprintf (stderr
, "\n"); 
 464 reduce_grammar (void) 
 468   /* Allocate the global sets used to compute the reduced grammar */ 
 470   N 
= bitset_create (nvars
, BITSET_FIXED
); 
 471   P 
=  bitset_create (nrules 
+ 1, BITSET_FIXED
); 
 472   V 
= bitset_create (nsyms
, BITSET_FIXED
); 
 473   V1 
= bitset_create (nsyms
, BITSET_FIXED
); 
 475   useless_nonterminals (); 
 476   inaccessable_symbols (); 
 478   reduced 
= (bool) (nuseless_nonterminals 
+ nuseless_productions 
> 0); 
 484   if (!bitset_test (N
, start_symbol 
- ntokens
)) 
 485     fatal (_("Start symbol %s does not derive any sentence"), 
 486            symbols
[start_symbol
]->tag
); 
 488   if (nuseless_productions 
> 0) 
 489     reduce_grammar_tables (); 
 490   if (nuseless_nonterminals 
> 0) 
 491     nonterminals_reduce (); 
 495       dump_grammar (stderr
); 
 497       fprintf (stderr
, "reduced %s defines %d terminals, %d nonterminals\ 
 498 , and %d productions.\n", 
 499                infile
, ntokens
, nvars
, nrules
); 
 504 /*-----------------------------------------------------------. 
 505 | Free the global sets used to compute the reduced grammar.  | 
 506 `-----------------------------------------------------------*/