]>
git.saurik.com Git - bison.git/blob - src/reduce.c
   1 /* Grammar reduction for Bison. 
   2    Copyright 1988, 1989, 2000, 2001  Free Software Foundation, Inc. 
   4    This file is part of Bison, the GNU Compiler Compiler. 
   6    Bison is free software; you can redistribute it and/or modify 
   7    it under the terms of the GNU General Public License as published by 
   8    the Free Software Foundation; either version 2, or (at your option) 
  11    Bison is distributed in the hope that it will be useful, 
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of 
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  14    GNU General Public License for more details. 
  16    You should have received a copy of the GNU General Public License 
  17    along with Bison; see the file COPYING.  If not, write to 
  18    the Free Software Foundation, Inc., 59 Temple Place - Suite 330, 
  19    Boston, MA 02111-1307, USA.  */ 
  22 /* Reduce the grammar: Find and eliminate unreachable terminals, 
  23    nonterminals, and productions.  David S. Bakin.  */ 
  25 /* Don't eliminate unreachable terminals: They may be used by the 
  37 typedef unsigned *BSet
; 
  41 /* Set of all nonterminals which are not useless.  */ 
  44 /* Set of all rules which have no useless nonterminals in their RHS.  */ 
  47 /* Set of all accessible symbols.  */ 
  50 /* Set of symbols used to define rule precedence (so they are 
  51    `useless', but no warning should be issued).  */ 
  54 static int nuseful_productions
; 
  55 static int nuseless_productions
; 
  56 static int nuseful_nonterminals
; 
  57 int nuseless_nonterminals
; 
  60 bits_equal (BSet L
, BSet R
, int n
) 
  64   for (i 
= n 
- 1; i 
>= 0; i
--) 
  78       i 
^= (i 
& ((unsigned) (-(int) i
))); 
  86 bits_size (BSet S
, int n
) 
  90   for (i 
= n 
- 1; i 
>= 0; i
--) 
  91     count 
+= nbits (S
[i
]); 
  95 /*-------------------------------------------------------------------. 
  96 | Another way to do this would be with a set for each production and | 
  97 | then do subset tests against N0, but even for the C grammar the    | 
  98 | whole reducing process takes only 2 seconds on my 8Mhz AT.         | 
  99 `-------------------------------------------------------------------*/ 
 102 useful_production (int i
, BSet N0
) 
 107   /* A production is useful if all of the nonterminals in its appear 
 108      in the set of useful nonterminals.  */ 
 110   for (r 
= &ritem
[rule_table
[i
].rhs
]; *r 
> 0; r
++) 
 112       if (!BITISSET (N0
, n 
- ntokens
)) 
 118 /*---------------------------------------------------------. 
 119 | Remember that rules are 1-origin, symbols are 0-origin.  | 
 120 `---------------------------------------------------------*/ 
 123 useless_nonterminals (void) 
 128   /* N is set as built.  Np is set being built this iteration. P is 
 129      set of all productions which have a RHS all in N.  */ 
 131   Np 
= XCALLOC (unsigned, WORDSIZE (nvars
)); 
 133   /* The set being computed is a set of nonterminals which can derive 
 134      the empty string or strings consisting of all terminals. At each 
 135      iteration a nonterminal is added to the set if there is a 
 136      production with that nonterminal as its LHS for which all the 
 137      nonterminals in its RHS are already in the set.  Iterate until 
 138      the set being computed remains unchanged.  Any nonterminals not 
 139      in the set at that point are useless in that they will never be 
 140      used in deriving a sentence of the language. 
 142      This iteration doesn't use any special traversal over the 
 143      productions.  A set is kept of all productions for which all the 
 144      nonterminals in the RHS are in useful.  Only productions not in 
 145      this set are scanned on each iteration.  At the end, this set is 
 146      saved to be used when finding useful productions: only 
 147      productions in this set will appear in the final grammar.  */ 
 151       for (i 
= WORDSIZE (nvars
) - 1; i 
>= 0; i
--) 
 153       for (i 
= 1; i 
<= nrules
; i
++) 
 155           if (!BITISSET (P
, i
)) 
 157               if (useful_production (i
, N
)) 
 159                   SETBIT (Np
, rule_table
[i
].lhs 
- ntokens
); 
 164       if (bits_equal (N
, Np
, WORDSIZE (nvars
))) 
 176 inaccessable_symbols (void) 
 183   /* Find out which productions are reachable and which symbols are 
 184      used.  Starting with an empty set of productions and a set of 
 185      symbols which only has the start symbol in it, iterate over all 
 186      productions until the set of productions remains unchanged for an 
 187      iteration.  For each production which has a LHS in the set of 
 188      reachable symbols, add the production to the set of reachable 
 189      productions, and add all of the nonterminals in the RHS of the 
 190      production to the set of reachable symbols. 
 192      Consider only the (partially) reduced grammar which has only 
 193      nonterminals in N and productions in P. 
 195      The result is the set P of productions in the reduced grammar, 
 196      and the set V of symbols in the reduced grammar. 
 198      Although this algorithm also computes the set of terminals which 
 199      are reachable, no terminal will be deleted from the grammar. Some 
 200      terminals might not be in the grammar but might be generated by 
 201      semantic routines, and so the user might want them available with 
 202      specified numbers.  (Is this true?)  However, the nonreachable 
 203      terminals are printed (if running in verbose mode) so that the 
 206   Vp 
= XCALLOC (unsigned, WORDSIZE (nsyms
)); 
 207   Pp 
= XCALLOC (unsigned, WORDSIZE (nrules 
+ 1)); 
 209   /* If the start symbol isn't useful, then nothing will be useful. */ 
 210   if (BITISSET (N
, start_symbol 
- ntokens
)) 
 212       SETBIT (V
, start_symbol
); 
 216           for (i 
= WORDSIZE (nsyms
) - 1; i 
>= 0; i
--) 
 218           for (i 
= 1; i 
<= nrules
; i
++) 
 220               if (!BITISSET (Pp
, i
) 
 222                   && BITISSET (V
, rule_table
[i
].lhs
)) 
 224                   for (r 
= &ritem
[rule_table
[i
].rhs
]; *r 
>= 0; r
++) 
 225                     if (ISTOKEN (t 
= *r
) || BITISSET (N
, t 
- ntokens
)) 
 230           if (bits_equal (V
, Vp
, WORDSIZE (nsyms
))) 
 241   /* Tokens 0, 1, and 2 are internal to Bison.  Consider them useful. */ 
 242   SETBIT (V
, 0);                /* end-of-input token */ 
 243   SETBIT (V
, 1);                /* error token */ 
 244   SETBIT (V
, 2);                /* some undefined token */ 
 249   nuseful_productions 
= bits_size (P
, WORDSIZE (nrules 
+ 1)); 
 250   nuseless_productions 
= nrules 
- nuseful_productions
; 
 252   nuseful_nonterminals 
= 0; 
 253   for (i 
= ntokens
; i 
< nsyms
; i
++) 
 255       nuseful_nonterminals
++; 
 256   nuseless_nonterminals 
= nvars 
- nuseful_nonterminals
; 
 258   /* A token that was used in %prec should not be warned about.  */ 
 259   for (i 
= 1; i 
< nrules
; i
++) 
 260     if (rule_table
[i
].precsym 
!= 0) 
 261       SETBIT (V1
, rule_table
[i
].precsym
); 
 265 reduce_grammar_tables (void) 
 267   /* This is turned off because we would need to change the numbers in 
 268      the case statements in the actions file. 
 270      We don't disable it via CPP so that it is still checked with the 
 271      rest of the code, to avoid its becoming completely obsolete. 
 273      FIXME: I think the comment above demonstrates this code must be 
 274      turned off for *semantic* parser, not in the general case.  Try 
 275      to understand this better --akim.  */ 
 278     /* remove useless productions */ 
 279     if (nuseless_productions 
> 0) 
 281         short np
, pn
, ni
, pi
; 
 285         for (pn 
= 1; pn 
<= nrules
; pn
++) 
 286           if (BITISSET (P
, pn
)) 
 291                   rule_table
[np
].lhs   
= rule_table
[pn
].lhs
; 
 292                   rule_table
[np
].line  
= rule_table
[pn
].line
; 
 293                   rule_table
[np
].prec  
= rule_table
[pn
].prec
; 
 294                   rule_table
[np
].assoc 
= rule_table
[pn
].assoc
; 
 295                   rule_table
[np
].rhs   
= rule_table
[pn
].rhs
; 
 296                   if (rule_table
[np
].rhs 
!= ni
) 
 298                       pi 
= rule_table
[np
].rhs
; 
 299                       rule_table
[np
].rhs 
= ni
; 
 300                       while (ritem
[pi
] >= 0) 
 301                         ritem
[ni
++] = ritem
[pi
++]; 
 307                   while (ritem
[ni
++] >= 0); 
 312         nrules 
-= nuseless_productions
; 
 315         /* Is it worth it to reduce the amount of memory for the 
 316            grammar? Probably not.  */ 
 319   /* Disable useless productions. */ 
 320   if (nuseless_productions 
> 0) 
 323       for (pn 
= 1; pn 
<= nrules
; pn
++) 
 324         rule_table
[pn
].useful 
= BITISSET (P
, pn
); 
 329 /*------------------------------. 
 330 | Remove useless nonterminals.  | 
 331 `------------------------------*/ 
 334 nonterminals_reduce (void) 
 339   /* Map the nonterminals to their new index: useful first, useless 
 340      afterwards.  Kept for later report.  */ 
 342   short *nontermmap 
= XCALLOC (short, nvars
) - ntokens
; 
 344   for (i 
= ntokens
; i 
< nsyms
; i
++) 
 347   for (i 
= ntokens
; i 
< nsyms
; i
++) 
 348     if (!BITISSET (V
, i
)) 
 352   /* Shuffle elements of tables indexed by symbol number.  */ 
 354     short *sassoc_sorted 
= XMALLOC (short, nvars
) - ntokens
; 
 355     short *sprec_sorted  
= XMALLOC (short, nvars
) - ntokens
; 
 356     char **tags_sorted   
= XMALLOC (char *, nvars
) - ntokens
; 
 358     for (i 
= ntokens
; i 
< nsyms
; i
++) 
 361         sassoc_sorted
[n
] = sassoc
[i
]; 
 362         sprec_sorted
[n
]  = sprec
[i
]; 
 363         tags_sorted
[n
]   = tags
[i
]; 
 365     for (i 
= ntokens
; i 
< nsyms
; i
++) 
 367         sassoc
[i
] = sassoc_sorted
[i
]; 
 368         sprec
[i
]  = sprec_sorted
[i
]; 
 369         tags
[i
]   = tags_sorted
[i
]; 
 371     free (sassoc_sorted 
+ ntokens
); 
 372     free (sprec_sorted 
+ ntokens
); 
 373     free (tags_sorted 
+ ntokens
); 
 376   /* Replace all symbol numbers in valid data structures.  */ 
 378   for (i 
= 1; i 
<= nrules
; i
++) 
 380       rule_table
[i
].lhs 
= nontermmap
[rule_table
[i
].lhs
]; 
 381       if (ISVAR (rule_table
[i
].precsym
)) 
 382         /* Can this happen?  */ 
 383         rule_table
[i
].precsym 
= nontermmap
[rule_table
[i
].precsym
]; 
 386   for (r 
= ritem
; *r
; r
++) 
 390   start_symbol 
= nontermmap
[start_symbol
]; 
 392   nsyms 
-= nuseless_nonterminals
; 
 393   nvars 
-= nuseless_nonterminals
; 
 395   free (nontermmap 
+ ntokens
); 
 399 /*------------------------------------------------------------------. 
 400 | Output the detailed results of the reductions.  For FILE.output.  | 
 401 `------------------------------------------------------------------*/ 
 404 reduce_output (FILE *out
) 
 406   if (nuseless_nonterminals 
> 0) 
 409       fprintf (out
, "%s\n\n", _("Useless nonterminals:")); 
 410       for (i 
= 0; i 
< nuseless_nonterminals
; ++i
) 
 411         fprintf (out
, "   %s\n", tags
[nsyms 
+ i
]); 
 418     for (i 
= 0; i 
< ntokens
; i
++) 
 419       if (!BITISSET (V
, i
) && !BITISSET (V1
, i
)) 
 422             fprintf (out
, "%s\n\n", _("Terminals which are not used:")); 
 424           fprintf (out
, "   %s\n", tags
[i
]); 
 430   if (nuseless_productions 
> 0) 
 433       fprintf (out
, "%s\n\n", _("Useless rules:")); 
 434       for (i 
= 1; i 
<= nrules
; i
++) 
 435         if (!rule_table
[i
].useful
) 
 438             fprintf (out
, "#%-4d  ", i 
- 1); 
 439             fprintf (out
, "%s:", tags
[rule_table
[i
].lhs
]); 
 440             for (r 
= &ritem
[rule_table
[i
].rhs
]; *r 
>= 0; r
++) 
 441               fprintf (out
, " %s", tags
[*r
]); 
 449 dump_grammar (FILE *out
) 
 454   fprintf (out
, "REDUCED GRAMMAR\n\n"); 
 456            "ntokens = %d, nvars = %d, nsyms = %d, nrules = %d, nitems = %d\n\n", 
 457            ntokens
, nvars
, nsyms
, nrules
, nitems
); 
 458   fprintf (out
, "Variables\n---------\n\n"); 
 459   fprintf (out
, "Value  Sprec  Sassoc  Tag\n"); 
 460   for (i 
= ntokens
; i 
< nsyms
; i
++) 
 461     fprintf (out
, "%5d  %5d   %5d  %s\n", i
, sprec
[i
], sassoc
[i
], tags
[i
]); 
 462   fprintf (out
, "\n\n"); 
 463   fprintf (out
, "Rules\n-----\n\n"); 
 464   fprintf (out
, "Num (Prec, Assoc, Useful, Ritem Range) Lhs -> Rhs (Ritem range) [Num]\n"); 
 465   for (i 
= 1; i 
<= nrules
; i
++) 
 468       /* Find the last RHS index in ritems. */ 
 469       for (r 
= &ritem
[rule_table
[i
].rhs
]; *r 
> 0; ++r
) 
 471       fprintf (out
, "%3d (%2d, %2d, %2d, %2d-%2d)   %2d ->", 
 473                rule_table
[i
].prec
, rule_table
[i
].assoc
, rule_table
[i
].useful
, 
 474                rule_table
[i
].rhs
, rule_table
[i
].rhs 
+ rhs_count 
- 1, 
 476       /* Dumped the RHS. */ 
 477       for (r 
= &ritem
[rule_table
[i
].rhs
]; *r 
> 0; r
++) 
 478         fprintf (out
, "%3d", *r
); 
 479       fprintf (out
, "  [%d]\n", -(*r
)); 
 481   fprintf (out
, "\n\n"); 
 482   fprintf (out
, "Rules interpreted\n-----------------\n\n"); 
 483   for (i 
= 1; i 
<= nrules
; i
++) 
 485       fprintf (out
, "%-5d  %s :", i
, tags
[rule_table
[i
].lhs
]); 
 486       for (r 
= &ritem
[rule_table
[i
].rhs
]; *r 
> 0; r
++) 
 487         fprintf (out
, " %s", tags
[*r
]); 
 490   fprintf (out
, "\n\n"); 
 495 /*-------------------------------. 
 496 | Report the results to STDERR.  | 
 497 `-------------------------------*/ 
 502   if (yacc_flag 
&& nuseless_productions
) 
 503     fprintf (stderr
, ngettext ("%d rule never reduced\n", 
 504                                "%d rules never reduced\n", 
 505                                nuseless_productions
), 
 506              nuseless_productions
); 
 508   fprintf (stderr
, _("%s contains "), infile
); 
 510   if (nuseless_nonterminals 
> 0) 
 511     fprintf (stderr
, ngettext ("%d useless nonterminal", 
 512                                "%d useless nonterminals", 
 513                                nuseless_nonterminals
), 
 514              nuseless_nonterminals
); 
 516   if (nuseless_nonterminals 
> 0 && nuseless_productions 
> 0) 
 517     fprintf (stderr
, _(" and ")); 
 519   if (nuseless_productions 
> 0) 
 520     fprintf (stderr
, ngettext ("%d useless rule", 
 522                                nuseless_productions
), 
 523              nuseless_productions
); 
 524   fprintf (stderr
, "\n"); 
 529 reduce_grammar (void) 
 533   /* Allocate the global sets used to compute the reduced grammar */ 
 535   N 
= XCALLOC (unsigned, WORDSIZE (nvars
)); 
 536   P 
= XCALLOC (unsigned, WORDSIZE (nrules 
+ 1)); 
 537   V 
= XCALLOC (unsigned, WORDSIZE (nsyms
)); 
 538   V1 
= XCALLOC (unsigned, WORDSIZE (nsyms
)); 
 540   useless_nonterminals (); 
 541   inaccessable_symbols (); 
 543   reduced 
= (bool) (nuseless_nonterminals 
+ nuseless_productions 
> 0); 
 550   if (!BITISSET (N
, start_symbol 
- ntokens
)) 
 551     fatal (_("Start symbol %s does not derive any sentence"), 
 554   reduce_grammar_tables (); 
 555   if (nuseless_nonterminals 
> 0) 
 556     nonterminals_reduce (); 
 560       dump_grammar (stderr
); 
 562       fprintf (stderr
, "reduced %s defines %d terminals, %d nonterminals\ 
 563 , and %d productions.\n", 
 564                infile
, ntokens
, nvars
, nrules
); 
 569 /*-----------------------------------------------------------. 
 570 | Free the global sets used to compute the reduced grammar.  | 
 571 `-----------------------------------------------------------*/