]>
git.saurik.com Git - bison.git/blob - src/reduce.c
c4a2bd1c713458e69204ae2f8e33e82912ffd824
   1 /* Grammar reduction for Bison. 
   2    Copyright 1988, 1989, 2000, 2001  Free Software Foundation, Inc. 
   4    This file is part of Bison, the GNU Compiler Compiler. 
   6    Bison is free software; you can redistribute it and/or modify 
   7    it under the terms of the GNU General Public License as published by 
   8    the Free Software Foundation; either version 2, or (at your option) 
  11    Bison is distributed in the hope that it will be useful, 
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of 
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  14    GNU General Public License for more details. 
  16    You should have received a copy of the GNU General Public License 
  17    along with Bison; see the file COPYING.  If not, write to 
  18    the Free Software Foundation, Inc., 59 Temple Place - Suite 330, 
  19    Boston, MA 02111-1307, USA.  */ 
  22 /* Reduce the grammar: Find and eliminate unreachable terminals, 
  23    nonterminals, and productions.  David S. Bakin.  */ 
  25 /* Don't eliminate unreachable terminals: They may be used by the 
  38 typedef unsigned *BSet
; 
  42 /* Set of all nonterminals which are not useless.  */ 
  45 /* Set of all rules which have no useless nonterminals in their RHS.  */ 
  48 /* Set of all accessible symbols.  */ 
  51 /* Set of symbols used to define rule precedence (so they are 
  52    `useless', but no warning should be issued).  */ 
  55 static int nuseful_productions
; 
  56 static int nuseless_productions
; 
  57 static int nuseful_nonterminals
; 
  58 int nuseless_nonterminals
; 
  61 bits_equal (BSet L
, BSet R
, int n
) 
  65   for (i 
= n 
- 1; i 
>= 0; i
--) 
  79       i 
^= (i 
& ((unsigned) (-(int) i
))); 
  87 bits_size (BSet S
, int n
) 
  91   for (i 
= n 
- 1; i 
>= 0; i
--) 
  92     count 
+= nbits (S
[i
]); 
  96 /*-------------------------------------------------------------------. 
  97 | Another way to do this would be with a set for each production and | 
  98 | then do subset tests against N0, but even for the C grammar the    | 
  99 | whole reducing process takes only 2 seconds on my 8Mhz AT.         | 
 100 `-------------------------------------------------------------------*/ 
 103 useful_production (int i
, BSet N0
) 
 108   /* A production is useful if all of the nonterminals in its appear 
 109      in the set of useful nonterminals.  */ 
 111   for (r 
= &ritem
[rules
[i
].rhs
]; *r 
>= 0; r
++) 
 113       if (!BITISSET (N0
, n 
- ntokens
)) 
 119 /*---------------------------------------------------------. 
 120 | Remember that rules are 1-origin, symbols are 0-origin.  | 
 121 `---------------------------------------------------------*/ 
 124 useless_nonterminals (void) 
 129   /* N is set as built.  Np is set being built this iteration. P is 
 130      set of all productions which have a RHS all in N.  */ 
 132   Np 
= XCALLOC (unsigned, WORDSIZE (nvars
)); 
 134   /* The set being computed is a set of nonterminals which can derive 
 135      the empty string or strings consisting of all terminals. At each 
 136      iteration a nonterminal is added to the set if there is a 
 137      production with that nonterminal as its LHS for which all the 
 138      nonterminals in its RHS are already in the set.  Iterate until 
 139      the set being computed remains unchanged.  Any nonterminals not 
 140      in the set at that point are useless in that they will never be 
 141      used in deriving a sentence of the language. 
 143      This iteration doesn't use any special traversal over the 
 144      productions.  A set is kept of all productions for which all the 
 145      nonterminals in the RHS are in useful.  Only productions not in 
 146      this set are scanned on each iteration.  At the end, this set is 
 147      saved to be used when finding useful productions: only 
 148      productions in this set will appear in the final grammar.  */ 
 152       for (i 
= WORDSIZE (nvars
) - 1; i 
>= 0; i
--) 
 154       for (i 
= 1; i 
<= nrules
; i
++) 
 156           if (!BITISSET (P
, i
)) 
 158               if (useful_production (i
, N
)) 
 160                   SETBIT (Np
, rules
[i
].lhs 
- ntokens
); 
 165       if (bits_equal (N
, Np
, WORDSIZE (nvars
))) 
 177 inaccessable_symbols (void) 
 184   /* Find out which productions are reachable and which symbols are 
 185      used.  Starting with an empty set of productions and a set of 
 186      symbols which only has the start symbol in it, iterate over all 
 187      productions until the set of productions remains unchanged for an 
 188      iteration.  For each production which has a LHS in the set of 
 189      reachable symbols, add the production to the set of reachable 
 190      productions, and add all of the nonterminals in the RHS of the 
 191      production to the set of reachable symbols. 
 193      Consider only the (partially) reduced grammar which has only 
 194      nonterminals in N and productions in P. 
 196      The result is the set P of productions in the reduced grammar, 
 197      and the set V of symbols in the reduced grammar. 
 199      Although this algorithm also computes the set of terminals which 
 200      are reachable, no terminal will be deleted from the grammar. Some 
 201      terminals might not be in the grammar but might be generated by 
 202      semantic routines, and so the user might want them available with 
 203      specified numbers.  (Is this true?)  However, the nonreachable 
 204      terminals are printed (if running in verbose mode) so that the 
 207   Vp 
= XCALLOC (unsigned, WORDSIZE (nsyms
)); 
 208   Pp 
= XCALLOC (unsigned, WORDSIZE (nrules 
+ 1)); 
 210   /* If the start symbol isn't useful, then nothing will be useful. */ 
 211   if (BITISSET (N
, start_symbol 
- ntokens
)) 
 213       SETBIT (V
, start_symbol
); 
 217           for (i 
= WORDSIZE (nsyms
) - 1; i 
>= 0; i
--) 
 219           for (i 
= 1; i 
<= nrules
; i
++) 
 221               if (!BITISSET (Pp
, i
) 
 223                   && BITISSET (V
, rules
[i
].lhs
)) 
 225                   for (r 
= &ritem
[rules
[i
].rhs
]; *r 
>= 0; r
++) 
 226                     if (ISTOKEN (t 
= *r
) || BITISSET (N
, t 
- ntokens
)) 
 231           if (bits_equal (V
, Vp
, WORDSIZE (nsyms
))) 
 242   /* Tokens 0, 1, and 2 are internal to Bison.  Consider them useful. */ 
 243   SETBIT (V
, 0);                /* end-of-input token */ 
 244   SETBIT (V
, 1);                /* error token */ 
 245   SETBIT (V
, 2);                /* some undefined token */ 
 250   nuseful_productions 
= bits_size (P
, WORDSIZE (nrules 
+ 1)); 
 251   nuseless_productions 
= nrules 
- nuseful_productions
; 
 253   nuseful_nonterminals 
= 0; 
 254   for (i 
= ntokens
; i 
< nsyms
; i
++) 
 256       nuseful_nonterminals
++; 
 257   nuseless_nonterminals 
= nvars 
- nuseful_nonterminals
; 
 259   /* A token that was used in %prec should not be warned about.  */ 
 260   for (i 
= 1; i 
< nrules
; i
++) 
 261     if (rules
[i
].precsym 
!= 0) 
 262       SETBIT (V1
, rules
[i
].precsym
); 
 266 reduce_grammar_tables (void) 
 268   /* This is turned off because we would need to change the numbers in 
 269      the case statements in the actions file. 
 271      We don't disable it via CPP so that it is still checked with the 
 272      rest of the code, to avoid its becoming completely obsolete. 
 274      FIXME: I think the comment above demonstrates this code must be 
 275      turned off for *semantic* parser, not in the general case.  Try 
 276      to understand this better --akim.  */ 
 279     /* remove useless productions */ 
 280     if (nuseless_productions 
> 0) 
 282         short np
, pn
, ni
, pi
; 
 286         for (pn 
= 1; pn 
<= nrules
; pn
++) 
 287           if (BITISSET (P
, pn
)) 
 292                   rules
[np
].lhs   
= rules
[pn
].lhs
; 
 293                   rules
[np
].line  
= rules
[pn
].line
; 
 294                   rules
[np
].prec  
= rules
[pn
].prec
; 
 295                   rules
[np
].assoc 
= rules
[pn
].assoc
; 
 296                   rules
[np
].rhs   
= rules
[pn
].rhs
; 
 297                   if (rules
[np
].rhs 
!= ni
) 
 301                       while (ritem
[pi
] >= 0) 
 302                         ritem
[ni
++] = ritem
[pi
++]; 
 308                   while (ritem
[ni
++] >= 0) 
 314         nrules 
-= nuseless_productions
; 
 318         /* Is it worth it to reduce the amount of memory for the 
 319            grammar? Probably not.  */ 
 322   /* Disable useless productions. */ 
 323   if (nuseless_productions 
> 0) 
 326       for (pn 
= 1; pn 
<= nrules
; pn
++) 
 327         rules
[pn
].useful 
= BITISSET (P
, pn
); 
 332 /*------------------------------. 
 333 | Remove useless nonterminals.  | 
 334 `------------------------------*/ 
 337 nonterminals_reduce (void) 
 342   /* Map the nonterminals to their new index: useful first, useless 
 343      afterwards.  Kept for later report.  */ 
 345   short *nontermmap 
= XCALLOC (short, nvars
) - ntokens
; 
 347   for (i 
= ntokens
; i 
< nsyms
; i
++) 
 350   for (i 
= ntokens
; i 
< nsyms
; i
++) 
 351     if (!BITISSET (V
, i
)) 
 355   /* Shuffle elements of tables indexed by symbol number.  */ 
 357     bucket 
**symbols_sorted 
= XMALLOC (bucket 
*, nvars
) - ntokens
; 
 359     for (i 
= ntokens
; i 
< nsyms
; i
++) 
 360       symbols_sorted
[nontermmap
[i
]] = symbols
[i
]; 
 361     for (i 
= ntokens
; i 
< nsyms
; i
++) 
 362       symbols
[i
] = symbols_sorted
[i
]; 
 363     free (symbols_sorted 
+ ntokens
); 
 366   /* Replace all symbol numbers in valid data structures.  */ 
 368   for (i 
= 1; i 
<= nrules
; i
++) 
 370       rules
[i
].lhs 
= nontermmap
[rules
[i
].lhs
]; 
 371       if (ISVAR (rules
[i
].precsym
)) 
 372         /* Can this happen?  */ 
 373         rules
[i
].precsym 
= nontermmap
[rules
[i
].precsym
]; 
 376   for (i 
= 0; i 
< nritems
; ++i
) 
 377     if (ISVAR (ritem
[i
])) 
 378       ritem
[i
] = nontermmap
[ritem
[i
]]; 
 380   start_symbol 
= nontermmap
[start_symbol
]; 
 382   nsyms 
-= nuseless_nonterminals
; 
 383   nvars 
-= nuseless_nonterminals
; 
 385   free (nontermmap 
+ ntokens
); 
 389 /*------------------------------------------------------------------. 
 390 | Output the detailed results of the reductions.  For FILE.output.  | 
 391 `------------------------------------------------------------------*/ 
 394 reduce_output (FILE *out
) 
 396   if (nuseless_nonterminals 
> 0) 
 399       fprintf (out
, "%s\n\n", _("Useless nonterminals:")); 
 400       for (i 
= 0; i 
< nuseless_nonterminals
; ++i
) 
 401         fprintf (out
, "   %s\n", symbols
[nsyms 
+ i
]->tag
); 
 408     for (i 
= 0; i 
< ntokens
; i
++) 
 409       if (!BITISSET (V
, i
) && !BITISSET (V1
, i
)) 
 412             fprintf (out
, "%s\n\n", _("Terminals which are not used:")); 
 414           fprintf (out
, "   %s\n", symbols
[i
]->tag
); 
 420   if (nuseless_productions 
> 0) 
 423       fprintf (out
, "%s\n\n", _("Useless rules:")); 
 424       for (i 
= 1; i 
<= nrules
; i
++) 
 425         if (!rules
[i
].useful
) 
 428             fprintf (out
, "#%-4d  ", i 
- 1); 
 429             fprintf (out
, "%s:", symbols
[rules
[i
].lhs
]->tag
); 
 430             for (r 
= &ritem
[rules
[i
].rhs
]; *r 
>= 0; r
++) 
 431               fprintf (out
, " %s", symbols
[*r
]->tag
); 
 439 dump_grammar (FILE *out
) 
 444   fprintf (out
, "REDUCED GRAMMAR\n\n"); 
 446            "ntokens = %d, nvars = %d, nsyms = %d, nrules = %d, nitems = %d\n\n", 
 447            ntokens
, nvars
, nsyms
, nrules
, nitems
); 
 448   fprintf (out
, "Variables\n---------\n\n"); 
 449   fprintf (out
, "Value  Sprec  Sassoc  Tag\n"); 
 450   for (i 
= ntokens
; i 
< nsyms
; i
++) 
 451     fprintf (out
, "%5d  %5d   %5d  %s\n", 
 453              symbols
[i
]->prec
, symbols
[i
]->assoc
, symbols
[i
]->tag
); 
 454   fprintf (out
, "\n\n"); 
 455   fprintf (out
, "Rules\n-----\n\n"); 
 456   fprintf (out
, "Num (Prec, Assoc, Useful, Ritem Range) Lhs -> Rhs (Ritem range) [Num]\n"); 
 457   for (i 
= 1; i 
<= nrules
; i
++) 
 460       /* Find the last RHS index in ritems. */ 
 461       for (r 
= &ritem
[rules
[i
].rhs
]; *r 
>= 0; ++r
) 
 463       fprintf (out
, "%3d (%2d, %2d, %2d, %2d-%2d)   %2d ->", 
 465                rules
[i
].prec
, rules
[i
].assoc
, rules
[i
].useful
, 
 466                rules
[i
].rhs
, rules
[i
].rhs 
+ rhs_count 
- 1, 
 468       /* Dumped the RHS. */ 
 469       for (r 
= &ritem
[rules
[i
].rhs
]; *r 
>= 0; r
++) 
 470         fprintf (out
, "%3d", *r
); 
 471       fprintf (out
, "  [%d]\n", -(*r
) - 1); 
 473   fprintf (out
, "\n\n"); 
 474   fprintf (out
, "Rules interpreted\n-----------------\n\n"); 
 475   for (i 
= 1; i 
<= nrules
; i
++) 
 477       fprintf (out
, "%-5d  %s :", i
, symbols
[rules
[i
].lhs
]->tag
); 
 478       for (r 
= &ritem
[rules
[i
].rhs
]; *r 
>= 0; r
++) 
 479         fprintf (out
, " %s", symbols
[*r
]->tag
); 
 482   fprintf (out
, "\n\n"); 
 487 /*-------------------------------. 
 488 | Report the results to STDERR.  | 
 489 `-------------------------------*/ 
 494   if (yacc_flag 
&& nuseless_productions
) 
 495     fprintf (stderr
, ngettext ("%d rule never reduced\n", 
 496                                "%d rules never reduced\n", 
 497                                nuseless_productions
), 
 498              nuseless_productions
); 
 500   fprintf (stderr
, _("%s contains "), infile
); 
 502   if (nuseless_nonterminals 
> 0) 
 503     fprintf (stderr
, ngettext ("%d useless nonterminal", 
 504                                "%d useless nonterminals", 
 505                                nuseless_nonterminals
), 
 506              nuseless_nonterminals
); 
 508   if (nuseless_nonterminals 
> 0 && nuseless_productions 
> 0) 
 509     fprintf (stderr
, _(" and ")); 
 511   if (nuseless_productions 
> 0) 
 512     fprintf (stderr
, ngettext ("%d useless rule", 
 514                                nuseless_productions
), 
 515              nuseless_productions
); 
 516   fprintf (stderr
, "\n"); 
 521 reduce_grammar (void) 
 525   /* Allocate the global sets used to compute the reduced grammar */ 
 527   N 
= XCALLOC (unsigned, WORDSIZE (nvars
)); 
 528   P 
= XCALLOC (unsigned, WORDSIZE (nrules 
+ 1)); 
 529   V 
= XCALLOC (unsigned, WORDSIZE (nsyms
)); 
 530   V1 
= XCALLOC (unsigned, WORDSIZE (nsyms
)); 
 532   useless_nonterminals (); 
 533   inaccessable_symbols (); 
 535   reduced 
= (bool) (nuseless_nonterminals 
+ nuseless_productions 
> 0); 
 542   if (!BITISSET (N
, start_symbol 
- ntokens
)) 
 543     fatal (_("Start symbol %s does not derive any sentence"), 
 544            symbols
[start_symbol
]->tag
); 
 546   reduce_grammar_tables (); 
 547   if (nuseless_nonterminals 
> 0) 
 548     nonterminals_reduce (); 
 552       dump_grammar (stderr
); 
 554       fprintf (stderr
, "reduced %s defines %d terminals, %d nonterminals\ 
 555 , and %d productions.\n", 
 556                infile
, ntokens
, nvars
, nrules
); 
 561 /*-----------------------------------------------------------. 
 562 | Free the global sets used to compute the reduced grammar.  | 
 563 `-----------------------------------------------------------*/