]>
git.saurik.com Git - bison.git/blob - src/reduce.c
5e69a69b117d65807ca99023507c6ca2a60cf2c0
   1 /* Grammar reduction for Bison. 
   2    Copyright (C) 1988, 1989 Free Software Foundation, Inc. 
   4 This file is part of Bison, the GNU Compiler Compiler. 
   6 Bison is free software; you can redistribute it and/or modify 
   7 it under the terms of the GNU General Public License as published by 
   8 the Free Software Foundation; either version 2, or (at your option) 
  11 Bison is distributed in the hope that it will be useful, 
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of 
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  14 GNU General Public License for more details. 
  16 You should have received a copy of the GNU General Public License 
  17 along with Bison; see the file COPYING.  If not, write to 
  18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, 
  19 Boston, MA 02111-1307, USA.  */ 
  23  * Reduce the grammar:  Find and eliminate unreachable terminals, 
  24  * nonterminals, and productions.  David S. Bakin. 
  28  * Don't eliminate unreachable terminals:  They may be used by the user's 
  40 extern char   **tags
;           /* reader.c */ 
  41 extern int      verboseflag
;    /* getargs.c */ 
  42 static int      statisticsflag
; /* XXXXXXX */ 
  43 extern int      fixed_outfiles
; 
  50 typedef unsigned *BSet
; 
  55  * N is set of all nonterminals which are not useless.  P is set of all rules 
  56  * which have no useless nonterminals in their RHS.  V is the set of all 
  60 static BSet     N
, P
, V
, V1
; 
  62 static int      nuseful_productions
, nuseless_productions
, 
  63                 nuseful_nonterminals
, nuseless_nonterminals
; 
  66 bool bits_equal 
PARAMS((BSet
, BSet
, int)); 
  67 int nbits 
PARAMS((unsigned)); 
  68 int bits_size 
PARAMS((BSet
, int)); 
  69 void reduce_grammar 
PARAMS((void)); 
  70 static void useless_nonterminals 
PARAMS((void)); 
  71 static void inaccessable_symbols 
PARAMS((void)); 
  72 static void reduce_grammar_tables 
PARAMS((void)); 
  73 static void print_results 
PARAMS((void)); 
  74 static void print_notices 
PARAMS((void)); 
  75 void dump_grammar 
PARAMS((void)); 
  77 extern void fatals 
PARAMS((char *, char *)); 
  81 bits_equal (BSet L
, BSet R
, int n
) 
  85   for (i 
= n 
- 1; i 
>= 0; i
--) 
  98     i 
^= (i 
& ((unsigned) (- (int) i
))); 
 106 bits_size (BSet S
, int n
) 
 110   for (i 
= n 
- 1; i 
>= 0; i
--) 
 111     count 
+= nbits(S
[i
]); 
 116 reduce_grammar (void) 
 120   /* Allocate the global sets used to compute the reduced grammar */ 
 122   N 
= NEW2(WORDSIZE(nvars
), unsigned); 
 123   P 
= NEW2(WORDSIZE(nrules 
+ 1), unsigned); 
 124   V 
= NEW2(WORDSIZE(nsyms
), unsigned); 
 125   V1 
= NEW2(WORDSIZE(nsyms
), unsigned); 
 127   useless_nonterminals(); 
 128   inaccessable_symbols(); 
 130   reduced 
= (bool) (nuseless_nonterminals 
+ nuseless_productions 
> 0); 
 135   if (reduced 
== FALSE
) 
 140   if (!BITISSET(N
, start_symbol 
- ntokens
)) 
 141     fatals(_("Start symbol %s does not derive any sentence"), 
 144   reduce_grammar_tables(); 
 145   /* if (verboseflag) { 
 146      fprintf(foutput, "REDUCED GRAMMAR\n\n"); 
 151   /**/ statisticsflag 
= FALSE
; /* someday getopts should handle this */ 
 152   if (statisticsflag 
== TRUE
) 
 154             _("reduced %s defines %d terminal%s, %d nonterminal%s\ 
 155 , and %d production%s.\n"), infile
, 
 156             ntokens
, (ntokens 
== 1 ? "" : "s"), 
 157             nvars
,   (nvars   
== 1 ? "" : "s"), 
 158             nrules
,  (nrules  
== 1 ? "" : "s")); 
 162   /* Free the global sets used to compute the reduced grammar */ 
 171  * Another way to do this would be with a set for each production and then do 
 172  * subset tests against N0, but even for the C grammar the whole reducing 
 173  * process takes only 2 seconds on my 8Mhz AT. 
 177 useful_production (int i
, BSet N0
) 
 183    * A production is useful if all of the nonterminals in its RHS 
 184    * appear in the set of useful nonterminals. 
 187   for (r 
= &ritem
[rrhs
[i
]]; *r 
> 0; r
++) 
 189       if (!BITISSET(N0
, n 
- ntokens
)) 
 195 /* Remember that rules are 1-origin, symbols are 0-origin. */ 
 198 useless_nonterminals (void) 
 204    * N is set as built.  Np is set being built this iteration. P is set 
 205    * of all productions which have a RHS all in N. 
 208   Np 
= NEW2(WORDSIZE(nvars
), unsigned); 
 211    * The set being computed is a set of nonterminals which can derive 
 212    * the empty string or strings consisting of all terminals. At each 
 213    * iteration a nonterminal is added to the set if there is a 
 214    * production with that nonterminal as its LHS for which all the 
 215    * nonterminals in its RHS are already in the set.  Iterate until the 
 216    * set being computed remains unchanged.  Any nonterminals not in the 
 217    * set at that point are useless in that they will never be used in 
 218    * deriving a sentence of the language. 
 220    * This iteration doesn't use any special traversal over the 
 221    * productions.  A set is kept of all productions for which all the 
 222    * nonterminals in the RHS are in useful.  Only productions not in 
 223    * this set are scanned on each iteration.  At the end, this set is 
 224    * saved to be used when finding useful productions: only productions 
 225    * in this set will appear in the final grammar. 
 231       for (i 
= WORDSIZE(nvars
) - 1; i 
>= 0; i
--) 
 233       for (i 
= 1; i 
<= nrules
; i
++) 
 237               if (useful_production(i
, N
)) 
 239                   SETBIT(Np
, rlhs
[i
] - ntokens
); 
 244       if (bits_equal(N
, Np
, WORDSIZE(nvars
))) 
 255 inaccessable_symbols (void) 
 263    * Find out which productions are reachable and which symbols are 
 264    * used.  Starting with an empty set of productions and a set of 
 265    * symbols which only has the start symbol in it, iterate over all 
 266    * productions until the set of productions remains unchanged for an 
 267    * iteration.  For each production which has a LHS in the set of 
 268    * reachable symbols, add the production to the set of reachable 
 269    * productions, and add all of the nonterminals in the RHS of the 
 270    * production to the set of reachable symbols. 
 272    * Consider only the (partially) reduced grammar which has only 
 273    * nonterminals in N and productions in P. 
 275    * The result is the set P of productions in the reduced grammar, and 
 276    * the set V of symbols in the reduced grammar. 
 278    * Although this algorithm also computes the set of terminals which are 
 279    * reachable, no terminal will be deleted from the grammar. Some 
 280    * terminals might not be in the grammar but might be generated by 
 281    * semantic routines, and so the user might want them available with 
 282    * specified numbers.  (Is this true?)  However, the nonreachable 
 283    * terminals are printed (if running in verbose mode) so that the user 
 287   Vp 
= NEW2(WORDSIZE(nsyms
), unsigned); 
 288   Pp 
= NEW2(WORDSIZE(nrules 
+ 1), unsigned); 
 290   /* If the start symbol isn't useful, then nothing will be useful. */ 
 291   if (!BITISSET(N
, start_symbol 
- ntokens
)) 
 294   SETBIT(V
, start_symbol
); 
 299       for (i 
= WORDSIZE(nsyms
) - 1; i 
>= 0; i
--) 
 301       for (i 
= 1; i 
<= nrules
; i
++) 
 303           if (!BITISSET(Pp
, i
) && BITISSET(P
, i
) && 
 304               BITISSET(V
, rlhs
[i
])) 
 306               for (r 
= &ritem
[rrhs
[i
]]; *r 
>= 0; r
++) 
 309                       || BITISSET(N
, t 
- ntokens
)) 
 317       if (bits_equal(V
, Vp
, WORDSIZE(nsyms
))) 
 330   /* Tokens 0, 1, and 2 are internal to Bison.  Consider them useful. */ 
 331   SETBIT(V
, 0);                 /* end-of-input token */ 
 332   SETBIT(V
, 1);                 /* error token */ 
 333   SETBIT(V
, 2);                 /* some undefined token */ 
 338   nuseful_productions 
= bits_size(P
, WORDSIZE(nrules 
+ 1)); 
 339   nuseless_productions 
= nrules 
- nuseful_productions
; 
 341   nuseful_nonterminals 
= 0; 
 342   for (i 
= ntokens
; i 
< nsyms
; i
++) 
 344       nuseful_nonterminals
++; 
 345   nuseless_nonterminals 
= nvars 
- nuseful_nonterminals
; 
 347   /* A token that was used in %prec should not be warned about.  */ 
 348   for (i 
= 1; i 
< nrules
; i
++) 
 349     if (rprecsym
[i
] != 0) 
 350       SETBIT(V1
, rprecsym
[i
]); 
 354 reduce_grammar_tables (void) 
 356 /* This is turned off because we would need to change the numbers 
 357    in the case statements in the actions file.  */ 
 359   /* remove useless productions */ 
 360   if (nuseless_productions 
> 0) 
 362       short np
, pn
, ni
, pi
; 
 366       for (pn 
= 1; pn 
<= nrules
; pn
++) 
 374                   rline
[np
] = rline
[pn
]; 
 375                   rprec
[np
] = rprec
[pn
]; 
 376                   rassoc
[np
] = rassoc
[pn
]; 
 382                       while (ritem
[pi
] >= 0) 
 383                         ritem
[ni
++] = ritem
[pi
++]; 
 387                   while (ritem
[ni
++] >= 0); 
 392       nrules 
-= nuseless_productions
; 
 396        * Is it worth it to reduce the amount of memory for the 
 397        * grammar? Probably not. 
 402   /* Disable useless productions, 
 403      since they may contain useless nonterms 
 404      that would get mapped below to -1 and confuse everyone.  */ 
 405   if (nuseless_productions 
> 0) 
 409       for (pn 
= 1; pn 
<= nrules
; pn
++) 
 411           if (!BITISSET(P
, pn
)) 
 418   /* remove useless symbols */ 
 419   if (nuseless_nonterminals 
> 0) 
 423 /*      short  j; JF unused */ 
 428        * create a map of nonterminal number to new nonterminal 
 429        * number. -1 in the map means it was useless and is being 
 433       nontermmap 
= NEW2(nvars
, short) - ntokens
; 
 434       for (i 
= ntokens
; i 
< nsyms
; i
++) 
 438       for (i 
= ntokens
; i 
< nsyms
; i
++) 
 442       /* Shuffle elements of tables indexed by symbol number.  */ 
 444       for (i 
= ntokens
; i 
< nsyms
; i
++) 
 449               sassoc
[n
] = sassoc
[i
]; 
 457       /* Replace all symbol numbers in valid data structures.  */ 
 459       for (i 
= 1; i 
<= nrules
; i
++) 
 461           /* Ignore the rules disabled above.  */ 
 463             rlhs
[i
] = nontermmap
[rlhs
[i
]]; 
 464           if (ISVAR (rprecsym
[i
])) 
 465             /* Can this happen?  */ 
 466             rprecsym
[i
] = nontermmap
[rprecsym
[i
]]; 
 469       for (r 
= ritem
; *r
; r
++) 
 473       start_symbol 
= nontermmap
[start_symbol
]; 
 475       nsyms 
-= nuseless_nonterminals
; 
 476       nvars 
-= nuseless_nonterminals
; 
 478       free(&nontermmap
[ntokens
]); 
 486 /*  short j; JF unused */ 
 490   if (nuseless_nonterminals 
> 0) 
 492       fprintf(foutput
, _("Useless nonterminals:\n\n")); 
 493       for (i 
= ntokens
; i 
< nsyms
; i
++) 
 495           fprintf(foutput
, "   %s\n", tags
[i
]); 
 498   for (i 
= 0; i 
< ntokens
; i
++) 
 500       if (!BITISSET(V
, i
) && !BITISSET(V1
, i
)) 
 504               fprintf(foutput
, _("\n\nTerminals which are not used:\n\n")); 
 507           fprintf(foutput
, "   %s\n", tags
[i
]); 
 511   if (nuseless_productions 
> 0) 
 513       fprintf(foutput
, _("\n\nUseless rules:\n\n")); 
 514       for (i 
= 1; i 
<= nrules
; i
++) 
 518               fprintf(foutput
, "#%-4d  ", i
); 
 519               fprintf(foutput
, "%s :\t", tags
[rlhs
[i
]]); 
 520               for (r 
= &ritem
[rrhs
[i
]]; *r 
>= 0; r
++) 
 522                   fprintf(foutput
, " %s", tags
[*r
]); 
 524               fprintf(foutput
, ";\n"); 
 528   if (nuseless_nonterminals 
> 0 || nuseless_productions 
> 0 || b
) 
 529     fprintf(foutput
, "\n\n"); 
 539           "ntokens = %d, nvars = %d, nsyms = %d, nrules = %d, nitems = %d\n\n", 
 540           ntokens
, nvars
, nsyms
, nrules
, nitems
); 
 541   fprintf(foutput
, _("Variables\n---------\n\n")); 
 542   fprintf(foutput
, _("Value  Sprec    Sassoc    Tag\n")); 
 543   for (i 
= ntokens
; i 
< nsyms
; i
++) 
 544     fprintf(foutput
, "%5d  %5d  %5d  %s\n", 
 545             i
, sprec
[i
], sassoc
[i
], tags
[i
]); 
 546   fprintf(foutput
, "\n\n"); 
 547   fprintf(foutput
, _("Rules\n-----\n\n")); 
 548   for (i 
= 1; i 
<= nrules
; i
++) 
 550       fprintf(foutput
, "%-5d(%5d%5d)%5d : (@%-5d)", 
 551               i
, rprec
[i
], rassoc
[i
], rlhs
[i
], rrhs
[i
]); 
 552       for (r 
= &ritem
[rrhs
[i
]]; *r 
> 0; r
++) 
 553         fprintf(foutput
, "%5d", *r
); 
 554       fprintf(foutput
, " [%d]\n", -(*r
)); 
 556   fprintf(foutput
, "\n\n"); 
 557   fprintf(foutput
, _("Rules interpreted\n-----------------\n\n")); 
 558   for (i 
= 1; i 
<= nrules
; i
++) 
 560       fprintf(foutput
, "%-5d  %s :", i
, tags
[rlhs
[i
]]); 
 561       for (r 
= &ritem
[rrhs
[i
]]; *r 
> 0; r
++) 
 562         fprintf(foutput
, " %s", tags
[*r
]); 
 563       fprintf(foutput
, "\n"); 
 565   fprintf(foutput
, "\n\n"); 
 572   if (fixed_outfiles 
&& nuseless_productions
) 
 573     fprintf(stderr
, _("%d rules never reduced\n"), nuseless_productions
); 
 575   fprintf(stderr
, _("%s contains "), infile
); 
 577   if (nuseless_nonterminals 
> 0) 
 579       fprintf(stderr
, _("%d useless nonterminal%s"), 
 580               nuseless_nonterminals
, 
 581               (nuseless_nonterminals 
== 1 ? "" : "s")); 
 583   if (nuseless_nonterminals 
> 0 && nuseless_productions 
> 0) 
 584     fprintf(stderr
, _(" and ")); 
 586   if (nuseless_productions 
> 0) 
 588       fprintf(stderr
, _("%d useless rule%s"), 
 589               nuseless_productions
, 
 590               (nuseless_productions 
== 1 ? "" : "s")); 
 592   fprintf(stderr
, "\n");