]>
git.saurik.com Git - bison.git/blob - src/reduce.c
1 /* Grammar reduction for Bison.
2 Copyright (C) 1988, 1989, 2000, 2001, 2002 Free Software Foundation, Inc.
4 This file is part of Bison, the GNU Compiler Compiler.
6 Bison is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 Bison is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Bison; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
22 /* Reduce the grammar: Find and eliminate unreachable terminals,
23 nonterminals, and productions. David S. Bakin. */
25 /* Don't eliminate unreachable terminals: They may be used by the
43 /* Set of all nonterminals which are not useless. */
46 /* Set of all rules which have no useless nonterminals in their RHS. */
49 /* Set of all accessible symbols. */
52 /* Set of symbols used to define rule precedence (so they are
53 `useless', but no warning should be issued). */
56 static int nuseful_productions
;
57 static int nuseless_productions
;
58 static int nuseful_nonterminals
;
59 int nuseless_nonterminals
;
61 /*-------------------------------------------------------------------.
62 | Another way to do this would be with a set for each production and |
63 | then do subset tests against N0, but even for the C grammar the |
64 | whole reducing process takes only 2 seconds on my 8Mhz AT. |
65 `-------------------------------------------------------------------*/
68 useful_production (int i
, bitset N0
)
73 /* A production is useful if all of the nonterminals in its appear
74 in the set of useful nonterminals. */
76 for (r
= rules
[i
].rhs
; *r
>= 0; r
++)
77 if (ISVAR (n
= *r
) && !bitset_test (N0
, n
- ntokens
))
83 /*---------------------------------------------------------.
84 | Remember that rules are 1-origin, symbols are 0-origin. |
85 `---------------------------------------------------------*/
88 useless_nonterminals (void)
93 /* N is set as built. Np is set being built this iteration. P is
94 set of all productions which have a RHS all in N. */
96 Np
= bitset_create (nvars
, BITSET_FIXED
);
99 /* The set being computed is a set of nonterminals which can derive
100 the empty string or strings consisting of all terminals. At each
101 iteration a nonterminal is added to the set if there is a
102 production with that nonterminal as its LHS for which all the
103 nonterminals in its RHS are already in the set. Iterate until
104 the set being computed remains unchanged. Any nonterminals not
105 in the set at that point are useless in that they will never be
106 used in deriving a sentence of the language.
108 This iteration doesn't use any special traversal over the
109 productions. A set is kept of all productions for which all the
110 nonterminals in the RHS are in useful. Only productions not in
111 this set are scanned on each iteration. At the end, this set is
112 saved to be used when finding useful productions: only
113 productions in this set will appear in the final grammar. */
118 for (i
= 1; i
< nrules
+ 1; i
++)
119 if (!bitset_test (P
, i
)
120 && useful_production (i
, N
))
122 bitset_set (Np
, rules
[i
].lhs
->number
- ntokens
);
125 if (bitset_equal_p (N
, Np
))
137 inaccessable_symbols (void)
144 /* Find out which productions are reachable and which symbols are
145 used. Starting with an empty set of productions and a set of
146 symbols which only has the start symbol in it, iterate over all
147 productions until the set of productions remains unchanged for an
148 iteration. For each production which has a LHS in the set of
149 reachable symbols, add the production to the set of reachable
150 productions, and add all of the nonterminals in the RHS of the
151 production to the set of reachable symbols.
153 Consider only the (partially) reduced grammar which has only
154 nonterminals in N and productions in P.
156 The result is the set P of productions in the reduced grammar,
157 and the set V of symbols in the reduced grammar.
159 Although this algorithm also computes the set of terminals which
160 are reachable, no terminal will be deleted from the grammar. Some
161 terminals might not be in the grammar but might be generated by
162 semantic routines, and so the user might want them available with
163 specified numbers. (Is this true?) However, the nonreachable
164 terminals are printed (if running in verbose mode) so that the
167 Vp
= bitset_create (nsyms
, BITSET_FIXED
);
168 Pp
= bitset_create (nrules
+ 1, BITSET_FIXED
);
170 /* If the start symbol isn't useful, then nothing will be useful. */
171 if (bitset_test (N
, start_symbol
- ntokens
))
173 bitset_set (V
, start_symbol
);
178 for (i
= 1; i
< nrules
+ 1; i
++)
180 if (!bitset_test (Pp
, i
)
181 && bitset_test (P
, i
)
182 && bitset_test (V
, rules
[i
].lhs
->number
))
184 for (r
= rules
[i
].rhs
; *r
>= 0; r
++)
185 if (ISTOKEN (t
= *r
) || bitset_test (N
, t
- ntokens
))
190 if (bitset_equal_p (V
, Vp
))
201 /* Tokens 0, 1, and 2 are internal to Bison. Consider them useful. */
202 bitset_set (V
, eoftoken
->number
); /* end-of-input token */
203 bitset_set (V
, errtoken
->number
); /* error token */
204 bitset_set (V
, undeftoken
->number
); /* some undefined token */
209 nuseful_productions
= bitset_count (P
);
210 nuseless_productions
= nrules
- nuseful_productions
;
212 nuseful_nonterminals
= 0;
213 for (i
= ntokens
; i
< nsyms
; i
++)
214 if (bitset_test (V
, i
))
215 nuseful_nonterminals
++;
216 nuseless_nonterminals
= nvars
- nuseful_nonterminals
;
218 /* A token that was used in %prec should not be warned about. */
219 for (i
= 1; i
< nrules
+ 1; i
++)
220 if (rules
[i
].precsym
!= 0)
221 bitset_set (V1
, rules
[i
].precsym
->number
);
225 /*-------------------------------------------------------------------.
226 | Put the useless productions at the end of RULES, and adjust NRULES |
228 `-------------------------------------------------------------------*/
231 reduce_grammar_tables (void)
233 /* Flag useless productions. */
236 for (pn
= 1; pn
< nrules
+ 1; pn
++)
237 rules
[pn
].useful
= bitset_test (P
, pn
);
240 /* Map the nonterminals to their new index: useful first, useless
241 afterwards. Kept for later report. */
244 int useless
= nrules
+ 1 - nuseless_productions
;
245 rule_t
*rules_sorted
= XMALLOC (rule_t
, nrules
+ 1) - 1;
247 for (i
= 1; i
< nrules
+ 1; ++i
)
248 rules_sorted
[rules
[i
].useful
? useful
++ : useless
++] = rules
[i
];
250 rules
= rules_sorted
;
252 /* Renumber the rules markers in RITEMS. */
253 for (i
= 1; i
< nrules
+ 1; ++i
)
255 short *rhsp
= rules
[i
].rhs
;
256 for (/* Nothing. */; *rhsp
>= 0; ++rhsp
)
261 nrules
-= nuseless_productions
;
264 /* Adjust NRITEMS and NITEMS. */
268 for (r
= nrules
+ 1; r
< nrules
+ 1 + nuseless_productions
; ++r
)
270 length
= rule_rhs_length (&rules
[r
]);
271 nritems
-= length
+ 1;
277 /*------------------------------.
278 | Remove useless nonterminals. |
279 `------------------------------*/
282 nonterminals_reduce (void)
286 /* Map the nonterminals to their new index: useful first, useless
287 afterwards. Kept for later report. */
289 short *nontermmap
= XCALLOC (short, nvars
) - ntokens
;
291 for (i
= ntokens
; i
< nsyms
; i
++)
292 if (bitset_test (V
, i
))
294 for (i
= ntokens
; i
< nsyms
; i
++)
295 if (!bitset_test (V
, i
))
299 /* Shuffle elements of tables indexed by symbol number. */
301 symbol_t
**symbols_sorted
= XMALLOC (symbol_t
*, nvars
) - ntokens
;
303 for (i
= ntokens
; i
< nsyms
; i
++)
304 symbols
[i
]->number
= nontermmap
[i
];
305 for (i
= ntokens
; i
< nsyms
; i
++)
306 symbols_sorted
[nontermmap
[i
]] = symbols
[i
];
307 for (i
= ntokens
; i
< nsyms
; i
++)
308 symbols
[i
] = symbols_sorted
[i
];
309 free (symbols_sorted
+ ntokens
);
312 for (i
= 0; i
< nritems
; ++i
)
313 if (ISVAR (ritem
[i
]))
314 ritem
[i
] = nontermmap
[ritem
[i
]];
316 start_symbol
= nontermmap
[start_symbol
];
318 nsyms
-= nuseless_nonterminals
;
319 nvars
-= nuseless_nonterminals
;
321 free (nontermmap
+ ntokens
);
325 /*------------------------------------------------------------------.
326 | Output the detailed results of the reductions. For FILE.output. |
327 `------------------------------------------------------------------*/
330 reduce_output (FILE *out
)
332 if (nuseless_nonterminals
> 0)
335 fprintf (out
, "%s\n\n", _("Useless nonterminals:"));
336 for (i
= 0; i
< nuseless_nonterminals
; ++i
)
337 fprintf (out
, " %s\n", quotearg_style (escape_quoting_style
,
338 symbols
[nsyms
+ i
]->tag
));
345 for (i
= 0; i
< ntokens
; i
++)
346 if (!bitset_test (V
, i
) && !bitset_test (V1
, i
))
349 fprintf (out
, "%s\n\n", _("Terminals which are not used:"));
351 fprintf (out
, " %s\n", quotearg_style (escape_quoting_style
,
358 if (nuseless_productions
> 0)
361 fprintf (out
, "%s\n\n", _("Useless rules:"));
362 for (i
= nrules
+ 1; i
< nuseless_productions
+ nrules
+ 1; i
++)
365 fprintf (out
, "#%-4d ", rules
[i
].user_number
- 1);
366 fprintf (out
, "%s:", quotearg_style (escape_quoting_style
,
368 for (r
= rules
[i
].rhs
; *r
>= 0; r
++)
369 fprintf (out
, " %s", quotearg_style (escape_quoting_style
,
378 dump_grammar (FILE *out
)
383 fprintf (out
, "REDUCED GRAMMAR\n\n");
385 "ntokens = %d, nvars = %d, nsyms = %d, nrules = %d, nritems = %d\n\n",
386 ntokens
, nvars
, nsyms
, nrules
, nritems
);
387 fprintf (out
, "Variables\n---------\n\n");
388 fprintf (out
, "Value Sprec Sassoc Tag\n");
389 for (i
= ntokens
; i
< nsyms
; i
++)
390 fprintf (out
, "%5d %5d %5d %s\n",
392 symbols
[i
]->prec
, symbols
[i
]->assoc
,
393 quotearg_style (escape_quoting_style
, symbols
[i
]->tag
));
394 fprintf (out
, "\n\n");
395 fprintf (out
, "Rules\n-----\n\n");
396 fprintf (out
, "Num (Prec, Assoc, Useful, Ritem Range) Lhs -> Rhs (Ritem range) [Num]\n");
397 for (i
= 1; i
< nrules
+ nuseless_productions
+ 1; i
++)
400 /* Find the last RHS index in ritems. */
401 for (r
= rules
[i
].rhs
; *r
>= 0; ++r
)
403 fprintf (out
, "%3d (%2d, %2d, %2d, %2d-%2d) %2d ->",
406 rules
[i
].prec
->assoc
,
408 rules
[i
].rhs
- ritem
,
409 rules
[i
].rhs
- ritem
+ rhs_count
- 1,
410 rules
[i
].lhs
->number
);
411 /* Dumped the RHS. */
412 for (r
= rules
[i
].rhs
; *r
>= 0; r
++)
413 fprintf (out
, "%3d", *r
);
414 fprintf (out
, " [%d]\n", -(*r
) - 1);
416 fprintf (out
, "\n\n");
417 fprintf (out
, "Rules interpreted\n-----------------\n\n");
418 for (i
= 1; i
< nrules
+ nuseless_productions
+ 1; i
++)
420 fprintf (out
, "%-5d %s :",
421 i
, quotearg_style (escape_quoting_style
, rules
[i
].lhs
->tag
));
422 for (r
= rules
[i
].rhs
; *r
>= 0; r
++)
424 quotearg_style (escape_quoting_style
, symbols
[*r
]->tag
));
427 fprintf (out
, "\n\n");
432 /*-------------------------------.
433 | Report the results to STDERR. |
434 `-------------------------------*/
439 if (yacc_flag
&& nuseless_productions
)
440 fprintf (stderr
, ngettext ("%d rule never reduced\n",
441 "%d rules never reduced\n",
442 nuseless_productions
),
443 nuseless_productions
);
445 fprintf (stderr
, _("%s contains "), infile
);
447 if (nuseless_nonterminals
> 0)
448 fprintf (stderr
, ngettext ("%d useless nonterminal",
449 "%d useless nonterminals",
450 nuseless_nonterminals
),
451 nuseless_nonterminals
);
453 if (nuseless_nonterminals
> 0 && nuseless_productions
> 0)
454 fprintf (stderr
, _(" and "));
456 if (nuseless_productions
> 0)
457 fprintf (stderr
, ngettext ("%d useless rule",
459 nuseless_productions
),
460 nuseless_productions
);
461 fprintf (stderr
, "\n");
466 reduce_grammar (void)
470 /* Allocate the global sets used to compute the reduced grammar */
472 N
= bitset_create (nvars
, BITSET_FIXED
);
473 P
= bitset_create (nrules
+ 1, BITSET_FIXED
);
474 V
= bitset_create (nsyms
, BITSET_FIXED
);
475 V1
= bitset_create (nsyms
, BITSET_FIXED
);
477 useless_nonterminals ();
478 inaccessable_symbols ();
480 reduced
= (bool) (nuseless_nonterminals
+ nuseless_productions
> 0);
486 if (!bitset_test (N
, start_symbol
- ntokens
))
487 fatal (_("Start symbol %s does not derive any sentence"),
488 quotearg_style (escape_quoting_style
, symbols
[start_symbol
]->tag
));
490 /* First reduce the nonterminals, as they renumber themselves in the
491 whole grammar. If you change the order, nonterms would be
492 renumbered only in the reduced grammar. */
493 if (nuseless_nonterminals
> 0)
494 nonterminals_reduce ();
495 if (nuseless_productions
> 0)
496 reduce_grammar_tables ();
500 dump_grammar (stderr
);
502 fprintf (stderr
, "reduced %s defines %d terminals, %d nonterminals\
503 , and %d productions.\n",
504 infile
, ntokens
, nvars
, nrules
);
509 /*-----------------------------------------------------------.
510 | Free the global sets used to compute the reduced grammar. |
511 `-----------------------------------------------------------*/