]> git.saurik.com Git - bison.git/blob - src/reduce.c
* src/lalr.c, src/LR0.c, src/closure.c, src/gram.c, src/reduce.c:
[bison.git] / src / reduce.c
1 /* Grammar reduction for Bison.
2 Copyright (C) 1988, 1989, 2000, 2001, 2002 Free Software Foundation, Inc.
3
4 This file is part of Bison, the GNU Compiler Compiler.
5
6 Bison is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 Bison is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with Bison; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
21
22 /* Reduce the grammar: Find and eliminate unreachable terminals,
23 nonterminals, and productions. David S. Bakin. */
24
25 /* Don't eliminate unreachable terminals: They may be used by the
26 user's parser. */
27
28 #include "system.h"
29 #include "quotearg.h"
30 #include "getargs.h"
31 #include "files.h"
32 #include "symtab.h"
33 #include "gram.h"
34 #include "complain.h"
35 #include "reduce.h"
36 #include "reader.h"
37 #include "getargs.h"
38 #include "bitset.h"
39
40 typedef short *rule;
41
42
43 /* Set of all nonterminals which are not useless. */
44 static bitset N;
45
46 /* Set of all rules which have no useless nonterminals in their RHS. */
47 static bitset P;
48
49 /* Set of all accessible symbols. */
50 static bitset V;
51
52 /* Set of symbols used to define rule precedence (so they are
53 `useless', but no warning should be issued). */
54 static bitset V1;
55
56 static int nuseful_productions;
57 static int nuseless_productions;
58 static int nuseful_nonterminals;
59 int nuseless_nonterminals;
60 \f
61 /*-------------------------------------------------------------------.
62 | Another way to do this would be with a set for each production and |
63 | then do subset tests against N0, but even for the C grammar the |
64 | whole reducing process takes only 2 seconds on my 8Mhz AT. |
65 `-------------------------------------------------------------------*/
66
67 static bool
68 useful_production (int i, bitset N0)
69 {
70 rule r;
71 short n;
72
73 /* A production is useful if all of the nonterminals in its appear
74 in the set of useful nonterminals. */
75
76 for (r = rules[i].rhs; *r >= 0; r++)
77 if (ISVAR (n = *r) && !bitset_test (N0, n - ntokens))
78 return FALSE;
79 return TRUE;
80 }
81
82
83 /*---------------------------------------------------------.
84 | Remember that rules are 1-origin, symbols are 0-origin. |
85 `---------------------------------------------------------*/
86
87 static void
88 useless_nonterminals (void)
89 {
90 bitset Np, Ns;
91 int i;
92
93 /* N is set as built. Np is set being built this iteration. P is
94 set of all productions which have a RHS all in N. */
95
96 Np = bitset_create (nvars, BITSET_FIXED);
97
98
99 /* The set being computed is a set of nonterminals which can derive
100 the empty string or strings consisting of all terminals. At each
101 iteration a nonterminal is added to the set if there is a
102 production with that nonterminal as its LHS for which all the
103 nonterminals in its RHS are already in the set. Iterate until
104 the set being computed remains unchanged. Any nonterminals not
105 in the set at that point are useless in that they will never be
106 used in deriving a sentence of the language.
107
108 This iteration doesn't use any special traversal over the
109 productions. A set is kept of all productions for which all the
110 nonterminals in the RHS are in useful. Only productions not in
111 this set are scanned on each iteration. At the end, this set is
112 saved to be used when finding useful productions: only
113 productions in this set will appear in the final grammar. */
114
115 while (1)
116 {
117 bitset_copy (Np, N);
118 for (i = 1; i < nrules + 1; i++)
119 if (!bitset_test (P, i)
120 && useful_production (i, N))
121 {
122 bitset_set (Np, rules[i].lhs->number - ntokens);
123 bitset_set (P, i);
124 }
125 if (bitset_equal_p (N, Np))
126 break;
127 Ns = Np;
128 Np = N;
129 N = Ns;
130 }
131 bitset_free (N);
132 N = Np;
133 }
134
135
136 static void
137 inaccessable_symbols (void)
138 {
139 bitset Vp, Vs, Pp;
140 int i;
141 short t;
142 rule r;
143
144 /* Find out which productions are reachable and which symbols are
145 used. Starting with an empty set of productions and a set of
146 symbols which only has the start symbol in it, iterate over all
147 productions until the set of productions remains unchanged for an
148 iteration. For each production which has a LHS in the set of
149 reachable symbols, add the production to the set of reachable
150 productions, and add all of the nonterminals in the RHS of the
151 production to the set of reachable symbols.
152
153 Consider only the (partially) reduced grammar which has only
154 nonterminals in N and productions in P.
155
156 The result is the set P of productions in the reduced grammar,
157 and the set V of symbols in the reduced grammar.
158
159 Although this algorithm also computes the set of terminals which
160 are reachable, no terminal will be deleted from the grammar. Some
161 terminals might not be in the grammar but might be generated by
162 semantic routines, and so the user might want them available with
163 specified numbers. (Is this true?) However, the nonreachable
164 terminals are printed (if running in verbose mode) so that the
165 user can know. */
166
167 Vp = bitset_create (nsyms, BITSET_FIXED);
168 Pp = bitset_create (nrules + 1, BITSET_FIXED);
169
170 /* If the start symbol isn't useful, then nothing will be useful. */
171 if (bitset_test (N, start_symbol - ntokens))
172 {
173 bitset_set (V, start_symbol);
174
175 while (1)
176 {
177 bitset_copy (Vp, V);
178 for (i = 1; i < nrules + 1; i++)
179 {
180 if (!bitset_test (Pp, i)
181 && bitset_test (P, i)
182 && bitset_test (V, rules[i].lhs->number))
183 {
184 for (r = rules[i].rhs; *r >= 0; r++)
185 if (ISTOKEN (t = *r) || bitset_test (N, t - ntokens))
186 bitset_set (Vp, t);
187 bitset_set (Pp, i);
188 }
189 }
190 if (bitset_equal_p (V, Vp))
191 break;
192 Vs = Vp;
193 Vp = V;
194 V = Vs;
195 }
196 }
197
198 bitset_free (V);
199 V = Vp;
200
201 /* Tokens 0, 1, and 2 are internal to Bison. Consider them useful. */
202 bitset_set (V, 0); /* end-of-input token */
203 bitset_set (V, 1); /* error token */
204 bitset_set (V, 2); /* some undefined token */
205
206 bitset_free (P);
207 P = Pp;
208
209 nuseful_productions = bitset_count (P);
210 nuseless_productions = nrules - nuseful_productions;
211
212 nuseful_nonterminals = 0;
213 for (i = ntokens; i < nsyms; i++)
214 if (bitset_test (V, i))
215 nuseful_nonterminals++;
216 nuseless_nonterminals = nvars - nuseful_nonterminals;
217
218 /* A token that was used in %prec should not be warned about. */
219 for (i = 1; i < nrules + 1; i++)
220 if (rules[i].precsym != 0)
221 bitset_set (V1, rules[i].precsym);
222 }
223
224
225 /*-------------------------------------------------------------------.
226 | Put the useless productions at the end of RULES, and adjust NRULES |
227 | accordingly. |
228 `-------------------------------------------------------------------*/
229
230 static void
231 reduce_grammar_tables (void)
232 {
233 /* Flag useless productions. */
234 {
235 int pn;
236 for (pn = 1; pn < nrules + 1; pn++)
237 rules[pn].useful = bitset_test (P, pn);
238 }
239
240 /* Map the nonterminals to their new index: useful first, useless
241 afterwards. Kept for later report. */
242 {
243 int useful = 1;
244 int useless = nrules + 1 - nuseless_productions;
245 rule_t *rules_sorted = XMALLOC (rule_t, nrules + 1) - 1;
246 int i;
247 for (i = 1; i < nrules + 1; ++i)
248 rules_sorted[rules[i].useful ? useful++ : useless++] = rules[i];
249 free (rules + 1);
250 rules = rules_sorted;
251
252 /* Renumber the rules markers in RITEMS. */
253 for (i = 1; i < nrules + 1; ++i)
254 {
255 short *rhsp = rules[i].rhs;
256 for (/* Nothing. */; *rhsp >= 0; ++rhsp)
257 /* Nothing. */;
258 *rhsp = -i;
259 rules[i].number = i;
260 }
261 nrules -= nuseless_productions;
262 }
263
264 /* Adjust NRITEMS and NITEMS. */
265 {
266 int r;
267 int length;
268 for (r = nrules + 1; r < nrules + 1 + nuseless_productions; ++r)
269 {
270 length = rule_rhs_length (&rules[r]);
271 nritems -= length + 1;
272 nitems -= length + 1;
273 }
274 }
275 }
276
277
278 /*------------------------------.
279 | Remove useless nonterminals. |
280 `------------------------------*/
281
282 static void
283 nonterminals_reduce (void)
284 {
285 int i, n;
286
287 /* Map the nonterminals to their new index: useful first, useless
288 afterwards. Kept for later report. */
289
290 short *nontermmap = XCALLOC (short, nvars) - ntokens;
291 n = ntokens;
292 for (i = ntokens; i < nsyms; i++)
293 if (bitset_test (V, i))
294 nontermmap[i] = n++;
295 for (i = ntokens; i < nsyms; i++)
296 if (!bitset_test (V, i))
297 nontermmap[i] = n++;
298
299
300 /* Shuffle elements of tables indexed by symbol number. */
301 {
302 bucket **symbols_sorted = XMALLOC (bucket *, nvars) - ntokens;
303
304 for (i = ntokens; i < nsyms; i++)
305 symbols[i]->number = nontermmap[i];
306 for (i = ntokens; i < nsyms; i++)
307 symbols_sorted[nontermmap[i]] = symbols[i];
308 for (i = ntokens; i < nsyms; i++)
309 symbols[i] = symbols_sorted[i];
310 free (symbols_sorted + ntokens);
311 }
312
313 /* Replace all symbol numbers in valid data structures. */
314
315 for (i = 1; i < nrules + 1; i++)
316 {
317 if (ISVAR (rules[i].precsym))
318 /* Can this happen? */
319 rules[i].precsym = nontermmap[rules[i].precsym];
320 }
321
322 for (i = 0; i < nritems; ++i)
323 if (ISVAR (ritem[i]))
324 ritem[i] = nontermmap[ritem[i]];
325
326 start_symbol = nontermmap[start_symbol];
327
328 nsyms -= nuseless_nonterminals;
329 nvars -= nuseless_nonterminals;
330
331 free (nontermmap + ntokens);
332 }
333
334
335 /*------------------------------------------------------------------.
336 | Output the detailed results of the reductions. For FILE.output. |
337 `------------------------------------------------------------------*/
338
339 void
340 reduce_output (FILE *out)
341 {
342 if (nuseless_nonterminals > 0)
343 {
344 int i;
345 fprintf (out, "%s\n\n", _("Useless nonterminals:"));
346 for (i = 0; i < nuseless_nonterminals; ++i)
347 fprintf (out, " %s\n", quotearg_style (escape_quoting_style,
348 symbols[nsyms + i]->tag));
349 fputs ("\n\n", out);
350 }
351
352 {
353 bool b = FALSE;
354 int i;
355 for (i = 0; i < ntokens; i++)
356 if (!bitset_test (V, i) && !bitset_test (V1, i))
357 {
358 if (!b)
359 fprintf (out, "%s\n\n", _("Terminals which are not used:"));
360 b = TRUE;
361 fprintf (out, " %s\n", quotearg_style (escape_quoting_style,
362 symbols[i]->tag));
363 }
364 if (b)
365 fputs ("\n\n", out);
366 }
367
368 if (nuseless_productions > 0)
369 {
370 int i;
371 fprintf (out, "%s\n\n", _("Useless rules:"));
372 for (i = nrules + 1; i < nuseless_productions + nrules + 1; i++)
373 {
374 rule r;
375 fprintf (out, "#%-4d ", rules[i].user_number - 1);
376 fprintf (out, "%s:", quotearg_style (escape_quoting_style,
377 rules[i].lhs->tag));
378 for (r = rules[i].rhs; *r >= 0; r++)
379 fprintf (out, " %s", quotearg_style (escape_quoting_style,
380 symbols[*r]->tag));
381 fputs (";\n", out);
382 }
383 fputs ("\n\n", out);
384 }
385 }
386 \f
387 static void
388 dump_grammar (FILE *out)
389 {
390 int i;
391 rule r;
392
393 fprintf (out, "REDUCED GRAMMAR\n\n");
394 fprintf (out,
395 "ntokens = %d, nvars = %d, nsyms = %d, nrules = %d, nitems = %d\n\n",
396 ntokens, nvars, nsyms, nrules, nitems);
397 fprintf (out, "Variables\n---------\n\n");
398 fprintf (out, "Value Sprec Sassoc Tag\n");
399 for (i = ntokens; i < nsyms; i++)
400 fprintf (out, "%5d %5d %5d %s\n",
401 i,
402 symbols[i]->prec, symbols[i]->assoc,
403 quotearg_style (escape_quoting_style, symbols[i]->tag));
404 fprintf (out, "\n\n");
405 fprintf (out, "Rules\n-----\n\n");
406 fprintf (out, "Num (Prec, Assoc, Useful, Ritem Range) Lhs -> Rhs (Ritem range) [Num]\n");
407 for (i = 1; i < nrules + nuseless_productions + 1; i++)
408 {
409 int rhs_count = 0;
410 /* Find the last RHS index in ritems. */
411 for (r = rules[i].rhs; *r >= 0; ++r)
412 ++rhs_count;
413 fprintf (out, "%3d (%2d, %2d, %2d, %2d-%2d) %2d ->",
414 i - 1,
415 rules[i].prec, rules[i].assoc, rules[i].useful,
416 rules[i].rhs - ritem, rules[i].rhs - ritem + rhs_count - 1,
417 rules[i].lhs->number);
418 /* Dumped the RHS. */
419 for (r = rules[i].rhs; *r >= 0; r++)
420 fprintf (out, "%3d", *r);
421 fprintf (out, " [%d]\n", -(*r) - 1);
422 }
423 fprintf (out, "\n\n");
424 fprintf (out, "Rules interpreted\n-----------------\n\n");
425 for (i = 1; i < nrules + nuseless_productions + 1; i++)
426 {
427 fprintf (out, "%-5d %s :",
428 i, quotearg_style (escape_quoting_style, rules[i].lhs->tag));
429 for (r = rules[i].rhs; *r >= 0; r++)
430 fprintf (out, " %s",
431 quotearg_style (escape_quoting_style, symbols[*r]->tag));
432 fputc ('\n', out);
433 }
434 fprintf (out, "\n\n");
435 }
436
437
438
439 /*-------------------------------.
440 | Report the results to STDERR. |
441 `-------------------------------*/
442
443 static void
444 reduce_print (void)
445 {
446 if (yacc_flag && nuseless_productions)
447 fprintf (stderr, ngettext ("%d rule never reduced\n",
448 "%d rules never reduced\n",
449 nuseless_productions),
450 nuseless_productions);
451
452 fprintf (stderr, _("%s contains "), infile);
453
454 if (nuseless_nonterminals > 0)
455 fprintf (stderr, ngettext ("%d useless nonterminal",
456 "%d useless nonterminals",
457 nuseless_nonterminals),
458 nuseless_nonterminals);
459
460 if (nuseless_nonterminals > 0 && nuseless_productions > 0)
461 fprintf (stderr, _(" and "));
462
463 if (nuseless_productions > 0)
464 fprintf (stderr, ngettext ("%d useless rule",
465 "%d useless rules",
466 nuseless_productions),
467 nuseless_productions);
468 fprintf (stderr, "\n");
469 fflush (stderr);
470 }
471 \f
472 void
473 reduce_grammar (void)
474 {
475 bool reduced;
476
477 /* Allocate the global sets used to compute the reduced grammar */
478
479 N = bitset_create (nvars, BITSET_FIXED);
480 P = bitset_create (nrules + 1, BITSET_FIXED);
481 V = bitset_create (nsyms, BITSET_FIXED);
482 V1 = bitset_create (nsyms, BITSET_FIXED);
483
484 useless_nonterminals ();
485 inaccessable_symbols ();
486
487 reduced = (bool) (nuseless_nonterminals + nuseless_productions > 0);
488 if (!reduced)
489 return;
490
491 reduce_print ();
492
493 if (!bitset_test (N, start_symbol - ntokens))
494 fatal (_("Start symbol %s does not derive any sentence"),
495 quotearg_style (escape_quoting_style, symbols[start_symbol]->tag));
496
497 if (nuseless_productions > 0)
498 reduce_grammar_tables ();
499 if (nuseless_nonterminals > 0)
500 nonterminals_reduce ();
501
502 if (trace_flag)
503 {
504 dump_grammar (stderr);
505
506 fprintf (stderr, "reduced %s defines %d terminals, %d nonterminals\
507 , and %d productions.\n",
508 infile, ntokens, nvars, nrules);
509 }
510 }
511
512
513 /*-----------------------------------------------------------.
514 | Free the global sets used to compute the reduced grammar. |
515 `-----------------------------------------------------------*/
516
517 void
518 reduce_free (void)
519 {
520 bitset_free (N);
521 bitset_free (V);
522 bitset_free (V1);
523 bitset_free (P);
524 }