]> git.saurik.com Git - bison.git/blob - src/reduce.c
* src/closure.c, src/conflicts.c, src/lalr.c, src/print.c,
[bison.git] / src / reduce.c
1 /* Grammar reduction for Bison.
2 Copyright 1988, 1989, 2000, 2001 Free Software Foundation, Inc.
3
4 This file is part of Bison, the GNU Compiler Compiler.
5
6 Bison is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 Bison is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with Bison; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
21
22 /* Reduce the grammar: Find and eliminate unreachable terminals,
23 nonterminals, and productions. David S. Bakin. */
24
25 /* Don't eliminate unreachable terminals: They may be used by the
26 user's parser. */
27
28 #include "system.h"
29 #include "getargs.h"
30 #include "files.h"
31 #include "symtab.h"
32 #include "gram.h"
33 #include "complain.h"
34 #include "reduce.h"
35 #include "reader.h"
36 #include "getargs.h"
37 #include "bitset.h"
38
39 typedef short *rule;
40
41
42 /* Set of all nonterminals which are not useless. */
43 static bitset N;
44
45 /* Set of all rules which have no useless nonterminals in their RHS. */
46 static bitset P;
47
48 /* Set of all accessible symbols. */
49 static bitset V;
50
51 /* Set of symbols used to define rule precedence (so they are
52 `useless', but no warning should be issued). */
53 static bitset V1;
54
55 static int nuseful_productions;
56 static int nuseless_productions;
57 static int nuseful_nonterminals;
58 int nuseless_nonterminals;
59 \f
60 static int
61 bits_size (bitset S)
62 {
63 int i, count = 0;
64
65 BITSET_EXECUTE (S, 0, i, { ++count; });
66 return count;
67 }
68 \f
69 /*-------------------------------------------------------------------.
70 | Another way to do this would be with a set for each production and |
71 | then do subset tests against N0, but even for the C grammar the |
72 | whole reducing process takes only 2 seconds on my 8Mhz AT. |
73 `-------------------------------------------------------------------*/
74
75 static bool
76 useful_production (int i, bitset N0)
77 {
78 rule r;
79 short n;
80
81 /* A production is useful if all of the nonterminals in its appear
82 in the set of useful nonterminals. */
83
84 for (r = &ritem[rules[i].rhs]; *r >= 0; r++)
85 if (ISVAR (n = *r))
86 if (!bitset_test (N0, n - ntokens))
87 return FALSE;
88 return TRUE;
89 }
90
91
92 /*---------------------------------------------------------.
93 | Remember that rules are 1-origin, symbols are 0-origin. |
94 `---------------------------------------------------------*/
95
96 static void
97 useless_nonterminals (void)
98 {
99 bitset Np, Ns;
100 int i;
101
102 /* N is set as built. Np is set being built this iteration. P is
103 set of all productions which have a RHS all in N. */
104
105 Np = bitset_create (nvars, BITSET_FIXED);
106
107
108 /* The set being computed is a set of nonterminals which can derive
109 the empty string or strings consisting of all terminals. At each
110 iteration a nonterminal is added to the set if there is a
111 production with that nonterminal as its LHS for which all the
112 nonterminals in its RHS are already in the set. Iterate until
113 the set being computed remains unchanged. Any nonterminals not
114 in the set at that point are useless in that they will never be
115 used in deriving a sentence of the language.
116
117 This iteration doesn't use any special traversal over the
118 productions. A set is kept of all productions for which all the
119 nonterminals in the RHS are in useful. Only productions not in
120 this set are scanned on each iteration. At the end, this set is
121 saved to be used when finding useful productions: only
122 productions in this set will appear in the final grammar. */
123
124 while (1)
125 {
126 bitset_copy (Np, N);
127 for (i = 1; i <= nrules; i++)
128 if (!bitset_test (P, i)
129 && useful_production (i, N))
130 {
131 bitset_set (Np, rules[i].lhs - ntokens);
132 bitset_set (P, i);
133 }
134 if (bitset_equal_p (N, Np))
135 break;
136 Ns = Np;
137 Np = N;
138 N = Ns;
139 }
140 bitset_free (N);
141 N = Np;
142 }
143
144
145 static void
146 inaccessable_symbols (void)
147 {
148 bitset Vp, Vs, Pp;
149 int i;
150 short t;
151 rule r;
152
153 /* Find out which productions are reachable and which symbols are
154 used. Starting with an empty set of productions and a set of
155 symbols which only has the start symbol in it, iterate over all
156 productions until the set of productions remains unchanged for an
157 iteration. For each production which has a LHS in the set of
158 reachable symbols, add the production to the set of reachable
159 productions, and add all of the nonterminals in the RHS of the
160 production to the set of reachable symbols.
161
162 Consider only the (partially) reduced grammar which has only
163 nonterminals in N and productions in P.
164
165 The result is the set P of productions in the reduced grammar,
166 and the set V of symbols in the reduced grammar.
167
168 Although this algorithm also computes the set of terminals which
169 are reachable, no terminal will be deleted from the grammar. Some
170 terminals might not be in the grammar but might be generated by
171 semantic routines, and so the user might want them available with
172 specified numbers. (Is this true?) However, the nonreachable
173 terminals are printed (if running in verbose mode) so that the
174 user can know. */
175
176 Vp = bitset_create (nsyms, BITSET_FIXED);
177 Pp = bitset_create (nrules + 1, BITSET_FIXED);
178
179 /* If the start symbol isn't useful, then nothing will be useful. */
180 if (bitset_test (N, start_symbol - ntokens))
181 {
182 bitset_set (V, start_symbol);
183
184 while (1)
185 {
186 bitset_copy (Vp, V);
187 for (i = 1; i <= nrules; i++)
188 {
189 if (!bitset_test (Pp, i)
190 && bitset_test (P, i)
191 && bitset_test (V, rules[i].lhs))
192 {
193 for (r = &ritem[rules[i].rhs]; *r >= 0; r++)
194 if (ISTOKEN (t = *r) || bitset_test (N, t - ntokens))
195 bitset_set (Vp, t);
196 bitset_set (Pp, i);
197 }
198 }
199 if (bitset_equal_p (V, Vp))
200 break;
201 Vs = Vp;
202 Vp = V;
203 V = Vs;
204 }
205 }
206
207 bitset_free (V);
208 V = Vp;
209
210 /* Tokens 0, 1, and 2 are internal to Bison. Consider them useful. */
211 bitset_set (V, 0); /* end-of-input token */
212 bitset_set (V, 1); /* error token */
213 bitset_set (V, 2); /* some undefined token */
214
215 bitset_free (P);
216 P = Pp;
217
218 nuseful_productions = bits_size (P);
219 nuseless_productions = nrules - nuseful_productions;
220
221 nuseful_nonterminals = 0;
222 for (i = ntokens; i < nsyms; i++)
223 if (bitset_test (V, i))
224 nuseful_nonterminals++;
225 nuseless_nonterminals = nvars - nuseful_nonterminals;
226
227 /* A token that was used in %prec should not be warned about. */
228 for (i = 1; i < nrules; i++)
229 if (rules[i].precsym != 0)
230 bitset_set (V1, rules[i].precsym);
231 }
232
233 static void
234 reduce_grammar_tables (void)
235 {
236 /* This is turned off because we would need to change the numbers in
237 the case statements in the actions file.
238
239 We don't disable it via CPP so that it is still checked with the
240 rest of the code, to avoid its becoming completely obsolete.
241
242 FIXME: I think the comment above demonstrates this code must be
243 turned off for *semantic* parser, not in the general case. Try
244 to understand this better --akim. */
245
246 if (0)
247 /* remove useless productions */
248 if (nuseless_productions > 0)
249 {
250 short np, pn, ni, pi;
251
252 np = 0;
253 ni = 0;
254 for (pn = 1; pn <= nrules; pn++)
255 if (bitset_test (P, pn))
256 {
257 np++;
258 if (pn != np)
259 {
260 rules[np].lhs = rules[pn].lhs;
261 rules[np].line = rules[pn].line;
262 rules[np].prec = rules[pn].prec;
263 rules[np].assoc = rules[pn].assoc;
264 rules[np].rhs = rules[pn].rhs;
265 if (rules[np].rhs != ni)
266 {
267 pi = rules[np].rhs;
268 rules[np].rhs = ni;
269 while (ritem[pi] >= 0)
270 ritem[ni++] = ritem[pi++];
271 ritem[ni++] = -np;
272 }
273 }
274 else
275 {
276 while (ritem[ni++] >= 0)
277 /* Nothing. */;
278 }
279 }
280
281 ritem[ni] = 0;
282 nrules -= nuseless_productions;
283 nitems = ni;
284 nritems = ni;
285
286 /* Is it worth it to reduce the amount of memory for the
287 grammar? Probably not. */
288 }
289
290 /* Disable useless productions. */
291 if (nuseless_productions > 0)
292 {
293 int pn;
294 for (pn = 1; pn <= nrules; pn++)
295 rules[pn].useful = bitset_test (P, pn);
296 }
297 }
298
299
300 /*------------------------------.
301 | Remove useless nonterminals. |
302 `------------------------------*/
303
304 static void
305 nonterminals_reduce (void)
306 {
307 int i, n;
308
309 /* Map the nonterminals to their new index: useful first, useless
310 afterwards. Kept for later report. */
311
312 short *nontermmap = XCALLOC (short, nvars) - ntokens;
313 n = ntokens;
314 for (i = ntokens; i < nsyms; i++)
315 if (bitset_test (V, i))
316 nontermmap[i] = n++;
317 for (i = ntokens; i < nsyms; i++)
318 if (!bitset_test (V, i))
319 nontermmap[i] = n++;
320
321
322 /* Shuffle elements of tables indexed by symbol number. */
323 {
324 bucket **symbols_sorted = XMALLOC (bucket *, nvars) - ntokens;
325
326 for (i = ntokens; i < nsyms; i++)
327 symbols_sorted[nontermmap[i]] = symbols[i];
328 for (i = ntokens; i < nsyms; i++)
329 symbols[i] = symbols_sorted[i];
330 free (symbols_sorted + ntokens);
331 }
332
333 /* Replace all symbol numbers in valid data structures. */
334
335 for (i = 1; i <= nrules; i++)
336 {
337 rules[i].lhs = nontermmap[rules[i].lhs];
338 if (ISVAR (rules[i].precsym))
339 /* Can this happen? */
340 rules[i].precsym = nontermmap[rules[i].precsym];
341 }
342
343 for (i = 0; i < nritems; ++i)
344 if (ISVAR (ritem[i]))
345 ritem[i] = nontermmap[ritem[i]];
346
347 start_symbol = nontermmap[start_symbol];
348
349 nsyms -= nuseless_nonterminals;
350 nvars -= nuseless_nonterminals;
351
352 free (nontermmap + ntokens);
353 }
354
355
356 /*------------------------------------------------------------------.
357 | Output the detailed results of the reductions. For FILE.output. |
358 `------------------------------------------------------------------*/
359
360 void
361 reduce_output (FILE *out)
362 {
363 if (nuseless_nonterminals > 0)
364 {
365 int i;
366 fprintf (out, "%s\n\n", _("Useless nonterminals:"));
367 for (i = 0; i < nuseless_nonterminals; ++i)
368 fprintf (out, " %s\n", symbols[nsyms + i]->tag);
369 fputs ("\n\n", out);
370 }
371
372 {
373 bool b = FALSE;
374 int i;
375 for (i = 0; i < ntokens; i++)
376 if (!bitset_test (V, i) && !bitset_test (V1, i))
377 {
378 if (!b)
379 fprintf (out, "%s\n\n", _("Terminals which are not used:"));
380 b = TRUE;
381 fprintf (out, " %s\n", symbols[i]->tag);
382 }
383 if (b)
384 fputs ("\n\n", out);
385 }
386
387 if (nuseless_productions > 0)
388 {
389 int i;
390 fprintf (out, "%s\n\n", _("Useless rules:"));
391 for (i = 1; i <= nrules; i++)
392 if (!rules[i].useful)
393 {
394 rule r;
395 fprintf (out, "#%-4d ", i - 1);
396 fprintf (out, "%s:", symbols[rules[i].lhs]->tag);
397 for (r = &ritem[rules[i].rhs]; *r >= 0; r++)
398 fprintf (out, " %s", symbols[*r]->tag);
399 fputs (";\n", out);
400 }
401 fputs ("\n\n", out);
402 }
403 }
404 \f
405 static void
406 dump_grammar (FILE *out)
407 {
408 int i;
409 rule r;
410
411 fprintf (out, "REDUCED GRAMMAR\n\n");
412 fprintf (out,
413 "ntokens = %d, nvars = %d, nsyms = %d, nrules = %d, nitems = %d\n\n",
414 ntokens, nvars, nsyms, nrules, nitems);
415 fprintf (out, "Variables\n---------\n\n");
416 fprintf (out, "Value Sprec Sassoc Tag\n");
417 for (i = ntokens; i < nsyms; i++)
418 fprintf (out, "%5d %5d %5d %s\n",
419 i,
420 symbols[i]->prec, symbols[i]->assoc, symbols[i]->tag);
421 fprintf (out, "\n\n");
422 fprintf (out, "Rules\n-----\n\n");
423 fprintf (out, "Num (Prec, Assoc, Useful, Ritem Range) Lhs -> Rhs (Ritem range) [Num]\n");
424 for (i = 1; i <= nrules; i++)
425 {
426 int rhs_count = 0;
427 /* Find the last RHS index in ritems. */
428 for (r = &ritem[rules[i].rhs]; *r >= 0; ++r)
429 ++rhs_count;
430 fprintf (out, "%3d (%2d, %2d, %2d, %2d-%2d) %2d ->",
431 i - 1,
432 rules[i].prec, rules[i].assoc, rules[i].useful,
433 rules[i].rhs, rules[i].rhs + rhs_count - 1,
434 rules[i].lhs);
435 /* Dumped the RHS. */
436 for (r = &ritem[rules[i].rhs]; *r >= 0; r++)
437 fprintf (out, "%3d", *r);
438 fprintf (out, " [%d]\n", -(*r) - 1);
439 }
440 fprintf (out, "\n\n");
441 fprintf (out, "Rules interpreted\n-----------------\n\n");
442 for (i = 1; i <= nrules; i++)
443 {
444 fprintf (out, "%-5d %s :", i, symbols[rules[i].lhs]->tag);
445 for (r = &ritem[rules[i].rhs]; *r >= 0; r++)
446 fprintf (out, " %s", symbols[*r]->tag);
447 fputc ('\n', out);
448 }
449 fprintf (out, "\n\n");
450 }
451
452
453
454 /*-------------------------------.
455 | Report the results to STDERR. |
456 `-------------------------------*/
457
458 static void
459 reduce_print (void)
460 {
461 if (yacc_flag && nuseless_productions)
462 fprintf (stderr, ngettext ("%d rule never reduced\n",
463 "%d rules never reduced\n",
464 nuseless_productions),
465 nuseless_productions);
466
467 fprintf (stderr, _("%s contains "), infile);
468
469 if (nuseless_nonterminals > 0)
470 fprintf (stderr, ngettext ("%d useless nonterminal",
471 "%d useless nonterminals",
472 nuseless_nonterminals),
473 nuseless_nonterminals);
474
475 if (nuseless_nonterminals > 0 && nuseless_productions > 0)
476 fprintf (stderr, _(" and "));
477
478 if (nuseless_productions > 0)
479 fprintf (stderr, ngettext ("%d useless rule",
480 "%d useless rules",
481 nuseless_productions),
482 nuseless_productions);
483 fprintf (stderr, "\n");
484 fflush (stderr);
485 }
486 \f
487 void
488 reduce_grammar (void)
489 {
490 bool reduced;
491
492 /* Allocate the global sets used to compute the reduced grammar */
493
494 N = bitset_create (nvars, BITSET_FIXED);
495 P = bitset_create (nrules + 1, BITSET_FIXED);
496 V = bitset_create (nsyms, BITSET_FIXED);
497 V1 = bitset_create (nsyms, BITSET_FIXED);
498
499 useless_nonterminals ();
500 inaccessable_symbols ();
501
502 reduced = (bool) (nuseless_nonterminals + nuseless_productions > 0);
503
504 if (!reduced)
505 return;
506
507 reduce_print ();
508
509 if (!bitset_test (N, start_symbol - ntokens))
510 fatal (_("Start symbol %s does not derive any sentence"),
511 symbols[start_symbol]->tag);
512
513 reduce_grammar_tables ();
514 if (nuseless_nonterminals > 0)
515 nonterminals_reduce ();
516
517 if (trace_flag)
518 {
519 dump_grammar (stderr);
520
521 fprintf (stderr, "reduced %s defines %d terminals, %d nonterminals\
522 , and %d productions.\n",
523 infile, ntokens, nvars, nrules);
524 }
525 }
526
527
528 /*-----------------------------------------------------------.
529 | Free the global sets used to compute the reduced grammar. |
530 `-----------------------------------------------------------*/
531
532 void
533 reduce_free (void)
534 {
535 bitset_free (N);
536 bitset_free (V);
537 bitset_free (V1);
538 bitset_free (P);
539 }