]> git.saurik.com Git - bison.git/blob - src/reduce.c
973e5edea4ebe25c8a4159ff4bf2366fb81e5196
[bison.git] / src / reduce.c
1 /* Grammar reduction for Bison.
2 Copyright 1988, 1989, 2000, 2001 Free Software Foundation, Inc.
3
4 This file is part of Bison, the GNU Compiler Compiler.
5
6 Bison is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 Bison is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with Bison; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
21
22 /* Reduce the grammar: Find and eliminate unreachable terminals,
23 nonterminals, and productions. David S. Bakin. */
24
25 /* Don't eliminate unreachable terminals: They may be used by the
26 user's parser. */
27
28 #include "system.h"
29 #include "getargs.h"
30 #include "files.h"
31 #include "symtab.h"
32 #include "gram.h"
33 #include "complain.h"
34 #include "reduce.h"
35 #include "reader.h"
36 #include "getargs.h"
37 #include "bitset.h"
38
39 typedef short *rule;
40
41
42 /* Set of all nonterminals which are not useless. */
43 static bitset N;
44
45 /* Set of all rules which have no useless nonterminals in their RHS. */
46 static bitset P;
47
48 /* Set of all accessible symbols. */
49 static bitset V;
50
51 /* Set of symbols used to define rule precedence (so they are
52 `useless', but no warning should be issued). */
53 static bitset V1;
54
55 static int nuseful_productions;
56 static int nuseless_productions;
57 static int nuseful_nonterminals;
58 int nuseless_nonterminals;
59 \f
60 static int
61 bits_size (bitset S)
62 {
63 int i, count = 0;
64
65 BITSET_EXECUTE (S, 0, i, { ++count; });
66 return count;
67 }
68 \f
69 /*-------------------------------------------------------------------.
70 | Another way to do this would be with a set for each production and |
71 | then do subset tests against N0, but even for the C grammar the |
72 | whole reducing process takes only 2 seconds on my 8Mhz AT. |
73 `-------------------------------------------------------------------*/
74
75 static bool
76 useful_production (int i, bitset N0)
77 {
78 rule r;
79 short n;
80
81 /* A production is useful if all of the nonterminals in its appear
82 in the set of useful nonterminals. */
83
84 for (r = &ritem[rules[i].rhs]; *r >= 0; r++)
85 if (ISVAR (n = *r))
86 if (!bitset_test (N0, n - ntokens))
87 return FALSE;
88 return TRUE;
89 }
90
91
92 /*---------------------------------------------------------.
93 | Remember that rules are 1-origin, symbols are 0-origin. |
94 `---------------------------------------------------------*/
95
96 static void
97 useless_nonterminals (void)
98 {
99 bitset Np, Ns;
100 int i;
101
102 /* N is set as built. Np is set being built this iteration. P is
103 set of all productions which have a RHS all in N. */
104
105 Np = bitset_create (nvars, BITSET_FIXED);
106 bitset_zero (Np);
107
108
109 /* The set being computed is a set of nonterminals which can derive
110 the empty string or strings consisting of all terminals. At each
111 iteration a nonterminal is added to the set if there is a
112 production with that nonterminal as its LHS for which all the
113 nonterminals in its RHS are already in the set. Iterate until
114 the set being computed remains unchanged. Any nonterminals not
115 in the set at that point are useless in that they will never be
116 used in deriving a sentence of the language.
117
118 This iteration doesn't use any special traversal over the
119 productions. A set is kept of all productions for which all the
120 nonterminals in the RHS are in useful. Only productions not in
121 this set are scanned on each iteration. At the end, this set is
122 saved to be used when finding useful productions: only
123 productions in this set will appear in the final grammar. */
124
125 while (1)
126 {
127 bitset_copy (Np, N);
128 for (i = 1; i <= nrules; i++)
129 if (!bitset_test (P, i)
130 && useful_production (i, N))
131 {
132 bitset_set (Np, rules[i].lhs - ntokens);
133 bitset_set (P, i);
134 }
135 if (bitset_equal_p (N, Np))
136 break;
137 Ns = Np;
138 Np = N;
139 N = Ns;
140 }
141 bitset_free (N);
142 N = Np;
143 }
144
145
146 static void
147 inaccessable_symbols (void)
148 {
149 bitset Vp, Vs, Pp;
150 int i;
151 short t;
152 rule r;
153
154 /* Find out which productions are reachable and which symbols are
155 used. Starting with an empty set of productions and a set of
156 symbols which only has the start symbol in it, iterate over all
157 productions until the set of productions remains unchanged for an
158 iteration. For each production which has a LHS in the set of
159 reachable symbols, add the production to the set of reachable
160 productions, and add all of the nonterminals in the RHS of the
161 production to the set of reachable symbols.
162
163 Consider only the (partially) reduced grammar which has only
164 nonterminals in N and productions in P.
165
166 The result is the set P of productions in the reduced grammar,
167 and the set V of symbols in the reduced grammar.
168
169 Although this algorithm also computes the set of terminals which
170 are reachable, no terminal will be deleted from the grammar. Some
171 terminals might not be in the grammar but might be generated by
172 semantic routines, and so the user might want them available with
173 specified numbers. (Is this true?) However, the nonreachable
174 terminals are printed (if running in verbose mode) so that the
175 user can know. */
176
177 Vp = bitset_create (nsyms, BITSET_FIXED);
178 bitset_zero (Vp);
179 Pp = bitset_create (nrules + 1, BITSET_FIXED);
180 bitset_zero (Pp);
181
182 /* If the start symbol isn't useful, then nothing will be useful. */
183 if (bitset_test (N, start_symbol - ntokens))
184 {
185 bitset_set (V, start_symbol);
186
187 while (1)
188 {
189 bitset_copy (Vp, V);
190 for (i = 1; i <= nrules; i++)
191 {
192 if (!bitset_test (Pp, i)
193 && bitset_test (P, i)
194 && bitset_test (V, rules[i].lhs))
195 {
196 for (r = &ritem[rules[i].rhs]; *r >= 0; r++)
197 if (ISTOKEN (t = *r) || bitset_test (N, t - ntokens))
198 bitset_set (Vp, t);
199 bitset_set (Pp, i);
200 }
201 }
202 if (bitset_equal_p (V, Vp))
203 break;
204 Vs = Vp;
205 Vp = V;
206 V = Vs;
207 }
208 }
209
210 bitset_free (V);
211 V = Vp;
212
213 /* Tokens 0, 1, and 2 are internal to Bison. Consider them useful. */
214 bitset_set (V, 0); /* end-of-input token */
215 bitset_set (V, 1); /* error token */
216 bitset_set (V, 2); /* some undefined token */
217
218 bitset_free (P);
219 P = Pp;
220
221 nuseful_productions = bits_size (P);
222 nuseless_productions = nrules - nuseful_productions;
223
224 nuseful_nonterminals = 0;
225 for (i = ntokens; i < nsyms; i++)
226 if (bitset_test (V, i))
227 nuseful_nonterminals++;
228 nuseless_nonterminals = nvars - nuseful_nonterminals;
229
230 /* A token that was used in %prec should not be warned about. */
231 for (i = 1; i < nrules; i++)
232 if (rules[i].precsym != 0)
233 bitset_set (V1, rules[i].precsym);
234 }
235
236 static void
237 reduce_grammar_tables (void)
238 {
239 /* This is turned off because we would need to change the numbers in
240 the case statements in the actions file.
241
242 We don't disable it via CPP so that it is still checked with the
243 rest of the code, to avoid its becoming completely obsolete.
244
245 FIXME: I think the comment above demonstrates this code must be
246 turned off for *semantic* parser, not in the general case. Try
247 to understand this better --akim. */
248
249 if (0)
250 /* remove useless productions */
251 if (nuseless_productions > 0)
252 {
253 short np, pn, ni, pi;
254
255 np = 0;
256 ni = 0;
257 for (pn = 1; pn <= nrules; pn++)
258 if (bitset_test (P, pn))
259 {
260 np++;
261 if (pn != np)
262 {
263 rules[np].lhs = rules[pn].lhs;
264 rules[np].line = rules[pn].line;
265 rules[np].prec = rules[pn].prec;
266 rules[np].assoc = rules[pn].assoc;
267 rules[np].rhs = rules[pn].rhs;
268 if (rules[np].rhs != ni)
269 {
270 pi = rules[np].rhs;
271 rules[np].rhs = ni;
272 while (ritem[pi] >= 0)
273 ritem[ni++] = ritem[pi++];
274 ritem[ni++] = -np;
275 }
276 }
277 else
278 {
279 while (ritem[ni++] >= 0)
280 /* Nothing. */;
281 }
282 }
283
284 ritem[ni] = 0;
285 nrules -= nuseless_productions;
286 nitems = ni;
287 nritems = ni;
288
289 /* Is it worth it to reduce the amount of memory for the
290 grammar? Probably not. */
291 }
292
293 /* Disable useless productions. */
294 if (nuseless_productions > 0)
295 {
296 int pn;
297 for (pn = 1; pn <= nrules; pn++)
298 rules[pn].useful = bitset_test (P, pn);
299 }
300 }
301
302
303 /*------------------------------.
304 | Remove useless nonterminals. |
305 `------------------------------*/
306
307 static void
308 nonterminals_reduce (void)
309 {
310 int i, n;
311
312 /* Map the nonterminals to their new index: useful first, useless
313 afterwards. Kept for later report. */
314
315 short *nontermmap = XCALLOC (short, nvars) - ntokens;
316 n = ntokens;
317 for (i = ntokens; i < nsyms; i++)
318 if (bitset_test (V, i))
319 nontermmap[i] = n++;
320 for (i = ntokens; i < nsyms; i++)
321 if (!bitset_test (V, i))
322 nontermmap[i] = n++;
323
324
325 /* Shuffle elements of tables indexed by symbol number. */
326 {
327 bucket **symbols_sorted = XMALLOC (bucket *, nvars) - ntokens;
328
329 for (i = ntokens; i < nsyms; i++)
330 symbols_sorted[nontermmap[i]] = symbols[i];
331 for (i = ntokens; i < nsyms; i++)
332 symbols[i] = symbols_sorted[i];
333 free (symbols_sorted + ntokens);
334 }
335
336 /* Replace all symbol numbers in valid data structures. */
337
338 for (i = 1; i <= nrules; i++)
339 {
340 rules[i].lhs = nontermmap[rules[i].lhs];
341 if (ISVAR (rules[i].precsym))
342 /* Can this happen? */
343 rules[i].precsym = nontermmap[rules[i].precsym];
344 }
345
346 for (i = 0; i < nritems; ++i)
347 if (ISVAR (ritem[i]))
348 ritem[i] = nontermmap[ritem[i]];
349
350 start_symbol = nontermmap[start_symbol];
351
352 nsyms -= nuseless_nonterminals;
353 nvars -= nuseless_nonterminals;
354
355 free (nontermmap + ntokens);
356 }
357
358
359 /*------------------------------------------------------------------.
360 | Output the detailed results of the reductions. For FILE.output. |
361 `------------------------------------------------------------------*/
362
363 void
364 reduce_output (FILE *out)
365 {
366 if (nuseless_nonterminals > 0)
367 {
368 int i;
369 fprintf (out, "%s\n\n", _("Useless nonterminals:"));
370 for (i = 0; i < nuseless_nonterminals; ++i)
371 fprintf (out, " %s\n", symbols[nsyms + i]->tag);
372 fputs ("\n\n", out);
373 }
374
375 {
376 bool b = FALSE;
377 int i;
378 for (i = 0; i < ntokens; i++)
379 if (!bitset_test (V, i) && !bitset_test (V1, i))
380 {
381 if (!b)
382 fprintf (out, "%s\n\n", _("Terminals which are not used:"));
383 b = TRUE;
384 fprintf (out, " %s\n", symbols[i]->tag);
385 }
386 if (b)
387 fputs ("\n\n", out);
388 }
389
390 if (nuseless_productions > 0)
391 {
392 int i;
393 fprintf (out, "%s\n\n", _("Useless rules:"));
394 for (i = 1; i <= nrules; i++)
395 if (!rules[i].useful)
396 {
397 rule r;
398 fprintf (out, "#%-4d ", i - 1);
399 fprintf (out, "%s:", symbols[rules[i].lhs]->tag);
400 for (r = &ritem[rules[i].rhs]; *r >= 0; r++)
401 fprintf (out, " %s", symbols[*r]->tag);
402 fputs (";\n", out);
403 }
404 fputs ("\n\n", out);
405 }
406 }
407 \f
408 static void
409 dump_grammar (FILE *out)
410 {
411 int i;
412 rule r;
413
414 fprintf (out, "REDUCED GRAMMAR\n\n");
415 fprintf (out,
416 "ntokens = %d, nvars = %d, nsyms = %d, nrules = %d, nitems = %d\n\n",
417 ntokens, nvars, nsyms, nrules, nitems);
418 fprintf (out, "Variables\n---------\n\n");
419 fprintf (out, "Value Sprec Sassoc Tag\n");
420 for (i = ntokens; i < nsyms; i++)
421 fprintf (out, "%5d %5d %5d %s\n",
422 i,
423 symbols[i]->prec, symbols[i]->assoc, symbols[i]->tag);
424 fprintf (out, "\n\n");
425 fprintf (out, "Rules\n-----\n\n");
426 fprintf (out, "Num (Prec, Assoc, Useful, Ritem Range) Lhs -> Rhs (Ritem range) [Num]\n");
427 for (i = 1; i <= nrules; i++)
428 {
429 int rhs_count = 0;
430 /* Find the last RHS index in ritems. */
431 for (r = &ritem[rules[i].rhs]; *r >= 0; ++r)
432 ++rhs_count;
433 fprintf (out, "%3d (%2d, %2d, %2d, %2d-%2d) %2d ->",
434 i - 1,
435 rules[i].prec, rules[i].assoc, rules[i].useful,
436 rules[i].rhs, rules[i].rhs + rhs_count - 1,
437 rules[i].lhs);
438 /* Dumped the RHS. */
439 for (r = &ritem[rules[i].rhs]; *r >= 0; r++)
440 fprintf (out, "%3d", *r);
441 fprintf (out, " [%d]\n", -(*r) - 1);
442 }
443 fprintf (out, "\n\n");
444 fprintf (out, "Rules interpreted\n-----------------\n\n");
445 for (i = 1; i <= nrules; i++)
446 {
447 fprintf (out, "%-5d %s :", i, symbols[rules[i].lhs]->tag);
448 for (r = &ritem[rules[i].rhs]; *r >= 0; r++)
449 fprintf (out, " %s", symbols[*r]->tag);
450 fputc ('\n', out);
451 }
452 fprintf (out, "\n\n");
453 }
454
455
456
457 /*-------------------------------.
458 | Report the results to STDERR. |
459 `-------------------------------*/
460
461 static void
462 reduce_print (void)
463 {
464 if (yacc_flag && nuseless_productions)
465 fprintf (stderr, ngettext ("%d rule never reduced\n",
466 "%d rules never reduced\n",
467 nuseless_productions),
468 nuseless_productions);
469
470 fprintf (stderr, _("%s contains "), infile);
471
472 if (nuseless_nonterminals > 0)
473 fprintf (stderr, ngettext ("%d useless nonterminal",
474 "%d useless nonterminals",
475 nuseless_nonterminals),
476 nuseless_nonterminals);
477
478 if (nuseless_nonterminals > 0 && nuseless_productions > 0)
479 fprintf (stderr, _(" and "));
480
481 if (nuseless_productions > 0)
482 fprintf (stderr, ngettext ("%d useless rule",
483 "%d useless rules",
484 nuseless_productions),
485 nuseless_productions);
486 fprintf (stderr, "\n");
487 fflush (stderr);
488 }
489 \f
490 void
491 reduce_grammar (void)
492 {
493 bool reduced;
494
495 /* Allocate the global sets used to compute the reduced grammar */
496
497 N = bitset_create (nvars, BITSET_FIXED);
498 bitset_zero (N);
499 P = bitset_create (nrules + 1, BITSET_FIXED);
500 bitset_zero (P);
501 V = bitset_create (nsyms, BITSET_FIXED);
502 bitset_zero (V);
503 V1 = bitset_create (nsyms, BITSET_FIXED);
504 bitset_zero (V1);
505
506 useless_nonterminals ();
507 inaccessable_symbols ();
508
509 reduced = (bool) (nuseless_nonterminals + nuseless_productions > 0);
510
511 if (!reduced)
512 return;
513
514 reduce_print ();
515
516 if (!bitset_test (N, start_symbol - ntokens))
517 fatal (_("Start symbol %s does not derive any sentence"),
518 symbols[start_symbol]->tag);
519
520 reduce_grammar_tables ();
521 if (nuseless_nonterminals > 0)
522 nonterminals_reduce ();
523
524 if (trace_flag)
525 {
526 dump_grammar (stderr);
527
528 fprintf (stderr, "reduced %s defines %d terminals, %d nonterminals\
529 , and %d productions.\n",
530 infile, ntokens, nvars, nrules);
531 }
532 }
533
534
535 /*-----------------------------------------------------------.
536 | Free the global sets used to compute the reduced grammar. |
537 `-----------------------------------------------------------*/
538
539 void
540 reduce_free (void)
541 {
542 bitset_free (N);
543 bitset_free (V);
544 bitset_free (V1);
545 bitset_free (P);
546 }