]> git.saurik.com Git - bison.git/blob - src/reduce.c
d160f9d6f134322439cc7b85dd54a15430d23a6e
[bison.git] / src / reduce.c
1 /* Grammar reduction for Bison.
2 Copyright (C) 1988, 1989, 2000 Free Software Foundation, Inc.
3
4 This file is part of Bison, the GNU Compiler Compiler.
5
6 Bison is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 Bison is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with Bison; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
21
22 /* Reduce the grammar: Find and eliminate unreachable terminals,
23 nonterminals, and productions. David S. Bakin. */
24
25 /* Don't eliminate unreachable terminals: They may be used by the
26 user's parser. */
27
28 #include "system.h"
29 #include "getargs.h"
30 #include "files.h"
31 #include "gram.h"
32 #include "alloc.h"
33 #include "complain.h"
34 #include "reduce.h"
35
36 extern char **tags; /* reader.c */
37 static int statisticsflag; /* XXXXXXX */
38 extern int fixed_outfiles;
39
40 typedef unsigned *BSet;
41 typedef short *rule;
42
43
44 /* N is set of all nonterminals which are not useless. P is set of
45 all rules which have no useless nonterminals in their RHS. V is
46 the set of all accessible symbols. */
47
48 static BSet N, P, V, V1;
49
50 static int nuseful_productions;
51 static int nuseless_productions;
52 static int nuseful_nonterminals;
53 static int nuseless_nonterminals;
54 \f
55 static bool
56 bits_equal (BSet L, BSet R, int n)
57 {
58 int i;
59
60 for (i = n - 1; i >= 0; i--)
61 if (L[i] != R[i])
62 return FALSE;
63 return TRUE;
64 }
65
66
67 static int
68 nbits (unsigned i)
69 {
70 int count = 0;
71
72 while (i != 0)
73 {
74 i ^= (i & ((unsigned) (-(int) i)));
75 ++count;
76 }
77 return count;
78 }
79
80
81 static int
82 bits_size (BSet S, int n)
83 {
84 int i, count = 0;
85
86 for (i = n - 1; i >= 0; i--)
87 count += nbits (S[i]);
88 return count;
89 }
90 \f
91 /*-------------------------------------------------------------------.
92 | Another way to do this would be with a set for each production and |
93 | then do subset tests against N0, but even for the C grammar the |
94 | whole reducing process takes only 2 seconds on my 8Mhz AT. |
95 `-------------------------------------------------------------------*/
96
97 static bool
98 useful_production (int i, BSet N0)
99 {
100 rule r;
101 short n;
102
103 /* A production is useful if all of the nonterminals in its appear
104 in the set of useful nonterminals. */
105
106 for (r = &ritem[rrhs[i]]; *r > 0; r++)
107 if (ISVAR (n = *r))
108 if (!BITISSET (N0, n - ntokens))
109 return FALSE;
110 return TRUE;
111 }
112
113
114 /*---------------------------------------------------------.
115 | Remember that rules are 1-origin, symbols are 0-origin. |
116 `---------------------------------------------------------*/
117
118 static void
119 useless_nonterminals (void)
120 {
121 BSet Np, Ns;
122 int i, n;
123
124 /* N is set as built. Np is set being built this iteration. P is
125 set of all productions which have a RHS all in N. */
126
127 Np = NEW2 (WORDSIZE (nvars), unsigned);
128
129 /* The set being computed is a set of nonterminals which can derive
130 the empty string or strings consisting of all terminals. At each
131 iteration a nonterminal is added to the set if there is a
132 production with that nonterminal as its LHS for which all the
133 nonterminals in its RHS are already in the set. Iterate until
134 the set being computed remains unchanged. Any nonterminals not
135 in the set at that point are useless in that they will never be
136 used in deriving a sentence of the language.
137
138 This iteration doesn't use any special traversal over the
139 productions. A set is kept of all productions for which all the
140 nonterminals in the RHS are in useful. Only productions not in
141 this set are scanned on each iteration. At the end, this set is
142 saved to be used when finding useful productions: only
143 productions in this set will appear in the final grammar. */
144
145 n = 0;
146 while (1)
147 {
148 for (i = WORDSIZE (nvars) - 1; i >= 0; i--)
149 Np[i] = N[i];
150 for (i = 1; i <= nrules; i++)
151 {
152 if (!BITISSET (P, i))
153 {
154 if (useful_production (i, N))
155 {
156 SETBIT (Np, rlhs[i] - ntokens);
157 SETBIT (P, i);
158 }
159 }
160 }
161 if (bits_equal (N, Np, WORDSIZE (nvars)))
162 break;
163 Ns = Np;
164 Np = N;
165 N = Ns;
166 }
167 FREE (N);
168 N = Np;
169 }
170
171
172 static void
173 inaccessable_symbols (void)
174 {
175 BSet Vp, Vs, Pp;
176 int i, n;
177 short t;
178 rule r;
179
180 /* Find out which productions are reachable and which symbols are
181 used. Starting with an empty set of productions and a set of
182 symbols which only has the start symbol in it, iterate over all
183 productions until the set of productions remains unchanged for an
184 iteration. For each production which has a LHS in the set of
185 reachable symbols, add the production to the set of reachable
186 productions, and add all of the nonterminals in the RHS of the
187 production to the set of reachable symbols.
188
189 Consider only the (partially) reduced grammar which has only
190 nonterminals in N and productions in P.
191
192 The result is the set P of productions in the reduced grammar,
193 and the set V of symbols in the reduced grammar.
194
195 Although this algorithm also computes the set of terminals which
196 are reachable, no terminal will be deleted from the grammar. Some
197 terminals might not be in the grammar but might be generated by
198 semantic routines, and so the user might want them available with
199 specified numbers. (Is this true?) However, the nonreachable
200 terminals are printed (if running in verbose mode) so that the
201 user can know. */
202
203 Vp = NEW2 (WORDSIZE (nsyms), unsigned);
204 Pp = NEW2 (WORDSIZE (nrules + 1), unsigned);
205
206 /* If the start symbol isn't useful, then nothing will be useful. */
207 if (!BITISSET (N, start_symbol - ntokens))
208 goto end_iteration;
209
210 SETBIT (V, start_symbol);
211
212 n = 0;
213 while (1)
214 {
215 for (i = WORDSIZE (nsyms) - 1; i >= 0; i--)
216 Vp[i] = V[i];
217 for (i = 1; i <= nrules; i++)
218 {
219 if (!BITISSET (Pp, i) && BITISSET (P, i) && BITISSET (V, rlhs[i]))
220 {
221 for (r = &ritem[rrhs[i]]; *r >= 0; r++)
222 {
223 if (ISTOKEN (t = *r) || BITISSET (N, t - ntokens))
224 {
225 SETBIT (Vp, t);
226 }
227 }
228 SETBIT (Pp, i);
229 }
230 }
231 if (bits_equal (V, Vp, WORDSIZE (nsyms)))
232 {
233 break;
234 }
235 Vs = Vp;
236 Vp = V;
237 V = Vs;
238 }
239 end_iteration:
240
241 FREE (V);
242 V = Vp;
243
244 /* Tokens 0, 1, and 2 are internal to Bison. Consider them useful. */
245 SETBIT (V, 0); /* end-of-input token */
246 SETBIT (V, 1); /* error token */
247 SETBIT (V, 2); /* some undefined token */
248
249 FREE (P);
250 P = Pp;
251
252 nuseful_productions = bits_size (P, WORDSIZE (nrules + 1));
253 nuseless_productions = nrules - nuseful_productions;
254
255 nuseful_nonterminals = 0;
256 for (i = ntokens; i < nsyms; i++)
257 if (BITISSET (V, i))
258 nuseful_nonterminals++;
259 nuseless_nonterminals = nvars - nuseful_nonterminals;
260
261 /* A token that was used in %prec should not be warned about. */
262 for (i = 1; i < nrules; i++)
263 if (rprecsym[i] != 0)
264 SETBIT (V1, rprecsym[i]);
265 }
266
267 static void
268 reduce_grammar_tables (void)
269 {
270 /* This is turned off because we would need to change the numbers
271 in the case statements in the actions file. */
272 #if 0
273 /* remove useless productions */
274 if (nuseless_productions > 0)
275 {
276 short np, pn, ni, pi;
277
278 np = 0;
279 ni = 0;
280 for (pn = 1; pn <= nrules; pn++)
281 {
282 if (BITISSET (P, pn))
283 {
284 np++;
285 if (pn != np)
286 {
287 rlhs[np] = rlhs[pn];
288 rline[np] = rline[pn];
289 rprec[np] = rprec[pn];
290 rassoc[np] = rassoc[pn];
291 rrhs[np] = rrhs[pn];
292 if (rrhs[np] != ni)
293 {
294 pi = rrhs[np];
295 rrhs[np] = ni;
296 while (ritem[pi] >= 0)
297 ritem[ni++] = ritem[pi++];
298 ritem[ni++] = -np;
299 }
300 }
301 else
302 {
303 while (ritem[ni++] >= 0);
304 }
305 }
306 }
307 ritem[ni] = 0;
308 nrules -= nuseless_productions;
309 nitems = ni;
310
311 /* Is it worth it to reduce the amount of memory for the
312 grammar? Probably not. */
313
314 }
315 #endif /* 0 */
316 /* Disable useless productions,
317 since they may contain useless nonterms
318 that would get mapped below to -1 and confuse everyone. */
319 if (nuseless_productions > 0)
320 {
321 int pn;
322
323 for (pn = 1; pn <= nrules; pn++)
324 {
325 if (!BITISSET (P, pn))
326 {
327 rlhs[pn] = -1;
328 }
329 }
330 }
331
332 /* remove useless symbols */
333 if (nuseless_nonterminals > 0)
334 {
335
336 int i, n;
337 /* short j; JF unused */
338 short *nontermmap;
339 rule r;
340
341 /* Create a map of nonterminal number to new nonterminal
342 number. -1 in the map means it was useless and is being
343 eliminated. */
344
345 nontermmap = NEW2 (nvars, short) - ntokens;
346 for (i = ntokens; i < nsyms; i++)
347 nontermmap[i] = -1;
348
349 n = ntokens;
350 for (i = ntokens; i < nsyms; i++)
351 if (BITISSET (V, i))
352 nontermmap[i] = n++;
353
354 /* Shuffle elements of tables indexed by symbol number. */
355
356 for (i = ntokens; i < nsyms; i++)
357 {
358 n = nontermmap[i];
359 if (n >= 0)
360 {
361 sassoc[n] = sassoc[i];
362 sprec[n] = sprec[i];
363 tags[n] = tags[i];
364 }
365 else
366 {
367 free (tags[i]);
368 }
369 }
370
371 /* Replace all symbol numbers in valid data structures. */
372
373 for (i = 1; i <= nrules; i++)
374 {
375 /* Ignore the rules disabled above. */
376 if (rlhs[i] >= 0)
377 rlhs[i] = nontermmap[rlhs[i]];
378 if (ISVAR (rprecsym[i]))
379 /* Can this happen? */
380 rprecsym[i] = nontermmap[rprecsym[i]];
381 }
382
383 for (r = ritem; *r; r++)
384 if (ISVAR (*r))
385 *r = nontermmap[*r];
386
387 start_symbol = nontermmap[start_symbol];
388
389 nsyms -= nuseless_nonterminals;
390 nvars -= nuseless_nonterminals;
391
392 free (&nontermmap[ntokens]);
393 }
394 }
395
396 static void
397 print_results (void)
398 {
399 int i;
400 /* short j; JF unused */
401 rule r;
402 bool b;
403
404 if (nuseless_nonterminals > 0)
405 {
406 fprintf (foutput, _("Useless nonterminals:\n\n"));
407 for (i = ntokens; i < nsyms; i++)
408 if (!BITISSET (V, i))
409 fprintf (foutput, " %s\n", tags[i]);
410 }
411 b = FALSE;
412 for (i = 0; i < ntokens; i++)
413 {
414 if (!BITISSET (V, i) && !BITISSET (V1, i))
415 {
416 if (!b)
417 {
418 fprintf (foutput, _("\n\nTerminals which are not used:\n\n"));
419 b = TRUE;
420 }
421 fprintf (foutput, " %s\n", tags[i]);
422 }
423 }
424
425 if (nuseless_productions > 0)
426 {
427 fprintf (foutput, _("\n\nUseless rules:\n\n"));
428 for (i = 1; i <= nrules; i++)
429 {
430 if (!BITISSET (P, i))
431 {
432 fprintf (foutput, "#%-4d ", i);
433 fprintf (foutput, "%s :\t", tags[rlhs[i]]);
434 for (r = &ritem[rrhs[i]]; *r >= 0; r++)
435 {
436 fprintf (foutput, " %s", tags[*r]);
437 }
438 fprintf (foutput, ";\n");
439 }
440 }
441 }
442 if (nuseless_nonterminals > 0 || nuseless_productions > 0 || b)
443 fprintf (foutput, "\n\n");
444 }
445 \f
446 #if 0 /* XXX currently unused. */
447 static void
448 dump_grammar (void)
449 {
450 int i;
451 rule r;
452
453 fprintf (foutput,
454 "ntokens = %d, nvars = %d, nsyms = %d, nrules = %d, nitems = %d\n\n",
455 ntokens, nvars, nsyms, nrules, nitems);
456 fprintf (foutput, _("Variables\n---------\n\n"));
457 fprintf (foutput, _("Value Sprec Sassoc Tag\n"));
458 for (i = ntokens; i < nsyms; i++)
459 fprintf (foutput, "%5d %5d %5d %s\n", i, sprec[i], sassoc[i], tags[i]);
460 fprintf (foutput, "\n\n");
461 fprintf (foutput, _("Rules\n-----\n\n"));
462 for (i = 1; i <= nrules; i++)
463 {
464 fprintf (foutput, "%-5d(%5d%5d)%5d : (@%-5d)",
465 i, rprec[i], rassoc[i], rlhs[i], rrhs[i]);
466 for (r = &ritem[rrhs[i]]; *r > 0; r++)
467 fprintf (foutput, "%5d", *r);
468 fprintf (foutput, " [%d]\n", -(*r));
469 }
470 fprintf (foutput, "\n\n");
471 fprintf (foutput, _("Rules interpreted\n-----------------\n\n"));
472 for (i = 1; i <= nrules; i++)
473 {
474 fprintf (foutput, "%-5d %s :", i, tags[rlhs[i]]);
475 for (r = &ritem[rrhs[i]]; *r > 0; r++)
476 fprintf (foutput, " %s", tags[*r]);
477 fprintf (foutput, "\n");
478 }
479 fprintf (foutput, "\n\n");
480 }
481
482 #endif
483
484
485 static void
486 print_notices (void)
487 {
488 if (fixed_outfiles && nuseless_productions)
489 fprintf (stderr, _("%d rules never reduced\n"), nuseless_productions);
490
491 fprintf (stderr, _("%s contains "), infile);
492
493 if (nuseless_nonterminals > 0)
494 {
495 fprintf (stderr, _("%d useless nonterminal%s"),
496 nuseless_nonterminals,
497 (nuseless_nonterminals == 1 ? "" : "s"));
498 }
499 if (nuseless_nonterminals > 0 && nuseless_productions > 0)
500 fprintf (stderr, _(" and "));
501
502 if (nuseless_productions > 0)
503 {
504 fprintf (stderr, _("%d useless rule%s"),
505 nuseless_productions, (nuseless_productions == 1 ? "" : "s"));
506 }
507 fprintf (stderr, "\n");
508 fflush (stderr);
509 }
510 \f
511 void
512 reduce_grammar (void)
513 {
514 bool reduced;
515
516 /* Allocate the global sets used to compute the reduced grammar */
517
518 N = NEW2 (WORDSIZE (nvars), unsigned);
519 P = NEW2 (WORDSIZE (nrules + 1), unsigned);
520 V = NEW2 (WORDSIZE (nsyms), unsigned);
521 V1 = NEW2 (WORDSIZE (nsyms), unsigned);
522
523 useless_nonterminals ();
524 inaccessable_symbols ();
525
526 reduced = (bool) (nuseless_nonterminals + nuseless_productions > 0);
527
528 if (verboseflag)
529 print_results ();
530
531 if (reduced == FALSE)
532 goto done_reducing;
533
534 print_notices ();
535
536 if (!BITISSET (N, start_symbol - ntokens))
537 fatal (_("Start symbol %s does not derive any sentence"),
538 tags[start_symbol]);
539
540 reduce_grammar_tables ();
541 #if 0
542 if (verboseflag)
543 {
544 fprintf (foutput, "REDUCED GRAMMAR\n\n");
545 dump_grammar ();
546 }
547 #endif
548 statisticsflag = FALSE; /* someday getopts should handle this */
549 if (statisticsflag == TRUE)
550 fprintf (stderr, _("reduced %s defines %d terminal%s, %d nonterminal%s\
551 , and %d production%s.\n"),
552 infile,
553 ntokens,
554 (ntokens == 1 ? "" : "s"),
555 nvars,
556 (nvars == 1 ? "" : "s"),
557 nrules,
558 (nrules == 1 ? "" : "s"));
559
560 done_reducing:
561 /* Free the global sets used to compute the reduced grammar */
562
563 FREE (N);
564 FREE (V);
565 FREE (P);
566 }