]> git.saurik.com Git - bison.git/blob - src/reduce.c
Use prototypes if the compiler understands them.
[bison.git] / src / reduce.c
1 /* Grammar reduction for Bison.
2 Copyright (C) 1988, 1989 Free Software Foundation, Inc.
3
4 This file is part of Bison, the GNU Compiler Compiler.
5
6 Bison is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 Bison is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with Bison; see the file COPYING. If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
19
20
21 /*
22 * Reduce the grammar: Find and eliminate unreachable terminals,
23 * nonterminals, and productions. David S. Bakin.
24 */
25
26 /*
27 * Don't eliminate unreachable terminals: They may be used by the user's
28 * parser.
29 */
30
31 #include <stdio.h>
32 #include "system.h"
33 #include "files.h"
34 #include "gram.h"
35 #include "machine.h"
36 #include "alloc.h"
37
38
39 extern char **tags; /* reader.c */
40 extern int verboseflag; /* getargs.c */
41 static int statisticsflag; /* XXXXXXX */
42 extern int fixed_outfiles;
43
44 #ifndef TRUE
45 #define TRUE (1)
46 #define FALSE (0)
47 #endif
48 typedef int bool;
49 typedef unsigned *BSet;
50 typedef short *rule;
51
52
53 /*
54 * N is set of all nonterminals which are not useless. P is set of all rules
55 * which have no useless nonterminals in their RHS. V is the set of all
56 * accessible symbols.
57 */
58
59 static BSet N, P, V, V1;
60
61 static int nuseful_productions, nuseless_productions,
62 nuseful_nonterminals, nuseless_nonterminals;
63
64
65 bool bits_equal PARAMS((BSet, BSet, int));
66 int nbits PARAMS((unsigned));
67 int bits_size PARAMS((BSet, int));
68 void reduce_grammar PARAMS((void));
69 static void useless_nonterminals PARAMS((void));
70 static void inaccessable_symbols PARAMS((void));
71 static void reduce_grammar_tables PARAMS((void));
72 static void print_results PARAMS((void));
73 static void print_notices PARAMS((void));
74 void dump_grammar PARAMS((void));
75
76 extern void fatals PARAMS((char *, char *));
77 \f
78
79 bool
80 bits_equal (BSet L, BSet R, int n)
81 {
82 int i;
83
84 for (i = n - 1; i >= 0; i--)
85 if (L[i] != R[i])
86 return FALSE;
87 return TRUE;
88 }
89
90
91 int
92 nbits (unsigned i)
93 {
94 int count = 0;
95
96 while (i != 0) {
97 i ^= (i & ((unsigned) (- (int) i)));
98 ++count;
99 }
100 return count;
101 }
102
103
104 int
105 bits_size (BSet S, int n)
106 {
107 int i, count = 0;
108
109 for (i = n - 1; i >= 0; i--)
110 count += nbits(S[i]);
111 return count;
112 }
113 \f
114 void
115 reduce_grammar (void)
116 {
117 bool reduced;
118
119 /* Allocate the global sets used to compute the reduced grammar */
120
121 N = NEW2(WORDSIZE(nvars), unsigned);
122 P = NEW2(WORDSIZE(nrules + 1), unsigned);
123 V = NEW2(WORDSIZE(nsyms), unsigned);
124 V1 = NEW2(WORDSIZE(nsyms), unsigned);
125
126 useless_nonterminals();
127 inaccessable_symbols();
128
129 reduced = (bool) (nuseless_nonterminals + nuseless_productions > 0);
130
131 if (verboseflag)
132 print_results();
133
134 if (reduced == FALSE)
135 goto done_reducing;
136
137 print_notices();
138
139 if (!BITISSET(N, start_symbol - ntokens))
140 fatals(_("Start symbol %s does not derive any sentence"),
141 tags[start_symbol]);
142
143 reduce_grammar_tables();
144 /* if (verboseflag) {
145 fprintf(foutput, "REDUCED GRAMMAR\n\n");
146 dump_grammar();
147 }
148 */
149
150 /**/ statisticsflag = FALSE; /* someday getopts should handle this */
151 if (statisticsflag == TRUE)
152 fprintf(stderr,
153 _("reduced %s defines %d terminal%s, %d nonterminal%s\
154 , and %d production%s.\n"), infile,
155 ntokens, (ntokens == 1 ? "" : "s"),
156 nvars, (nvars == 1 ? "" : "s"),
157 nrules, (nrules == 1 ? "" : "s"));
158
159 done_reducing:
160
161 /* Free the global sets used to compute the reduced grammar */
162
163 FREE(N);
164 FREE(V);
165 FREE(P);
166
167 }
168 \f
169 /*
170 * Another way to do this would be with a set for each production and then do
171 * subset tests against N0, but even for the C grammar the whole reducing
172 * process takes only 2 seconds on my 8Mhz AT.
173 */
174
175 static bool
176 useful_production (int i, BSet N0)
177 {
178 rule r;
179 short n;
180
181 /*
182 * A production is useful if all of the nonterminals in its RHS
183 * appear in the set of useful nonterminals.
184 */
185
186 for (r = &ritem[rrhs[i]]; *r > 0; r++)
187 if (ISVAR(n = *r))
188 if (!BITISSET(N0, n - ntokens))
189 return FALSE;
190 return TRUE;
191 }
192
193
194 /* Remember that rules are 1-origin, symbols are 0-origin. */
195
196 static void
197 useless_nonterminals (void)
198 {
199 BSet Np, Ns;
200 int i, n;
201
202 /*
203 * N is set as built. Np is set being built this iteration. P is set
204 * of all productions which have a RHS all in N.
205 */
206
207 Np = NEW2(WORDSIZE(nvars), unsigned);
208
209 /*
210 * The set being computed is a set of nonterminals which can derive
211 * the empty string or strings consisting of all terminals. At each
212 * iteration a nonterminal is added to the set if there is a
213 * production with that nonterminal as its LHS for which all the
214 * nonterminals in its RHS are already in the set. Iterate until the
215 * set being computed remains unchanged. Any nonterminals not in the
216 * set at that point are useless in that they will never be used in
217 * deriving a sentence of the language.
218 *
219 * This iteration doesn't use any special traversal over the
220 * productions. A set is kept of all productions for which all the
221 * nonterminals in the RHS are in useful. Only productions not in
222 * this set are scanned on each iteration. At the end, this set is
223 * saved to be used when finding useful productions: only productions
224 * in this set will appear in the final grammar.
225 */
226
227 n = 0;
228 while (1)
229 {
230 for (i = WORDSIZE(nvars) - 1; i >= 0; i--)
231 Np[i] = N[i];
232 for (i = 1; i <= nrules; i++)
233 {
234 if (!BITISSET(P, i))
235 {
236 if (useful_production(i, N))
237 {
238 SETBIT(Np, rlhs[i] - ntokens);
239 SETBIT(P, i);
240 }
241 }
242 }
243 if (bits_equal(N, Np, WORDSIZE(nvars)))
244 break;
245 Ns = Np;
246 Np = N;
247 N = Ns;
248 }
249 FREE(N);
250 N = Np;
251 }
252 \f
253 static void
254 inaccessable_symbols (void)
255 {
256 BSet Vp, Vs, Pp;
257 int i, n;
258 short t;
259 rule r;
260
261 /*
262 * Find out which productions are reachable and which symbols are
263 * used. Starting with an empty set of productions and a set of
264 * symbols which only has the start symbol in it, iterate over all
265 * productions until the set of productions remains unchanged for an
266 * iteration. For each production which has a LHS in the set of
267 * reachable symbols, add the production to the set of reachable
268 * productions, and add all of the nonterminals in the RHS of the
269 * production to the set of reachable symbols.
270 *
271 * Consider only the (partially) reduced grammar which has only
272 * nonterminals in N and productions in P.
273 *
274 * The result is the set P of productions in the reduced grammar, and
275 * the set V of symbols in the reduced grammar.
276 *
277 * Although this algorithm also computes the set of terminals which are
278 * reachable, no terminal will be deleted from the grammar. Some
279 * terminals might not be in the grammar but might be generated by
280 * semantic routines, and so the user might want them available with
281 * specified numbers. (Is this true?) However, the nonreachable
282 * terminals are printed (if running in verbose mode) so that the user
283 * can know.
284 */
285
286 Vp = NEW2(WORDSIZE(nsyms), unsigned);
287 Pp = NEW2(WORDSIZE(nrules + 1), unsigned);
288
289 /* If the start symbol isn't useful, then nothing will be useful. */
290 if (!BITISSET(N, start_symbol - ntokens))
291 goto end_iteration;
292
293 SETBIT(V, start_symbol);
294
295 n = 0;
296 while (1)
297 {
298 for (i = WORDSIZE(nsyms) - 1; i >= 0; i--)
299 Vp[i] = V[i];
300 for (i = 1; i <= nrules; i++)
301 {
302 if (!BITISSET(Pp, i) && BITISSET(P, i) &&
303 BITISSET(V, rlhs[i]))
304 {
305 for (r = &ritem[rrhs[i]]; *r >= 0; r++)
306 {
307 if (ISTOKEN(t = *r)
308 || BITISSET(N, t - ntokens))
309 {
310 SETBIT(Vp, t);
311 }
312 }
313 SETBIT(Pp, i);
314 }
315 }
316 if (bits_equal(V, Vp, WORDSIZE(nsyms)))
317 {
318 break;
319 }
320 Vs = Vp;
321 Vp = V;
322 V = Vs;
323 }
324 end_iteration:
325
326 FREE(V);
327 V = Vp;
328
329 /* Tokens 0, 1, and 2 are internal to Bison. Consider them useful. */
330 SETBIT(V, 0); /* end-of-input token */
331 SETBIT(V, 1); /* error token */
332 SETBIT(V, 2); /* some undefined token */
333
334 FREE(P);
335 P = Pp;
336
337 nuseful_productions = bits_size(P, WORDSIZE(nrules + 1));
338 nuseless_productions = nrules - nuseful_productions;
339
340 nuseful_nonterminals = 0;
341 for (i = ntokens; i < nsyms; i++)
342 if (BITISSET(V, i))
343 nuseful_nonterminals++;
344 nuseless_nonterminals = nvars - nuseful_nonterminals;
345
346 /* A token that was used in %prec should not be warned about. */
347 for (i = 1; i < nrules; i++)
348 if (rprecsym[i] != 0)
349 SETBIT(V1, rprecsym[i]);
350 }
351 \f
352 static void
353 reduce_grammar_tables (void)
354 {
355 /* This is turned off because we would need to change the numbers
356 in the case statements in the actions file. */
357 #if 0
358 /* remove useless productions */
359 if (nuseless_productions > 0)
360 {
361 short np, pn, ni, pi;
362
363 np = 0;
364 ni = 0;
365 for (pn = 1; pn <= nrules; pn++)
366 {
367 if (BITISSET(P, pn))
368 {
369 np++;
370 if (pn != np)
371 {
372 rlhs[np] = rlhs[pn];
373 rline[np] = rline[pn];
374 rprec[np] = rprec[pn];
375 rassoc[np] = rassoc[pn];
376 rrhs[np] = rrhs[pn];
377 if (rrhs[np] != ni)
378 {
379 pi = rrhs[np];
380 rrhs[np] = ni;
381 while (ritem[pi] >= 0)
382 ritem[ni++] = ritem[pi++];
383 ritem[ni++] = -np;
384 }
385 } else {
386 while (ritem[ni++] >= 0);
387 }
388 }
389 }
390 ritem[ni] = 0;
391 nrules -= nuseless_productions;
392 nitems = ni;
393
394 /*
395 * Is it worth it to reduce the amount of memory for the
396 * grammar? Probably not.
397 */
398
399 }
400 #endif /* 0 */
401 /* Disable useless productions,
402 since they may contain useless nonterms
403 that would get mapped below to -1 and confuse everyone. */
404 if (nuseless_productions > 0)
405 {
406 int pn;
407
408 for (pn = 1; pn <= nrules; pn++)
409 {
410 if (!BITISSET(P, pn))
411 {
412 rlhs[pn] = -1;
413 }
414 }
415 }
416
417 /* remove useless symbols */
418 if (nuseless_nonterminals > 0)
419 {
420
421 int i, n;
422 /* short j; JF unused */
423 short *nontermmap;
424 rule r;
425
426 /*
427 * create a map of nonterminal number to new nonterminal
428 * number. -1 in the map means it was useless and is being
429 * eliminated.
430 */
431
432 nontermmap = NEW2(nvars, short) - ntokens;
433 for (i = ntokens; i < nsyms; i++)
434 nontermmap[i] = -1;
435
436 n = ntokens;
437 for (i = ntokens; i < nsyms; i++)
438 if (BITISSET(V, i))
439 nontermmap[i] = n++;
440
441 /* Shuffle elements of tables indexed by symbol number. */
442
443 for (i = ntokens; i < nsyms; i++)
444 {
445 n = nontermmap[i];
446 if (n >= 0)
447 {
448 sassoc[n] = sassoc[i];
449 sprec[n] = sprec[i];
450 tags[n] = tags[i];
451 } else {
452 free(tags[i]);
453 }
454 }
455
456 /* Replace all symbol numbers in valid data structures. */
457
458 for (i = 1; i <= nrules; i++)
459 {
460 /* Ignore the rules disabled above. */
461 if (rlhs[i] >= 0)
462 rlhs[i] = nontermmap[rlhs[i]];
463 if (ISVAR (rprecsym[i]))
464 /* Can this happen? */
465 rprecsym[i] = nontermmap[rprecsym[i]];
466 }
467
468 for (r = ritem; *r; r++)
469 if (ISVAR(*r))
470 *r = nontermmap[*r];
471
472 start_symbol = nontermmap[start_symbol];
473
474 nsyms -= nuseless_nonterminals;
475 nvars -= nuseless_nonterminals;
476
477 free(&nontermmap[ntokens]);
478 }
479 }
480 \f
481 static void
482 print_results (void)
483 {
484 int i;
485 /* short j; JF unused */
486 rule r;
487 bool b;
488
489 if (nuseless_nonterminals > 0)
490 {
491 fprintf(foutput, _("Useless nonterminals:\n\n"));
492 for (i = ntokens; i < nsyms; i++)
493 if (!BITISSET(V, i))
494 fprintf(foutput, " %s\n", tags[i]);
495 }
496 b = FALSE;
497 for (i = 0; i < ntokens; i++)
498 {
499 if (!BITISSET(V, i) && !BITISSET(V1, i))
500 {
501 if (!b)
502 {
503 fprintf(foutput, _("\n\nTerminals which are not used:\n\n"));
504 b = TRUE;
505 }
506 fprintf(foutput, " %s\n", tags[i]);
507 }
508 }
509
510 if (nuseless_productions > 0)
511 {
512 fprintf(foutput, _("\n\nUseless rules:\n\n"));
513 for (i = 1; i <= nrules; i++)
514 {
515 if (!BITISSET(P, i))
516 {
517 fprintf(foutput, "#%-4d ", i);
518 fprintf(foutput, "%s :\t", tags[rlhs[i]]);
519 for (r = &ritem[rrhs[i]]; *r >= 0; r++)
520 {
521 fprintf(foutput, " %s", tags[*r]);
522 }
523 fprintf(foutput, ";\n");
524 }
525 }
526 }
527 if (nuseless_nonterminals > 0 || nuseless_productions > 0 || b)
528 fprintf(foutput, "\n\n");
529 }
530 \f
531 void
532 dump_grammar (void)
533 {
534 int i;
535 rule r;
536
537 fprintf(foutput,
538 "ntokens = %d, nvars = %d, nsyms = %d, nrules = %d, nitems = %d\n\n",
539 ntokens, nvars, nsyms, nrules, nitems);
540 fprintf(foutput, _("Variables\n---------\n\n"));
541 fprintf(foutput, _("Value Sprec Sassoc Tag\n"));
542 for (i = ntokens; i < nsyms; i++)
543 fprintf(foutput, "%5d %5d %5d %s\n",
544 i, sprec[i], sassoc[i], tags[i]);
545 fprintf(foutput, "\n\n");
546 fprintf(foutput, _("Rules\n-----\n\n"));
547 for (i = 1; i <= nrules; i++)
548 {
549 fprintf(foutput, "%-5d(%5d%5d)%5d : (@%-5d)",
550 i, rprec[i], rassoc[i], rlhs[i], rrhs[i]);
551 for (r = &ritem[rrhs[i]]; *r > 0; r++)
552 fprintf(foutput, "%5d", *r);
553 fprintf(foutput, " [%d]\n", -(*r));
554 }
555 fprintf(foutput, "\n\n");
556 fprintf(foutput, _("Rules interpreted\n-----------------\n\n"));
557 for (i = 1; i <= nrules; i++)
558 {
559 fprintf(foutput, "%-5d %s :", i, tags[rlhs[i]]);
560 for (r = &ritem[rrhs[i]]; *r > 0; r++)
561 fprintf(foutput, " %s", tags[*r]);
562 fprintf(foutput, "\n");
563 }
564 fprintf(foutput, "\n\n");
565 }
566
567
568 static void
569 print_notices (void)
570 {
571 if (fixed_outfiles && nuseless_productions)
572 fprintf(stderr, _("%d rules never reduced\n"), nuseless_productions);
573
574 fprintf(stderr, _("%s contains "), infile);
575
576 if (nuseless_nonterminals > 0)
577 {
578 fprintf(stderr, _("%d useless nonterminal%s"),
579 nuseless_nonterminals,
580 (nuseless_nonterminals == 1 ? "" : "s"));
581 }
582 if (nuseless_nonterminals > 0 && nuseless_productions > 0)
583 fprintf(stderr, _(" and "));
584
585 if (nuseless_productions > 0)
586 {
587 fprintf(stderr, _("%d useless rule%s"),
588 nuseless_productions,
589 (nuseless_productions == 1 ? "" : "s"));
590 }
591 fprintf(stderr, "\n");
592 fflush(stderr);
593 }