]> git.saurik.com Git - bison.git/blob - src/reduce.c
* src/reader.c (copy_comment2): New function, same as former
[bison.git] / src / reduce.c
1 /* Grammar reduction for Bison.
2 Copyright (C) 1988, 1989 Free Software Foundation, Inc.
3
4 This file is part of Bison, the GNU Compiler Compiler.
5
6 Bison is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 Bison is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with Bison; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
21
22 /*
23 * Reduce the grammar: Find and eliminate unreachable terminals,
24 * nonterminals, and productions. David S. Bakin.
25 */
26
27 /*
28 * Don't eliminate unreachable terminals: They may be used by the user's
29 * parser.
30 */
31
32 #include <stdio.h>
33 #include "system.h"
34 #include "files.h"
35 #include "gram.h"
36 #include "machine.h"
37 #include "alloc.h"
38 #include "complain.h"
39
40
41 extern char **tags; /* reader.c */
42 extern int verboseflag; /* getargs.c */
43 static int statisticsflag; /* XXXXXXX */
44 extern int fixed_outfiles;
45
46 #ifndef TRUE
47 #define TRUE (1)
48 #define FALSE (0)
49 #endif
50 typedef int bool;
51 typedef unsigned *BSet;
52 typedef short *rule;
53
54
55 /*
56 * N is set of all nonterminals which are not useless. P is set of all rules
57 * which have no useless nonterminals in their RHS. V is the set of all
58 * accessible symbols.
59 */
60
61 static BSet N, P, V, V1;
62
63 static int nuseful_productions, nuseless_productions,
64 nuseful_nonterminals, nuseless_nonterminals;
65
66
67 bool bits_equal PARAMS((BSet, BSet, int));
68 int nbits PARAMS((unsigned));
69 int bits_size PARAMS((BSet, int));
70 void reduce_grammar PARAMS((void));
71 static void useless_nonterminals PARAMS((void));
72 static void inaccessable_symbols PARAMS((void));
73 static void reduce_grammar_tables PARAMS((void));
74 static void print_results PARAMS((void));
75 static void print_notices PARAMS((void));
76 void dump_grammar PARAMS((void));
77
78 \f
79
80 bool
81 bits_equal (BSet L, BSet R, int n)
82 {
83 int i;
84
85 for (i = n - 1; i >= 0; i--)
86 if (L[i] != R[i])
87 return FALSE;
88 return TRUE;
89 }
90
91
92 int
93 nbits (unsigned i)
94 {
95 int count = 0;
96
97 while (i != 0) {
98 i ^= (i & ((unsigned) (- (int) i)));
99 ++count;
100 }
101 return count;
102 }
103
104
105 int
106 bits_size (BSet S, int n)
107 {
108 int i, count = 0;
109
110 for (i = n - 1; i >= 0; i--)
111 count += nbits(S[i]);
112 return count;
113 }
114 \f
115 void
116 reduce_grammar (void)
117 {
118 bool reduced;
119
120 /* Allocate the global sets used to compute the reduced grammar */
121
122 N = NEW2(WORDSIZE(nvars), unsigned);
123 P = NEW2(WORDSIZE(nrules + 1), unsigned);
124 V = NEW2(WORDSIZE(nsyms), unsigned);
125 V1 = NEW2(WORDSIZE(nsyms), unsigned);
126
127 useless_nonterminals();
128 inaccessable_symbols();
129
130 reduced = (bool) (nuseless_nonterminals + nuseless_productions > 0);
131
132 if (verboseflag)
133 print_results();
134
135 if (reduced == FALSE)
136 goto done_reducing;
137
138 print_notices();
139
140 if (!BITISSET(N, start_symbol - ntokens))
141 fatal (_("Start symbol %s does not derive any sentence"),
142 tags[start_symbol]);
143
144 reduce_grammar_tables();
145 /*
146 if (verboseflag)
147 {
148 fprintf(foutput, "REDUCED GRAMMAR\n\n");
149 dump_grammar();
150 }
151 */
152 /**/ statisticsflag = FALSE; /* someday getopts should handle this */
153 if (statisticsflag == TRUE)
154 fprintf(stderr,
155 _("reduced %s defines %d terminal%s, %d nonterminal%s\
156 , and %d production%s.\n"), infile,
157 ntokens, (ntokens == 1 ? "" : "s"),
158 nvars, (nvars == 1 ? "" : "s"),
159 nrules, (nrules == 1 ? "" : "s"));
160
161 done_reducing:
162
163 /* Free the global sets used to compute the reduced grammar */
164
165 FREE(N);
166 FREE(V);
167 FREE(P);
168
169 }
170 \f
171 /*
172 * Another way to do this would be with a set for each production and then do
173 * subset tests against N0, but even for the C grammar the whole reducing
174 * process takes only 2 seconds on my 8Mhz AT.
175 */
176
177 static bool
178 useful_production (int i, BSet N0)
179 {
180 rule r;
181 short n;
182
183 /*
184 * A production is useful if all of the nonterminals in its RHS
185 * appear in the set of useful nonterminals.
186 */
187
188 for (r = &ritem[rrhs[i]]; *r > 0; r++)
189 if (ISVAR(n = *r))
190 if (!BITISSET(N0, n - ntokens))
191 return FALSE;
192 return TRUE;
193 }
194
195
196 /* Remember that rules are 1-origin, symbols are 0-origin. */
197
198 static void
199 useless_nonterminals (void)
200 {
201 BSet Np, Ns;
202 int i, n;
203
204 /*
205 * N is set as built. Np is set being built this iteration. P is set
206 * of all productions which have a RHS all in N.
207 */
208
209 Np = NEW2(WORDSIZE(nvars), unsigned);
210
211 /*
212 * The set being computed is a set of nonterminals which can derive
213 * the empty string or strings consisting of all terminals. At each
214 * iteration a nonterminal is added to the set if there is a
215 * production with that nonterminal as its LHS for which all the
216 * nonterminals in its RHS are already in the set. Iterate until the
217 * set being computed remains unchanged. Any nonterminals not in the
218 * set at that point are useless in that they will never be used in
219 * deriving a sentence of the language.
220 *
221 * This iteration doesn't use any special traversal over the
222 * productions. A set is kept of all productions for which all the
223 * nonterminals in the RHS are in useful. Only productions not in
224 * this set are scanned on each iteration. At the end, this set is
225 * saved to be used when finding useful productions: only productions
226 * in this set will appear in the final grammar.
227 */
228
229 n = 0;
230 while (1)
231 {
232 for (i = WORDSIZE(nvars) - 1; i >= 0; i--)
233 Np[i] = N[i];
234 for (i = 1; i <= nrules; i++)
235 {
236 if (!BITISSET(P, i))
237 {
238 if (useful_production(i, N))
239 {
240 SETBIT(Np, rlhs[i] - ntokens);
241 SETBIT(P, i);
242 }
243 }
244 }
245 if (bits_equal(N, Np, WORDSIZE(nvars)))
246 break;
247 Ns = Np;
248 Np = N;
249 N = Ns;
250 }
251 FREE(N);
252 N = Np;
253 }
254 \f
255 static void
256 inaccessable_symbols (void)
257 {
258 BSet Vp, Vs, Pp;
259 int i, n;
260 short t;
261 rule r;
262
263 /*
264 * Find out which productions are reachable and which symbols are
265 * used. Starting with an empty set of productions and a set of
266 * symbols which only has the start symbol in it, iterate over all
267 * productions until the set of productions remains unchanged for an
268 * iteration. For each production which has a LHS in the set of
269 * reachable symbols, add the production to the set of reachable
270 * productions, and add all of the nonterminals in the RHS of the
271 * production to the set of reachable symbols.
272 *
273 * Consider only the (partially) reduced grammar which has only
274 * nonterminals in N and productions in P.
275 *
276 * The result is the set P of productions in the reduced grammar, and
277 * the set V of symbols in the reduced grammar.
278 *
279 * Although this algorithm also computes the set of terminals which are
280 * reachable, no terminal will be deleted from the grammar. Some
281 * terminals might not be in the grammar but might be generated by
282 * semantic routines, and so the user might want them available with
283 * specified numbers. (Is this true?) However, the nonreachable
284 * terminals are printed (if running in verbose mode) so that the user
285 * can know.
286 */
287
288 Vp = NEW2(WORDSIZE(nsyms), unsigned);
289 Pp = NEW2(WORDSIZE(nrules + 1), unsigned);
290
291 /* If the start symbol isn't useful, then nothing will be useful. */
292 if (!BITISSET(N, start_symbol - ntokens))
293 goto end_iteration;
294
295 SETBIT(V, start_symbol);
296
297 n = 0;
298 while (1)
299 {
300 for (i = WORDSIZE(nsyms) - 1; i >= 0; i--)
301 Vp[i] = V[i];
302 for (i = 1; i <= nrules; i++)
303 {
304 if (!BITISSET(Pp, i) && BITISSET(P, i) &&
305 BITISSET(V, rlhs[i]))
306 {
307 for (r = &ritem[rrhs[i]]; *r >= 0; r++)
308 {
309 if (ISTOKEN(t = *r)
310 || BITISSET(N, t - ntokens))
311 {
312 SETBIT(Vp, t);
313 }
314 }
315 SETBIT(Pp, i);
316 }
317 }
318 if (bits_equal(V, Vp, WORDSIZE(nsyms)))
319 {
320 break;
321 }
322 Vs = Vp;
323 Vp = V;
324 V = Vs;
325 }
326 end_iteration:
327
328 FREE(V);
329 V = Vp;
330
331 /* Tokens 0, 1, and 2 are internal to Bison. Consider them useful. */
332 SETBIT(V, 0); /* end-of-input token */
333 SETBIT(V, 1); /* error token */
334 SETBIT(V, 2); /* some undefined token */
335
336 FREE(P);
337 P = Pp;
338
339 nuseful_productions = bits_size(P, WORDSIZE(nrules + 1));
340 nuseless_productions = nrules - nuseful_productions;
341
342 nuseful_nonterminals = 0;
343 for (i = ntokens; i < nsyms; i++)
344 if (BITISSET(V, i))
345 nuseful_nonterminals++;
346 nuseless_nonterminals = nvars - nuseful_nonterminals;
347
348 /* A token that was used in %prec should not be warned about. */
349 for (i = 1; i < nrules; i++)
350 if (rprecsym[i] != 0)
351 SETBIT(V1, rprecsym[i]);
352 }
353 \f
354 static void
355 reduce_grammar_tables (void)
356 {
357 /* This is turned off because we would need to change the numbers
358 in the case statements in the actions file. */
359 #if 0
360 /* remove useless productions */
361 if (nuseless_productions > 0)
362 {
363 short np, pn, ni, pi;
364
365 np = 0;
366 ni = 0;
367 for (pn = 1; pn <= nrules; pn++)
368 {
369 if (BITISSET(P, pn))
370 {
371 np++;
372 if (pn != np)
373 {
374 rlhs[np] = rlhs[pn];
375 rline[np] = rline[pn];
376 rprec[np] = rprec[pn];
377 rassoc[np] = rassoc[pn];
378 rrhs[np] = rrhs[pn];
379 if (rrhs[np] != ni)
380 {
381 pi = rrhs[np];
382 rrhs[np] = ni;
383 while (ritem[pi] >= 0)
384 ritem[ni++] = ritem[pi++];
385 ritem[ni++] = -np;
386 }
387 } else {
388 while (ritem[ni++] >= 0);
389 }
390 }
391 }
392 ritem[ni] = 0;
393 nrules -= nuseless_productions;
394 nitems = ni;
395
396 /*
397 * Is it worth it to reduce the amount of memory for the
398 * grammar? Probably not.
399 */
400
401 }
402 #endif /* 0 */
403 /* Disable useless productions,
404 since they may contain useless nonterms
405 that would get mapped below to -1 and confuse everyone. */
406 if (nuseless_productions > 0)
407 {
408 int pn;
409
410 for (pn = 1; pn <= nrules; pn++)
411 {
412 if (!BITISSET(P, pn))
413 {
414 rlhs[pn] = -1;
415 }
416 }
417 }
418
419 /* remove useless symbols */
420 if (nuseless_nonterminals > 0)
421 {
422
423 int i, n;
424 /* short j; JF unused */
425 short *nontermmap;
426 rule r;
427
428 /*
429 * create a map of nonterminal number to new nonterminal
430 * number. -1 in the map means it was useless and is being
431 * eliminated.
432 */
433
434 nontermmap = NEW2(nvars, short) - ntokens;
435 for (i = ntokens; i < nsyms; i++)
436 nontermmap[i] = -1;
437
438 n = ntokens;
439 for (i = ntokens; i < nsyms; i++)
440 if (BITISSET(V, i))
441 nontermmap[i] = n++;
442
443 /* Shuffle elements of tables indexed by symbol number. */
444
445 for (i = ntokens; i < nsyms; i++)
446 {
447 n = nontermmap[i];
448 if (n >= 0)
449 {
450 sassoc[n] = sassoc[i];
451 sprec[n] = sprec[i];
452 tags[n] = tags[i];
453 } else {
454 free(tags[i]);
455 }
456 }
457
458 /* Replace all symbol numbers in valid data structures. */
459
460 for (i = 1; i <= nrules; i++)
461 {
462 /* Ignore the rules disabled above. */
463 if (rlhs[i] >= 0)
464 rlhs[i] = nontermmap[rlhs[i]];
465 if (ISVAR (rprecsym[i]))
466 /* Can this happen? */
467 rprecsym[i] = nontermmap[rprecsym[i]];
468 }
469
470 for (r = ritem; *r; r++)
471 if (ISVAR(*r))
472 *r = nontermmap[*r];
473
474 start_symbol = nontermmap[start_symbol];
475
476 nsyms -= nuseless_nonterminals;
477 nvars -= nuseless_nonterminals;
478
479 free(&nontermmap[ntokens]);
480 }
481 }
482 \f
483 static void
484 print_results (void)
485 {
486 int i;
487 /* short j; JF unused */
488 rule r;
489 bool b;
490
491 if (nuseless_nonterminals > 0)
492 {
493 fprintf(foutput, _("Useless nonterminals:\n\n"));
494 for (i = ntokens; i < nsyms; i++)
495 if (!BITISSET(V, i))
496 fprintf(foutput, " %s\n", tags[i]);
497 }
498 b = FALSE;
499 for (i = 0; i < ntokens; i++)
500 {
501 if (!BITISSET(V, i) && !BITISSET(V1, i))
502 {
503 if (!b)
504 {
505 fprintf(foutput, _("\n\nTerminals which are not used:\n\n"));
506 b = TRUE;
507 }
508 fprintf(foutput, " %s\n", tags[i]);
509 }
510 }
511
512 if (nuseless_productions > 0)
513 {
514 fprintf(foutput, _("\n\nUseless rules:\n\n"));
515 for (i = 1; i <= nrules; i++)
516 {
517 if (!BITISSET(P, i))
518 {
519 fprintf(foutput, "#%-4d ", i);
520 fprintf(foutput, "%s :\t", tags[rlhs[i]]);
521 for (r = &ritem[rrhs[i]]; *r >= 0; r++)
522 {
523 fprintf(foutput, " %s", tags[*r]);
524 }
525 fprintf(foutput, ";\n");
526 }
527 }
528 }
529 if (nuseless_nonterminals > 0 || nuseless_productions > 0 || b)
530 fprintf(foutput, "\n\n");
531 }
532 \f
533 void
534 dump_grammar (void)
535 {
536 int i;
537 rule r;
538
539 fprintf(foutput,
540 "ntokens = %d, nvars = %d, nsyms = %d, nrules = %d, nitems = %d\n\n",
541 ntokens, nvars, nsyms, nrules, nitems);
542 fprintf(foutput, _("Variables\n---------\n\n"));
543 fprintf(foutput, _("Value Sprec Sassoc Tag\n"));
544 for (i = ntokens; i < nsyms; i++)
545 fprintf(foutput, "%5d %5d %5d %s\n",
546 i, sprec[i], sassoc[i], tags[i]);
547 fprintf(foutput, "\n\n");
548 fprintf(foutput, _("Rules\n-----\n\n"));
549 for (i = 1; i <= nrules; i++)
550 {
551 fprintf(foutput, "%-5d(%5d%5d)%5d : (@%-5d)",
552 i, rprec[i], rassoc[i], rlhs[i], rrhs[i]);
553 for (r = &ritem[rrhs[i]]; *r > 0; r++)
554 fprintf(foutput, "%5d", *r);
555 fprintf(foutput, " [%d]\n", -(*r));
556 }
557 fprintf(foutput, "\n\n");
558 fprintf(foutput, _("Rules interpreted\n-----------------\n\n"));
559 for (i = 1; i <= nrules; i++)
560 {
561 fprintf(foutput, "%-5d %s :", i, tags[rlhs[i]]);
562 for (r = &ritem[rrhs[i]]; *r > 0; r++)
563 fprintf(foutput, " %s", tags[*r]);
564 fprintf(foutput, "\n");
565 }
566 fprintf(foutput, "\n\n");
567 }
568
569
570 static void
571 print_notices (void)
572 {
573 if (fixed_outfiles && nuseless_productions)
574 fprintf(stderr, _("%d rules never reduced\n"), nuseless_productions);
575
576 fprintf(stderr, _("%s contains "), infile);
577
578 if (nuseless_nonterminals > 0)
579 {
580 fprintf(stderr, _("%d useless nonterminal%s"),
581 nuseless_nonterminals,
582 (nuseless_nonterminals == 1 ? "" : "s"));
583 }
584 if (nuseless_nonterminals > 0 && nuseless_productions > 0)
585 fprintf(stderr, _(" and "));
586
587 if (nuseless_productions > 0)
588 {
589 fprintf(stderr, _("%d useless rule%s"),
590 nuseless_productions,
591 (nuseless_productions == 1 ? "" : "s"));
592 }
593 fprintf(stderr, "\n");
594 fflush(stderr);
595 }