]> git.saurik.com Git - bison.git/blame - src/reduce.c
* src/print.h: New file.
[bison.git] / src / reduce.c
CommitLineData
572909b5 1/* Grammar reduction for Bison.
015acc48 2 Copyright (C) 1988, 1989, 2000 Free Software Foundation, Inc.
572909b5 3
015acc48 4 This file is part of Bison, the GNU Compiler Compiler.
572909b5 5
015acc48
AD
6 Bison is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
572909b5 10
015acc48
AD
11 Bison is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
572909b5 15
015acc48
AD
16 You should have received a copy of the GNU General Public License
17 along with Bison; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
572909b5
RS
20
21
015acc48
AD
22/* Reduce the grammar: Find and eliminate unreachable terminals,
23 nonterminals, and productions. David S. Bakin. */
572909b5 24
015acc48
AD
25/* Don't eliminate unreachable terminals: They may be used by the
26 user's parser. */
572909b5 27
572909b5 28#include "system.h"
ceed8467 29#include "getargs.h"
572909b5
RS
30#include "files.h"
31#include "gram.h"
7612000c 32#include "alloc.h"
a0f6b076 33#include "complain.h"
015acc48 34#include "reduce.h"
572909b5 35
015acc48
AD
36extern char **tags; /* reader.c */
37static int statisticsflag; /* XXXXXXX */
38extern int fixed_outfiles;
572909b5 39
572909b5 40typedef unsigned *BSet;
015acc48 41typedef short *rule;
572909b5 42
572909b5 43
015acc48
AD
44/* N is set of all nonterminals which are not useless. P is set of
45 all rules which have no useless nonterminals in their RHS. V is
46 the set of all accessible symbols. */
572909b5 47
015acc48 48static BSet N, P, V, V1;
572909b5 49
015acc48
AD
50static int nuseful_productions;
51static int nuseless_productions;
52static int nuseful_nonterminals;
53static int nuseless_nonterminals;
572909b5 54\f
4a120d45 55static bool
d2729d44 56bits_equal (BSet L, BSet R, int n)
572909b5
RS
57{
58 int i;
59
60 for (i = n - 1; i >= 0; i--)
61 if (L[i] != R[i])
62 return FALSE;
63 return TRUE;
64}
65
66
4a120d45 67static int
d2729d44 68nbits (unsigned i)
572909b5
RS
69{
70 int count = 0;
71
015acc48
AD
72 while (i != 0)
73 {
74 i ^= (i & ((unsigned) (-(int) i)));
75 ++count;
76 }
572909b5
RS
77 return count;
78}
79
80
4a120d45 81static int
d2729d44 82bits_size (BSet S, int n)
572909b5
RS
83{
84 int i, count = 0;
85
86 for (i = n - 1; i >= 0; i--)
015acc48 87 count += nbits (S[i]);
572909b5
RS
88 return count;
89}
90\f
015acc48
AD
91/*-------------------------------------------------------------------.
92| Another way to do this would be with a set for each production and |
93| then do subset tests against N0, but even for the C grammar the |
94| whole reducing process takes only 2 seconds on my 8Mhz AT. |
95`-------------------------------------------------------------------*/
572909b5 96
a083fbbf 97static bool
d2729d44 98useful_production (int i, BSet N0)
572909b5 99{
015acc48 100 rule r;
572909b5
RS
101 short n;
102
015acc48
AD
103 /* A production is useful if all of the nonterminals in its appear
104 in the set of useful nonterminals. */
572909b5
RS
105
106 for (r = &ritem[rrhs[i]]; *r > 0; r++)
015acc48
AD
107 if (ISVAR (n = *r))
108 if (!BITISSET (N0, n - ntokens))
572909b5
RS
109 return FALSE;
110 return TRUE;
111}
112
113
015acc48
AD
114/*---------------------------------------------------------.
115| Remember that rules are 1-origin, symbols are 0-origin. |
116`---------------------------------------------------------*/
572909b5 117
a083fbbf 118static void
d2729d44 119useless_nonterminals (void)
572909b5
RS
120{
121 BSet Np, Ns;
015acc48
AD
122 int i, n;
123
124 /* N is set as built. Np is set being built this iteration. P is
125 set of all productions which have a RHS all in N. */
126
127 Np = NEW2 (WORDSIZE (nvars), unsigned);
128
129 /* The set being computed is a set of nonterminals which can derive
130 the empty string or strings consisting of all terminals. At each
131 iteration a nonterminal is added to the set if there is a
132 production with that nonterminal as its LHS for which all the
133 nonterminals in its RHS are already in the set. Iterate until
134 the set being computed remains unchanged. Any nonterminals not
135 in the set at that point are useless in that they will never be
136 used in deriving a sentence of the language.
137
138 This iteration doesn't use any special traversal over the
139 productions. A set is kept of all productions for which all the
140 nonterminals in the RHS are in useful. Only productions not in
141 this set are scanned on each iteration. At the end, this set is
142 saved to be used when finding useful productions: only
143 productions in this set will appear in the final grammar. */
572909b5
RS
144
145 n = 0;
146 while (1)
147 {
015acc48 148 for (i = WORDSIZE (nvars) - 1; i >= 0; i--)
572909b5
RS
149 Np[i] = N[i];
150 for (i = 1; i <= nrules; i++)
151 {
015acc48 152 if (!BITISSET (P, i))
572909b5 153 {
015acc48 154 if (useful_production (i, N))
572909b5 155 {
015acc48
AD
156 SETBIT (Np, rlhs[i] - ntokens);
157 SETBIT (P, i);
572909b5
RS
158 }
159 }
160 }
015acc48 161 if (bits_equal (N, Np, WORDSIZE (nvars)))
572909b5
RS
162 break;
163 Ns = Np;
164 Np = N;
165 N = Ns;
166 }
015acc48 167 FREE (N);
572909b5
RS
168 N = Np;
169}
015acc48
AD
170
171
a083fbbf 172static void
d2729d44 173inaccessable_symbols (void)
572909b5 174{
015acc48
AD
175 BSet Vp, Vs, Pp;
176 int i, n;
572909b5 177 short t;
015acc48
AD
178 rule r;
179
180 /* Find out which productions are reachable and which symbols are
181 used. Starting with an empty set of productions and a set of
182 symbols which only has the start symbol in it, iterate over all
183 productions until the set of productions remains unchanged for an
184 iteration. For each production which has a LHS in the set of
185 reachable symbols, add the production to the set of reachable
186 productions, and add all of the nonterminals in the RHS of the
187 production to the set of reachable symbols.
188
189 Consider only the (partially) reduced grammar which has only
190 nonterminals in N and productions in P.
191
192 The result is the set P of productions in the reduced grammar,
193 and the set V of symbols in the reduced grammar.
194
195 Although this algorithm also computes the set of terminals which
196 are reachable, no terminal will be deleted from the grammar. Some
197 terminals might not be in the grammar but might be generated by
198 semantic routines, and so the user might want them available with
199 specified numbers. (Is this true?) However, the nonreachable
200 terminals are printed (if running in verbose mode) so that the
201 user can know. */
202
203 Vp = NEW2 (WORDSIZE (nsyms), unsigned);
204 Pp = NEW2 (WORDSIZE (nrules + 1), unsigned);
572909b5
RS
205
206 /* If the start symbol isn't useful, then nothing will be useful. */
015acc48 207 if (!BITISSET (N, start_symbol - ntokens))
572909b5
RS
208 goto end_iteration;
209
015acc48 210 SETBIT (V, start_symbol);
572909b5
RS
211
212 n = 0;
213 while (1)
214 {
015acc48 215 for (i = WORDSIZE (nsyms) - 1; i >= 0; i--)
572909b5
RS
216 Vp[i] = V[i];
217 for (i = 1; i <= nrules; i++)
218 {
015acc48 219 if (!BITISSET (Pp, i) && BITISSET (P, i) && BITISSET (V, rlhs[i]))
572909b5
RS
220 {
221 for (r = &ritem[rrhs[i]]; *r >= 0; r++)
222 {
015acc48 223 if (ISTOKEN (t = *r) || BITISSET (N, t - ntokens))
572909b5 224 {
015acc48 225 SETBIT (Vp, t);
572909b5
RS
226 }
227 }
015acc48 228 SETBIT (Pp, i);
572909b5
RS
229 }
230 }
015acc48 231 if (bits_equal (V, Vp, WORDSIZE (nsyms)))
572909b5
RS
232 {
233 break;
234 }
235 Vs = Vp;
236 Vp = V;
237 V = Vs;
238 }
015acc48 239end_iteration:
572909b5 240
015acc48 241 FREE (V);
572909b5
RS
242 V = Vp;
243
244 /* Tokens 0, 1, and 2 are internal to Bison. Consider them useful. */
015acc48
AD
245 SETBIT (V, 0); /* end-of-input token */
246 SETBIT (V, 1); /* error token */
247 SETBIT (V, 2); /* some undefined token */
572909b5 248
015acc48 249 FREE (P);
572909b5
RS
250 P = Pp;
251
015acc48 252 nuseful_productions = bits_size (P, WORDSIZE (nrules + 1));
572909b5
RS
253 nuseless_productions = nrules - nuseful_productions;
254
255 nuseful_nonterminals = 0;
256 for (i = ntokens; i < nsyms; i++)
015acc48 257 if (BITISSET (V, i))
572909b5
RS
258 nuseful_nonterminals++;
259 nuseless_nonterminals = nvars - nuseful_nonterminals;
260
261 /* A token that was used in %prec should not be warned about. */
262 for (i = 1; i < nrules; i++)
263 if (rprecsym[i] != 0)
015acc48 264 SETBIT (V1, rprecsym[i]);
572909b5 265}
015acc48 266
a083fbbf 267static void
d2729d44 268reduce_grammar_tables (void)
572909b5
RS
269{
270/* This is turned off because we would need to change the numbers
271 in the case statements in the actions file. */
272#if 0
273 /* remove useless productions */
274 if (nuseless_productions > 0)
275 {
276 short np, pn, ni, pi;
277
278 np = 0;
279 ni = 0;
280 for (pn = 1; pn <= nrules; pn++)
281 {
015acc48 282 if (BITISSET (P, pn))
572909b5
RS
283 {
284 np++;
285 if (pn != np)
286 {
287 rlhs[np] = rlhs[pn];
288 rline[np] = rline[pn];
289 rprec[np] = rprec[pn];
290 rassoc[np] = rassoc[pn];
291 rrhs[np] = rrhs[pn];
292 if (rrhs[np] != ni)
293 {
294 pi = rrhs[np];
295 rrhs[np] = ni;
296 while (ritem[pi] >= 0)
297 ritem[ni++] = ritem[pi++];
298 ritem[ni++] = -np;
299 }
015acc48
AD
300 }
301 else
302 {
572909b5
RS
303 while (ritem[ni++] >= 0);
304 }
305 }
306 }
307 ritem[ni] = 0;
308 nrules -= nuseless_productions;
309 nitems = ni;
310
015acc48
AD
311 /* Is it worth it to reduce the amount of memory for the
312 grammar? Probably not. */
572909b5
RS
313
314 }
315#endif /* 0 */
316 /* Disable useless productions,
317 since they may contain useless nonterms
318 that would get mapped below to -1 and confuse everyone. */
319 if (nuseless_productions > 0)
320 {
321 int pn;
322
323 for (pn = 1; pn <= nrules; pn++)
324 {
015acc48 325 if (!BITISSET (P, pn))
572909b5
RS
326 {
327 rlhs[pn] = -1;
328 }
329 }
330 }
331
332 /* remove useless symbols */
333 if (nuseless_nonterminals > 0)
334 {
335
015acc48 336 int i, n;
572909b5
RS
337/* short j; JF unused */
338 short *nontermmap;
015acc48 339 rule r;
572909b5 340
015acc48
AD
341 /* Create a map of nonterminal number to new nonterminal
342 number. -1 in the map means it was useless and is being
343 eliminated. */
572909b5 344
015acc48 345 nontermmap = NEW2 (nvars, short) - ntokens;
572909b5
RS
346 for (i = ntokens; i < nsyms; i++)
347 nontermmap[i] = -1;
348
349 n = ntokens;
350 for (i = ntokens; i < nsyms; i++)
015acc48 351 if (BITISSET (V, i))
572909b5
RS
352 nontermmap[i] = n++;
353
354 /* Shuffle elements of tables indexed by symbol number. */
355
356 for (i = ntokens; i < nsyms; i++)
357 {
358 n = nontermmap[i];
359 if (n >= 0)
360 {
361 sassoc[n] = sassoc[i];
362 sprec[n] = sprec[i];
363 tags[n] = tags[i];
015acc48
AD
364 }
365 else
366 {
367 free (tags[i]);
572909b5
RS
368 }
369 }
370
371 /* Replace all symbol numbers in valid data structures. */
372
373 for (i = 1; i <= nrules; i++)
374 {
375 /* Ignore the rules disabled above. */
376 if (rlhs[i] >= 0)
377 rlhs[i] = nontermmap[rlhs[i]];
378 if (ISVAR (rprecsym[i]))
379 /* Can this happen? */
380 rprecsym[i] = nontermmap[rprecsym[i]];
381 }
382
383 for (r = ritem; *r; r++)
015acc48 384 if (ISVAR (*r))
572909b5
RS
385 *r = nontermmap[*r];
386
387 start_symbol = nontermmap[start_symbol];
388
389 nsyms -= nuseless_nonterminals;
390 nvars -= nuseless_nonterminals;
391
015acc48 392 free (&nontermmap[ntokens]);
572909b5
RS
393 }
394}
015acc48 395
a083fbbf 396static void
d2729d44 397print_results (void)
572909b5 398{
015acc48 399 int i;
572909b5 400/* short j; JF unused */
015acc48
AD
401 rule r;
402 bool b;
572909b5
RS
403
404 if (nuseless_nonterminals > 0)
405 {
015acc48 406 fprintf (foutput, _("Useless nonterminals:\n\n"));
572909b5 407 for (i = ntokens; i < nsyms; i++)
015acc48
AD
408 if (!BITISSET (V, i))
409 fprintf (foutput, " %s\n", tags[i]);
572909b5
RS
410 }
411 b = FALSE;
412 for (i = 0; i < ntokens; i++)
413 {
015acc48 414 if (!BITISSET (V, i) && !BITISSET (V1, i))
572909b5
RS
415 {
416 if (!b)
417 {
015acc48 418 fprintf (foutput, _("\n\nTerminals which are not used:\n\n"));
572909b5
RS
419 b = TRUE;
420 }
015acc48 421 fprintf (foutput, " %s\n", tags[i]);
572909b5
RS
422 }
423 }
424
425 if (nuseless_productions > 0)
426 {
015acc48 427 fprintf (foutput, _("\n\nUseless rules:\n\n"));
572909b5
RS
428 for (i = 1; i <= nrules; i++)
429 {
015acc48 430 if (!BITISSET (P, i))
572909b5 431 {
015acc48
AD
432 fprintf (foutput, "#%-4d ", i);
433 fprintf (foutput, "%s :\t", tags[rlhs[i]]);
572909b5
RS
434 for (r = &ritem[rrhs[i]]; *r >= 0; r++)
435 {
015acc48 436 fprintf (foutput, " %s", tags[*r]);
572909b5 437 }
015acc48 438 fprintf (foutput, ";\n");
572909b5
RS
439 }
440 }
441 }
442 if (nuseless_nonterminals > 0 || nuseless_productions > 0 || b)
015acc48 443 fprintf (foutput, "\n\n");
572909b5
RS
444}
445\f
015acc48 446#if 0 /* XXX currently unused. */
4a120d45 447static void
d2729d44 448dump_grammar (void)
572909b5
RS
449{
450 int i;
451 rule r;
452
015acc48
AD
453 fprintf (foutput,
454 "ntokens = %d, nvars = %d, nsyms = %d, nrules = %d, nitems = %d\n\n",
455 ntokens, nvars, nsyms, nrules, nitems);
456 fprintf (foutput, _("Variables\n---------\n\n"));
457 fprintf (foutput, _("Value Sprec Sassoc Tag\n"));
572909b5 458 for (i = ntokens; i < nsyms; i++)
015acc48
AD
459 fprintf (foutput, "%5d %5d %5d %s\n", i, sprec[i], sassoc[i], tags[i]);
460 fprintf (foutput, "\n\n");
461 fprintf (foutput, _("Rules\n-----\n\n"));
572909b5
RS
462 for (i = 1; i <= nrules; i++)
463 {
015acc48
AD
464 fprintf (foutput, "%-5d(%5d%5d)%5d : (@%-5d)",
465 i, rprec[i], rassoc[i], rlhs[i], rrhs[i]);
572909b5 466 for (r = &ritem[rrhs[i]]; *r > 0; r++)
015acc48
AD
467 fprintf (foutput, "%5d", *r);
468 fprintf (foutput, " [%d]\n", -(*r));
572909b5 469 }
015acc48
AD
470 fprintf (foutput, "\n\n");
471 fprintf (foutput, _("Rules interpreted\n-----------------\n\n"));
572909b5
RS
472 for (i = 1; i <= nrules; i++)
473 {
015acc48 474 fprintf (foutput, "%-5d %s :", i, tags[rlhs[i]]);
572909b5 475 for (r = &ritem[rrhs[i]]; *r > 0; r++)
015acc48
AD
476 fprintf (foutput, " %s", tags[*r]);
477 fprintf (foutput, "\n");
572909b5 478 }
015acc48 479 fprintf (foutput, "\n\n");
572909b5 480}
015acc48 481
4a120d45 482#endif
572909b5
RS
483
484
a083fbbf 485static void
d2729d44 486print_notices (void)
572909b5 487{
572909b5 488 if (fixed_outfiles && nuseless_productions)
015acc48 489 fprintf (stderr, _("%d rules never reduced\n"), nuseless_productions);
572909b5 490
015acc48 491 fprintf (stderr, _("%s contains "), infile);
572909b5
RS
492
493 if (nuseless_nonterminals > 0)
494 {
015acc48
AD
495 fprintf (stderr, _("%d useless nonterminal%s"),
496 nuseless_nonterminals,
497 (nuseless_nonterminals == 1 ? "" : "s"));
572909b5
RS
498 }
499 if (nuseless_nonterminals > 0 && nuseless_productions > 0)
015acc48 500 fprintf (stderr, _(" and "));
572909b5
RS
501
502 if (nuseless_productions > 0)
503 {
015acc48
AD
504 fprintf (stderr, _("%d useless rule%s"),
505 nuseless_productions, (nuseless_productions == 1 ? "" : "s"));
572909b5 506 }
015acc48
AD
507 fprintf (stderr, "\n");
508 fflush (stderr);
509}
510\f
511void
512reduce_grammar (void)
513{
514 bool reduced;
515
516 /* Allocate the global sets used to compute the reduced grammar */
517
518 N = NEW2 (WORDSIZE (nvars), unsigned);
519 P = NEW2 (WORDSIZE (nrules + 1), unsigned);
520 V = NEW2 (WORDSIZE (nsyms), unsigned);
521 V1 = NEW2 (WORDSIZE (nsyms), unsigned);
522
523 useless_nonterminals ();
524 inaccessable_symbols ();
525
526 reduced = (bool) (nuseless_nonterminals + nuseless_productions > 0);
527
528 if (verboseflag)
529 print_results ();
530
531 if (reduced == FALSE)
532 goto done_reducing;
533
534 print_notices ();
535
536 if (!BITISSET (N, start_symbol - ntokens))
537 fatal (_("Start symbol %s does not derive any sentence"),
538 tags[start_symbol]);
539
540 reduce_grammar_tables ();
541#if 0
542 if (verboseflag)
543 {
544 fprintf (foutput, "REDUCED GRAMMAR\n\n");
545 dump_grammar ();
546 }
547#endif
548 statisticsflag = FALSE; /* someday getopts should handle this */
549 if (statisticsflag == TRUE)
550 fprintf (stderr, _("reduced %s defines %d terminal%s, %d nonterminal%s\
551, and %d production%s.\n"),
552 infile,
553 ntokens,
554 (ntokens == 1 ? "" : "s"),
555 nvars,
556 (nvars == 1 ? "" : "s"),
557 nrules,
558 (nrules == 1 ? "" : "s"));
559
560done_reducing:
561 /* Free the global sets used to compute the reduced grammar */
562
563 FREE (N);
564 FREE (V);
565 FREE (P);
572909b5 566}