]> git.saurik.com Git - bison.git/blame - src/reduce.c
* data/bison.c++ (yy::b4_name::parse): Error recovery is back.
[bison.git] / src / reduce.c
CommitLineData
572909b5 1/* Grammar reduction for Bison.
337c5bd1 2 Copyright 1988, 1989, 2000, 2001 Free Software Foundation, Inc.
572909b5 3
015acc48 4 This file is part of Bison, the GNU Compiler Compiler.
572909b5 5
015acc48
AD
6 Bison is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
572909b5 10
015acc48
AD
11 Bison is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
572909b5 15
015acc48
AD
16 You should have received a copy of the GNU General Public License
17 along with Bison; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
572909b5
RS
20
21
015acc48
AD
22/* Reduce the grammar: Find and eliminate unreachable terminals,
23 nonterminals, and productions. David S. Bakin. */
572909b5 24
015acc48
AD
25/* Don't eliminate unreachable terminals: They may be used by the
26 user's parser. */
572909b5 27
572909b5 28#include "system.h"
ceed8467 29#include "getargs.h"
572909b5 30#include "files.h"
0e78e603 31#include "symtab.h"
572909b5 32#include "gram.h"
a0f6b076 33#include "complain.h"
015acc48 34#include "reduce.h"
b2ca4022
AD
35#include "reader.h"
36#include "getargs.h"
572909b5 37
572909b5 38typedef unsigned *BSet;
015acc48 39typedef short *rule;
572909b5 40
572909b5 41
00238958
AD
42/* Set of all nonterminals which are not useless. */
43static BSet N;
572909b5 44
00238958
AD
45/* Set of all rules which have no useless nonterminals in their RHS. */
46static BSet P;
47
48/* Set of all accessible symbols. */
49static BSet V;
50
51/* Set of symbols used to define rule precedence (so they are
52 `useless', but no warning should be issued). */
53static BSet V1;
572909b5 54
015acc48
AD
55static int nuseful_productions;
56static int nuseless_productions;
57static int nuseful_nonterminals;
630e182b 58int nuseless_nonterminals;
572909b5 59\f
4a120d45 60static bool
d2729d44 61bits_equal (BSet L, BSet R, int n)
572909b5
RS
62{
63 int i;
64
65 for (i = n - 1; i >= 0; i--)
66 if (L[i] != R[i])
67 return FALSE;
68 return TRUE;
69}
70
71
4a120d45 72static int
d2729d44 73nbits (unsigned i)
572909b5
RS
74{
75 int count = 0;
76
015acc48
AD
77 while (i != 0)
78 {
79 i ^= (i & ((unsigned) (-(int) i)));
80 ++count;
81 }
572909b5
RS
82 return count;
83}
84
85
4a120d45 86static int
d2729d44 87bits_size (BSet S, int n)
572909b5
RS
88{
89 int i, count = 0;
90
91 for (i = n - 1; i >= 0; i--)
015acc48 92 count += nbits (S[i]);
572909b5
RS
93 return count;
94}
95\f
015acc48
AD
96/*-------------------------------------------------------------------.
97| Another way to do this would be with a set for each production and |
98| then do subset tests against N0, but even for the C grammar the |
99| whole reducing process takes only 2 seconds on my 8Mhz AT. |
100`-------------------------------------------------------------------*/
572909b5 101
a083fbbf 102static bool
d2729d44 103useful_production (int i, BSet N0)
572909b5 104{
015acc48 105 rule r;
572909b5
RS
106 short n;
107
015acc48
AD
108 /* A production is useful if all of the nonterminals in its appear
109 in the set of useful nonterminals. */
572909b5 110
1a2b5d37 111 for (r = &ritem[rules[i].rhs]; *r >= 0; r++)
015acc48
AD
112 if (ISVAR (n = *r))
113 if (!BITISSET (N0, n - ntokens))
572909b5
RS
114 return FALSE;
115 return TRUE;
116}
117
118
015acc48
AD
119/*---------------------------------------------------------.
120| Remember that rules are 1-origin, symbols are 0-origin. |
121`---------------------------------------------------------*/
572909b5 122
a083fbbf 123static void
d2729d44 124useless_nonterminals (void)
572909b5
RS
125{
126 BSet Np, Ns;
342b8b6e 127 int i;
015acc48
AD
128
129 /* N is set as built. Np is set being built this iteration. P is
130 set of all productions which have a RHS all in N. */
131
d7913476 132 Np = XCALLOC (unsigned, WORDSIZE (nvars));
015acc48
AD
133
134 /* The set being computed is a set of nonterminals which can derive
135 the empty string or strings consisting of all terminals. At each
136 iteration a nonterminal is added to the set if there is a
137 production with that nonterminal as its LHS for which all the
138 nonterminals in its RHS are already in the set. Iterate until
139 the set being computed remains unchanged. Any nonterminals not
140 in the set at that point are useless in that they will never be
141 used in deriving a sentence of the language.
142
143 This iteration doesn't use any special traversal over the
144 productions. A set is kept of all productions for which all the
145 nonterminals in the RHS are in useful. Only productions not in
146 this set are scanned on each iteration. At the end, this set is
147 saved to be used when finding useful productions: only
148 productions in this set will appear in the final grammar. */
572909b5 149
572909b5
RS
150 while (1)
151 {
015acc48 152 for (i = WORDSIZE (nvars) - 1; i >= 0; i--)
572909b5
RS
153 Np[i] = N[i];
154 for (i = 1; i <= nrules; i++)
155 {
015acc48 156 if (!BITISSET (P, i))
572909b5 157 {
015acc48 158 if (useful_production (i, N))
572909b5 159 {
1a2b5d37 160 SETBIT (Np, rules[i].lhs - ntokens);
015acc48 161 SETBIT (P, i);
572909b5
RS
162 }
163 }
164 }
015acc48 165 if (bits_equal (N, Np, WORDSIZE (nvars)))
572909b5
RS
166 break;
167 Ns = Np;
168 Np = N;
169 N = Ns;
170 }
d7913476 171 XFREE (N);
572909b5
RS
172 N = Np;
173}
015acc48
AD
174
175
a083fbbf 176static void
d2729d44 177inaccessable_symbols (void)
572909b5 178{
015acc48 179 BSet Vp, Vs, Pp;
342b8b6e 180 int i;
572909b5 181 short t;
015acc48
AD
182 rule r;
183
184 /* Find out which productions are reachable and which symbols are
185 used. Starting with an empty set of productions and a set of
186 symbols which only has the start symbol in it, iterate over all
187 productions until the set of productions remains unchanged for an
188 iteration. For each production which has a LHS in the set of
189 reachable symbols, add the production to the set of reachable
190 productions, and add all of the nonterminals in the RHS of the
191 production to the set of reachable symbols.
192
193 Consider only the (partially) reduced grammar which has only
194 nonterminals in N and productions in P.
195
196 The result is the set P of productions in the reduced grammar,
197 and the set V of symbols in the reduced grammar.
198
199 Although this algorithm also computes the set of terminals which
200 are reachable, no terminal will be deleted from the grammar. Some
201 terminals might not be in the grammar but might be generated by
202 semantic routines, and so the user might want them available with
203 specified numbers. (Is this true?) However, the nonreachable
204 terminals are printed (if running in verbose mode) so that the
205 user can know. */
206
d7913476
AD
207 Vp = XCALLOC (unsigned, WORDSIZE (nsyms));
208 Pp = XCALLOC (unsigned, WORDSIZE (nrules + 1));
572909b5
RS
209
210 /* If the start symbol isn't useful, then nothing will be useful. */
2c5f66ed 211 if (BITISSET (N, start_symbol - ntokens))
572909b5 212 {
2c5f66ed
AD
213 SETBIT (V, start_symbol);
214
215 while (1)
572909b5 216 {
2c5f66ed
AD
217 for (i = WORDSIZE (nsyms) - 1; i >= 0; i--)
218 Vp[i] = V[i];
219 for (i = 1; i <= nrules; i++)
572909b5 220 {
2c5f66ed
AD
221 if (!BITISSET (Pp, i)
222 && BITISSET (P, i)
1a2b5d37 223 && BITISSET (V, rules[i].lhs))
572909b5 224 {
1a2b5d37 225 for (r = &ritem[rules[i].rhs]; *r >= 0; r++)
2c5f66ed 226 if (ISTOKEN (t = *r) || BITISSET (N, t - ntokens))
015acc48 227 SETBIT (Vp, t);
2c5f66ed 228 SETBIT (Pp, i);
572909b5 229 }
572909b5 230 }
2c5f66ed
AD
231 if (bits_equal (V, Vp, WORDSIZE (nsyms)))
232 break;
233 Vs = Vp;
234 Vp = V;
235 V = Vs;
572909b5 236 }
572909b5 237 }
572909b5 238
d7913476 239 XFREE (V);
572909b5
RS
240 V = Vp;
241
242 /* Tokens 0, 1, and 2 are internal to Bison. Consider them useful. */
015acc48
AD
243 SETBIT (V, 0); /* end-of-input token */
244 SETBIT (V, 1); /* error token */
245 SETBIT (V, 2); /* some undefined token */
572909b5 246
d7913476 247 XFREE (P);
572909b5
RS
248 P = Pp;
249
015acc48 250 nuseful_productions = bits_size (P, WORDSIZE (nrules + 1));
572909b5
RS
251 nuseless_productions = nrules - nuseful_productions;
252
253 nuseful_nonterminals = 0;
254 for (i = ntokens; i < nsyms; i++)
015acc48 255 if (BITISSET (V, i))
572909b5
RS
256 nuseful_nonterminals++;
257 nuseless_nonterminals = nvars - nuseful_nonterminals;
258
259 /* A token that was used in %prec should not be warned about. */
260 for (i = 1; i < nrules; i++)
1a2b5d37
AD
261 if (rules[i].precsym != 0)
262 SETBIT (V1, rules[i].precsym);
572909b5 263}
015acc48 264
a083fbbf 265static void
d2729d44 266reduce_grammar_tables (void)
572909b5 267{
076ab033
AD
268 /* This is turned off because we would need to change the numbers in
269 the case statements in the actions file.
572909b5 270
076ab033
AD
271 We don't disable it via CPP so that it is still checked with the
272 rest of the code, to avoid its becoming completely obsolete.
273
274 FIXME: I think the comment above demonstrates this code must be
275 turned off for *semantic* parser, not in the general case. Try
276 to understand this better --akim. */
277
278 if (0)
279 /* remove useless productions */
280 if (nuseless_productions > 0)
281 {
282 short np, pn, ni, pi;
283
284 np = 0;
285 ni = 0;
286 for (pn = 1; pn <= nrules; pn++)
015acc48 287 if (BITISSET (P, pn))
572909b5
RS
288 {
289 np++;
290 if (pn != np)
291 {
1a2b5d37
AD
292 rules[np].lhs = rules[pn].lhs;
293 rules[np].line = rules[pn].line;
294 rules[np].prec = rules[pn].prec;
295 rules[np].assoc = rules[pn].assoc;
296 rules[np].rhs = rules[pn].rhs;
297 if (rules[np].rhs != ni)
572909b5 298 {
1a2b5d37
AD
299 pi = rules[np].rhs;
300 rules[np].rhs = ni;
572909b5
RS
301 while (ritem[pi] >= 0)
302 ritem[ni++] = ritem[pi++];
303 ritem[ni++] = -np;
304 }
015acc48
AD
305 }
306 else
307 {
3d4daee3
AD
308 while (ritem[ni++] >= 0)
309 /* Nothing. */;
572909b5
RS
310 }
311 }
572909b5 312
076ab033
AD
313 ritem[ni] = 0;
314 nrules -= nuseless_productions;
315 nitems = ni;
3d4daee3 316 nritems = ni;
076ab033
AD
317
318 /* Is it worth it to reduce the amount of memory for the
319 grammar? Probably not. */
320 }
572909b5 321
68f1e3ed 322 /* Disable useless productions. */
572909b5
RS
323 if (nuseless_productions > 0)
324 {
325 int pn;
572909b5 326 for (pn = 1; pn <= nrules; pn++)
1a2b5d37 327 rules[pn].useful = BITISSET (P, pn);
572909b5 328 }
00238958 329}
572909b5 330
572909b5 331
00238958
AD
332/*------------------------------.
333| Remove useless nonterminals. |
334`------------------------------*/
572909b5 335
00238958
AD
336static void
337nonterminals_reduce (void)
338{
339 int i, n;
572909b5 340
68f1e3ed
AD
341 /* Map the nonterminals to their new index: useful first, useless
342 afterwards. Kept for later report. */
572909b5 343
00238958 344 short *nontermmap = XCALLOC (short, nvars) - ntokens;
00238958
AD
345 n = ntokens;
346 for (i = ntokens; i < nsyms; i++)
347 if (BITISSET (V, i))
348 nontermmap[i] = n++;
760b53a8
AD
349 for (i = ntokens; i < nsyms; i++)
350 if (!BITISSET (V, i))
351 nontermmap[i] = n++;
572909b5 352
00238958 353
760b53a8
AD
354 /* Shuffle elements of tables indexed by symbol number. */
355 {
0e78e603 356 bucket **symbols_sorted = XMALLOC (bucket *, nvars) - ntokens;
760b53a8
AD
357
358 for (i = ntokens; i < nsyms; i++)
0e78e603 359 symbols_sorted[nontermmap[i]] = symbols[i];
760b53a8 360 for (i = ntokens; i < nsyms; i++)
0e78e603
AD
361 symbols[i] = symbols_sorted[i];
362 free (symbols_sorted + ntokens);
760b53a8 363 }
572909b5 364
00238958 365 /* Replace all symbol numbers in valid data structures. */
572909b5 366
00238958
AD
367 for (i = 1; i <= nrules; i++)
368 {
1a2b5d37
AD
369 rules[i].lhs = nontermmap[rules[i].lhs];
370 if (ISVAR (rules[i].precsym))
00238958 371 /* Can this happen? */
1a2b5d37 372 rules[i].precsym = nontermmap[rules[i].precsym];
00238958 373 }
572909b5 374
9e7f6bbd
AD
375 for (i = 0; i < nritems; ++i)
376 if (ISVAR (ritem[i]))
377 ritem[i] = nontermmap[ritem[i]];
572909b5 378
00238958 379 start_symbol = nontermmap[start_symbol];
572909b5 380
00238958
AD
381 nsyms -= nuseless_nonterminals;
382 nvars -= nuseless_nonterminals;
572909b5 383
68f1e3ed 384 free (nontermmap + ntokens);
572909b5 385}
015acc48 386
337c5bd1 387
00238958
AD
388/*------------------------------------------------------------------.
389| Output the detailed results of the reductions. For FILE.output. |
390`------------------------------------------------------------------*/
337c5bd1
AD
391
392void
393reduce_output (FILE *out)
572909b5 394{
572909b5
RS
395 if (nuseless_nonterminals > 0)
396 {
d2d1b42b
AD
397 int i;
398 fprintf (out, "%s\n\n", _("Useless nonterminals:"));
760b53a8 399 for (i = 0; i < nuseless_nonterminals; ++i)
0e78e603 400 fprintf (out, " %s\n", symbols[nsyms + i]->tag);
d2d1b42b 401 fputs ("\n\n", out);
572909b5 402 }
d2d1b42b
AD
403
404 {
405 bool b = FALSE;
406 int i;
407 for (i = 0; i < ntokens; i++)
015acc48 408 if (!BITISSET (V, i) && !BITISSET (V1, i))
572909b5
RS
409 {
410 if (!b)
d2d1b42b
AD
411 fprintf (out, "%s\n\n", _("Terminals which are not used:"));
412 b = TRUE;
0e78e603 413 fprintf (out, " %s\n", symbols[i]->tag);
572909b5 414 }
d2d1b42b
AD
415 if (b)
416 fputs ("\n\n", out);
417 }
572909b5
RS
418
419 if (nuseless_productions > 0)
420 {
d2d1b42b
AD
421 int i;
422 fprintf (out, "%s\n\n", _("Useless rules:"));
572909b5 423 for (i = 1; i <= nrules; i++)
1a2b5d37 424 if (!rules[i].useful)
2c5f66ed 425 {
d2d1b42b 426 rule r;
30171f79 427 fprintf (out, "#%-4d ", i - 1);
1a2b5d37
AD
428 fprintf (out, "%s:", symbols[rules[i].lhs]->tag);
429 for (r = &ritem[rules[i].rhs]; *r >= 0; r++)
0e78e603 430 fprintf (out, " %s", symbols[*r]->tag);
d2d1b42b 431 fputs (";\n", out);
2c5f66ed 432 }
d2d1b42b 433 fputs ("\n\n", out);
572909b5 434 }
572909b5
RS
435}
436\f
4a120d45 437static void
337c5bd1 438dump_grammar (FILE *out)
572909b5
RS
439{
440 int i;
441 rule r;
442
2c5f66ed 443 fprintf (out, "REDUCED GRAMMAR\n\n");
337c5bd1 444 fprintf (out,
6013d43f
AD
445 "ntokens = %d, nvars = %d, nsyms = %d, nrules = %d, nitems = %d\n\n",
446 ntokens, nvars, nsyms, nrules, nitems);
447 fprintf (out, "Variables\n---------\n\n");
cb487d7d 448 fprintf (out, "Value Sprec Sassoc Tag\n");
572909b5 449 for (i = ntokens; i < nsyms; i++)
0e78e603
AD
450 fprintf (out, "%5d %5d %5d %s\n",
451 i,
452 symbols[i]->prec, symbols[i]->assoc, symbols[i]->tag);
337c5bd1 453 fprintf (out, "\n\n");
6013d43f 454 fprintf (out, "Rules\n-----\n\n");
3067fbef 455 fprintf (out, "Num (Prec, Assoc, Useful, Ritem Range) Lhs -> Rhs (Ritem range) [Num]\n");
572909b5
RS
456 for (i = 1; i <= nrules; i++)
457 {
3067fbef
AD
458 int rhs_count = 0;
459 /* Find the last RHS index in ritems. */
1a2b5d37 460 for (r = &ritem[rules[i].rhs]; *r >= 0; ++r)
3067fbef
AD
461 ++rhs_count;
462 fprintf (out, "%3d (%2d, %2d, %2d, %2d-%2d) %2d ->",
3d4daee3 463 i - 1,
1a2b5d37
AD
464 rules[i].prec, rules[i].assoc, rules[i].useful,
465 rules[i].rhs, rules[i].rhs + rhs_count - 1,
466 rules[i].lhs);
3067fbef 467 /* Dumped the RHS. */
1a2b5d37 468 for (r = &ritem[rules[i].rhs]; *r >= 0; r++)
3067fbef 469 fprintf (out, "%3d", *r);
3d4daee3 470 fprintf (out, " [%d]\n", -(*r) - 1);
572909b5 471 }
337c5bd1 472 fprintf (out, "\n\n");
6013d43f 473 fprintf (out, "Rules interpreted\n-----------------\n\n");
572909b5
RS
474 for (i = 1; i <= nrules; i++)
475 {
1a2b5d37
AD
476 fprintf (out, "%-5d %s :", i, symbols[rules[i].lhs]->tag);
477 for (r = &ritem[rules[i].rhs]; *r >= 0; r++)
0e78e603 478 fprintf (out, " %s", symbols[*r]->tag);
337c5bd1 479 fputc ('\n', out);
572909b5 480 }
337c5bd1 481 fprintf (out, "\n\n");
572909b5
RS
482}
483
484
337c5bd1
AD
485
486/*-------------------------------.
487| Report the results to STDERR. |
488`-------------------------------*/
489
a083fbbf 490static void
337c5bd1 491reduce_print (void)
572909b5 492{
89cab50d 493 if (yacc_flag && nuseless_productions)
cb487d7d
AD
494 fprintf (stderr, ngettext ("%d rule never reduced\n",
495 "%d rules never reduced\n",
496 nuseless_productions),
497 nuseless_productions);
572909b5 498
015acc48 499 fprintf (stderr, _("%s contains "), infile);
572909b5
RS
500
501 if (nuseless_nonterminals > 0)
cb487d7d
AD
502 fprintf (stderr, ngettext ("%d useless nonterminal",
503 "%d useless nonterminals",
504 nuseless_nonterminals),
505 nuseless_nonterminals);
506
572909b5 507 if (nuseless_nonterminals > 0 && nuseless_productions > 0)
015acc48 508 fprintf (stderr, _(" and "));
572909b5
RS
509
510 if (nuseless_productions > 0)
cb487d7d
AD
511 fprintf (stderr, ngettext ("%d useless rule",
512 "%d useless rules",
513 nuseless_productions),
514 nuseless_productions);
015acc48
AD
515 fprintf (stderr, "\n");
516 fflush (stderr);
517}
518\f
519void
520reduce_grammar (void)
521{
522 bool reduced;
523
524 /* Allocate the global sets used to compute the reduced grammar */
525
d7913476
AD
526 N = XCALLOC (unsigned, WORDSIZE (nvars));
527 P = XCALLOC (unsigned, WORDSIZE (nrules + 1));
528 V = XCALLOC (unsigned, WORDSIZE (nsyms));
529 V1 = XCALLOC (unsigned, WORDSIZE (nsyms));
015acc48
AD
530
531 useless_nonterminals ();
532 inaccessable_symbols ();
533
534 reduced = (bool) (nuseless_nonterminals + nuseless_productions > 0);
535
337c5bd1
AD
536 if (!reduced)
537 return;
015acc48 538
337c5bd1 539 reduce_print ();
015acc48
AD
540
541 if (!BITISSET (N, start_symbol - ntokens))
542 fatal (_("Start symbol %s does not derive any sentence"),
0e78e603 543 symbols[start_symbol]->tag);
015acc48
AD
544
545 reduce_grammar_tables ();
00238958
AD
546 if (nuseless_nonterminals > 0)
547 nonterminals_reduce ();
9bfe901c
AD
548
549 if (trace_flag)
550 {
551 dump_grammar (stderr);
552
553 fprintf (stderr, "reduced %s defines %d terminals, %d nonterminals\
554, and %d productions.\n",
555 infile, ntokens, nvars, nrules);
556 }
337c5bd1 557}
015acc48 558
015acc48 559
337c5bd1
AD
560/*-----------------------------------------------------------.
561| Free the global sets used to compute the reduced grammar. |
562`-----------------------------------------------------------*/
563
564void
565reduce_free (void)
566{
d7913476
AD
567 XFREE (N);
568 XFREE (V);
337c5bd1 569 XFREE (V1);
d7913476 570 XFREE (P);
572909b5 571}