]> git.saurik.com Git - bison.git/blame - src/reduce.c
* src/print.c: Convert to use bitset.h, not hand coded iterations
[bison.git] / src / reduce.c
CommitLineData
572909b5 1/* Grammar reduction for Bison.
337c5bd1 2 Copyright 1988, 1989, 2000, 2001 Free Software Foundation, Inc.
572909b5 3
015acc48 4 This file is part of Bison, the GNU Compiler Compiler.
572909b5 5
015acc48
AD
6 Bison is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
572909b5 10
015acc48
AD
11 Bison is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
572909b5 15
015acc48
AD
16 You should have received a copy of the GNU General Public License
17 along with Bison; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
572909b5
RS
20
21
015acc48
AD
22/* Reduce the grammar: Find and eliminate unreachable terminals,
23 nonterminals, and productions. David S. Bakin. */
572909b5 24
015acc48
AD
25/* Don't eliminate unreachable terminals: They may be used by the
26 user's parser. */
572909b5 27
572909b5 28#include "system.h"
ceed8467 29#include "getargs.h"
572909b5 30#include "files.h"
0e78e603 31#include "symtab.h"
572909b5 32#include "gram.h"
a0f6b076 33#include "complain.h"
015acc48 34#include "reduce.h"
b2ca4022
AD
35#include "reader.h"
36#include "getargs.h"
ed86e78c 37#include "bitset.h"
572909b5 38
015acc48 39typedef short *rule;
572909b5 40
572909b5 41
00238958 42/* Set of all nonterminals which are not useless. */
ed86e78c 43static bitset N;
572909b5 44
00238958 45/* Set of all rules which have no useless nonterminals in their RHS. */
ed86e78c 46static bitset P;
00238958
AD
47
48/* Set of all accessible symbols. */
ed86e78c 49static bitset V;
00238958
AD
50
51/* Set of symbols used to define rule precedence (so they are
52 `useless', but no warning should be issued). */
ed86e78c 53static bitset V1;
572909b5 54
015acc48
AD
55static int nuseful_productions;
56static int nuseless_productions;
57static int nuseful_nonterminals;
630e182b 58int nuseless_nonterminals;
572909b5 59\f
4a120d45 60static int
ed86e78c 61bits_size (bitset S)
572909b5
RS
62{
63 int i, count = 0;
64
ed86e78c 65 BITSET_EXECUTE (S, 0, i, { ++count; });
572909b5
RS
66 return count;
67}
68\f
015acc48
AD
69/*-------------------------------------------------------------------.
70| Another way to do this would be with a set for each production and |
71| then do subset tests against N0, but even for the C grammar the |
72| whole reducing process takes only 2 seconds on my 8Mhz AT. |
73`-------------------------------------------------------------------*/
572909b5 74
a083fbbf 75static bool
ed86e78c 76useful_production (int i, bitset N0)
572909b5 77{
015acc48 78 rule r;
572909b5
RS
79 short n;
80
015acc48
AD
81 /* A production is useful if all of the nonterminals in its appear
82 in the set of useful nonterminals. */
572909b5 83
1a2b5d37 84 for (r = &ritem[rules[i].rhs]; *r >= 0; r++)
015acc48 85 if (ISVAR (n = *r))
ed86e78c 86 if (!bitset_test (N0, n - ntokens))
572909b5
RS
87 return FALSE;
88 return TRUE;
89}
90
91
015acc48
AD
92/*---------------------------------------------------------.
93| Remember that rules are 1-origin, symbols are 0-origin. |
94`---------------------------------------------------------*/
572909b5 95
a083fbbf 96static void
d2729d44 97useless_nonterminals (void)
572909b5 98{
ed86e78c 99 bitset Np, Ns;
342b8b6e 100 int i;
015acc48
AD
101
102 /* N is set as built. Np is set being built this iteration. P is
103 set of all productions which have a RHS all in N. */
104
ed86e78c
AD
105 Np = bitset_create (nvars, BITSET_FIXED);
106 bitset_zero (Np);
107
015acc48
AD
108
109 /* The set being computed is a set of nonterminals which can derive
110 the empty string or strings consisting of all terminals. At each
111 iteration a nonterminal is added to the set if there is a
112 production with that nonterminal as its LHS for which all the
113 nonterminals in its RHS are already in the set. Iterate until
114 the set being computed remains unchanged. Any nonterminals not
115 in the set at that point are useless in that they will never be
116 used in deriving a sentence of the language.
117
118 This iteration doesn't use any special traversal over the
119 productions. A set is kept of all productions for which all the
120 nonterminals in the RHS are in useful. Only productions not in
121 this set are scanned on each iteration. At the end, this set is
122 saved to be used when finding useful productions: only
123 productions in this set will appear in the final grammar. */
572909b5 124
572909b5
RS
125 while (1)
126 {
ed86e78c 127 bitset_copy (Np, N);
572909b5
RS
128 for (i = 1; i <= nrules; i++)
129 {
ed86e78c 130 if (!bitset_test (P, i))
572909b5 131 {
015acc48 132 if (useful_production (i, N))
572909b5 133 {
ed86e78c
AD
134 bitset_set (Np, rules[i].lhs - ntokens);
135 bitset_set (P, i);
572909b5
RS
136 }
137 }
138 }
ed86e78c 139 if (bitset_equal_p (N, Np))
572909b5
RS
140 break;
141 Ns = Np;
142 Np = N;
143 N = Ns;
144 }
ed86e78c 145 bitset_free (N);
572909b5
RS
146 N = Np;
147}
015acc48
AD
148
149
a083fbbf 150static void
d2729d44 151inaccessable_symbols (void)
572909b5 152{
ed86e78c 153 bitset Vp, Vs, Pp;
342b8b6e 154 int i;
572909b5 155 short t;
015acc48
AD
156 rule r;
157
158 /* Find out which productions are reachable and which symbols are
159 used. Starting with an empty set of productions and a set of
160 symbols which only has the start symbol in it, iterate over all
161 productions until the set of productions remains unchanged for an
162 iteration. For each production which has a LHS in the set of
163 reachable symbols, add the production to the set of reachable
164 productions, and add all of the nonterminals in the RHS of the
165 production to the set of reachable symbols.
166
167 Consider only the (partially) reduced grammar which has only
168 nonterminals in N and productions in P.
169
170 The result is the set P of productions in the reduced grammar,
171 and the set V of symbols in the reduced grammar.
172
173 Although this algorithm also computes the set of terminals which
174 are reachable, no terminal will be deleted from the grammar. Some
175 terminals might not be in the grammar but might be generated by
176 semantic routines, and so the user might want them available with
177 specified numbers. (Is this true?) However, the nonreachable
178 terminals are printed (if running in verbose mode) so that the
179 user can know. */
180
ed86e78c
AD
181 Vp = bitset_create (nsyms, BITSET_FIXED);
182 bitset_zero (Vp);
183 Pp = bitset_create (nrules + 1, BITSET_FIXED);
184 bitset_zero (Pp);
572909b5
RS
185
186 /* If the start symbol isn't useful, then nothing will be useful. */
ed86e78c 187 if (bitset_test (N, start_symbol - ntokens))
572909b5 188 {
ed86e78c 189 bitset_set (V, start_symbol);
2c5f66ed
AD
190
191 while (1)
572909b5 192 {
ed86e78c 193 bitset_copy (Vp, V);
2c5f66ed 194 for (i = 1; i <= nrules; i++)
572909b5 195 {
ed86e78c
AD
196 if (!bitset_test (Pp, i)
197 && bitset_test (P, i)
198 && bitset_test (V, rules[i].lhs))
572909b5 199 {
1a2b5d37 200 for (r = &ritem[rules[i].rhs]; *r >= 0; r++)
ed86e78c
AD
201 if (ISTOKEN (t = *r) || bitset_test (N, t - ntokens))
202 bitset_set (Vp, t);
203 bitset_set (Pp, i);
572909b5 204 }
572909b5 205 }
ed86e78c 206 if (bitset_equal_p (V, Vp))
2c5f66ed
AD
207 break;
208 Vs = Vp;
209 Vp = V;
210 V = Vs;
572909b5 211 }
572909b5 212 }
572909b5 213
ed86e78c 214 bitset_free (V);
572909b5
RS
215 V = Vp;
216
217 /* Tokens 0, 1, and 2 are internal to Bison. Consider them useful. */
ed86e78c
AD
218 bitset_set (V, 0); /* end-of-input token */
219 bitset_set (V, 1); /* error token */
220 bitset_set (V, 2); /* some undefined token */
572909b5 221
ed86e78c 222 bitset_free (P);
572909b5
RS
223 P = Pp;
224
ed86e78c 225 nuseful_productions = bits_size (P);
572909b5
RS
226 nuseless_productions = nrules - nuseful_productions;
227
228 nuseful_nonterminals = 0;
229 for (i = ntokens; i < nsyms; i++)
ed86e78c 230 if (bitset_test (V, i))
572909b5
RS
231 nuseful_nonterminals++;
232 nuseless_nonterminals = nvars - nuseful_nonterminals;
233
234 /* A token that was used in %prec should not be warned about. */
235 for (i = 1; i < nrules; i++)
1a2b5d37 236 if (rules[i].precsym != 0)
ed86e78c 237 bitset_set (V1, rules[i].precsym);
572909b5 238}
015acc48 239
a083fbbf 240static void
d2729d44 241reduce_grammar_tables (void)
572909b5 242{
076ab033
AD
243 /* This is turned off because we would need to change the numbers in
244 the case statements in the actions file.
572909b5 245
076ab033
AD
246 We don't disable it via CPP so that it is still checked with the
247 rest of the code, to avoid its becoming completely obsolete.
248
249 FIXME: I think the comment above demonstrates this code must be
250 turned off for *semantic* parser, not in the general case. Try
251 to understand this better --akim. */
252
253 if (0)
254 /* remove useless productions */
255 if (nuseless_productions > 0)
256 {
257 short np, pn, ni, pi;
258
259 np = 0;
260 ni = 0;
261 for (pn = 1; pn <= nrules; pn++)
ed86e78c 262 if (bitset_test (P, pn))
572909b5
RS
263 {
264 np++;
265 if (pn != np)
266 {
1a2b5d37
AD
267 rules[np].lhs = rules[pn].lhs;
268 rules[np].line = rules[pn].line;
269 rules[np].prec = rules[pn].prec;
270 rules[np].assoc = rules[pn].assoc;
271 rules[np].rhs = rules[pn].rhs;
272 if (rules[np].rhs != ni)
572909b5 273 {
1a2b5d37
AD
274 pi = rules[np].rhs;
275 rules[np].rhs = ni;
572909b5
RS
276 while (ritem[pi] >= 0)
277 ritem[ni++] = ritem[pi++];
278 ritem[ni++] = -np;
279 }
015acc48
AD
280 }
281 else
282 {
3d4daee3
AD
283 while (ritem[ni++] >= 0)
284 /* Nothing. */;
572909b5
RS
285 }
286 }
572909b5 287
076ab033
AD
288 ritem[ni] = 0;
289 nrules -= nuseless_productions;
290 nitems = ni;
3d4daee3 291 nritems = ni;
076ab033
AD
292
293 /* Is it worth it to reduce the amount of memory for the
294 grammar? Probably not. */
295 }
572909b5 296
68f1e3ed 297 /* Disable useless productions. */
572909b5
RS
298 if (nuseless_productions > 0)
299 {
300 int pn;
572909b5 301 for (pn = 1; pn <= nrules; pn++)
ed86e78c 302 rules[pn].useful = bitset_test (P, pn);
572909b5 303 }
00238958 304}
572909b5 305
572909b5 306
00238958
AD
307/*------------------------------.
308| Remove useless nonterminals. |
309`------------------------------*/
572909b5 310
00238958
AD
311static void
312nonterminals_reduce (void)
313{
314 int i, n;
572909b5 315
68f1e3ed
AD
316 /* Map the nonterminals to their new index: useful first, useless
317 afterwards. Kept for later report. */
572909b5 318
00238958 319 short *nontermmap = XCALLOC (short, nvars) - ntokens;
00238958
AD
320 n = ntokens;
321 for (i = ntokens; i < nsyms; i++)
ed86e78c 322 if (bitset_test (V, i))
00238958 323 nontermmap[i] = n++;
760b53a8 324 for (i = ntokens; i < nsyms; i++)
ed86e78c 325 if (!bitset_test (V, i))
760b53a8 326 nontermmap[i] = n++;
572909b5 327
00238958 328
760b53a8
AD
329 /* Shuffle elements of tables indexed by symbol number. */
330 {
0e78e603 331 bucket **symbols_sorted = XMALLOC (bucket *, nvars) - ntokens;
760b53a8
AD
332
333 for (i = ntokens; i < nsyms; i++)
0e78e603 334 symbols_sorted[nontermmap[i]] = symbols[i];
760b53a8 335 for (i = ntokens; i < nsyms; i++)
0e78e603
AD
336 symbols[i] = symbols_sorted[i];
337 free (symbols_sorted + ntokens);
760b53a8 338 }
572909b5 339
00238958 340 /* Replace all symbol numbers in valid data structures. */
572909b5 341
00238958
AD
342 for (i = 1; i <= nrules; i++)
343 {
1a2b5d37
AD
344 rules[i].lhs = nontermmap[rules[i].lhs];
345 if (ISVAR (rules[i].precsym))
00238958 346 /* Can this happen? */
1a2b5d37 347 rules[i].precsym = nontermmap[rules[i].precsym];
00238958 348 }
572909b5 349
9e7f6bbd
AD
350 for (i = 0; i < nritems; ++i)
351 if (ISVAR (ritem[i]))
352 ritem[i] = nontermmap[ritem[i]];
572909b5 353
00238958 354 start_symbol = nontermmap[start_symbol];
572909b5 355
00238958
AD
356 nsyms -= nuseless_nonterminals;
357 nvars -= nuseless_nonterminals;
572909b5 358
68f1e3ed 359 free (nontermmap + ntokens);
572909b5 360}
015acc48 361
337c5bd1 362
00238958
AD
363/*------------------------------------------------------------------.
364| Output the detailed results of the reductions. For FILE.output. |
365`------------------------------------------------------------------*/
337c5bd1
AD
366
367void
368reduce_output (FILE *out)
572909b5 369{
572909b5
RS
370 if (nuseless_nonterminals > 0)
371 {
d2d1b42b
AD
372 int i;
373 fprintf (out, "%s\n\n", _("Useless nonterminals:"));
760b53a8 374 for (i = 0; i < nuseless_nonterminals; ++i)
0e78e603 375 fprintf (out, " %s\n", symbols[nsyms + i]->tag);
d2d1b42b 376 fputs ("\n\n", out);
572909b5 377 }
d2d1b42b
AD
378
379 {
380 bool b = FALSE;
381 int i;
382 for (i = 0; i < ntokens; i++)
ed86e78c 383 if (!bitset_test (V, i) && !bitset_test (V1, i))
572909b5
RS
384 {
385 if (!b)
d2d1b42b
AD
386 fprintf (out, "%s\n\n", _("Terminals which are not used:"));
387 b = TRUE;
0e78e603 388 fprintf (out, " %s\n", symbols[i]->tag);
572909b5 389 }
d2d1b42b
AD
390 if (b)
391 fputs ("\n\n", out);
392 }
572909b5
RS
393
394 if (nuseless_productions > 0)
395 {
d2d1b42b
AD
396 int i;
397 fprintf (out, "%s\n\n", _("Useless rules:"));
572909b5 398 for (i = 1; i <= nrules; i++)
1a2b5d37 399 if (!rules[i].useful)
2c5f66ed 400 {
d2d1b42b 401 rule r;
30171f79 402 fprintf (out, "#%-4d ", i - 1);
1a2b5d37
AD
403 fprintf (out, "%s:", symbols[rules[i].lhs]->tag);
404 for (r = &ritem[rules[i].rhs]; *r >= 0; r++)
0e78e603 405 fprintf (out, " %s", symbols[*r]->tag);
d2d1b42b 406 fputs (";\n", out);
2c5f66ed 407 }
d2d1b42b 408 fputs ("\n\n", out);
572909b5 409 }
572909b5
RS
410}
411\f
4a120d45 412static void
337c5bd1 413dump_grammar (FILE *out)
572909b5
RS
414{
415 int i;
416 rule r;
417
2c5f66ed 418 fprintf (out, "REDUCED GRAMMAR\n\n");
337c5bd1 419 fprintf (out,
6013d43f
AD
420 "ntokens = %d, nvars = %d, nsyms = %d, nrules = %d, nitems = %d\n\n",
421 ntokens, nvars, nsyms, nrules, nitems);
422 fprintf (out, "Variables\n---------\n\n");
cb487d7d 423 fprintf (out, "Value Sprec Sassoc Tag\n");
572909b5 424 for (i = ntokens; i < nsyms; i++)
0e78e603
AD
425 fprintf (out, "%5d %5d %5d %s\n",
426 i,
427 symbols[i]->prec, symbols[i]->assoc, symbols[i]->tag);
337c5bd1 428 fprintf (out, "\n\n");
6013d43f 429 fprintf (out, "Rules\n-----\n\n");
3067fbef 430 fprintf (out, "Num (Prec, Assoc, Useful, Ritem Range) Lhs -> Rhs (Ritem range) [Num]\n");
572909b5
RS
431 for (i = 1; i <= nrules; i++)
432 {
3067fbef
AD
433 int rhs_count = 0;
434 /* Find the last RHS index in ritems. */
1a2b5d37 435 for (r = &ritem[rules[i].rhs]; *r >= 0; ++r)
3067fbef
AD
436 ++rhs_count;
437 fprintf (out, "%3d (%2d, %2d, %2d, %2d-%2d) %2d ->",
3d4daee3 438 i - 1,
1a2b5d37
AD
439 rules[i].prec, rules[i].assoc, rules[i].useful,
440 rules[i].rhs, rules[i].rhs + rhs_count - 1,
441 rules[i].lhs);
3067fbef 442 /* Dumped the RHS. */
1a2b5d37 443 for (r = &ritem[rules[i].rhs]; *r >= 0; r++)
3067fbef 444 fprintf (out, "%3d", *r);
3d4daee3 445 fprintf (out, " [%d]\n", -(*r) - 1);
572909b5 446 }
337c5bd1 447 fprintf (out, "\n\n");
6013d43f 448 fprintf (out, "Rules interpreted\n-----------------\n\n");
572909b5
RS
449 for (i = 1; i <= nrules; i++)
450 {
1a2b5d37
AD
451 fprintf (out, "%-5d %s :", i, symbols[rules[i].lhs]->tag);
452 for (r = &ritem[rules[i].rhs]; *r >= 0; r++)
0e78e603 453 fprintf (out, " %s", symbols[*r]->tag);
337c5bd1 454 fputc ('\n', out);
572909b5 455 }
337c5bd1 456 fprintf (out, "\n\n");
572909b5
RS
457}
458
459
337c5bd1
AD
460
461/*-------------------------------.
462| Report the results to STDERR. |
463`-------------------------------*/
464
a083fbbf 465static void
337c5bd1 466reduce_print (void)
572909b5 467{
89cab50d 468 if (yacc_flag && nuseless_productions)
cb487d7d
AD
469 fprintf (stderr, ngettext ("%d rule never reduced\n",
470 "%d rules never reduced\n",
471 nuseless_productions),
472 nuseless_productions);
572909b5 473
015acc48 474 fprintf (stderr, _("%s contains "), infile);
572909b5
RS
475
476 if (nuseless_nonterminals > 0)
cb487d7d
AD
477 fprintf (stderr, ngettext ("%d useless nonterminal",
478 "%d useless nonterminals",
479 nuseless_nonterminals),
480 nuseless_nonterminals);
481
572909b5 482 if (nuseless_nonterminals > 0 && nuseless_productions > 0)
015acc48 483 fprintf (stderr, _(" and "));
572909b5
RS
484
485 if (nuseless_productions > 0)
cb487d7d
AD
486 fprintf (stderr, ngettext ("%d useless rule",
487 "%d useless rules",
488 nuseless_productions),
489 nuseless_productions);
015acc48
AD
490 fprintf (stderr, "\n");
491 fflush (stderr);
492}
493\f
494void
495reduce_grammar (void)
496{
497 bool reduced;
498
499 /* Allocate the global sets used to compute the reduced grammar */
500
ed86e78c
AD
501 N = bitset_create (nvars, BITSET_FIXED);
502 bitset_zero (N);
503 P = bitset_create (nrules + 1, BITSET_FIXED);
504 bitset_zero (P);
505 V = bitset_create (nsyms, BITSET_FIXED);
506 bitset_zero (V);
507 V1 = bitset_create (nsyms, BITSET_FIXED);
508 bitset_zero (V1);
015acc48
AD
509
510 useless_nonterminals ();
511 inaccessable_symbols ();
512
513 reduced = (bool) (nuseless_nonterminals + nuseless_productions > 0);
514
337c5bd1
AD
515 if (!reduced)
516 return;
015acc48 517
337c5bd1 518 reduce_print ();
015acc48 519
ed86e78c 520 if (!bitset_test (N, start_symbol - ntokens))
015acc48 521 fatal (_("Start symbol %s does not derive any sentence"),
0e78e603 522 symbols[start_symbol]->tag);
015acc48
AD
523
524 reduce_grammar_tables ();
00238958
AD
525 if (nuseless_nonterminals > 0)
526 nonterminals_reduce ();
9bfe901c
AD
527
528 if (trace_flag)
529 {
530 dump_grammar (stderr);
531
532 fprintf (stderr, "reduced %s defines %d terminals, %d nonterminals\
533, and %d productions.\n",
534 infile, ntokens, nvars, nrules);
535 }
337c5bd1 536}
015acc48 537
015acc48 538
337c5bd1
AD
539/*-----------------------------------------------------------.
540| Free the global sets used to compute the reduced grammar. |
541`-----------------------------------------------------------*/
542
543void
544reduce_free (void)
545{
ed86e78c
AD
546 bitset_free (N);
547 bitset_free (V);
548 bitset_free (V1);
549 bitset_free (P);
572909b5 550}