]> git.saurik.com Git - bison.git/blame - src/reduce.c
* LR0.c (new_itemsets): Fix wording in comments: say item index rather
[bison.git] / src / reduce.c
CommitLineData
572909b5 1/* Grammar reduction for Bison.
c4882934 2
9ae1f0c6 3 Copyright (C) 1988, 1989, 2000, 2001, 2002, 2003, 2005, 2006 Free
2cec9080 4 Software Foundation, Inc.
572909b5 5
015acc48 6 This file is part of Bison, the GNU Compiler Compiler.
572909b5 7
015acc48
AD
8 Bison is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
572909b5 12
015acc48
AD
13 Bison is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
572909b5 17
015acc48
AD
18 You should have received a copy of the GNU General Public License
19 along with Bison; see the file COPYING. If not, write to
0fb669f9
PE
20 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 Boston, MA 02110-1301, USA. */
572909b5
RS
22
23
015acc48
AD
24/* Reduce the grammar: Find and eliminate unreachable terminals,
25 nonterminals, and productions. David S. Bakin. */
572909b5 26
015acc48
AD
27/* Don't eliminate unreachable terminals: They may be used by the
28 user's parser. */
572909b5 29
2cec9080 30#include <config.h>
572909b5 31#include "system.h"
17ee7397
PE
32
33#include <bitset.h>
34#include <quotearg.h>
35
36#include "complain.h"
572909b5 37#include "files.h"
17ee7397 38#include "getargs.h"
572909b5 39#include "gram.h"
b2ca4022 40#include "reader.h"
17ee7397
PE
41#include "reduce.h"
42#include "symtab.h"
572909b5 43
00238958 44/* Set of all nonterminals which are not useless. */
ed86e78c 45static bitset N;
572909b5 46
00238958 47/* Set of all rules which have no useless nonterminals in their RHS. */
ed86e78c 48static bitset P;
00238958
AD
49
50/* Set of all accessible symbols. */
ed86e78c 51static bitset V;
00238958
AD
52
53/* Set of symbols used to define rule precedence (so they are
54 `useless', but no warning should be issued). */
ed86e78c 55static bitset V1;
572909b5 56
17ee7397
PE
57static rule_number nuseful_productions;
58rule_number nuseless_productions;
015acc48 59static int nuseful_nonterminals;
17ee7397 60symbol_number nuseless_nonterminals;
572909b5 61\f
015acc48
AD
62/*-------------------------------------------------------------------.
63| Another way to do this would be with a set for each production and |
64| then do subset tests against N0, but even for the C grammar the |
65| whole reducing process takes only 2 seconds on my 8Mhz AT. |
66`-------------------------------------------------------------------*/
572909b5 67
a083fbbf 68static bool
17ee7397 69useful_production (rule_number r, bitset N0)
572909b5 70{
17ee7397 71 item_number *rhsp;
572909b5 72
015acc48
AD
73 /* A production is useful if all of the nonterminals in its appear
74 in the set of useful nonterminals. */
572909b5 75
9222837b
AD
76 for (rhsp = rules[r].rhs; *rhsp >= 0; ++rhsp)
77 if (ISVAR (*rhsp) && !bitset_test (N0, *rhsp - ntokens))
8307162d
PE
78 return false;
79 return true;
572909b5
RS
80}
81
82
015acc48
AD
83/*---------------------------------------------------------.
84| Remember that rules are 1-origin, symbols are 0-origin. |
85`---------------------------------------------------------*/
572909b5 86
a083fbbf 87static void
d2729d44 88useless_nonterminals (void)
572909b5 89{
ed86e78c 90 bitset Np, Ns;
17ee7397 91 rule_number r;
015acc48
AD
92
93 /* N is set as built. Np is set being built this iteration. P is
94 set of all productions which have a RHS all in N. */
95
ed86e78c 96 Np = bitset_create (nvars, BITSET_FIXED);
ed86e78c 97
015acc48
AD
98
99 /* The set being computed is a set of nonterminals which can derive
100 the empty string or strings consisting of all terminals. At each
101 iteration a nonterminal is added to the set if there is a
102 production with that nonterminal as its LHS for which all the
103 nonterminals in its RHS are already in the set. Iterate until
104 the set being computed remains unchanged. Any nonterminals not
105 in the set at that point are useless in that they will never be
106 used in deriving a sentence of the language.
107
108 This iteration doesn't use any special traversal over the
109 productions. A set is kept of all productions for which all the
110 nonterminals in the RHS are in useful. Only productions not in
111 this set are scanned on each iteration. At the end, this set is
112 saved to be used when finding useful productions: only
113 productions in this set will appear in the final grammar. */
572909b5 114
572909b5
RS
115 while (1)
116 {
ed86e78c 117 bitset_copy (Np, N);
4b3d3a8e 118 for (r = 0; r < nrules; r++)
9222837b
AD
119 if (!bitset_test (P, r)
120 && useful_production (r, N))
f9abaa2c 121 {
9222837b
AD
122 bitset_set (Np, rules[r].lhs->number - ntokens);
123 bitset_set (P, r);
f9abaa2c 124 }
ed86e78c 125 if (bitset_equal_p (N, Np))
572909b5
RS
126 break;
127 Ns = Np;
128 Np = N;
129 N = Ns;
130 }
ed86e78c 131 bitset_free (N);
572909b5
RS
132 N = Np;
133}
015acc48
AD
134
135
a083fbbf 136static void
d2729d44 137inaccessable_symbols (void)
572909b5 138{
ed86e78c 139 bitset Vp, Vs, Pp;
015acc48
AD
140
141 /* Find out which productions are reachable and which symbols are
142 used. Starting with an empty set of productions and a set of
143 symbols which only has the start symbol in it, iterate over all
144 productions until the set of productions remains unchanged for an
145 iteration. For each production which has a LHS in the set of
146 reachable symbols, add the production to the set of reachable
147 productions, and add all of the nonterminals in the RHS of the
148 production to the set of reachable symbols.
149
150 Consider only the (partially) reduced grammar which has only
151 nonterminals in N and productions in P.
152
153 The result is the set P of productions in the reduced grammar,
154 and the set V of symbols in the reduced grammar.
155
156 Although this algorithm also computes the set of terminals which
157 are reachable, no terminal will be deleted from the grammar. Some
158 terminals might not be in the grammar but might be generated by
159 semantic routines, and so the user might want them available with
160 specified numbers. (Is this true?) However, the nonreachable
161 terminals are printed (if running in verbose mode) so that the
162 user can know. */
163
ed86e78c 164 Vp = bitset_create (nsyms, BITSET_FIXED);
4b3d3a8e 165 Pp = bitset_create (nrules, BITSET_FIXED);
572909b5
RS
166
167 /* If the start symbol isn't useful, then nothing will be useful. */
88bce5a2 168 if (bitset_test (N, accept->number - ntokens))
572909b5 169 {
88bce5a2 170 bitset_set (V, accept->number);
2c5f66ed
AD
171
172 while (1)
572909b5 173 {
17ee7397 174 rule_number r;
ed86e78c 175 bitset_copy (Vp, V);
4b3d3a8e 176 for (r = 0; r < nrules; r++)
572909b5 177 {
9222837b
AD
178 if (!bitset_test (Pp, r)
179 && bitset_test (P, r)
180 && bitset_test (V, rules[r].lhs->number))
572909b5 181 {
17ee7397 182 item_number *rhsp;
9222837b
AD
183 for (rhsp = rules[r].rhs; *rhsp >= 0; rhsp++)
184 if (ISTOKEN (*rhsp) || bitset_test (N, *rhsp - ntokens))
185 bitset_set (Vp, *rhsp);
186 bitset_set (Pp, r);
572909b5 187 }
572909b5 188 }
ed86e78c 189 if (bitset_equal_p (V, Vp))
2c5f66ed
AD
190 break;
191 Vs = Vp;
192 Vp = V;
193 V = Vs;
572909b5 194 }
572909b5 195 }
572909b5 196
ed86e78c 197 bitset_free (V);
572909b5
RS
198 V = Vp;
199
200 /* Tokens 0, 1, and 2 are internal to Bison. Consider them useful. */
88bce5a2 201 bitset_set (V, endtoken->number); /* end-of-input token */
72a23c97
AD
202 bitset_set (V, errtoken->number); /* error token */
203 bitset_set (V, undeftoken->number); /* some undefined token */
572909b5 204
ed86e78c 205 bitset_free (P);
572909b5
RS
206 P = Pp;
207
914feea9 208 nuseful_productions = bitset_count (P);
572909b5
RS
209 nuseless_productions = nrules - nuseful_productions;
210
211 nuseful_nonterminals = 0;
9222837b 212 {
17ee7397 213 symbol_number i;
9222837b
AD
214 for (i = ntokens; i < nsyms; i++)
215 if (bitset_test (V, i))
216 nuseful_nonterminals++;
217 }
572909b5
RS
218 nuseless_nonterminals = nvars - nuseful_nonterminals;
219
220 /* A token that was used in %prec should not be warned about. */
9222837b 221 {
17ee7397 222 rule_number r;
4b3d3a8e
AD
223 for (r = 0; r < nrules; ++r)
224 if (rules[r].precsym != 0)
225 bitset_set (V1, rules[r].precsym->number);
9222837b 226 }
572909b5 227}
015acc48 228
c3b407f4
AD
229
230/*-------------------------------------------------------------------.
231| Put the useless productions at the end of RULES, and adjust NRULES |
232| accordingly. |
233`-------------------------------------------------------------------*/
234
a083fbbf 235static void
d2729d44 236reduce_grammar_tables (void)
572909b5 237{
6b98e4b5 238 /* Report and flag useless productions. */
c3b407f4 239 {
17ee7397 240 rule_number r;
4b3d3a8e 241 for (r = 0; r < nrules; r++)
c8f002c7
AD
242 rules[r].useful = bitset_test (P, r);
243 grammar_rules_never_reduced_report (_("useless rule"));
c3b407f4 244 }
572909b5 245
c3b407f4
AD
246 /* Map the nonterminals to their new index: useful first, useless
247 afterwards. Kept for later report. */
248 {
4b3d3a8e
AD
249 int useful = 0;
250 int useless = nrules - nuseless_productions;
da2a7671 251 rule *rules_sorted = xnmalloc (nrules, sizeof *rules_sorted);
17ee7397 252 rule_number r;
4b3d3a8e 253 for (r = 0; r < nrules; ++r)
9222837b 254 rules_sorted[rules[r].useful ? useful++ : useless++] = rules[r];
4b3d3a8e 255 free (rules);
c3b407f4 256 rules = rules_sorted;
076ab033 257
cc9305dd 258 /* Renumber the rules markers in RITEMS. */
4b3d3a8e 259 for (r = 0; r < nrules; ++r)
cc9305dd 260 {
17ee7397 261 item_number *rhsp = rules[r].rhs;
cc9305dd
AD
262 for (/* Nothing. */; *rhsp >= 0; ++rhsp)
263 /* Nothing. */;
12b0043a 264 *rhsp = rule_number_as_item_number (r);
9222837b 265 rules[r].number = r;
cc9305dd 266 }
c3b407f4
AD
267 nrules -= nuseless_productions;
268 }
076ab033 269
a900a624 270 /* Adjust NRITEMS. */
c3b407f4 271 {
a737b216 272 rule_number r;
c3b407f4 273 int length;
4b3d3a8e 274 for (r = nrules; r < nrules + nuseless_productions; ++r)
076ab033 275 {
c3b407f4
AD
276 length = rule_rhs_length (&rules[r]);
277 nritems -= length + 1;
076ab033 278 }
c3b407f4 279 }
00238958 280}
572909b5 281
572909b5 282
00238958
AD
283/*------------------------------.
284| Remove useless nonterminals. |
285`------------------------------*/
572909b5 286
00238958
AD
287static void
288nonterminals_reduce (void)
289{
17ee7397 290 symbol_number i, n;
572909b5 291
68f1e3ed
AD
292 /* Map the nonterminals to their new index: useful first, useless
293 afterwards. Kept for later report. */
572909b5 294
da2a7671 295 symbol_number *nontermmap = xnmalloc (nvars, sizeof *nontermmap);
00238958
AD
296 n = ntokens;
297 for (i = ntokens; i < nsyms; i++)
ed86e78c 298 if (bitset_test (V, i))
c4882934 299 nontermmap[i - ntokens] = n++;
760b53a8 300 for (i = ntokens; i < nsyms; i++)
ed86e78c 301 if (!bitset_test (V, i))
6b98e4b5 302 {
c4882934 303 nontermmap[i - ntokens] = n++;
d6ea8200 304 warn_at (symbols[i]->location, _("useless nonterminal: %s"),
97650f4e 305 symbols[i]->tag);
6b98e4b5 306 }
572909b5 307
00238958 308
760b53a8
AD
309 /* Shuffle elements of tables indexed by symbol number. */
310 {
da2a7671 311 symbol **symbols_sorted = xnmalloc (nvars, sizeof *symbols_sorted);
760b53a8 312
bba97eb2 313 for (i = ntokens; i < nsyms; i++)
c4882934 314 symbols[i]->number = nontermmap[i - ntokens];
760b53a8 315 for (i = ntokens; i < nsyms; i++)
c4882934 316 symbols_sorted[nontermmap[i - ntokens] - ntokens] = symbols[i];
760b53a8 317 for (i = ntokens; i < nsyms; i++)
c4882934
PE
318 symbols[i] = symbols_sorted[i - ntokens];
319 free (symbols_sorted);
760b53a8 320 }
572909b5 321
0c2d3f4c 322 {
17ee7397 323 rule_number r;
4b3d3a8e 324 for (r = 0; r < nrules; ++r)
0c2d3f4c 325 {
17ee7397 326 item_number *rhsp;
0c2d3f4c
AD
327 for (rhsp = rules[r].rhs; *rhsp >= 0; ++rhsp)
328 if (ISVAR (*rhsp))
c4882934
PE
329 *rhsp = symbol_number_as_item_number (nontermmap[*rhsp
330 - ntokens]);
0c2d3f4c 331 }
c4882934 332 accept->number = nontermmap[accept->number - ntokens];
0c2d3f4c 333 }
572909b5 334
00238958
AD
335 nsyms -= nuseless_nonterminals;
336 nvars -= nuseless_nonterminals;
572909b5 337
c4882934 338 free (nontermmap);
572909b5 339}
015acc48 340
337c5bd1 341
00238958
AD
342/*------------------------------------------------------------------.
343| Output the detailed results of the reductions. For FILE.output. |
344`------------------------------------------------------------------*/
337c5bd1
AD
345
346void
347reduce_output (FILE *out)
572909b5 348{
572909b5
RS
349 if (nuseless_nonterminals > 0)
350 {
d2d1b42b 351 int i;
c8f002c7 352 fprintf (out, "%s\n\n", _("Useless nonterminals"));
760b53a8 353 for (i = 0; i < nuseless_nonterminals; ++i)
97650f4e 354 fprintf (out, " %s\n", symbols[nsyms + i]->tag);
d2d1b42b 355 fputs ("\n\n", out);
572909b5 356 }
d2d1b42b
AD
357
358 {
8307162d 359 bool b = false;
d2d1b42b
AD
360 int i;
361 for (i = 0; i < ntokens; i++)
ed86e78c 362 if (!bitset_test (V, i) && !bitset_test (V1, i))
572909b5
RS
363 {
364 if (!b)
c8f002c7 365 fprintf (out, "%s\n\n", _("Terminals which are not used"));
8307162d 366 b = true;
97650f4e 367 fprintf (out, " %s\n", symbols[i]->tag);
572909b5 368 }
d2d1b42b
AD
369 if (b)
370 fputs ("\n\n", out);
371 }
572909b5
RS
372
373 if (nuseless_productions > 0)
9757c359 374 grammar_rules_partial_print (out, _("Useless rules"),
c8f002c7 375 rule_useless_p);
572909b5
RS
376}
377\f
572909b5 378
572909b5
RS
379
380
337c5bd1
AD
381
382/*-------------------------------.
383| Report the results to STDERR. |
384`-------------------------------*/
385
a083fbbf 386static void
337c5bd1 387reduce_print (void)
572909b5 388{
89cab50d 389 if (yacc_flag && nuseless_productions)
cb487d7d
AD
390 fprintf (stderr, ngettext ("%d rule never reduced\n",
391 "%d rules never reduced\n",
392 nuseless_productions),
393 nuseless_productions);
572909b5 394
95612cfa 395 fprintf (stderr, "%s: %s: ", grammar_file, _("warning"));
572909b5
RS
396
397 if (nuseless_nonterminals > 0)
cb487d7d
AD
398 fprintf (stderr, ngettext ("%d useless nonterminal",
399 "%d useless nonterminals",
400 nuseless_nonterminals),
401 nuseless_nonterminals);
402
572909b5 403 if (nuseless_nonterminals > 0 && nuseless_productions > 0)
015acc48 404 fprintf (stderr, _(" and "));
572909b5
RS
405
406 if (nuseless_productions > 0)
cb487d7d
AD
407 fprintf (stderr, ngettext ("%d useless rule",
408 "%d useless rules",
409 nuseless_productions),
410 nuseless_productions);
2f4f028d 411 fprintf (stderr, "\n");
015acc48
AD
412}
413\f
414void
415reduce_grammar (void)
416{
417 bool reduced;
418
419 /* Allocate the global sets used to compute the reduced grammar */
420
ed86e78c 421 N = bitset_create (nvars, BITSET_FIXED);
4b3d3a8e 422 P = bitset_create (nrules, BITSET_FIXED);
ed86e78c 423 V = bitset_create (nsyms, BITSET_FIXED);
ed86e78c 424 V1 = bitset_create (nsyms, BITSET_FIXED);
015acc48
AD
425
426 useless_nonterminals ();
427 inaccessable_symbols ();
428
4d56beff 429 reduced = (nuseless_nonterminals + nuseless_productions > 0);
337c5bd1
AD
430 if (!reduced)
431 return;
015acc48 432
337c5bd1 433 reduce_print ();
015acc48 434
88bce5a2 435 if (!bitset_test (N, accept->number - ntokens))
1bfb97db
AD
436 fatal_at (startsymbol_location,
437 _("start symbol %s does not derive any sentence"),
438 startsymbol->tag);
015acc48 439
bb88b0fc
AD
440 /* First reduce the nonterminals, as they renumber themselves in the
441 whole grammar. If you change the order, nonterms would be
442 renumbered only in the reduced grammar. */
00238958
AD
443 if (nuseless_nonterminals > 0)
444 nonterminals_reduce ();
bb88b0fc
AD
445 if (nuseless_productions > 0)
446 reduce_grammar_tables ();
9bfe901c 447
273a74fa 448 if (trace_flag & trace_grammar)
9bfe901c 449 {
78ab8f67 450 grammar_dump (stderr, "Reduced Grammar");
9bfe901c
AD
451
452 fprintf (stderr, "reduced %s defines %d terminals, %d nonterminals\
453, and %d productions.\n",
95612cfa 454 grammar_file, ntokens, nvars, nrules);
9bfe901c 455 }
337c5bd1 456}
015acc48 457
015acc48 458
337c5bd1
AD
459/*-----------------------------------------------------------.
460| Free the global sets used to compute the reduced grammar. |
461`-----------------------------------------------------------*/
462
463void
464reduce_free (void)
465{
ed86e78c
AD
466 bitset_free (N);
467 bitset_free (V);
468 bitset_free (V1);
469 bitset_free (P);
572909b5 470}