]> git.saurik.com Git - bison.git/blame - src/reduce.c
maint: run "make update-copyright".
[bison.git] / src / reduce.c
CommitLineData
572909b5 1/* Grammar reduction for Bison.
c4882934 2
34136e65 3 Copyright (C) 1988-1989, 2000-2003, 2005-2012 Free Software
575619af 4 Foundation, Inc.
572909b5 5
015acc48 6 This file is part of Bison, the GNU Compiler Compiler.
572909b5 7
f16b0819 8 This program is free software: you can redistribute it and/or modify
015acc48 9 it under the terms of the GNU General Public License as published by
f16b0819
PE
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
572909b5 12
f16b0819 13 This program is distributed in the hope that it will be useful,
015acc48
AD
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
572909b5 17
015acc48 18 You should have received a copy of the GNU General Public License
f16b0819 19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
572909b5
RS
20
21
015acc48
AD
22/* Reduce the grammar: Find and eliminate unreachable terminals,
23 nonterminals, and productions. David S. Bakin. */
572909b5 24
015acc48
AD
25/* Don't eliminate unreachable terminals: They may be used by the
26 user's parser. */
572909b5 27
2cec9080 28#include <config.h>
572909b5 29#include "system.h"
17ee7397
PE
30
31#include <bitset.h>
32#include <quotearg.h>
33
34#include "complain.h"
572909b5 35#include "files.h"
17ee7397 36#include "getargs.h"
572909b5 37#include "gram.h"
41d7a5f2 38#include "print-xml.h"
b2ca4022 39#include "reader.h"
17ee7397
PE
40#include "reduce.h"
41#include "symtab.h"
572909b5 42
00238958 43/* Set of all nonterminals which are not useless. */
ed86e78c 44static bitset N;
572909b5 45
00238958 46/* Set of all rules which have no useless nonterminals in their RHS. */
ed86e78c 47static bitset P;
00238958
AD
48
49/* Set of all accessible symbols. */
ed86e78c 50static bitset V;
00238958
AD
51
52/* Set of symbols used to define rule precedence (so they are
53 `useless', but no warning should be issued). */
ed86e78c 54static bitset V1;
572909b5 55
17ee7397
PE
56static rule_number nuseful_productions;
57rule_number nuseless_productions;
015acc48 58static int nuseful_nonterminals;
17ee7397 59symbol_number nuseless_nonterminals;
572909b5 60\f
015acc48
AD
61/*-------------------------------------------------------------------.
62| Another way to do this would be with a set for each production and |
63| then do subset tests against N0, but even for the C grammar the |
64| whole reducing process takes only 2 seconds on my 8Mhz AT. |
65`-------------------------------------------------------------------*/
572909b5 66
a083fbbf 67static bool
17ee7397 68useful_production (rule_number r, bitset N0)
572909b5 69{
17ee7397 70 item_number *rhsp;
572909b5 71
015acc48
AD
72 /* A production is useful if all of the nonterminals in its appear
73 in the set of useful nonterminals. */
572909b5 74
9222837b
AD
75 for (rhsp = rules[r].rhs; *rhsp >= 0; ++rhsp)
76 if (ISVAR (*rhsp) && !bitset_test (N0, *rhsp - ntokens))
8307162d
PE
77 return false;
78 return true;
572909b5
RS
79}
80
81
015acc48
AD
82/*---------------------------------------------------------.
83| Remember that rules are 1-origin, symbols are 0-origin. |
84`---------------------------------------------------------*/
572909b5 85
a083fbbf 86static void
d2729d44 87useless_nonterminals (void)
572909b5 88{
ed86e78c 89 bitset Np, Ns;
17ee7397 90 rule_number r;
015acc48
AD
91
92 /* N is set as built. Np is set being built this iteration. P is
93 set of all productions which have a RHS all in N. */
94
ed86e78c 95 Np = bitset_create (nvars, BITSET_FIXED);
ed86e78c 96
015acc48
AD
97
98 /* The set being computed is a set of nonterminals which can derive
99 the empty string or strings consisting of all terminals. At each
100 iteration a nonterminal is added to the set if there is a
101 production with that nonterminal as its LHS for which all the
102 nonterminals in its RHS are already in the set. Iterate until
103 the set being computed remains unchanged. Any nonterminals not
104 in the set at that point are useless in that they will never be
105 used in deriving a sentence of the language.
106
107 This iteration doesn't use any special traversal over the
108 productions. A set is kept of all productions for which all the
109 nonterminals in the RHS are in useful. Only productions not in
110 this set are scanned on each iteration. At the end, this set is
111 saved to be used when finding useful productions: only
112 productions in this set will appear in the final grammar. */
572909b5 113
572909b5
RS
114 while (1)
115 {
ed86e78c 116 bitset_copy (Np, N);
4b3d3a8e 117 for (r = 0; r < nrules; r++)
e9690142
JD
118 if (!bitset_test (P, r)
119 && useful_production (r, N))
120 {
121 bitset_set (Np, rules[r].lhs->number - ntokens);
122 bitset_set (P, r);
123 }
ed86e78c 124 if (bitset_equal_p (N, Np))
e9690142 125 break;
572909b5
RS
126 Ns = Np;
127 Np = N;
128 N = Ns;
129 }
ed86e78c 130 bitset_free (N);
572909b5
RS
131 N = Np;
132}
015acc48
AD
133
134
a083fbbf 135static void
d2729d44 136inaccessable_symbols (void)
572909b5 137{
ed86e78c 138 bitset Vp, Vs, Pp;
015acc48
AD
139
140 /* Find out which productions are reachable and which symbols are
141 used. Starting with an empty set of productions and a set of
142 symbols which only has the start symbol in it, iterate over all
143 productions until the set of productions remains unchanged for an
144 iteration. For each production which has a LHS in the set of
145 reachable symbols, add the production to the set of reachable
146 productions, and add all of the nonterminals in the RHS of the
147 production to the set of reachable symbols.
148
149 Consider only the (partially) reduced grammar which has only
150 nonterminals in N and productions in P.
151
152 The result is the set P of productions in the reduced grammar,
153 and the set V of symbols in the reduced grammar.
154
155 Although this algorithm also computes the set of terminals which
156 are reachable, no terminal will be deleted from the grammar. Some
157 terminals might not be in the grammar but might be generated by
158 semantic routines, and so the user might want them available with
159 specified numbers. (Is this true?) However, the nonreachable
160 terminals are printed (if running in verbose mode) so that the
161 user can know. */
162
ed86e78c 163 Vp = bitset_create (nsyms, BITSET_FIXED);
4b3d3a8e 164 Pp = bitset_create (nrules, BITSET_FIXED);
572909b5
RS
165
166 /* If the start symbol isn't useful, then nothing will be useful. */
88bce5a2 167 if (bitset_test (N, accept->number - ntokens))
572909b5 168 {
88bce5a2 169 bitset_set (V, accept->number);
2c5f66ed
AD
170
171 while (1)
e9690142
JD
172 {
173 rule_number r;
174 bitset_copy (Vp, V);
175 for (r = 0; r < nrules; r++)
176 {
177 if (!bitset_test (Pp, r)
178 && bitset_test (P, r)
179 && bitset_test (V, rules[r].lhs->number))
180 {
181 item_number *rhsp;
182 for (rhsp = rules[r].rhs; *rhsp >= 0; rhsp++)
183 if (ISTOKEN (*rhsp) || bitset_test (N, *rhsp - ntokens))
184 bitset_set (Vp, *rhsp);
185 bitset_set (Pp, r);
186 }
187 }
188 if (bitset_equal_p (V, Vp))
189 break;
190 Vs = Vp;
191 Vp = V;
192 V = Vs;
193 }
572909b5 194 }
572909b5 195
ed86e78c 196 bitset_free (V);
572909b5
RS
197 V = Vp;
198
199 /* Tokens 0, 1, and 2 are internal to Bison. Consider them useful. */
e9690142
JD
200 bitset_set (V, endtoken->number); /* end-of-input token */
201 bitset_set (V, errtoken->number); /* error token */
202 bitset_set (V, undeftoken->number); /* some undefined token */
572909b5 203
ed86e78c 204 bitset_free (P);
572909b5
RS
205 P = Pp;
206
914feea9 207 nuseful_productions = bitset_count (P);
572909b5
RS
208 nuseless_productions = nrules - nuseful_productions;
209
210 nuseful_nonterminals = 0;
9222837b 211 {
17ee7397 212 symbol_number i;
9222837b
AD
213 for (i = ntokens; i < nsyms; i++)
214 if (bitset_test (V, i))
e9690142 215 nuseful_nonterminals++;
9222837b 216 }
572909b5
RS
217 nuseless_nonterminals = nvars - nuseful_nonterminals;
218
219 /* A token that was used in %prec should not be warned about. */
9222837b 220 {
17ee7397 221 rule_number r;
4b3d3a8e
AD
222 for (r = 0; r < nrules; ++r)
223 if (rules[r].precsym != 0)
e9690142 224 bitset_set (V1, rules[r].precsym->number);
9222837b 225 }
572909b5 226}
015acc48 227
c3b407f4
AD
228
229/*-------------------------------------------------------------------.
230| Put the useless productions at the end of RULES, and adjust NRULES |
231| accordingly. |
232`-------------------------------------------------------------------*/
233
a083fbbf 234static void
d2729d44 235reduce_grammar_tables (void)
572909b5 236{
6b98e4b5 237 /* Report and flag useless productions. */
c3b407f4 238 {
17ee7397 239 rule_number r;
4b3d3a8e 240 for (r = 0; r < nrules; r++)
c8f002c7 241 rules[r].useful = bitset_test (P, r);
cff03fb2 242 grammar_rules_useless_report (_("rule useless in grammar"));
c3b407f4 243 }
572909b5 244
c3b407f4
AD
245 /* Map the nonterminals to their new index: useful first, useless
246 afterwards. Kept for later report. */
247 {
4b3d3a8e
AD
248 int useful = 0;
249 int useless = nrules - nuseless_productions;
da2a7671 250 rule *rules_sorted = xnmalloc (nrules, sizeof *rules_sorted);
17ee7397 251 rule_number r;
4b3d3a8e 252 for (r = 0; r < nrules; ++r)
9222837b 253 rules_sorted[rules[r].useful ? useful++ : useless++] = rules[r];
4b3d3a8e 254 free (rules);
c3b407f4 255 rules = rules_sorted;
076ab033 256
cc9305dd 257 /* Renumber the rules markers in RITEMS. */
4b3d3a8e 258 for (r = 0; r < nrules; ++r)
cc9305dd 259 {
e9690142
JD
260 item_number *rhsp = rules[r].rhs;
261 for (/* Nothing. */; *rhsp >= 0; ++rhsp)
262 /* Nothing. */;
263 *rhsp = rule_number_as_item_number (r);
264 rules[r].number = r;
cc9305dd 265 }
c3b407f4
AD
266 nrules -= nuseless_productions;
267 }
076ab033 268
a900a624 269 /* Adjust NRITEMS. */
c3b407f4 270 {
a737b216 271 rule_number r;
c3b407f4 272 int length;
4b3d3a8e 273 for (r = nrules; r < nrules + nuseless_productions; ++r)
076ab033 274 {
e9690142
JD
275 length = rule_rhs_length (&rules[r]);
276 nritems -= length + 1;
076ab033 277 }
c3b407f4 278 }
00238958 279}
572909b5 280
572909b5 281
00238958
AD
282/*------------------------------.
283| Remove useless nonterminals. |
284`------------------------------*/
572909b5 285
00238958
AD
286static void
287nonterminals_reduce (void)
288{
17ee7397 289 symbol_number i, n;
572909b5 290
68f1e3ed
AD
291 /* Map the nonterminals to their new index: useful first, useless
292 afterwards. Kept for later report. */
572909b5 293
da2a7671 294 symbol_number *nontermmap = xnmalloc (nvars, sizeof *nontermmap);
00238958
AD
295 n = ntokens;
296 for (i = ntokens; i < nsyms; i++)
ed86e78c 297 if (bitset_test (V, i))
c4882934 298 nontermmap[i - ntokens] = n++;
760b53a8 299 for (i = ntokens; i < nsyms; i++)
ed86e78c 300 if (!bitset_test (V, i))
6b98e4b5 301 {
e9690142
JD
302 nontermmap[i - ntokens] = n++;
303 warn_at (symbols[i]->location, _("nonterminal useless in grammar: %s"),
304 symbols[i]->tag);
6b98e4b5 305 }
572909b5 306
00238958 307
760b53a8
AD
308 /* Shuffle elements of tables indexed by symbol number. */
309 {
da2a7671 310 symbol **symbols_sorted = xnmalloc (nvars, sizeof *symbols_sorted);
760b53a8 311
bba97eb2 312 for (i = ntokens; i < nsyms; i++)
c4882934 313 symbols[i]->number = nontermmap[i - ntokens];
760b53a8 314 for (i = ntokens; i < nsyms; i++)
c4882934 315 symbols_sorted[nontermmap[i - ntokens] - ntokens] = symbols[i];
760b53a8 316 for (i = ntokens; i < nsyms; i++)
c4882934
PE
317 symbols[i] = symbols_sorted[i - ntokens];
318 free (symbols_sorted);
760b53a8 319 }
572909b5 320
0c2d3f4c 321 {
17ee7397 322 rule_number r;
4b3d3a8e 323 for (r = 0; r < nrules; ++r)
0c2d3f4c 324 {
e9690142
JD
325 item_number *rhsp;
326 for (rhsp = rules[r].rhs; *rhsp >= 0; ++rhsp)
327 if (ISVAR (*rhsp))
328 *rhsp = symbol_number_as_item_number (nontermmap[*rhsp
329 - ntokens]);
0c2d3f4c 330 }
c4882934 331 accept->number = nontermmap[accept->number - ntokens];
0c2d3f4c 332 }
572909b5 333
00238958
AD
334 nsyms -= nuseless_nonterminals;
335 nvars -= nuseless_nonterminals;
572909b5 336
c4882934 337 free (nontermmap);
572909b5 338}
015acc48 339
337c5bd1 340
00238958
AD
341/*------------------------------------------------------------------.
342| Output the detailed results of the reductions. For FILE.output. |
343`------------------------------------------------------------------*/
337c5bd1
AD
344
345void
346reduce_output (FILE *out)
572909b5 347{
572909b5
RS
348 if (nuseless_nonterminals > 0)
349 {
d2d1b42b 350 int i;
cff03fb2 351 fprintf (out, "%s\n\n", _("Nonterminals useless in grammar"));
760b53a8 352 for (i = 0; i < nuseless_nonterminals; ++i)
e9690142 353 fprintf (out, " %s\n", symbols[nsyms + i]->tag);
d2d1b42b 354 fputs ("\n\n", out);
572909b5 355 }
d2d1b42b
AD
356
357 {
8307162d 358 bool b = false;
d2d1b42b
AD
359 int i;
360 for (i = 0; i < ntokens; i++)
d80fb37a 361 if (reduce_token_unused_in_grammar (i))
e9690142
JD
362 {
363 if (!b)
364 fprintf (out, "%s\n\n", _("Terminals unused in grammar"));
365 b = true;
366 fprintf (out, " %s\n", symbols[i]->tag);
367 }
d2d1b42b
AD
368 if (b)
369 fputs ("\n\n", out);
370 }
572909b5
RS
371
372 if (nuseless_productions > 0)
cff03fb2 373 grammar_rules_partial_print (out, _("Rules useless in grammar"),
e9690142 374 rule_useless_in_grammar_p);
572909b5
RS
375}
376\f
572909b5 377
337c5bd1
AD
378/*-------------------------------.
379| Report the results to STDERR. |
380`-------------------------------*/
381
a083fbbf 382static void
337c5bd1 383reduce_print (void)
572909b5 384{
572909b5 385 if (nuseless_nonterminals > 0)
2bfcac9a
JD
386 warn (ngettext ("%d nonterminal useless in grammar",
387 "%d nonterminals useless in grammar",
388 nuseless_nonterminals),
389 nuseless_nonterminals);
572909b5 390 if (nuseless_productions > 0)
2bfcac9a
JD
391 warn (ngettext ("%d rule useless in grammar",
392 "%d rules useless in grammar",
393 nuseless_productions),
394 nuseless_productions);
015acc48
AD
395}
396\f
397void
398reduce_grammar (void)
399{
400 bool reduced;
401
402 /* Allocate the global sets used to compute the reduced grammar */
403
ed86e78c 404 N = bitset_create (nvars, BITSET_FIXED);
4b3d3a8e 405 P = bitset_create (nrules, BITSET_FIXED);
ed86e78c 406 V = bitset_create (nsyms, BITSET_FIXED);
ed86e78c 407 V1 = bitset_create (nsyms, BITSET_FIXED);
015acc48
AD
408
409 useless_nonterminals ();
410 inaccessable_symbols ();
411
4d56beff 412 reduced = (nuseless_nonterminals + nuseless_productions > 0);
337c5bd1
AD
413 if (!reduced)
414 return;
015acc48 415
337c5bd1 416 reduce_print ();
015acc48 417
88bce5a2 418 if (!bitset_test (N, accept->number - ntokens))
1bfb97db 419 fatal_at (startsymbol_location,
e9690142
JD
420 _("start symbol %s does not derive any sentence"),
421 startsymbol->tag);
015acc48 422
bb88b0fc
AD
423 /* First reduce the nonterminals, as they renumber themselves in the
424 whole grammar. If you change the order, nonterms would be
425 renumbered only in the reduced grammar. */
00238958
AD
426 if (nuseless_nonterminals > 0)
427 nonterminals_reduce ();
bb88b0fc
AD
428 if (nuseless_productions > 0)
429 reduce_grammar_tables ();
9bfe901c 430
273a74fa 431 if (trace_flag & trace_grammar)
9bfe901c 432 {
78ab8f67 433 grammar_dump (stderr, "Reduced Grammar");
9bfe901c
AD
434
435 fprintf (stderr, "reduced %s defines %d terminals, %d nonterminals\
436, and %d productions.\n",
e9690142 437 grammar_file, ntokens, nvars, nrules);
9bfe901c 438 }
337c5bd1 439}
015acc48 440
d80fb37a
JD
441bool
442reduce_token_unused_in_grammar (symbol_number i)
443{
444 aver (i < ntokens);
445 return !bitset_test (V, i) && !bitset_test (V1, i);
446}
447
448bool
449reduce_nonterminal_useless_in_grammar (symbol_number i)
450{
451 aver (ntokens <= i && i < nsyms + nuseless_nonterminals);
452 return nsyms <= i;
453}
015acc48 454
337c5bd1
AD
455/*-----------------------------------------------------------.
456| Free the global sets used to compute the reduced grammar. |
457`-----------------------------------------------------------*/
458
459void
460reduce_free (void)
461{
ed86e78c
AD
462 bitset_free (N);
463 bitset_free (V);
464 bitset_free (V1);
465 bitset_free (P);
572909b5 466}