]> git.saurik.com Git - bison.git/blame - src/reduce.c
Work around Java's ``code too large'' problem for parser tables.
[bison.git] / src / reduce.c
CommitLineData
572909b5 1/* Grammar reduction for Bison.
c4882934 2
41d7a5f2 3 Copyright (C) 1988, 1989, 2000, 2001, 2002, 2003, 2005, 2006,
5523ac79 4 2007, 2008 Free Software Foundation, Inc.
572909b5 5
015acc48 6 This file is part of Bison, the GNU Compiler Compiler.
572909b5 7
f16b0819 8 This program is free software: you can redistribute it and/or modify
015acc48 9 it under the terms of the GNU General Public License as published by
f16b0819
PE
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
572909b5 12
f16b0819 13 This program is distributed in the hope that it will be useful,
015acc48
AD
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
572909b5 17
015acc48 18 You should have received a copy of the GNU General Public License
f16b0819 19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
572909b5
RS
20
21
015acc48
AD
22/* Reduce the grammar: Find and eliminate unreachable terminals,
23 nonterminals, and productions. David S. Bakin. */
572909b5 24
015acc48
AD
25/* Don't eliminate unreachable terminals: They may be used by the
26 user's parser. */
572909b5 27
2cec9080 28#include <config.h>
572909b5 29#include "system.h"
17ee7397
PE
30
31#include <bitset.h>
32#include <quotearg.h>
33
34#include "complain.h"
572909b5 35#include "files.h"
17ee7397 36#include "getargs.h"
572909b5 37#include "gram.h"
41d7a5f2 38#include "print-xml.h"
b2ca4022 39#include "reader.h"
17ee7397
PE
40#include "reduce.h"
41#include "symtab.h"
572909b5 42
00238958 43/* Set of all nonterminals which are not useless. */
ed86e78c 44static bitset N;
572909b5 45
00238958 46/* Set of all rules which have no useless nonterminals in their RHS. */
ed86e78c 47static bitset P;
00238958
AD
48
49/* Set of all accessible symbols. */
ed86e78c 50static bitset V;
00238958
AD
51
52/* Set of symbols used to define rule precedence (so they are
53 `useless', but no warning should be issued). */
ed86e78c 54static bitset V1;
572909b5 55
17ee7397
PE
56static rule_number nuseful_productions;
57rule_number nuseless_productions;
015acc48 58static int nuseful_nonterminals;
17ee7397 59symbol_number nuseless_nonterminals;
572909b5 60\f
015acc48
AD
61/*-------------------------------------------------------------------.
62| Another way to do this would be with a set for each production and |
63| then do subset tests against N0, but even for the C grammar the |
64| whole reducing process takes only 2 seconds on my 8Mhz AT. |
65`-------------------------------------------------------------------*/
572909b5 66
a083fbbf 67static bool
17ee7397 68useful_production (rule_number r, bitset N0)
572909b5 69{
17ee7397 70 item_number *rhsp;
572909b5 71
015acc48
AD
72 /* A production is useful if all of the nonterminals in its appear
73 in the set of useful nonterminals. */
572909b5 74
9222837b
AD
75 for (rhsp = rules[r].rhs; *rhsp >= 0; ++rhsp)
76 if (ISVAR (*rhsp) && !bitset_test (N0, *rhsp - ntokens))
8307162d
PE
77 return false;
78 return true;
572909b5
RS
79}
80
81
015acc48
AD
82/*---------------------------------------------------------.
83| Remember that rules are 1-origin, symbols are 0-origin. |
84`---------------------------------------------------------*/
572909b5 85
a083fbbf 86static void
d2729d44 87useless_nonterminals (void)
572909b5 88{
ed86e78c 89 bitset Np, Ns;
17ee7397 90 rule_number r;
015acc48
AD
91
92 /* N is set as built. Np is set being built this iteration. P is
93 set of all productions which have a RHS all in N. */
94
ed86e78c 95 Np = bitset_create (nvars, BITSET_FIXED);
ed86e78c 96
015acc48
AD
97
98 /* The set being computed is a set of nonterminals which can derive
99 the empty string or strings consisting of all terminals. At each
100 iteration a nonterminal is added to the set if there is a
101 production with that nonterminal as its LHS for which all the
102 nonterminals in its RHS are already in the set. Iterate until
103 the set being computed remains unchanged. Any nonterminals not
104 in the set at that point are useless in that they will never be
105 used in deriving a sentence of the language.
106
107 This iteration doesn't use any special traversal over the
108 productions. A set is kept of all productions for which all the
109 nonterminals in the RHS are in useful. Only productions not in
110 this set are scanned on each iteration. At the end, this set is
111 saved to be used when finding useful productions: only
112 productions in this set will appear in the final grammar. */
572909b5 113
572909b5
RS
114 while (1)
115 {
ed86e78c 116 bitset_copy (Np, N);
4b3d3a8e 117 for (r = 0; r < nrules; r++)
9222837b
AD
118 if (!bitset_test (P, r)
119 && useful_production (r, N))
f9abaa2c 120 {
9222837b
AD
121 bitset_set (Np, rules[r].lhs->number - ntokens);
122 bitset_set (P, r);
f9abaa2c 123 }
ed86e78c 124 if (bitset_equal_p (N, Np))
572909b5
RS
125 break;
126 Ns = Np;
127 Np = N;
128 N = Ns;
129 }
ed86e78c 130 bitset_free (N);
572909b5
RS
131 N = Np;
132}
015acc48
AD
133
134
a083fbbf 135static void
d2729d44 136inaccessable_symbols (void)
572909b5 137{
ed86e78c 138 bitset Vp, Vs, Pp;
015acc48
AD
139
140 /* Find out which productions are reachable and which symbols are
141 used. Starting with an empty set of productions and a set of
142 symbols which only has the start symbol in it, iterate over all
143 productions until the set of productions remains unchanged for an
144 iteration. For each production which has a LHS in the set of
145 reachable symbols, add the production to the set of reachable
146 productions, and add all of the nonterminals in the RHS of the
147 production to the set of reachable symbols.
148
149 Consider only the (partially) reduced grammar which has only
150 nonterminals in N and productions in P.
151
152 The result is the set P of productions in the reduced grammar,
153 and the set V of symbols in the reduced grammar.
154
155 Although this algorithm also computes the set of terminals which
156 are reachable, no terminal will be deleted from the grammar. Some
157 terminals might not be in the grammar but might be generated by
158 semantic routines, and so the user might want them available with
159 specified numbers. (Is this true?) However, the nonreachable
160 terminals are printed (if running in verbose mode) so that the
161 user can know. */
162
ed86e78c 163 Vp = bitset_create (nsyms, BITSET_FIXED);
4b3d3a8e 164 Pp = bitset_create (nrules, BITSET_FIXED);
572909b5
RS
165
166 /* If the start symbol isn't useful, then nothing will be useful. */
88bce5a2 167 if (bitset_test (N, accept->number - ntokens))
572909b5 168 {
88bce5a2 169 bitset_set (V, accept->number);
2c5f66ed
AD
170
171 while (1)
572909b5 172 {
17ee7397 173 rule_number r;
ed86e78c 174 bitset_copy (Vp, V);
4b3d3a8e 175 for (r = 0; r < nrules; r++)
572909b5 176 {
9222837b
AD
177 if (!bitset_test (Pp, r)
178 && bitset_test (P, r)
179 && bitset_test (V, rules[r].lhs->number))
572909b5 180 {
17ee7397 181 item_number *rhsp;
9222837b
AD
182 for (rhsp = rules[r].rhs; *rhsp >= 0; rhsp++)
183 if (ISTOKEN (*rhsp) || bitset_test (N, *rhsp - ntokens))
184 bitset_set (Vp, *rhsp);
185 bitset_set (Pp, r);
572909b5 186 }
572909b5 187 }
ed86e78c 188 if (bitset_equal_p (V, Vp))
2c5f66ed
AD
189 break;
190 Vs = Vp;
191 Vp = V;
192 V = Vs;
572909b5 193 }
572909b5 194 }
572909b5 195
ed86e78c 196 bitset_free (V);
572909b5
RS
197 V = Vp;
198
199 /* Tokens 0, 1, and 2 are internal to Bison. Consider them useful. */
88bce5a2 200 bitset_set (V, endtoken->number); /* end-of-input token */
72a23c97
AD
201 bitset_set (V, errtoken->number); /* error token */
202 bitset_set (V, undeftoken->number); /* some undefined token */
572909b5 203
ed86e78c 204 bitset_free (P);
572909b5
RS
205 P = Pp;
206
914feea9 207 nuseful_productions = bitset_count (P);
572909b5
RS
208 nuseless_productions = nrules - nuseful_productions;
209
210 nuseful_nonterminals = 0;
9222837b 211 {
17ee7397 212 symbol_number i;
9222837b
AD
213 for (i = ntokens; i < nsyms; i++)
214 if (bitset_test (V, i))
215 nuseful_nonterminals++;
216 }
572909b5
RS
217 nuseless_nonterminals = nvars - nuseful_nonterminals;
218
219 /* A token that was used in %prec should not be warned about. */
9222837b 220 {
17ee7397 221 rule_number r;
4b3d3a8e
AD
222 for (r = 0; r < nrules; ++r)
223 if (rules[r].precsym != 0)
224 bitset_set (V1, rules[r].precsym->number);
9222837b 225 }
572909b5 226}
015acc48 227
c3b407f4
AD
228
229/*-------------------------------------------------------------------.
230| Put the useless productions at the end of RULES, and adjust NRULES |
231| accordingly. |
232`-------------------------------------------------------------------*/
233
a083fbbf 234static void
d2729d44 235reduce_grammar_tables (void)
572909b5 236{
6b98e4b5 237 /* Report and flag useless productions. */
c3b407f4 238 {
17ee7397 239 rule_number r;
4b3d3a8e 240 for (r = 0; r < nrules; r++)
c8f002c7 241 rules[r].useful = bitset_test (P, r);
cff03fb2 242 grammar_rules_useless_report (_("rule useless in grammar"));
c3b407f4 243 }
572909b5 244
c3b407f4
AD
245 /* Map the nonterminals to their new index: useful first, useless
246 afterwards. Kept for later report. */
247 {
4b3d3a8e
AD
248 int useful = 0;
249 int useless = nrules - nuseless_productions;
da2a7671 250 rule *rules_sorted = xnmalloc (nrules, sizeof *rules_sorted);
17ee7397 251 rule_number r;
4b3d3a8e 252 for (r = 0; r < nrules; ++r)
9222837b 253 rules_sorted[rules[r].useful ? useful++ : useless++] = rules[r];
4b3d3a8e 254 free (rules);
c3b407f4 255 rules = rules_sorted;
076ab033 256
cc9305dd 257 /* Renumber the rules markers in RITEMS. */
4b3d3a8e 258 for (r = 0; r < nrules; ++r)
cc9305dd 259 {
17ee7397 260 item_number *rhsp = rules[r].rhs;
cc9305dd
AD
261 for (/* Nothing. */; *rhsp >= 0; ++rhsp)
262 /* Nothing. */;
12b0043a 263 *rhsp = rule_number_as_item_number (r);
9222837b 264 rules[r].number = r;
cc9305dd 265 }
c3b407f4
AD
266 nrules -= nuseless_productions;
267 }
076ab033 268
a900a624 269 /* Adjust NRITEMS. */
c3b407f4 270 {
a737b216 271 rule_number r;
c3b407f4 272 int length;
4b3d3a8e 273 for (r = nrules; r < nrules + nuseless_productions; ++r)
076ab033 274 {
c3b407f4
AD
275 length = rule_rhs_length (&rules[r]);
276 nritems -= length + 1;
076ab033 277 }
c3b407f4 278 }
00238958 279}
572909b5 280
572909b5 281
00238958
AD
282/*------------------------------.
283| Remove useless nonterminals. |
284`------------------------------*/
572909b5 285
00238958
AD
286static void
287nonterminals_reduce (void)
288{
17ee7397 289 symbol_number i, n;
572909b5 290
68f1e3ed
AD
291 /* Map the nonterminals to their new index: useful first, useless
292 afterwards. Kept for later report. */
572909b5 293
da2a7671 294 symbol_number *nontermmap = xnmalloc (nvars, sizeof *nontermmap);
00238958
AD
295 n = ntokens;
296 for (i = ntokens; i < nsyms; i++)
ed86e78c 297 if (bitset_test (V, i))
c4882934 298 nontermmap[i - ntokens] = n++;
760b53a8 299 for (i = ntokens; i < nsyms; i++)
ed86e78c 300 if (!bitset_test (V, i))
6b98e4b5 301 {
c4882934 302 nontermmap[i - ntokens] = n++;
cff03fb2 303 warn_at (symbols[i]->location, _("nonterminal useless in grammar: %s"),
97650f4e 304 symbols[i]->tag);
6b98e4b5 305 }
572909b5 306
00238958 307
760b53a8
AD
308 /* Shuffle elements of tables indexed by symbol number. */
309 {
da2a7671 310 symbol **symbols_sorted = xnmalloc (nvars, sizeof *symbols_sorted);
760b53a8 311
bba97eb2 312 for (i = ntokens; i < nsyms; i++)
c4882934 313 symbols[i]->number = nontermmap[i - ntokens];
760b53a8 314 for (i = ntokens; i < nsyms; i++)
c4882934 315 symbols_sorted[nontermmap[i - ntokens] - ntokens] = symbols[i];
760b53a8 316 for (i = ntokens; i < nsyms; i++)
c4882934
PE
317 symbols[i] = symbols_sorted[i - ntokens];
318 free (symbols_sorted);
760b53a8 319 }
572909b5 320
0c2d3f4c 321 {
17ee7397 322 rule_number r;
4b3d3a8e 323 for (r = 0; r < nrules; ++r)
0c2d3f4c 324 {
17ee7397 325 item_number *rhsp;
0c2d3f4c
AD
326 for (rhsp = rules[r].rhs; *rhsp >= 0; ++rhsp)
327 if (ISVAR (*rhsp))
c4882934
PE
328 *rhsp = symbol_number_as_item_number (nontermmap[*rhsp
329 - ntokens]);
0c2d3f4c 330 }
c4882934 331 accept->number = nontermmap[accept->number - ntokens];
0c2d3f4c 332 }
572909b5 333
00238958
AD
334 nsyms -= nuseless_nonterminals;
335 nvars -= nuseless_nonterminals;
572909b5 336
c4882934 337 free (nontermmap);
572909b5 338}
015acc48 339
337c5bd1 340
00238958
AD
341/*------------------------------------------------------------------.
342| Output the detailed results of the reductions. For FILE.output. |
343`------------------------------------------------------------------*/
337c5bd1
AD
344
345void
346reduce_output (FILE *out)
572909b5 347{
572909b5
RS
348 if (nuseless_nonterminals > 0)
349 {
d2d1b42b 350 int i;
cff03fb2 351 fprintf (out, "%s\n\n", _("Nonterminals useless in grammar"));
760b53a8 352 for (i = 0; i < nuseless_nonterminals; ++i)
97650f4e 353 fprintf (out, " %s\n", symbols[nsyms + i]->tag);
d2d1b42b 354 fputs ("\n\n", out);
572909b5 355 }
d2d1b42b
AD
356
357 {
8307162d 358 bool b = false;
d2d1b42b
AD
359 int i;
360 for (i = 0; i < ntokens; i++)
d80fb37a 361 if (reduce_token_unused_in_grammar (i))
572909b5
RS
362 {
363 if (!b)
d80fb37a 364 fprintf (out, "%s\n\n", _("Terminals unused in grammar"));
8307162d 365 b = true;
97650f4e 366 fprintf (out, " %s\n", symbols[i]->tag);
572909b5 367 }
d2d1b42b
AD
368 if (b)
369 fputs ("\n\n", out);
370 }
572909b5
RS
371
372 if (nuseless_productions > 0)
cff03fb2
JD
373 grammar_rules_partial_print (out, _("Rules useless in grammar"),
374 rule_useless_in_grammar_p);
572909b5
RS
375}
376\f
572909b5 377
337c5bd1
AD
378/*-------------------------------.
379| Report the results to STDERR. |
380`-------------------------------*/
381
a083fbbf 382static void
337c5bd1 383reduce_print (void)
572909b5 384{
572909b5 385 if (nuseless_nonterminals > 0)
bcf07cb7
JD
386 {
387 fprintf (stderr, "%s: %s: ", grammar_file, _("warning"));
388 fprintf (stderr, ngettext ("%d nonterminal useless in grammar",
389 "%d nonterminals useless in grammar",
390 nuseless_nonterminals),
391 nuseless_nonterminals);
392 fprintf (stderr, "\n");
393 }
572909b5 394 if (nuseless_productions > 0)
bcf07cb7
JD
395 {
396 fprintf (stderr, "%s: %s: ", grammar_file, _("warning"));
397 fprintf (stderr, ngettext ("%d rule useless in grammar",
398 "%d rules useless in grammar",
399 nuseless_productions),
400 nuseless_productions);
401 fprintf (stderr, "\n");
402 }
015acc48
AD
403}
404\f
405void
406reduce_grammar (void)
407{
408 bool reduced;
409
410 /* Allocate the global sets used to compute the reduced grammar */
411
ed86e78c 412 N = bitset_create (nvars, BITSET_FIXED);
4b3d3a8e 413 P = bitset_create (nrules, BITSET_FIXED);
ed86e78c 414 V = bitset_create (nsyms, BITSET_FIXED);
ed86e78c 415 V1 = bitset_create (nsyms, BITSET_FIXED);
015acc48
AD
416
417 useless_nonterminals ();
418 inaccessable_symbols ();
419
4d56beff 420 reduced = (nuseless_nonterminals + nuseless_productions > 0);
337c5bd1
AD
421 if (!reduced)
422 return;
015acc48 423
337c5bd1 424 reduce_print ();
015acc48 425
88bce5a2 426 if (!bitset_test (N, accept->number - ntokens))
1bfb97db
AD
427 fatal_at (startsymbol_location,
428 _("start symbol %s does not derive any sentence"),
429 startsymbol->tag);
015acc48 430
bb88b0fc
AD
431 /* First reduce the nonterminals, as they renumber themselves in the
432 whole grammar. If you change the order, nonterms would be
433 renumbered only in the reduced grammar. */
00238958
AD
434 if (nuseless_nonterminals > 0)
435 nonterminals_reduce ();
bb88b0fc
AD
436 if (nuseless_productions > 0)
437 reduce_grammar_tables ();
9bfe901c 438
273a74fa 439 if (trace_flag & trace_grammar)
9bfe901c 440 {
78ab8f67 441 grammar_dump (stderr, "Reduced Grammar");
9bfe901c
AD
442
443 fprintf (stderr, "reduced %s defines %d terminals, %d nonterminals\
444, and %d productions.\n",
95612cfa 445 grammar_file, ntokens, nvars, nrules);
9bfe901c 446 }
337c5bd1 447}
015acc48 448
d80fb37a
JD
449bool
450reduce_token_unused_in_grammar (symbol_number i)
451{
452 aver (i < ntokens);
453 return !bitset_test (V, i) && !bitset_test (V1, i);
454}
455
456bool
457reduce_nonterminal_useless_in_grammar (symbol_number i)
458{
459 aver (ntokens <= i && i < nsyms + nuseless_nonterminals);
460 return nsyms <= i;
461}
015acc48 462
337c5bd1
AD
463/*-----------------------------------------------------------.
464| Free the global sets used to compute the reduced grammar. |
465`-----------------------------------------------------------*/
466
467void
468reduce_free (void)
469{
ed86e78c
AD
470 bitset_free (N);
471 bitset_free (V);
472 bitset_free (V1);
473 bitset_free (P);
572909b5 474}