]> git.saurik.com Git - bison.git/blame - src/reduce.c
Fix "Bison blows chunks on empty file" bug.
[bison.git] / src / reduce.c
CommitLineData
572909b5 1/* Grammar reduction for Bison.
c4882934 2
a737b216
PE
3 Copyright (C) 1988, 1989, 2000, 2001, 2002, 2003 Free Software
4 Foundation, Inc.
572909b5 5
015acc48 6 This file is part of Bison, the GNU Compiler Compiler.
572909b5 7
015acc48
AD
8 Bison is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
572909b5 12
015acc48
AD
13 Bison is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
572909b5 17
015acc48
AD
18 You should have received a copy of the GNU General Public License
19 along with Bison; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
572909b5
RS
22
23
015acc48
AD
24/* Reduce the grammar: Find and eliminate unreachable terminals,
25 nonterminals, and productions. David S. Bakin. */
572909b5 26
015acc48
AD
27/* Don't eliminate unreachable terminals: They may be used by the
28 user's parser. */
572909b5 29
572909b5 30#include "system.h"
17ee7397
PE
31
32#include <bitset.h>
33#include <quotearg.h>
34
35#include "complain.h"
572909b5 36#include "files.h"
17ee7397 37#include "getargs.h"
572909b5 38#include "gram.h"
b2ca4022 39#include "reader.h"
17ee7397
PE
40#include "reduce.h"
41#include "symtab.h"
572909b5 42
00238958 43/* Set of all nonterminals which are not useless. */
ed86e78c 44static bitset N;
572909b5 45
00238958 46/* Set of all rules which have no useless nonterminals in their RHS. */
ed86e78c 47static bitset P;
00238958
AD
48
49/* Set of all accessible symbols. */
ed86e78c 50static bitset V;
00238958
AD
51
52/* Set of symbols used to define rule precedence (so they are
53 `useless', but no warning should be issued). */
ed86e78c 54static bitset V1;
572909b5 55
17ee7397
PE
56static rule_number nuseful_productions;
57rule_number nuseless_productions;
015acc48 58static int nuseful_nonterminals;
17ee7397 59symbol_number nuseless_nonterminals;
572909b5 60\f
015acc48
AD
61/*-------------------------------------------------------------------.
62| Another way to do this would be with a set for each production and |
63| then do subset tests against N0, but even for the C grammar the |
64| whole reducing process takes only 2 seconds on my 8Mhz AT. |
65`-------------------------------------------------------------------*/
572909b5 66
a083fbbf 67static bool
17ee7397 68useful_production (rule_number r, bitset N0)
572909b5 69{
17ee7397 70 item_number *rhsp;
572909b5 71
015acc48
AD
72 /* A production is useful if all of the nonterminals in its appear
73 in the set of useful nonterminals. */
572909b5 74
9222837b
AD
75 for (rhsp = rules[r].rhs; *rhsp >= 0; ++rhsp)
76 if (ISVAR (*rhsp) && !bitset_test (N0, *rhsp - ntokens))
8307162d
PE
77 return false;
78 return true;
572909b5
RS
79}
80
81
015acc48
AD
82/*---------------------------------------------------------.
83| Remember that rules are 1-origin, symbols are 0-origin. |
84`---------------------------------------------------------*/
572909b5 85
a083fbbf 86static void
d2729d44 87useless_nonterminals (void)
572909b5 88{
ed86e78c 89 bitset Np, Ns;
17ee7397 90 rule_number r;
015acc48
AD
91
92 /* N is set as built. Np is set being built this iteration. P is
93 set of all productions which have a RHS all in N. */
94
ed86e78c 95 Np = bitset_create (nvars, BITSET_FIXED);
ed86e78c 96
015acc48
AD
97
98 /* The set being computed is a set of nonterminals which can derive
99 the empty string or strings consisting of all terminals. At each
100 iteration a nonterminal is added to the set if there is a
101 production with that nonterminal as its LHS for which all the
102 nonterminals in its RHS are already in the set. Iterate until
103 the set being computed remains unchanged. Any nonterminals not
104 in the set at that point are useless in that they will never be
105 used in deriving a sentence of the language.
106
107 This iteration doesn't use any special traversal over the
108 productions. A set is kept of all productions for which all the
109 nonterminals in the RHS are in useful. Only productions not in
110 this set are scanned on each iteration. At the end, this set is
111 saved to be used when finding useful productions: only
112 productions in this set will appear in the final grammar. */
572909b5 113
572909b5
RS
114 while (1)
115 {
ed86e78c 116 bitset_copy (Np, N);
4b3d3a8e 117 for (r = 0; r < nrules; r++)
9222837b
AD
118 if (!bitset_test (P, r)
119 && useful_production (r, N))
f9abaa2c 120 {
9222837b
AD
121 bitset_set (Np, rules[r].lhs->number - ntokens);
122 bitset_set (P, r);
f9abaa2c 123 }
ed86e78c 124 if (bitset_equal_p (N, Np))
572909b5
RS
125 break;
126 Ns = Np;
127 Np = N;
128 N = Ns;
129 }
ed86e78c 130 bitset_free (N);
572909b5
RS
131 N = Np;
132}
015acc48
AD
133
134
a083fbbf 135static void
d2729d44 136inaccessable_symbols (void)
572909b5 137{
ed86e78c 138 bitset Vp, Vs, Pp;
015acc48
AD
139
140 /* Find out which productions are reachable and which symbols are
141 used. Starting with an empty set of productions and a set of
142 symbols which only has the start symbol in it, iterate over all
143 productions until the set of productions remains unchanged for an
144 iteration. For each production which has a LHS in the set of
145 reachable symbols, add the production to the set of reachable
146 productions, and add all of the nonterminals in the RHS of the
147 production to the set of reachable symbols.
148
149 Consider only the (partially) reduced grammar which has only
150 nonterminals in N and productions in P.
151
152 The result is the set P of productions in the reduced grammar,
153 and the set V of symbols in the reduced grammar.
154
155 Although this algorithm also computes the set of terminals which
156 are reachable, no terminal will be deleted from the grammar. Some
157 terminals might not be in the grammar but might be generated by
158 semantic routines, and so the user might want them available with
159 specified numbers. (Is this true?) However, the nonreachable
160 terminals are printed (if running in verbose mode) so that the
161 user can know. */
162
ed86e78c 163 Vp = bitset_create (nsyms, BITSET_FIXED);
4b3d3a8e 164 Pp = bitset_create (nrules, BITSET_FIXED);
572909b5
RS
165
166 /* If the start symbol isn't useful, then nothing will be useful. */
88bce5a2 167 if (bitset_test (N, accept->number - ntokens))
572909b5 168 {
88bce5a2 169 bitset_set (V, accept->number);
2c5f66ed
AD
170
171 while (1)
572909b5 172 {
17ee7397 173 rule_number r;
ed86e78c 174 bitset_copy (Vp, V);
4b3d3a8e 175 for (r = 0; r < nrules; r++)
572909b5 176 {
9222837b
AD
177 if (!bitset_test (Pp, r)
178 && bitset_test (P, r)
179 && bitset_test (V, rules[r].lhs->number))
572909b5 180 {
17ee7397 181 item_number *rhsp;
9222837b
AD
182 for (rhsp = rules[r].rhs; *rhsp >= 0; rhsp++)
183 if (ISTOKEN (*rhsp) || bitset_test (N, *rhsp - ntokens))
184 bitset_set (Vp, *rhsp);
185 bitset_set (Pp, r);
572909b5 186 }
572909b5 187 }
ed86e78c 188 if (bitset_equal_p (V, Vp))
2c5f66ed
AD
189 break;
190 Vs = Vp;
191 Vp = V;
192 V = Vs;
572909b5 193 }
572909b5 194 }
572909b5 195
ed86e78c 196 bitset_free (V);
572909b5
RS
197 V = Vp;
198
199 /* Tokens 0, 1, and 2 are internal to Bison. Consider them useful. */
88bce5a2 200 bitset_set (V, endtoken->number); /* end-of-input token */
72a23c97
AD
201 bitset_set (V, errtoken->number); /* error token */
202 bitset_set (V, undeftoken->number); /* some undefined token */
572909b5 203
ed86e78c 204 bitset_free (P);
572909b5
RS
205 P = Pp;
206
914feea9 207 nuseful_productions = bitset_count (P);
572909b5
RS
208 nuseless_productions = nrules - nuseful_productions;
209
210 nuseful_nonterminals = 0;
9222837b 211 {
17ee7397 212 symbol_number i;
9222837b
AD
213 for (i = ntokens; i < nsyms; i++)
214 if (bitset_test (V, i))
215 nuseful_nonterminals++;
216 }
572909b5
RS
217 nuseless_nonterminals = nvars - nuseful_nonterminals;
218
219 /* A token that was used in %prec should not be warned about. */
9222837b 220 {
17ee7397 221 rule_number r;
4b3d3a8e
AD
222 for (r = 0; r < nrules; ++r)
223 if (rules[r].precsym != 0)
224 bitset_set (V1, rules[r].precsym->number);
9222837b 225 }
572909b5 226}
015acc48 227
c3b407f4
AD
228
229/*-------------------------------------------------------------------.
230| Put the useless productions at the end of RULES, and adjust NRULES |
231| accordingly. |
232`-------------------------------------------------------------------*/
233
a083fbbf 234static void
d2729d44 235reduce_grammar_tables (void)
572909b5 236{
6b98e4b5 237 /* Report and flag useless productions. */
c3b407f4 238 {
17ee7397 239 rule_number r;
4b3d3a8e 240 for (r = 0; r < nrules; r++)
c8f002c7
AD
241 rules[r].useful = bitset_test (P, r);
242 grammar_rules_never_reduced_report (_("useless rule"));
c3b407f4 243 }
572909b5 244
c3b407f4
AD
245 /* Map the nonterminals to their new index: useful first, useless
246 afterwards. Kept for later report. */
247 {
4b3d3a8e
AD
248 int useful = 0;
249 int useless = nrules - nuseless_productions;
c4882934 250 rule *rules_sorted = MALLOC (rules_sorted, nrules);
17ee7397 251 rule_number r;
4b3d3a8e 252 for (r = 0; r < nrules; ++r)
9222837b 253 rules_sorted[rules[r].useful ? useful++ : useless++] = rules[r];
4b3d3a8e 254 free (rules);
c3b407f4 255 rules = rules_sorted;
076ab033 256
cc9305dd 257 /* Renumber the rules markers in RITEMS. */
4b3d3a8e 258 for (r = 0; r < nrules; ++r)
cc9305dd 259 {
17ee7397 260 item_number *rhsp = rules[r].rhs;
cc9305dd
AD
261 for (/* Nothing. */; *rhsp >= 0; ++rhsp)
262 /* Nothing. */;
12b0043a 263 *rhsp = rule_number_as_item_number (r);
9222837b 264 rules[r].number = r;
cc9305dd 265 }
c3b407f4
AD
266 nrules -= nuseless_productions;
267 }
076ab033 268
a900a624 269 /* Adjust NRITEMS. */
c3b407f4 270 {
a737b216 271 rule_number r;
c3b407f4 272 int length;
4b3d3a8e 273 for (r = nrules; r < nrules + nuseless_productions; ++r)
076ab033 274 {
c3b407f4
AD
275 length = rule_rhs_length (&rules[r]);
276 nritems -= length + 1;
076ab033 277 }
c3b407f4 278 }
00238958 279}
572909b5 280
572909b5 281
00238958
AD
282/*------------------------------.
283| Remove useless nonterminals. |
284`------------------------------*/
572909b5 285
00238958
AD
286static void
287nonterminals_reduce (void)
288{
17ee7397 289 symbol_number i, n;
572909b5 290
68f1e3ed
AD
291 /* Map the nonterminals to their new index: useful first, useless
292 afterwards. Kept for later report. */
572909b5 293
c4882934 294 symbol_number *nontermmap = CALLOC (nontermmap, nvars);
00238958
AD
295 n = ntokens;
296 for (i = ntokens; i < nsyms; i++)
ed86e78c 297 if (bitset_test (V, i))
c4882934 298 nontermmap[i - ntokens] = n++;
760b53a8 299 for (i = ntokens; i < nsyms; i++)
ed86e78c 300 if (!bitset_test (V, i))
6b98e4b5 301 {
c4882934 302 nontermmap[i - ntokens] = n++;
d6ea8200 303 warn_at (symbols[i]->location, _("useless nonterminal: %s"),
97650f4e 304 symbols[i]->tag);
6b98e4b5 305 }
572909b5 306
00238958 307
760b53a8
AD
308 /* Shuffle elements of tables indexed by symbol number. */
309 {
c4882934 310 symbol **symbols_sorted = MALLOC (symbols_sorted, nvars);
760b53a8 311
bba97eb2 312 for (i = ntokens; i < nsyms; i++)
c4882934 313 symbols[i]->number = nontermmap[i - ntokens];
760b53a8 314 for (i = ntokens; i < nsyms; i++)
c4882934 315 symbols_sorted[nontermmap[i - ntokens] - ntokens] = symbols[i];
760b53a8 316 for (i = ntokens; i < nsyms; i++)
c4882934
PE
317 symbols[i] = symbols_sorted[i - ntokens];
318 free (symbols_sorted);
760b53a8 319 }
572909b5 320
0c2d3f4c 321 {
17ee7397 322 rule_number r;
4b3d3a8e 323 for (r = 0; r < nrules; ++r)
0c2d3f4c 324 {
17ee7397 325 item_number *rhsp;
0c2d3f4c
AD
326 for (rhsp = rules[r].rhs; *rhsp >= 0; ++rhsp)
327 if (ISVAR (*rhsp))
c4882934
PE
328 *rhsp = symbol_number_as_item_number (nontermmap[*rhsp
329 - ntokens]);
0c2d3f4c 330 }
c4882934 331 accept->number = nontermmap[accept->number - ntokens];
0c2d3f4c 332 }
572909b5 333
00238958
AD
334 nsyms -= nuseless_nonterminals;
335 nvars -= nuseless_nonterminals;
572909b5 336
c4882934 337 free (nontermmap);
572909b5 338}
015acc48 339
337c5bd1 340
00238958
AD
341/*------------------------------------------------------------------.
342| Output the detailed results of the reductions. For FILE.output. |
343`------------------------------------------------------------------*/
337c5bd1
AD
344
345void
346reduce_output (FILE *out)
572909b5 347{
572909b5
RS
348 if (nuseless_nonterminals > 0)
349 {
d2d1b42b 350 int i;
c8f002c7 351 fprintf (out, "%s\n\n", _("Useless nonterminals"));
760b53a8 352 for (i = 0; i < nuseless_nonterminals; ++i)
97650f4e 353 fprintf (out, " %s\n", symbols[nsyms + i]->tag);
d2d1b42b 354 fputs ("\n\n", out);
572909b5 355 }
d2d1b42b
AD
356
357 {
8307162d 358 bool b = false;
d2d1b42b
AD
359 int i;
360 for (i = 0; i < ntokens; i++)
ed86e78c 361 if (!bitset_test (V, i) && !bitset_test (V1, i))
572909b5
RS
362 {
363 if (!b)
c8f002c7 364 fprintf (out, "%s\n\n", _("Terminals which are not used"));
8307162d 365 b = true;
97650f4e 366 fprintf (out, " %s\n", symbols[i]->tag);
572909b5 367 }
d2d1b42b
AD
368 if (b)
369 fputs ("\n\n", out);
370 }
572909b5
RS
371
372 if (nuseless_productions > 0)
9757c359 373 grammar_rules_partial_print (out, _("Useless rules"),
c8f002c7 374 rule_useless_p);
572909b5
RS
375}
376\f
572909b5 377
572909b5
RS
378
379
337c5bd1
AD
380
381/*-------------------------------.
382| Report the results to STDERR. |
383`-------------------------------*/
384
a083fbbf 385static void
337c5bd1 386reduce_print (void)
572909b5 387{
89cab50d 388 if (yacc_flag && nuseless_productions)
cb487d7d
AD
389 fprintf (stderr, ngettext ("%d rule never reduced\n",
390 "%d rules never reduced\n",
391 nuseless_productions),
392 nuseless_productions);
572909b5 393
95612cfa 394 fprintf (stderr, "%s: %s: ", grammar_file, _("warning"));
572909b5
RS
395
396 if (nuseless_nonterminals > 0)
cb487d7d
AD
397 fprintf (stderr, ngettext ("%d useless nonterminal",
398 "%d useless nonterminals",
399 nuseless_nonterminals),
400 nuseless_nonterminals);
401
572909b5 402 if (nuseless_nonterminals > 0 && nuseless_productions > 0)
015acc48 403 fprintf (stderr, _(" and "));
572909b5
RS
404
405 if (nuseless_productions > 0)
cb487d7d
AD
406 fprintf (stderr, ngettext ("%d useless rule",
407 "%d useless rules",
408 nuseless_productions),
409 nuseless_productions);
015acc48 410 fprintf (stderr, "\n");
015acc48
AD
411}
412\f
413void
414reduce_grammar (void)
415{
416 bool reduced;
417
418 /* Allocate the global sets used to compute the reduced grammar */
419
ed86e78c 420 N = bitset_create (nvars, BITSET_FIXED);
4b3d3a8e 421 P = bitset_create (nrules, BITSET_FIXED);
ed86e78c 422 V = bitset_create (nsyms, BITSET_FIXED);
ed86e78c 423 V1 = bitset_create (nsyms, BITSET_FIXED);
015acc48
AD
424
425 useless_nonterminals ();
426 inaccessable_symbols ();
427
428 reduced = (bool) (nuseless_nonterminals + nuseless_productions > 0);
337c5bd1
AD
429 if (!reduced)
430 return;
015acc48 431
337c5bd1 432 reduce_print ();
015acc48 433
88bce5a2 434 if (!bitset_test (N, accept->number - ntokens))
1bfb97db
AD
435 fatal_at (startsymbol_location,
436 _("start symbol %s does not derive any sentence"),
437 startsymbol->tag);
015acc48 438
bb88b0fc
AD
439 /* First reduce the nonterminals, as they renumber themselves in the
440 whole grammar. If you change the order, nonterms would be
441 renumbered only in the reduced grammar. */
00238958
AD
442 if (nuseless_nonterminals > 0)
443 nonterminals_reduce ();
bb88b0fc
AD
444 if (nuseless_productions > 0)
445 reduce_grammar_tables ();
9bfe901c 446
273a74fa 447 if (trace_flag & trace_grammar)
9bfe901c 448 {
78ab8f67 449 grammar_dump (stderr, "Reduced Grammar");
9bfe901c
AD
450
451 fprintf (stderr, "reduced %s defines %d terminals, %d nonterminals\
452, and %d productions.\n",
95612cfa 453 grammar_file, ntokens, nvars, nrules);
9bfe901c 454 }
337c5bd1 455}
015acc48 456
015acc48 457
337c5bd1
AD
458/*-----------------------------------------------------------.
459| Free the global sets used to compute the reduced grammar. |
460`-----------------------------------------------------------*/
461
462void
463reduce_free (void)
464{
ed86e78c
AD
465 bitset_free (N);
466 bitset_free (V);
467 bitset_free (V1);
468 bitset_free (P);
572909b5 469}