]> git.saurik.com Git - bison.git/blame - src/reduce.c
(nullable_compute): Do not subtract from the returned value of malloc.
[bison.git] / src / reduce.c
CommitLineData
572909b5 1/* Grammar reduction for Bison.
d6ea8200 2 Copyright (C) 1988, 1989, 2000, 2001, 2002 Free Software Foundation, Inc.
572909b5 3
015acc48 4 This file is part of Bison, the GNU Compiler Compiler.
572909b5 5
015acc48
AD
6 Bison is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
572909b5 10
015acc48
AD
11 Bison is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
572909b5 15
015acc48
AD
16 You should have received a copy of the GNU General Public License
17 along with Bison; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
572909b5
RS
20
21
015acc48
AD
22/* Reduce the grammar: Find and eliminate unreachable terminals,
23 nonterminals, and productions. David S. Bakin. */
572909b5 24
015acc48
AD
25/* Don't eliminate unreachable terminals: They may be used by the
26 user's parser. */
572909b5 27
572909b5 28#include "system.h"
17ee7397
PE
29
30#include <bitset.h>
31#include <quotearg.h>
32
33#include "complain.h"
572909b5 34#include "files.h"
17ee7397 35#include "getargs.h"
572909b5 36#include "gram.h"
b2ca4022 37#include "reader.h"
17ee7397
PE
38#include "reduce.h"
39#include "symtab.h"
572909b5 40
00238958 41/* Set of all nonterminals which are not useless. */
ed86e78c 42static bitset N;
572909b5 43
00238958 44/* Set of all rules which have no useless nonterminals in their RHS. */
ed86e78c 45static bitset P;
00238958
AD
46
47/* Set of all accessible symbols. */
ed86e78c 48static bitset V;
00238958
AD
49
50/* Set of symbols used to define rule precedence (so they are
51 `useless', but no warning should be issued). */
ed86e78c 52static bitset V1;
572909b5 53
17ee7397
PE
54static rule_number nuseful_productions;
55rule_number nuseless_productions;
015acc48 56static int nuseful_nonterminals;
17ee7397 57symbol_number nuseless_nonterminals;
572909b5 58\f
015acc48
AD
59/*-------------------------------------------------------------------.
60| Another way to do this would be with a set for each production and |
61| then do subset tests against N0, but even for the C grammar the |
62| whole reducing process takes only 2 seconds on my 8Mhz AT. |
63`-------------------------------------------------------------------*/
572909b5 64
a083fbbf 65static bool
17ee7397 66useful_production (rule_number r, bitset N0)
572909b5 67{
17ee7397 68 item_number *rhsp;
572909b5 69
015acc48
AD
70 /* A production is useful if all of the nonterminals in its appear
71 in the set of useful nonterminals. */
572909b5 72
9222837b
AD
73 for (rhsp = rules[r].rhs; *rhsp >= 0; ++rhsp)
74 if (ISVAR (*rhsp) && !bitset_test (N0, *rhsp - ntokens))
8307162d
PE
75 return false;
76 return true;
572909b5
RS
77}
78
79
015acc48
AD
80/*---------------------------------------------------------.
81| Remember that rules are 1-origin, symbols are 0-origin. |
82`---------------------------------------------------------*/
572909b5 83
a083fbbf 84static void
d2729d44 85useless_nonterminals (void)
572909b5 86{
ed86e78c 87 bitset Np, Ns;
17ee7397 88 rule_number r;
015acc48
AD
89
90 /* N is set as built. Np is set being built this iteration. P is
91 set of all productions which have a RHS all in N. */
92
ed86e78c 93 Np = bitset_create (nvars, BITSET_FIXED);
ed86e78c 94
015acc48
AD
95
96 /* The set being computed is a set of nonterminals which can derive
97 the empty string or strings consisting of all terminals. At each
98 iteration a nonterminal is added to the set if there is a
99 production with that nonterminal as its LHS for which all the
100 nonterminals in its RHS are already in the set. Iterate until
101 the set being computed remains unchanged. Any nonterminals not
102 in the set at that point are useless in that they will never be
103 used in deriving a sentence of the language.
104
105 This iteration doesn't use any special traversal over the
106 productions. A set is kept of all productions for which all the
107 nonterminals in the RHS are in useful. Only productions not in
108 this set are scanned on each iteration. At the end, this set is
109 saved to be used when finding useful productions: only
110 productions in this set will appear in the final grammar. */
572909b5 111
572909b5
RS
112 while (1)
113 {
ed86e78c 114 bitset_copy (Np, N);
4b3d3a8e 115 for (r = 0; r < nrules; r++)
9222837b
AD
116 if (!bitset_test (P, r)
117 && useful_production (r, N))
f9abaa2c 118 {
9222837b
AD
119 bitset_set (Np, rules[r].lhs->number - ntokens);
120 bitset_set (P, r);
f9abaa2c 121 }
ed86e78c 122 if (bitset_equal_p (N, Np))
572909b5
RS
123 break;
124 Ns = Np;
125 Np = N;
126 N = Ns;
127 }
ed86e78c 128 bitset_free (N);
572909b5
RS
129 N = Np;
130}
015acc48
AD
131
132
a083fbbf 133static void
d2729d44 134inaccessable_symbols (void)
572909b5 135{
ed86e78c 136 bitset Vp, Vs, Pp;
015acc48
AD
137
138 /* Find out which productions are reachable and which symbols are
139 used. Starting with an empty set of productions and a set of
140 symbols which only has the start symbol in it, iterate over all
141 productions until the set of productions remains unchanged for an
142 iteration. For each production which has a LHS in the set of
143 reachable symbols, add the production to the set of reachable
144 productions, and add all of the nonterminals in the RHS of the
145 production to the set of reachable symbols.
146
147 Consider only the (partially) reduced grammar which has only
148 nonterminals in N and productions in P.
149
150 The result is the set P of productions in the reduced grammar,
151 and the set V of symbols in the reduced grammar.
152
153 Although this algorithm also computes the set of terminals which
154 are reachable, no terminal will be deleted from the grammar. Some
155 terminals might not be in the grammar but might be generated by
156 semantic routines, and so the user might want them available with
157 specified numbers. (Is this true?) However, the nonreachable
158 terminals are printed (if running in verbose mode) so that the
159 user can know. */
160
ed86e78c 161 Vp = bitset_create (nsyms, BITSET_FIXED);
4b3d3a8e 162 Pp = bitset_create (nrules, BITSET_FIXED);
572909b5
RS
163
164 /* If the start symbol isn't useful, then nothing will be useful. */
88bce5a2 165 if (bitset_test (N, accept->number - ntokens))
572909b5 166 {
88bce5a2 167 bitset_set (V, accept->number);
2c5f66ed
AD
168
169 while (1)
572909b5 170 {
17ee7397 171 rule_number r;
ed86e78c 172 bitset_copy (Vp, V);
4b3d3a8e 173 for (r = 0; r < nrules; r++)
572909b5 174 {
9222837b
AD
175 if (!bitset_test (Pp, r)
176 && bitset_test (P, r)
177 && bitset_test (V, rules[r].lhs->number))
572909b5 178 {
17ee7397 179 item_number *rhsp;
9222837b
AD
180 for (rhsp = rules[r].rhs; *rhsp >= 0; rhsp++)
181 if (ISTOKEN (*rhsp) || bitset_test (N, *rhsp - ntokens))
182 bitset_set (Vp, *rhsp);
183 bitset_set (Pp, r);
572909b5 184 }
572909b5 185 }
ed86e78c 186 if (bitset_equal_p (V, Vp))
2c5f66ed
AD
187 break;
188 Vs = Vp;
189 Vp = V;
190 V = Vs;
572909b5 191 }
572909b5 192 }
572909b5 193
ed86e78c 194 bitset_free (V);
572909b5
RS
195 V = Vp;
196
197 /* Tokens 0, 1, and 2 are internal to Bison. Consider them useful. */
88bce5a2 198 bitset_set (V, endtoken->number); /* end-of-input token */
72a23c97
AD
199 bitset_set (V, errtoken->number); /* error token */
200 bitset_set (V, undeftoken->number); /* some undefined token */
572909b5 201
ed86e78c 202 bitset_free (P);
572909b5
RS
203 P = Pp;
204
914feea9 205 nuseful_productions = bitset_count (P);
572909b5
RS
206 nuseless_productions = nrules - nuseful_productions;
207
208 nuseful_nonterminals = 0;
9222837b 209 {
17ee7397 210 symbol_number i;
9222837b
AD
211 for (i = ntokens; i < nsyms; i++)
212 if (bitset_test (V, i))
213 nuseful_nonterminals++;
214 }
572909b5
RS
215 nuseless_nonterminals = nvars - nuseful_nonterminals;
216
217 /* A token that was used in %prec should not be warned about. */
9222837b 218 {
17ee7397 219 rule_number r;
4b3d3a8e
AD
220 for (r = 0; r < nrules; ++r)
221 if (rules[r].precsym != 0)
222 bitset_set (V1, rules[r].precsym->number);
9222837b 223 }
572909b5 224}
015acc48 225
c3b407f4
AD
226
227/*-------------------------------------------------------------------.
228| Put the useless productions at the end of RULES, and adjust NRULES |
229| accordingly. |
230`-------------------------------------------------------------------*/
231
a083fbbf 232static void
d2729d44 233reduce_grammar_tables (void)
572909b5 234{
6b98e4b5 235 /* Report and flag useless productions. */
c3b407f4 236 {
17ee7397 237 rule_number r;
4b3d3a8e 238 for (r = 0; r < nrules; r++)
c8f002c7
AD
239 rules[r].useful = bitset_test (P, r);
240 grammar_rules_never_reduced_report (_("useless rule"));
c3b407f4 241 }
572909b5 242
c3b407f4
AD
243 /* Map the nonterminals to their new index: useful first, useless
244 afterwards. Kept for later report. */
245 {
4b3d3a8e
AD
246 int useful = 0;
247 int useless = nrules - nuseless_productions;
17ee7397
PE
248 rule *rules_sorted = XMALLOC (rule, nrules);
249 rule_number r;
4b3d3a8e 250 for (r = 0; r < nrules; ++r)
9222837b 251 rules_sorted[rules[r].useful ? useful++ : useless++] = rules[r];
4b3d3a8e 252 free (rules);
c3b407f4 253 rules = rules_sorted;
076ab033 254
cc9305dd 255 /* Renumber the rules markers in RITEMS. */
4b3d3a8e 256 for (r = 0; r < nrules; ++r)
cc9305dd 257 {
17ee7397 258 item_number *rhsp = rules[r].rhs;
cc9305dd
AD
259 for (/* Nothing. */; *rhsp >= 0; ++rhsp)
260 /* Nothing. */;
12b0043a 261 *rhsp = rule_number_as_item_number (r);
9222837b 262 rules[r].number = r;
cc9305dd 263 }
c3b407f4
AD
264 nrules -= nuseless_productions;
265 }
076ab033 266
a900a624 267 /* Adjust NRITEMS. */
c3b407f4
AD
268 {
269 int r;
270 int length;
4b3d3a8e 271 for (r = nrules; r < nrules + nuseless_productions; ++r)
076ab033 272 {
c3b407f4
AD
273 length = rule_rhs_length (&rules[r]);
274 nritems -= length + 1;
076ab033 275 }
c3b407f4 276 }
00238958 277}
572909b5 278
572909b5 279
00238958
AD
280/*------------------------------.
281| Remove useless nonterminals. |
282`------------------------------*/
572909b5 283
00238958
AD
284static void
285nonterminals_reduce (void)
286{
17ee7397 287 symbol_number i, n;
572909b5 288
68f1e3ed
AD
289 /* Map the nonterminals to their new index: useful first, useless
290 afterwards. Kept for later report. */
572909b5 291
17ee7397 292 symbol_number *nontermmap = XCALLOC (symbol_number, nvars) - ntokens;
00238958
AD
293 n = ntokens;
294 for (i = ntokens; i < nsyms; i++)
ed86e78c 295 if (bitset_test (V, i))
00238958 296 nontermmap[i] = n++;
760b53a8 297 for (i = ntokens; i < nsyms; i++)
ed86e78c 298 if (!bitset_test (V, i))
6b98e4b5
AD
299 {
300 nontermmap[i] = n++;
d6ea8200 301 warn_at (symbols[i]->location, _("useless nonterminal: %s"),
97650f4e 302 symbols[i]->tag);
6b98e4b5 303 }
572909b5 304
00238958 305
760b53a8
AD
306 /* Shuffle elements of tables indexed by symbol number. */
307 {
17ee7397 308 symbol **symbols_sorted = XMALLOC (symbol *, nvars) - ntokens;
760b53a8 309
bba97eb2
AD
310 for (i = ntokens; i < nsyms; i++)
311 symbols[i]->number = nontermmap[i];
760b53a8 312 for (i = ntokens; i < nsyms; i++)
0e78e603 313 symbols_sorted[nontermmap[i]] = symbols[i];
760b53a8 314 for (i = ntokens; i < nsyms; i++)
0e78e603
AD
315 symbols[i] = symbols_sorted[i];
316 free (symbols_sorted + ntokens);
760b53a8 317 }
572909b5 318
0c2d3f4c 319 {
17ee7397 320 rule_number r;
4b3d3a8e 321 for (r = 0; r < nrules; ++r)
0c2d3f4c 322 {
17ee7397 323 item_number *rhsp;
0c2d3f4c
AD
324 for (rhsp = rules[r].rhs; *rhsp >= 0; ++rhsp)
325 if (ISVAR (*rhsp))
a49aecd5 326 *rhsp = symbol_number_as_item_number (nontermmap[*rhsp]);
0c2d3f4c 327 }
88bce5a2 328 accept->number = nontermmap[accept->number];
0c2d3f4c 329 }
572909b5 330
00238958
AD
331 nsyms -= nuseless_nonterminals;
332 nvars -= nuseless_nonterminals;
572909b5 333
68f1e3ed 334 free (nontermmap + ntokens);
572909b5 335}
015acc48 336
337c5bd1 337
00238958
AD
338/*------------------------------------------------------------------.
339| Output the detailed results of the reductions. For FILE.output. |
340`------------------------------------------------------------------*/
337c5bd1
AD
341
342void
343reduce_output (FILE *out)
572909b5 344{
572909b5
RS
345 if (nuseless_nonterminals > 0)
346 {
d2d1b42b 347 int i;
c8f002c7 348 fprintf (out, "%s\n\n", _("Useless nonterminals"));
760b53a8 349 for (i = 0; i < nuseless_nonterminals; ++i)
97650f4e 350 fprintf (out, " %s\n", symbols[nsyms + i]->tag);
d2d1b42b 351 fputs ("\n\n", out);
572909b5 352 }
d2d1b42b
AD
353
354 {
8307162d 355 bool b = false;
d2d1b42b
AD
356 int i;
357 for (i = 0; i < ntokens; i++)
ed86e78c 358 if (!bitset_test (V, i) && !bitset_test (V1, i))
572909b5
RS
359 {
360 if (!b)
c8f002c7 361 fprintf (out, "%s\n\n", _("Terminals which are not used"));
8307162d 362 b = true;
97650f4e 363 fprintf (out, " %s\n", symbols[i]->tag);
572909b5 364 }
d2d1b42b
AD
365 if (b)
366 fputs ("\n\n", out);
367 }
572909b5
RS
368
369 if (nuseless_productions > 0)
9757c359 370 grammar_rules_partial_print (out, _("Useless rules"),
c8f002c7 371 rule_useless_p);
572909b5
RS
372}
373\f
572909b5 374
572909b5
RS
375
376
337c5bd1
AD
377
378/*-------------------------------.
379| Report the results to STDERR. |
380`-------------------------------*/
381
a083fbbf 382static void
337c5bd1 383reduce_print (void)
572909b5 384{
89cab50d 385 if (yacc_flag && nuseless_productions)
cb487d7d
AD
386 fprintf (stderr, ngettext ("%d rule never reduced\n",
387 "%d rules never reduced\n",
388 nuseless_productions),
389 nuseless_productions);
572909b5 390
95612cfa 391 fprintf (stderr, "%s: %s: ", grammar_file, _("warning"));
572909b5
RS
392
393 if (nuseless_nonterminals > 0)
cb487d7d
AD
394 fprintf (stderr, ngettext ("%d useless nonterminal",
395 "%d useless nonterminals",
396 nuseless_nonterminals),
397 nuseless_nonterminals);
398
572909b5 399 if (nuseless_nonterminals > 0 && nuseless_productions > 0)
015acc48 400 fprintf (stderr, _(" and "));
572909b5
RS
401
402 if (nuseless_productions > 0)
cb487d7d
AD
403 fprintf (stderr, ngettext ("%d useless rule",
404 "%d useless rules",
405 nuseless_productions),
406 nuseless_productions);
015acc48
AD
407 fprintf (stderr, "\n");
408 fflush (stderr);
409}
410\f
411void
412reduce_grammar (void)
413{
414 bool reduced;
415
416 /* Allocate the global sets used to compute the reduced grammar */
417
ed86e78c 418 N = bitset_create (nvars, BITSET_FIXED);
4b3d3a8e 419 P = bitset_create (nrules, BITSET_FIXED);
ed86e78c 420 V = bitset_create (nsyms, BITSET_FIXED);
ed86e78c 421 V1 = bitset_create (nsyms, BITSET_FIXED);
015acc48
AD
422
423 useless_nonterminals ();
424 inaccessable_symbols ();
425
426 reduced = (bool) (nuseless_nonterminals + nuseless_productions > 0);
337c5bd1
AD
427 if (!reduced)
428 return;
015acc48 429
337c5bd1 430 reduce_print ();
015acc48 431
88bce5a2 432 if (!bitset_test (N, accept->number - ntokens))
1bfb97db
AD
433 fatal_at (startsymbol_location,
434 _("start symbol %s does not derive any sentence"),
435 startsymbol->tag);
015acc48 436
bb88b0fc
AD
437 /* First reduce the nonterminals, as they renumber themselves in the
438 whole grammar. If you change the order, nonterms would be
439 renumbered only in the reduced grammar. */
00238958
AD
440 if (nuseless_nonterminals > 0)
441 nonterminals_reduce ();
bb88b0fc
AD
442 if (nuseless_productions > 0)
443 reduce_grammar_tables ();
9bfe901c 444
273a74fa 445 if (trace_flag & trace_grammar)
9bfe901c 446 {
78ab8f67 447 grammar_dump (stderr, "Reduced Grammar");
9bfe901c
AD
448
449 fprintf (stderr, "reduced %s defines %d terminals, %d nonterminals\
450, and %d productions.\n",
95612cfa 451 grammar_file, ntokens, nvars, nrules);
9bfe901c 452 }
337c5bd1 453}
015acc48 454
015acc48 455
337c5bd1
AD
456/*-----------------------------------------------------------.
457| Free the global sets used to compute the reduced grammar. |
458`-----------------------------------------------------------*/
459
460void
461reduce_free (void)
462{
ed86e78c
AD
463 bitset_free (N);
464 bitset_free (V);
465 bitset_free (V1);
466 bitset_free (P);
572909b5 467}