]> git.saurik.com Git - bison.git/blame - src/reduce.c
* src/system.h: No longer using strndup.
[bison.git] / src / reduce.c
CommitLineData
572909b5 1/* Grammar reduction for Bison.
8b3df748 2 Copyright (C) 1988, 1989, 2000, 2001, 2002 Free Software Foundation, Inc.
572909b5 3
015acc48 4 This file is part of Bison, the GNU Compiler Compiler.
572909b5 5
015acc48
AD
6 Bison is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
572909b5 10
015acc48
AD
11 Bison is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
572909b5 15
015acc48
AD
16 You should have received a copy of the GNU General Public License
17 along with Bison; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
572909b5
RS
20
21
015acc48
AD
22/* Reduce the grammar: Find and eliminate unreachable terminals,
23 nonterminals, and productions. David S. Bakin. */
572909b5 24
015acc48
AD
25/* Don't eliminate unreachable terminals: They may be used by the
26 user's parser. */
572909b5 27
572909b5 28#include "system.h"
8b3df748 29#include "quotearg.h"
ceed8467 30#include "getargs.h"
572909b5 31#include "files.h"
0e78e603 32#include "symtab.h"
572909b5 33#include "gram.h"
a0f6b076 34#include "complain.h"
015acc48 35#include "reduce.h"
b2ca4022
AD
36#include "reader.h"
37#include "getargs.h"
ed86e78c 38#include "bitset.h"
572909b5 39
015acc48 40typedef short *rule;
572909b5 41
572909b5 42
00238958 43/* Set of all nonterminals which are not useless. */
ed86e78c 44static bitset N;
572909b5 45
00238958 46/* Set of all rules which have no useless nonterminals in their RHS. */
ed86e78c 47static bitset P;
00238958
AD
48
49/* Set of all accessible symbols. */
ed86e78c 50static bitset V;
00238958
AD
51
52/* Set of symbols used to define rule precedence (so they are
53 `useless', but no warning should be issued). */
ed86e78c 54static bitset V1;
572909b5 55
015acc48
AD
56static int nuseful_productions;
57static int nuseless_productions;
58static int nuseful_nonterminals;
630e182b 59int nuseless_nonterminals;
572909b5 60\f
015acc48
AD
61/*-------------------------------------------------------------------.
62| Another way to do this would be with a set for each production and |
63| then do subset tests against N0, but even for the C grammar the |
64| whole reducing process takes only 2 seconds on my 8Mhz AT. |
65`-------------------------------------------------------------------*/
572909b5 66
a083fbbf 67static bool
ed86e78c 68useful_production (int i, bitset N0)
572909b5 69{
015acc48 70 rule r;
572909b5
RS
71 short n;
72
015acc48
AD
73 /* A production is useful if all of the nonterminals in its appear
74 in the set of useful nonterminals. */
572909b5 75
99013900 76 for (r = rules[i].rhs; *r >= 0; r++)
914feea9
AD
77 if (ISVAR (n = *r) && !bitset_test (N0, n - ntokens))
78 return FALSE;
572909b5
RS
79 return TRUE;
80}
81
82
015acc48
AD
83/*---------------------------------------------------------.
84| Remember that rules are 1-origin, symbols are 0-origin. |
85`---------------------------------------------------------*/
572909b5 86
a083fbbf 87static void
d2729d44 88useless_nonterminals (void)
572909b5 89{
ed86e78c 90 bitset Np, Ns;
342b8b6e 91 int i;
015acc48
AD
92
93 /* N is set as built. Np is set being built this iteration. P is
94 set of all productions which have a RHS all in N. */
95
ed86e78c 96 Np = bitset_create (nvars, BITSET_FIXED);
ed86e78c 97
015acc48
AD
98
99 /* The set being computed is a set of nonterminals which can derive
100 the empty string or strings consisting of all terminals. At each
101 iteration a nonterminal is added to the set if there is a
102 production with that nonterminal as its LHS for which all the
103 nonterminals in its RHS are already in the set. Iterate until
104 the set being computed remains unchanged. Any nonterminals not
105 in the set at that point are useless in that they will never be
106 used in deriving a sentence of the language.
107
108 This iteration doesn't use any special traversal over the
109 productions. A set is kept of all productions for which all the
110 nonterminals in the RHS are in useful. Only productions not in
111 this set are scanned on each iteration. At the end, this set is
112 saved to be used when finding useful productions: only
113 productions in this set will appear in the final grammar. */
572909b5 114
572909b5
RS
115 while (1)
116 {
ed86e78c 117 bitset_copy (Np, N);
18bcecb0 118 for (i = 1; i < nrules + 1; i++)
f9abaa2c
AD
119 if (!bitset_test (P, i)
120 && useful_production (i, N))
121 {
bba97eb2 122 bitset_set (Np, rules[i].lhs->number - ntokens);
f9abaa2c
AD
123 bitset_set (P, i);
124 }
ed86e78c 125 if (bitset_equal_p (N, Np))
572909b5
RS
126 break;
127 Ns = Np;
128 Np = N;
129 N = Ns;
130 }
ed86e78c 131 bitset_free (N);
572909b5
RS
132 N = Np;
133}
015acc48
AD
134
135
a083fbbf 136static void
d2729d44 137inaccessable_symbols (void)
572909b5 138{
ed86e78c 139 bitset Vp, Vs, Pp;
342b8b6e 140 int i;
572909b5 141 short t;
015acc48
AD
142 rule r;
143
144 /* Find out which productions are reachable and which symbols are
145 used. Starting with an empty set of productions and a set of
146 symbols which only has the start symbol in it, iterate over all
147 productions until the set of productions remains unchanged for an
148 iteration. For each production which has a LHS in the set of
149 reachable symbols, add the production to the set of reachable
150 productions, and add all of the nonterminals in the RHS of the
151 production to the set of reachable symbols.
152
153 Consider only the (partially) reduced grammar which has only
154 nonterminals in N and productions in P.
155
156 The result is the set P of productions in the reduced grammar,
157 and the set V of symbols in the reduced grammar.
158
159 Although this algorithm also computes the set of terminals which
160 are reachable, no terminal will be deleted from the grammar. Some
161 terminals might not be in the grammar but might be generated by
162 semantic routines, and so the user might want them available with
163 specified numbers. (Is this true?) However, the nonreachable
164 terminals are printed (if running in verbose mode) so that the
165 user can know. */
166
ed86e78c 167 Vp = bitset_create (nsyms, BITSET_FIXED);
ed86e78c 168 Pp = bitset_create (nrules + 1, BITSET_FIXED);
572909b5
RS
169
170 /* If the start symbol isn't useful, then nothing will be useful. */
ed86e78c 171 if (bitset_test (N, start_symbol - ntokens))
572909b5 172 {
ed86e78c 173 bitset_set (V, start_symbol);
2c5f66ed
AD
174
175 while (1)
572909b5 176 {
ed86e78c 177 bitset_copy (Vp, V);
18bcecb0 178 for (i = 1; i < nrules + 1; i++)
572909b5 179 {
ed86e78c
AD
180 if (!bitset_test (Pp, i)
181 && bitset_test (P, i)
bba97eb2 182 && bitset_test (V, rules[i].lhs->number))
572909b5 183 {
99013900 184 for (r = rules[i].rhs; *r >= 0; r++)
ed86e78c
AD
185 if (ISTOKEN (t = *r) || bitset_test (N, t - ntokens))
186 bitset_set (Vp, t);
187 bitset_set (Pp, i);
572909b5 188 }
572909b5 189 }
ed86e78c 190 if (bitset_equal_p (V, Vp))
2c5f66ed
AD
191 break;
192 Vs = Vp;
193 Vp = V;
194 V = Vs;
572909b5 195 }
572909b5 196 }
572909b5 197
ed86e78c 198 bitset_free (V);
572909b5
RS
199 V = Vp;
200
201 /* Tokens 0, 1, and 2 are internal to Bison. Consider them useful. */
72a23c97
AD
202 bitset_set (V, eoftoken->number); /* end-of-input token */
203 bitset_set (V, errtoken->number); /* error token */
204 bitset_set (V, undeftoken->number); /* some undefined token */
572909b5 205
ed86e78c 206 bitset_free (P);
572909b5
RS
207 P = Pp;
208
914feea9 209 nuseful_productions = bitset_count (P);
572909b5
RS
210 nuseless_productions = nrules - nuseful_productions;
211
212 nuseful_nonterminals = 0;
213 for (i = ntokens; i < nsyms; i++)
ed86e78c 214 if (bitset_test (V, i))
572909b5
RS
215 nuseful_nonterminals++;
216 nuseless_nonterminals = nvars - nuseful_nonterminals;
217
218 /* A token that was used in %prec should not be warned about. */
11652ab3 219 for (i = 1; i < nrules + 1; i++)
1a2b5d37 220 if (rules[i].precsym != 0)
03b31c0c 221 bitset_set (V1, rules[i].precsym->number);
572909b5 222}
015acc48 223
c3b407f4
AD
224
225/*-------------------------------------------------------------------.
226| Put the useless productions at the end of RULES, and adjust NRULES |
227| accordingly. |
228`-------------------------------------------------------------------*/
229
a083fbbf 230static void
d2729d44 231reduce_grammar_tables (void)
572909b5 232{
c3b407f4
AD
233 /* Flag useless productions. */
234 {
235 int pn;
236 for (pn = 1; pn < nrules + 1; pn++)
237 rules[pn].useful = bitset_test (P, pn);
238 }
572909b5 239
c3b407f4
AD
240 /* Map the nonterminals to their new index: useful first, useless
241 afterwards. Kept for later report. */
242 {
243 int useful = 1;
244 int useless = nrules + 1 - nuseless_productions;
245 rule_t *rules_sorted = XMALLOC (rule_t, nrules + 1) - 1;
246 int i;
247 for (i = 1; i < nrules + 1; ++i)
248 rules_sorted[rules[i].useful ? useful++ : useless++] = rules[i];
249 free (rules + 1);
250 rules = rules_sorted;
076ab033 251
cc9305dd
AD
252 /* Renumber the rules markers in RITEMS. */
253 for (i = 1; i < nrules + 1; ++i)
254 {
255 short *rhsp = rules[i].rhs;
256 for (/* Nothing. */; *rhsp >= 0; ++rhsp)
257 /* Nothing. */;
258 *rhsp = -i;
d7e1f00c 259 rules[i].number = i;
cc9305dd 260 }
c3b407f4
AD
261 nrules -= nuseless_productions;
262 }
076ab033 263
c3b407f4
AD
264 /* Adjust NRITEMS and NITEMS. */
265 {
266 int r;
267 int length;
268 for (r = nrules + 1; r < nrules + 1 + nuseless_productions; ++r)
076ab033 269 {
c3b407f4
AD
270 length = rule_rhs_length (&rules[r]);
271 nritems -= length + 1;
076ab033 272 }
c3b407f4 273 }
00238958 274}
572909b5 275
572909b5 276
00238958
AD
277/*------------------------------.
278| Remove useless nonterminals. |
279`------------------------------*/
572909b5 280
00238958
AD
281static void
282nonterminals_reduce (void)
283{
284 int i, n;
572909b5 285
68f1e3ed
AD
286 /* Map the nonterminals to their new index: useful first, useless
287 afterwards. Kept for later report. */
572909b5 288
00238958 289 short *nontermmap = XCALLOC (short, nvars) - ntokens;
00238958
AD
290 n = ntokens;
291 for (i = ntokens; i < nsyms; i++)
ed86e78c 292 if (bitset_test (V, i))
00238958 293 nontermmap[i] = n++;
760b53a8 294 for (i = ntokens; i < nsyms; i++)
ed86e78c 295 if (!bitset_test (V, i))
760b53a8 296 nontermmap[i] = n++;
572909b5 297
00238958 298
760b53a8
AD
299 /* Shuffle elements of tables indexed by symbol number. */
300 {
db8837cb 301 symbol_t **symbols_sorted = XMALLOC (symbol_t *, nvars) - ntokens;
760b53a8 302
bba97eb2
AD
303 for (i = ntokens; i < nsyms; i++)
304 symbols[i]->number = nontermmap[i];
760b53a8 305 for (i = ntokens; i < nsyms; i++)
0e78e603 306 symbols_sorted[nontermmap[i]] = symbols[i];
760b53a8 307 for (i = ntokens; i < nsyms; i++)
0e78e603
AD
308 symbols[i] = symbols_sorted[i];
309 free (symbols_sorted + ntokens);
760b53a8 310 }
572909b5 311
9e7f6bbd
AD
312 for (i = 0; i < nritems; ++i)
313 if (ISVAR (ritem[i]))
314 ritem[i] = nontermmap[ritem[i]];
572909b5 315
00238958 316 start_symbol = nontermmap[start_symbol];
572909b5 317
00238958
AD
318 nsyms -= nuseless_nonterminals;
319 nvars -= nuseless_nonterminals;
572909b5 320
68f1e3ed 321 free (nontermmap + ntokens);
572909b5 322}
015acc48 323
337c5bd1 324
00238958
AD
325/*------------------------------------------------------------------.
326| Output the detailed results of the reductions. For FILE.output. |
327`------------------------------------------------------------------*/
337c5bd1
AD
328
329void
330reduce_output (FILE *out)
572909b5 331{
572909b5
RS
332 if (nuseless_nonterminals > 0)
333 {
d2d1b42b
AD
334 int i;
335 fprintf (out, "%s\n\n", _("Useless nonterminals:"));
760b53a8 336 for (i = 0; i < nuseless_nonterminals; ++i)
8b3df748
AD
337 fprintf (out, " %s\n", quotearg_style (escape_quoting_style,
338 symbols[nsyms + i]->tag));
d2d1b42b 339 fputs ("\n\n", out);
572909b5 340 }
d2d1b42b
AD
341
342 {
343 bool b = FALSE;
344 int i;
345 for (i = 0; i < ntokens; i++)
ed86e78c 346 if (!bitset_test (V, i) && !bitset_test (V1, i))
572909b5
RS
347 {
348 if (!b)
d2d1b42b
AD
349 fprintf (out, "%s\n\n", _("Terminals which are not used:"));
350 b = TRUE;
8b3df748
AD
351 fprintf (out, " %s\n", quotearg_style (escape_quoting_style,
352 symbols[i]->tag));
572909b5 353 }
d2d1b42b
AD
354 if (b)
355 fputs ("\n\n", out);
356 }
572909b5
RS
357
358 if (nuseless_productions > 0)
359 {
d2d1b42b
AD
360 int i;
361 fprintf (out, "%s\n\n", _("Useless rules:"));
c3b407f4
AD
362 for (i = nrules + 1; i < nuseless_productions + nrules + 1; i++)
363 {
364 rule r;
d7e1f00c 365 fprintf (out, "#%-4d ", rules[i].user_number - 1);
8b3df748
AD
366 fprintf (out, "%s:", quotearg_style (escape_quoting_style,
367 rules[i].lhs->tag));
c3b407f4 368 for (r = rules[i].rhs; *r >= 0; r++)
8b3df748
AD
369 fprintf (out, " %s", quotearg_style (escape_quoting_style,
370 symbols[*r]->tag));
c3b407f4
AD
371 fputs (";\n", out);
372 }
d2d1b42b 373 fputs ("\n\n", out);
572909b5 374 }
572909b5
RS
375}
376\f
4a120d45 377static void
337c5bd1 378dump_grammar (FILE *out)
572909b5
RS
379{
380 int i;
381 rule r;
382
2c5f66ed 383 fprintf (out, "REDUCED GRAMMAR\n\n");
337c5bd1 384 fprintf (out,
5123689b
AD
385 "ntokens = %d, nvars = %d, nsyms = %d, nrules = %d, nritems = %d\n\n",
386 ntokens, nvars, nsyms, nrules, nritems);
6013d43f 387 fprintf (out, "Variables\n---------\n\n");
cb487d7d 388 fprintf (out, "Value Sprec Sassoc Tag\n");
572909b5 389 for (i = ntokens; i < nsyms; i++)
0e78e603
AD
390 fprintf (out, "%5d %5d %5d %s\n",
391 i,
8b3df748
AD
392 symbols[i]->prec, symbols[i]->assoc,
393 quotearg_style (escape_quoting_style, symbols[i]->tag));
337c5bd1 394 fprintf (out, "\n\n");
6013d43f 395 fprintf (out, "Rules\n-----\n\n");
3067fbef 396 fprintf (out, "Num (Prec, Assoc, Useful, Ritem Range) Lhs -> Rhs (Ritem range) [Num]\n");
c3b407f4 397 for (i = 1; i < nrules + nuseless_productions + 1; i++)
572909b5 398 {
3067fbef
AD
399 int rhs_count = 0;
400 /* Find the last RHS index in ritems. */
99013900 401 for (r = rules[i].rhs; *r >= 0; ++r)
3067fbef
AD
402 ++rhs_count;
403 fprintf (out, "%3d (%2d, %2d, %2d, %2d-%2d) %2d ->",
3d4daee3 404 i - 1,
03b31c0c
AD
405 rules[i].prec->prec,
406 rules[i].prec->assoc,
407 rules[i].useful,
408 rules[i].rhs - ritem,
409 rules[i].rhs - ritem + rhs_count - 1,
bba97eb2 410 rules[i].lhs->number);
3067fbef 411 /* Dumped the RHS. */
99013900 412 for (r = rules[i].rhs; *r >= 0; r++)
3067fbef 413 fprintf (out, "%3d", *r);
3d4daee3 414 fprintf (out, " [%d]\n", -(*r) - 1);
572909b5 415 }
337c5bd1 416 fprintf (out, "\n\n");
6013d43f 417 fprintf (out, "Rules interpreted\n-----------------\n\n");
c3b407f4 418 for (i = 1; i < nrules + nuseless_productions + 1; i++)
572909b5 419 {
8b3df748
AD
420 fprintf (out, "%-5d %s :",
421 i, quotearg_style (escape_quoting_style, rules[i].lhs->tag));
99013900 422 for (r = rules[i].rhs; *r >= 0; r++)
8b3df748
AD
423 fprintf (out, " %s",
424 quotearg_style (escape_quoting_style, symbols[*r]->tag));
337c5bd1 425 fputc ('\n', out);
572909b5 426 }
337c5bd1 427 fprintf (out, "\n\n");
572909b5
RS
428}
429
430
337c5bd1
AD
431
432/*-------------------------------.
433| Report the results to STDERR. |
434`-------------------------------*/
435
a083fbbf 436static void
337c5bd1 437reduce_print (void)
572909b5 438{
89cab50d 439 if (yacc_flag && nuseless_productions)
cb487d7d
AD
440 fprintf (stderr, ngettext ("%d rule never reduced\n",
441 "%d rules never reduced\n",
442 nuseless_productions),
443 nuseless_productions);
572909b5 444
015acc48 445 fprintf (stderr, _("%s contains "), infile);
572909b5
RS
446
447 if (nuseless_nonterminals > 0)
cb487d7d
AD
448 fprintf (stderr, ngettext ("%d useless nonterminal",
449 "%d useless nonterminals",
450 nuseless_nonterminals),
451 nuseless_nonterminals);
452
572909b5 453 if (nuseless_nonterminals > 0 && nuseless_productions > 0)
015acc48 454 fprintf (stderr, _(" and "));
572909b5
RS
455
456 if (nuseless_productions > 0)
cb487d7d
AD
457 fprintf (stderr, ngettext ("%d useless rule",
458 "%d useless rules",
459 nuseless_productions),
460 nuseless_productions);
015acc48
AD
461 fprintf (stderr, "\n");
462 fflush (stderr);
463}
464\f
465void
466reduce_grammar (void)
467{
468 bool reduced;
469
470 /* Allocate the global sets used to compute the reduced grammar */
471
ed86e78c 472 N = bitset_create (nvars, BITSET_FIXED);
ed86e78c 473 P = bitset_create (nrules + 1, BITSET_FIXED);
ed86e78c 474 V = bitset_create (nsyms, BITSET_FIXED);
ed86e78c 475 V1 = bitset_create (nsyms, BITSET_FIXED);
015acc48
AD
476
477 useless_nonterminals ();
478 inaccessable_symbols ();
479
480 reduced = (bool) (nuseless_nonterminals + nuseless_productions > 0);
337c5bd1
AD
481 if (!reduced)
482 return;
015acc48 483
337c5bd1 484 reduce_print ();
015acc48 485
ed86e78c 486 if (!bitset_test (N, start_symbol - ntokens))
015acc48 487 fatal (_("Start symbol %s does not derive any sentence"),
8b3df748 488 quotearg_style (escape_quoting_style, symbols[start_symbol]->tag));
015acc48 489
c3b407f4
AD
490 if (nuseless_productions > 0)
491 reduce_grammar_tables ();
00238958
AD
492 if (nuseless_nonterminals > 0)
493 nonterminals_reduce ();
9bfe901c
AD
494
495 if (trace_flag)
496 {
497 dump_grammar (stderr);
498
499 fprintf (stderr, "reduced %s defines %d terminals, %d nonterminals\
500, and %d productions.\n",
501 infile, ntokens, nvars, nrules);
502 }
337c5bd1 503}
015acc48 504
015acc48 505
337c5bd1
AD
506/*-----------------------------------------------------------.
507| Free the global sets used to compute the reduced grammar. |
508`-----------------------------------------------------------*/
509
510void
511reduce_free (void)
512{
ed86e78c
AD
513 bitset_free (N);
514 bitset_free (V);
515 bitset_free (V1);
516 bitset_free (P);
572909b5 517}