From: Joel E. Denny Date: Mon, 20 Apr 2009 04:55:50 +0000 (-0400) Subject: Implement %define lr.default_rules. X-Git-Tag: v2.5_rc1~259 X-Git-Url: https://git.saurik.com/bison.git/commitdiff_plain/03c07b039448552b5ac6c51076b88c2f15b0307f?ds=inline Implement %define lr.default_rules. Its value describes the states that are permitted to contain default rules: "all", "consistent", or "accepting". * src/reader.c (reader): Default lr.default_rules to "all". Check for a valid lr.default_rules value. * src/lalr.c (state_lookahead_tokens_count): If lr.default_rules is "accepting", then only mark the accepting state as consistent. (initialize_LA): Tell state_lookahead_tokens_count whether lr.default_rules is "accepting". * src/tables.c (action_row): If lr.default_rules is not "all", then disable default rules in inconsistent states. * src/print.c (print_reductions): Use this opportunity to perform some assertions about whether lr.default_rules was obeyed correctly. * tests/local.at (AT_TEST_TABLES_AND_PARSE): New macro that helps with checking the parser tables for a grammar. * tests/input.at (%define lr.default_rules invalid values): New test group. * tests/reduce.at (AT_TEST_LR_DEFAULT_RULES): New macro using AT_TEST_TABLES_AND_PARSE. (`no %define lr.default_rules'): New test group generated by AT_TEST_LR_DEFAULT_RULES. (`%define lr.default_rules "all"'): Likewise. (`%define lr.default_rules "consistent"'): Likewise. (`%define lr.default_rules "accepting"'): Likewise. --- diff --git a/ChangeLog b/ChangeLog index eb240df7..fdbbba9c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,32 @@ +2009-04-20 Joel E. Denny + + Implement %define lr.default_rules. + Its value describes the states that are permitted to contain + default rules: "all", "consistent", or "accepting". + * src/reader.c (reader): Default lr.default_rules to "all". + Check for a valid lr.default_rules value. + * src/lalr.c (state_lookahead_tokens_count): If lr.default_rules + is "accepting", then only mark the accepting state as + consistent. + (initialize_LA): Tell state_lookahead_tokens_count whether + lr.default_rules is "accepting". + * src/tables.c (action_row): If lr.default_rules is not "all", + then disable default rules in inconsistent states. + * src/print.c (print_reductions): Use this opportunity to + perform some assertions about whether lr.default_rules was + obeyed correctly. + * tests/local.at (AT_TEST_TABLES_AND_PARSE): New macro that + helps with checking the parser tables for a grammar. + * tests/input.at (%define lr.default_rules invalid values): New + test group. + * tests/reduce.at (AT_TEST_LR_DEFAULT_RULES): New macro using + AT_TEST_TABLES_AND_PARSE. + (`no %define lr.default_rules'): New test group generated by + AT_TEST_LR_DEFAULT_RULES. + (`%define lr.default_rules "all"'): Likewise. + (`%define lr.default_rules "consistent"'): Likewise. + (`%define lr.default_rules "accepting"'): Likewise. + 2009-04-20 Akim Demaille Consistently refer to Yacc, not YACC. diff --git a/src/lalr.c b/src/lalr.c index a214dc57..9f5d43f3 100644 --- a/src/lalr.c +++ b/src/lalr.c @@ -1,7 +1,7 @@ /* Compute lookahead criteria for Bison. Copyright (C) 1984, 1986, 1989, 2000, 2001, 2002, 2003, 2004, 2005, - 2006, 2007 Free Software Foundation, Inc. + 2006, 2007, 2008, 2009 Free Software Foundation, Inc. This file is part of Bison, the GNU Compiler Compiler. @@ -36,6 +36,7 @@ #include "getargs.h" #include "gram.h" #include "lalr.h" +#include "muscle_tab.h" #include "nullable.h" #include "reader.h" #include "relation.h" @@ -336,22 +337,30 @@ compute_lookahead_tokens (void) `----------------------------------------------------*/ static int -state_lookahead_tokens_count (state *s) +state_lookahead_tokens_count (state *s, bool default_rule_only_for_accept) { int n_lookahead_tokens = 0; reductions *rp = s->reductions; transitions *sp = s->transitions; - /* We need a lookahead either to distinguish different - reductions (i.e., there are two or more), or to distinguish a - reduction from a shift. Otherwise, it is straightforward, - and the state is `consistent'. There is no need to check that - transition 0 hasn't been disabled before checking if it is a - shift since transitions are only disabled during conflict - resolution, and that hasn't happened yet. */ + /* Transitions are only disabled during conflict resolution, and that + hasn't happened yet, so there should be no need to check that + transition 0 hasn't been disabled before checking if it is a shift. + However, this check was performed at one time, so we leave it as an + aver. */ aver (sp->num == 0 || !TRANSITION_IS_DISABLED (sp, 0)); + + /* We need a lookahead either to distinguish different reductions + (i.e., there are two or more), or to distinguish a reduction from a + shift. Otherwise, it is straightforward, and the state is + `consistent'. However, for states that have any rules, treat only + the accepting state as consistent (since there is never a lookahead + token that makes sense there, and so no lookahead token should be + read) if the user has otherwise disabled default rules. */ if (rp->num > 1 - || (rp->num == 1 && sp->num && TRANSITION_IS_SHIFT (sp, 0))) + || (rp->num == 1 && sp->num && TRANSITION_IS_SHIFT (sp, 0)) + || (rp->num == 1 && rp->rules[0]->number != 0 + && default_rule_only_for_accept)) n_lookahead_tokens += rp->num; else s->consistent = 1; @@ -369,11 +378,18 @@ initialize_LA (void) { state_number i; bitsetv pLA; + bool default_rule_only_for_accept; + { + char *default_rules = muscle_percent_define_get ("lr.default_rules"); + default_rule_only_for_accept = 0 == strcmp (default_rules, "accepting"); + free (default_rules); + } /* Compute the total number of reductions requiring a lookahead. */ nLA = 0; for (i = 0; i < nstates; i++) - nLA += state_lookahead_tokens_count (states[i]); + nLA += + state_lookahead_tokens_count (states[i], default_rule_only_for_accept); /* Avoid having to special case 0. */ if (!nLA) nLA = 1; @@ -385,7 +401,8 @@ initialize_LA (void) require lookahead tokens. */ for (i = 0; i < nstates; i++) { - int count = state_lookahead_tokens_count (states[i]); + int count = + state_lookahead_tokens_count (states[i], default_rule_only_for_accept); if (count) { states[i]->reductions->lookahead_tokens = pLA; diff --git a/src/print.c b/src/print.c index ddd76a60..413896c2 100644 --- a/src/print.c +++ b/src/print.c @@ -1,7 +1,7 @@ /* Print information on generated parser, for bison, - Copyright (C) 1984, 1986, 1989, 2000, 2001, 2002, 2003, 2004, 2005, 2007 - Free Software Foundation, Inc. + Copyright (C) 1984, 1986, 1989, 2000, 2001, 2002, 2003, 2004, 2005, + 2007, 2009 Free Software Foundation, Inc. This file is part of Bison, the GNU Compiler Compiler. @@ -31,6 +31,7 @@ #include "getargs.h" #include "gram.h" #include "lalr.h" +#include "muscle_tab.h" #include "print.h" #include "reader.h" #include "reduce.h" @@ -244,6 +245,7 @@ print_reductions (FILE *out, state *s) rule *default_rule = NULL; size_t width = 0; int i, j; + bool non_default_action = false; if (reds->num == 0) return; @@ -296,6 +298,8 @@ print_reductions (FILE *out, state *s) { bool defaulted = false; bool count = bitset_test (no_reduce_set, i); + if (count) + non_default_action = true; for (j = 0; j < reds->num; ++j) if (bitset_test (reds->lookahead_tokens[j], i)) @@ -303,15 +307,19 @@ print_reductions (FILE *out, state *s) if (! count) { if (reds->rules[j] != default_rule) - print_reduction (out, width, - symbols[i]->tag, - reds->rules[j], true); + { + non_default_action = true; + print_reduction (out, width, + symbols[i]->tag, + reds->rules[j], true); + } else defaulted = true; count = true; } else { + non_default_action = true; if (defaulted) print_reduction (out, width, symbols[i]->tag, @@ -325,8 +333,15 @@ print_reductions (FILE *out, state *s) } if (default_rule) - print_reduction (out, width, - _("$default"), default_rule, true); + { + char *default_rules = muscle_percent_define_get ("lr.default_rules"); + print_reduction (out, width, _("$default"), default_rule, true); + aver (0 == strcmp (default_rules, "all") + || (0 == strcmp (default_rules, "consistent") + && !non_default_action) + || (reds->num == 1 && reds->rules[0]->number == 0)); + free (default_rules); + } } diff --git a/src/reader.c b/src/reader.c index 7758c772..68e8b44e 100644 --- a/src/reader.c +++ b/src/reader.c @@ -1,7 +1,7 @@ /* Input parser for Bison Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002, 2003, - 2005, 2006, 2007 Free Software Foundation, Inc. + 2005, 2006, 2007, 2009 Free Software Foundation, Inc. This file is part of Bison, the GNU Compiler Compiler. @@ -555,6 +555,17 @@ reader (void) gram_scanner_initialize (); gram_parse (); + muscle_percent_define_default ("lr.default_rules", "all"); + + /* Check front-end %define variable values. */ + { + static char const * const values[] = { + "lr.default_rules", "all", "consistent", "accepting", NULL, + NULL + }; + muscle_percent_define_check_values (values); + } + if (! complaint_issued) check_and_convert_grammar (); diff --git a/src/tables.c b/src/tables.c index e22b0892..958e77b1 100644 --- a/src/tables.c +++ b/src/tables.c @@ -1,7 +1,7 @@ /* Output the generated parsing program for Bison. Copyright (C) 1984, 1986, 1989, 1992, 2000, 2001, 2002, 2003, 2004, - 2005, 2006 Free Software Foundation, Inc. + 2005, 2006, 2009 Free Software Foundation, Inc. This file is part of Bison, the GNU Compiler Compiler. @@ -30,6 +30,7 @@ #include "getargs.h" #include "gram.h" #include "lalr.h" +#include "muscle_tab.h" #include "reader.h" #include "symtab.h" #include "tables.h" @@ -303,6 +304,16 @@ action_row (state *s) actrow[sym->number] = ACTION_NUMBER_MINIMUM; } + /* Turn off default rules where requested by the user. See + state_lookahead_tokens_count in lalr.c to understand when states are + labeled as consistent. */ + { + char *default_rules = muscle_percent_define_get ("lr.default_rules"); + if (0 != strcmp (default_rules, "all") && !s->consistent) + nodefault = true; + free (default_rules); + } + /* Now find the most common reduction and make it the default action for this state. */ diff --git a/tests/input.at b/tests/input.at index 4389e9d2..9edf6132 100644 --- a/tests/input.at +++ b/tests/input.at @@ -867,6 +867,24 @@ AT_BISON_CHECK([[Input.y]], [1], [], AT_CLEANUP +## ----------------------------------------- ## +## %define lr.default_rules invalid values. ## +## ----------------------------------------- ## + +AT_SETUP([[%define lr.default_rules invalid values]]) + +AT_DATA([[input.y]], +[[%define lr.default_rules "bogus" +%% +start: ; +]]) + +AT_BISON_CHECK([[input.y]], [[1]], [[]], +[[input.y:1.9-24: invalid value for %define variable `lr.default_rules': `bogus' +]]) + +AT_CLEANUP + ## ------------------------ ## ## %define enum variables. ## ## ------------------------ ## diff --git a/tests/local.at b/tests/local.at index 163de2ef..91e0f20e 100644 --- a/tests/local.at +++ b/tests/local.at @@ -1,8 +1,8 @@ # Process this -*- Autotest -*- file with autom4te. # Macros for the GNU Bison Test suite. -# Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, -# Inc. +# Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software +# Foundation, Inc. # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -329,6 +329,165 @@ m4_define([AT_FULL_COMPILE], m4_define([AT_PARSER_CHECK], [AT_CHECK([$5 $PREPARSER $1], [$2], [$3], [$4])]) +# AT_TEST_TABLES_AND_PARSE(TITLE, COND-VALUE, TEST-SPEC, +# DECLS, GRAMMAR, INPUT, +# BISON-STDERR, TABLES-OR-LAST-STATE, +# [OTHER-CHECKS], +# [PARSER-EXIT-VALUE], +# [PARSER-STDOUT], [PARSER-STDERR]) +# ------------------------------------------------------------- +# Using TITLE as the test group title, check the generated parser tables +# and parser for a specified grammar file under a condition labeled by +# COND-VALUE. +# +# TEST-SPEC is a comma-delimited list of attributes of this test. Each +# recognized attribute is described below where it is relevant. +# +# Insert DECLS and GRAMMAR into the declarations and grammar section of +# the grammar file. Insert basic yyerror, yylex, and main function +# definitions as well. Hardcode yylex to return the (possibly empty) +# comma-delimited series of tokens in INPUT followed by token 0. +# +# If TEST-SPEC contains the attribute no-xml, then invoke bison using +# AT_BISON_CHECK_NO_XML. Otherwise, invoke bison using AT_BISON_CHECK. +# On the bison command-line, specify `--report=all --defines'. Check +# that Bison exits with value 0, has no stdout, and has stderr +# BISON-STDERR. +# +# If TEST-SPEC contains the attribute `last-state', check that the value +# of TABLES-OR-LAST-STATE is the index of the last state generated for +# the grammar; in other words, check the number of states (minus one). +# Otherwise, check that everything in the `.output' file starting with +# the definition of state 0 is the same as the entire value of +# TABLES-OR-LAST-STATE. +# +# Expand the M4 in OTHER-CHECKS to perform additional checks of the +# `.output' file, which is named `input.output', and/or grammar file, +# which is named `input.y'. +# +# Finally, compile the generated parser and then run it using +# AT_PARSER_CHECK with PARSER-EXIT-VALUE, PARSER-STDOUT, and +# PARSER-STDERR as the 2nd-4th arguments. +# +# As a precondition, you must properly double-quote all arguments that +# are to be interpreted as strings. +# +# AT_COND_CASE (when appearing in single-quoted segments of arguments) +# invokes m4_case with its own arguments but COND-VALUE inserted as the +# first argument. This is useful, for example, when wrapping multiple +# AT_TEST_TABLES_AND_PARSE invocations, each representing a different +# condition, in another macro. +# +# For example: +# +# # AT_TEST_SYNTAX_ERROR(DESCRIPTION, DECLS, GRAMMAR, INPUT, LAST-STATE, +# # PARSER-EXIT-VALUE, PARSER-STDOUT, PARSER-STDERR) +# # --------------------------------------------------------------------- +# m4_define([AT_TEST_SYNTAX_ERROR], +# [ +# AT_TEST_TABLES_AND_PARSE([$1[ with %error-verbose]], [[verbose]], +# [[last-state]], +# [[%error-verbose ]$2], [$3], [$4], +# [[]], [$5], [], [$6], [$7], [$8]) +# AT_TEST_TABLES_AND_PARSE([$1[ with no %error-verbose]], [[no verbose]], +# [[last-state]], +# [$2], [$3], [$4], +# [[]], [$5], [], [$6], [$7], [$8]) +# ]) +# +# AT_TEST_SYNTAX_ERROR([[Single Char Grammar]], +# [[%token 'b']], [[start: 'a' ;]], [['a', 'b']], +# [[3]], +# [[1]], [[]], +# [AT_COND_CASE([[no verbose]], +# [[syntax error +# ]], +# [[syntax error, unexpected 'b', expecting $end +# ]])]) +m4_define([AT_TEST_TABLES_AND_PARSE], +[_AT_TEST_TABLES_AND_PARSE($[1], $[@], $@)]) + +m4_define([_AT_TEST_TABLES_AND_PARSE], +[m4_pushdef([AT_COND_CASE], [m4_case([$4], $][@)]) + +AT_SETUP([$3]) + +AT_DATA_GRAMMAR([[input.y]], +[[%code { + #include + static void yyerror (char const *msg); + static int yylex (void); +} + +]$6[ + +%% + +]$7[ + +%% + +static void +yyerror (char const *msg) +{ + fprintf (stderr, "%s\n", msg); +} + +static int +yylex (void) +{ + static int const input[] = { + ]m4_if([$8], [], [], [$8], [[]], [], [$8[, ]])[0 + }; + static int const *inputp = input; + return *inputp++; +} + +int +main (void) +{ + return yyparse (); +} +]]) + +# AT_CHECK invokes AS_ESCAPE before expanding macros, so it corrupts some +# special characters in the macros. To avoid this, expand now and pass it +# the result with proper string quotation. Assume args 9 thru 14 expand to +# properly quoted strings. + +# Pass plenty of options, to exercise plenty of code, even if we +# don't actually check the output. But SEGV is watching us, and +# so might do dmalloc. +m4_if(m4_index(m4_quote($5), [no-xml]), -1, + [AT_BISON_CHECK], + [AT_BISON_CHECK_NO_XML])([[--report=all --defines -o input.c input.y]], + [0], [], m4_dquote($9)) + +# Sigh. Some M4's can't reference arg 10 directly. +m4_pushdef([arg10], m4_car(m4_shiftn(9, $@))) +m4_if(m4_index(m4_quote($5), [last-state]), -1, + [AT_CHECK([[sed -n '/^state 0$/,$p' input.output]], [[0]], + m4_dquote(arg10))], + [AT_CHECK([[sed -n 's/^state //p' input.output | tail -1]], [[0]], + m4_dquote(arg10)[[ +]])]) +m4_popdef([arg10]) + +m4_if($#, 10, [], m4_car(m4_shiftn(10, $@))) + +AT_COMPILE([[input]]) + +m4_pushdef([AT_EXPAND_ARGS], [$][*]) +m4_pushdef([AT_DQUOTE_EACH], [[[$1]]m4_if($][#, 1, [], [, AT_DQUOTE_EACH(m4_shift($2))])]) + +AT_PARSER_CHECK([[./input]]m4_if($#, 10, [], $#, 11, [], [, AT_DQUOTE_EACH(AT_EXPAND_ARGS(m4_shiftn(11, $@)))])) + +m4_popdef([AT_DQUOTE_EACH]) +m4_popdef([AT_EXPAND_ARGS]) + +AT_CLEANUP + +m4_popdef([AT_COND_CASE])]) diff --git a/tests/reduce.at b/tests/reduce.at index 5fe650a8..058d7c0d 100644 --- a/tests/reduce.at +++ b/tests/reduce.at @@ -1,5 +1,6 @@ # Exercising Bison Grammar Reduction. -*- Autotest -*- -# Copyright (C) 2001, 2002, 2007, 2008 Free Software Foundation, Inc. +# Copyright (C) 2001, 2002, 2007, 2008, 2009 Free Software Foundation, +# Inc. # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -355,3 +356,147 @@ input.y:3.1-3: fatal error: start symbol exp does not derive any sentence ]]) AT_CLEANUP + + + +## -------------------------- ## +## %define lr.default_rules. ## +## -------------------------- ## + +# AT_TEST_LR_DEFAULT_RULES(GRAMMAR, INPUT, TABLES) +# ------------------------------------------------ +m4_define([AT_TEST_LR_DEFAULT_RULES], +[ +AT_TEST_TABLES_AND_PARSE([[no %define lr.default_rules]], + [[all]], [[]], + [[]], + [$1], [$2], [[]], [$3]) +AT_TEST_TABLES_AND_PARSE([[%define lr.default_rules "all"]], + [[all]], [[]], + [[%define lr.default_rules "all"]], + [$1], [$2], [[]], [$3]) +AT_TEST_TABLES_AND_PARSE([[%define lr.default_rules "consistent"]], + [[consistent]], [[]], + [[%define lr.default_rules "consistent"]], + [$1], [$2], [[]], [$3]) +AT_TEST_TABLES_AND_PARSE([[%define lr.default_rules "accepting"]], + [[accepting]], [[]], + [[%define lr.default_rules "accepting"]], + [$1], [$2], [[]], [$3]) +]) + +AT_TEST_LR_DEFAULT_RULES([[ +/* The start state is consistent and has a shift on 'a' and no reductions. + After pushing the b below, enter an inconsistent state that has a shift and + one reduction with one lookahead. */ +start: + a b + | a b 'a' + | a c 'b' + ; + +/* After shifting this 'a', enter a consistent state that has no shift and 1 + reduction with multiple lookaheads. */ +a: 'a' ; + +/* After the previous reduction, enter an inconsistent state that has no shift + and multiple reductions. The first reduction has more lookaheads than the + second, so the first should always be preferred as the default rule if + enabled. The second reduction has one lookahead. */ +b: ; +c: ; +]], +dnl Visit each state mentioned above. +[['a', 'a']], +[[state 0 + + 0 $accept: . start $end + 1 start: . a b + 2 | . a b 'a' + 3 | . a c 'b' + 4 a: . 'a' + + 'a' shift, and go to state 1 + + start go to state 2 + a go to state 3 + + +state 1 + + 4 a: 'a' .]AT_COND_CASE([[accepting]], [[ [$end, 'a', 'b'] + + $end reduce using rule 4 (a) + 'a' reduce using rule 4 (a) + 'b' reduce using rule 4 (a)]], [[ + + $default reduce using rule 4 (a)]])[ + + +state 2 + + 0 $accept: start . $end + + $end shift, and go to state 4 + + +state 3 + + 1 start: a . b + 2 | a . b 'a' + 3 | a . c 'b' + 5 b: . [$end, 'a'] + 6 c: . ['b']]AT_COND_CASE([[all]], [[ + + 'b' reduce using rule 6 (c) + $default reduce using rule 5 (b)]], [[ + + $end reduce using rule 5 (b) + 'a' reduce using rule 5 (b) + 'b' reduce using rule 6 (c)]])[ + + b go to state 5 + c go to state 6 + + +state 4 + + 0 $accept: start $end . + + $default accept + + +state 5 + + 1 start: a b . [$end] + 2 | a b . 'a' + + 'a' shift, and go to state 7 + + ]AT_COND_CASE([[all]], [[$default]], [[$end]])[ reduce using rule 1 (start) + + +state 6 + + 3 start: a c . 'b' + + 'b' shift, and go to state 8 + + +state 7 + + 2 start: a b 'a' .]AT_COND_CASE([[accepting]], [[ [$end] + + $end reduce using rule 2 (start)]], [[ + + $default reduce using rule 2 (start)]])[ + + +state 8 + + 3 start: a c 'b' .]AT_COND_CASE([[accepting]], [[ [$end] + + $end reduce using rule 3 (start)]], [[ + + $default reduce using rule 3 (start)]])[ +]])