From ed8e1f68e9743c391f4aba148968ea1e5e960973 Mon Sep 17 00:00:00 2001 From: Akim Demaille Date: Thu, 13 Dec 2001 11:02:21 +0000 Subject: [PATCH] The computation of nullable is broken: it doesn't handle empty RHS's properly. * tests/torture.at (GNU AWK Grammar): New. * tests/sets.at (Nullable): New. * src/nullable.c (set_nullable): Instead of blindly looping over `ritems', loop over the rules, and then over their rhs's. Work around Autotest bugs. * src/warshall.c (bitmatrix_print): Don't use `+--+' as table frame, because Autotest understand lines starting with a `+' as traces from the shell. Then, they are not processed properly. Admittedly an Autotest bug, but we don't have time to wait for Autotest to catch up. * tests/regression.at (Broken Closure): Adjust to the new table frames. Move to... * tests/sets.at: here. --- ChangeLog | 22 +++ src/nullable.c | 85 ++++++----- src/warshall.c | 8 +- tests/Makefile.am | 2 +- tests/regression.at | 75 ---------- tests/sets.at | 193 +++++++++++++++++++++++++ tests/testsuite.at | 1 + tests/torture.at | 340 ++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 601 insertions(+), 125 deletions(-) create mode 100644 tests/sets.at diff --git a/ChangeLog b/ChangeLog index b411c7e0..b0589d35 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,25 @@ +2001-12-13 Akim Demaille + + The computation of nullable is broken: it doesn't handle empty + RHS's properly. + + * tests/torture.at (GNU AWK Grammar): New. + * tests/sets.at (Nullable): New. + * src/nullable.c (set_nullable): Instead of blindly looping over + `ritems', loop over the rules, and then over their rhs's. + + Work around Autotest bugs. + + * src/warshall.c (bitmatrix_print): Don't use `+--+' as table + frame, because Autotest understand lines starting with a `+' as + traces from the shell. Then, they are not processed properly. + Admittedly an Autotest bug, but we don't have time to wait for + Autotest to catch up. + * tests/regression.at (Broken Closure): Adjust to the new table + frames. + Move to... + * tests/sets.at: here. + 2001-12-13 Akim Demaille * src/closure.c (closure): Use nrules instead of playing tricks diff --git a/src/nullable.c b/src/nullable.c index 8e8809e2..bbd8e319 100644 --- a/src/nullable.c +++ b/src/nullable.c @@ -46,70 +46,65 @@ nullable_print (FILE *out) void set_nullable (void) { - short *r; + int ruleno; short *s1; short *s2; shorts *p; - short *squeue; - short *rcount; - shorts **rsets; - shorts *relts; + short *squeue = XCALLOC (short, nvars); + short *rcount = XCALLOC (short, nrules + 1); + /* RITEM contains all the rules, including useless productions. + Hence we must allocate room for useless nonterminals too. */ + shorts **rsets = XCALLOC (shorts *, nvars) - ntokens; + /* This is said to be more elements than we actually use. + Supposedly nitems - nrules is enough. But why take the risk? */ + shorts *relts = XCALLOC (shorts, nitems + nvars + 1); if (trace_flag) fprintf (stderr, "Entering set_nullable\n"); nullable = XCALLOC (char, nvars) - ntokens; - squeue = XCALLOC (short, nvars); s1 = s2 = squeue; - - rcount = XCALLOC (short, nrules + 1); - - /* RITEM contains all the rules, including useless productions. - Hence we must allocate room for useless nonterminals too. */ - rsets = XCALLOC (shorts *, nvars + nuseless_nonterminals) - ntokens; - /* This is said to be more elements than we actually use. - Supposedly nitems - nrules is enough. - But why take the risk? */ - relts = XCALLOC (shorts, nitems + nvars + nuseless_nonterminals + 1); p = relts; - for (r = ritem; *r; ++r) - { - /* Walk RITEM to find (i), if there are any tokens in the - RHS, and (ii), to find RULENO. */ - int ruleno; - int any_tokens = 0; - short *r1; - for (r1 = r; *r1 > 0; ++r1) - if (ISTOKEN (*r1)) - any_tokens = 1; - ruleno = -*r1; - - /* Examine the RHS of the rule. */ - if (!any_tokens) - for (/* Nothing. */; *r > 0; ++r) + for (ruleno = 1; ruleno < nrules + 1; ++ruleno) + if (ritem[rule_table[ruleno].rhs] > 0) + { + /* This rule has a non empty RHS. */ + short *r; + int any_tokens = 0; + for (r = ritem + rule_table[ruleno].rhs; *r > 0; ++r) + if (ISTOKEN (*r)) + any_tokens = 1; + + /* This rule has only nonterminals: schedule it for the second + pass. */ + if (!any_tokens) + for (r = ritem + rule_table[ruleno].rhs; *r > 0; ++r) + { + rcount[ruleno]++; + p->next = rsets[*r]; + p->value = ruleno; + rsets[*r] = p; + p++; + } + } + else + { + /* This rule has an empty RHS. */ + assert (ritem[rule_table[ruleno].rhs] == -ruleno); + if (rule_table[ruleno].useful && !nullable[rule_table[ruleno].lhs]) { - rcount[ruleno]++; - p->next = rsets[*r]; - p->value = ruleno; - rsets[*r] = p; - p++; + nullable[rule_table[ruleno].lhs] = 1; + *s2++ = rule_table[ruleno].lhs; } - - /* Examine its LHS. */ - if (rule_table[ruleno].useful && !nullable[rule_table[ruleno].lhs]) - { - nullable[rule_table[ruleno].lhs] = 1; - *s2++ = rule_table[ruleno].lhs; - } - } + } while (s1 < s2) for (p = rsets[*s1++]; p; p = p->next) { - int ruleno = p->value; + ruleno = p->value; if (--rcount[ruleno] == 0) if (rule_table[ruleno].useful && !nullable[rule_table[ruleno].lhs]) { diff --git a/src/warshall.c b/src/warshall.c index 5963f472..523d42ae 100644 --- a/src/warshall.c +++ b/src/warshall.c @@ -49,10 +49,10 @@ bitmatrix_print (const char *title, unsigned *matrix, size_t size) putc ('\n', stderr); /* Bar. */ - fputs (" +", stderr); + fputs (" .", stderr); for (i = 0; i < size; ++i) putc ('-', stderr); - fputs ("+\n", stderr); + fputs (".\n", stderr); /* Contents. */ for (i = 0; i < size; ++i) @@ -64,10 +64,10 @@ bitmatrix_print (const char *title, unsigned *matrix, size_t size) } /* Bar. */ - fputs (" +", stderr); + fputs (" `", stderr); for (i = 0; i < size; ++i) putc ('-', stderr); - fputs ("+\n", stderr); + fputs ("'\n", stderr); /* End title. */ fprintf (stderr, "%s END\n\n", title); diff --git a/tests/Makefile.am b/tests/Makefile.am index 0a4c0bc6..ac0c8bfb 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -25,7 +25,7 @@ MAINTAINERCLEANFILES = Makefile.in $(TESTSUITE) TESTSUITE_AT = \ testsuite.at \ - output.at reduce.at calc.at torture.at regression.at + output.at sets.at reduce.at calc.at torture.at regression.at TESTSUITE = $(srcdir)/testsuite AUTOTEST = $(AUTOM4TE) --language=autotest diff --git a/tests/regression.at b/tests/regression.at index f6d53250..e7a1dbfe 100644 --- a/tests/regression.at +++ b/tests/regression.at @@ -605,78 +605,3 @@ AT_CLEANUP AT_TEST_CPP_GUARD_H([input/input]) AT_TEST_CPP_GUARD_H([9foo]) - - -## ---------------- ## -## Broken Closure. ## -## ---------------- ## - -# TC was once broken during a massive `simplification' of the code. -# It resulted in bison dumping core on the following grammar (the -# computation of FIRSTS uses TC). It managed to produce a pretty -# exotic closure: -# -# TC: Input -# -# 01234567 -# +--------+ -# 0| 1 | -# 1| 1 | -# 2| 1 | -# 3| 1 | -# 4| 1 | -# 5| 1 | -# 6| 1| -# 7| | -# +--------+ -# -# TC: Output -# -# 01234567 -# +--------+ -# 0| 1 | -# 1| 111 | -# 2| 111 | -# 3| 1111 | -# 4| 111 1 | -# 5| 111 1 | -# 6| 111 1| -# 7| 111 | -# +--------+ -# -# instead of that below. - -AT_SETUP([Broken Closure]) - -AT_DATA([input.y], -[[%% -a: b -b: c -c: d -d: e -e: f -f: g -g: h -h: 'h' -]]) - -AT_CHECK([bison --trace input.y 2>&1 | - sed -n '/^TC: Output BEGIN/,/^TC: Output END/p'], - [0], -[[TC: Output BEGIN - @&t@ - 01234567 - +--------+ - 0| 1111111| - 1| 111111| - 2| 11111| - 3| 1111| - 4| 111| - 5| 11| - 6| 1| - 7| | - +--------+ -TC: Output END -]]) - -AT_CLEANUP diff --git a/tests/sets.at b/tests/sets.at new file mode 100644 index 00000000..7fa8d1f7 --- /dev/null +++ b/tests/sets.at @@ -0,0 +1,193 @@ +# Exercising Bison Grammar Sets. -*- Autotest -*- +# Copyright 2001 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2, or (at your option) +# any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +# 02111-1307, USA. + +AT_BANNER([[Grammar Sets (Firsts etc.).]]) + + +## ---------- ## +## Nullable. ## +## ---------- ## + +AT_SETUP([Nullable]) + +# At some point, nullable had been smoking grass, and managed to say: +# +# Entering set_nullable +# NULLABLE +# 'e': yes +# (null): no +# ... + +AT_DATA([[input.y]], +[[%% +e: 'e' | /* Nothing */; +]]) + +AT_CHECK([[bison --trace input.y]], [], [], +[[RITEM + 'e' (rule 1) + (rule 2) + + +DERIVES + e derives + 1: 'e' (rule 1) + 2: (rule 2) + + +Entering set_nullable +NULLABLE + e: yes + + +TC: Input BEGIN + @&t@ + 0 + .-. + 0| | + `-' +TC: Input END + +TC: Output BEGIN + @&t@ + 0 + .-. + 0| | + `-' +TC: Output END + +FIRSTS + e firsts + 4 (e) + + +FDERIVES + e derives + 1: 'e' + 2: + + +Processing state 0 (reached by $) +Closure: input + + +Closure: output + 0: . 'e' (rule 1) + 2: . (rule 2) + + +Entering new_itemsets, state = 0 +Entering append_states, state = 0 +Entering get_state, state = 0, symbol = 3 ('e') +Entering new_state, state = 0, symbol = 3 ('e') +Exiting get_state => 1 +Processing state 1 (reached by 'e') +Closure: input + 1: . (rule 1) + + +Closure: output + 1: . (rule 1) + + +Entering new_itemsets, state = 1 +Entering append_states, state = 1 +transpose: input + 0: @&t@ + +transpose: output + 0: @&t@ + +]]) + +AT_CLEANUP + + +## ---------------- ## +## Broken Closure. ## +## ---------------- ## + +# TC was once broken during a massive `simplification' of the code. +# It resulted in bison dumping core on the following grammar (the +# computation of FIRSTS uses TC). It managed to produce a pretty +# exotic closure: +# +# TC: Input +# +# 01234567 +# +--------+ +# 0| 1 | +# 1| 1 | +# 2| 1 | +# 3| 1 | +# 4| 1 | +# 5| 1 | +# 6| 1| +# 7| | +# +--------+ +# +# TC: Output +# +# 01234567 +# +--------+ +# 0| 1 | +# 1| 111 | +# 2| 111 | +# 3| 1111 | +# 4| 111 1 | +# 5| 111 1 | +# 6| 111 1| +# 7| 111 | +# +--------+ +# +# instead of that below. + +AT_SETUP([Broken Closure]) + +AT_DATA([input.y], +[[%% +a: b +b: c +c: d +d: e +e: f +f: g +g: h +h: 'h' +]]) + +AT_CHECK([bison --trace input.y 2>&1 | + sed -n '/^TC: Output BEGIN/,/^TC: Output END/p'], + [0], +[[TC: Output BEGIN + @&t@ + 01234567 + .--------. + 0| 1111111| + 1| 111111| + 2| 11111| + 3| 1111| + 4| 111| + 5| 11| + 6| 1| + 7| | + `--------' +TC: Output END +]]) + +AT_CLEANUP diff --git a/tests/testsuite.at b/tests/testsuite.at index b59f5802..be0d08d4 100644 --- a/tests/testsuite.at +++ b/tests/testsuite.at @@ -26,6 +26,7 @@ AT_INIT AT_TESTED([bison]) m4_include([output.at]) +m4_include([sets.at]) m4_include([reduce.at]) m4_include([calc.at]) m4_include([torture.at]) diff --git a/tests/torture.at b/tests/torture.at index e0205372..8fe00f7c 100644 --- a/tests/torture.at +++ b/tests/torture.at @@ -114,3 +114,343 @@ AT_CHECK([input 900], 0, [], [ignore]) AT_CHECK([input 10000], 1, [], [ignore]) AT_CLEANUP + + +## ----------------- ## +## GNU AWK Grammar. ## +## ----------------- ## + +AT_SETUP([GNU AWK Grammar]) + +# We have been careful to strip all the actions excepts the +# mid-rule actions. We rely on %expect to check that there are +# indeed 65 SR conflicts. +# +# Bison was once wrong, due to an incorrect computation of nullable. +# It reported 485 SR conflicts! + +AT_DATA([[input.y]], +[[%expect 65 + +%token FUNC_CALL NAME REGEXP +%token ERROR +%token YNUMBER YSTRING +%token RELOP APPEND_OP +%token ASSIGNOP MATCHOP NEWLINE CONCAT_OP +%token LEX_BEGIN LEX_END LEX_IF LEX_ELSE LEX_RETURN LEX_DELETE +%token LEX_WHILE LEX_DO LEX_FOR LEX_BREAK LEX_CONTINUE +%token LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT LEX_FUNCTION +%token LEX_GETLINE LEX_NEXTFILE +%token LEX_IN +%token LEX_AND LEX_OR INCREMENT DECREMENT +%token LEX_BUILTIN LEX_LENGTH + +/* Lowest to highest */ +%right ASSIGNOP +%right '?' ':' +%left LEX_OR +%left LEX_AND +%left LEX_GETLINE +%nonassoc LEX_IN +%left FUNC_CALL LEX_BUILTIN LEX_LENGTH +%nonassoc ',' +%nonassoc MATCHOP +%nonassoc RELOP '<' '>' '|' APPEND_OP TWOWAYIO +%left CONCAT_OP +%left YSTRING YNUMBER +%left '+' '-' +%left '*' '/' '%' +%right '!' UNARY +%right '^' +%left INCREMENT DECREMENT +%left '$' +%left '(' ')' +%% + +start + : opt_nls program opt_nls + ; + +program + : rule + | program rule + | error + | program error + | /* empty */ + ; + +rule + : LEX_BEGIN {} action + | LEX_END {} action + | LEX_BEGIN statement_term + | LEX_END statement_term + | pattern action + | action + | pattern statement_term + | function_prologue function_body + ; + +func_name + : NAME + | FUNC_CALL + | lex_builtin + ; + +lex_builtin + : LEX_BUILTIN + | LEX_LENGTH + ; + +function_prologue + : LEX_FUNCTION {} func_name '(' opt_param_list r_paren opt_nls + ; + +function_body + : l_brace statements r_brace opt_semi opt_nls + | l_brace r_brace opt_semi opt_nls + ; + + +pattern + : exp + | exp ',' exp + ; + +regexp + /* + * In this rule, want_regexp tells yylex that the next thing + * is a regexp so it should read up to the closing slash. + */ + : '/' {} REGEXP '/' + ; + +action + : l_brace statements r_brace opt_semi opt_nls + | l_brace r_brace opt_semi opt_nls + ; + +statements + : statement + | statements statement + | error + | statements error + ; + +statement_term + : nls + | semi opt_nls + ; + +statement + : semi opt_nls + | l_brace r_brace + | l_brace statements r_brace + | if_statement + | LEX_WHILE '(' exp r_paren opt_nls statement + | LEX_DO opt_nls statement LEX_WHILE '(' exp r_paren opt_nls + | LEX_FOR '(' NAME LEX_IN NAME r_paren opt_nls statement + | LEX_FOR '(' opt_exp semi opt_nls exp semi opt_nls opt_exp r_paren opt_nls statement + | LEX_FOR '(' opt_exp semi opt_nls semi opt_nls opt_exp r_paren opt_nls statement + | LEX_BREAK statement_term + | LEX_CONTINUE statement_term + | print '(' expression_list r_paren output_redir statement_term + | print opt_rexpression_list output_redir statement_term + | LEX_NEXT statement_term + | LEX_NEXTFILE statement_term + | LEX_EXIT opt_exp statement_term + | LEX_RETURN {} opt_exp statement_term + | LEX_DELETE NAME '[' expression_list ']' statement_term + | LEX_DELETE NAME statement_term + | exp statement_term + ; + +print + : LEX_PRINT + | LEX_PRINTF + ; + +if_statement + : LEX_IF '(' exp r_paren opt_nls statement + | LEX_IF '(' exp r_paren opt_nls statement + LEX_ELSE opt_nls statement + ; + +nls + : NEWLINE + | nls NEWLINE + ; + +opt_nls + : /* empty */ + | nls + ; + +input_redir + : /* empty */ + | '<' simp_exp + ; + +output_redir + : /* empty */ + | '>' exp + | APPEND_OP exp + | '|' exp + | TWOWAYIO exp + ; + +opt_param_list + : /* empty */ + | param_list + ; + +param_list + : NAME + | param_list comma NAME + | error + | param_list error + | param_list comma error + ; + +/* optional expression, as in for loop */ +opt_exp + : /* empty */ + | exp + ; + +opt_rexpression_list + : /* empty */ + | rexpression_list + ; + +rexpression_list + : rexp + | rexpression_list comma rexp + | error + | rexpression_list error + | rexpression_list error rexp + | rexpression_list comma error + ; + +opt_expression_list + : /* empty */ + | expression_list + ; + +expression_list + : exp + | expression_list comma exp + | error + | expression_list error + | expression_list error exp + | expression_list comma error + ; + +/* Expressions, not including the comma operator. */ +exp : variable ASSIGNOP {} exp + | '(' expression_list r_paren LEX_IN NAME + | exp '|' LEX_GETLINE opt_variable + | exp TWOWAYIO LEX_GETLINE opt_variable + | LEX_GETLINE opt_variable input_redir + | exp LEX_AND exp + | exp LEX_OR exp + | exp MATCHOP exp + | regexp + | '!' regexp %prec UNARY + | exp LEX_IN NAME + | exp RELOP exp + | exp '<' exp + | exp '>' exp + | exp '?' exp ':' exp + | simp_exp + | exp simp_exp %prec CONCAT_OP + ; + +rexp + : variable ASSIGNOP {} rexp + | rexp LEX_AND rexp + | rexp LEX_OR rexp + | LEX_GETLINE opt_variable input_redir + | regexp + | '!' regexp %prec UNARY + | rexp MATCHOP rexp + | rexp LEX_IN NAME + | rexp RELOP rexp + | rexp '?' rexp ':' rexp + | simp_exp + | rexp simp_exp %prec CONCAT_OP + ; + +simp_exp + : non_post_simp_exp + /* Binary operators in order of decreasing precedence. */ + | simp_exp '^' simp_exp + | simp_exp '*' simp_exp + | simp_exp '/' simp_exp + | simp_exp '%' simp_exp + | simp_exp '+' simp_exp + | simp_exp '-' simp_exp + | variable INCREMENT + | variable DECREMENT + ; + +non_post_simp_exp + : '!' simp_exp %prec UNARY + | '(' exp r_paren + | LEX_BUILTIN + '(' opt_expression_list r_paren + | LEX_LENGTH '(' opt_expression_list r_paren + | LEX_LENGTH + | FUNC_CALL '(' opt_expression_list r_paren + | variable + | INCREMENT variable + | DECREMENT variable + | YNUMBER + | YSTRING + | '-' simp_exp %prec UNARY + | '+' simp_exp %prec UNARY + ; + +opt_variable + : /* empty */ + | variable + ; + +variable + : NAME + | NAME '[' expression_list ']' + | '$' non_post_simp_exp + ; + +l_brace + : '{' opt_nls + ; + +r_brace + : '}' opt_nls + ; + +r_paren + : ')' + ; + +opt_semi + : /* empty */ + | semi + ; + +semi + : ';' + ; + +comma : ',' opt_nls + ; + +%% +]]) + +# Pass plenty of options, to exercise plenty of code, even if we +# don't actually check the output. But SEGV is watching us, and +# so might do dmalloc. +AT_CHECK([[bison --verbose --defines input.y]]) + +AT_CLEANUP -- 2.45.2