RHS's properly.
* tests/torture.at (GNU AWK Grammar): New.
* tests/sets.at (Nullable): New.
* src/nullable.c (set_nullable): Instead of blindly looping over
`ritems', loop over the rules, and then over their rhs's.
Work around Autotest bugs.
* src/warshall.c (bitmatrix_print): Don't use `+--+' as table
frame, because Autotest understand lines starting with a `+' as
traces from the shell. Then, they are not processed properly.
Admittedly an Autotest bug, but we don't have time to wait for
Autotest to catch up.
* tests/regression.at (Broken Closure): Adjust to the new table
frames.
Move to...
* tests/sets.at: here.
+2001-12-13 Akim Demaille <akim@epita.fr>
+
+ The computation of nullable is broken: it doesn't handle empty
+ RHS's properly.
+
+ * tests/torture.at (GNU AWK Grammar): New.
+ * tests/sets.at (Nullable): New.
+ * src/nullable.c (set_nullable): Instead of blindly looping over
+ `ritems', loop over the rules, and then over their rhs's.
+
+ Work around Autotest bugs.
+
+ * src/warshall.c (bitmatrix_print): Don't use `+--+' as table
+ frame, because Autotest understand lines starting with a `+' as
+ traces from the shell. Then, they are not processed properly.
+ Admittedly an Autotest bug, but we don't have time to wait for
+ Autotest to catch up.
+ * tests/regression.at (Broken Closure): Adjust to the new table
+ frames.
+ Move to...
+ * tests/sets.at: here.
+
2001-12-13 Akim Demaille <akim@epita.fr>
* src/closure.c (closure): Use nrules instead of playing tricks
void
set_nullable (void)
{
- short *r;
+ int ruleno;
short *s1;
short *s2;
shorts *p;
- short *squeue;
- short *rcount;
- shorts **rsets;
- shorts *relts;
+ short *squeue = XCALLOC (short, nvars);
+ short *rcount = XCALLOC (short, nrules + 1);
+ /* RITEM contains all the rules, including useless productions.
+ Hence we must allocate room for useless nonterminals too. */
+ shorts **rsets = XCALLOC (shorts *, nvars) - ntokens;
+ /* This is said to be more elements than we actually use.
+ Supposedly nitems - nrules is enough. But why take the risk? */
+ shorts *relts = XCALLOC (shorts, nitems + nvars + 1);
if (trace_flag)
fprintf (stderr, "Entering set_nullable\n");
nullable = XCALLOC (char, nvars) - ntokens;
- squeue = XCALLOC (short, nvars);
s1 = s2 = squeue;
-
- rcount = XCALLOC (short, nrules + 1);
-
- /* RITEM contains all the rules, including useless productions.
- Hence we must allocate room for useless nonterminals too. */
- rsets = XCALLOC (shorts *, nvars + nuseless_nonterminals) - ntokens;
- /* This is said to be more elements than we actually use.
- Supposedly nitems - nrules is enough.
- But why take the risk? */
- relts = XCALLOC (shorts, nitems + nvars + nuseless_nonterminals + 1);
p = relts;
- for (r = ritem; *r; ++r)
- {
- /* Walk RITEM to find (i), if there are any tokens in the
- RHS, and (ii), to find RULENO. */
- int ruleno;
- int any_tokens = 0;
- short *r1;
- for (r1 = r; *r1 > 0; ++r1)
- if (ISTOKEN (*r1))
- any_tokens = 1;
- ruleno = -*r1;
-
- /* Examine the RHS of the rule. */
- if (!any_tokens)
- for (/* Nothing. */; *r > 0; ++r)
+ for (ruleno = 1; ruleno < nrules + 1; ++ruleno)
+ if (ritem[rule_table[ruleno].rhs] > 0)
+ {
+ /* This rule has a non empty RHS. */
+ short *r;
+ int any_tokens = 0;
+ for (r = ritem + rule_table[ruleno].rhs; *r > 0; ++r)
+ if (ISTOKEN (*r))
+ any_tokens = 1;
+
+ /* This rule has only nonterminals: schedule it for the second
+ pass. */
+ if (!any_tokens)
+ for (r = ritem + rule_table[ruleno].rhs; *r > 0; ++r)
+ {
+ rcount[ruleno]++;
+ p->next = rsets[*r];
+ p->value = ruleno;
+ rsets[*r] = p;
+ p++;
+ }
+ }
+ else
+ {
+ /* This rule has an empty RHS. */
+ assert (ritem[rule_table[ruleno].rhs] == -ruleno);
+ if (rule_table[ruleno].useful && !nullable[rule_table[ruleno].lhs])
{
- rcount[ruleno]++;
- p->next = rsets[*r];
- p->value = ruleno;
- rsets[*r] = p;
- p++;
+ nullable[rule_table[ruleno].lhs] = 1;
+ *s2++ = rule_table[ruleno].lhs;
}
-
- /* Examine its LHS. */
- if (rule_table[ruleno].useful && !nullable[rule_table[ruleno].lhs])
- {
- nullable[rule_table[ruleno].lhs] = 1;
- *s2++ = rule_table[ruleno].lhs;
- }
- }
+ }
while (s1 < s2)
for (p = rsets[*s1++]; p; p = p->next)
{
- int ruleno = p->value;
+ ruleno = p->value;
if (--rcount[ruleno] == 0)
if (rule_table[ruleno].useful && !nullable[rule_table[ruleno].lhs])
{
putc ('\n', stderr);
/* Bar. */
- fputs (" +", stderr);
+ fputs (" .", stderr);
for (i = 0; i < size; ++i)
putc ('-', stderr);
- fputs ("+\n", stderr);
+ fputs (".\n", stderr);
/* Contents. */
for (i = 0; i < size; ++i)
}
/* Bar. */
- fputs (" +", stderr);
+ fputs (" `", stderr);
for (i = 0; i < size; ++i)
putc ('-', stderr);
- fputs ("+\n", stderr);
+ fputs ("'\n", stderr);
/* End title. */
fprintf (stderr, "%s END\n\n", title);
TESTSUITE_AT = \
testsuite.at \
- output.at reduce.at calc.at torture.at regression.at
+ output.at sets.at reduce.at calc.at torture.at regression.at
TESTSUITE = $(srcdir)/testsuite
AUTOTEST = $(AUTOM4TE) --language=autotest
AT_TEST_CPP_GUARD_H([input/input])
AT_TEST_CPP_GUARD_H([9foo])
-
-
-## ---------------- ##
-## Broken Closure. ##
-## ---------------- ##
-
-# TC was once broken during a massive `simplification' of the code.
-# It resulted in bison dumping core on the following grammar (the
-# computation of FIRSTS uses TC). It managed to produce a pretty
-# exotic closure:
-#
-# TC: Input
-#
-# 01234567
-# +--------+
-# 0| 1 |
-# 1| 1 |
-# 2| 1 |
-# 3| 1 |
-# 4| 1 |
-# 5| 1 |
-# 6| 1|
-# 7| |
-# +--------+
-#
-# TC: Output
-#
-# 01234567
-# +--------+
-# 0| 1 |
-# 1| 111 |
-# 2| 111 |
-# 3| 1111 |
-# 4| 111 1 |
-# 5| 111 1 |
-# 6| 111 1|
-# 7| 111 |
-# +--------+
-#
-# instead of that below.
-
-AT_SETUP([Broken Closure])
-
-AT_DATA([input.y],
-[[%%
-a: b
-b: c
-c: d
-d: e
-e: f
-f: g
-g: h
-h: 'h'
-]])
-
-AT_CHECK([bison --trace input.y 2>&1 |
- sed -n '/^TC: Output BEGIN/,/^TC: Output END/p'],
- [0],
-[[TC: Output BEGIN
- @&t@
- 01234567
- +--------+
- 0| 1111111|
- 1| 111111|
- 2| 11111|
- 3| 1111|
- 4| 111|
- 5| 11|
- 6| 1|
- 7| |
- +--------+
-TC: Output END
-]])
-
-AT_CLEANUP
--- /dev/null
+# Exercising Bison Grammar Sets. -*- Autotest -*-
+# Copyright 2001 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+# 02111-1307, USA.
+
+AT_BANNER([[Grammar Sets (Firsts etc.).]])
+
+
+## ---------- ##
+## Nullable. ##
+## ---------- ##
+
+AT_SETUP([Nullable])
+
+# At some point, nullable had been smoking grass, and managed to say:
+#
+# Entering set_nullable
+# NULLABLE
+# 'e': yes
+# (null): no
+# ...
+
+AT_DATA([[input.y]],
+[[%%
+e: 'e' | /* Nothing */;
+]])
+
+AT_CHECK([[bison --trace input.y]], [], [],
+[[RITEM
+ 'e' (rule 1)
+ (rule 2)
+
+
+DERIVES
+ e derives
+ 1: 'e' (rule 1)
+ 2: (rule 2)
+
+
+Entering set_nullable
+NULLABLE
+ e: yes
+
+
+TC: Input BEGIN
+ @&t@
+ 0
+ .-.
+ 0| |
+ `-'
+TC: Input END
+
+TC: Output BEGIN
+ @&t@
+ 0
+ .-.
+ 0| |
+ `-'
+TC: Output END
+
+FIRSTS
+ e firsts
+ 4 (e)
+
+
+FDERIVES
+ e derives
+ 1: 'e'
+ 2:
+
+
+Processing state 0 (reached by $)
+Closure: input
+
+
+Closure: output
+ 0: . 'e' (rule 1)
+ 2: . (rule 2)
+
+
+Entering new_itemsets, state = 0
+Entering append_states, state = 0
+Entering get_state, state = 0, symbol = 3 ('e')
+Entering new_state, state = 0, symbol = 3 ('e')
+Exiting get_state => 1
+Processing state 1 (reached by 'e')
+Closure: input
+ 1: . (rule 1)
+
+
+Closure: output
+ 1: . (rule 1)
+
+
+Entering new_itemsets, state = 1
+Entering append_states, state = 1
+transpose: input
+ 0: @&t@
+
+transpose: output
+ 0: @&t@
+
+]])
+
+AT_CLEANUP
+
+
+## ---------------- ##
+## Broken Closure. ##
+## ---------------- ##
+
+# TC was once broken during a massive `simplification' of the code.
+# It resulted in bison dumping core on the following grammar (the
+# computation of FIRSTS uses TC). It managed to produce a pretty
+# exotic closure:
+#
+# TC: Input
+#
+# 01234567
+# +--------+
+# 0| 1 |
+# 1| 1 |
+# 2| 1 |
+# 3| 1 |
+# 4| 1 |
+# 5| 1 |
+# 6| 1|
+# 7| |
+# +--------+
+#
+# TC: Output
+#
+# 01234567
+# +--------+
+# 0| 1 |
+# 1| 111 |
+# 2| 111 |
+# 3| 1111 |
+# 4| 111 1 |
+# 5| 111 1 |
+# 6| 111 1|
+# 7| 111 |
+# +--------+
+#
+# instead of that below.
+
+AT_SETUP([Broken Closure])
+
+AT_DATA([input.y],
+[[%%
+a: b
+b: c
+c: d
+d: e
+e: f
+f: g
+g: h
+h: 'h'
+]])
+
+AT_CHECK([bison --trace input.y 2>&1 |
+ sed -n '/^TC: Output BEGIN/,/^TC: Output END/p'],
+ [0],
+[[TC: Output BEGIN
+ @&t@
+ 01234567
+ .--------.
+ 0| 1111111|
+ 1| 111111|
+ 2| 11111|
+ 3| 1111|
+ 4| 111|
+ 5| 11|
+ 6| 1|
+ 7| |
+ `--------'
+TC: Output END
+]])
+
+AT_CLEANUP
AT_TESTED([bison])
m4_include([output.at])
+m4_include([sets.at])
m4_include([reduce.at])
m4_include([calc.at])
m4_include([torture.at])
AT_CHECK([input 10000], 1, [], [ignore])
AT_CLEANUP
+
+
+## ----------------- ##
+## GNU AWK Grammar. ##
+## ----------------- ##
+
+AT_SETUP([GNU AWK Grammar])
+
+# We have been careful to strip all the actions excepts the
+# mid-rule actions. We rely on %expect to check that there are
+# indeed 65 SR conflicts.
+#
+# Bison was once wrong, due to an incorrect computation of nullable.
+# It reported 485 SR conflicts!
+
+AT_DATA([[input.y]],
+[[%expect 65
+
+%token FUNC_CALL NAME REGEXP
+%token ERROR
+%token YNUMBER YSTRING
+%token RELOP APPEND_OP
+%token ASSIGNOP MATCHOP NEWLINE CONCAT_OP
+%token LEX_BEGIN LEX_END LEX_IF LEX_ELSE LEX_RETURN LEX_DELETE
+%token LEX_WHILE LEX_DO LEX_FOR LEX_BREAK LEX_CONTINUE
+%token LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT LEX_FUNCTION
+%token LEX_GETLINE LEX_NEXTFILE
+%token LEX_IN
+%token LEX_AND LEX_OR INCREMENT DECREMENT
+%token LEX_BUILTIN LEX_LENGTH
+
+/* Lowest to highest */
+%right ASSIGNOP
+%right '?' ':'
+%left LEX_OR
+%left LEX_AND
+%left LEX_GETLINE
+%nonassoc LEX_IN
+%left FUNC_CALL LEX_BUILTIN LEX_LENGTH
+%nonassoc ','
+%nonassoc MATCHOP
+%nonassoc RELOP '<' '>' '|' APPEND_OP TWOWAYIO
+%left CONCAT_OP
+%left YSTRING YNUMBER
+%left '+' '-'
+%left '*' '/' '%'
+%right '!' UNARY
+%right '^'
+%left INCREMENT DECREMENT
+%left '$'
+%left '(' ')'
+%%
+
+start
+ : opt_nls program opt_nls
+ ;
+
+program
+ : rule
+ | program rule
+ | error
+ | program error
+ | /* empty */
+ ;
+
+rule
+ : LEX_BEGIN {} action
+ | LEX_END {} action
+ | LEX_BEGIN statement_term
+ | LEX_END statement_term
+ | pattern action
+ | action
+ | pattern statement_term
+ | function_prologue function_body
+ ;
+
+func_name
+ : NAME
+ | FUNC_CALL
+ | lex_builtin
+ ;
+
+lex_builtin
+ : LEX_BUILTIN
+ | LEX_LENGTH
+ ;
+
+function_prologue
+ : LEX_FUNCTION {} func_name '(' opt_param_list r_paren opt_nls
+ ;
+
+function_body
+ : l_brace statements r_brace opt_semi opt_nls
+ | l_brace r_brace opt_semi opt_nls
+ ;
+
+
+pattern
+ : exp
+ | exp ',' exp
+ ;
+
+regexp
+ /*
+ * In this rule, want_regexp tells yylex that the next thing
+ * is a regexp so it should read up to the closing slash.
+ */
+ : '/' {} REGEXP '/'
+ ;
+
+action
+ : l_brace statements r_brace opt_semi opt_nls
+ | l_brace r_brace opt_semi opt_nls
+ ;
+
+statements
+ : statement
+ | statements statement
+ | error
+ | statements error
+ ;
+
+statement_term
+ : nls
+ | semi opt_nls
+ ;
+
+statement
+ : semi opt_nls
+ | l_brace r_brace
+ | l_brace statements r_brace
+ | if_statement
+ | LEX_WHILE '(' exp r_paren opt_nls statement
+ | LEX_DO opt_nls statement LEX_WHILE '(' exp r_paren opt_nls
+ | LEX_FOR '(' NAME LEX_IN NAME r_paren opt_nls statement
+ | LEX_FOR '(' opt_exp semi opt_nls exp semi opt_nls opt_exp r_paren opt_nls statement
+ | LEX_FOR '(' opt_exp semi opt_nls semi opt_nls opt_exp r_paren opt_nls statement
+ | LEX_BREAK statement_term
+ | LEX_CONTINUE statement_term
+ | print '(' expression_list r_paren output_redir statement_term
+ | print opt_rexpression_list output_redir statement_term
+ | LEX_NEXT statement_term
+ | LEX_NEXTFILE statement_term
+ | LEX_EXIT opt_exp statement_term
+ | LEX_RETURN {} opt_exp statement_term
+ | LEX_DELETE NAME '[' expression_list ']' statement_term
+ | LEX_DELETE NAME statement_term
+ | exp statement_term
+ ;
+
+print
+ : LEX_PRINT
+ | LEX_PRINTF
+ ;
+
+if_statement
+ : LEX_IF '(' exp r_paren opt_nls statement
+ | LEX_IF '(' exp r_paren opt_nls statement
+ LEX_ELSE opt_nls statement
+ ;
+
+nls
+ : NEWLINE
+ | nls NEWLINE
+ ;
+
+opt_nls
+ : /* empty */
+ | nls
+ ;
+
+input_redir
+ : /* empty */
+ | '<' simp_exp
+ ;
+
+output_redir
+ : /* empty */
+ | '>' exp
+ | APPEND_OP exp
+ | '|' exp
+ | TWOWAYIO exp
+ ;
+
+opt_param_list
+ : /* empty */
+ | param_list
+ ;
+
+param_list
+ : NAME
+ | param_list comma NAME
+ | error
+ | param_list error
+ | param_list comma error
+ ;
+
+/* optional expression, as in for loop */
+opt_exp
+ : /* empty */
+ | exp
+ ;
+
+opt_rexpression_list
+ : /* empty */
+ | rexpression_list
+ ;
+
+rexpression_list
+ : rexp
+ | rexpression_list comma rexp
+ | error
+ | rexpression_list error
+ | rexpression_list error rexp
+ | rexpression_list comma error
+ ;
+
+opt_expression_list
+ : /* empty */
+ | expression_list
+ ;
+
+expression_list
+ : exp
+ | expression_list comma exp
+ | error
+ | expression_list error
+ | expression_list error exp
+ | expression_list comma error
+ ;
+
+/* Expressions, not including the comma operator. */
+exp : variable ASSIGNOP {} exp
+ | '(' expression_list r_paren LEX_IN NAME
+ | exp '|' LEX_GETLINE opt_variable
+ | exp TWOWAYIO LEX_GETLINE opt_variable
+ | LEX_GETLINE opt_variable input_redir
+ | exp LEX_AND exp
+ | exp LEX_OR exp
+ | exp MATCHOP exp
+ | regexp
+ | '!' regexp %prec UNARY
+ | exp LEX_IN NAME
+ | exp RELOP exp
+ | exp '<' exp
+ | exp '>' exp
+ | exp '?' exp ':' exp
+ | simp_exp
+ | exp simp_exp %prec CONCAT_OP
+ ;
+
+rexp
+ : variable ASSIGNOP {} rexp
+ | rexp LEX_AND rexp
+ | rexp LEX_OR rexp
+ | LEX_GETLINE opt_variable input_redir
+ | regexp
+ | '!' regexp %prec UNARY
+ | rexp MATCHOP rexp
+ | rexp LEX_IN NAME
+ | rexp RELOP rexp
+ | rexp '?' rexp ':' rexp
+ | simp_exp
+ | rexp simp_exp %prec CONCAT_OP
+ ;
+
+simp_exp
+ : non_post_simp_exp
+ /* Binary operators in order of decreasing precedence. */
+ | simp_exp '^' simp_exp
+ | simp_exp '*' simp_exp
+ | simp_exp '/' simp_exp
+ | simp_exp '%' simp_exp
+ | simp_exp '+' simp_exp
+ | simp_exp '-' simp_exp
+ | variable INCREMENT
+ | variable DECREMENT
+ ;
+
+non_post_simp_exp
+ : '!' simp_exp %prec UNARY
+ | '(' exp r_paren
+ | LEX_BUILTIN
+ '(' opt_expression_list r_paren
+ | LEX_LENGTH '(' opt_expression_list r_paren
+ | LEX_LENGTH
+ | FUNC_CALL '(' opt_expression_list r_paren
+ | variable
+ | INCREMENT variable
+ | DECREMENT variable
+ | YNUMBER
+ | YSTRING
+ | '-' simp_exp %prec UNARY
+ | '+' simp_exp %prec UNARY
+ ;
+
+opt_variable
+ : /* empty */
+ | variable
+ ;
+
+variable
+ : NAME
+ | NAME '[' expression_list ']'
+ | '$' non_post_simp_exp
+ ;
+
+l_brace
+ : '{' opt_nls
+ ;
+
+r_brace
+ : '}' opt_nls
+ ;
+
+r_paren
+ : ')'
+ ;
+
+opt_semi
+ : /* empty */
+ | semi
+ ;
+
+semi
+ : ';'
+ ;
+
+comma : ',' opt_nls
+ ;
+
+%%
+]])
+
+# Pass plenty of options, to exercise plenty of code, even if we
+# don't actually check the output. But SEGV is watching us, and
+# so might do dmalloc.
+AT_CHECK([[bison --verbose --defines input.y]])
+
+AT_CLEANUP