Merge remote-tracking branch 'origin/maint'

[bison.git] / tests / conflicts.at
diff --git a/tests/conflicts.at b/tests/conflicts.at

index 705419b95b8c296c685a6664540f3463470f1335..599d708b54486e24b232b6026f21e2fafcd59788 100644 (file)
--- a/tests/conflicts.at
+++ b/tests/conflicts.at
@@ -1,6 +1,6 @@
  # Exercising Bison on conflicts.                         -*- Autotest -*-
  
-# Copyright (C) 2002-2005, 2007-2010 Free Software Foundation, Inc.
+# Copyright (C) 2002-2005, 2007-2012 Free Software Foundation, Inc.
  
  # This program is free software: you can redistribute it and/or modify
  # it under the terms of the GNU General Public License as published by
@@ -93,46 +93,52 @@ main (int argc, const char *argv[])
  }
  ]])
  
-# Specify the output files to avoid problems on different file systems.
-AT_BISON_CHECK([-o input.c input.y])
+m4_pushdef([AT_NONASSOC_AND_EOF_CHECK],
+[AT_BISON_CHECK([$1[ -o input.c input.y]])
  AT_COMPILE([input])
  
+m4_pushdef([AT_EXPECTING], [m4_if($2, [correct], [[, expecting $end]])])
+
  AT_PARSER_CHECK([./input '0<0'])
  AT_PARSER_CHECK([./input '0<0<0'], [1], [],
-         [syntax error, unexpected '<'
+         [syntax error, unexpected '<'AT_EXPECTING
  ])
  
  AT_PARSER_CHECK([./input '0>0'])
  AT_PARSER_CHECK([./input '0>0>0'], [1], [],
-         [syntax error, unexpected '>'
+         [syntax error, unexpected '>'AT_EXPECTING
  ])
  
  AT_PARSER_CHECK([./input '0<0>0'], [1], [],
-         [syntax error, unexpected '>'
+         [syntax error, unexpected '>'AT_EXPECTING
  ])
  
-# We must disable default reductions in inconsistent states in order to
-# have an explicit list of all expected tokens.  (However, unless we use
-# canonical LR, lookahead sets are merged for different left contexts,
-# so it is still possible to have extra incorrect tokens in the expected
-# list.  That just doesn't happen to be a problem for this test case.)
-
-AT_BISON_CHECK([-Dlr.default-reductions=consistent -o input.c input.y])
-AT_COMPILE([input])
+m4_popdef([AT_EXPECTING])])
  
-AT_PARSER_CHECK([./input '0<0'])
-AT_PARSER_CHECK([./input '0<0<0'], [1], [],
-         [syntax error, unexpected '<', expecting $end
-])
-
-AT_PARSER_CHECK([./input '0>0'])
-AT_PARSER_CHECK([./input '0>0>0'], [1], [],
-         [syntax error, unexpected '>', expecting $end
-])
+# Expected token list is missing.
+AT_NONASSOC_AND_EOF_CHECK([], [[incorrect]])
  
-AT_PARSER_CHECK([./input '0<0>0'], [1], [],
-         [syntax error, unexpected '>', expecting $end
-])
+# We must disable default reductions in inconsistent states in order to
+# have an explicit list of all expected tokens.
+AT_NONASSOC_AND_EOF_CHECK([[-Dlr.default-reductions=consistent]],
+                          [[correct]])
+
+# lr.default-reductions=consistent happens to work for this test case.
+# However, for other grammars, lookahead sets can be merged for
+# different left contexts, so it is still possible to have an incorrect
+# expected list.  Canonical LR is almost a general solution (that is, it
+# can fail only when %nonassoc is used), so make sure it gives the same
+# result as above.
+AT_NONASSOC_AND_EOF_CHECK([[-Dlr.type=canonical-lr]], [[correct]])
+
+# parse.lac=full is a completely general solution that does not require
+# any of the above sacrifices.  Of course, it does not extend the
+# language-recognition power of LALR to (IE)LR, but it does ensure that
+# the reported list of expected tokens matches what the given parser
+# would have accepted in place of the unexpected token.
+AT_NONASSOC_AND_EOF_CHECK([[-Dparse.lac=full]], [[correct]])
+
+m4_popdef([AT_NONASSOC_AND_EOF_CHECK])
  
  AT_CLEANUP
  
@@ -146,64 +152,244 @@ AT_SETUP([[parse.error=verbose and consistent errors]])
  
  m4_pushdef([AT_CONSISTENT_ERRORS_CHECK], [
  
-AT_BISON_CHECK([$1[ -o input.c input.y]])
-AT_COMPILE([[input]])
+AT_BISON_OPTION_PUSHDEFS([$1])
  
-m4_pushdef([AT_EXPECTING], [m4_if($3, [ab], [[, expecting 'a' or 'b']],
-                                  $3, [a],  [[, expecting 'a']],
-                                  $3, [b],  [[, expecting 'b']])])
+m4_pushdef([AT_YYLEX_PROTOTYPE],
+[AT_SKEL_CC_IF([[int yylex (yy::parser::semantic_type *lvalp)]],
+               [[int yylex (YYSTYPE *lvalp)]])])
  
-AT_PARSER_CHECK([[./input]], [[1]], [],
-[[syntax error, unexpected ]$2[]AT_EXPECTING[
-]])
+AT_SKEL_JAVA_IF([AT_DATA], [AT_DATA_GRAMMAR])([input.y],
+[AT_SKEL_JAVA_IF([[
  
-m4_popdef([AT_EXPECTING])
-
-])
+%code imports {
+  import java.io.IOException;
+}]], [[
  
-AT_DATA_GRAMMAR([input.y],
-[[%code {
+%code {]AT_SKEL_CC_IF([[
+  #include <string>]], [[
    #include <assert.h>
    #include <stdio.h>
-  int yylex (void);
-  void yyerror (char const *);
+  void yyerror (char const *msg);]])[
+  ]AT_YYLEX_PROTOTYPE[;
    #define USE(Var)
  }
  
+]AT_SKEL_CC_IF([[%defines]], [[%define api.pure]])])[
+
+]$1[
+
  %define parse.error verbose
  
-// The point isn't to test IELR here, but state merging happens to
-// complicate the example.
-%define lr.type ielr
+%%
  
-%nonassoc 'a'
+]$2[
+
+]AT_SKEL_JAVA_IF([[%code lexer {]], [[%%]])[
+
+/*--------.
+| yylex.  |
+`--------*/]AT_SKEL_JAVA_IF([[
+
+public String input = "]$3[";
+public int index = 0;
+public int yylex ()
+{
+  if (index < input.length ())
+    return input.charAt (index++);
+  else
+    return 0;
+}
+public Object getLVal ()
+{
+  return new Integer(1);
+}]], [[
+
+]AT_YYLEX_PROTOTYPE[
+{
+  static char const *input = "]$3[";
+  *lvalp = 1;
+  return *input++;
+}]])[
+
+/*----------.
+| yyerror.  |
+`----------*/]AT_SKEL_JAVA_IF([[
+
+public void yyerror (String msg)
+{
+  System.err.println (msg);
+}
+
+};
+
+%%]], [AT_SKEL_CC_IF([[
  
-// If yylval=0 here, then we know that the 'a' destructor is being
-// invoked incorrectly for the 'b' set in the semantic action below.
-// All 'a' tokens are returned by yylex, which sets yylval=1.
+void
+yy::parser::error (std::string const &msg)
+{
+  std::cerr << msg << std::endl;
+}]], [[
+
+void
+yyerror (char const *msg)
+{
+  fprintf (stderr, "%s\n", msg);
+}]])])[
+
+/*-------.
+| main.  |
+`-------*/]AT_SKEL_JAVA_IF([[
+
+class input
+{
+  public static void main (String args[]) throws IOException
+  {
+    YYParser p = new YYParser ();
+    p.parse ();
+  }
+}]], [AT_SKEL_CC_IF([[
+
+int
+main (void)
+{
+  yy::parser parser;
+  return parser.parse ();
+}]], [[
+
+int
+main (void)
+{
+  return yyparse ();
+}]])])[
+]])
+
+AT_FULL_COMPILE([[input]])
+
+m4_pushdef([AT_EXPECTING], [m4_if($5, [ab], [[, expecting 'a' or 'b']],
+                                  $5, [a],  [[, expecting 'a']],
+                                  $5, [b],  [[, expecting 'b']])])
+
+AT_SKEL_JAVA_IF([AT_JAVA_PARSER_CHECK([[input]], [[0]]],
+                [AT_PARSER_CHECK([[./input]], [[1]]]),
+[[]],
+[[syntax error, unexpected ]$4[]AT_EXPECTING[
+]])
+
+m4_popdef([AT_EXPECTING])
+m4_popdef([AT_YYLEX_PROTOTYPE])
+AT_BISON_OPTION_POPDEFS
+
+])
+
+m4_pushdef([AT_PREVIOUS_STATE_GRAMMAR],
+[[%nonassoc 'a';
+
+start: consistent-error-on-a-a 'a' ;
+
+consistent-error-on-a-a:
+    'a' default-reduction
+  | 'a' default-reduction 'a'
+  | 'a' shift
+  ;
+
+default-reduction: /*empty*/ ;
+shift: 'b' ;
+
+// Provide another context in which all rules are useful so that this
+// test case looks a little more realistic.
+start: 'b' consistent-error-on-a-a 'c' ;
+]])
+
+m4_pushdef([AT_PREVIOUS_STATE_INPUT], [[a]])
+
+# Unfortunately, no expected tokens are reported even though 'b' can be
+# accepted.  Nevertheless, the main point of this test is to make sure
+# that at least the unexpected token is reported.  In a previous version
+# of Bison, it wasn't reported because the error is detected in a
+# consistent state with an error action, and that case always triggered
+# the simple "syntax error" message.
+#
+# The point isn't to test IELR here, but state merging happens to
+# complicate this example.
+AT_CONSISTENT_ERRORS_CHECK([[%define lr.type ielr]],
+                           [AT_PREVIOUS_STATE_GRAMMAR],
+                           [AT_PREVIOUS_STATE_INPUT],
+                           [[$end]], [[none]])
+AT_CONSISTENT_ERRORS_CHECK([[%define lr.type ielr
+                             %glr-parser]],
+                           [AT_PREVIOUS_STATE_GRAMMAR],
+                           [AT_PREVIOUS_STATE_INPUT],
+                           [[$end]], [[none]])
+AT_CONSISTENT_ERRORS_CHECK([[%define lr.type ielr
+                             %language "c++"]],
+                           [AT_PREVIOUS_STATE_GRAMMAR],
+                           [AT_PREVIOUS_STATE_INPUT],
+                           [[$end]], [[none]])
+AT_CONSISTENT_ERRORS_CHECK([[%define lr.type ielr
+                             %language "java"]],
+                           [AT_PREVIOUS_STATE_GRAMMAR],
+                           [AT_PREVIOUS_STATE_INPUT],
+                           [[end of input]], [[none]])
+
+# Even canonical LR doesn't foresee the error for 'a'!
+AT_CONSISTENT_ERRORS_CHECK([[%define lr.type ielr
+                             %define lr.default-reductions consistent]],
+                           [AT_PREVIOUS_STATE_GRAMMAR],
+                           [AT_PREVIOUS_STATE_INPUT],
+                           [[$end]], [[ab]])
+AT_CONSISTENT_ERRORS_CHECK([[%define lr.type ielr
+                             %define lr.default-reductions accepting]],
+                           [AT_PREVIOUS_STATE_GRAMMAR],
+                           [AT_PREVIOUS_STATE_INPUT],
+                           [[$end]], [[ab]])
+AT_CONSISTENT_ERRORS_CHECK([[%define lr.type canonical-lr]],
+                           [AT_PREVIOUS_STATE_GRAMMAR],
+                           [AT_PREVIOUS_STATE_INPUT],
+                           [[$end]], [[ab]])
+
+# Only LAC gets it right.
+AT_CONSISTENT_ERRORS_CHECK([[%define lr.type canonical-lr
+                             %define parse.lac full]],
+                           [AT_PREVIOUS_STATE_GRAMMAR],
+                           [AT_PREVIOUS_STATE_INPUT],
+                           [[$end]], [[b]])
+AT_CONSISTENT_ERRORS_CHECK([[%define lr.type ielr
+                             %define parse.lac full]],
+                           [AT_PREVIOUS_STATE_GRAMMAR],
+                           [AT_PREVIOUS_STATE_INPUT],
+                           [[$end]], [[b]])
+
+m4_popdef([AT_PREVIOUS_STATE_GRAMMAR])
+m4_popdef([AT_PREVIOUS_STATE_INPUT])
+
+m4_pushdef([AT_USER_ACTION_GRAMMAR],
+[[%nonassoc 'a';
+
+// If $$ = 0 here, then we know that the 'a' destructor is being invoked
+// incorrectly for the 'b' set in the semantic action below.  All 'a'
+// tokens are returned by yylex, which sets $$ = 1.
  %destructor {
    if (!$$)
      fprintf (stderr, "Wrong destructor.\n");
-} 'a'
-
-%%
-
-// The lookahead assigned by the semantic action isn't needed before
-// either error action is encountered.  In a previous version of Bison,
-// this was a problem as it meant yychar was not translated into yytoken
-// before either error action.  The second error action thus invoked a
+} 'a';
+
+// Rather than depend on an inconsistent state to induce reading a
+// lookahead as in the previous grammar, just assign the lookahead in a
+// semantic action.  That lookahead isn't needed before either error
+// action is encountered.  In a previous version of Bison, this was a
+// problem as it meant yychar was not translated into yytoken before
+// either error action.  The second error action thus invoked a
  // destructor that it selected according to the incorrect yytoken.  The
  // first error action would have reported an incorrect unexpected token
-// except that, due to another bug, the unexpected token is not reported
-// at all because the error action is the default action in a consistent
-// state.  That bug still needs to be fixed.
-start: error-reduce consistent-error 'a' { USE ($3); } ;
+// except that, due to the bug described in the previous grammar, the
+// unexpected token was not reported at all.
+start: error-reduce consistent-error 'a' { USE ($][3); } ;
  
  error-reduce:
    'a' 'a' consistent-reduction consistent-error 'a'
-  { USE (($1, $2, $5)); }
+  { USE (($][1, $][2, $][5)); }
  | 'a' error
-  { USE ($1); }
+  { USE ($][1); }
  ;
  
  consistent-reduction: /*empty*/ {
@@ -213,30 +399,128 @@ consistent-reduction: /*empty*/ {
  } ;
  
  consistent-error:
-  'a' { USE ($1); }
+  'a' { USE ($][1); }
  | /*empty*/ %prec 'a'
  ;
  
  // Provide another context in which all rules are useful so that this
  // test case looks a little more realistic.
  start: 'b' consistent-error 'b' ;
+]])
+m4_pushdef([AT_USER_ACTION_INPUT], [[aa]])
  
-%%
+AT_CONSISTENT_ERRORS_CHECK([[]],
+                           [AT_USER_ACTION_GRAMMAR],
+                           [AT_USER_ACTION_INPUT],
+                           [['b']], [[none]])
+AT_CONSISTENT_ERRORS_CHECK([[%glr-parser]],
+                           [AT_USER_ACTION_GRAMMAR],
+                           [AT_USER_ACTION_INPUT],
+                           [['b']], [[none]])
+# No C++ or Java test because yychar cannot be manipulated by users.
  
-int
-yylex (void)
-{
-  static char const *input = "aa";
-  yylval = 1;
-  return *input++;
+AT_CONSISTENT_ERRORS_CHECK([[%define lr.default-reductions consistent]],
+                           [AT_USER_ACTION_GRAMMAR],
+                           [AT_USER_ACTION_INPUT],
+                           [['b']], [[none]])
+
+# Canonical LR doesn't foresee the error for 'a'!
+AT_CONSISTENT_ERRORS_CHECK([[%define lr.default-reductions accepting]],
+                           [AT_USER_ACTION_GRAMMAR],
+                           [AT_USER_ACTION_INPUT],
+                           [[$end]], [[a]])
+AT_CONSISTENT_ERRORS_CHECK([[%define lr.type canonical-lr]],
+                           [AT_USER_ACTION_GRAMMAR],
+                           [AT_USER_ACTION_INPUT],
+                           [[$end]], [[a]])
+
+AT_CONSISTENT_ERRORS_CHECK([[%define parse.lac full]],
+                           [AT_USER_ACTION_GRAMMAR],
+                           [AT_USER_ACTION_INPUT],
+                           [['b']], [[none]])
+AT_CONSISTENT_ERRORS_CHECK([[%define parse.lac full
+                             %define lr.default-reductions accepting]],
+                           [AT_USER_ACTION_GRAMMAR],
+                           [AT_USER_ACTION_INPUT],
+                           [[$end]], [[none]])
+
+m4_popdef([AT_USER_ACTION_GRAMMAR])
+m4_popdef([AT_USER_ACTION_INPUT])
+
+m4_popdef([AT_CONSISTENT_ERRORS_CHECK])
+
+AT_CLEANUP
+
+
+
+## ------------------------------------------------------- ##
+## LAC: %nonassoc requires splitting canonical LR states.  ##
+## ------------------------------------------------------- ##
+
+# This test case demonstrates that, when %nonassoc is used, canonical
+# LR(1) parser table construction followed by conflict resolution
+# without further state splitting is not always sufficient to produce a
+# parser that can detect all syntax errors as soon as possible on one
+# token of lookahead.  However, LAC solves the problem completely even
+# with minimal LR parser tables.
+
+AT_SETUP([[LAC: %nonassoc requires splitting canonical LR states]])
+
+AT_DATA_GRAMMAR([[input.y]],
+[[%code {
+  #include <stdio.h>
+  void yyerror (char const *);
+  int yylex (void);
  }
  
+%error-verbose
+%nonassoc 'a'
+
+%%
+
+start:
+  'a' problem 'a' // First context.
+| 'b' problem 'b' // Second context.
+| 'c' reduce-nonassoc // Just makes reduce-nonassoc useful.
+;
+
+problem:
+  look reduce-nonassoc
+| look 'a'
+| look 'b'
+;
+
+// For the state reached after shifting the 'a' in these productions,
+// lookahead sets are the same in both the first and second contexts.
+// Thus, canonical LR reuses the same state for both contexts.  However,
+// the lookahead 'a' for the reduction "look: 'a'" later becomes an
+// error action only in the first context.  In order to immediately
+// detect the syntax error on 'a' here for only the first context, this
+// canonical LR state would have to be split into two states, and the
+// 'a' lookahead would have to be removed from only one of the states.
+look:
+  'a' // Reduction lookahead set is always ['a', 'b'].
+| 'a' 'b'
+| 'a' 'c' // 'c' is forgotten as an expected token.
+;
+
+reduce-nonassoc: %prec 'a';
+
+%%
+
  void
  yyerror (char const *msg)
  {
    fprintf (stderr, "%s\n", msg);
  }
  
+int
+yylex (void)
+{
+  static char const *input = "aaa"; /* Static: must advance across calls.  */
+  return *input++;
+}
+
  int
  main (void)
  {
@@ -244,24 +528,38 @@ main (void)
  }
  ]])
  
-# See comments in grammar for why this test doesn't succeed.
-AT_XFAIL_IF([[:]])
-
-AT_CONSISTENT_ERRORS_CHECK([], [['b']], [[none]])
-AT_CONSISTENT_ERRORS_CHECK([[-Dlr.default-reductions=consistent]],
-                           [['b']], [[none]])
+# Show canonical LR's failure.
+AT_BISON_CHECK([[-Dlr.type=canonical-lr -o input.c input.y]],
+               [[0]], [[]],
+[[input.y: conflicts: 2 shift/reduce
+]])
+AT_COMPILE([[input]])
+AT_PARSER_CHECK([[./input]], [[1]], [[]],
+[[syntax error, unexpected 'a', expecting 'b'
+]])
  
-# Canonical LR doesn't foresee the error for 'a'!
-AT_CONSISTENT_ERRORS_CHECK([[-Dlr.default-reductions=accepting]],
-                           [[$end]], [[a]])
-AT_CONSISTENT_ERRORS_CHECK([[-Flr.type=canonical-lr]], [[$end]], [[a]])
+# It's corrected by LAC.
+AT_BISON_CHECK([[-Dlr.type=canonical-lr -Dparse.lac=full \
+                 -o input.c input.y]], [[0]], [[]],
+[[input.y: conflicts: 2 shift/reduce
+]])
+AT_COMPILE([[input]])
+AT_PARSER_CHECK([[./input]], [[1]], [[]],
+[[syntax error, unexpected 'a', expecting 'b' or 'c'
+]])
  
-m4_popdef([AT_CONSISTENT_ERRORS_CHECK])
+# IELR is sufficient when LAC is used.
+AT_BISON_CHECK([[-Dlr.type=ielr -Dparse.lac=full -o input.c input.y]],
+               [[0]], [[]],
+[[input.y: conflicts: 2 shift/reduce
+]])
+AT_COMPILE([[input]])
+AT_PARSER_CHECK([[./input]], [[1]], [[]],
+[[syntax error, unexpected 'a', expecting 'b' or 'c'
+]])
  
  AT_CLEANUP
  
-
-
  ## ------------------------- ##
  ## Unresolved SR Conflicts.  ##
  ## ------------------------- ##
@@ -1192,3 +1490,104 @@ AT_CHECK([[cat input.output | sed -n '/^state 0$/,/^state 1$/p']], 0,
  state 1
  ]])
  AT_CLEANUP
+
+
+## --------------------------------- ##
+## -W versus %expect and %expect-rr  ##
+## --------------------------------- ##
+
+AT_SETUP([[-W versus %expect and %expect-rr]])
+
+AT_DATA([[sr-rr.y]],
+[[%glr-parser
+%%
+start: 'a' | A 'a' | B 'a' ;
+A: ;
+B: ;
+]])
+AT_DATA([[sr.y]],
+[[%glr-parser
+%%
+start: 'a' | A 'a' ;
+A: ;
+]])
+AT_DATA([[rr.y]],
+[[%glr-parser
+%%
+start: A | B ;
+A: ;
+B: ;
+]])
+
+AT_BISON_CHECK([[sr-rr.y]], [[0]], [[]],
+[[sr-rr.y: conflicts: 1 shift/reduce, 1 reduce/reduce
+]])
+AT_BISON_CHECK([[-Wno-conflicts-sr sr-rr.y]], [[0]], [[]],
+[[sr-rr.y: conflicts: 1 reduce/reduce
+]])
+AT_BISON_CHECK([[-Wno-conflicts-rr sr-rr.y]], [[0]], [[]],
+[[sr-rr.y: conflicts: 1 shift/reduce
+]])
+
+[for gram in sr-rr sr rr; do
+  for sr_exp_i in '' 0 1 2; do
+    for rr_exp_i in '' 0 1 2; do
+      test -z "$sr_exp_i" && test -z "$rr_exp_i" && continue
+
+      # Build grammar file.
+      sr_exp=0
+      rr_exp=0
+      file=$gram
+      directives=
+      if test -n "$sr_exp_i"; then
+        sr_exp=$sr_exp_i
+        file=$file-expect-$sr_exp
+        directives="%expect $sr_exp"
+      fi
+      if test -n "$rr_exp_i"; then
+        rr_exp=$rr_exp_i
+        file=$file-expect-rr-$rr_exp
+        directives="$directives %expect-rr $rr_exp"
+      fi
+      file=$file.y
+      echo "$directives" > $file
+      cat $gram.y >> $file
+
+      # Count actual conflicts.
+      conflicts=
+      sr_count=0
+      rr_count=0
+      if test $gram = sr || test $gram = sr-rr; then
+        conflicts="1 shift/reduce"
+        sr_count=1
+      fi
+      if test $gram = rr || test $gram = sr-rr; then
+        if test -n "$conflicts"; then
+          conflicts="$conflicts, "
+        fi
+        conflicts="${conflicts}1 reduce/reduce"
+        rr_count=1
+      fi
+
+      # Run tests.
+      if test $sr_count -eq $sr_exp && test $rr_count -eq $rr_exp; then
+        ]AT_BISON_CHECK([[-Wnone $file]])[
+        ]AT_BISON_CHECK([[-Werror $file]])[
+      else
+        echo "$file: conflicts: $conflicts" > experr
+        if test $sr_count -ne $sr_exp; then
+          if test $sr_exp -ne 1; then s=s; else s= ; fi
+          echo "$file: expected $sr_exp shift/reduce conflict$s" >> experr
+        fi
+        if test $rr_count -ne $rr_exp; then
+          if test $rr_exp -ne 1; then s=s; else s= ; fi
+          echo "$file: expected $rr_exp reduce/reduce conflict$s" >> experr
+        fi
+        ]AT_BISON_CHECK([[-Wnone $file]], [[1]], [[]], [[experr]])[
+        ]AT_BISON_CHECK([[-Werror $file]], [[1]], [[]], [[experr]])[
+      fi
+    done
+  done
+done]
+
+AT_CLEANUP