lalr1, yacc: use the default location as initial error location

author Akim Demaille <akim@lrde.epita.fr>

Tue, 11 Aug 2015 11:48:57 +0000 (13:48 +0200)

committer Akim Demaille <akim@lrde.epita.fr>

Wed, 12 Aug 2015 11:56:07 +0000 (13:56 +0200)
author Akim Demaille <akim@lrde.epita.fr>
Tue, 11 Aug 2015 11:48:57 +0000 (13:48 +0200)
committer Akim Demaille <akim@lrde.epita.fr>
Wed, 12 Aug 2015 11:56:07 +0000 (13:56 +0200)
diff --git a/NEWS b/NEWS

index 99a6e16cb1e0b32df942ded62c3b206e241a5b50..2aed3f2a922d81faa3d5967c8400785680029642 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,17 @@ GNU Bison NEWS
  
  * Noteworthy changes in release ?.? (????-??-??) [?]
  
  
  * Noteworthy changes in release ?.? (????-??-??) [?]
  
+** Bug fixes
+
+*** Location of errors
+
+  In C++ parsers, out-of-bounds errors can happen when a rule with an empty
+  right-hand side raises a syntax error.  The behavior of the default parser
+  (yacc.c) in such a condition was undefined.
+
+  Now all the parsers match the behavior of glr.c: @$ is used as the
+  location of the error.  This gracefully handles rules with and without
+  rhs.
  
  * Noteworthy changes in release 3.0.4 (2015-01-23) [stable]
  
  
  * Noteworthy changes in release 3.0.4 (2015-01-23) [stable]
  
diff --git a/data/lalr1.cc b/data/lalr1.cc

index eb0923e9eb31ca39344e0f86b488fc621307c122..4c823409adea9827e67fb8d0dfd103ed803b4b7b 100644 (file)
--- a/data/lalr1.cc
+++ b/data/lalr1.cc
@@ -848,6 +848,7 @@ b4_dollar_popdef])[]dnl
        {
          slice<stack_symbol_type, stack_type> slice (yystack_, yylen);
          YYLLOC_DEFAULT (yylhs.location, slice, yylen);
        {
          slice<stack_symbol_type, stack_type> slice (yystack_, yylen);
          YYLLOC_DEFAULT (yylhs.location, slice, yylen);
+        yyerror_range[1].location = yylhs.location;
        }]])[
  
        // Perform the reduction.
        }]])[
  
        // Perform the reduction.
@@ -918,8 +919,7 @@ b4_dollar_popdef])[]dnl
         YYERROR and the label yyerrorlab therefore never appears in user
         code.  */
      if (false)
         YYERROR and the label yyerrorlab therefore never appears in user
         code.  */
      if (false)
-      goto yyerrorlab;]b4_locations_if([[
-    yyerror_range[1].location = yystack_[yylen - 1].location;]])[
+      goto yyerrorlab;
      /* Do not reclaim the symbols of the rule whose action triggered
         this YYERROR.  */
      yypop_ (yylen);
      /* Do not reclaim the symbols of the rule whose action triggered
         this YYERROR.  */
      yypop_ (yylen);
diff --git a/data/yacc.c b/data/yacc.c

index 600e77dffb6eb3351856d54cc99df8f38d04912e..e974319ff1e9ae44f8e7f8b913fcdaa2aa46f596 100644 (file)
--- a/data/yacc.c
+++ b/data/yacc.c
@@ -1641,8 +1641,9 @@ yyreduce:
    yyval = yyvsp[1-yylen];
  
  ]b4_locations_if(
    yyval = yyvsp[1-yylen];
  
  ]b4_locations_if(
-[[  /* Default location.  */
-  YYLLOC_DEFAULT (yyloc, (yylsp - yylen), yylen);]])[
+[[  /* Default location. */
+  YYLLOC_DEFAULT (yyloc, (yylsp - yylen), yylen);
+  yyerror_range[1] = yyloc;]])[
    YY_REDUCE_PRINT (yyn);]b4_lac_if([[
    {
      int yychar_backup = yychar;
    YY_REDUCE_PRINT (yyn);]b4_lac_if([[
    {
      int yychar_backup = yychar;
@@ -1782,8 +1783,7 @@ yyerrorlab:
    if (/*CONSTCOND*/ 0)
       goto yyerrorlab;
  
    if (/*CONSTCOND*/ 0)
       goto yyerrorlab;
  
-]b4_locations_if([[  yyerror_range[1] = yylsp[1-yylen];
-]])[  /* Do not reclaim the symbols of the rule whose action triggered
+  /* Do not reclaim the symbols of the rule whose action triggered
       this YYERROR.  */
    YYPOPSTACK (yylen);
    yylen = 0;
       this YYERROR.  */
    YYPOPSTACK (yylen);
    yylen = 0;
diff --git a/tests/actions.at b/tests/actions.at

index d538f883e9b6acca4549ce9b3b127c8f88ba092c..7d876709f001dbaf52922e7e73f0251329a4fe8a 100644 (file)
--- a/tests/actions.at
+++ b/tests/actions.at
@@ -475,7 +475,7 @@ AT_DATA_GRAMMAR([[input.y]],
  
  /* Display the symbol type Symbol.  */
  #define V(Symbol, Value, Location, Sep) \
  
  /* Display the symbol type Symbol.  */
  #define V(Symbol, Value, Location, Sep) \
-   fprintf (stderr, #Symbol " (%d@%d-%d)" Sep, Value, RANGE(Location))
+   fprintf (stderr, #Symbol " (%d@%d-%d)%s", Value, RANGE(Location), Sep)
  }
  
  $5
  }
  
  $5
@@ -490,15 +490,17 @@ AT_LALR1_CC_IF([typedef yy::location YYLTYPE;])[
  ]AT_LALR1_CC_IF([], [AT_YYERROR_DECLARE])
  [}
  
  ]AT_LALR1_CC_IF([], [AT_YYERROR_DECLARE])
  [}
  
-]m4_ifval([$6], [%type <ival> '(' 'x' 'y' ')' ';' thing line input END])[
+]m4_ifval([$6],
+[%type <ival> '(' 'x' 'y' ')' ';' thing line input
+              '!' raise check-spontaneous-errors END])[
  
  /* FIXME: This %printer isn't actually tested.  */
  %printer
    {
  
  /* FIXME: This %printer isn't actually tested.  */
  %printer
    {
-    ]AT_LALR1_CC_IF([debug_stream () << $$;],
-                    [fprintf (yyoutput, "%d", $$)])[;
+    ]AT_LALR1_CC_IF([yyo << $$;],
+                    [fprintf (yyo, "%d", $$)])[;
    }
    }
-  input line thing 'x' 'y'
+  '(' 'x' 'y' ')' ';' thing line input '!' raise check-spontaneous-errors END
  
  %destructor
    { fprintf (stderr, "Freeing nterm input (%d@%d-%d)\n", $$, RANGE (@$)); }
  
  %destructor
    { fprintf (stderr, "Freeing nterm input (%d@%d-%d)\n", $$, RANGE (@$)); }
@@ -512,6 +514,14 @@ AT_LALR1_CC_IF([typedef yy::location YYLTYPE;])[
    { fprintf (stderr, "Freeing nterm thing (%d@%d-%d)\n", $$, RANGE (@$)); }
    thing
  
    { fprintf (stderr, "Freeing nterm thing (%d@%d-%d)\n", $$, RANGE (@$)); }
    thing
  
+%destructor
+  { fprintf (stderr, "Freeing raise thing (%d@%d-%d)\n", $$, RANGE (@$)); }
+  raise
+
+%destructor
+  { fprintf (stderr, "Freeing check-spontaneous-errors thing (%d@%d-%d)\n", $$, RANGE (@$)); }
+  check-spontaneous-errors
+
  %destructor
    { fprintf (stderr, "Freeing token 'x' (%d@%d-%d)\n", $$, RANGE (@$)); }
    'x'
  %destructor
    { fprintf (stderr, "Freeing token 'x' (%d@%d-%d)\n", $$, RANGE (@$)); }
    'x'
@@ -534,7 +544,7 @@ AT_LALR1_CC_IF([typedef yy::location YYLTYPE;])[
  */
  
  input:
  */
  
  input:
-  /* Nothing. */
+  %empty
      {
        $$ = 0;
        V(input, $$, @$, ": /* Nothing */\n");
      {
        $$ = 0;
        V(input, $$, @$, ": /* Nothing */\n");
@@ -547,6 +557,38 @@ input:
        V(line,  $1, @1, " ");
        V(input, $2, @2, "\n");
      }
        V(line,  $1, @1, " ");
        V(input, $2, @2, "\n");
      }
+| '!' check-spontaneous-errors
+  {
+    $$ = $2;
+  }
+;
+
+check-spontaneous-errors:
+  raise         { abort(); $$ = $1; }
+| '(' raise ')' { abort(); $$ = $2; }
+| error
+  {
+    $$ = 5;
+    V(check-spontaneous-errors, $$, @$, ": ");
+    fprintf (stderr, "error (@%d-%d)\n", RANGE(@1));
+  }
+;
+
+raise:
+  %empty
+  {
+    $$ = 4;
+    V(raise, $$, @$, ": %empty\n");
+    YYERROR;
+  }
+| '!' '!'
+  {
+    $$ = 5;
+    V(raise, $$, @$, ": ");
+    V(!, $1, @2, " ");
+    V(!, $2, @2, "\n");
+    YYERROR;
+  }
  ;
  
  line:
  ;
  
  line:
@@ -668,6 +710,38 @@ Freeing nterm input (2@0-29)
  Successful parse.
  ]])
  
  Successful parse.
  ]])
  
+# Check the location of empty reductions raising an error
+# -------------------------------------------------------
+# Here, the error is after token "!@0-9", so the error is raised from
+# @9-9, and the error recovery detects that it starts from @9-9 and
+# ends where the next token starts: END@10-19.
+#
+# So error recovery reports error@9-19.
+AT_PARSER_CHECK([./input '!'], 0, [],
+[[sending: '!' (0@0-9)
+sending: END (1@10-19)
+raise (4@9-9): %empty
+check-spontaneous-errors (5@9-19): error (@9-19)
+Freeing token END (1@10-19)
+Freeing nterm input (5@0-19)
+Successful parse.
+]])
+
+# Check the location of non-empty reductions raising an error
+# -----------------------------------------------------------
+# This time the error is raised from a rule with 2 rhs symbols: @10-29.
+# It is recovered @10-29.
+AT_PARSER_CHECK([[./input '!!!']], 0, [],
+[[sending: '!' (0@0-9)
+sending: '!' (1@10-19)
+sending: '!' (2@20-29)
+raise (5@10-29): ! (1@20-29) ! (2@20-29)
+check-spontaneous-errors (5@10-29): error (@10-29)
+sending: END (3@30-39)
+Freeing token END (3@30-39)
+Freeing nterm input (5@0-29)
+Successful parse.
+]])
  
  # Check locations in error recovery
  # ---------------------------------
  
  # Check locations in error recovery
  # ---------------------------------
author	Akim Demaille <akim@lrde.epita.fr>
	Tue, 11 Aug 2015 11:48:57 +0000 (13:48 +0200)
committer	Akim Demaille <akim@lrde.epita.fr>
	Wed, 12 Aug 2015 11:56:07 +0000 (13:56 +0200)
NEWS		patch \| blob \| blame \| history
data/lalr1.cc		patch \| blob \| blame \| history
data/yacc.c		patch \| blob \| blame \| history
tests/actions.at		patch \| blob \| blame \| history