From 476c1cca5945eeef3493cfc6ef06ed6d0972d787 Mon Sep 17 00:00:00 2001 From: Akim Demaille Date: Tue, 11 Aug 2015 13:48:57 +0200 Subject: [PATCH] lalr1, yacc: use the default location as initial error location Currently lalr1.cc makes an out-of-bound access when trying to read @1 in rules with an empty rhs (i.e., when there is no @1) that raises an error (YYERROR). glr.c already gracefully handles this by using @$ as initial location for the errors. Let's do that in yacc.c and lalr1.cc. * data/lalr1.cc, data/yacc.c: Use @$ to initialize the error location. * tests/actions.at: Check that case. --- NEWS | 11 +++++++ data/lalr1.cc | 4 +-- data/yacc.c | 8 ++--- tests/actions.at | 86 ++++++++++++++++++++++++++++++++++++++++++++---- 4 files changed, 97 insertions(+), 12 deletions(-) diff --git a/NEWS b/NEWS index 99a6e16c..2aed3f2a 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,17 @@ GNU Bison NEWS * Noteworthy changes in release ?.? (????-??-??) [?] +** Bug fixes + +*** Location of errors + + In C++ parsers, out-of-bounds errors can happen when a rule with an empty + right-hand side raises a syntax error. The behavior of the default parser + (yacc.c) in such a condition was undefined. + + Now all the parsers match the behavior of glr.c: @$ is used as the + location of the error. This handles gracefully rules with and without + rhs. * Noteworthy changes in release 3.0.4 (2015-01-23) [stable] diff --git a/data/lalr1.cc b/data/lalr1.cc index eb0923e9..4c823409 100644 --- a/data/lalr1.cc +++ b/data/lalr1.cc @@ -848,6 +848,7 @@ b4_dollar_popdef])[]dnl { slice slice (yystack_, yylen); YYLLOC_DEFAULT (yylhs.location, slice, yylen); + yyerror_range[1].location = yylhs.location; }]])[ // Perform the reduction. @@ -918,8 +919,7 @@ b4_dollar_popdef])[]dnl YYERROR and the label yyerrorlab therefore never appears in user code. 
*/ if (false) - goto yyerrorlab;]b4_locations_if([[ - yyerror_range[1].location = yystack_[yylen - 1].location;]])[ + goto yyerrorlab; /* Do not reclaim the symbols of the rule whose action triggered this YYERROR. */ yypop_ (yylen); diff --git a/data/yacc.c b/data/yacc.c index 600e77df..e974319f 100644 --- a/data/yacc.c +++ b/data/yacc.c @@ -1641,8 +1641,9 @@ yyreduce: yyval = yyvsp[1-yylen]; ]b4_locations_if( -[[ /* Default location. */ - YYLLOC_DEFAULT (yyloc, (yylsp - yylen), yylen);]])[ +[[ /* Default location. */ + YYLLOC_DEFAULT (yyloc, (yylsp - yylen), yylen); + yyerror_range[1] = yyloc;]])[ YY_REDUCE_PRINT (yyn);]b4_lac_if([[ { int yychar_backup = yychar; @@ -1782,8 +1783,7 @@ yyerrorlab: if (/*CONSTCOND*/ 0) goto yyerrorlab; -]b4_locations_if([[ yyerror_range[1] = yylsp[1-yylen]; -]])[ /* Do not reclaim the symbols of the rule whose action triggered + /* Do not reclaim the symbols of the rule whose action triggered this YYERROR. */ YYPOPSTACK (yylen); yylen = 0; diff --git a/tests/actions.at b/tests/actions.at index d538f883..7d876709 100644 --- a/tests/actions.at +++ b/tests/actions.at @@ -475,7 +475,7 @@ AT_DATA_GRAMMAR([[input.y]], /* Display the symbol type Symbol. */ #define V(Symbol, Value, Location, Sep) \ - fprintf (stderr, #Symbol " (%d@%d-%d)" Sep, Value, RANGE(Location)) + fprintf (stderr, #Symbol " (%d@%d-%d)%s", Value, RANGE(Location), Sep) } $5 @@ -490,15 +490,17 @@ AT_LALR1_CC_IF([typedef yy::location YYLTYPE;])[ ]AT_LALR1_CC_IF([], [AT_YYERROR_DECLARE]) [} -]m4_ifval([$6], [%type '(' 'x' 'y' ')' ';' thing line input END])[ +]m4_ifval([$6], +[%type '(' 'x' 'y' ')' ';' thing line input + '!' raise check-spontaneous-errors END])[ /* FIXME: This %printer isn't actually tested. */ %printer { - ]AT_LALR1_CC_IF([debug_stream () << $$;], - [fprintf (yyoutput, "%d", $$)])[; + ]AT_LALR1_CC_IF([yyo << $$;], + [fprintf (yyo, "%d", $$)])[; } - input line thing 'x' 'y' + '(' 'x' 'y' ')' ';' thing line input '!' 
raise check-spontaneous-errors END %destructor { fprintf (stderr, "Freeing nterm input (%d@%d-%d)\n", $$, RANGE (@$)); } @@ -512,6 +514,14 @@ AT_LALR1_CC_IF([typedef yy::location YYLTYPE;])[ { fprintf (stderr, "Freeing nterm thing (%d@%d-%d)\n", $$, RANGE (@$)); } thing +%destructor + { fprintf (stderr, "Freeing raise thing (%d@%d-%d)\n", $$, RANGE (@$)); } + raise + +%destructor + { fprintf (stderr, "Freeing check-spontaneous-errors thing (%d@%d-%d)\n", $$, RANGE (@$)); } + check-spontaneous-errors + %destructor { fprintf (stderr, "Freeing token 'x' (%d@%d-%d)\n", $$, RANGE (@$)); } 'x' @@ -534,7 +544,7 @@ AT_LALR1_CC_IF([typedef yy::location YYLTYPE;])[ */ input: - /* Nothing. */ + %empty { $$ = 0; V(input, $$, @$, ": /* Nothing */\n"); @@ -547,6 +557,38 @@ input: V(line, $1, @1, " "); V(input, $2, @2, "\n"); } +| '!' check-spontaneous-errors + { + $$ = $2; + } +; + +check-spontaneous-errors: + raise { abort(); $$ = $1; } +| '(' raise ')' { abort(); $$ = $2; } +| error + { + $$ = 5; + V(check-spontaneous-errors, $$, @$, ": "); + fprintf (stderr, "error (@%d-%d)\n", RANGE(@1)); + } +; + +raise: + %empty + { + $$ = 4; + V(raise, $$, @$, ": %empty\n"); + YYERROR; + } +| '!' '!' + { + $$ = 5; + V(raise, $$, @$, ": "); + V(!, $1, @2, " "); + V(!, $2, @2, "\n"); + YYERROR; + } ; line: @@ -668,6 +710,38 @@ Freeing nterm input (2@0-29) Successful parse. ]]) +# Check the location of empty reductions raising an error +# ------------------------------------------------------- +# Here, the error is after token "!@0-9", so the error is raised from +# @9-9, and the error recovery detects that it starts from @9-9 and +# ends where starts the next token: END@10-19. +# +# So error recovery reports error@9-19. +AT_PARSER_CHECK([./input '!'], 0, [], +[[sending: '!' (0@0-9) +sending: END (1@10-19) +raise (4@9-9): %empty +check-spontaneous-errors (5@9-19): error (@9-19) +Freeing token END (1@10-19) +Freeing nterm input (5@0-19) +Successful parse. 
+]]) + +# Check the location of not empty reductions raising an error +# ----------------------------------------------------------- +# This time the error is raised from a rule with 2 rhs symbols: @10-29. +# It is recovered @10-29. +AT_PARSER_CHECK([[./input '!!!']], 0, [], +[[sending: '!' (0@0-9) +sending: '!' (1@10-19) +sending: '!' (2@20-29) +raise (5@10-29): ! (1@20-29) ! (2@20-29) +check-spontaneous-errors (5@10-29): error (@10-29) +sending: END (3@30-39) +Freeing token END (3@30-39) +Freeing nterm input (5@0-29) +Successful parse. +]]) # Check locations in error recovery # --------------------------------- -- 2.45.2