* data/glr.c, data/lalr1.cc, data/yacc.c: When YYABORT was invoked, yydestruct the lookahead.

author Akim Demaille <akim@epita.fr>

Mon, 6 Sep 2004 07:48:20 +0000 (07:48 +0000)

committer Akim Demaille <akim@epita.fr>

Mon, 6 Sep 2004 07:48:20 +0000 (07:48 +0000)
author Akim Demaille <akim@epita.fr>
Mon, 6 Sep 2004 07:48:20 +0000 (07:48 +0000)
committer Akim Demaille <akim@epita.fr>
Mon, 6 Sep 2004 07:48:20 +0000 (07:48 +0000)
diff --git a/ChangeLog b/ChangeLog

index 0e8e9c2c7d582beab0fd1a174f66a8d1b8f6c65a..09e7d27c3aaea00e7102e724b43d985980fb673e 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+2004-09-03  Akim Demaille  <akim@epita.fr>
+
+       * data/glr.c, data/lalr1.cc, data/yacc.c: When YYABORT was
+       invoked, yydestruct the lookahead.
+       * tests/calc.at (Calculator $1): Update the expected lengths of
+       traces: there is an added line for the discarded lookahead.
+       * doc/bison.texinfo (Destructor Decl): Some rewording.
+       Define "discarded" symbols.
+
  2004-09-02  Akim Demaille  <akim@epita.fr>
  
         * data/lalr1.cc (translate_, destruct_): No reason to be static.
diff --git a/data/glr.c b/data/glr.c

index 883c248e6488dd4185b0dcf786074f87b1fe53b7..fbbd993babf9866bec2951e5257bf7aec19dc14e 100644 (file)
--- a/data/glr.c
+++ b/data/glr.c
@@ -1916,7 +1916,13 @@ yyrecoverSyntaxError (yyGLRStack* yystack,
        yyposn = yystack.yytops.yystates[0]->yyposn;
      }
   yyDone:
-  ;
+  /* On YYABORT, free the lookahead. */
+  if (yystack.yyerrflag == 1 && yytoken != YYEMPTY)
+    {
+      YY_SYMBOL_PRINT ("Error: discarding lookahead",
+                       yytoken, yylvalp, yyllocp);
+      yydestruct (yytoken, yylvalp]b4_location_if([, yyllocp])[);
+    }
  
    yyfreeGLRStack (&yystack);
    return yystack.yyerrflag;
diff --git a/data/lalr1.cc b/data/lalr1.cc

index 7cc296cf2c43474959e9c6b86e1bef1132bc3ae1..e13965c6466d8a927532b40412426a782a1e321e 100644 (file)
--- a/data/lalr1.cc
+++ b/data/lalr1.cc
@@ -714,6 +714,10 @@ yyacceptlab:
  
    /* Abort.  */
  yyabortlab:
+  /* Free the lookahead. */
+  YY_SYMBOL_PRINT ("Error: discarding lookahead", ilooka_, &value, &location);
+  destruct_ (ilooka_, &value, &location);
+  looka_ = empty_;
    return 1;
  }
  
diff --git a/data/yacc.c b/data/yacc.c

index 78fae04f8d7ac533793ad4b5cc27e2c3ac38256f..3acb4453c50a57700a95f81e9c075f0d48fbbf55 100644 (file)
--- a/data/yacc.c
+++ b/data/yacc.c
@@ -1209,6 +1209,9 @@ yyacceptlab:
  | yyabortlab -- YYABORT comes here.  |
  `-----------------------------------*/
  yyabortlab:
+  YY_SYMBOL_PRINT ("Error: discarding lookahead", yytoken, &yylval, &yylloc);
+  yydestruct (yytoken, &yylval]b4_location_if([, &yylloc])[);
+  yychar = YYEMPTY;
    yyresult = 1;
    goto yyreturn;
  
diff --git a/doc/bison.texinfo b/doc/bison.texinfo

index af459f2bd9dca9907e868e17696b644993b9d5f1..dd2407a39c8c17585a2fdd4e6dd666ad8030df8e 100644 (file)
--- a/doc/bison.texinfo
+++ b/doc/bison.texinfo
@@ -787,7 +787,7 @@ are possible---either locally redefining @samp{a}, or using the
  value of @samp{a} from the outer scope.  So this approach cannot
  work.
  
-A simple solution to this problem is to declare the parser to 
+A simple solution to this problem is to declare the parser to
  use the @acronym{GLR} algorithm.
  When the @acronym{GLR} parser reaches the critical state, it
  merely splits into two branches and pursues both syntax rules
@@ -871,7 +871,7 @@ type t = (a) .. b;
  
  The parser can be turned into a @acronym{GLR} parser, while also telling Bison
  to be silent about the one known reduce/reduce conflict, by
-adding these two declarations to the Bison input file (before the first 
+adding these two declarations to the Bison input file (before the first
  @samp{%%}):
  
  @example
@@ -893,7 +893,7 @@ Bison to make sure that @acronym{GLR} splitting is only done where it is
  intended.  A @acronym{GLR} parser splitting inadvertently may cause
  problems less obvious than an @acronym{LALR} parser statically choosing the
  wrong alternative in a conflict.
-Second, consider interactions with the lexer (@pxref{Semantic Tokens}) 
+Second, consider interactions with the lexer (@pxref{Semantic Tokens})
  with great care.  Since a split parser consumes tokens
  without performing any actions during the split, the lexer cannot
  obtain information via parser actions.  Some cases of
@@ -977,20 +977,20 @@ parses as either an @code{expr} or a @code{stmt}
  @samp{x} as an @code{ID}).
  Bison detects this as a reduce/reduce conflict between the rules
  @code{expr : ID} and @code{declarator : ID}, which it cannot resolve at the
-time it encounters @code{x} in the example above.  Since this is a 
-@acronym{GLR} parser, it therefore splits the problem into two parses, one for 
+time it encounters @code{x} in the example above.  Since this is a
+@acronym{GLR} parser, it therefore splits the problem into two parses, one for
  each choice of resolving the reduce/reduce conflict.
  Unlike the example from the previous section (@pxref{Simple GLR Parsers}),
  however, neither of these parses ``dies,'' because the grammar as it stands is
-ambiguous.  One of the parsers eventually reduces @code{stmt : expr ';'} and 
-the other reduces @code{stmt : decl}, after which both parsers are in an 
-identical state: they've seen @samp{prog stmt} and have the same unprocessed 
-input remaining.  We say that these parses have @dfn{merged.}  
+ambiguous.  One of the parsers eventually reduces @code{stmt : expr ';'} and
+the other reduces @code{stmt : decl}, after which both parsers are in an
+identical state: they've seen @samp{prog stmt} and have the same unprocessed
+input remaining.  We say that these parses have @dfn{merged.}
  
  At this point, the @acronym{GLR} parser requires a specification in the
  grammar of how to choose between the competing parses.
  In the example above, the two @code{%dprec}
-declarations specify that Bison is to give precedence 
+declarations specify that Bison is to give precedence
  to the parse that interprets the example as a
  @code{decl}, which implies that @code{x} is a declarator.
  The parser therefore prints
@@ -1007,7 +1007,7 @@ T (x) + y;
  @end example
  
  @noindent
-This is another example of using @acronym{GLR} to parse an unambiguous 
+This is another example of using @acronym{GLR} to parse an unambiguous
  construct, as shown in the previous section (@pxref{Simple GLR Parsers}).
  Here, there is no ambiguity (this cannot be parsed as a declaration).
  However, at the time the Bison parser encounters @code{x}, it does not
@@ -1066,7 +1066,7 @@ as both an @code{expr} and a @code{decl}, and prints
  @end example
  
  Bison requires that all of the
-productions that participate in any particular merge have identical 
+productions that participate in any particular merge have identical
  @samp{%merge} clauses.  Otherwise, the ambiguity would be unresolvable,
  and the parser will report an error during any parse that results in
  the offending merge.
@@ -3734,14 +3734,13 @@ terminal symbol.  All kinds of token declarations allow
  @cindex freeing discarded symbols
  @findex %destructor
  
-Some symbols can be discarded by the parser, typically during error
-recovery (@pxref{Error Recovery}).  Basically, during error recovery,
-embarrassing symbols already pushed on the stack, and embarrassing
-tokens coming from the rest of the file are thrown away until the parser
-falls on its feet.  If these symbols convey heap based information, this
-memory is lost.  While this behavior is tolerable for batch parsers,
-such as in compilers, it is unacceptable for parsers that can
-possibility ``never end'' such as shells, or implementations of
+Some symbols can be discarded by the parser.  For instance, during error
+recovery (@pxref{Error Recovery}), embarrassing symbols already pushed
+on the stack, and embarrassing tokens coming from the rest of the file
+are thrown away until the parser falls on its feet.  If these symbols
+convey heap based information, this memory is lost.  While this behavior
+can be tolerable for batch parsers, such as in compilers, it is not for
+possibly ``never ending'' parsers such as shells, or implementations of
  communication protocols.
  
  The @code{%destructor} directive allows for the definition of code that
@@ -3794,6 +3793,22 @@ typeless: string;  // $$ = $1 does not apply; $1 is destroyed.
  typefull: string;  // $$ = $1 applies, $1 is not destroyed.
  @end smallexample
  
+@sp 1
+
+@cindex discarded symbols
+@dfn{Discarded symbols} are the following:
+
+@itemize
+@item
+stacked symbols popped during the first phase of error recovery,
+@item
+incoming terminals during the second phase of error recovery,
+@item
+the current lookahead when the parser aborts (either via an explicit
+call to @code{YYABORT}, or as a consequence of a failed error recovery).
+@end itemize
+
+
  @node Expect Decl
  @subsection Suppressing Conflict Warnings
  @cindex suppressing conflict warnings
diff --git a/tests/calc.at b/tests/calc.at

index 47f05919d866f4b67de213b9004c86a84bcb0a1c..71d35f072adc7abfabfcd02426ad3933e03bece8 100644 (file)
--- a/tests/calc.at
+++ b/tests/calc.at
@@ -466,21 +466,21 @@ _AT_CHECK_CALC([$1],
                 [486])
  
  # Some syntax errors.
-_AT_CHECK_CALC_ERROR([$1], [1], [0 0], [11],
+_AT_CHECK_CALC_ERROR([$1], [1], [0 0], [12],
                       [1.2: syntax error, unexpected "number"])
-_AT_CHECK_CALC_ERROR([$1], [1], [1//2], [15],
+_AT_CHECK_CALC_ERROR([$1], [1], [1//2], [16],
                       [1.2: syntax error, unexpected '/', expecting "number" or '-' or '(' or '!'])
-_AT_CHECK_CALC_ERROR([$1], [1], [error], [4],
+_AT_CHECK_CALC_ERROR([$1], [1], [error], [5],
                       [1.0: syntax error, unexpected $undefined])
-_AT_CHECK_CALC_ERROR([$1], [1], [1 = 2 = 3], [22],
+_AT_CHECK_CALC_ERROR([$1], [1], [1 = 2 = 3], [23],
                       [1.6: syntax error, unexpected '='])
  _AT_CHECK_CALC_ERROR([$1], [1],
                       [
  +1],
-                     [14],
+                     [15],
                       [2.0: syntax error, unexpected '+'])
  # Exercise error messages with EOF: work on an empty file.
-_AT_CHECK_CALC_ERROR([$1], [1], [/dev/null], [4],
+_AT_CHECK_CALC_ERROR([$1], [1], [/dev/null], [5],
                       [1.0: syntax error, unexpected "end of input"])
  
  # Exercise the error token: without it, we die at the first error,
author	Akim Demaille <akim@epita.fr>
	Mon, 6 Sep 2004 07:48:20 +0000 (07:48 +0000)
committer	Akim Demaille <akim@epita.fr>
	Mon, 6 Sep 2004 07:48:20 +0000 (07:48 +0000)
ChangeLog		patch | blob | blame | history
data/glr.c		patch | blob | blame | history
data/lalr1.cc		patch | blob | blame | history
data/yacc.c		patch | blob | blame | history
doc/bison.texinfo		patch | blob | blame | history
tests/calc.at		patch | blob | blame | history