Regenerate.

[bison.git] / doc / bison.texinfo
diff --git a/doc/bison.texinfo b/doc/bison.texinfo

index 6198c280468a3de932122bfefedecee2439996e2..69eb649fe3b386f341170fac81631c7e547bb76c 100644 (file)
--- a/doc/bison.texinfo
+++ b/doc/bison.texinfo
@@ -145,9 +145,9 @@ The Concepts of Bison
  
  Writing @acronym{GLR} Parsers
  
-* Simple GLR Parsers::          Using @acronym{GLR} parsers on unambiguous grammars
-* Merging GLR Parses::          Using @acronym{GLR} parsers to resolve ambiguities
-* Compiler Requirements::       @acronym{GLR} parsers require a modern C compiler
+* Simple GLR Parsers::       Using @acronym{GLR} parsers on unambiguous grammars
+* Merging GLR Parses::       Using @acronym{GLR} parsers to resolve ambiguities
+* Compiler Requirements::    @acronym{GLR} parsers require a modern C compiler
  
  Examples
  
@@ -225,6 +225,7 @@ Tracking Locations
  
  Bison Declarations
  
+* Require Decl::      Requiring a Bison version.
  * Token Decl::        Declaring terminal symbols.
  * Precedence Decl::   Declaring terminals with precedence and associativity.
  * Union Decl::        Declaring the set of all semantic value types.
@@ -732,9 +733,9 @@ user-defined function on the resulting values to produce an arbitrary
  merged result.
  
  @menu
-* Simple GLR Parsers::          Using @acronym{GLR} parsers on unambiguous grammars
-* Merging GLR Parses::          Using @acronym{GLR} parsers to resolve ambiguities
-* Compiler Requirements::       @acronym{GLR} parsers require a modern C compiler
+* Simple GLR Parsers::       Using @acronym{GLR} parsers on unambiguous grammars
+* Merging GLR Parses::       Using @acronym{GLR} parsers to resolve ambiguities
+* Compiler Requirements::    @acronym{GLR} parsers require a modern C compiler
  @end menu
  
  @node Simple GLR Parsers
@@ -1197,11 +1198,13 @@ function @code{yyerror} and the parser function @code{yyparse} itself.
  This also includes numerous identifiers used for internal purposes.
  Therefore, you should avoid using C identifiers starting with @samp{yy}
  or @samp{YY} in the Bison grammar file except for the ones defined in
-this manual.
+this manual.  Also, you should avoid using the C identifiers
+@samp{malloc} and @samp{free} for anything other than their usual
+meanings.
  
  In some cases the Bison parser file includes system headers, and in
  those cases your code should respect the identifiers reserved by those
-headers.  On some non-@acronym{GNU} hosts, @code{<alloca.h>},
+headers.  On some non-@acronym{GNU} hosts, @code{<alloca.h>}, @code{<malloc.h>},
  @code{<stddef.h>}, and @code{<stdlib.h>} are included as needed to
  declare memory allocators and related types.  @code{<libintl.h>} is
  included if message translation is in use
@@ -1716,12 +1719,12 @@ With all the source in a single file, you use the following command to
  convert it into a parser file:
  
  @example
-bison @var{file_name}.y
+bison @var{file}.y
  @end example
  
  @noindent
  In this example the file was called @file{rpcalc.y} (for ``Reverse Polish
-@sc{calc}ulator'').  Bison produces a file named @file{@var{file_name}.tab.c},
+@sc{calc}ulator'').  Bison produces a file named @file{@var{file}.tab.c},
  removing the @samp{.y} from the original file name.  The file output by
  Bison contains the source code for @code{yyparse}.  The additional
  functions in the input file (@code{yylex}, @code{yyerror} and @code{main})
@@ -3546,6 +3549,7 @@ it explicitly (@pxref{Language and Grammar, ,Languages and Context-Free
  Grammars}).
  
  @menu
+* Require Decl::      Requiring a Bison version.
  * Token Decl::        Declaring terminal symbols.
  * Precedence Decl::   Declaring terminals with precedence and associativity.
  * Union Decl::        Declaring the set of all semantic value types.
@@ -3558,6 +3562,19 @@ Grammars}).
  * Decl Summary::      Table of all Bison declarations.
  @end menu
  
+@node Require Decl
+@subsection Require a Version of Bison
+@cindex version requirement
+@cindex requiring a version of Bison
+@findex %require
+
+You may require a minimum version of Bison to process the grammar.  If
+the requirement is not met, @command{bison} exits with an error.
+
+@example
+%require "@var{version}"
+@end example
+
  @node Token Decl
  @subsection Token Type Names
  @cindex declaring token type names
@@ -3779,10 +3796,10 @@ Declare that the @var{code} must be invoked before parsing each time
  For instance, if your locations use a file name, you may use
  
  @example
-%parse-param @{ const char *filename @};
+%parse-param @{ char const *file_name @};
  %initial-action
  @{
-  @@$.begin.filename = @@$.end.filename = filename;
+  @@$.begin.filename = @@$.end.filename = file_name;
  @};
  @end example
  
@@ -4133,7 +4150,7 @@ parser file contains just @code{#define} directives and static variable
  declarations.
  
  This option also tells Bison to write the C code for the grammar actions
-into a file named @file{@var{filename}.act}, in the form of a
+into a file named @file{@var{file}.act}, in the form of a
  brace-surrounded body fit for a @code{switch} statement.
  @end deffn
  
@@ -4146,8 +4163,8 @@ associate errors with the parser file, treating it an independent source
  file in its own right.
  @end deffn
  
-@deffn {Directive} %output="@var{filename}"
-Specify the @var{filename} for the parser file.
+@deffn {Directive} %output="@var{file}"
+Specify @var{file} for the parser file.
  @end deffn
  
  @deffn {Directive} %pure-parser
@@ -4155,6 +4172,12 @@ Request a pure (reentrant) parser program (@pxref{Pure Decl, ,A Pure
  (Reentrant) Parser}).
  @end deffn
  
+@deffn {Directive} %require "@var{version}"
+Specify that version @var{version} or higher of Bison is required for the
+grammar.
+@xref{Require Decl, , Require a Version of Bison}.
+@end deffn
+
  @deffn {Directive} %token-table
  Generate an array of token names in the parser file.  The name of the
  array is @code{yytname}; @code{yytname[@var{i}]} is the name of the
@@ -4164,15 +4187,14 @@ three elements of @code{yytname} correspond to the predefined tokens
  @code{"error"}, and @code{"$undefined"}; after these come the symbols
  defined in the grammar file.
  
-For single-character literal tokens and literal string tokens, the name
-in the table includes the single-quote or double-quote characters: for
-example, @code{"'+'"} is a single-character literal and @code{"\"<=\""}
-is a literal string token.  All the characters of the literal string
-token appear verbatim in the string found in the table; even
-double-quote characters are not escaped.  For example, if the token
-consists of three characters @samp{*"*}, its string in @code{yytname}
-contains @samp{"*"*"}.  (In C, that would be written as
-@code{"\"*\"*\""}).
+The name in the table includes all the characters needed to represent
+the token in Bison.  For single-character literals and literal
+strings, this includes the surrounding quoting characters and any
+escape sequences.  For example, the Bison single-character literal
+@code{'+'} corresponds to a three-character name, represented in C as
+@code{"'+'"}; and the Bison two-character literal string @code{"\\/"}
+corresponds to a five-character name, represented in C as
+@code{"\"\\\\/\""}.
  
  When you specify @code{%token-table}, Bison also generates macro
  definitions for macros @code{YYNTOKENS}, @code{YYNNTS}, and
@@ -4272,7 +4294,11 @@ without reading further.
  The value returned by @code{yyparse} is 0 if parsing was successful (return
  is due to end-of-input).
  
-The value is 1 if parsing failed (return is due to a syntax error).
+The value is 1 if parsing failed because of invalid input, i.e., input
+that contains a syntax error or that causes @code{YYABORT} to be
+invoked.
+
+The value is 2 if parsing failed due to memory exhaustion.
  @end deftypefun
  
  In an action, you can cause immediate return from @code{yyparse} by using
@@ -4413,11 +4439,13 @@ the grammar file has no effect on @code{yylex}.
  table.  The index of the token in the table is the token type's code.
  The name of a multicharacter token is recorded in @code{yytname} with a
  double-quote, the token's characters, and another double-quote.  The
-token's characters are not escaped in any way; they appear verbatim in
-the contents of the string in the table.
+token's characters are escaped as necessary to be suitable as input
+to Bison.
  
-Here's code for looking up a token in @code{yytname}, assuming that the
-characters of the token are stored in @code{token_buffer}.
+Here's code for looking up a multicharacter token in @code{yytname},
+assuming that the characters of the token are stored in
+@code{token_buffer}, and assuming that the token does not contain any
+characters like @samp{"} that require escaping.
  
  @smallexample
  for (i = 0; i < YYNTOKENS; i++)
@@ -4688,7 +4716,7 @@ preferable since it more accurately describes the return type for
  
  @vindex yynerrs
  The variable @code{yynerrs} contains the number of syntax errors
-encountered so far.  Normally this variable is global; but if you
+reported so far.  Normally this variable is global; but if you
  request a pure parser (@pxref{Pure Decl, ,A Pure (Reentrant) Parser})
  then it is a local variable which only the actions can access.
  
@@ -6604,14 +6632,15 @@ bison @var{infile}
  
  Here @var{infile} is the grammar file name, which usually ends in
  @samp{.y}.  The parser file's name is made by replacing the @samp{.y}
-with @samp{.tab.c}.  Thus, the @samp{bison foo.y} filename yields
-@file{foo.tab.c}, and the @samp{bison hack/foo.y} filename yields
-@file{hack/foo.tab.c}.  It's also possible, in case you are writing
+with @samp{.tab.c} and removing any leading directory.  Thus, the
+@samp{bison foo.y} file name yields
+@file{foo.tab.c}, and the @samp{bison hack/foo.y} file name yields
+@file{foo.tab.c}.  It's also possible, in case you are writing
  C++ code instead of C in your grammar file, to name it @file{foo.ypp}
  or @file{foo.y++}.  Then, the output files will take an extension like
  the given one as input (respectively @file{foo.tab.cpp} and
  @file{foo.tab.c++}).
-This feature takes effect with all options that manipulate filenames like
+This feature takes effect with all options that manipulate file names like
  @samp{-o} or @samp{-d}.
  
  For example:
@@ -6769,11 +6798,11 @@ Pretend that @code{%verbose} was specified, i.e, write an extra output
  file containing verbose descriptions of the grammar and
  parser.  @xref{Decl Summary}.
  
-@item -o @var{filename}
-@itemx --output=@var{filename}
-Specify the @var{filename} for the parser file.
+@item -o @var{file}
+@itemx --output=@var{file}
+Specify @var{file} for the parser file.
  
-The other output files' names are constructed from @var{filename} as
+The other output files' names are constructed from @var{file} as
  described under the @samp{-v} and @samp{-d} options.
  
  @item -g
@@ -6785,7 +6814,7 @@ be @file{foo.vcg}.
  @item --graph=@var{graph-file}
  The behavior of @samp{--graph} is the same as @samp{-g}.  The only
  difference is that it has an optional argument which is the name of
-the output graph filename.
+the output graph file.
  @end table
  
  @node Option Cross Key
@@ -6901,13 +6930,13 @@ used for location tracking.  @xref{C++ Location Values}.
  @item stack.hh
  An auxiliary class @code{stack} used by the parser.
  
-@item @var{filename}.hh
-@itemx @var{filename}.cc
+@item @var{file}.hh
+@itemx @var{file}.cc
  The declaration and implementation of the C++ parser class.
-@var{filename} is the name of the output file.  It follows the same
+@var{file} is the name of the output file.  It follows the same
  rules as with regular C parsers.
  
-Note that @file{@var{filename}.hh} is @emph{mandatory}, the C++ cannot
+Note that @file{@var{file}.hh} is @emph{mandatory}: the C++ parser cannot
  work without the parser class declaration.  Therefore, you must either
  pass @option{-d}/@option{--defines} to @command{bison}, or use the
  @samp{%defines} directive.
@@ -6925,12 +6954,13 @@ for a complete and accurate documentation.
  The @code{%union} directive works as for C, see @ref{Union Decl, ,The
  Collection of Value Types}.  In particular it produces a genuine
  @code{union}@footnote{In the future techniques to allow complex types
-within pseudo-unions (variants) might be implemented to alleviate
-these issues.}, which have a few specific features in C++.
+within pseudo-unions (similar to Boost variants) might be implemented to
+alleviate these issues.}, which has a few specific features in C++.
  @itemize @minus
  @item
-The name @code{YYSTYPE} also denotes @samp{union YYSTYPE}.  You may
-forward declare it just with @samp{union YYSTYPE;}.
+The type @code{YYSTYPE} is defined but its use is discouraged: rather
+you should refer to the parser's encapsulated type
+@code{yy::parser::semantic_type}.
  @item
  Non POD (Plain Old Data) types cannot be used.  C++ forbids any
  instance of classes with constructors in unions: only @emph{pointers}
@@ -6956,7 +6986,7 @@ auxiliary classes define a @code{position}, a single point in a file,
  and a @code{location}, a range composed of a pair of
  @code{position}s (possibly spanning several files).
  
-@deftypemethod {position} {std::string*} filename
+@deftypemethod {position} {std::string*} file
  The name of the file.  It will always be handled as a pointer, the
  parser will never duplicate nor deallocate it.  As an experimental
  feature you may change it to @samp{@var{type}*} using @samp{%define
@@ -6988,8 +7018,8 @@ Various forms of syntactic sugar for @code{columns}.
  
  @deftypemethod {position} {position} operator<< (std::ostream @var{o}, const position& @var{p})
  Report @var{p} on @var{o} like this:
-@samp{@var{filename}:@var{line}.@var{column}}, or
-@samp{@var{line}.@var{column}} if @var{filename} is null.
+@samp{@var{file}:@var{line}.@var{column}}, or
+@samp{@var{line}.@var{column}} if @var{file} is null.
  @end deftypemethod
  
  @deftypemethod {location} {position} begin
@@ -7131,7 +7161,8 @@ transforming the simple parsing context structure into a fully blown
  
  The declaration of this driver class, @file{calc++-driver.hh}, is as
  follows.  The first part includes the CPP guard and imports the
-required standard library components.
+required standard library components, and the declaration of the parser
+class.
  
  @comment file: calc++-driver.hh
  @example
@@ -7139,26 +7170,9 @@ required standard library components.
  # define CALCXX_DRIVER_HH
  # include <string>
  # include <map>
+# include "calc++-parser.hh"
  @end example
  
-@noindent
-Then come forward declarations.  Because the parser uses the parsing
-driver and reciprocally, simple inclusions of header files will not
-do.  Because the driver's declaration is the one that will be imported
-by the rest of the project, it is saner to forward declare the
-parser's information here.
-
-@comment file: calc++-driver.hh
-@example
-// Forward declarations.
-union YYSTYPE;
-namespace yy
-@{
-  class location;
-  class calcxx_parser;
-@}
-class calcxx_driver;
-@end example
  
  @noindent
  Then comes the declaration of the scanning function.  Flex expects
@@ -7170,7 +7184,9 @@ factor both as follows.
  @example
  // Announce to Flex the prototype we want for lexing function, ...
  # define YY_DECL                                                \
-  int yylex (YYSTYPE* yylval, yy::location* yylloc, calcxx_driver& driver)
+  int yylex (yy::calcxx_parser::semantic_type* yylval,           \
+             yy::calcxx_parser::location_type* yylloc,           \
+             calcxx_driver& driver)
  // ... and declare it for the parser's sake.
  YY_DECL;
  @end example
@@ -7280,19 +7296,33 @@ calcxx_driver::error (const std::string& m)
  @node Calc++ Parser
  @subsection Calc++ Parser
  
-The parser definition file @file{calc++-parser.yy} starts by asking
-for the C++ skeleton, the creation of the parser header file, and
-specifies the name of the parser class.  It then includes the required
-headers.
+The parser definition file @file{calc++-parser.yy} starts by asking for
+the C++ LALR(1) skeleton, the creation of the parser header file, and
+specifies the name of the parser class.  Because the C++ skeleton
+changed several times, it is safer to require the version you designed
+the grammar for.
  
  @comment file: calc++-parser.yy
  @example
  %skeleton "lalr1.cc"                          /*  -*- C++ -*- */
-%define "parser_class_name" "calcxx_parser"
+%require "2.1a"
  %defines
+%define "parser_class_name" "calcxx_parser"
+@end example
+
+@noindent
+Then come the declarations/inclusions needed to define the
+@code{%union}.  Because the parser uses the parsing driver and vice
+versa, they cannot each include the header of the other.  Because the
+driver's header needs detailed knowledge about the parser class (in
+particular its inner types), it is the parser's header which will simply
+use a forward declaration of the driver.
+
+@comment file: calc++-parser.yy
+@example
  %@{
  # include <string>
-# include "calc++-driver.hh"
+class calcxx_driver;
  %@}
  @end example
  
@@ -7348,6 +7378,19 @@ them.
  @};
  @end example
  
+@noindent
+The code between @samp{%@{} and @samp{%@}} after the introduction of the
+@samp{%union} is output in the @file{*.cc} file; it needs detailed
+knowledge about the driver.
+
+@comment file: calc++-parser.yy
+@example
+%@{
+# include "calc++-driver.hh"
+%@}
+@end example
+
+
  @noindent
  The token numbered as 0 corresponds to end of file; the following line
  allows for nicer error messages referring to ``end of file'' instead
@@ -7357,11 +7400,11 @@ avoid name clashes.
  
  @comment file: calc++-parser.yy
  @example
-%token        YYEOF          0 "end of file"
-%token        TOKEN_ASSIGN     ":="
-%token <sval> TOKEN_IDENTIFIER "identifier"
-%token <ival> TOKEN_NUMBER     "number"
-%type  <ival> exp              "expression"
+%token        END      0 "end of file"
+%token        ASSIGN     ":="
+%token <sval> IDENTIFIER "identifier"
+%token <ival> NUMBER     "number"
+%type  <ival> exp        "expression"
  @end example
  
  @noindent
@@ -7388,7 +7431,7 @@ unit: assignments exp  @{ driver.result = $2; @};
  assignments: assignments assignment @{@}
             | /* Nothing. */         @{@};
  
-assignment: TOKEN_IDENTIFIER ":=" exp @{ driver.variables[*$1] = $3; @};
+assignment: "identifier" ":=" exp @{ driver.variables[*$1] = $3; @};
  
  %left '+' '-';
  %left '*' '/';
@@ -7396,8 +7439,8 @@ exp: exp '+' exp   @{ $$ = $1 + $3; @}
     | exp '-' exp   @{ $$ = $1 - $3; @}
     | exp '*' exp   @{ $$ = $1 * $3; @}
     | exp '/' exp   @{ $$ = $1 / $3; @}
-   | TOKEN_IDENTIFIER  @{ $$ = driver.variables[*$1]; @}
-   | TOKEN_NUMBER      @{ $$ = $1; @};
+   | "identifier"  @{ $$ = driver.variables[*$1]; @}
+   | "number"      @{ $$ = $1; @};
  %%
  @end example
  
@@ -7424,6 +7467,9 @@ parser's to get the set of defined tokens.
  @comment file: calc++-scanner.ll
  @example
  %@{                                            /* -*- C++ -*- */
+# include <cstdlib>
+# include <errno.h>
+# include <limits.h>
  # include <string>
  # include "calc++-driver.hh"
  # include "calc++-parser.hh"
@@ -7474,15 +7520,28 @@ preceding tokens.  Comments would be treated equally.
  @end example
  
  @noindent
-The rules are simple, just note the use of the driver to report
-errors.
+The rules are simple, just note the use of the driver to report errors.
+It is convenient to use a typedef to shorten
+@code{yy::calcxx_parser::token::IDENTIFIER} into
+@code{token::IDENTIFIER}, for instance.
  
  @comment file: calc++-scanner.ll
  @example
+%@{
+  typedef yy::calcxx_parser::token token;
+%@}
+
  [-+*/]     return yytext[0];
-":="       return TOKEN_ASSIGN;
-@{int@}      yylval->ival = atoi (yytext); return TOKEN_NUMBER;
-@{id@}       yylval->sval = new std::string (yytext); return TOKEN_IDENTIFIER;
+":="       return token::ASSIGN;
+@{int@}      @{
+  errno = 0;
+  long n = strtol (yytext, NULL, 10);
+  if (! (INT_MIN <= n && n <= INT_MAX && errno != ERANGE))
+    driver.error (*yylloc, "integer is out of range");
+  yylval->ival = n;
+  return token::NUMBER;
+@}
+@{id@}       yylval->sval = new std::string (yytext); return token::IDENTIFIER;
  .          driver.error (*yylloc, "invalid character");
  %%
  @end example
@@ -7519,7 +7578,7 @@ The top level file, @file{calc++.cc}, poses no problem.
  #include "calc++-driver.hh"
  
  int
-main (int argc, const char* argv[])
+main (int argc, char *argv[])
  @{
    calcxx_driver driver;
    for (++argv; argv[0]; ++argv)
@@ -7912,7 +7971,7 @@ Bison declaration to assign non-associativity to token(s).
  @xref{Precedence Decl, ,Operator Precedence}.
  @end deffn
  
-@deffn {Directive} %output="@var{filename}"
+@deffn {Directive} %output="@var{file}"
  Bison declaration to set the name of the parser file.  @xref{Decl
  Summary}.
  @end deffn
@@ -7933,6 +7992,12 @@ Bison declaration to request a pure (reentrant) parser.
  @xref{Pure Decl, ,A Pure (Reentrant) Parser}.
  @end deffn
  
+@deffn {Directive} %require "@var{version}"
+Specify that version @var{version} or higher of Bison is required for the
+grammar.
+@xref{Require Decl, , Require a Version of Bison}.
+@end deffn
+
  @deffn {Directive} %right
  Bison declaration to assign right associativity to token(s).
  @xref{Precedence Decl, ,Operator Precedence}.
@@ -8081,7 +8146,7 @@ Management}.
  @end deffn
  
  @deffn {Variable} yynerrs
-Global variable which Bison increments each time there is a syntax error.
+Global variable which Bison increments each time it reports a syntax error.
  (In a pure parser, it is a local variable within @code{yyparse}.)
  @xref{Error Reporting, ,The Error Reporting Function @code{yyerror}}.
  @end deffn
@@ -8111,10 +8176,7 @@ the parser will use @code{malloc} to extend its stacks.  If defined to
  reserved for future Bison extensions.  If not defined,
  @code{YYSTACK_USE_ALLOCA} defaults to 0.
  
-If you define @code{YYSTACK_USE_ALLOCA} to 1, it is your
-responsibility to make sure that @code{alloca} is visible, e.g., by
-using @acronym{GCC} or by including @code{<stdlib.h>}.  Furthermore,
-in the all-too-common case where your code may run on a host with a
+In the all-too-common case where your code may run on a host with a
  limited stack and with unreliable stack-overflow checking, you should
  set @code{YYMAXDEPTH} to a value that cannot possibly result in
  unchecked stack overflow on any of your target hosts when