* doc/bison.texinfo (Actions): Make clear that `|' is not the same

[bison.git] / doc / bison.texinfo
diff --git a/doc/bison.texinfo b/doc/bison.texinfo

index f4360cd3cf8da01c02da46a6b60ac68daec6e720..58186349d5f5c7264e0909309fc429c0f5047b2d 100644 (file)
--- a/doc/bison.texinfo
+++ b/doc/bison.texinfo
@@ -47,7 +47,7 @@ END-INFO-DIR-ENTRY
  This file documents the Bison parser generator.
  
  Copyright (C) 1988, 1989, 1990, 1991, 1992, 1993, 1995, 1998, 1999,
-2000, 2001
+2000, 2001, 2002
  Free Software Foundation, Inc.
  
  Permission is granted to make and distribute verbatim copies of
@@ -89,7 +89,7 @@ instead of in the original English.
  @page
  @vskip 0pt plus 1filll
  Copyright @copyright{} 1988, 1989, 1990, 1991, 1992, 1993, 1995, 1998,
-1999, 2000, 2001
+1999, 2000, 2001, 2002
  Free Software Foundation, Inc.
  
  @sp 2
@@ -433,7 +433,7 @@ smaller constructs according to grammatical rules are called
  @dfn{nonterminal symbols}; those which can't be subdivided are called
  @dfn{terminal symbols} or @dfn{token types}.  We call a piece of input
  corresponding to a single terminal symbol a @dfn{token}, and a piece
-corresponding to a single nonterminal symbol a @dfn{grouping}.@refill
+corresponding to a single nonterminal symbol a @dfn{grouping}.
  
  We can use the C language as an example of what symbols, terminal and
  nonterminal, mean.  The tokens of C are identifiers, constants (numeric and
@@ -565,7 +565,7 @@ if a rule mentions the terminal symbol `integer constant', it means that
  @emph{any} integer constant is grammatically valid in that position.  The
  precise value of the constant is irrelevant to how to parse the input: if
  @samp{x+4} is grammatical then @samp{x+1} or @samp{x+3989} is equally
-grammatical.@refill
+grammatical.
  
  But the precise value is very important for what the input means once it is
  parsed.  A compiler is useless if it fails to distinguish between 4, 1 and
@@ -577,7 +577,7 @@ The token type is a terminal symbol defined in the grammar, such as
  @code{INTEGER}, @code{IDENTIFIER} or @code{','}.  It tells everything
  you need to know to decide where the token may validly appear and how to
  group it with other tokens.  The grammar rules know nothing about tokens
-except their types.@refill
+except their types.
  
  The semantic value has all the rest of the information about the
  meaning of the token, such as the value of an integer, or the name of an
@@ -707,8 +707,7 @@ In some cases the Bison parser file includes system headers, and in
  those cases your code should respect the identifiers reserved by those
  headers.  On some non-@sc{gnu} hosts, @code{<alloca.h>},
  @code{<stddef.h>}, and @code{<stdlib.h>} are included as needed to
-declare memory allocators and related types.  In the same situation,
-C++ parsers may include @code{<cstddef>} and @code{<cstdlib>} instead.
+declare memory allocators and related types.
  Other system headers may be included if you define @code{YYDEBUG} to a
  nonzero value (@pxref{Debugging, ,Debugging Your Parser}).
  
@@ -1084,7 +1083,7 @@ The return value of the lexical analyzer function is a numeric code which
  represents a token type.  The same text used in Bison rules to stand for
  this token type is also a C expression for the numeric code for the type.
  This works in two ways.  If the token type is a character literal, then its
-numeric code is the ASCII code for that character; you can use the same
+numeric code is that of the character; you can use the same
  character literal in the lexical analyzer to express the number.  If the
  token type is an identifier, that identifier is defined by Bison as a C
  macro whose definition is the appropriate number.  In this example,
@@ -1105,8 +1104,8 @@ Here is the code for the lexical analyzer:
  @example
  @group
  /* Lexical analyzer returns a double floating point
-   number on the stack and the token NUM, or the ASCII
-   character read if not a number.  Skips all blanks
+   number on the stack and the token NUM, or the numeric code
+   of the character read if not a number.  Skips all blanks
     and tabs, returns 0 for EOF. */
  
  #include <ctype.h>
@@ -1383,7 +1382,7 @@ upon to print its message as well.)  The action executes the statement
  @code{yyerrok}, a macro defined automatically by Bison; its meaning is
  that error recovery is complete (@pxref{Error Recovery}).  Note the
  difference between @code{yyerrok} and @code{yyerror}; neither one is a
-misprint.@refill
+misprint.
  
  This form of error recovery deals with syntax errors.  There are other
  kinds of errors; for example, division by zero, which raises an exception
@@ -1882,7 +1881,7 @@ the name appears in the table, a pointer to its location and its type
  (@code{VAR} or @code{FNCT}) is returned to @code{yyparse}.  If it is not
  already in the table, then it is installed as a @code{VAR} using
  @code{putsym}.  Again, a pointer and its type (which must be @code{VAR}) is
-returned to @code{yyparse}.@refill
+returned to @code{yyparse}.
  
  No change is needed in the handling of numeric values and arithmetic
  operators in @code{yylex}.
@@ -2149,7 +2148,7 @@ your program will confuse other readers.
  
  All the usual escape sequences used in character literals in C can be
  used in Bison as well, but you must not use the null character as a
-character literal because its ASCII code, zero, is the code @code{yylex}
+character literal because its numeric code, zero, is the code @code{yylex}
  returns for end-of-input (@pxref{Calling Convention, ,Calling Convention
  for @code{yylex}}).
  
@@ -2190,7 +2189,7 @@ on when the parser function returns that symbol.
  The value returned by @code{yylex} is always one of the terminal symbols
  (or 0 for end-of-input).  Whichever way you write the token type in the
  grammar rules, you write it the same way in the definition of @code{yylex}.
-The numeric code for a character token type is simply the ASCII code for
+The numeric code for a character token type is simply the numeric code of
  the character, so @code{yylex} can use the identical character constant to
  generate the requisite code.  Each named token type becomes a C macro in
  the parser file, so @code{yylex} can use the name to stand for the code.
@@ -2203,9 +2202,27 @@ option when you run Bison, so that it will write these macro definitions
  into a separate header file @file{@var{name}.tab.h} which you can include
  in the other source files that need it.  @xref{Invocation, ,Invoking Bison}.
  
+The @code{yylex} function must use the same character set and encoding
+that was used by Bison.  For example, if you run Bison in an
+@sc{ascii} environment, but then compile and run the resulting program
+in an environment that uses an incompatible character set like
+@sc{ebcdic}, the resulting program will probably not work because the
+tables generated by Bison will assume @sc{ascii} numeric values for
+character tokens.  Portable grammars should avoid non-@sc{ascii}
+character tokens, as implementations in practice often use different
+and incompatible extensions in this area.  However, it is standard
+practice for software distributions to contain C source files that
+were generated by Bison in an @sc{ascii} environment, so installers on
+platforms that are incompatible with @sc{ascii} must rebuild those
+files before compiling them.
+
  The symbol @code{error} is a terminal symbol reserved for error recovery
  (@pxref{Error Recovery}); you shouldn't use it for any other purpose.
  In particular, @code{yylex} should never return this value.
+The default value of the error token is 256, so in the
+unlikely event that you need to use a character token with numeric
+value 256 you must reassign the error token's value with a
+@code{%token} declaration.
  
  @node Rules
  @section Syntax of Grammar Rules
@@ -2486,7 +2503,19 @@ which are the first and third symbols on the right hand side of the rule.
  The sum is stored into @code{$$} so that it becomes the semantic value of
  the addition-expression just recognized by the rule.  If there were a
  useful semantic value associated with the @samp{+} token, it could be
-referred to as @code{$2}.@refill
+referred to as @code{$2}.
+
+Note that the vertical-bar character @samp{|} is really a rule
+separator, and actions are attached to a single rule.  This differs
+from tools like Flex, for which @samp{|} stands for either
+``or'', or ``the same action as that of the next rule''.  In the
+following example, the action is triggered only when @samp{b} is found:
+
+@example
+@group
+a-or-b: 'a'|'b'   @{ a_or_b_found = 1; @};
+@end group
+@end example
  
  @cindex default action
  If you don't specify an action for a rule, Bison supplies a default:
@@ -2532,7 +2561,7 @@ If you have used @code{%union} to specify a variety of data types, then you
  must declare a choice among these types for each terminal or nonterminal
  symbol that can have a semantic value.  Then each time you use @code{$$} or
  @code{$@var{n}}, its data type is determined by which symbol it refers to
-in the rule.  In this example,@refill
+in the rule.  In this example,
  
  @example
  @group
@@ -2546,7 +2575,7 @@ exp:    @dots{}
  @code{$1} and @code{$3} refer to instances of @code{exp}, so they all
  have the data type declared for the nonterminal symbol @code{exp}.  If
  @code{$2} were used, it would have the data type declared for the
-terminal symbol @code{'+'}, whatever that might be.@refill
+terminal symbol @code{'+'}, whatever that might be.
  
  Alternatively, you can specify the data type when you refer to the value,
  by inserting @samp{<@var{type}>} after the @samp{$} at the beginning of the
@@ -2943,7 +2972,7 @@ an integer value in the field immediately following the token name:
  @noindent
  It is generally best, however, to let Bison choose the numeric codes for
  all token types.  Bison will automatically select codes that don't conflict
-with each other or with ASCII characters.
+with each other or with normal characters.
  
  In the event that the stack type is a union, you must augment the
  @code{%token} or other token declaration to include the data type
@@ -3259,12 +3288,12 @@ type names defined in the grammar and the semantic value type
  @code{YYSTYPE}, as well as a few @code{extern} variable declarations.
  
  If the parser output file is named @file{@var{name}.c} then this file
-is named @file{@var{name}.h}.@refill
+is named @file{@var{name}.h}.
  
  This output file is essential if you wish to put the definition of
  @code{yylex} in a separate source file, because @code{yylex} needs to
  be able to refer to token type codes and the variable
-@code{yylval}.  @xref{Token Values, ,Semantic Values of Tokens}.@refill
+@code{yylval}.  @xref{Token Values, ,Semantic Values of Tokens}.
  
  @item %file-prefix="@var{prefix}"
  Specify a prefix to use for all Bison output file names.  The names are
@@ -3367,11 +3396,11 @@ This file also describes all the conflicts, both those resolved by
  operator precedence and the unresolved ones.
  
  The file's name is made by removing @samp{.tab.c} or @samp{.c} from
-the parser output file name, and adding @samp{.output} instead.@refill
+the parser output file name, and adding @samp{.output} instead.
  
  Therefore, if the input file is @file{foo.y}, then the parser file is
  called @file{foo.tab.c} by default.  As a consequence, the verbose
-output file is called @file{foo.output}.@refill
+output file is called @file{foo.output}.
  
  @item %yacc
  Pretend the option @option{--yacc} was given, i.e., imitate Yacc,
@@ -3477,7 +3506,7 @@ need to arrange for the token-type macro definitions to be available there.
  To do this, use the @samp{-d} option when you run Bison, so that it will
  write these macro definitions into a separate header file
  @file{@var{name}.tab.h} which you can include in the other source files
-that need it.  @xref{Invocation, ,Invoking Bison}.@refill
+that need it.  @xref{Invocation, ,Invoking Bison}.
  
  @menu
  * Calling Convention::  How @code{yyparse} calls @code{yylex}.
@@ -3827,7 +3856,7 @@ Types of Values in Actions}.
  @item $<@var{typealt}>@var{n}
  Like @code{$@var{n}} but specifies alternative @var{typealt} in the
  union specified by the @code{%union} declaration.
-@xref{Action Types, ,Data Types of Values in Actions}.@refill
+@xref{Action Types, ,Data Types of Values in Actions}.
  
  @item YYABORT;
  Return immediately from @code{yyparse}, indicating failure.
@@ -4285,7 +4314,7 @@ The Bison precedence declarations, @code{%left}, @code{%right} and
  @code{%nonassoc}, can only be used once for a given token; so a token has
  only one precedence declared in this way.  For context-dependent
  precedence, you need to use an additional mechanism: the @code{%prec}
-modifier for rules.@refill
+modifier for rules.
  
  The @code{%prec} modifier declares the precedence of a particular rule by
  specifying a terminal symbol whose precedence should be used for that rule.
@@ -4931,7 +4960,6 @@ clear the flag.
  
  @node Debugging
  @chapter Debugging Your Parser
-@findex YYDEBUG
  @findex yydebug
  @cindex debugging
  @cindex tracing the parser
@@ -4939,24 +4967,39 @@ clear the flag.
  If a Bison grammar compiles properly but doesn't do what you want when it
  runs, the @code{yydebug} parser-trace feature can help you figure out why.
  
-To enable compilation of trace facilities, you must define the macro
-@code{YYDEBUG} to a nonzero value when you compile the parser.  You
-could use @samp{-DYYDEBUG=1} as a compiler option or you could put
-@samp{#define YYDEBUG 1} in the prologue of the grammar file
-(@pxref{Prologue, , The Prologue}).  Alternatively, use the @samp{-t}
-option when you run Bison (@pxref{Invocation, ,Invoking Bison}) or the
-@code{%debug} declaration (@pxref{Decl Summary, ,Bison Declaration
-Summary}).  We suggest that you always define @code{YYDEBUG} so that
-debugging is always possible.
+There are several means to enable compilation of trace facilities:
+
+@table @asis
+@item the macro @code{YYDEBUG}
+@findex YYDEBUG
+Define the macro @code{YYDEBUG} to a nonzero value when you compile the
+parser.  This is compliant with POSIX Yacc.  You could use
+@samp{-DYYDEBUG=1} as a compiler option or you could put @samp{#define
+YYDEBUG 1} in the prologue of the grammar file (@pxref{Prologue, , The
+Prologue}).
+
+@item the option @option{-t}, @option{--debug}
+Use the @samp{-t} option when you run Bison (@pxref{Invocation,
+,Invoking Bison}).  This is POSIX compliant too.
+
+@item the directive @samp{%debug}
+@findex %debug
+Add the @code{%debug} directive (@pxref{Decl Summary, ,Bison
+Declaration Summary}).  This is a Bison extension, which will prove
+useful when Bison outputs parsers for languages that don't use a
+preprocessor.  Unless POSIX and Yacc portability matter to you, this is
+the preferred solution.
+@end table
+
+We suggest that you always enable the debug option so that debugging is
+always possible.
  
  The trace facility outputs messages with macro calls of the form
-@code{YYFPRINTF (YYSTDERR, @var{format}, @var{args})} where
+@code{YYFPRINTF (stderr, @var{format}, @var{args})} where
  @var{format} and @var{args} are the usual @code{printf} format and
  arguments.  If you define @code{YYDEBUG} to a nonzero value but do not
  define @code{YYFPRINTF}, @code{<stdio.h>} is automatically included
-and the macros are defined to @code{fprintf} and @code{stderr}.  In
-the same situation, C++ parsers include @code{<cstdio.h>} instead, and
-use @code{std::fprintf} and @code{std::stderr}.
+and @code{YYFPRINTF} is defined to @code{fprintf}.
  
  Once you have compiled the program with trace facilities, the way to
  request a trace is to store a nonzero value in the variable @code{yydebug}.
@@ -5099,7 +5142,7 @@ Equivalent to @samp{-o y.tab.c}; the parser output file is called
  @file{y.tab.c}, and the other outputs are called @file{y.output} and
  @file{y.tab.h}.  The purpose of this option is to imitate Yacc's output
  file name conventions.  Thus, the following shell script can substitute
-for Yacc:@refill
+for Yacc:
  
  @example
  bison -y $*
@@ -5292,6 +5335,22 @@ would instead be named @file{foo_tab.c}.
  @cindex symbols in Bison, table of
  
  @table @code
+@item @@$
+In an action, the location of the left-hand side of the rule.
+@xref{Locations, , Locations Overview}.
+
+@item @@@var{n}
+In an action, the location of the @var{n}-th symbol of the right-hand
+side of the rule.  @xref{Locations, , Locations Overview}.
+
+@item $$
+In an action, the semantic value of the left-hand side of the rule.
+@xref{Actions}.
+
+@item $@var{n}
+In an action, the semantic value of the @var{n}-th symbol of the
+right-hand side of the rule.  @xref{Actions}.
+
  @item error
  A token name reserved for error recovery.  This token may be used in
  grammar rules so as to allow the Bison parser to recognize an error in
@@ -5317,6 +5376,10 @@ read, by making @code{yyparse} return 0 immediately.
  Macro to discard a value from the parser stack and fake a look-ahead
  token.  @xref{Action Features, ,Special Features for Use in Actions}.
  
+@item YYDEBUG
+Macro to define to equip the parser with tracing code.  @xref{Debugging,
+,Debugging Your Parser}.
+
  @item YYERROR
  Macro to pretend that a syntax error has just been detected: call
  @code{yyerror} and then perform normal error recovery if possible
@@ -5490,6 +5553,8 @@ Bison declaration to specify several possible data types for semantic
  values.  @xref{Union Decl, ,The Collection of Value Types}.
  @end table
  
+@sp 1
+
  These are the punctuation and delimiters used in Bison input:
  
  @table @samp