@example
%@{
- #if __STDC_VERSION__ < 199901 && ! defined __GNUC__ && ! defined inline
- #define inline
+ #if (__STDC_VERSION__ < 199901 && ! defined __GNUC__ \
+ && ! defined inline)
+ # define inline
#endif
%@}
@end example
@cindex simple examples
@cindex examples, simple
-Now we show and explain three sample programs written using Bison: a
+Now we show and explain several sample programs written using Bison: a
reverse polish notation calculator, an algebraic (infix) notation
-calculator, and a multi-function calculator. All three have been tested
-under BSD Unix 4.3; each produces a usable, though limited, interactive
-desk-top calculator.
+calculator --- later extended to track ``locations'' ---
+and a multi-function calculator. All
+produce usable, though limited, interactive desk-top calculators.
These examples are simple, but Bison grammars for real programming
languages are written the same way. You can copy these examples into a
@comment file: rpcalc.y
@example
+@group
input: /* empty */
| input line
;
+@end group
+@group
line: '\n'
| exp '\n' @{ printf ("%.10g\n", $1); @}
;
+@end group
+@group
exp: NUM @{ $$ = $1; @}
| exp exp '+' @{ $$ = $1 + $2; @}
| exp exp '-' @{ $$ = $1 - $2; @}
| exp exp '*' @{ $$ = $1 * $2; @}
| exp exp '/' @{ $$ = $1 / $2; @}
- /* Exponentiation */
- | exp exp '^' @{ $$ = pow ($1, $2); @}
- /* Unary minus */
- | exp 'n' @{ $$ = -$1; @}
+ | exp exp '^' @{ $$ = pow ($1, $2); @} /* Exponentiation */
+ | exp 'n' @{ $$ = -$1; @} /* Unary minus */
;
+@end group
%%
@end example
/* Skip white space. */
while ((c = getchar ()) == ' ' || c == '\t')
- ;
+ continue;
@end group
@group
/* Process numbers. */
@example
@group
#include <stdio.h>
+@end group
+@group
/* Called by yyparse on error. */
void
yyerror (char const *s)
@example
/* Infix notation calculator. */
+@group
%@{
#define YYSTYPE double
#include <math.h>
int yylex (void);
void yyerror (char const *);
%@}
+@end group
+@group
/* Bison declarations. */
%token NUM
%left '-' '+'
%left '*' '/'
%precedence NEG /* negation--unary minus */
%right '^' /* exponentiation */
+@end group
%% /* The grammar follows. */
+@group
input: /* empty */
| input line
;
+@end group
+@group
line: '\n'
| exp '\n' @{ printf ("\t%.10g\n", $1); @}
;
+@end group
-exp: NUM @{ $$ = $1; @}
- | exp '+' exp @{ $$ = $1 + $3; @}
- | exp '-' exp @{ $$ = $1 - $3; @}
- | exp '*' exp @{ $$ = $1 * $3; @}
- | exp '/' exp @{ $$ = $1 / $3; @}
- | '-' exp %prec NEG @{ $$ = -$2; @}
+@group
+exp: NUM @{ $$ = $1; @}
+ | exp '+' exp @{ $$ = $1 + $3; @}
+ | exp '-' exp @{ $$ = $1 - $3; @}
+ | exp '*' exp @{ $$ = $1 * $3; @}
+ | exp '/' exp @{ $$ = $1 / $3; @}
+ | '-' exp %prec NEG @{ $$ = -$2; @}
| exp '^' exp @{ $$ = pow ($1, $3); @}
- | '(' exp ')' @{ $$ = $2; @}
+ | '(' exp ')' @{ $$ = $2; @}
;
+@end group
%%
@end example
if (c == EOF)
return 0;
+@group
/* Return a single char, and update location. */
if (c == '\n')
@{
++yylloc.last_column;
return c;
@}
+@end group
@end example
Basically, the lexical analyzer performs the same processing as before:
Here is a sample session with the multi-function calculator:
@example
+@group
$ @kbd{mfcalc}
@kbd{pi = 3.141592653589}
@result{} 3.1415926536
+@end group
+@group
@kbd{sin(pi)}
@result{} 0.0000000000
+@end group
@kbd{alpha = beta1 = 2.3}
@result{} 2.3000000000
@kbd{alpha}
init_table (void)
@{
int i;
- symrec *ptr;
for (i = 0; arith_fncts[i].fname != 0; i++)
@{
- ptr = putsym (arith_fncts[i].fname, FNCT);
+ symrec *ptr = putsym (arith_fncts[i].fname, FNCT);
ptr->value.fnctptr = arith_fncts[i].fnct;
@}
@}
#include <stdlib.h> /* malloc. */
#include <string.h> /* strlen. */
+@group
symrec *
putsym (char const *sym_name, int sym_type)
@{
- symrec *ptr;
- ptr = (symrec *) malloc (sizeof (symrec));
+ symrec *ptr = (symrec *) malloc (sizeof (symrec));
ptr->name = (char *) malloc (strlen (sym_name) + 1);
strcpy (ptr->name,sym_name);
ptr->type = sym_type;
sym_table = ptr;
return ptr;
@}
+@end group
+@group
symrec *
getsym (char const *sym_name)
@{
symrec *ptr;
for (ptr = sym_table; ptr != (symrec *) 0;
ptr = (symrec *)ptr->next)
- if (strcmp (ptr->name,sym_name) == 0)
+ if (strcmp (ptr->name, sym_name) == 0)
return ptr;
return 0;
@}
+@end group
@end smallexample
@node Mfcalc Lexer
int c;
/* Ignore white space, get first nonwhite character. */
- while ((c = getchar ()) == ' ' || c == '\t');
+ while ((c = getchar ()) == ' ' || c == '\t')
+ continue;
if (c == EOF)
return 0;
/* Char starts an identifier => read the name. */
if (isalpha (c))
@{
- symrec *s;
+ /* Initially make the buffer long enough
+ for a 40-character symbol name. */
+ static size_t length = 40;
static char *symbuf = 0;
- static int length = 0;
+ symrec *s;
int i;
@end group
-
-@group
- /* Initially make the buffer long enough
- for a 40-character symbol name. */
- if (length == 0)
- @{
- length = 40;
- symbuf = (char *) malloc (length + 1);
- @}
+ if (!symbuf)
+ symbuf = (char *) malloc (length + 1);
i = 0;
do
-@end group
@group
@{
/* If buffer is full, make it bigger. */
@comment file: mfcalc.y
@smallexample
-
-@group
@group
/* Called by yyparse on error. */
void
@}
@end group
+@group
int
main (int argc, char const* argv[])
@{
Thus, they belong in one or more @code{%code requires}:
@smallexample
+@group
%code top @{
#define _GNU_SOURCE
#include <stdio.h>
@}
+@end group
+@group
%code requires @{
#include "ptypes.h"
@}
+@end group
+@group
%union @{
long int n;
tree t; /* @r{@code{tree} is defined in @file{ptypes.h}.} */
@}
+@end group
+@group
%code requires @{
#define YYLTYPE YYLTYPE
typedef struct YYLTYPE
char *filename;
@} YYLTYPE;
@}
+@end group
+@group
%code @{
static void print_token_value (FILE *, int, YYSTYPE);
#define YYPRINT(F, N, L) print_token_value (F, N, L)
static void trace_token (enum yytokentype token, YYLTYPE loc);
@}
+@end group
@dots{}
@end smallexample
@code{%code} to a @code{%code provides}:
@smallexample
+@group
%code top @{
#define _GNU_SOURCE
#include <stdio.h>
@}
+@end group
+@group
%code requires @{
#include "ptypes.h"
@}
+@end group
+@group
%union @{
long int n;
tree t; /* @r{@code{tree} is defined in @file{ptypes.h}.} */
@}
+@end group
+@group
%code requires @{
#define YYLTYPE YYLTYPE
typedef struct YYLTYPE
char *filename;
@} YYLTYPE;
@}
+@end group
+@group
%code provides @{
void trace_token (enum yytokentype token, YYLTYPE loc);
@}
+@end group
+@group
%code @{
static void print_token_value (FILE *, int, YYSTYPE);
#define YYPRINT(F, N, L) print_token_value (F, N, L)
@}
+@end group
@dots{}
@end smallexample
type:
@smallexample
+@group
%code requires @{ #include "type1.h" @}
%union @{ type1 field1; @}
%destructor @{ type1_free ($$); @} <field1>
%printer @{ type1_print ($$); @} <field1>
+@end group
+@group
%code requires @{ #include "type2.h" @}
%union @{ type2 field2; @}
%destructor @{ type2_free ($$); @} <field2>
%printer @{ type2_print ($$); @} <field2>
+@end group
@end smallexample
@noindent
@end group
@end smallexample
+@noindent
where @code{YYRHSLOC (rhs, k)} is the location of the @var{k}th symbol
in @var{rhs} when @var{k} is positive, and the location of the symbol
just before the reduction when @var{k} and @var{n} are both zero.
of zero or more @code{word} groupings.
@example
+@group
sequence: /* empty */
@{ printf ("empty sequence\n"); @}
| maybeword
| sequence word
@{ printf ("added word %s\n", $2); @}
;
+@end group
+@group
maybeword: /* empty */
@{ printf ("empty maybeword\n"); @}
| word
@{ printf ("single word %s\n", $1); @}
;
+@end group
@end example
@noindent
from being empty:
@example
+@group
sequence: /* empty */
| sequence words
| sequence redirects
;
+@end group
+@group
words: word
| words word
;
+@end group
+@group
redirects:redirect
| redirects redirect
;
+@end group
@end example
@node Mysterious Conflicts
@example
typedef int foo, bar;
int baz (void)
+@group
@{
static bar (bar); /* @r{redeclare @code{bar} as static variable} */
extern foo foo (foo); /* @r{redeclare @code{foo} as function} */
return foo (bar);
@}
+@end group
@end example
Unfortunately, the name being declared is separated from the declaration
duplication, with actions omitted for brevity:
@example
+@group
initdcl:
declarator maybeasm '='
init
| declarator maybeasm
;
+@end group
+@group
notype_initdcl:
notype_declarator maybeasm '='
init
| notype_declarator maybeasm
;
+@end group
@end example
@noindent
and reports the uses of the symbols:
@example
+@group
Terminals, with rules where they appear
$end (0) 0
'/' (47) 4
error (256)
NUM (258) 5
+@end group
+@group
Nonterminals, with rules where they appear
$accept (8)
on left: 0
exp (9)
on left: 1 2 3 4 5, on right: 0 1 2 3 4
+@end group
@end example
@noindent
@cindex pointed rule
@cindex rule, pointed
Bison then proceeds onto the automaton itself, describing each state
-with it set of @dfn{items}, also known as @dfn{pointed rules}. Each
-item is a production rule together with a point (marked by @samp{.})
-that the input cursor.
+with its set of @dfn{items}, also known as @dfn{pointed rules}. Each
+item is a production rule together with a point (@samp{.}) marking
+the location of the input cursor.
@example
state 0
symbol (here, @code{exp}). When the parser returns to this state right
after having reduced a rule that produced an @code{exp}, the control
flow jumps to state 2. If there is no such transition on a nonterminal
-symbol, and the lookahead is a @code{NUM}, then this token is shifted on
+symbol, and the lookahead is a @code{NUM}, then this token is shifted onto
the parse stack, and the control flow jumps to state 1. Any other
lookahead triggers a syntax error.''
at the beginning of any rule deriving an @code{exp}. By default Bison
reports the so-called @dfn{core} or @dfn{kernel} of the item set, but if
you want to see more detail you can invoke @command{bison} with
-@option{--report=itemset} to list all the items, include those that can
-be derived:
+@option{--report=itemset} to list the derived items as well:
@example
state 0
@noindent
In state 2, the automaton can only shift a symbol. For instance,
-because of the item @samp{exp -> exp . '+' exp}, if the lookahead if
-@samp{+}, it will be shifted on the parse stack, and the automaton
-control will jump to state 4, corresponding to the item @samp{exp -> exp
-'+' . exp}. Since there is no default action, any other token than
-those listed above will trigger a syntax error.
+because of the item @samp{exp -> exp . '+' exp}, if the lookahead is
+@samp{+} it is shifted onto the parse stack, and the automaton
+jumps to state 4, corresponding to the item @samp{exp -> exp '+' . exp}.
+Since there is no default action, any lookahead not listed triggers a syntax
+error.
@cindex accepting state
The state 3 is named the @dfn{final state}, or the @dfn{accepting
The remaining states are similar:
@example
+@group
state 9
exp -> exp . '+' exp (rule 1)
'/' [reduce using rule 2 (exp)]
$default reduce using rule 2 (exp)
+@end group
+@group
state 10
exp -> exp . '+' exp (rule 1)
'/' [reduce using rule 3 (exp)]
$default reduce using rule 3 (exp)
+@end group
+@group
state 11
exp -> exp . '+' exp (rule 1)
'*' [reduce using rule 4 (exp)]
'/' [reduce using rule 4 (exp)]
$default reduce using rule 4 (exp)
+@end group
@end example
@noindent
@end defcv
@defcv {Type} {parser} {token}
-A structure that contains (only) the definition of the tokens as the
-@code{yytokentype} enumeration. To refer to the token @code{FOO}, the
-scanner should use @code{yy::parser::token::FOO}. The scanner can use
+A structure that contains (only) the @code{yytokentype} enumeration, which
+defines the tokens. To refer to the token @code{FOO},
+use @code{yy::parser::token::FOO}. The scanner can use
@samp{typedef yy::parser::token token;} to ``import'' the token enumeration
(@pxref{Calc++ Scanner}).
@end defcv
@comment file: calc++-scanner.ll
@example
+@group
%@{
// Code run each time a pattern is matched.
# define YY_USER_ACTION loc.columns (yyleng);
%@}
+@end group
%%
+@group
%@{
// Code run each time yylex is called.
loc.step ();
%@}
+@end group
@{blank@}+ loc.step ();
[\n]+ loc.lines (yyleng); loc.step ();
@end example
")" return yy::calcxx_parser::make_RPAREN(loc);
":=" return yy::calcxx_parser::make_ASSIGN(loc);
+@group
@{int@} @{
errno = 0;
long n = strtol (yytext, NULL, 10);
driver.error (loc, "integer is out of range");
return yy::calcxx_parser::make_NUMBER(n, loc);
@}
+@end group
@{id@} return yy::calcxx_parser::make_IDENTIFIER(yytext, loc);
. driver.error (loc, "invalid character");
<<EOF>> return yy::calcxx_parser::make_END(loc);
@comment file: calc++-scanner.ll
@example
+@group
void
calcxx_driver::scan_begin ()
@{
yyin = stdin;
else if (!(yyin = fopen (file.c_str (), "r")))
@{
- error (std::string ("cannot open ") + file + ": " + strerror(errno));
+ error ("cannot open " + file + ": " + strerror(errno));
exit (EXIT_FAILURE);
@}
@}
+@end group
+@group
void
calcxx_driver::scan_end ()
@{
fclose (yyin);
@}
+@end group
@end example
@node Calc++ Top Level
#include <iostream>
#include "calc++-driver.hh"
+@group
int
main (int argc, char *argv[])
@{
res = 1;
return res;
@}
+@end group
@end example
@node Java Parsers
@node Memory Exhausted
@section Memory Exhausted
-@display
+@quotation
My parser returns with error with a @samp{memory exhausted}
message. What can I do?
-@end display
+@end quotation
This question is already addressed elsewhere, see @ref{Recursion,
,Recursive Rules}.
The following phenomenon has several symptoms, resulting in the
following typical questions:
-@display
+@quotation
I invoke @code{yyparse} several times, and on correct input it works
properly; but when a parse error is found, all the other calls fail
too. How can I reset the error flag of @code{yyparse}?
-@end display
+@end quotation
@noindent
or
-@display
+@quotation
My parser includes support for an @samp{#include}-like feature, in
which case I run @code{yyparse} from @code{yyparse}. This fails
although I did specify @samp{%define api.pure}.
-@end display
+@end quotation
These problems typically come not from Bison itself, but from
Lex-generated scanners. Because these scanners use large buffers for
demonstration, consider the following source file,
@file{first-line.l}:
-@verbatim
-%{
+@example
+@group
+%@{
#include <stdio.h>
#include <stdlib.h>
-%}
+%@}
+@end group
%%
.*\n ECHO; return 1;
%%
+@group
int
yyparse (char const *file)
-{
+@{
yyin = fopen (file, "r");
if (!yyin)
- {
- perror ("fopen");
- exit (EXIT_FAILURE);
- }
+ @{
+ perror ("fopen");
+ exit (EXIT_FAILURE);
+ @}
+@end group
+@group
/* One token only. */
yylex ();
if (fclose (yyin) != 0)
- {
- perror ("fclose");
- exit (EXIT_FAILURE);
- }
+ @{
+ perror ("fclose");
+ exit (EXIT_FAILURE);
+ @}
return 0;
-}
+@}
+@end group
+@group
int
main (void)
-{
+@{
yyparse ("input");
yyparse ("input");
return 0;
-}
-@end verbatim
+@}
+@end group
+@end example
@noindent
If the file @file{input} contains
-@verbatim
+@example
input:1: Hello,
input:2: World!
-@end verbatim
+@end example
@noindent
then instead of getting the first line twice, you get:
@node Strings are Destroyed
@section Strings are Destroyed
-@display
+@quotation
My parser seems to destroy old strings, or maybe it loses track of
them. Instead of reporting @samp{"foo", "bar"}, it reports
@samp{"bar", "bar"}, or even @samp{"foo\nbar", "bar"}.
-@end display
+@end quotation
This error is probably the single most frequent ``bug report'' sent to
Bison lists, but it actually stems from a misunderstanding of the role
of the scanner. Consider the following Lex code:
-@verbatim
-%{
+@example
+@group
+%@{
#include <stdio.h>
char *yylval = NULL;
-%}
+%@}
+@end group
+@group
%%
.* yylval = yytext; return 1;
\n /* IGNORE */
%%
+@end group
+@group
int
main ()
-{
+@{
/* Similar to using $1, $2 in a Bison action. */
char *fst = (yylex (), yylval);
char *snd = (yylex (), yylval);
printf ("\"%s\", \"%s\"\n", fst, snd);
return 0;
-}
-@end verbatim
+@}
+@end group
+@end example
If you compile and run this code, you get:
@node Implementing Gotos/Loops
@section Implementing Gotos/Loops
-@display
+@quotation
My simple calculator supports variables, assignments, and functions,
but how can I implement gotos, or loops?
-@end display
+@end quotation
Although very pedagogical, the examples included in the document blur
the distinction to be made between the parser---whose job is to recover
@node Multiple start-symbols
@section Multiple start-symbols
-@display
+@quotation
I have several closely related grammars, and I would like to share their
implementations. In fact, I could use a single grammar but with
multiple entry points.
-@end display
+@end quotation
Bison does not support multiple start-symbols, but there is a very
simple means to simulate them. If @code{foo} and @code{bar} are the two
@node Secure? Conform?
@section Secure? Conform?
-@display
+@quotation
Is Bison secure? Does it conform to POSIX?
-@end display
+@end quotation
If you're looking for a guarantee or certification, we don't provide it.
However, Bison is intended to be a reliable program that conforms to the
@node I can't build Bison
@section I can't build Bison
-@display
+@quotation
I can't build Bison because @command{make} complains that
@code{msgfmt} is not found.
What should I do?
-@end display
+@end quotation
Like most GNU packages with internationalization support, that feature
is turned on by default. If you have problems building in the @file{po}
@node Where can I find help?
@section Where can I find help?
-@display
+@quotation
I'm having trouble using Bison. Where can I find help?
-@end display
+@end quotation
First, read this fine manual. Beyond that, you can send mail to
@email{help-bison@@gnu.org}. This mailing list is intended to be
@node Bug Reports
@section Bug Reports
-@display
+@quotation
I found a bug. What should I include in the bug report?
-@end display
+@end quotation
Before you send a bug report, make sure you are using the latest
version. Check @url{ftp://ftp.gnu.org/pub/gnu/bison/} or one of its
@node More Languages
@section More Languages
-@display
+@quotation
Will Bison ever have C++ and Java support? How about @var{insert your
favorite language here}?
-@end display
+@end quotation
C++ and Java support is there now, and is documented. We'd love to add other
languages; contributions are welcome.
@node Beta Testing
@section Beta Testing
-@display
+@quotation
What is involved in being a beta tester?
-@end display
+@end quotation
It's not terribly involved. Basically, you would download a test
release, compile it, and use it to build and run a parser or two. After
@node Mailing Lists
@section Mailing Lists
-@display
+@quotation
How do I join the help-bison and bug-bison mailing lists?
-@end display
+@end quotation
See @url{http://lists.gnu.org/}.