+2002-11-14 Akim Demaille <akim@epita.fr>
+
+ * tests/atlocal.in (CPPFLAGS): We have config.h.
+ * tests/testsuite.at (AT_DATA_GRAMMAR_PROLOGUE, AT_DATA_GRAMMAR):
+ New.
+ * tests/actions.at, tests/calc.at, tests/conflicts.at,
+ * tests/cxx-type.at, tests/glr-regr1.at, tests/headers.at,
+ * tests/regression.at, tests/torture.at: Use them for all the
+ grammars that are to be compiled.
+ * tests/cxx-type.at (_AT_TEST_GLR_CALC): Rename as...
+ * tests/cxx-type.at (_AT_TEST_GLR_CXXTYPES): this.
+ * doc/bison.texinfo (GLR Parsers): Document `inline'.
+
2002-11-14 Akim Demaille <akim@epita.fr>
* doc/bison.texinfo: Various formatting changes (alignments in
----------
Changes in version 1.75c:
+* GLR and inline
+ Users of Bison have to decide how they handle the portability of the
+ C keyword `inline'.
+
Changes in version 1.75b, 2002-11-13:
* %destructor
@cindex generalized @acronym{LR} (@acronym{GLR}) parsing
@cindex ambiguous grammars
@cindex non-deterministic parsing
-Parsers for @acronym{LALR}(1) grammars are @dfn{deterministic},
-meaning roughly that
-the next grammar rule to apply at any point in the input is uniquely
-determined by the preceding input and a fixed, finite portion (called
-a @dfn{look-ahead}) of the remaining input.
-A context-free grammar can be @dfn{ambiguous}, meaning that
-there are multiple ways to apply the grammar rules to get the some inputs.
-Even unambiguous grammars can be @dfn{non-deterministic}, meaning that no
-fixed look-ahead always suffices to determine the next grammar rule to apply.
-With the proper declarations, Bison is also able to parse these more general
-context-free grammars, using a technique known as @acronym{GLR} parsing (for
-Generalized @acronym{LR}). Bison's @acronym{GLR} parsers are able to
-handle any context-free
-grammar for which the number of possible parses of any given string
-is finite.
+
+Parsers for @acronym{LALR}(1) grammars are @dfn{deterministic}, meaning
+roughly that the next grammar rule to apply at any point in the input is
+uniquely determined by the preceding input and a fixed, finite portion
+(called a @dfn{look-ahead}) of the remaining input. A context-free
+grammar can be @dfn{ambiguous}, meaning that there are multiple ways to
+apply the grammar rules to get the same inputs. Even unambiguous
+grammars can be @dfn{non-deterministic}, meaning that no fixed
+look-ahead always suffices to determine the next grammar rule to apply.
+With the proper declarations, Bison is also able to parse these more
+general context-free grammars, using a technique known as @acronym{GLR}
+parsing (for Generalized @acronym{LR}). Bison's @acronym{GLR} parsers
+are able to handle any context-free grammar for which the number of
+possible parses of any given string is finite.
@cindex symbols (abstract)
@cindex token
@cindex syntactic grouping
@cindex grouping, syntactic
-In the formal grammatical rules for a language, each kind of syntactic unit
-or grouping is named by a @dfn{symbol}. Those which are built by grouping
-smaller constructs according to grammatical rules are called
+In the formal grammatical rules for a language, each kind of syntactic
+unit or grouping is named by a @dfn{symbol}. Those which are built by
+grouping smaller constructs according to grammatical rules are called
@dfn{nonterminal symbols}; those which can't be subdivided are called
@dfn{terminal symbols} or @dfn{token types}. We call a piece of input
corresponding to a single terminal symbol a @dfn{token}, and a piece
corresponding to a single nonterminal symbol a @dfn{grouping}.
We can use the C language as an example of what symbols, terminal and
-nonterminal, mean. The tokens of C are identifiers, constants (numeric and
-string), and the various keywords, arithmetic operators and punctuation
-marks. So the terminal symbols of a grammar for C include `identifier',
-`number', `string', plus one symbol for each keyword, operator or
-punctuation mark: `if', `return', `const', `static', `int', `char',
-`plus-sign', `open-brace', `close-brace', `comma' and many more. (These
-tokens can be subdivided into characters, but that is a matter of
+nonterminal, mean. The tokens of C are identifiers, constants (numeric
+and string), and the various keywords, arithmetic operators and
+punctuation marks. So the terminal symbols of a grammar for C include
+`identifier', `number', `string', plus one symbol for each keyword,
+operator or punctuation mark: `if', `return', `const', `static', `int',
+`char', `plus-sign', `open-brace', `close-brace', `comma' and many more.
+(These tokens can be subdivided into characters, but that is a matter of
lexicography, not grammar.)
Here is a simple C function subdivided into tokens:
@cindex conflicts
@cindex shift/reduce conflicts
-In some grammars, there will be cases where Bison's standard @acronym{LALR}(1)
-parsing algorithm cannot decide whether to apply a certain grammar rule
-at a given point. That is, it may not be able to decide (on the basis
-of the input read so far) which of two possible reductions (applications
-of a grammar rule) applies, or whether to apply a reduction or read more
-of the input and apply a reduction later in the input. These are known
-respectively as @dfn{reduce/reduce} conflicts (@pxref{Reduce/Reduce}),
-and @dfn{shift/reduce} conflicts (@pxref{Shift/Reduce}).
-
-To use a grammar that is not easily modified to be @acronym{LALR}(1), a more
-general parsing algorithm is sometimes necessary. If you include
+In some grammars, there will be cases where Bison's standard
+@acronym{LALR}(1) parsing algorithm cannot decide whether to apply a
+certain grammar rule at a given point. That is, it may not be able to
+decide (on the basis of the input read so far) which of two possible
+reductions (applications of a grammar rule) applies, or whether to apply
+a reduction or read more of the input and apply a reduction later in the
+input. These are known respectively as @dfn{reduce/reduce} conflicts
+(@pxref{Reduce/Reduce}), and @dfn{shift/reduce} conflicts
+(@pxref{Shift/Reduce}).
+
+To use a grammar that is not easily modified to be @acronym{LALR}(1), a
+more general parsing algorithm is sometimes necessary. If you include
@code{%glr-parser} among the Bison declarations in your file
-(@pxref{Grammar Outline}), the result will be a Generalized
-@acronym{LR} (@acronym{GLR})
-parser. These parsers handle Bison grammars that contain no unresolved
-conflicts (i.e., after applying precedence declarations) identically to
-@acronym{LALR}(1) parsers. However, when faced with unresolved
-shift/reduce and reduce/reduce conflicts, @acronym{GLR} parsers use
-the simple expedient of doing
-both, effectively cloning the parser to follow both possibilities. Each
-of the resulting parsers can again split, so that at any given time,
-there can be any number of possible parses being explored. The parsers
+(@pxref{Grammar Outline}), the result will be a Generalized @acronym{LR}
+(@acronym{GLR}) parser. These parsers handle Bison grammars that
+contain no unresolved conflicts (i.e., after applying precedence
+declarations) identically to @acronym{LALR}(1) parsers. However, when
+faced with unresolved shift/reduce and reduce/reduce conflicts,
+@acronym{GLR} parsers use the simple expedient of doing both,
+effectively cloning the parser to follow both possibilities. Each of
+the resulting parsers can again split, so that at any given time, there
+can be any number of possible parses being explored. The parsers
proceed in lockstep; that is, all of them consume (shift) a given input
symbol before any of them proceed to the next. Each of the cloned
parsers eventually meets one of two possible fates: either it runs into
"x" y z + T <init-declare> x T <cast> y z + = <OR>
@end example
+@sp 1
+
+@cindex @code{inline}
+@cindex @acronym{GLR} parsers and @code{inline}
+Note that the @acronym{GLR} parsers require an ISO C89 compiler. In
+addition, they use the @code{inline} keyword, which is not C89, but a
+common extension. It is up to the user of these parsers to handle
+portability issues. For instance, if using Autoconf and the Autoconf
+macro @code{AC_C_INLINE}, a mere
+
+@example
+%@{
+#include <config.h>
+%@}
+@end example
+
+@noindent
+will suffice. Otherwise, we suggest
+
+@example
+%@{
+#if ! defined __GNUC__ && ! defined inline
+# define inline
+#endif
+%@}
+@end example
@node Locations Overview
@section Locations
# instead of being attached to the empty rule dedicated to this
# action.
-AT_DATA([[input.y]],
+AT_DATA_GRAMMAR([[input.y]],
[[%{
# include <stdio.h>
# include <stdlib.h>
AT_SETUP([Exotic Dollars])
-AT_DATA([[input.y]],
+AT_DATA_GRAMMAR([[input.y]],
[[%{
# include <stdio.h>
# include <stdlib.h>
# Make sure complex $n work.
-AT_DATA([[input.y]],
+AT_DATA_GRAMMAR([[input.y]],
[[$4
%{
#include <stdio.h>
CFLAGS='@O0CFLAGS@ @WARNING_CFLAGS@ @WERROR_CFLAGS@'
# We need `config.h'.
-CPPFLAGS="-I$abs_top_builddir @CPPFLAGS@"
+CPPFLAGS="-DHAVE_CONFIG_H=1 -I$abs_top_builddir @CPPFLAGS@"
# Is the compiler GCC?
GCC='@GCC@'
m4_define([_AT_DATA_CALC_Y],
[m4_if([$1$2$3], $[1]$[2]$[3], [],
[m4_fatal([$0: Invalid arguments: $@])])dnl
-AT_DATA([calc.y],
+AT_DATA_GRAMMAR([calc.y],
[[/* Infix notation calculator--calc */
]$4[
%{
-#include <config.h>
-/* We don't need perfect functions for these tests. */
-#undef malloc
-#undef memcmp
-#undef realloc
#include <stdio.h>
#if STDC_HEADERS
AT_SETUP([%nonassoc and eof])
-AT_DATA([input.y],
+AT_DATA_GRAMMAR([input.y],
[[
%{
-#include <config.h>
-/* We don't need perfect functions for these tests. */
-#undef malloc
-#undef memcmp
-#undef realloc
#include <stdio.h>
#if STDC_HEADERS
AT_BANNER([[C++ Type Syntax (GLR).]])
-# _AT_TEST_GLR_CALC(DECL, RESOLVE1, RESOLVE2)
-# -------------------------------------------
+# _AT_TEST_GLR_CXXTYPES(DECL, RESOLVE1, RESOLVE2)
+# -----------------------------------------------
# Store into types.y the calc program, with DECL inserted as a declaration,
# and with RESOLVE1 and RESOLVE2 as annotations on the conflicted rule for
# stmt. Then compile the result.
-m4_define([_AT_TEST_GLR_CALC],
-[AT_DATA([types.y],
+m4_define([_AT_TEST_GLR_CXXTYPES],
+[AT_DATA_GRAMMAR([types.y],
[[/* Simplified C++ Type and Expression Grammar. */
$1
## ---------------------------------------------------- ##
AT_SETUP([GLR: Resolve ambiguity, impure, no locations])
-_AT_TEST_GLR_CALC([],[%dprec 1],[%dprec 2])
+_AT_TEST_GLR_CXXTYPES([],
+ [%dprec 1], [%dprec 2])
AT_PARSER_CHECK([[./types test-input | sed 's/ *$//']], 0,
_AT_RESOLVED_GLR_OUTPUT, _AT_GLR_STDERR)
AT_CLEANUP
AT_SETUP([GLR: Resolve ambiguity, impure, locations])
-_AT_TEST_GLR_CALC([%locations],[%dprec 1],[%dprec 2])
+_AT_TEST_GLR_CXXTYPES([%locations],[%dprec 1],[%dprec 2])
AT_PARSER_CHECK([[./types test-input | sed 's/ *$//']], 0,
_AT_RESOLVED_GLR_OUTPUT, _AT_GLR_STDERR)
AT_CLEANUP
AT_SETUP([GLR: Resolve ambiguity, pure, no locations])
-_AT_TEST_GLR_CALC([%pure-parser],[%dprec 1],[%dprec 2])
+_AT_TEST_GLR_CXXTYPES([%pure-parser],
+ [%dprec 1], [%dprec 2])
AT_PARSER_CHECK([[./types test-input | sed 's/ *$//']], 0,
_AT_RESOLVED_GLR_OUTPUT, _AT_GLR_STDERR)
AT_CLEANUP
AT_SETUP([GLR: Resolve ambiguity, pure, locations])
-_AT_TEST_GLR_CALC([%pure-parser
-%locations],[%dprec 1],[%dprec 2])
+_AT_TEST_GLR_CXXTYPES([%pure-parser %locations],
+ [%dprec 1], [%dprec 2])
AT_PARSER_CHECK([[./types test-input | sed 's/ *$//']], 0,
_AT_RESOLVED_GLR_OUTPUT, _AT_GLR_STDERR)
AT_CLEANUP
AT_SETUP([GLR: Merge conflicting parses, impure, no locations])
-_AT_TEST_GLR_CALC([],[%merge <stmtMerge>],[%merge <stmtMerge>])
+_AT_TEST_GLR_CXXTYPES([],
+ [%merge <stmtMerge>], [%merge <stmtMerge>])
AT_PARSER_CHECK([[./types test-input | sed 's/ *$//']], 0,
_AT_AMBIG_GLR_OUTPUT, _AT_GLR_STDERR)
AT_CLEANUP
AT_SETUP([GLR: Merge conflicting parses, impure, locations])
-_AT_TEST_GLR_CALC([%locations],[%merge <stmtMerge>],[%merge <stmtMerge>])
+_AT_TEST_GLR_CXXTYPES([%locations],
+ [%merge <stmtMerge>], [%merge <stmtMerge>])
AT_PARSER_CHECK([[./types test-input | sed 's/ *$//']], 0,
_AT_AMBIG_GLR_OUTPUT, _AT_GLR_STDERR)
AT_CLEANUP
AT_SETUP([GLR: Merge conflicting parses, pure, no locations])
-_AT_TEST_GLR_CALC([%pure-parser],[%merge <stmtMerge>],[%merge <stmtMerge>])
+_AT_TEST_GLR_CXXTYPES([%pure-parser],
+ [%merge <stmtMerge>], [%merge <stmtMerge>])
AT_PARSER_CHECK([[./types test-input | sed 's/ *$//']], 0,
_AT_AMBIG_GLR_OUTPUT, _AT_GLR_STDERR)
AT_CLEANUP
AT_SETUP([GLR: Merge conflicting parses, pure, locations])
-_AT_TEST_GLR_CALC([%pure-parser
-%locations],[%merge <stmtMerge>],[%merge <stmtMerge>])
+_AT_TEST_GLR_CXXTYPES([%pure-parser %locations],
+ [%merge <stmtMerge>],[%merge <stmtMerge>])
AT_PARSER_CHECK([[./types test-input | sed 's/ *$//']], 0,
_AT_AMBIG_GLR_OUTPUT, _AT_GLR_STDERR)
AT_CLEANUP
AT_SETUP([GLR: Verbose messages, resolve ambiguity, impure, no locations])
-_AT_TEST_GLR_CALC([%error-verbose],
-[%merge <stmtMerge>],[%merge <stmtMerge>])
+_AT_TEST_GLR_CXXTYPES([%error-verbose],
+ [%merge <stmtMerge>], [%merge <stmtMerge>])
AT_PARSER_CHECK([[./types test-input | sed 's/ *$//']], 0,
_AT_AMBIG_GLR_OUTPUT, _AT_VERBOSE_GLR_STDERR)
AT_CLEANUP
AT_SETUP([Badly Collapsed GLR States])
-AT_DATA([glr-regr1.y],
+AT_DATA_GRAMMAR([glr-regr1.y],
[[/* Regression Test: Improper state compression */
/* Reported by Scott McPeak */
dirname=`AS_DIRNAME([$1])`
AS_MKDIR_P([$dirname])
-AT_DATA([$1.y],
+AT_DATA_GRAMMAR([$1.y],
[%%
dummy:;
])
AT_SETUP([export YYLTYPE])
-AT_DATA([input.y],
+AT_DATA_GRAMMAR([input.y],
[%locations
%name-prefix="my_"
AT_SETUP([Torturing the Scanner])
-AT_DATA([input.y],
+AT_DATA_GRAMMAR([input.y],
[[%{
/* This is seen in GCC: a %{ and %} in middle of a comment. */
const char *foo = "So %{ and %} can be here too.";
}
]])
+# Pacify Emacs' font-lock-mode: "
+
AT_DATA([main.c],
[[typedef int value_t;
#include "input.h"
# Found in GCJ: they expect the tokens to be defined before the user
# prologue, so that they can use the token definitions in it.
-AT_DATA([input.y],
+AT_DATA_GRAMMAR([input.y],
[[%{
void yyerror (const char *s);
int yylex (void);
AT_SETUP([Token definitions])
# Bison managed, when fed with `%token 'f' "f"' to #define 'f'!
-AT_DATA([input.y],
+AT_DATA_GRAMMAR([input.y],
[%{
void yyerror (const char *s);
int yylex (void);
AT_SETUP([Characters Escapes])
-AT_DATA([input.y],
+AT_DATA_GRAMMAR([input.y],
[%{
void yyerror (const char *s);
int yylex (void);
| '"' "'"
;
]])
+# Pacify font-lock-mode: "
AT_CHECK([bison -o input.c input.y])
AT_COMPILE([input.o], [-c input.c])
m4_version_prereq([2.52g])
+# AT_DATA_GRAMMAR_PROLOGUE
+# ------------------------
+# The prologue that should be included in any grammar whose parser is
+# meant to be compiled.
+m4_define([AT_DATA_GRAMMAR_PROLOGUE],
+[[%{
+#if HAVE_CONFIG_H
+# include <config.h>
+ /* We don't need perfect functions for these tests. */
+# undef malloc
+# undef memcmp
+# undef realloc
+#endif
+%}]
+])
+
+
+# AT_DATA_GRAMMAR(NAME, CONTENT)
+# ------------------------------
+# Generate the file NAME, whose CONTENT is preceded by
+# AT_DATA_GRAMMAR_PROLOGUE.
+m4_define([AT_DATA_GRAMMAR],
+[AT_DATA([$1],
+[AT_DATA_GRAMMAR_PROLOGUE
+$2])
+])
+
+
# AT_COMPILE(OUTPUT, [SOURCES = OUTPUT.c])
# ----------------------------------------
#
# Some old bugs.
m4_include([regression.at])
-# GLR tests:
+# GLR tests:
# C++ types, simplified
m4_include([cxx-type.at])
# Regression test for state-folding optimization
my $max = $ARGV[0] || 10;
print <<EOF;
+]AT_DATA_GRAMMAR_PROLOGUE[
%{
#include <stdio.h>
#include <stdlib.h>
my $max = $ARGV[0] || 10;
print <<EOF;
+]AT_DATA_GRAMMAR_PROLOGUE[
%{
#include <stdio.h>
#include <stdlib.h>