From 9501dc6e69988cd8cf7623278a3894af9479e198 Mon Sep 17 00:00:00 2001 From: Akim Demaille Date: Thu, 14 Nov 2002 09:58:01 +0000 Subject: [PATCH] * tests/atlocal.in (CPPFLAGS): We have config.h. * tests/testsuite.at (AT_DATA_GRAMMAR_PROLOGUE, AT_DATA_GRAMMAR): New. * tests/actions.at, tests/calc.at, tests/conflicts.at, * tests/cxx-type.at, tests/glr-regr1.at, tests/headers.at, * tests/regression.at, tests/torture.at: Use them for all the grammars that are to be compiled. * tests/cxx-type.at (_AT_TEST_GLR_CALC): Rename as... * tests/cxx-type.at (_AT_TEST_GLR_CXXTYPES): this. * doc/bison.texinfo (GLR Parsers): Document `inline'. --- ChangeLog | 13 +++++ NEWS | 4 ++ doc/bison.texinfo | 117 +++++++++++++++++++++++++++----------------- tests/actions.at | 6 +-- tests/atlocal.in | 2 +- tests/calc.at | 7 +-- tests/conflicts.at | 7 +-- tests/cxx-type.at | 37 ++++++++------ tests/glr-regr1.at | 2 +- tests/headers.at | 4 +- tests/input.at | 4 +- tests/regression.at | 7 +-- tests/testsuite.at | 30 +++++++++++- tests/torture.at | 2 + 14 files changed, 156 insertions(+), 86 deletions(-) diff --git a/ChangeLog b/ChangeLog index 4c037192..423459b3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,16 @@ +2002-11-14 Akim Demaille + + * tests/atlocal.in (CPPFLAGS): We have config.h. + * tests/testsuite.at (AT_DATA_GRAMMAR_PROLOGUE, AT_DATA_GRAMMAR): + New. + * tests/actions.at, tests/calc.at, tests/conflicts.at, + * tests/cxx-type.at, tests/glr-regr1.at, tests/headers.at, + * tests/regression.at, tests/torture.at: Use them for all the + grammars that are to be compiled. + * tests/cxx-type.at (_AT_TEST_GLR_CALC): Rename as... + * tests/cxx-type.at (_AT_TEST_GLR_CXXTYPES): this. + * doc/bison.texinfo (GLR Parsers): Document `inline'. 
+ 2002-11-14 Akim Demaille * doc/bison.texinfo: Various formatting changes (alignments in diff --git a/NEWS b/NEWS index b325a7ea..727101b1 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,10 @@ Bison News ---------- Changes in version 1.75c: +* GLR and inline + Users of Bison have to decide how they handle the portability of the + C keyword `inline'. + Changes in version 1.75b, 2002-11-13: * %destructor diff --git a/doc/bison.texinfo b/doc/bison.texinfo index 967b01f8..6883f471 100644 --- a/doc/bison.texinfo +++ b/doc/bison.texinfo @@ -412,42 +412,41 @@ more information on this. @cindex generalized @acronym{LR} (@acronym{GLR}) parsing @cindex ambiguous grammars @cindex non-deterministic parsing -Parsers for @acronym{LALR}(1) grammars are @dfn{deterministic}, -meaning roughly that -the next grammar rule to apply at any point in the input is uniquely -determined by the preceding input and a fixed, finite portion (called -a @dfn{look-ahead}) of the remaining input. -A context-free grammar can be @dfn{ambiguous}, meaning that -there are multiple ways to apply the grammar rules to get the some inputs. -Even unambiguous grammars can be @dfn{non-deterministic}, meaning that no -fixed look-ahead always suffices to determine the next grammar rule to apply. -With the proper declarations, Bison is also able to parse these more general -context-free grammars, using a technique known as @acronym{GLR} parsing (for -Generalized @acronym{LR}). Bison's @acronym{GLR} parsers are able to -handle any context-free -grammar for which the number of possible parses of any given string -is finite. + +Parsers for @acronym{LALR}(1) grammars are @dfn{deterministic}, meaning +roughly that the next grammar rule to apply at any point in the input is +uniquely determined by the preceding input and a fixed, finite portion +(called a @dfn{look-ahead}) of the remaining input. 
A context-free +grammar can be @dfn{ambiguous}, meaning that there are multiple ways to +apply the grammar rules to get the same inputs. Even unambiguous +grammars can be @dfn{non-deterministic}, meaning that no fixed +look-ahead always suffices to determine the next grammar rule to apply. +With the proper declarations, Bison is also able to parse these more +general context-free grammars, using a technique known as @acronym{GLR} +parsing (for Generalized @acronym{LR}). Bison's @acronym{GLR} parsers +are able to handle any context-free grammar for which the number of +possible parses of any given string is finite. @cindex symbols (abstract) @cindex token @cindex syntactic grouping @cindex grouping, syntactic -In the formal grammatical rules for a language, each kind of syntactic unit -or grouping is named by a @dfn{symbol}. Those which are built by grouping -smaller constructs according to grammatical rules are called +In the formal grammatical rules for a language, each kind of syntactic +unit or grouping is named by a @dfn{symbol}. Those which are built by +grouping smaller constructs according to grammatical rules are called @dfn{nonterminal symbols}; those which can't be subdivided are called @dfn{terminal symbols} or @dfn{token types}. We call a piece of input corresponding to a single terminal symbol a @dfn{token}, and a piece corresponding to a single nonterminal symbol a @dfn{grouping}. We can use the C language as an example of what symbols, terminal and -nonterminal, mean. The tokens of C are identifiers, constants (numeric and -string), and the various keywords, arithmetic operators and punctuation -marks. So the terminal symbols of a grammar for C include `identifier', -`number', `string', plus one symbol for each keyword, operator or -punctuation mark: `if', `return', `const', `static', `int', `char', -`plus-sign', `open-brace', `close-brace', `comma' and many more. 
(These -tokens can be subdivided into characters, but that is a matter of +nonterminal, mean. The tokens of C are identifiers, constants (numeric +and string), and the various keywords, arithmetic operators and +punctuation marks. So the terminal symbols of a grammar for C include +`identifier', `number', `string', plus one symbol for each keyword, +operator or punctuation mark: `if', `return', `const', `static', `int', +`char', `plus-sign', `open-brace', `close-brace', `comma' and many more. +(These tokens can be subdivided into characters, but that is a matter of lexicography, not grammar.) Here is a simple C function subdivided into tokens: @@ -642,28 +641,28 @@ from the values of the two subexpressions. @cindex conflicts @cindex shift/reduce conflicts -In some grammars, there will be cases where Bison's standard @acronym{LALR}(1) -parsing algorithm cannot decide whether to apply a certain grammar rule -at a given point. That is, it may not be able to decide (on the basis -of the input read so far) which of two possible reductions (applications -of a grammar rule) applies, or whether to apply a reduction or read more -of the input and apply a reduction later in the input. These are known -respectively as @dfn{reduce/reduce} conflicts (@pxref{Reduce/Reduce}), -and @dfn{shift/reduce} conflicts (@pxref{Shift/Reduce}). - -To use a grammar that is not easily modified to be @acronym{LALR}(1), a more -general parsing algorithm is sometimes necessary. If you include +In some grammars, there will be cases where Bison's standard +@acronym{LALR}(1) parsing algorithm cannot decide whether to apply a +certain grammar rule at a given point. That is, it may not be able to +decide (on the basis of the input read so far) which of two possible +reductions (applications of a grammar rule) applies, or whether to apply +a reduction or read more of the input and apply a reduction later in the +input. 
These are known respectively as @dfn{reduce/reduce} conflicts +(@pxref{Reduce/Reduce}), and @dfn{shift/reduce} conflicts +(@pxref{Shift/Reduce}). + +To use a grammar that is not easily modified to be @acronym{LALR}(1), a +more general parsing algorithm is sometimes necessary. If you include @code{%glr-parser} among the Bison declarations in your file -(@pxref{Grammar Outline}), the result will be a Generalized -@acronym{LR} (@acronym{GLR}) -parser. These parsers handle Bison grammars that contain no unresolved -conflicts (i.e., after applying precedence declarations) identically to -@acronym{LALR}(1) parsers. However, when faced with unresolved -shift/reduce and reduce/reduce conflicts, @acronym{GLR} parsers use -the simple expedient of doing -both, effectively cloning the parser to follow both possibilities. Each -of the resulting parsers can again split, so that at any given time, -there can be any number of possible parses being explored. The parsers +(@pxref{Grammar Outline}), the result will be a Generalized @acronym{LR} +(@acronym{GLR}) parser. These parsers handle Bison grammars that +contain no unresolved conflicts (i.e., after applying precedence +declarations) identically to @acronym{LALR}(1) parsers. However, when +faced with unresolved shift/reduce and reduce/reduce conflicts, +@acronym{GLR} parsers use the simple expedient of doing both, +effectively cloning the parser to follow both possibilities. Each of +the resulting parsers can again split, so that at any given time, there +can be any number of possible parses being explored. The parsers proceed in lockstep; that is, all of them consume (shift) a given input symbol before any of them proceed to the next. 
Each of the cloned parsers eventually meets one of two possible fates: either it runs into @@ -810,6 +809,32 @@ as both an @code{expr} and a @code{decl}, and print "x" y z + T x T y z + = @end example +@sp 1 + +@cindex @code{inline} +@cindex @acronym{GLR} parsers and @code{inline} +Note that the @acronym{GLR} parsers require an ISO C89 compiler. In +addition, they use the @code{inline} keyword, which is not C89, but a +common extension. It is up to the user of these parsers to handle +portability issues. For instance, if using Autoconf and the Autoconf +macro @code{AC_C_INLINE}, a mere + +@example +%@{ +#include <config.h> +%@} +@end example + +@noindent +will suffice. Otherwise, we suggest + +@example +%@{ +#if ! defined __GNUC__ && ! defined inline +# define inline +#endif +%@} +@end example @node Locations Overview @section Locations diff --git a/tests/actions.at b/tests/actions.at index 2e707bde..20030692 100644 --- a/tests/actions.at +++ b/tests/actions.at @@ -29,7 +29,7 @@ AT_SETUP([Mid-rule actions]) # instead of being attached to the empty rule dedicated to this # action. -AT_DATA([[input.y]], +AT_DATA_GRAMMAR([[input.y]], [[%{ # include # include @@ -88,7 +88,7 @@ AT_CLEANUP AT_SETUP([Exotic Dollars]) -AT_DATA([[input.y]], +AT_DATA_GRAMMAR([[input.y]], [[%{ # include # include @@ -167,7 +167,7 @@ AT_SETUP([Printers and Destructors: $4]) # Make sure complex $n work. -AT_DATA([[input.y]], +AT_DATA_GRAMMAR([[input.y]], [[$4 %{ #include diff --git a/tests/atlocal.in b/tests/atlocal.in index 356833c9..9046abb5 100644 --- a/tests/atlocal.in +++ b/tests/atlocal.in @@ -9,7 +9,7 @@ CC='@CC@' CFLAGS='@O0CFLAGS@ @WARNING_CFLAGS@ @WERROR_CFLAGS@' # We need `config.h'. -CPPFLAGS="-I$abs_top_builddir @CPPFLAGS@" +CPPFLAGS="-DHAVE_CONFIG_H=1 -I$abs_top_builddir @CPPFLAGS@" # Is the compiler GCC? 
GCC='@GCC@' diff --git a/tests/calc.at b/tests/calc.at index aff3b455..b226cbc9 100644 --- a/tests/calc.at +++ b/tests/calc.at @@ -35,15 +35,10 @@ m4_define([_AT_DATA_CALC_Y], [m4_if([$1$2$3], $[1]$[2]$[3], [], [m4_fatal([$0: Invalid arguments: $@])])dnl -AT_DATA([calc.y], +AT_DATA_GRAMMAR([calc.y], [[/* Infix notation calculator--calc */ ]$4[ %{ -#include -/* We don't need perfect functions for these tests. */ -#undef malloc -#undef memcmp -#undef realloc #include #if STDC_HEADERS diff --git a/tests/conflicts.at b/tests/conflicts.at index ee5fd026..dbc9be58 100644 --- a/tests/conflicts.at +++ b/tests/conflicts.at @@ -50,14 +50,9 @@ AT_CLEANUP AT_SETUP([%nonassoc and eof]) -AT_DATA([input.y], +AT_DATA_GRAMMAR([input.y], [[ %{ -#include -/* We don't need perfect functions for these tests. */ -#undef malloc -#undef memcmp -#undef realloc #include #if STDC_HEADERS diff --git a/tests/cxx-type.at b/tests/cxx-type.at index b63b2dd3..7e58fa4a 100644 --- a/tests/cxx-type.at +++ b/tests/cxx-type.at @@ -18,13 +18,13 @@ AT_BANNER([[C++ Type Syntax (GLR).]]) -# _AT_TEST_GLR_CALC(DECL, RESOLVE1, RESOLVE2) -# ------------------------------------------- +# _AT_TEST_GLR_CXXTYPES(DECL, RESOLVE1, RESOLVE2) +# ----------------------------------------------- # Store into types.y the calc program, with DECL inserted as a declaration, # and with RESOLVE1 and RESOLVE2 as annotations on the conflicted rule for # stmt. Then compile the result. -m4_define([_AT_TEST_GLR_CALC], -[AT_DATA([types.y], +m4_define([_AT_TEST_GLR_CXXTYPES], +[AT_DATA_GRAMMAR([types.y], [[/* Simplified C++ Type and Expression Grammar. 
*/ $1 @@ -253,57 +253,62 @@ m4_define([_AT_VERBOSE_GLR_STDERR], ## ---------------------------------------------------- ## AT_SETUP([GLR: Resolve ambiguity, impure, no locations]) -_AT_TEST_GLR_CALC([],[%dprec 1],[%dprec 2]) +_AT_TEST_GLR_CXXTYPES([], + [%dprec 1], [%dprec 2]) AT_PARSER_CHECK([[./types test-input | sed 's/ *$//']], 0, _AT_RESOLVED_GLR_OUTPUT, _AT_GLR_STDERR) AT_CLEANUP AT_SETUP([GLR: Resolve ambiguity, impure, locations]) -_AT_TEST_GLR_CALC([%locations],[%dprec 1],[%dprec 2]) +_AT_TEST_GLR_CXXTYPES([%locations],[%dprec 1],[%dprec 2]) AT_PARSER_CHECK([[./types test-input | sed 's/ *$//']], 0, _AT_RESOLVED_GLR_OUTPUT, _AT_GLR_STDERR) AT_CLEANUP AT_SETUP([GLR: Resolve ambiguity, pure, no locations]) -_AT_TEST_GLR_CALC([%pure-parser],[%dprec 1],[%dprec 2]) +_AT_TEST_GLR_CXXTYPES([%pure-parser], + [%dprec 1], [%dprec 2]) AT_PARSER_CHECK([[./types test-input | sed 's/ *$//']], 0, _AT_RESOLVED_GLR_OUTPUT, _AT_GLR_STDERR) AT_CLEANUP AT_SETUP([GLR: Resolve ambiguity, pure, locations]) -_AT_TEST_GLR_CALC([%pure-parser -%locations],[%dprec 1],[%dprec 2]) +_AT_TEST_GLR_CXXTYPES([%pure-parser %locations], + [%dprec 1], [%dprec 2]) AT_PARSER_CHECK([[./types test-input | sed 's/ *$//']], 0, _AT_RESOLVED_GLR_OUTPUT, _AT_GLR_STDERR) AT_CLEANUP AT_SETUP([GLR: Merge conflicting parses, impure, no locations]) -_AT_TEST_GLR_CALC([],[%merge ],[%merge ]) +_AT_TEST_GLR_CXXTYPES([], + [%merge ], [%merge ]) AT_PARSER_CHECK([[./types test-input | sed 's/ *$//']], 0, _AT_AMBIG_GLR_OUTPUT, _AT_GLR_STDERR) AT_CLEANUP AT_SETUP([GLR: Merge conflicting parses, impure, locations]) -_AT_TEST_GLR_CALC([%locations],[%merge ],[%merge ]) +_AT_TEST_GLR_CXXTYPES([%locations], + [%merge ], [%merge ]) AT_PARSER_CHECK([[./types test-input | sed 's/ *$//']], 0, _AT_AMBIG_GLR_OUTPUT, _AT_GLR_STDERR) AT_CLEANUP AT_SETUP([GLR: Merge conflicting parses, pure, no locations]) -_AT_TEST_GLR_CALC([%pure-parser],[%merge ],[%merge ]) +_AT_TEST_GLR_CXXTYPES([%pure-parser], + [%merge ], [%merge ]) 
AT_PARSER_CHECK([[./types test-input | sed 's/ *$//']], 0, _AT_AMBIG_GLR_OUTPUT, _AT_GLR_STDERR) AT_CLEANUP AT_SETUP([GLR: Merge conflicting parses, pure, locations]) -_AT_TEST_GLR_CALC([%pure-parser -%locations],[%merge ],[%merge ]) +_AT_TEST_GLR_CXXTYPES([%pure-parser %locations], + [%merge ],[%merge ]) AT_PARSER_CHECK([[./types test-input | sed 's/ *$//']], 0, _AT_AMBIG_GLR_OUTPUT, _AT_GLR_STDERR) AT_CLEANUP AT_SETUP([GLR: Verbose messages, resolve ambiguity, impure, no locations]) -_AT_TEST_GLR_CALC([%error-verbose], -[%merge ],[%merge ]) +_AT_TEST_GLR_CXXTYPES([%error-verbose], + [%merge ], [%merge ]) AT_PARSER_CHECK([[./types test-input | sed 's/ *$//']], 0, _AT_AMBIG_GLR_OUTPUT, _AT_VERBOSE_GLR_STDERR) AT_CLEANUP diff --git a/tests/glr-regr1.at b/tests/glr-regr1.at index d0b558a5..2483fc52 100644 --- a/tests/glr-regr1.at +++ b/tests/glr-regr1.at @@ -20,7 +20,7 @@ AT_BANNER([[GLR Regression Test #1.]]) AT_SETUP([Badly Collapsed GLR States]) -AT_DATA([glr-regr1.y], +AT_DATA_GRAMMAR([glr-regr1.y], [[/* Regression Test: Improper state compression */ /* Reported by Scott McPeak */ diff --git a/tests/headers.at b/tests/headers.at index 90652514..81d5c7dc 100644 --- a/tests/headers.at +++ b/tests/headers.at @@ -55,7 +55,7 @@ m4_define([AT_TEST_CPP_GUARD_H], dirname=`AS_DIRNAME([$1])` AS_MKDIR_P([$dirname]) -AT_DATA([$1.y], +AT_DATA_GRAMMAR([$1.y], [%% dummy:; ]) @@ -80,7 +80,7 @@ AT_TEST_CPP_GUARD_H([9foo]) AT_SETUP([export YYLTYPE]) -AT_DATA([input.y], +AT_DATA_GRAMMAR([input.y], [%locations %name-prefix="my_" diff --git a/tests/input.at b/tests/input.at index 69e421a5..d0a61ba2 100644 --- a/tests/input.at +++ b/tests/input.at @@ -92,7 +92,7 @@ AT_CLEANUP AT_SETUP([Torturing the Scanner]) -AT_DATA([input.y], +AT_DATA_GRAMMAR([input.y], [[%{ /* This is seen in GCC: a %{ and %} in middle of a comment. 
*/ const char *foo = "So %{ and %} can be here too."; @@ -188,6 +188,8 @@ yyerror (const char *msg) } ]]) +# Pacify Emacs'font-lock-mode: " + AT_DATA([main.c], [[typedef int value_t; #include "input.h" diff --git a/tests/regression.at b/tests/regression.at index ec5e140f..db49790d 100644 --- a/tests/regression.at +++ b/tests/regression.at @@ -29,7 +29,7 @@ AT_SETUP([Early token definitions]) # Found in GCJ: they expect the tokens to be defined before the user # prologue, so that they can use the token definitions in it. -AT_DATA([input.y], +AT_DATA_GRAMMAR([input.y], [[%{ void yyerror (const char *s); int yylex (void); @@ -313,7 +313,7 @@ AT_CLEANUP AT_SETUP([Token definitions]) # Bison managed, when fed with `%token 'f' "f"' to #define 'f'! -AT_DATA([input.y], +AT_DATA_GRAMMAR([input.y], [%{ void yyerror (const char *s); int yylex (void); @@ -340,7 +340,7 @@ AT_CLEANUP AT_SETUP([Characters Escapes]) -AT_DATA([input.y], +AT_DATA_GRAMMAR([input.y], [%{ void yyerror (const char *s); int yylex (void); @@ -352,6 +352,7 @@ exp: | '"' "'" ; ]]) +# Pacify font-lock-mode: " AT_CHECK([bison -o input.c input.y]) AT_COMPILE([input.o], [-c input.c]) diff --git a/tests/testsuite.at b/tests/testsuite.at index 9991296c..73e184d5 100644 --- a/tests/testsuite.at +++ b/tests/testsuite.at @@ -22,6 +22,34 @@ m4_version_prereq([2.52g]) +# AT_DATA_GRAMMAR_PROLOGUE +# ------------------------ +# The prologue that should be included in any grammar whose parser is +# meant to be compiled. +m4_define([AT_DATA_GRAMMAR_PROLOGUE], +[[%{ +#if HAVE_CONFIG_H +# include <config.h> + /* We don't need perfect functions for these tests. */ +# undef malloc +# undef memcmp +# undef realloc +#endif +%}] +]) + + +# AT_DATA_GRAMMAR(NAME, CONTENT) +# ------------------------------ +# Generate the file NAME, whose CONTENT is preceded by +# AT_DATA_GRAMMAR_PROLOGUE. 
+m4_define([AT_DATA_GRAMMAR], +[AT_DATA([$1], +[AT_DATA_GRAMMAR_PROLOGUE +$2]) +]) + + # AT_COMPILE(OUTPUT, [SOURCES = OUTPUT.c]) # ---------------------------------------- # @@ -77,7 +105,7 @@ m4_include([existing.at]) # Some old bugs. m4_include([regression.at]) -# GLR tests: +# GLR tests: # C++ types, simplified m4_include([cxx-type.at]) # Regression test for state-folding optimization diff --git a/tests/torture.at b/tests/torture.at index c02be2f5..4882d83e 100644 --- a/tests/torture.at +++ b/tests/torture.at @@ -48,6 +48,7 @@ use strict; my $max = $ARGV[0] || 10; print < #include @@ -163,6 +164,7 @@ use strict; my $max = $ARGV[0] || 10; print < #include -- 2.45.2