# Bison Regressions. -*- Autotest -*- # Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006 Free Software # Foundation, Inc. # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2, or (at your option) # any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. AT_BANNER([[Regression tests.]]) ## ------------------ ## ## Trivial grammars. ## ## ------------------ ## AT_SETUP([Trivial grammars]) AT_DATA_GRAMMAR([input.y], [[%{ void yyerror (char const *); int yylex (void); #define YYSTYPE int * %} %error-verbose %% program: 'x'; ]]) AT_CHECK([bison -o input.c input.y]) AT_COMPILE([input.o], [-c input.c]) AT_COMPILE([input.o], [-DYYDEBUG -c input.c]) AT_CLEANUP ## ----------------- ## ## YYSTYPE typedef. ## ## ----------------- ## AT_SETUP([YYSTYPE typedef]) AT_DATA_GRAMMAR([input.y], [[%{ void yyerror (char const *); int yylex (void); typedef union { char const *val; } YYSTYPE; %} %type program %% program: { $$ = ""; }; ]]) AT_CHECK([bison -o input.c input.y]) AT_COMPILE([input.o], [-c input.c]) AT_CLEANUP ## ------------------------------------- ## ## Early token definitions with --yacc. ## ## ------------------------------------- ## AT_SETUP([Early token definitions with --yacc]) # Found in GCJ: they expect the tokens to be defined before the user # prologue, so that they can use the token definitions in it. AT_DATA_GRAMMAR([input.y], [[%{ void yyerror (const char *s); int yylex (void); %} %union { int val; }; %{ #ifndef MY_TOKEN # error "MY_TOKEN not defined." #endif %} %token MY_TOKEN %% exp: MY_TOKEN; %% ]]) AT_CHECK([bison -y -o input.c input.y]) AT_COMPILE([input.o], [-c input.c]) AT_CLEANUP ## ---------------------------------------- ## ## Early token definitions without --yacc. ## ## ---------------------------------------- ## AT_SETUP([Early token definitions without --yacc]) # Found in GCJ: they expect the tokens to be defined before the user # prologue, so that they can use the token definitions in it. AT_DATA_GRAMMAR([input.y], [[%{ #include void yyerror (const char *s); int yylex (void); void print_my_token (void); %} %union { int val; }; %{ void print_my_token (void) { enum yytokentype my_token = MY_TOKEN; printf ("%d\n", my_token); } %} %token MY_TOKEN %% exp: MY_TOKEN; %% ]]) AT_CHECK([bison -o input.c input.y]) AT_COMPILE([input.o], [-c input.c]) AT_CLEANUP ## ---------------- ## ## Braces parsing. ## ## ---------------- ## AT_SETUP([Braces parsing]) AT_DATA([input.y], [[/* Bison used to swallow the character after `}'. */ %% exp: { tests = {{{{{{{{{{}}}}}}}}}}; }; %% ]]) AT_CHECK([bison -v -o input.c input.y]) AT_CHECK([grep 'tests = {{{{{{{{{{}}}}}}}}}};' input.c], 0, [ignore]) AT_CLEANUP ## ------------------ ## ## Duplicate string. ## ## ------------------ ## AT_SETUP([Duplicate string]) AT_DATA([input.y], [[/* `Bison -v' used to dump core when two tokens are defined with the same string, as LE and GE below. */ %token NUM %token LE "<=" %token GE "<=" %% exp: '(' exp ')' | NUM ; %% ]]) AT_CHECK([bison -v -o input.c input.y], 0, [], [[input.y:6.8-14: warning: symbol `"<="' used more than once as a literal string ]]) AT_CLEANUP ## ------------------- ## ## Rule Line Numbers. ## ## ------------------- ## AT_SETUP([Rule Line Numbers]) AT_KEYWORDS([report]) AT_DATA([input.y], [[%% expr: 'a' { } 'b' { } | { } 'c' { }; ]]) AT_CHECK([bison -o input.c -v input.y]) # Check the contents of the report. AT_CHECK([cat input.output], [], [[Grammar 0 $accept: expr $end 1 $@1: /* empty */ 2 expr: 'a' $@1 'b' 3 $@2: /* empty */ 4 expr: $@2 'c' Terminals, with rules where they appear $end (0) 0 'a' (97) 2 'b' (98) 2 'c' (99) 4 error (256) Nonterminals, with rules where they appear $accept (6) on left: 0 expr (7) on left: 2 4, on right: 0 $@1 (8) on left: 1, on right: 2 $@2 (9) on left: 3, on right: 4 state 0 0 $accept: . expr $end 'a' shift, and go to state 1 $default reduce using rule 3 ($@2) expr go to state 2 $@2 go to state 3 state 1 2 expr: 'a' . $@1 'b' $default reduce using rule 1 ($@1) $@1 go to state 4 state 2 0 $accept: expr . $end $end shift, and go to state 5 state 3 4 expr: $@2 . 'c' 'c' shift, and go to state 6 state 4 2 expr: 'a' $@1 . 'b' 'b' shift, and go to state 7 state 5 0 $accept: expr $end . $default accept state 6 4 expr: $@2 'c' . $default reduce using rule 4 (expr) state 7 2 expr: 'a' $@1 'b' . $default reduce using rule 2 (expr) ]]) AT_CLEANUP ## ---------------------- ## ## Mixing %token styles. ## ## ---------------------- ## AT_SETUP([Mixing %token styles]) # Taken from the documentation. AT_DATA([input.y], [[%token OR "||" %token LE 134 "<=" %left OR "<=" %% exp: ; %% ]]) AT_CHECK([bison -v -o input.c input.y]) AT_CLEANUP ## ---------------- ## ## Invalid inputs. ## ## ---------------- ## AT_SETUP([Invalid inputs]) AT_DATA([input.y], [[%% ? default: 'a' } %& %a-does-not-exist %- %{ ]]) AT_CHECK([bison input.y], [1], [], [[input.y:2.1: invalid character: `?' input.y:3.14: invalid character: `}' input.y:4.1: invalid character: `%' input.y:4.2: invalid character: `&' input.y:5.1-17: invalid directive: `%a-does-not-exist' input.y:6.1: invalid character: `%' input.y:6.2: invalid character: `-' input.y:7.1-8.0: missing `%}' at end of file input.y:7.1-8.0: syntax error, unexpected %{...%} ]]) AT_CLEANUP AT_SETUP([Invalid inputs with {}]) AT_DATA([input.y], [[ %destructor %initial-action %lex-param %parse-param %printer %union ]]) AT_CHECK([bison input.y], [1], [], [[input.y:3.1-15: syntax error, unexpected %initial-action, expecting {...} ]]) AT_CLEANUP ## ------------------- ## ## Token definitions. ## ## ------------------- ## AT_SETUP([Token definitions]) # Bison managed, when fed with `%token 'f' "f"' to #define 'f'! AT_DATA_GRAMMAR([input.y], [%{ #include #include void yyerror (const char *s); int yylex (void); %} [%error-verbose %token MYEOF 0 "end of file" %token 'a' "a" %token B_TOKEN "b" %token C_TOKEN 'c' %token 'd' D_TOKEN %token SPECIAL "\\\'\?\"\a\b\f\n\r\t\v\001\201\x001\x000081??!" %% exp: "a" "\\\'\?\"\a\b\f\n\r\t\v\001\201\x001\x000081??!"; %% void yyerror (char const *s) { fprintf (stderr, "%s\n", s); } int yylex (void) { static int called; if (called++) abort (); return SPECIAL; } int main (void) { return yyparse (); } ]]) AT_CHECK([bison -o input.c input.y]) AT_COMPILE([input]) AT_DATA([experr], [[syntax error, unexpected "\\'?\"\a\b\f\n\r\t\v\001\201\001\201?\?!", expecting a ]]) AT_PARSER_CHECK([./input], 1, [], [experr]) AT_CLEANUP ## -------------------- ## ## Characters Escapes. ## ## -------------------- ## AT_SETUP([Characters Escapes]) AT_DATA_GRAMMAR([input.y], [%{ void yyerror (const char *s); int yylex (void); %} [%% exp: '\'' "\'" | '\"' "\"" | '"' "'" ; ]]) # Pacify font-lock-mode: " AT_CHECK([bison -o input.c input.y]) AT_COMPILE([input.o], [-c input.c]) AT_CLEANUP ## -------------- ## ## Web2c Report. ## ## -------------- ## # The generation of the reduction was once wrong in Bison, and made it # miss some reductions. In the following test case, the reduction on # `undef_id_tok' in state 1 was missing. This is stripped down from # the actual web2c.y. AT_SETUP([Web2c Report]) AT_KEYWORDS([report]) AT_DATA([input.y], [[%token undef_id_tok const_id_tok %start CONST_DEC_PART %% CONST_DEC_PART: CONST_DEC_LIST ; CONST_DEC_LIST: CONST_DEC | CONST_DEC_LIST CONST_DEC ; CONST_DEC: { } undef_id_tok '=' const_id_tok ';' ; %% ]]) AT_CHECK([bison -v input.y]) AT_CHECK([cat input.output], 0, [[Grammar 0 $accept: CONST_DEC_PART $end 1 CONST_DEC_PART: CONST_DEC_LIST 2 CONST_DEC_LIST: CONST_DEC 3 | CONST_DEC_LIST CONST_DEC 4 $@1: /* empty */ 5 CONST_DEC: $@1 undef_id_tok '=' const_id_tok ';' Terminals, with rules where they appear $end (0) 0 ';' (59) 5 '=' (61) 5 error (256) undef_id_tok (258) 5 const_id_tok (259) 5 Nonterminals, with rules where they appear $accept (7) on left: 0 CONST_DEC_PART (8) on left: 1, on right: 0 CONST_DEC_LIST (9) on left: 2 3, on right: 1 3 CONST_DEC (10) on left: 5, on right: 2 3 $@1 (11) on left: 4, on right: 5 state 0 0 $accept: . CONST_DEC_PART $end $default reduce using rule 4 ($@1) CONST_DEC_PART go to state 1 CONST_DEC_LIST go to state 2 CONST_DEC go to state 3 $@1 go to state 4 state 1 0 $accept: CONST_DEC_PART . $end $end shift, and go to state 5 state 2 1 CONST_DEC_PART: CONST_DEC_LIST . 3 CONST_DEC_LIST: CONST_DEC_LIST . CONST_DEC undef_id_tok reduce using rule 4 ($@1) $default reduce using rule 1 (CONST_DEC_PART) CONST_DEC go to state 6 $@1 go to state 4 state 3 2 CONST_DEC_LIST: CONST_DEC . $default reduce using rule 2 (CONST_DEC_LIST) state 4 5 CONST_DEC: $@1 . undef_id_tok '=' const_id_tok ';' undef_id_tok shift, and go to state 7 state 5 0 $accept: CONST_DEC_PART $end . $default accept state 6 3 CONST_DEC_LIST: CONST_DEC_LIST CONST_DEC . $default reduce using rule 3 (CONST_DEC_LIST) state 7 5 CONST_DEC: $@1 undef_id_tok . '=' const_id_tok ';' '=' shift, and go to state 8 state 8 5 CONST_DEC: $@1 undef_id_tok '=' . const_id_tok ';' const_id_tok shift, and go to state 9 state 9 5 CONST_DEC: $@1 undef_id_tok '=' const_id_tok . ';' ';' shift, and go to state 10 state 10 5 CONST_DEC: $@1 undef_id_tok '=' const_id_tok ';' . $default reduce using rule 5 (CONST_DEC) ]]) AT_CLEANUP ## --------------- ## ## Web2c Actions. ## ## --------------- ## # The generation of the mapping `state -> action' was once wrong in # extremely specific situations. web2c.y exhibits this situation. # Below is a stripped version of the grammar. It looks like one can # simplify it further, but just don't: it is tuned to exhibit a bug, # which disapears when applying sane grammar transformations. # # It used to be wrong on yydefact only: # # static const yytype_uint8 yydefact[] = # { # - 2, 0, 1, 0, 0, 2, 3, 2, 5, 4, # + 2, 0, 1, 0, 0, 0, 3, 2, 5, 4, # 0, 0 # }; # # but let's check all the tables. AT_SETUP([Web2c Actions]) AT_KEYWORDS([report]) AT_DATA([input.y], [[%% statement: struct_stat; struct_stat: /* empty. */ | if else; if: "if" "const" "then" statement; else: "else" statement; %% ]]) AT_CHECK([bison -v -o input.c input.y]) # Check only the tables. We don't use --no-parser, because it is # still to be implemented in the experimental branch of Bison. [sed -n 's/ *$//;/^static const.*\[\] =/,/^}/p' input.c >tables.c] AT_CHECK([[cat tables.c]], 0, [[static const yytype_uint8 yytranslate[] = { 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 3, 4, 5, 6 }; static const yytype_uint8 yyprhs[] = { 0, 0, 3, 5, 6, 9, 14 }; static const yytype_int8 yyrhs[] = { 8, 0, -1, 9, -1, -1, 10, 11, -1, 3, 4, 5, 8, -1, 6, 8, -1 }; static const yytype_uint8 yyrline[] = { 0, 2, 2, 3, 3, 4, 5 }; static const char *const yytname[] = { "$end", "error", "$undefined", "\"if\"", "\"const\"", "\"then\"", "\"else\"", "$accept", "statement", "struct_stat", "if", "else", 0 }; static const yytype_uint16 yytoknum[] = { 0, 256, 257, 258, 259, 260, 261 }; static const yytype_uint8 yyr1[] = { 0, 7, 8, 9, 9, 10, 11 }; static const yytype_uint8 yyr2[] = { 0, 2, 1, 0, 2, 4, 2 }; static const yytype_uint8 yydefact[] = { 3, 0, 0, 2, 0, 0, 1, 3, 4, 3, 6, 5 }; static const yytype_int8 yydefgoto[] = { -1, 2, 3, 4, 8 }; static const yytype_int8 yypact[] = { -2, -1, 4, -8, 0, 2, -8, -2, -8, -2, -8, -8 }; static const yytype_int8 yypgoto[] = { -8, -7, -8, -8, -8 }; static const yytype_uint8 yytable[] = { 10, 1, 11, 5, 6, 0, 7, 9 }; static const yytype_int8 yycheck[] = { 7, 3, 9, 4, 0, -1, 6, 5 }; static const yytype_uint8 yystos[] = { 0, 3, 8, 9, 10, 4, 0, 6, 11, 5, 8, 8 }; ]]) AT_CLEANUP ## ------------------------- ## ## yycheck Bound Violation. ## ## ------------------------- ## # _AT_DATA_DANCER_Y(BISON-OPTIONS) # -------------------------------- # The following grammar, taken from Andrew Suffield's GPL'd implementation # of DGMTP, the Dancer Generic Message Transport Protocol, used to violate # yycheck's bounds where issuing a verbose error message. Keep this test # so that possible bound checking compilers could check all the skeletons. m4_define([_AT_DATA_DANCER_Y], [AT_DATA_GRAMMAR([dancer.y], [%{ static int yylex (AT_LALR1_CC_IF([int *], [void])); AT_LALR1_CC_IF([], [#include #include static void yyerror (const char *);]) %} $1 %token ARROW INVALID NUMBER STRING DATA %defines %verbose %error-verbose /* Grammar follows */ %% line: header body ; header: '<' from ARROW to '>' type ':' | '<' ARROW to '>' type ':' | ARROW to type ':' | type ':' | '<' '>' ; from: DATA | STRING | INVALID ; to: DATA | STRING | INVALID ; type: DATA | STRING | INVALID ; body: /* empty */ | body member ; member: STRING | DATA | '+' NUMBER | '-' NUMBER | NUMBER | INVALID ; %% AT_LALR1_CC_IF( [/* A C++ error reporting function. */ void yy::parser::error (const location&, const std::string& m) { std::cerr << m << std::endl; } int yyparse () { yy::parser parser; #if YYDEBUG parser.set_debug_level (YYDEBUG); #endif return parser.parse (); } ], [static void yyerror (const char *s) { fprintf (stderr, "%s\n", s); }]) static int yylex (AT_LALR1_CC_IF([int *lval], [void])) [{ static int const tokens[] = { ':', -1 }; static size_t toknum; ]AT_LALR1_CC_IF([*lval = 0; /* Pacify GCC. */])[ if (! (toknum < sizeof tokens / sizeof *tokens)) abort (); return tokens[toknum++]; }] int main (void) { return yyparse (); } ]) ])# _AT_DATA_DANCER_Y # AT_CHECK_DANCER(BISON-OPTIONS) # ------------------------------ # Generate the grammar, compile it, run it. m4_define([AT_CHECK_DANCER], [AT_SETUP([Dancer $1]) AT_BISON_OPTION_PUSHDEFS([$1]) _AT_DATA_DANCER_Y([$1]) AT_CHECK([bison -o dancer.c dancer.y]) AT_LALR1_CC_IF( [AT_CHECK([bison -o dancer.cc dancer.y]) AT_COMPILE_CXX([dancer])], [AT_CHECK([bison -o dancer.c dancer.y]) AT_COMPILE([dancer])]) AT_PARSER_CHECK([./dancer], 1, [], [syntax error, unexpected ':' ]) AT_BISON_OPTION_POPDEFS AT_CLEANUP ]) AT_CHECK_DANCER() AT_CHECK_DANCER([%glr-parser]) AT_CHECK_DANCER([%skeleton "lalr1.cc"]) ## ------------------------------------------ ## ## Diagnostic that expects two alternatives. ## ## ------------------------------------------ ## # _AT_DATA_EXPECT2_Y(BISON-OPTIONS) # -------------------------------- m4_define([_AT_DATA_EXPECT2_Y], [AT_DATA_GRAMMAR([expect2.y], [%{ static int yylex (AT_LALR1_CC_IF([int *], [void])); AT_LALR1_CC_IF([], [#include #include static void yyerror (const char *);]) %} $1 %defines %error-verbose %token A 1000 %token B %% program: /* empty */ | program e ';' | program error ';'; e: e '+' t | t; t: A | B; %% AT_LALR1_CC_IF( [/* A C++ error reporting function. */ void yy::parser::error (const location&, const std::string& m) { std::cerr << m << std::endl; } int yyparse () { yy::parser parser; return parser.parse (); } ], [static void yyerror (const char *s) { fprintf (stderr, "%s\n", s); }]) static int yylex (AT_LALR1_CC_IF([int *lval], [void])) [{ static int const tokens[] = { 1000, '+', '+', -1 }; static size_t toknum; ]AT_LALR1_CC_IF([*lval = 0; /* Pacify GCC. */])[ if (! (toknum < sizeof tokens / sizeof *tokens)) abort (); return tokens[toknum++]; }] int main (void) { return yyparse (); } ]) ])# _AT_DATA_EXPECT2_Y # AT_CHECK_EXPECT2(BISON-OPTIONS) # ------------------------------ # Generate the grammar, compile it, run it. m4_define([AT_CHECK_EXPECT2], [AT_SETUP([Expecting two tokens $1]) AT_BISON_OPTION_PUSHDEFS([$1]) _AT_DATA_EXPECT2_Y([$1]) AT_CHECK([bison -o expect2.c expect2.y]) AT_LALR1_CC_IF( [AT_CHECK([bison -o expect2.cc expect2.y]) AT_COMPILE_CXX([expect2])], [AT_CHECK([bison -o expect2.c expect2.y]) AT_COMPILE([expect2])]) AT_PARSER_CHECK([./expect2], 1, [], [syntax error, unexpected '+', expecting A or B ]) AT_BISON_OPTION_POPDEFS AT_CLEANUP ]) AT_CHECK_EXPECT2() AT_CHECK_EXPECT2([%glr-parser]) AT_CHECK_EXPECT2([%skeleton "lalr1.cc"]) ## --------------------------------------------- ## ## Braced code in declaration in rules section. ## ## --------------------------------------------- ## AT_SETUP([Braced code in declaration in rules section]) # Bison once mistook braced code in a declaration in the rules section to be a # rule action. AT_DATA_GRAMMAR([input.y], [[%{ #include static void yyerror (char const *msg); static int yylex (void); %} %error-verbose %% start: { printf ("Bison would once convert this action to a midrule because of the" " subsequent braced code.\n"); } ; %destructor { fprintf (stderr, "DESTRUCTOR\n"); } 'a'; %printer { fprintf (yyoutput, "PRINTER"); } 'a'; %% static void yyerror (char const *msg) { fprintf (stderr, "%s\n", msg); } static int yylex (void) { return 'a'; } int main (void) { yydebug = 1; return !yyparse (); } ]]) AT_CHECK([bison -t -o input.c input.y]) AT_COMPILE([input]) AT_PARSER_CHECK([./input], 0, [[Bison would once convert this action to a midrule because of the subsequent braced code. ]], [[Starting parse Entering state 0 Reducing stack by rule 1 (line 20): -> $$ = nterm start () Stack now 0 Entering state 1 Reading a token: Next token is token 'a' (PRINTER) syntax error, unexpected 'a', expecting $end Error: popping nterm start () Stack now 0 Cleanup: discarding lookahead token 'a' (PRINTER) DESTRUCTOR Stack now 0 ]]) AT_CLEANUP ## --------------------------------- ## ## String alias declared after use. ## ## --------------------------------- ## AT_SETUP([String alias declared after use]) # Bison once incorrectly asserted that the symbol number for either a token or # its alias was the highest symbol number so far at the point of the alias # declaration. That was true unless the declaration appeared after their first # uses and other tokens appeared in between. AT_DATA([input.y], [[%% start: 'a' "A" 'b'; %token 'a' "A"; ]]) AT_CHECK([bison -t -o input.c input.y]) AT_CLEANUP