1 # Bison Regressions. -*- Autotest -*-
3 # Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
19 AT_BANNER([[Regression tests.]])
22 ## ------------------ ##
23 ## Trivial grammars. ##
24 ## ------------------ ##
26 AT_SETUP([Trivial grammars])
28 AT_DATA_GRAMMAR([input.y],
30 void yyerror (char const *);
42 AT_BISON_CHECK([-o input.c input.y])
43 AT_COMPILE([input.o], [-c input.c])
44 AT_COMPILE([input.o], [-DYYDEBUG -c input.c])
50 ## ----------------- ##
51 ## YYSTYPE typedef. ##
52 ## ----------------- ##
54 AT_SETUP([YYSTYPE typedef])
56 AT_DATA_GRAMMAR([input.y],
58 void yyerror (char const *);
60 typedef union { char const *val; } YYSTYPE;
67 program: { $$ = ""; };
70 AT_BISON_CHECK([-o input.c input.y])
71 AT_COMPILE([input.o], [-c input.c])
77 ## ------------------------------------- ##
78 ## Early token definitions with --yacc. ##
79 ## ------------------------------------- ##
82 AT_SETUP([Early token definitions with --yacc])
84 # Found in GCJ: they expect the tokens to be defined before the user
85 # prologue, so that they can use the token definitions in it.
87 AT_DATA_GRAMMAR([input.y],
89 void yyerror (const char *s);
99 # error "MY_TOKEN not defined."
108 AT_BISON_CHECK([-y -o input.c input.y])
109 AT_COMPILE([input.o], [-c input.c])
115 ## ---------------------------------------- ##
116 ## Early token definitions without --yacc. ##
117 ## ---------------------------------------- ##
120 AT_SETUP([Early token definitions without --yacc])
122 # Found in GCJ: they expect the tokens to be defined before the user
123 # prologue, so that they can use the token definitions in it.
125 AT_DATA_GRAMMAR([input.y],
128 void yyerror (const char *s);
130 void print_my_token (void);
139 print_my_token (void)
141 enum yytokentype my_token = MY_TOKEN;
142 printf ("%d\n", my_token);
151 AT_BISON_CHECK([-o input.c input.y])
152 AT_COMPILE([input.o], [-c input.c])
158 ## ---------------- ##
159 ## Braces parsing. ##
160 ## ---------------- ##
163 AT_SETUP([Braces parsing])
166 [[/* Bison used to swallow the character after `}'. */
169 exp: { tests = {{{{{{{{{{}}}}}}}}}}; };
173 AT_BISON_CHECK([-v -o input.c input.y])
175 AT_CHECK([grep 'tests = {{{{{{{{{{}}}}}}}}}};' input.c], 0, [ignore])
180 ## ------------------ ##
181 ## Duplicate string. ##
182 ## ------------------ ##
185 AT_SETUP([Duplicate string])
188 [[/* `Bison -v' used to dump core when two tokens are defined with the same
189 string, as LE and GE below. */
196 exp: '(' exp ')' | NUM ;
200 AT_BISON_CHECK([-v -o input.c input.y], 0, [],
201 [[input.y:6.8-14: warning: symbol `"<="' used more than once as a literal string
207 ## ------------------- ##
208 ## Rule Line Numbers. ##
209 ## ------------------- ##
211 AT_SETUP([Rule Line Numbers])
213 AT_KEYWORDS([report])
245 AT_BISON_CHECK([-o input.c -v input.y])
247 # Check the contents of the report.
248 AT_CHECK([cat input.output], [],
262 Terminals, with rules where they appear
271 Nonterminals, with rules where they appear
276 on left: 2 4, on right: 0
278 on left: 1, on right: 2
280 on left: 3, on right: 4
285 0 $accept: . expr $end
287 'a' shift, and go to state 1
289 $default reduce using rule 3 ($@2)
297 2 expr: 'a' . $@1 'b'
299 $default reduce using rule 1 ($@1)
306 0 $accept: expr . $end
308 $end shift, and go to state 5
315 'c' shift, and go to state 6
320 2 expr: 'a' $@1 . 'b'
322 'b' shift, and go to state 7
327 0 $accept: expr $end .
336 $default reduce using rule 4 (expr)
341 2 expr: 'a' $@1 'b' .
343 $default reduce using rule 2 (expr)
350 ## ---------------------- ##
351 ## Mixing %token styles. ##
352 ## ---------------------- ##
355 AT_SETUP([Mixing %token styles])
357 # Taken from the documentation.
359 [[%token <operator> OR "||"
360 %token <operator> LE 134 "<="
367 AT_BISON_CHECK([-v -o input.c input.y])
373 ## ---------------- ##
374 ## Invalid inputs. ##
375 ## ---------------- ##
378 AT_SETUP([Invalid inputs])
390 AT_BISON_CHECK([input.y], [1], [],
391 [[input.y:2.1: invalid character: `?'
392 input.y:3.14: invalid character: `}'
393 input.y:4.1: invalid character: `%'
394 input.y:4.2: invalid character: `&'
395 input.y:5.1-17: invalid directive: `%a-does-not-exist'
396 input.y:6.1: invalid character: `%'
397 input.y:6.2: invalid character: `-'
398 input.y:7.1-8.0: missing `%}' at end of file
399 input.y:7.1-8.0: syntax error, unexpected %{...%}
405 AT_SETUP([Invalid inputs with {}])
417 AT_BISON_CHECK([input.y], [1], [],
418 [[input.y:3.1-15: syntax error, unexpected %initial-action, expecting {...}
425 ## ------------------- ##
426 ## Token definitions. ##
427 ## ------------------- ##
430 AT_SETUP([Token definitions])
432 # Bison managed, when fed with `%token 'f' "f"' to #define 'f'!
433 AT_DATA_GRAMMAR([input.y],
437 void yyerror (const char *s);
441 %token MYEOF 0 "end of file"
446 %token SPECIAL "\\\'\?\"\a\b\f\n\r\t\v\001\201\x001\x000081??!"
447 %token SPECIAL "\\\'\?\"\a\b\f\n\r\t\v\001\201\x001\x000081??!"
449 exp: "a" "\\\'\?\"\a\b\f\n\r\t\v\001\201\x001\x000081??!";
452 yyerror (char const *s)
454 fprintf (stderr, "%s\n", s);
473 # Checking the warning message guarantees that the trigraph "??!" isn't
474 # unnecessarily escaped here even though it would need to be if encoded in a
475 # C-string literal. Also notice that unnecessary escaping, such as "\?", from
476 # the user specification is eliminated.
477 AT_BISON_CHECK([-o input.c input.y], [[0]], [[]],
478 [[input.y:22.8-14: warning: symbol SPECIAL redeclared
479 input.y:22.8-63: warning: symbol `"\\'?\"\a\b\f\n\r\t\v\001\201\001\201??!"' used more than once as a literal string
483 # Checking the error message here guarantees that yytname, which does contain
484 # C-string literals, does have the trigraph escaped correctly. Thus, the
485 # symbol name reported by the parser is exactly the same as that reported by
488 [[syntax error, unexpected "\\'?\"\a\b\f\n\r\t\v\001\201\001\201??!", expecting a
490 AT_PARSER_CHECK([./input], 1, [], [experr])
495 ## -------------------- ##
496 ## Characters Escapes. ##
497 ## -------------------- ##
500 AT_SETUP([Characters Escapes])
502 AT_DATA_GRAMMAR([input.y],
504 void yyerror (const char *s);
514 # Pacify font-lock-mode: "
516 AT_BISON_CHECK([-o input.c input.y])
517 AT_COMPILE([input.o], [-c input.c])
526 # The generation of the reduction was once wrong in Bison, and made it
527 # miss some reductions. In the following test case, the reduction on
528 # `undef_id_tok' in state 1 was missing. This is stripped down from
529 # the actual web2c.y.
531 AT_SETUP([Web2c Report])
533 AT_KEYWORDS([report])
536 [[%token undef_id_tok const_id_tok
538 %start CONST_DEC_PART
547 | CONST_DEC_LIST CONST_DEC
551 { } undef_id_tok '=' const_id_tok ';'
556 AT_BISON_CHECK([-v input.y])
557 AT_CHECK([cat input.output], 0,
560 0 $accept: CONST_DEC_PART $end
562 1 CONST_DEC_PART: CONST_DEC_LIST
564 2 CONST_DEC_LIST: CONST_DEC
565 3 | CONST_DEC_LIST CONST_DEC
569 5 CONST_DEC: $@1 undef_id_tok '=' const_id_tok ';'
572 Terminals, with rules where they appear
582 Nonterminals, with rules where they appear
587 on left: 1, on right: 0
589 on left: 2 3, on right: 1 3
591 on left: 5, on right: 2 3
593 on left: 4, on right: 5
598 0 $accept: . CONST_DEC_PART $end
600 $default reduce using rule 4 ($@1)
602 CONST_DEC_PART go to state 1
603 CONST_DEC_LIST go to state 2
604 CONST_DEC go to state 3
610 0 $accept: CONST_DEC_PART . $end
612 $end shift, and go to state 5
617 1 CONST_DEC_PART: CONST_DEC_LIST .
618 3 CONST_DEC_LIST: CONST_DEC_LIST . CONST_DEC
620 undef_id_tok reduce using rule 4 ($@1)
621 $default reduce using rule 1 (CONST_DEC_PART)
623 CONST_DEC go to state 6
629 2 CONST_DEC_LIST: CONST_DEC .
631 $default reduce using rule 2 (CONST_DEC_LIST)
636 5 CONST_DEC: $@1 . undef_id_tok '=' const_id_tok ';'
638 undef_id_tok shift, and go to state 7
643 0 $accept: CONST_DEC_PART $end .
650 3 CONST_DEC_LIST: CONST_DEC_LIST CONST_DEC .
652 $default reduce using rule 3 (CONST_DEC_LIST)
657 5 CONST_DEC: $@1 undef_id_tok . '=' const_id_tok ';'
659 '=' shift, and go to state 8
664 5 CONST_DEC: $@1 undef_id_tok '=' . const_id_tok ';'
666 const_id_tok shift, and go to state 9
671 5 CONST_DEC: $@1 undef_id_tok '=' const_id_tok . ';'
673 ';' shift, and go to state 10
678 5 CONST_DEC: $@1 undef_id_tok '=' const_id_tok ';' .
680 $default reduce using rule 5 (CONST_DEC)
686 ## --------------- ##
688 ## --------------- ##
690 # The generation of the mapping `state -> action' was once wrong in
691 # extremely specific situations. web2c.y exhibits this situation.
692 # Below is a stripped version of the grammar. It looks like one can
693 # simplify it further, but just don't: it is tuned to exhibit a bug,
694 # which disappears when applying sane grammar transformations.
696 # It used to be wrong on yydefact only:
698 # static const yytype_uint8 yydefact[] =
700 # - 2, 0, 1, 0, 0, 2, 3, 2, 5, 4,
701 # + 2, 0, 1, 0, 0, 0, 3, 2, 5, 4,
705 # but let's check all the tables.
708 AT_SETUP([Web2c Actions])
710 AT_KEYWORDS([report])
714 statement: struct_stat;
715 struct_stat: /* empty. */ | if else;
716 if: "if" "const" "then" statement;
717 else: "else" statement;
721 AT_BISON_CHECK([-v -o input.c input.y])
723 # Check only the tables.
724 [sed -n 's/ *$//;/^static const.*\[\] =/,/^}/p' input.c >tables.c]
726 AT_CHECK([[cat tables.c]], 0,
727 [[static const yytype_uint8 yytranslate[] =
729 0, 2, 2, 2, 2, 2, 2, 2, 2, 2,
730 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
731 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
732 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
733 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
734 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
735 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
736 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
737 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
738 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
739 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
740 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
741 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
742 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
743 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
744 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
745 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
746 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
747 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
748 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
749 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
750 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
751 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
752 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
753 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
754 2, 2, 2, 2, 2, 2, 1, 2, 3, 4,
757 static const yytype_uint8 yyprhs[] =
761 static const yytype_int8 yyrhs[] =
763 8, 0, -1, 9, -1, -1, 10, 11, -1, 3,
764 4, 5, 8, -1, 6, 8, -1
766 static const yytype_uint8 yyrline[] =
770 static const char *const yytname[] =
772 "$end", "error", "$undefined", "\"if\"", "\"const\"", "\"then\"",
773 "\"else\"", "$accept", "statement", "struct_stat", "if", "else", 0
775 static const yytype_uint16 yytoknum[] =
777 0, 256, 257, 258, 259, 260, 261
779 static const yytype_uint8 yyr1[] =
781 0, 7, 8, 9, 9, 10, 11
783 static const yytype_uint8 yyr2[] =
787 static const yytype_uint8 yydefact[] =
789 3, 0, 0, 2, 0, 0, 1, 3, 4, 3,
792 static const yytype_int8 yydefgoto[] =
796 static const yytype_int8 yypact[] =
798 -2, -1, 4, -8, 0, 2, -8, -2, -8, -2,
801 static const yytype_int8 yypgoto[] =
805 static const yytype_uint8 yytable[] =
807 10, 1, 11, 5, 6, 0, 7, 9
809 static const yytype_int8 yycheck[] =
811 7, 3, 9, 4, 0, -1, 6, 5
813 static const yytype_uint8 yystos[] =
815 0, 3, 8, 9, 10, 4, 0, 6, 11, 5,
823 ## ------------------------- ##
824 ## yycheck Bound Violation. ##
825 ## ------------------------- ##
828 # _AT_DATA_DANCER_Y(BISON-OPTIONS)
829 # --------------------------------
830 # The following grammar, taken from Andrew Suffield's GPL'd implementation
831 # of DGMTP, the Dancer Generic Message Transport Protocol, used to violate
832 # yycheck's bounds when issuing a verbose error message. Keep this test
833 # so that possible bound checking compilers could check all the skeletons.
834 m4_define([_AT_DATA_DANCER_Y],
835 [AT_DATA_GRAMMAR([dancer.y],
837 static int yylex (AT_LALR1_CC_IF([int *], [void]));
841 static void yyerror (const char *);])
844 %token ARROW INVALID NUMBER STRING DATA
848 /* Grammar follows */
853 header: '<' from ARROW to '>' type ':'
854 | '<' ARROW to '>' type ':'
888 [/* A C++ error reporting function. */
890 yy::parser::error (const location&, const std::string& m)
892 std::cerr << m << std::endl;
900 parser.set_debug_level (YYDEBUG);
902 return parser.parse ();
906 yyerror (const char *s)
908 fprintf (stderr, "%s\n", s);
912 yylex (AT_LALR1_CC_IF([int *lval], [void]))
914 static int const tokens[] =
918 static size_t toknum;
919 ]AT_LALR1_CC_IF([*lval = 0; /* Pacify GCC. */])[
920 if (! (toknum < sizeof tokens / sizeof *tokens))
922 return tokens[toknum++];
931 ])# _AT_DATA_DANCER_Y
934 # AT_CHECK_DANCER(BISON-OPTIONS)
935 # ------------------------------
936 # Generate the grammar, compile it, run it.
937 m4_define([AT_CHECK_DANCER],
938 [AT_SETUP([Dancer $1])
939 AT_BISON_OPTION_PUSHDEFS([$1])
940 _AT_DATA_DANCER_Y([$1])
941 AT_BISON_CHECK([-o dancer.c dancer.y])
943 [AT_BISON_CHECK([-o dancer.cc dancer.y])
944 AT_COMPILE_CXX([dancer])],
945 [AT_BISON_CHECK([-o dancer.c dancer.y])
946 AT_COMPILE([dancer])])
947 AT_PARSER_CHECK([./dancer], 1, [],
948 [syntax error, unexpected ':'
950 AT_BISON_OPTION_POPDEFS
955 AT_CHECK_DANCER([%glr-parser])
956 AT_CHECK_DANCER([%skeleton "lalr1.cc"])
959 ## ------------------------------------------ ##
960 ## Diagnostic that expects two alternatives. ##
961 ## ------------------------------------------ ##
964 # _AT_DATA_EXPECT2_Y(BISON-OPTIONS)
965 # ---------------------------------
966 m4_define([_AT_DATA_EXPECT2_Y],
967 [AT_DATA_GRAMMAR([expect2.y],
969 static int yylex (AT_LALR1_CC_IF([int *], [void]));
973 static void yyerror (const char *);])
991 [/* A C++ error reporting function. */
993 yy::parser::error (const location&, const std::string& m)
995 std::cerr << m << std::endl;
1002 return parser.parse ();
1006 yyerror (const char *s)
1008 fprintf (stderr, "%s\n", s);
1012 yylex (AT_LALR1_CC_IF([int *lval], [void]))
1014 static int const tokens[] =
1018 static size_t toknum;
1019 ]AT_LALR1_CC_IF([*lval = 0; /* Pacify GCC. */])[
1020 if (! (toknum < sizeof tokens / sizeof *tokens))
1022 return tokens[toknum++];
1031 ])# _AT_DATA_EXPECT2_Y
1034 # AT_CHECK_EXPECT2(BISON-OPTIONS)
1035 # -------------------------------
1036 # Generate the grammar, compile it, run it.
1037 m4_define([AT_CHECK_EXPECT2],
1038 [AT_SETUP([Expecting two tokens $1])
1039 AT_BISON_OPTION_PUSHDEFS([$1])
1040 _AT_DATA_EXPECT2_Y([$1])
1041 AT_BISON_CHECK([-o expect2.c expect2.y])
1043 [AT_BISON_CHECK([-o expect2.cc expect2.y])
1044 AT_COMPILE_CXX([expect2])],
1045 [AT_BISON_CHECK([-o expect2.c expect2.y])
1046 AT_COMPILE([expect2])])
1047 AT_PARSER_CHECK([./expect2], 1, [],
1048 [syntax error, unexpected '+', expecting A or B
1050 AT_BISON_OPTION_POPDEFS
1055 AT_CHECK_EXPECT2([%glr-parser])
1056 AT_CHECK_EXPECT2([%skeleton "lalr1.cc"])
1060 ## --------------------------------------------- ##
1061 ## Braced code in declaration in rules section. ##
1062 ## --------------------------------------------- ##
1064 AT_SETUP([Braced code in declaration in rules section])
1066 # Bison once mistook braced code in a declaration in the rules section to be a
1069 AT_DATA_GRAMMAR([input.y],
1072 static void yyerror (char const *msg);
1073 static int yylex (void);
1082 printf ("Bison would once convert this action to a midrule because of the"
1083 " subsequent braced code.\n");
1087 %destructor { fprintf (stderr, "DESTRUCTOR\n"); } 'a';
1088 %printer { fprintf (yyoutput, "PRINTER"); } 'a';
1093 yyerror (char const *msg)
1095 fprintf (stderr, "%s\n", msg);
1112 AT_BISON_CHECK([-t -o input.c input.y])
1114 AT_PARSER_CHECK([./input], 0,
1115 [[Bison would once convert this action to a midrule because of the subsequent braced code.
1119 Reducing stack by rule 1 (line 20):
1120 -> $$ = nterm start ()
1123 Reading a token: Next token is token 'a' (PRINTER)
1124 syntax error, unexpected 'a', expecting $end
1125 Error: popping nterm start ()
1127 Cleanup: discarding lookahead token 'a' (PRINTER)
1136 ## --------------------------------- ##
1137 ## String alias declared after use. ##
1138 ## --------------------------------- ##
1140 AT_SETUP([String alias declared after use])
1142 # Bison once incorrectly asserted that the symbol number for either a token or
1143 # its alias was the highest symbol number so far at the point of the alias
1144 # declaration. That was true unless the declaration appeared after their first
1145 # uses and other tokens appeared in between.
1153 AT_BISON_CHECK([-t -o input.c input.y])
1159 ## -------------------------------- ##
1160 ## Extra lookahead sets in report. ##
1161 ## -------------------------------- ##
1163 AT_SETUP([[Extra lookahead sets in report]])
1165 # Bison prints each reduction's lookahead set only next to the associated
1166 # state's one item that (1) is associated with the same rule as the reduction
1167 # and (2) has its dot at the end of its RHS. Previously, Bison also
1168 # erroneously printed the lookahead set next to all of the state's other items
1169 # associated with the same rule. This bug affected only the `.output' file and
1170 # not the generated parser source code.
1172 AT_DATA([[input.y]],
1174 start: a | 'a' a 'a' ;
1178 AT_BISON_CHECK([[--report=all input.y]])
1179 AT_CHECK([[sed -n '/^state 1$/,/^state 2$/p' input.output]], [[0]],
1182 2 start: 'a' . a 'a'
1186 'a' shift, and go to state 4
1188 $default reduce using rule 3 (a)
1200 ## ---------------------------------------- ##
1201 ## Token number in precedence declaration. ##
1202 ## ---------------------------------------- ##
1204 AT_SETUP([[Token number in precedence declaration]])
1206 # POSIX says token numbers can be declared in %left, %right, and %nonassoc, but
1207 # we lost this in Bison 1.50.
1209 AT_DATA_GRAMMAR([input.y],
1212 void yyerror (char const *);
1217 %left TK1 1 TK2 2 "tok alias" 3
1221 start: TK1 sr_conflict "tok alias" ;
1231 yyerror (char const *msg)
1233 fprintf (stderr, "%s\n", msg);
1239 static int const input[] = { 1, 2, 3, 0 };
1240 static int const *inputp = input;
1251 AT_BISON_CHECK([[-o input.c input.y]], [[0]],,
1252 [[input.y:24.5-19: warning: rule useless in parser due to conflicts: sr_conflict: TK2 "tok alias"
1254 AT_COMPILE([[input]])
1255 AT_PARSER_CHECK([[./input]])
1261 ## ----------------------------------------------- ##
1262 ## Fix user actions without a trailing semicolon. ##
1263 ## ----------------------------------------------- ##
1265 AT_SETUP([[Fix user actions without a trailing semicolon]])
1267 # This feature is undocumented, but we accidentally broke it in 2.3a, and there
1268 # was a complaint at:
1269 # <http://lists.gnu.org/archive/html/bug-bison/2008-11/msg00001.html>.
1276 AT_BISON_CHECK([[-o input.c input.y]])
1277 AT_CHECK([[sed -n '/asdffdsa/s/^ *//p' input.c]], [[0]],