1 # Bison Regressions. -*- Autotest -*-
3 # Copyright (C) 2001-2010 Free Software Foundation, Inc.
5 # This program is free software: you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
18 AT_BANNER([[Regression tests.]])
21 ## ------------------ ##
22 ## Trivial grammars. ##
23 ## ------------------ ##
25 AT_SETUP([Trivial grammars])
27 AT_DATA_GRAMMAR([input.y],
29 void yyerror (char const *);
41 AT_BISON_CHECK([-o input.c input.y])
42 AT_COMPILE([input.o], [-c input.c])
43 AT_COMPILE([input.o], [-DYYDEBUG -c input.c])
49 ## ----------------- ##
50 ## YYSTYPE typedef. ##
51 ## ----------------- ##
53 AT_SETUP([YYSTYPE typedef])
55 AT_DATA_GRAMMAR([input.y],
57 void yyerror (char const *);
59 typedef union { char const *val; } YYSTYPE;
66 program: { $$ = ""; };
69 AT_BISON_CHECK([-o input.c input.y])
70 AT_COMPILE([input.o], [-c input.c])
76 ## ------------------------------------- ##
77 ## Early token definitions with --yacc. ##
78 ## ------------------------------------- ##
81 AT_SETUP([Early token definitions with --yacc])
83 # Found in GCJ: they expect the tokens to be defined before the user
84 # prologue, so that they can use the token definitions in it.
86 AT_DATA_GRAMMAR([input.y],
88 void yyerror (const char *s);
98 # error "MY_TOKEN not defined."
107 AT_BISON_CHECK([-y -o input.c input.y])
108 AT_COMPILE([input.o], [-c input.c])
114 ## ---------------------------------------- ##
115 ## Early token definitions without --yacc. ##
116 ## ---------------------------------------- ##
119 AT_SETUP([Early token definitions without --yacc])
121 # Found in GCJ: they expect the tokens to be defined before the user
122 # prologue, so that they can use the token definitions in it.
124 AT_DATA_GRAMMAR([input.y],
127 void yyerror (const char *s);
129 void print_my_token (void);
138 print_my_token (void)
140 enum yytokentype my_token = MY_TOKEN;
141 printf ("%d\n", my_token);
150 AT_BISON_CHECK([-o input.c input.y])
151 AT_COMPILE([input.o], [-c input.c])
157 ## ---------------- ##
158 ## Braces parsing. ##
159 ## ---------------- ##
162 AT_SETUP([Braces parsing])
165 [[/* Bison used to swallow the character after `}'. */
168 exp: { tests = {{{{{{{{{{}}}}}}}}}}; };
172 AT_BISON_CHECK([-v -o input.c input.y])
174 AT_CHECK([grep 'tests = {{{{{{{{{{}}}}}}}}}};' input.c], 0, [ignore])
179 ## ------------------ ##
180 ## Duplicate string. ##
181 ## ------------------ ##
184 AT_SETUP([Duplicate string])
187 [[/* `Bison -v' used to dump core when two tokens are defined with the same
188 string, as LE and GE below. */
195 exp: '(' exp ')' | NUM ;
199 AT_BISON_CHECK([-v -o input.c input.y], 0, [],
200 [[input.y:6.8-14: warning: symbol `"<="' used more than once as a literal string
206 ## ------------------- ##
207 ## Rule Line Numbers. ##
208 ## ------------------- ##
210 AT_SETUP([Rule Line Numbers])
212 AT_KEYWORDS([report])
244 AT_BISON_CHECK([-o input.c -v input.y])
246 # Check the contents of the report.
247 AT_CHECK([cat input.output], [],
261 Terminals, with rules where they appear
270 Nonterminals, with rules where they appear
275 on left: 2 4, on right: 0
277 on left: 1, on right: 2
279 on left: 3, on right: 4
284 0 $accept: . expr $end
286 'a' shift, and go to state 1
288 $default reduce using rule 3 ($@2)
296 2 expr: 'a' . $@1 'b'
298 $default reduce using rule 1 ($@1)
305 0 $accept: expr . $end
307 $end shift, and go to state 5
314 'c' shift, and go to state 6
319 2 expr: 'a' $@1 . 'b'
321 'b' shift, and go to state 7
326 0 $accept: expr $end .
335 $default reduce using rule 4 (expr)
340 2 expr: 'a' $@1 'b' .
342 $default reduce using rule 2 (expr)
349 ## ---------------------- ##
350 ## Mixing %token styles. ##
351 ## ---------------------- ##
354 AT_SETUP([Mixing %token styles])
356 # Taken from the documentation.
358 [[%token <operator> OR "||"
359 %token <operator> LE 134 "<="
366 AT_BISON_CHECK([-v -o input.c input.y])
372 ## ---------------- ##
373 ## Invalid inputs. ##
374 ## ---------------- ##
377 AT_SETUP([Invalid inputs])
389 AT_BISON_CHECK([input.y], [1], [],
390 [[input.y:2.1: invalid character: `?'
391 input.y:3.14: invalid character: `}'
392 input.y:4.1: invalid character: `%'
393 input.y:4.2: invalid character: `&'
394 input.y:5.1-17: invalid directive: `%a-does-not-exist'
395 input.y:6.1: invalid character: `%'
396 input.y:6.2: invalid character: `-'
397 input.y:7.1-8.0: missing `%}' at end of file
398 input.y:7.1-8.0: syntax error, unexpected %{...%}
404 AT_SETUP([Invalid inputs with {}])
416 AT_BISON_CHECK([input.y], [1], [],
417 [[input.y:3.1-15: syntax error, unexpected %initial-action, expecting {...}
424 ## ------------------- ##
425 ## Token definitions. ##
426 ## ------------------- ##
429 AT_SETUP([Token definitions])
431 # Bison managed, when fed with `%token 'f' "f"' to #define 'f'!
432 AT_DATA_GRAMMAR([input.y],
436 void yyerror (const char *s);
440 %token MYEOF 0 "end of file"
445 %token SPECIAL "\\\'\?\"\a\b\f\n\r\t\v\001\201\x001\x000081??!"
446 %token SPECIAL "\\\'\?\"\a\b\f\n\r\t\v\001\201\x001\x000081??!"
448 exp: "a" "\\\'\?\"\a\b\f\n\r\t\v\001\201\x001\x000081??!";
451 yyerror (char const *s)
453 fprintf (stderr, "%s\n", s);
472 # Checking the warning message guarantees that the trigraph "??!" isn't
473 # unnecessarily escaped here even though it would need to be if encoded in a
474 # C-string literal. Also notice that unnecessary escaping, such as "\?", from
475 # the user specification is eliminated.
476 AT_BISON_CHECK([-o input.c input.y], [[0]], [[]],
477 [[input.y:22.8-14: warning: symbol SPECIAL redeclared
478 input.y:22.8-63: warning: symbol `"\\'?\"\a\b\f\n\r\t\v\001\201\001\201??!"' used more than once as a literal string
482 # Checking the error message here guarantees that yytname, which does contain
483 # C-string literals, does have the trigraph escaped correctly. Thus, the
484 # symbol name reported by the parser is exactly the same as that reported by
487 [[syntax error, unexpected "\\'?\"\a\b\f\n\r\t\v\001\201\001\201??!", expecting a
489 AT_PARSER_CHECK([./input], 1, [], [experr])
494 ## -------------------- ##
495 ## Characters Escapes. ##
496 ## -------------------- ##
499 AT_SETUP([Characters Escapes])
501 AT_DATA_GRAMMAR([input.y],
503 void yyerror (const char *s);
513 # Pacify font-lock-mode: "
515 AT_BISON_CHECK([-o input.c input.y])
516 AT_COMPILE([input.o], [-c input.c])
525 # The generation of the reduction was once wrong in Bison, and made it
526 # miss some reductions. In the following test case, the reduction on
527 # `undef_id_tok' in state 1 was missing. This is stripped down from
528 # the actual web2c.y.
530 AT_SETUP([Web2c Report])
532 AT_KEYWORDS([report])
535 [[%token undef_id_tok const_id_tok
537 %start CONST_DEC_PART
546 | CONST_DEC_LIST CONST_DEC
550 { } undef_id_tok '=' const_id_tok ';'
555 AT_BISON_CHECK([-v input.y])
556 AT_CHECK([cat input.output], 0,
559 0 $accept: CONST_DEC_PART $end
561 1 CONST_DEC_PART: CONST_DEC_LIST
563 2 CONST_DEC_LIST: CONST_DEC
564 3 | CONST_DEC_LIST CONST_DEC
568 5 CONST_DEC: $@1 undef_id_tok '=' const_id_tok ';'
571 Terminals, with rules where they appear
581 Nonterminals, with rules where they appear
586 on left: 1, on right: 0
588 on left: 2 3, on right: 1 3
590 on left: 5, on right: 2 3
592 on left: 4, on right: 5
597 0 $accept: . CONST_DEC_PART $end
599 $default reduce using rule 4 ($@1)
601 CONST_DEC_PART go to state 1
602 CONST_DEC_LIST go to state 2
603 CONST_DEC go to state 3
609 0 $accept: CONST_DEC_PART . $end
611 $end shift, and go to state 5
616 1 CONST_DEC_PART: CONST_DEC_LIST .
617 3 CONST_DEC_LIST: CONST_DEC_LIST . CONST_DEC
619 undef_id_tok reduce using rule 4 ($@1)
620 $default reduce using rule 1 (CONST_DEC_PART)
622 CONST_DEC go to state 6
628 2 CONST_DEC_LIST: CONST_DEC .
630 $default reduce using rule 2 (CONST_DEC_LIST)
635 5 CONST_DEC: $@1 . undef_id_tok '=' const_id_tok ';'
637 undef_id_tok shift, and go to state 7
642 0 $accept: CONST_DEC_PART $end .
649 3 CONST_DEC_LIST: CONST_DEC_LIST CONST_DEC .
651 $default reduce using rule 3 (CONST_DEC_LIST)
656 5 CONST_DEC: $@1 undef_id_tok . '=' const_id_tok ';'
658 '=' shift, and go to state 8
663 5 CONST_DEC: $@1 undef_id_tok '=' . const_id_tok ';'
665 const_id_tok shift, and go to state 9
670 5 CONST_DEC: $@1 undef_id_tok '=' const_id_tok . ';'
672 ';' shift, and go to state 10
677 5 CONST_DEC: $@1 undef_id_tok '=' const_id_tok ';' .
679 $default reduce using rule 5 (CONST_DEC)
685 ## --------------- ##
687 ## --------------- ##
689 # The generation of the mapping `state -> action' was once wrong in
690 # extremely specific situations. web2c.y exhibits this situation.
691 # Below is a stripped version of the grammar. It looks like one can
692 # simplify it further, but just don't: it is tuned to exhibit a bug,
693 # which disappears when applying sane grammar transformations.
695 # It used to be wrong on yydefact only:
697 # static const yytype_uint8 yydefact[] =
699 # - 2, 0, 1, 0, 0, 2, 3, 2, 5, 4,
700 # + 2, 0, 1, 0, 0, 0, 3, 2, 5, 4,
704 # but let's check all the tables.
707 AT_SETUP([Web2c Actions])
709 AT_KEYWORDS([report])
713 statement: struct_stat;
714 struct_stat: /* empty. */ | if else;
715 if: "if" "const" "then" statement;
716 else: "else" statement;
720 AT_BISON_CHECK([-v -o input.c input.y])
722 # Check only the tables.
723 [sed -n 's/ *$//;/^static const.*\[\] =/,/^}/p' input.c >tables.c]
725 AT_CHECK([[cat tables.c]], 0,
726 [[static const yytype_uint8 yytranslate[] =
728 0, 2, 2, 2, 2, 2, 2, 2, 2, 2,
729 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
730 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
731 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
732 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
733 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
734 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
735 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
736 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
737 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
738 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
739 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
740 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
741 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
742 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
743 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
744 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
745 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
746 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
747 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
748 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
749 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
750 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
751 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
752 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
753 2, 2, 2, 2, 2, 2, 1, 2, 3, 4,
756 static const yytype_uint8 yyprhs[] =
760 static const yytype_int8 yyrhs[] =
762 8, 0, -1, 9, -1, -1, 10, 11, -1, 3,
763 4, 5, 8, -1, 6, 8, -1
765 static const yytype_uint8 yyrline[] =
769 static const char *const yytname[] =
771 "$end", "error", "$undefined", "\"if\"", "\"const\"", "\"then\"",
772 "\"else\"", "$accept", "statement", "struct_stat", "if", "else", 0
774 static const yytype_uint16 yytoknum[] =
776 0, 256, 257, 258, 259, 260, 261
778 static const yytype_uint8 yyr1[] =
780 0, 7, 8, 9, 9, 10, 11
782 static const yytype_uint8 yyr2[] =
786 static const yytype_uint8 yydefact[] =
788 3, 0, 0, 2, 0, 0, 1, 3, 4, 3,
791 static const yytype_int8 yydefgoto[] =
795 static const yytype_int8 yypact[] =
797 -2, -1, 4, -8, 0, 2, -8, -2, -8, -2,
800 static const yytype_int8 yypgoto[] =
804 static const yytype_uint8 yytable[] =
806 10, 1, 11, 5, 6, 0, 7, 9
808 static const yytype_int8 yycheck[] =
810 7, 3, 9, 4, 0, -1, 6, 5
812 static const yytype_uint8 yystos[] =
814 0, 3, 8, 9, 10, 4, 0, 6, 11, 5,
822 ## ------------------------- ##
823 ## yycheck Bound Violation. ##
824 ## ------------------------- ##
827 # _AT_DATA_DANCER_Y(BISON-OPTIONS)
828 # --------------------------------
829 # The following grammar, taken from Andrew Suffield's GPL'd implementation
830 # of DGMTP, the Dancer Generic Message Transport Protocol, used to violate
831 # yycheck's bounds when issuing a verbose error message. Keep this test
832 # so that bounds-checking compilers can check all the skeletons.
833 m4_define([_AT_DATA_DANCER_Y],
834 [AT_DATA_GRAMMAR([dancer.y],
836 static int yylex (AT_LALR1_CC_IF([int *], [void]));
840 static void yyerror (const char *);])
843 %token ARROW INVALID NUMBER STRING DATA
847 /* Grammar follows */
852 header: '<' from ARROW to '>' type ':'
853 | '<' ARROW to '>' type ':'
887 [/* A C++ error reporting function. */
889 yy::parser::error (const location&, const std::string& m)
891 std::cerr << m << std::endl;
899 parser.set_debug_level (YYDEBUG);
901 return parser.parse ();
905 yyerror (const char *s)
907 fprintf (stderr, "%s\n", s);
911 yylex (AT_LALR1_CC_IF([int *lval], [void]))
913 static int const tokens[] =
917 static size_t toknum;
918 ]AT_LALR1_CC_IF([*lval = 0; /* Pacify GCC. */])[
919 if (! (toknum < sizeof tokens / sizeof *tokens))
921 return tokens[toknum++];
930 ])# _AT_DATA_DANCER_Y
933 # AT_CHECK_DANCER(BISON-OPTIONS)
934 # ------------------------------
935 # Generate the grammar, compile it, run it.
936 m4_define([AT_CHECK_DANCER],
937 [AT_SETUP([Dancer $1])
938 AT_BISON_OPTION_PUSHDEFS([$1])
939 _AT_DATA_DANCER_Y([$1])
940 AT_BISON_CHECK([-o dancer.c dancer.y])
942 [AT_BISON_CHECK([-o dancer.cc dancer.y])
943 AT_COMPILE_CXX([dancer])],
944 [AT_BISON_CHECK([-o dancer.c dancer.y])
945 AT_COMPILE([dancer])])
946 AT_PARSER_CHECK([./dancer], 1, [],
947 [syntax error, unexpected ':'
949 AT_BISON_OPTION_POPDEFS
954 AT_CHECK_DANCER([%glr-parser])
955 AT_CHECK_DANCER([%skeleton "lalr1.cc"])
958 ## ------------------------------------------ ##
959 ## Diagnostic that expects two alternatives. ##
960 ## ------------------------------------------ ##
963 # _AT_DATA_EXPECT2_Y(BISON-OPTIONS)
964 # --------------------------------
965 m4_define([_AT_DATA_EXPECT2_Y],
966 [AT_DATA_GRAMMAR([expect2.y],
968 static int yylex (AT_LALR1_CC_IF([int *], [void]));
972 static void yyerror (const char *);])
990 [/* A C++ error reporting function. */
992 yy::parser::error (const location&, const std::string& m)
994 std::cerr << m << std::endl;
1001 return parser.parse ();
1005 yyerror (const char *s)
1007 fprintf (stderr, "%s\n", s);
1011 yylex (AT_LALR1_CC_IF([int *lval], [void]))
1013 static int const tokens[] =
1017 static size_t toknum;
1018 ]AT_LALR1_CC_IF([*lval = 0; /* Pacify GCC. */])[
1019 if (! (toknum < sizeof tokens / sizeof *tokens))
1021 return tokens[toknum++];
1030 ])# _AT_DATA_EXPECT2_Y
1033 # AT_CHECK_EXPECT2(BISON-OPTIONS)
1034 # ------------------------------
1035 # Generate the grammar, compile it, run it.
1036 m4_define([AT_CHECK_EXPECT2],
1037 [AT_SETUP([Expecting two tokens $1])
1038 AT_BISON_OPTION_PUSHDEFS([$1])
1039 _AT_DATA_EXPECT2_Y([$1])
1040 AT_BISON_CHECK([-o expect2.c expect2.y])
1042 [AT_BISON_CHECK([-o expect2.cc expect2.y])
1043 AT_COMPILE_CXX([expect2])],
1044 [AT_BISON_CHECK([-o expect2.c expect2.y])
1045 AT_COMPILE([expect2])])
1046 AT_PARSER_CHECK([./expect2], 1, [],
1047 [syntax error, unexpected '+', expecting A or B
1049 AT_BISON_OPTION_POPDEFS
1054 AT_CHECK_EXPECT2([%glr-parser])
1055 AT_CHECK_EXPECT2([%skeleton "lalr1.cc"])
1059 ## --------------------------------------------- ##
1060 ## Braced code in declaration in rules section. ##
1061 ## --------------------------------------------- ##
1063 AT_SETUP([Braced code in declaration in rules section])
1065 # Bison once mistook braced code in a declaration in the rules section to be a
1068 AT_DATA_GRAMMAR([input.y],
1071 static void yyerror (char const *msg);
1072 static int yylex (void);
1081 printf ("Bison would once convert this action to a midrule because of the"
1082 " subsequent braced code.\n");
1086 %destructor { fprintf (stderr, "DESTRUCTOR\n"); } 'a';
1087 %printer { fprintf (yyoutput, "PRINTER"); } 'a';
1092 yyerror (char const *msg)
1094 fprintf (stderr, "%s\n", msg);
1111 AT_BISON_CHECK([-t -o input.c input.y])
1113 AT_PARSER_CHECK([./input], 0,
1114 [[Bison would once convert this action to a midrule because of the subsequent braced code.
1118 Reducing stack by rule 1 (line 20):
1119 -> $$ = nterm start ()
1122 Reading a token: Next token is token 'a' (PRINTER)
1123 syntax error, unexpected 'a', expecting $end
1124 Error: popping nterm start ()
1126 Cleanup: discarding lookahead token 'a' (PRINTER)
1135 ## --------------------------------- ##
1136 ## String alias declared after use. ##
1137 ## --------------------------------- ##
1139 AT_SETUP([String alias declared after use])
1141 # Bison once incorrectly asserted that the symbol number for either a token or
1142 # its alias was the highest symbol number so far at the point of the alias
1143 # declaration. That was true unless the declaration appeared after their first
1144 # uses and other tokens appeared in between.
1152 AT_BISON_CHECK([-t -o input.c input.y])
1158 ## -------------------------------- ##
1159 ## Extra lookahead sets in report. ##
1160 ## -------------------------------- ##
1162 AT_SETUP([[Extra lookahead sets in report]])
1164 # Bison prints each reduction's lookahead set only next to the associated
1165 # state's one item that (1) is associated with the same rule as the reduction
1166 # and (2) has its dot at the end of its RHS. Previously, Bison also
1167 # erroneously printed the lookahead set next to all of the state's other items
1168 # associated with the same rule. This bug affected only the `.output' file and
1169 # not the generated parser source code.
1171 AT_DATA([[input.y]],
1173 start: a | 'a' a 'a' ;
1177 AT_BISON_CHECK([[--report=all input.y]])
1178 AT_CHECK([[sed -n '/^state 1$/,/^state 2$/p' input.output]], [[0]],
1181 2 start: 'a' . a 'a'
1185 'a' shift, and go to state 4
1187 $default reduce using rule 3 (a)
1199 ## ---------------------------------------- ##
1200 ## Token number in precedence declaration. ##
1201 ## ---------------------------------------- ##
1203 AT_SETUP([[Token number in precedence declaration]])
1205 # POSIX says token numbers can be declared in %left, %right, and %nonassoc, but
1206 # we lost this in Bison 1.50.
1208 AT_DATA_GRAMMAR([input.y],
1211 void yyerror (char const *);
1216 %left TK1 1 TK2 2 "tok alias" 3
1220 start: TK1 sr_conflict "tok alias" ;
1230 yyerror (char const *msg)
1232 fprintf (stderr, "%s\n", msg);
1238 static int const input[] = { 1, 2, 3, 0 };
1239 static int const *inputp = input;
1250 AT_BISON_CHECK([[-o input.c input.y]], [[0]],,
1251 [[input.y:24.5-19: warning: rule useless in parser due to conflicts: sr_conflict: TK2 "tok alias"
1253 AT_COMPILE([[input]])
1254 AT_PARSER_CHECK([[./input]])
1260 ## ----------------------------------------------- ##
1261 ## Fix user actions without a trailing semicolon. ##
1262 ## ----------------------------------------------- ##
1264 AT_SETUP([[Fix user actions without a trailing semicolon]])
1266 # This feature is undocumented, but we accidentally broke it in 2.3a, and there
1267 # was a complaint at:
1268 # <http://lists.gnu.org/archive/html/bug-bison/2008-11/msg00001.html>.
1275 AT_BISON_CHECK([[-o input.c input.y]])
1276 AT_CHECK([[sed -n '/asdffdsa/s/^ *//p' input.c]], [[0]],