]> git.saurik.com Git - bison.git/blob - tests/regression.at
* data/lalr1.cc (_): New.
[bison.git] / tests / regression.at
1 # Bison Regressions. -*- Autotest -*-
2 # Copyright (C) 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 2, or (at your option)
7 # any later version.
8
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
13
14 # You should have received a copy of the GNU General Public License
15 # along with this program; if not, write to the Free Software
16 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 # 02110-1301, USA.
18
19 AT_BANNER([[Regression tests.]])
20
21
22 ## ------------------------- ##
23 ## Early token definitions. ##
24 ## ------------------------- ##
25
26
27 AT_SETUP([Early token definitions])
28
29 # Found in GCJ: it expects the token macros to be defined before the user
30 # prologue, so that the prologue code can refer to them.  The second
31 # prologue below hits its #error if MY_TOKEN is not defined at that point.
32 AT_DATA_GRAMMAR([input.y],
33 [[%{
34 void yyerror (const char *s);
35 int yylex (void);
36 %}
37
38 %union
39 {
40 int val;
41 };
42 %{
43 #ifndef MY_TOKEN
44 # error "MY_TOKEN not defined."
45 #endif
46 %}
47 %token MY_TOKEN
48 %%
49 exp: MY_TOKEN;
50 %%
51 ]])
52
53 AT_CHECK([bison -o input.c input.y])
54 AT_COMPILE([input.o], [-c input.c])
55
56 AT_CLEANUP
57
58
59
60 ## ---------------- ##
61 ## Braces parsing. ##
62 ## ---------------- ##
63
64
65 AT_SETUP([Braces parsing])
66
67 AT_DATA([input.y],
68 [[/* Bison used to swallow the character after `}'. */
69
70 %%
71 exp: { tests = {{{{{{{{{{}}}}}}}}}}; };
72 %%
73 ]])
74
75 AT_CHECK([bison -v -o input.c input.y])
76 # The nested braces of the action must reach the generated parser intact.
77 AT_CHECK([grep 'tests = {{{{{{{{{{}}}}}}}}}};' input.c], 0, [ignore])
78
79 AT_CLEANUP
80
81
82 ## ------------------ ##
83 ## Duplicate string. ##
84 ## ------------------ ##
85
86
87 AT_SETUP([Duplicate string])
88
89 AT_DATA([input.y],
90 [[/* `Bison -v' used to dump core when two tokens are defined with the same
91 string, as LE and GE below. */
92
93 %token NUM
94 %token LE "<="
95 %token GE "<="
96
97 %%
98 exp: '(' exp ')' | NUM ;
99 %%
100 ]])
101 # Reusing a literal string is only a warning: bison must still exit with status 0.
102 AT_CHECK([bison -v -o input.c input.y], 0, [],
103 [[input.y:6.8-14: warning: symbol `"<="' used more than once as a literal string
104 ]])
105
106 AT_CLEANUP
107
108
109 ## ------------------- ##
110 ## Rule Line Numbers. ##
111 ## ------------------- ##
112
113 AT_SETUP([Rule Line Numbers])
114
115 AT_KEYWORDS([report])
116 # Rules whose mid-rule actions are spread over many lines; the blank lines inside the grammar are part of the test input.
117 AT_DATA([input.y],
118 [[%%
119 expr:
120 'a'
121
122 {
123
124 }
125
126 'b'
127
128 {
129
130 }
131
132 |
133
134
135 {
136
137
138 }
139
140 'c'
141
142 {
143
144 };
145 ]])
146
147 AT_CHECK([bison -o input.c -v input.y])
148
149 # Check the contents of the report.
150 AT_CHECK([cat input.output], [],
151 [[Grammar
152
153 0 $accept: expr $end
154
155 1 @1: /* empty */
156
157 2 expr: 'a' @1 'b'
158
159 3 @2: /* empty */
160
161 4 expr: @2 'c'
162
163
164 Terminals, with rules where they appear
165
166 $end (0) 0
167 'a' (97) 2
168 'b' (98) 2
169 'c' (99) 4
170 error (256)
171
172
173 Nonterminals, with rules where they appear
174
175 $accept (6)
176 on left: 0
177 expr (7)
178 on left: 2 4, on right: 0
179 @1 (8)
180 on left: 1, on right: 2
181 @2 (9)
182 on left: 3, on right: 4
183
184
185 state 0
186
187 0 $accept: . expr $end
188
189 'a' shift, and go to state 1
190
191 $default reduce using rule 3 (@2)
192
193 expr go to state 2
194 @2 go to state 3
195
196
197 state 1
198
199 2 expr: 'a' . @1 'b'
200
201 $default reduce using rule 1 (@1)
202
203 @1 go to state 4
204
205
206 state 2
207
208 0 $accept: expr . $end
209
210 $end shift, and go to state 5
211
212
213 state 3
214
215 4 expr: @2 . 'c'
216
217 'c' shift, and go to state 6
218
219
220 state 4
221
222 2 expr: 'a' @1 . 'b'
223
224 'b' shift, and go to state 7
225
226
227 state 5
228
229 0 $accept: expr $end .
230
231 $default accept
232
233
234 state 6
235
236 4 expr: @2 'c' .
237
238 $default reduce using rule 4 (expr)
239
240
241 state 7
242
243 2 expr: 'a' @1 'b' .
244
245 $default reduce using rule 2 (expr)
246 ]])
247
248 AT_CLEANUP
249
250
251
252 ## ---------------------- ##
253 ## Mixing %token styles. ##
254 ## ---------------------- ##
255
256
257 AT_SETUP([Mixing %token styles])
258 # Taken from the documentation: %token with a string alias, with and
259 # without an explicit token number, then %left mixing a token name and a string alias.
260 AT_DATA([input.y],
261 [[%token <operator> OR "||"
262 %token <operator> LE 134 "<="
263 %left OR "<="
264 %%
265 exp: ;
266 %%
267 ]])
268
269 AT_CHECK([bison -v -o input.c input.y])
270
271 AT_CLEANUP
272
273
274
275 ## ---------------- ##
276 ## Invalid inputs. ##
277 ## ---------------- ##
278
279
280 AT_SETUP([Invalid inputs])
281
282 AT_DATA([input.y],
283 [[%%
284 ?
285 default: 'a' }
286 %&
287 %a-does-not-exist
288 %-
289 %{
290 ]])
291 # Each invalid construct must be reported with its location, and bison must exit with status 1.
292 AT_CHECK([bison input.y], [1], [],
293 [[input.y:2.1: invalid character: `?'
294 input.y:3.14: invalid character: `}'
295 input.y:4.1: invalid character: `%'
296 input.y:4.2: invalid character: `&'
297 input.y:5.1-17: invalid directive: `%a-does-not-exist'
298 input.y:6.1: invalid character: `%'
299 input.y:6.2: invalid character: `-'
300 input.y:7.1-8.0: missing `%}' at end of file
301 ]])
302
303 AT_CLEANUP
304
305
306
307 ## ------------------- ##
308 ## Token definitions. ##
309 ## ------------------- ##
310
311
312 AT_SETUP([Token definitions])
313 # Bison once managed, when fed with `%token 'f' "f"', to #define 'f'!  The run
314 # below also checks how the alias of SPECIAL shows up in the verbose error message.
315 AT_DATA_GRAMMAR([input.y],
316 [%{
317 #include <stdio.h>
318 void yyerror (const char *s);
319 int yylex (void);
320 %}
321 [%error-verbose
322 %token MYEOF 0 "end of file"
323 %token 'a' "a"
324 %token B_TOKEN "b"
325 %token C_TOKEN 'c'
326 %token 'd' D_TOKEN
327 %token SPECIAL "\\\'\?\"\n\t??!"
328 %%
329 exp: "a" "\\\'\?\"\n\t??!";
330 %%
331 void
332 yyerror (char const *s)
333 {
334 fprintf (stderr, "%s\n", s);
335 }
336
337 int
338 yylex (void)
339 {
340 return SPECIAL;
341 }
342
343 int
344 main (void)
345 {
346 return yyparse ();
347 }
348 ]])
349
350 AT_CHECK([bison -o input.c input.y])
351 AT_COMPILE([input])
352 AT_PARSER_CHECK([./input], 1, [],
353 [syntax error, unexpected \'?"
354 ??!, expecting a
355 ])
356 AT_CLEANUP
357
358
359
360 ## -------------------- ##
361 ## Characters Escapes. ##
362 ## -------------------- ##
363
364
365 AT_SETUP([Characters Escapes])
366 # Escaped and unescaped quote characters in character and string literals must give a parser that compiles.
367 AT_DATA_GRAMMAR([input.y],
368 [%{
369 void yyerror (const char *s);
370 int yylex (void);
371 %}
372 [%%
373 exp:
374 '\'' "\'"
375 | '\"' "\""
376 | '"' "'"
377 ;
378 ]])
379 # Pacify font-lock-mode: "
380
381 AT_CHECK([bison -o input.c input.y])
382 AT_COMPILE([input.o], [-c input.c])
383 AT_CLEANUP
384
385
386
387 ## -------------- ##
388 ## Web2c Report. ##
389 ## -------------- ##
390
391 # The generation of the reduction was once wrong in Bison, and made it
392 # miss some reductions. In the following test case, the reduction on
393 # `undef_id_tok' (listed under state 2 of the report expected below) was
394 # missing. This is stripped down from the actual web2c.y.
395
396 AT_SETUP([Web2c Report])
397
398 AT_KEYWORDS([report])
399
400 AT_DATA([input.y],
401 [[%token undef_id_tok const_id_tok
402
403 %start CONST_DEC_PART
404 \f
405 %%
406 CONST_DEC_PART:
407 CONST_DEC_LIST
408 ;
409
410 CONST_DEC_LIST:
411 CONST_DEC
412 | CONST_DEC_LIST CONST_DEC
413 ;
414
415 CONST_DEC:
416 { } undef_id_tok '=' const_id_tok ';'
417 ;
418 %%
419 ]])
420
421 AT_CHECK([bison -v input.y])
422 AT_CHECK([cat input.output], 0,
423 [[Grammar
424
425 0 $accept: CONST_DEC_PART $end
426
427 1 CONST_DEC_PART: CONST_DEC_LIST
428
429 2 CONST_DEC_LIST: CONST_DEC
430 3 | CONST_DEC_LIST CONST_DEC
431
432 4 @1: /* empty */
433
434 5 CONST_DEC: @1 undef_id_tok '=' const_id_tok ';'
435
436
437 Terminals, with rules where they appear
438
439 $end (0) 0
440 ';' (59) 5
441 '=' (61) 5
442 error (256)
443 undef_id_tok (258) 5
444 const_id_tok (259) 5
445
446
447 Nonterminals, with rules where they appear
448
449 $accept (7)
450 on left: 0
451 CONST_DEC_PART (8)
452 on left: 1, on right: 0
453 CONST_DEC_LIST (9)
454 on left: 2 3, on right: 1 3
455 CONST_DEC (10)
456 on left: 5, on right: 2 3
457 @1 (11)
458 on left: 4, on right: 5
459
460
461 state 0
462
463 0 $accept: . CONST_DEC_PART $end
464
465 $default reduce using rule 4 (@1)
466
467 CONST_DEC_PART go to state 1
468 CONST_DEC_LIST go to state 2
469 CONST_DEC go to state 3
470 @1 go to state 4
471
472
473 state 1
474
475 0 $accept: CONST_DEC_PART . $end
476
477 $end shift, and go to state 5
478
479
480 state 2
481
482 1 CONST_DEC_PART: CONST_DEC_LIST .
483 3 CONST_DEC_LIST: CONST_DEC_LIST . CONST_DEC
484
485 undef_id_tok reduce using rule 4 (@1)
486 $default reduce using rule 1 (CONST_DEC_PART)
487
488 CONST_DEC go to state 6
489 @1 go to state 4
490
491
492 state 3
493
494 2 CONST_DEC_LIST: CONST_DEC .
495
496 $default reduce using rule 2 (CONST_DEC_LIST)
497
498
499 state 4
500
501 5 CONST_DEC: @1 . undef_id_tok '=' const_id_tok ';'
502
503 undef_id_tok shift, and go to state 7
504
505
506 state 5
507
508 0 $accept: CONST_DEC_PART $end .
509
510 $default accept
511
512
513 state 6
514
515 3 CONST_DEC_LIST: CONST_DEC_LIST CONST_DEC .
516
517 $default reduce using rule 3 (CONST_DEC_LIST)
518
519
520 state 7
521
522 5 CONST_DEC: @1 undef_id_tok . '=' const_id_tok ';'
523
524 '=' shift, and go to state 8
525
526
527 state 8
528
529 5 CONST_DEC: @1 undef_id_tok '=' . const_id_tok ';'
530
531 const_id_tok shift, and go to state 9
532
533
534 state 9
535
536 5 CONST_DEC: @1 undef_id_tok '=' const_id_tok . ';'
537
538 ';' shift, and go to state 10
539
540
541 state 10
542
543 5 CONST_DEC: @1 undef_id_tok '=' const_id_tok ';' .
544
545 $default reduce using rule 5 (CONST_DEC)
546 ]])
547
548 AT_CLEANUP
549
550
551 ## --------------- ##
552 ## Web2c Actions. ##
553 ## --------------- ##
554
555 # The generation of the mapping `state -> action' was once wrong in
556 # extremely specific situations. web2c.y exhibits this situation.
557 # Below is a stripped version of the grammar. It looks like one can
558 # simplify it further, but just don't: it is tuned to exhibit a bug,
559 # which disappears when applying sane grammar transformations.
560 #
561 # It used to be wrong on yydefact only:
562 #
563 # static const short int yydefact[] =
564 # {
565 # - 2, 0, 1, 0, 0, 2, 3, 2, 5, 4,
566 # + 2, 0, 1, 0, 0, 0, 3, 2, 5, 4,
567 # 0, 0
568 # };
569 #
570 # but let's check all the tables.
571
572
573 AT_SETUP([Web2c Actions])
574
575 AT_KEYWORDS([report])
576
577 AT_DATA([input.y],
578 [[%%
579 statement: struct_stat;
580 struct_stat: /* empty. */ | if else;
581 if: "if" "const" "then" statement;
582 else: "else" statement;
583 %%
584 ]])
585
586 AT_CHECK([bison -v -o input.c input.y])
587 # Check only the tables: sed strips trailing blanks and keeps each
588 # `static const' array definition through its closing brace.  We don't use --no-parser,
589 # because it is still to be implemented in the experimental branch of Bison.
590 [sed -n 's/ *$//;/^static const.*\[\] =/,/^}/p' input.c >tables.c]
591
592 AT_CHECK([[cat tables.c]], 0,
593 [[static const unsigned char yytranslate[] =
594 {
595 0, 2, 2, 2, 2, 2, 2, 2, 2, 2,
596 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
597 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
598 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
599 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
600 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
601 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
602 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
603 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
604 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
605 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
606 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
607 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
608 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
609 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
610 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
611 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
612 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
613 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
614 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
615 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
616 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
617 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
618 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
619 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
620 2, 2, 2, 2, 2, 2, 1, 2, 3, 4,
621 5, 6
622 };
623 static const unsigned char yyprhs[] =
624 {
625 0, 0, 3, 5, 6, 9, 14
626 };
627 static const yysigned_char yyrhs[] =
628 {
629 8, 0, -1, 9, -1, -1, 10, 11, -1, 3,
630 4, 5, 8, -1, 6, 8, -1
631 };
632 static const unsigned char yyrline[] =
633 {
634 0, 2, 2, 3, 3, 4, 5
635 };
636 static const char *const yytname[] =
637 {
638 "$end", "error", "$undefined", "if", "const", "then", "else", "$accept",
639 "statement", "struct_stat", "if", "else", 0
640 };
641 static const unsigned short int yytoknum[] =
642 {
643 0, 256, 257, 258, 259, 260, 261
644 };
645 static const unsigned char yyr1[] =
646 {
647 0, 7, 8, 9, 9, 10, 11
648 };
649 static const unsigned char yyr2[] =
650 {
651 0, 2, 1, 0, 2, 4, 2
652 };
653 static const unsigned char yydefact[] =
654 {
655 3, 0, 0, 2, 0, 0, 1, 3, 4, 3,
656 6, 5
657 };
658 static const yysigned_char yydefgoto[] =
659 {
660 -1, 2, 3, 4, 8
661 };
662 static const yysigned_char yypact[] =
663 {
664 -2, -1, 4, -8, 0, 2, -8, -2, -8, -2,
665 -8, -8
666 };
667 static const yysigned_char yypgoto[] =
668 {
669 -8, -7, -8, -8, -8
670 };
671 static const unsigned char yytable[] =
672 {
673 10, 1, 11, 5, 6, 0, 7, 9
674 };
675 static const yysigned_char yycheck[] =
676 {
677 7, 3, 9, 4, 0, -1, 6, 5
678 };
679 static const unsigned char yystos[] =
680 {
681 0, 3, 8, 9, 10, 4, 0, 6, 11, 5,
682 8, 8
683 };
684 ]])
685
686 AT_CLEANUP
687
688
689 ## ------------------------- ##
690 ## yycheck Bound Violation. ##
691 ## ------------------------- ##
692
693
694 # _AT_DATA_DANCER_Y(BISON-OPTIONS)
695 # --------------------------------
696 # Create dancer.y, with BISON-OPTIONS placed among its declarations.  The
697 # grammar, taken from Andrew Suffield's GPL'd implementation of DGMTP, the
698 # Dancer Generic Message Transport Protocol, used to violate yycheck's bounds
699 # when issuing a verbose error message.  Keep this test so that possible bounds-checking compilers could check all the skeletons.
700 m4_define([_AT_DATA_DANCER_Y],
701 [AT_DATA_GRAMMAR([dancer.y],
702 [%{
703 static int yylex (AT_LALR1_CC_IF([int *], [void]));
704 AT_LALR1_CC_IF([],
705 [#include <stdio.h>
706 static void yyerror (const char *);])
707 %}
708 $1
709 %token ARROW INVALID NUMBER STRING DATA
710 %defines
711 %verbose
712 %error-verbose
713 /* Grammar follows */
714 %%
715 line: header body
716 ;
717
718 header: '<' from ARROW to '>' type ':'
719 | '<' ARROW to '>' type ':'
720 | ARROW to type ':'
721 | type ':'
722 | '<' '>'
723 ;
724
725 from: DATA
726 | STRING
727 | INVALID
728 ;
729
730 to: DATA
731 | STRING
732 | INVALID
733 ;
734
735 type: DATA
736 | STRING
737 | INVALID
738 ;
739
740 body: /* empty */
741 | body member
742 ;
743
744 member: STRING
745 | DATA
746 | '+' NUMBER
747 | '-' NUMBER
748 | NUMBER
749 | INVALID
750 ;
751 %%
752 AT_LALR1_CC_IF(
753 [/* A C++ error reporting function. */
754 void
755 yy::parser::error (const location&, const std::string& m)
756 {
757 std::cerr << m << std::endl;
758 }
759
760 int
761 yyparse ()
762 {
763 yy::parser parser;
764 parser.set_debug_level (!!YYDEBUG);
765 return parser.parse ();
766 }
767 ],
768 [static void
769 yyerror (const char *s)
770 {
771 fprintf (stderr, "%s\n", s);
772 }])
773
774 static int
775 yylex (AT_LALR1_CC_IF([int *lval], [void]))
776 [{
777 static int toknum = 0;
778 int tokens[] =
779 {
780 ':', -1
781 };
782 ]AT_LALR1_CC_IF([*lval = 0; /* Pacify GCC. */])[
783 return tokens[toknum++];
784 }]
785
786 int
787 main (void)
788 {
789 return yyparse ();
790 }
791 ])
792 ])# _AT_DATA_DANCER_Y
793
794
795 # AT_CHECK_DANCER(BISON-OPTIONS)
796 # ------------------------------
797 # Generate the grammar, run bison on it, compile the parser as C or C++, and run it, expecting exit status 1 and a syntax error message.
798 m4_define([AT_CHECK_DANCER],
799 [AT_SETUP([Dancer $1])
800 AT_BISON_OPTION_PUSHDEFS([$1])
801 _AT_DATA_DANCER_Y([$1])
802 # Run bison exactly once, naming the output file after the language of the selected skeleton.
803 AT_LALR1_CC_IF(
804 [AT_CHECK([bison -o dancer.cc dancer.y])
805 AT_COMPILE_CXX([dancer])],
806 [AT_CHECK([bison -o dancer.c dancer.y])
807 AT_COMPILE([dancer])])
808 AT_PARSER_CHECK([./dancer], 1, [],
809 [syntax error, unexpected ':'
810 ])
811 AT_BISON_OPTION_POPDEFS
812 AT_CLEANUP
813 ])
814 # Exercise the Dancer test with no extra directive, with %glr-parser, and with the lalr1.cc skeleton.
815 AT_CHECK_DANCER()
816 AT_CHECK_DANCER([%glr-parser])
817 AT_CHECK_DANCER([%skeleton "lalr1.cc"])