# Torturing Bison. -*- Autotest -*-
-# Copyright 2001 Free Software Foundation, Inc.
+# Copyright (C) 2001, 2002, 2004 Free Software Foundation, Inc.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
AT_BANNER([[Torture Tests.]])
+# AT_INCREASE_DATA_SIZE(SIZE)
+# ---------------------------
+# Try to increase the data size to SIZE KiB if possible.
+#
+# Expands to shell code that reads the current soft data limit with
+# "ulimit -S -d" (errors discarded) and raises the soft limit to SIZE
+# only when the answer starts with a digit and is smaller than SIZE.
+# Any other answer (presumably "unlimited", or nothing when ulimit is
+# not supported) leaves the limit untouched.  A failure of the final
+# ulimit call is not checked.
+m4_define([AT_INCREASE_DATA_SIZE],
+[data_limit=`(ulimit -S -d) 2>/dev/null`
+case $data_limit in
+[[0-9]]*)
+ if test "$data_limit" -lt $1; then
+ ulimit -S -d $1
+ fi
+esac])
+
+
+## ------------------------------------- ##
+## Creating a large artificial grammar. ##
+## ------------------------------------- ##
+
+# AT_DATA_TRIANGULAR_GRAMMAR(FILE-NAME, SIZE)
+# -------------------------------------------
+# Create FILE-NAME, containing a self checking parser for a huge
+# triangular grammar.
+#
+# gengram.pl is written with AT_DATA and run with SIZE as its argument;
+# its standard output becomes FILE-NAME.  If perl fails, the check
+# exits with status 77.  Alternative N of exp is the token sequence
+# "1" "2" ... "N" END and has value N (a lone END has value 0).  The
+# actions of input abort unless the exp values are 0, 1, 2, ... in
+# order, and the generated yylex produces exactly that token stream.
+m4_define([AT_DATA_TRIANGULAR_GRAMMAR],
+[AT_DATA([[gengram.pl]],
+[[#! /usr/bin/perl -w
+
+use strict;
+# Size of the triangle: first command line argument, else 10.
+my $max = $ARGV[0] || 10;
+
+print <<EOF;
+]AT_DATA_GRAMMAR_PROLOGUE[
+%{
+#include <stdio.h>
+#include <stdlib.h>
+
+#define YYERROR_VERBOSE 1
+#define YYDEBUG 1
+
+static int yylex (void);
+static void yyerror (const char *msg);
+%}
+%union
+{
+ int val;
+};
+
+%token END "end"
+%type <val> exp input
+EOF
+
+# Declare tokens t1 .. tmax: token tN has number N and string alias "N".
+for my $size (1 .. $max)
+ {
+ print "%token t$size $size \"$size\"\n";
+ };
+
+print <<EOF;
+%%
+input:
+ exp { if (\@S|@1 != 0) abort (); \$\$ = \@S|@1; }
+| input exp { if (\@S|@2 != \@S|@1 + 1) abort (); \$\$ = \@S|@2; }
+;
+
+exp:
+ END
+ { \$\$ = 0; }
+EOF
+
+# One more alternative of exp per size: "1" .. "size" then END, valued size.
+for my $size (1 .. $max)
+ {
+ use Text::Wrap;
+ print wrap ("| ", " ",
+ (map { "\"$_\"" } (1 .. $size)),
+ " END \n"),
+ " { \$\$ = $size; }\n";
+ };
+print ";\n";
+
+print <<EOF;
+%%
+static int
+yylex (void)
+{
+ static int inner = 1;
+ static int outer = 0;
+ if (outer > $max)
+ return 0;
+ else if (inner > outer)
+ {
+ inner = 1;
+ ++outer;
+ return END;
+ }
+ return inner++;
+}
+
+static void
+yyerror (const char *msg)
+{
+ fprintf (stderr, "%s\\n", msg);
+}
+
+int
+main (void)
+{
+ yydebug = !!getenv ("YYDEBUG");
+ return yyparse ();
+}
+EOF
+]])
+
+AT_CHECK([perl -w ./gengram.pl $2 || exit 77], 0, [stdout])
+mv stdout $1
+])
+
+
+## -------------- ##
+## Big triangle. ##
+## -------------- ##
+
+AT_SETUP([Big triangle])
+
+# Generate a triangular grammar of size 200, run bison on it, then
+# build the parser with AT_COMPILE and execute ./input.
+#
+# I have been able to go up to 2000 on my machine.
+# I tried 3000, a 29 MB grammar file, but then my system killed bison.
+# With 500 and the new parser, which consumes far too much memory,
+# it gets killed too. Of course the parser is to be cleaned up.
+AT_DATA_TRIANGULAR_GRAMMAR([input.y], [200])
+AT_CHECK([bison -v -o input.c input.y])
+AT_COMPILE([input])
+AT_PARSER_CHECK([./input])
+
+AT_CLEANUP
+
+
+
+# AT_DATA_HORIZONTAL_GRAMMAR(FILE-NAME, SIZE)
+# -------------------------------------------
+# Create FILE-NAME, containing a self checking parser for a huge
+# horizontal grammar.
+#
+# gengram.pl is written with AT_DATA and run with SIZE as its argument;
+# its standard output becomes FILE-NAME.  If perl fails, the check
+# exits with status 77.  The grammar has a single rule, exp, whose
+# right-hand side is the SIZE tokens "1" "2" ... "SIZE" in a row; the
+# generated yylex returns 1, 2, ..., SIZE and then 0.
+m4_define([AT_DATA_HORIZONTAL_GRAMMAR],
+[AT_DATA([[gengram.pl]],
+[[#! /usr/bin/perl -w
+
+use strict;
+# Length of the rule: first command line argument, else 10.
+my $max = $ARGV[0] || 10;
+
+print <<EOF;
+]AT_DATA_GRAMMAR_PROLOGUE[
+%{
+#include <stdio.h>
+#include <stdlib.h>
+
+#define YYERROR_VERBOSE 1
+#define YYDEBUG 1
+
+static int yylex (void);
+static void yyerror (const char *msg);
+%}
+EOF
+
+# Declare tokens t1 .. tmax: token tN has number N and string alias "N".
+for my $size (1 .. $max)
+ {
+ print "%token t$size $size \"$size\"\n";
+ };
+
+print <<EOF;
+%%
+EOF
+
+# The one and only rule, wrapped over several lines by Text::Wrap.
+use Text::Wrap;
+print
+ wrap ("exp: ", " ",
+ (map { "\"$_\"" } (1 .. $max)), ";"),
+ "\n";
+
+print <<EOF;
+%%
+static int
+yylex (void)
+{
+ static int counter = 1;
+ if (counter > $max)
+ return 0;
+ else
+ return counter++;
+}
+
+static void
+yyerror (const char *msg)
+{
+ fprintf (stderr, "%s\\n", msg);
+}
+
+int
+main (void)
+{
+ yydebug = !!getenv ("YYDEBUG");
+ return yyparse ();
+}
+EOF
+]])
+
+AT_CHECK([perl -w ./gengram.pl $2 || exit 77], 0, [stdout])
+mv stdout $1
+])
+
+
+## ---------------- ##
+## Big horizontal. ##
+## ---------------- ##
+
+AT_SETUP([Big horizontal])
+
+# Generate a horizontal grammar of size 1000, try to raise the data
+# size limit, run bison on the grammar, then build the parser with
+# AT_COMPILE and execute ./input.
+#
+# I have been able to go up to 10000 on my machine, but I had to
+# increase the maximum stack size (* 100). It gave:
+#
+# input.y 263k
+# input.tab.c 1.3M
+# input 453k
+#
+# gengram.pl 10000 0.70s user 0.01s sys 99% cpu 0.711 total
+# bison input.y 730.56s user 0.53s sys 99% cpu 12:12.34 total
+# gcc -Wall input.tab.c -o input 5.81s user 0.20s sys 100% cpu 6.01 total
+# ./input 0.00s user 0.01s sys 108% cpu 0.01 total
+#
+AT_DATA_HORIZONTAL_GRAMMAR([input.y], [1000])
+
+# GNU m4 requires about 70 MiB for this test on a 32-bit host.
+# Ask for 200 MiB, which should be plenty even on a 64-bit host.
+AT_INCREASE_DATA_SIZE(204000)
+
+AT_CHECK([bison -v -o input.c input.y])
+AT_COMPILE([input])
+AT_PARSER_CHECK([./input])
+
+AT_CLEANUP
+
+
+
+# AT_DATA_LOOK_AHEAD_TOKENS_GRAMMAR(FILE-NAME, SIZE)
+# --------------------------------------------------
+# Create FILE-NAME, containing a self checking parser for a grammar
+# requiring SIZE look-ahead tokens.
+#
+# gengram.pl is written with AT_DATA and run with SIZE as its argument;
+# its standard output becomes FILE-NAME.  If perl fails, the check
+# exits with status 77.  Every nonterminal nK derives the same single
+# token and has value K, and exp is made of the alternatives nK "K", so
+# the token that follows decides which nK to reduce to.  The actions
+# abort unless the exp values are 1, 2, 3, ... in order; the generated
+# yylex alternates between `token' and the successive numbered tokens.
+m4_define([AT_DATA_LOOK_AHEAD_TOKENS_GRAMMAR],
+[AT_DATA([[gengram.pl]],
+[[#! /usr/bin/perl -w
+
+use strict;
+use Text::Wrap;
+# Number of nonterminals and numbered tokens: first argument, else 10.
+my $max = $ARGV[0] || 10;
+
+print <<EOF;
+%{
+#include <stdio.h>
+#include <stdlib.h>
+
+#define YYERROR_VERBOSE 1
+#define YYDEBUG 1
+
+static int yylex (void);
+static void yyerror (const char *msg);
+%}
+%union
+{
+ int val;
+};
+
+%type <val> input exp
+%token token
+EOF
+
+# Give the val type to every nonterminal n1 .. nmax.
+print
+ wrap ("%type <val> ",
+ " ",
+ map { "n$_" } (1 .. $max)),
+ "\n";
+
+# Declare tokens t1 .. tmax: token tN has number N and string alias "N".
+for my $count (1 .. $max)
+ {
+ print "%token t$count $count \"$count\"\n";
+ };
+
+print <<EOF;
+%%
+input:
+ exp { if (\@S|@1 != 1) abort (); \$\$ = \@S|@1; }
+| input exp { if (\@S|@2 != \@S|@1 + 1) abort (); \$\$ = \@S|@2; }
+;
+
+exp:
+ n1 "1" { if (\@S|@1 != 1) abort (); }
+EOF
+
+# Remaining alternatives of exp; the first one was printed above.
+for my $count (2 .. $max)
+ {
+ print "| n$count \"$count\" { if (\@S|@1 != $count) abort (); }\n";
+ };
+print ";\n";
+
+# Every nK has the same right-hand side and differs only by its value.
+for my $count (1 .. $max)
+ {
+ print "n$count: token { \$\$ = $count; };\n";
+ };
+
+print <<EOF;
+%%
+static int
+yylex (void)
+{
+ static int return_token = 1;
+ static int counter = 1;
+ if (counter > $max)
+ return 0;
+ if (return_token)
+ {
+ return_token = 0;
+ return token;
+ }
+ return_token = 1;
+ return counter++;
+}
+
+static void
+yyerror (const char *msg)
+{
+ fprintf (stderr, "%s\\n", msg);
+}
+
+int
+main (void)
+{
+ yydebug = !!getenv ("YYDEBUG");
+ return yyparse ();
+}
+EOF
+]])
+
+AT_CHECK([perl -w ./gengram.pl $2 || exit 77], 0, [stdout])
+mv stdout $1
+])
+
+
+## ------------------------ ##
+## Many look-ahead tokens. ##
+## ------------------------ ##
+
+AT_SETUP([Many look-ahead tokens])
+
+# Generate a look-ahead grammar of size 1000, try to raise the data
+# size limit, run bison on the grammar, then build the parser with
+# AT_COMPILE and execute ./input.
+AT_DATA_LOOK_AHEAD_TOKENS_GRAMMAR([input.y], [1000])
+
+# GNU m4 requires about 70 MiB for this test on a 32-bit host.
+# Ask for 200 MiB, which should be plenty even on a 64-bit host.
+AT_INCREASE_DATA_SIZE(204000)
+
+AT_CHECK([bison -v -o input.c input.y])
+AT_COMPILE([input])
+AT_PARSER_CHECK([./input])
+
+AT_CLEANUP
+
+
+
# AT_DATA_STACK_TORTURE(C-PROLOGUE)
# ---------------------------------
# A parser specialized in torturing the stack size.
[[%{
#include <stdio.h>
#include <stdlib.h>
-#include <assert.h>
]$1[
static int yylex (void);
static void yyerror (const char *msg);
-#define YYPRINT(File, Type, Value) \
- fprintf (File, " (%d, stack size = %d, max = %d)", \
- Value, yyssp - yyss + 1, yystacksize);
%}
%error-verbose
%debug
int
main (int argc, const char **argv)
{
- assert (argc == 2);
+ if (argc != 2)
+ abort ();
yylval = atoi (argv[1]);
yydebug = 1;
return yyparse ();
}
]])
-AT_CHECK([bison input.y -o input.c])
-AT_CHECK([$CC $CFLAGS $CPPFLAGS input.c -o input], 0, [], [ignore])
+AT_CHECK([bison -o input.c input.y])
+AT_COMPILE([input])
])
AT_DATA_STACK_TORTURE
# Below the limit of 200.
-AT_CHECK([input 20], 0, [], [ignore])
+AT_PARSER_CHECK([./input 20], 0, [], [ignore])
# Two enlargements: 2 * 2 * 200.
-AT_CHECK([input 900], 0, [], [ignore])
+AT_PARSER_CHECK([./input 900], 0, [], [ignore])
# Fails: beyond the limit of 10,000 (which we don't reach anyway since we
# multiply by two starting at 200 => 5120 is the last possible).
-AT_CHECK([input 10000], 1, [], [ignore])
+AT_PARSER_CHECK([./input 10000], 1, [], [ignore])
AT_CLEANUP
AT_DATA_STACK_TORTURE([[#define YYSTACK_USE_ALLOCA 0]])
# Below the limit of 200.
-AT_CHECK([input 20], 0, [], [ignore])
+AT_PARSER_CHECK([./input 20], 0, [], [ignore])
# Two enlargements: 2 * 2 * 200.
-AT_CHECK([input 900], 0, [], [ignore])
+AT_PARSER_CHECK([./input 900], 0, [], [ignore])
# Fails: beyond the limit of 10,000 (which we don't reach anyway since we
# multiply by two starting at 200 => 5120 is the last possible).
-AT_CHECK([input 10000], 1, [], [ignore])
-
-AT_CLEANUP
-
-
-## ----------------- ##
-## GNU AWK Grammar. ##
-## ----------------- ##
-
-AT_SETUP([GNU AWK Grammar])
-
-# We have been careful to strip all the actions excepts the
-# mid-rule actions. We rely on %expect to check that there are
-# indeed 65 SR conflicts.
-#
-# Bison was once wrong, due to an incorrect computation of nullable.
-# It reported 485 SR conflicts!
-
-AT_DATA([[input.y]],
-[[%expect 65
-
-%token FUNC_CALL NAME REGEXP
-%token ERROR
-%token YNUMBER YSTRING
-%token RELOP APPEND_OP
-%token ASSIGNOP MATCHOP NEWLINE CONCAT_OP
-%token LEX_BEGIN LEX_END LEX_IF LEX_ELSE LEX_RETURN LEX_DELETE
-%token LEX_WHILE LEX_DO LEX_FOR LEX_BREAK LEX_CONTINUE
-%token LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT LEX_FUNCTION
-%token LEX_GETLINE LEX_NEXTFILE
-%token LEX_IN
-%token LEX_AND LEX_OR INCREMENT DECREMENT
-%token LEX_BUILTIN LEX_LENGTH
-
-/* Lowest to highest */
-%right ASSIGNOP
-%right '?' ':'
-%left LEX_OR
-%left LEX_AND
-%left LEX_GETLINE
-%nonassoc LEX_IN
-%left FUNC_CALL LEX_BUILTIN LEX_LENGTH
-%nonassoc ','
-%nonassoc MATCHOP
-%nonassoc RELOP '<' '>' '|' APPEND_OP TWOWAYIO
-%left CONCAT_OP
-%left YSTRING YNUMBER
-%left '+' '-'
-%left '*' '/' '%'
-%right '!' UNARY
-%right '^'
-%left INCREMENT DECREMENT
-%left '$'
-%left '(' ')'
-%%
-
-start
- : opt_nls program opt_nls
- ;
-
-program
- : rule
- | program rule
- | error
- | program error
- | /* empty */
- ;
-
-rule
- : LEX_BEGIN {} action
- | LEX_END {} action
- | LEX_BEGIN statement_term
- | LEX_END statement_term
- | pattern action
- | action
- | pattern statement_term
- | function_prologue function_body
- ;
-
-func_name
- : NAME
- | FUNC_CALL
- | lex_builtin
- ;
-
-lex_builtin
- : LEX_BUILTIN
- | LEX_LENGTH
- ;
-
-function_prologue
- : LEX_FUNCTION {} func_name '(' opt_param_list r_paren opt_nls
- ;
-
-function_body
- : l_brace statements r_brace opt_semi opt_nls
- | l_brace r_brace opt_semi opt_nls
- ;
-
-
-pattern
- : exp
- | exp ',' exp
- ;
-
-regexp
- /*
- * In this rule, want_regexp tells yylex that the next thing
- * is a regexp so it should read up to the closing slash.
- */
- : '/' {} REGEXP '/'
- ;
-
-action
- : l_brace statements r_brace opt_semi opt_nls
- | l_brace r_brace opt_semi opt_nls
- ;
-
-statements
- : statement
- | statements statement
- | error
- | statements error
- ;
-
-statement_term
- : nls
- | semi opt_nls
- ;
-
-statement
- : semi opt_nls
- | l_brace r_brace
- | l_brace statements r_brace
- | if_statement
- | LEX_WHILE '(' exp r_paren opt_nls statement
- | LEX_DO opt_nls statement LEX_WHILE '(' exp r_paren opt_nls
- | LEX_FOR '(' NAME LEX_IN NAME r_paren opt_nls statement
- | LEX_FOR '(' opt_exp semi opt_nls exp semi opt_nls opt_exp r_paren opt_nls statement
- | LEX_FOR '(' opt_exp semi opt_nls semi opt_nls opt_exp r_paren opt_nls statement
- | LEX_BREAK statement_term
- | LEX_CONTINUE statement_term
- | print '(' expression_list r_paren output_redir statement_term
- | print opt_rexpression_list output_redir statement_term
- | LEX_NEXT statement_term
- | LEX_NEXTFILE statement_term
- | LEX_EXIT opt_exp statement_term
- | LEX_RETURN {} opt_exp statement_term
- | LEX_DELETE NAME '[' expression_list ']' statement_term
- | LEX_DELETE NAME statement_term
- | exp statement_term
- ;
-
-print
- : LEX_PRINT
- | LEX_PRINTF
- ;
-
-if_statement
- : LEX_IF '(' exp r_paren opt_nls statement
- | LEX_IF '(' exp r_paren opt_nls statement
- LEX_ELSE opt_nls statement
- ;
-
-nls
- : NEWLINE
- | nls NEWLINE
- ;
-
-opt_nls
- : /* empty */
- | nls
- ;
-
-input_redir
- : /* empty */
- | '<' simp_exp
- ;
-
-output_redir
- : /* empty */
- | '>' exp
- | APPEND_OP exp
- | '|' exp
- | TWOWAYIO exp
- ;
-
-opt_param_list
- : /* empty */
- | param_list
- ;
-
-param_list
- : NAME
- | param_list comma NAME
- | error
- | param_list error
- | param_list comma error
- ;
-
-/* optional expression, as in for loop */
-opt_exp
- : /* empty */
- | exp
- ;
-
-opt_rexpression_list
- : /* empty */
- | rexpression_list
- ;
-
-rexpression_list
- : rexp
- | rexpression_list comma rexp
- | error
- | rexpression_list error
- | rexpression_list error rexp
- | rexpression_list comma error
- ;
-
-opt_expression_list
- : /* empty */
- | expression_list
- ;
-
-expression_list
- : exp
- | expression_list comma exp
- | error
- | expression_list error
- | expression_list error exp
- | expression_list comma error
- ;
-
-/* Expressions, not including the comma operator. */
-exp : variable ASSIGNOP {} exp
- | '(' expression_list r_paren LEX_IN NAME
- | exp '|' LEX_GETLINE opt_variable
- | exp TWOWAYIO LEX_GETLINE opt_variable
- | LEX_GETLINE opt_variable input_redir
- | exp LEX_AND exp
- | exp LEX_OR exp
- | exp MATCHOP exp
- | regexp
- | '!' regexp %prec UNARY
- | exp LEX_IN NAME
- | exp RELOP exp
- | exp '<' exp
- | exp '>' exp
- | exp '?' exp ':' exp
- | simp_exp
- | exp simp_exp %prec CONCAT_OP
- ;
-
-rexp
- : variable ASSIGNOP {} rexp
- | rexp LEX_AND rexp
- | rexp LEX_OR rexp
- | LEX_GETLINE opt_variable input_redir
- | regexp
- | '!' regexp %prec UNARY
- | rexp MATCHOP rexp
- | rexp LEX_IN NAME
- | rexp RELOP rexp
- | rexp '?' rexp ':' rexp
- | simp_exp
- | rexp simp_exp %prec CONCAT_OP
- ;
-
-simp_exp
- : non_post_simp_exp
- /* Binary operators in order of decreasing precedence. */
- | simp_exp '^' simp_exp
- | simp_exp '*' simp_exp
- | simp_exp '/' simp_exp
- | simp_exp '%' simp_exp
- | simp_exp '+' simp_exp
- | simp_exp '-' simp_exp
- | variable INCREMENT
- | variable DECREMENT
- ;
-
-non_post_simp_exp
- : '!' simp_exp %prec UNARY
- | '(' exp r_paren
- | LEX_BUILTIN
- '(' opt_expression_list r_paren
- | LEX_LENGTH '(' opt_expression_list r_paren
- | LEX_LENGTH
- | FUNC_CALL '(' opt_expression_list r_paren
- | variable
- | INCREMENT variable
- | DECREMENT variable
- | YNUMBER
- | YSTRING
- | '-' simp_exp %prec UNARY
- | '+' simp_exp %prec UNARY
- ;
-
-opt_variable
- : /* empty */
- | variable
- ;
-
-variable
- : NAME
- | NAME '[' expression_list ']'
- | '$' non_post_simp_exp
- ;
-
-l_brace
- : '{' opt_nls
- ;
-
-r_brace
- : '}' opt_nls
- ;
-
-r_paren
- : ')'
- ;
-
-opt_semi
- : /* empty */
- | semi
- ;
-
-semi
- : ';'
- ;
-
-comma : ',' opt_nls
- ;
-
-%%
-]])
-
-# Pass plenty of options, to exercise plenty of code, even if we
-# don't actually check the output. But SEGV is watching us, and
-# so might do dmalloc.
-AT_CHECK([[bison --verbose --defines input.y]])
+AT_PARSER_CHECK([./input 10000], 1, [], [ignore])
AT_CLEANUP