X-Git-Url: https://git.saurik.com/bison.git/blobdiff_plain/7e5f9c54c0c6c0e695857e6437507a54423d0cfe..30bb2edccf7f352a0c149513905f5e1fcac04aaf:/etc/bench.pl.in diff --git a/etc/bench.pl.in b/etc/bench.pl.in index 041bc3f6..d058ce19 100755 --- a/etc/bench.pl.in +++ b/etc/bench.pl.in @@ -19,71 +19,203 @@ =head1 NAME -bench.pl - perform benches on Bison parsers. +bench.pl - bench marks for Bison parsers. =head1 SYNOPSIS - ./bench.pl + ./bench.pl [OPTIONS]... DIRECTIVES + +=head1 DIRECTIVES + +Specify the set of benches to run. The following grammar defines the +I<directives>: + + directives ::= + directives | directives -- Alternation + | directives & directives -- Concatenation + | [ directives ] -- Optional + | ( directives ) -- Parentheses + | %s skeleton -- %skeleton "skeleton" + | #d definition -- %code { #define definition } + | directive + +Parentheses only group to override precedence. For instance: + + [ %debug ] & [ %error-verbose ] & [ %define variant ] + +will generate eight different cases. =head1 OPTIONS +=over 4 + +=item B<-b>, B<--bench> + +Predefined benches, that is, combination of a grammar and a I<directives> +request. + +=over 4 + +=item I<fusion> + +Test F with three stacks against F which +uses a single one. + +=item I<push> + +Test the push parser vs. the pull interface. Use the C parser. + +=item I<variant> + +Test the use of variants instead of union in the C++ parser. + +=back + =item B<-c>, B<--cflags>=I -Flags to pass to the C or C++ compiler. +Flags to pass to the C or C++ compiler. Defaults to -O2. + +=item B<-d>, B<--directive>=I<directives> + +Add a set of Bison directives to bench against each other. + +=item B<-g>, B<--grammar>=I<grammar> + +Select the base I<grammar> to use. Defaults to I<calc>. + +=over 4 + +=item I<calc> + +Traditional calculator. + +=item I<list> + +C++ grammar that uses std::string and std::list. Can be used with +or without %define variant. + +=item I<triangular> + +Artificial grammar with very long rules. + +=back + +=item B<-h>, B<--help> + +Display this message and exit successfully. 
The more verbose, the more +details. =item B<-i>, B<--iterations>=I -Say how many times a single test of the bench must be run. +Say how many times a single test of the bench must be run. If +negative, specify the minimum number of CPU seconds to run. Defaults +to -1. + +=item B<-q>, B<--quiet> + +Decrease the verbosity level (defaults to 1). =item B<-v>, B<--verbose> -Raise the verbosity level. Currently only affects B<--help>. +Raise the verbosity level (defaults to 1). + +=back =cut +use strict; use IO::File; -use Benchmark qw (:all); +################################################################## + +=head1 VARIABLES + +=over 4 + +=item C<$bench> + +The name of the predefined bench to run, if any. + +=item C<$bison> + +The Bison program to use to compile the grammar. + +=item C<$cc> + +The C compiler. + +=item C<$cxx> + +The C++ compiler. + +=item C<$cflags> + +Compiler flags (C or C++). + +=item C<@directive> + +A list of directive sets to measure against each other. + +=item C<$iterations> + +The number of times the parser is run for a bench. + +=item C<$verbose> + +Verbosity level. + +=back + +=cut + +my $bench; my $bison = $ENV{'BISON'} || '@abs_top_builddir@/tests/bison'; my $cc = $ENV{'CC'} || 'gcc'; my $cxx = $ENV{'CXX'} || 'g++'; -# Compiler flags (C or C++). -my $cflags = ''; -# The number of times the parser is run for a bench. -my $iterations = 50; +my $cflags = '-O2'; +my @directive = (); +my $grammar = 'calc'; +my $iterations = -1; +my $verbose = 1; -################################################################## +=head1 FUNCTIONS + +=over 4 -=head1 DESCRIPTIONS +=item C<verbose($level, $message)> -=head2 Functions +Report the C<$message> if C<$level> E<lt>= C<$verbose>. -=over 4 +=cut + +sub verbose($$) +{ + my ($level, $message) = @_; + print STDERR $message + if $level <= $verbose; +} + + +###################################################################### =item C Format the list of directives for Bison for bench named C<$bench>. 
-The special fake C<%variant> directive requests the use of -Boost.Variants instead of a regular union. So don't pass it, it is -not a valid directive. - =cut sub directives($@) { - my ($bench, @directives) = @_; + my ($bench, @directive) = @_; my $res = "/* Directives for bench `$bench'. */\n"; - for my $d (@directives) - { - $res .= $d . "\n" - unless $d eq '%variant'; - } + $res .= join ("\n", @directive) . "\n"; $res .= "/* End of directives for bench `$bench'. */\n"; return $res; } -=item C +###################################################################### + +=item C Create a large triangular grammar which looks like : @@ -103,16 +235,16 @@ Create a large triangular grammar which looks like : C<$base> is the base name for the file to create (F<$base.y>). C<$max> is the number of such rules (here, 5). You may pass -additional Bison C<@directives>. +additional Bison C<@directive>. The created parser is self contained: it includes its scanner, and source of input. =cut -sub triangular_grammar ($$$) +sub generate_grammar_triangular ($$@) { - my ($base, $max, @directives) = @_; - my $directives = directives ($base, @directives); + my ($base, $max, @directive) = @_; + my $directives = directives ($base, @directive); my $out = new IO::File ">$base.y" or die; @@ -220,18 +352,23 @@ sub calc_input ($$) } ################################################################## -=item C + +=item C Generate a Bison file F<$base.y> for a calculator parser in C. Pass -the additional Bison C<@directives>. C<$max> is ignored, but left to +the additional Bison C<@directive>. C<$max> is ignored, but left to have the same interface as C. 
=cut -sub calc_grammar ($$$) +sub generate_grammar_calc ($$@) { - my ($base, $max, @directives) = @_; - my $directives = directives ($base, @directives); + my ($base, $max, @directive) = @_; + my $directives = directives ($base, @directive); + + # Putting this request here is stupid, since the input will be + # generated each time we generate a grammar. + calc_input ('calc', 200); my $out = new IO::File ">$base.y" or die; @@ -430,32 +567,31 @@ EOF ################################################################## -=item C +=item C -Generate a Bison file F<$base.y> that uses, or not, the Boost.Variants -depending on the C<@directives>. +Generate a Bison file F<$base.y> for a C++ parser that uses C++ +objects (std::string, std::list). Tailored for using %define variant. =cut -sub variant_grammar ($$$) +sub generate_grammar_list ($$@) { - my ($base, $max, @directives) = @_; - my $directives = directives ($base, @directives); - my $variant = grep { '%variant' } @directives; - + my ($base, $max, @directive) = @_; + my $directives = directives ($base, @directive); + my $variant = grep { /%define "?variant"?/ } @directive; my $out = new IO::File ">$base.y" or die; print $out < } -%code // code for the .cc file +%code // *.c { #include #include @@ -464,20 +600,26 @@ sub variant_grammar ($$$) // Prototype of the yylex function providing subsequent tokens. 
static yy::parser::token_type yylex(yy::parser::semantic_type* yylval); -#define STAGE_MAX ($max * 10) +#define STAGE_MAX ($max * 10) // max = $max + #define USE_VARIANTS $variant #if USE_VARIANTS # define IF_VARIANTS(True, False) True #else # define IF_VARIANTS(True, False) False #endif + +#ifdef ONE_STAGE_BUILD +# define IF_ONE_STAGE_BUILD(True, False) True +#else +# define IF_ONE_STAGE_BUILD(True, False) False +#endif } EOF if ($variant) { print $out <<'EOF'; -%code variant {int,std::string} %token TEXT %token NUMBER %printer { std::cerr << "Number: " << $$; } @@ -504,7 +646,7 @@ EOF } else { - # Not using Boost variants. + # Not using Bison variants. print $out <<'EOF'; %union {int ival; std::string* sval;} %token TEXT @@ -543,12 +685,28 @@ yylex(yy::parser::semantic_type* yylval) return yy::parser::token::END_OF_FILE; else if (stage % 2) { - IF_VARIANTS(*yylval, yylval->ival) = stage; +#if USE_VARIANTS +# ifdef ONE_STAGE_BUILD + yylval->build(stage); +# else + yylval->build() = stage; +# endif +#else + yylval->ival = stage; +#endif return yy::parser::token::NUMBER; } else { - IF_VARIANTS(*yylval =, yylval->sval = new) std::string("A string."); +#if USE_VARIANTS +# ifdef ONE_STAGE_BUILD + yylval->build(std::string("A string.")); +# else + yylval->build() = std::string("A string."); +# endif +#else + yylval->sval = new std::string("A string."); +#endif return yy::parser::token::TEXT; } abort(); @@ -565,7 +723,9 @@ yy::parser::error(const yy::parser::location_type& yylloc, int main(int argc, char *argv[]) { yy::parser p; +#if YYDEBUG p.set_debug_level(!!getenv("YYDEBUG")); +#endif p.parse(); return 0; } @@ -574,6 +734,43 @@ EOF ################################################################## +=item C + +Generate F<$base.y> by calling C<&generate_grammar_$name>. 
+ +=cut + +sub generate_grammar ($$@) +{ + my ($name, $base, @directive) = @_; + verbose 3, "Generating $base.y\n"; + my %generator = + ( + "calc" => \&generate_grammar_calc, + "list" => \&generate_grammar_list, + "triangular" => \&generate_grammar_triangular, + ); + &{$generator{$name}}($base, 200, @directive); +} + +################################################################## + +=item C + +Run, possibly verbosely, the shell C<$command>. + +=cut + +sub run ($) +{ + my ($command) = @_; + verbose 3, "$command\n"; + system ("$command") == 0 + or die "$command failed"; +} + +################################################################## + =item C Compile C<$base.y> to an executable C, Using the C or C++ compiler @@ -589,48 +786,81 @@ sub compile ($) my $compiler = $language eq 'C++' ? $cxx : $cc; - system ("$bison $base.y -o $base.c") == 0 - or die; - system ("$compiler -o $base $cflags $base.c") == 0 - or die; + run "$bison $base.y -o $base.c"; + run "$compiler -o $base $cflags $base.c"; } -=item C - -Generate benches for C<$gram>. C<$gram> should be C or -C. C<%bench> is a hash of the form: +###################################################################### - $name => @directives +=item C -where C<$name> is the name of the bench, and C<@directives> are the -Bison directive to use for this bench. All the benches are compared -against each other, repeated 50 times. +Generate benches for the C<$grammar> and the directive specification +given in the list of C<@token>. =cut -sub bench_grammar ($%) +sub bench ($@) { - my ($gram, %test) = @_; + my ($grammar, @token) = @_; + use Benchmark qw (:all :hireswallclock); + + my @directive = parse (@token); # Set up the benches as expected by timethese. my %bench; - while (my ($name, $directives) = each %test) + # A counter of directive sets. + my $count = 1; + for my $d (@directive) + { + $bench{$count} = $d; + printf " %2d. 
%s\n", $count, join (' ', split ("\n", $d)); + $count++; + }; + + # For each bench, capture the size. + my %size; + + while (my ($name, $directives) = each %bench) { - print STDERR "$name\n"; - # Call the Bison input file generator. - my $generator = "$gram" . "_grammar"; - &$generator ($name, 200, @$directives); + generate_grammar ($grammar, $name, $directives); + # Compile the executable. compile ($name); $bench{$name} = "system ('./$name');"; + chop($size{$name} = `wc -c <$name`); } - print "$gram:\n"; # Run the benches. + # + # STYLE can be any of 'all', 'none', 'noc', 'nop' or 'auto'. 'all' + # shows each of the 5 times available ('wallclock' time, user time, + # system time, user time of children, and system time of + # children). 'noc' shows all except the two children times. 'nop' + # shows only wallclock and the two children times. 'auto' (the + # default) will act as 'all' unless the children times are both + # zero, in which case it acts as 'noc'. 'none' prevents output. + verbose 3, "Running the benches for $grammar\n"; my $res = timethese ($iterations, \%bench, 'nop'); - # Output the result. + + # Output the speed result. cmpthese ($res, 'nop'); + + # Display the sizes. + print "Sizes (decreasing):\n"; + my $width = 10; + for my $bench (keys %size) + { + $width = length $bench + if $width < length $bench; + } + # Benches sorted by decreasing size. + my @benches_per_size = sort {$size{$b} <=> $size{$a}} keys %size; + for my $bench (@benches_per_size) + { + printf "%${width}s: %5.2fkB\n", $bench, $size{$bench} / 1024; + } } +###################################################################### =item C @@ -641,18 +871,16 @@ interfaces. 
sub bench_push_parser () { - calc_input ('calc', 200); - bench_grammar - ('calc', - ( - "pull-impure" => [], - "pull-pure" => ['%define api.pure'], - "push-impure" => ['%define api.push_pull "both"'], - "push-pure" => ['%define api.push_pull "both"', '%define api.pure'], - ) - ); + bench ('calc', + ( + '[', '%define api.pure', ']', + '&', + '[', '%define api.push_pull "both"', ']' + )); } +###################################################################### + =item C Bench the C++ lalr1.cc parser using Boost.Variants or %union. @@ -661,12 +889,38 @@ Bench the C++ lalr1.cc parser using Boost.Variants or %union. sub bench_variant_parser () { - bench_grammar - ('variant', - ( - "union" => [], - "variant" => ['%variant'], - ) + bench ('list', + qw( + %s lalr1.cc + & + [ %debug ] + & + [ %define variant + & + [ #d VARIANT_DESTROY ] + & + [ #d ONE_STAGE_BUILD ] + ] + ) + ); +} + +###################################################################### + +=item C<bench_fusion_parser> + +Bench the lalr1-split.cc skeleton against the lalr1.cc skeleton. + +=cut + +sub bench_fusion_parser () +{ + bench ('list', + qw( + %s lalr1-split.cc + | + %s lalr1.cc + ) ); } @@ -683,14 +937,129 @@ sub help ($) -output => \*STDOUT }); } +###################################################################### + +# The list of tokens parsed by the following functions. +my @token; + +# Parse directive specifications: +# expr: term (| term)* +# term: fact (& fact)* +# fact: ( expr ) | [ expr ] | dirs +# dirs: %s SKELETON | #d DEFINE | directive +sub parse (@) +{ + @token = @_; + verbose 3, "Parsing: @token\n"; + my @res = parse_expr (); + die "expected end of directives, unexpected: @token" + if defined $token[0]; + return @res; +} + +sub parse_expr () +{ + my @res = parse_term (); + while (defined $token[0] && $token[0] eq '|') + { + shift @token; + # Alternation. 
+ push @res, parse_term (); + } + return @res; +} + +sub parse_term () +{ + my @res = parse_fact (); + while (defined $token[0] && $token[0] eq '&') + { + shift @token; + # Cartesian product. + my @lhs = @res; + @res = (); + for my $rhs (parse_fact ()) + { + for my $lhs (@lhs) + { + push @res, "$lhs\n$rhs"; + } + } + } + return @res; +} + +sub parse_fact () +{ + my @res; + die "unexpected end of expression" + unless defined $token[0]; + + if ($token[0] eq '(') + { + shift @token; + @res = parse_expr (); + die "unexpected $token[0], expected )" + unless $token[0] eq ')'; + shift @token; + } + elsif ($token[0] eq '[') + { + shift @token; + @res = (parse_expr (), ''); + die "unexpected $token[0], expected ]" + unless $token[0] eq ']'; + shift @token; + } + else + { + @res = parse_dirs (); + } + return @res; +} + +sub parse_dirs () +{ + my @res; + die "unexpected end of expression" + unless defined $token[0]; + + if ($token[0] eq '#d') + { + shift @token; + @res = ("%code {\n#define\n}"); + shift @token; + } + elsif ($token[0] eq '%s') + { + shift @token; + @res = ("%skeleton \"$token[0]\""); + shift @token; + } + else + { + @res = $token[0]; + shift @token; + } + + return @res; +} + +###################################################################### + sub getopt () { use Getopt::Long; - my $verbose = 0; - %option = ("h|help" => sub { help ($verbose) }, - "v|verbose" => sub { ++$verbose }, - "c|cflags=s" => \$cflags, - "i|iterations=i" => \$iterations); + my %option = ( + "b|bench=s" => \$bench, + "c|cflags=s" => \$cflags, + "d|directive=s" => \@directive, + "g|grammar=s" => \$grammar, + "h|help" => sub { help ($verbose) }, + "i|iterations=i" => \$iterations, + "q|quiet" => sub { --$verbose }, + "v|verbose" => sub { ++$verbose }, + ); Getopt::Long::Configure ("bundling", "pass_through"); GetOptions (%option) or exit 1; @@ -699,9 +1068,48 @@ sub getopt () ###################################################################### getopt; -print STDERR "Using 
bison=$bison, cc=$cc, cxx=$cxx, cflags=$cflags.\n"; -# bench_push_parser(); -bench_variant_parser(); + +# Create the directory we work in. +mkdir "benches" or die "cannot create benches" + unless -d "benches"; +my $count = 1; +++$count + while -d "benches/$count"; +my $dir = "benches/$count"; +mkdir $dir + or die "cannot create $dir"; +chdir $dir + or die "cannot chdir $dir"; + +# The following message is tailored to please Emacs' compilation-mode. +verbose 1, "Entering directory `$dir'\n"; +verbose 1, "Using bison=$bison.\n"; +verbose 2, "Using cc=$cc.\n"; +verbose 2, "Using cxx=$cxx.\n"; +verbose 2, "Using cflags=$cflags.\n"; +verbose 2, "Grammar: $grammar\n"; + + +# Support -b: predefined benches. +my %bench = + ( + "fusion" => \&bench_fusion_parser, + "push" => \&bench_push_parser, + "variant" => \&bench_variant_parser, + ); + +if (defined $bench) +{ + die "invalid argument for --bench: $bench" + unless defined $bench{$bench}; + &{$bench{$bench}}(); + exit 0; +} +else +{ + # Launch the bench marking. + bench ($grammar, @ARGV); +} ### Setup "GNU" style for perl-mode and cperl-mode. ## Local Variables: