=head1 NAME
-bench.pl - perform benches on Bison parsers.
+bench.pl - bench marks for Bison parsers.
=head1 SYNOPSIS
- ./bench.pl
+ ./bench.pl [OPTIONS]... DIRECTIVES
+
+=head1 DIRECTIVES
+
+Specify the set of benches to run. The following grammar defines the
+I<directives>:
+
+ directives ::=
+ directives | directives -- Alternation
+ | directives & directives -- Concatenation
+ | [ directives> ] -- Optional
+ | ( directives> ) -- Parentheses
+ | %s skeleton -- %skeleton "skeleton"
+ | #d definition -- %code { #define definition }
+ | directive
+
+Parentheses only group to override precedence. For instance:
+
+ [ %debug ] & [ %error-verbose ] & [ %define variant ]
+
+will generate eight different cases.
=head1 OPTIONS
+=over 4
+
+=item B<-b>, B<--bench>
+
+Predefined benches, that is, combimation between a grammar and a I<directives>
+request.
+
+=over 4
+
+=item I<fusion>
+
+Test F<lalr1.cc> with three stacks against F<lalr1-fusion.cc> which
+uses a single one.
+
+=item I<push>
+
+Test the push parser vs. the pull interface. Use the C parser.
+
+=item I<variant>
+
+Test the use of variants instead of union in the C++ parser.
+
+=back
+
=item B<-c>, B<--cflags>=I<flags>
-Flags to pass to the C or C++ compiler.
+Flags to pass to the C or C++ compiler. Defaults to -O2.
+
+=item B<-d>, B<--directive>=I<directives>
+
+Add a set of Bison directives to bench against each other.
+
+=item B<-g>, B<--grammar>=I<grammar>
+
+Select the base I<grammar> to use. Defaults to I<calc>.
+
+=over 4
+
+=item I<calc>
+
+Traditional calculator.
+
+=item I<list>
+
+C++ grammar that uses std::string and std::list. Can be used with
+or without %define variant.
+
+=item I<triangular>
+
+Artificial grammar with very long rules.
+
+=back
+
+=item B<-h>, B<--help>
+
+Display this message and exit succesfully. The more verbose, the more
+details.
=item B<-i>, B<--iterations>=I<integer>
-Say how many times a single test of the bench must be run.
+Say how many times a single test of the bench must be run. If
+negative, specify the minimum number of CPU seconds to run. Defaults
+to -1.
+
+=item B<-q>, B<--quiet>
+
+Decrease the verbosity level (defaults to 1).
=item B<-v>, B<--verbose>
-Raise the verbosity level. Currently only affects B<--help>.
+Raise the verbosity level (defaults to 1).
+
+=back
=cut
+use strict;
use IO::File;
-use Benchmark qw (:all);
+##################################################################
+
+=head1 VARIABLES
+
+=over 4
+
+=item C<@bench>
+
+The list of benches to run.
+
+=item C<$bison>
+
+The Bison program to use to compile the grammar.
+
+=item C<$cc>
+
+The C compiler.
+
+=item C<$cxx>
+
+The C++ compiler.
+
+=item C<$cflags>
+
+Compiler flags (C or C++).
+
+=item C<@directive>
+
+A list of directive sets to measure against each other.
+
+=item C<$iterations>
+
+The number of times the parser is run for a bench.
+
+=item C<$verbose>
+
+Verbosity level.
+
+=back
+
+=cut
+
+my $bench;
my $bison = $ENV{'BISON'} || '@abs_top_builddir@/tests/bison';
my $cc = $ENV{'CC'} || 'gcc';
my $cxx = $ENV{'CXX'} || 'g++';
-# Compiler flags (C or C++).
-my $cflags = '';
-# The number of times the parser is run for a bench.
-my $iterations = 50;
+my $cflags = '-O2';
+my @directive = ();
+my $grammar = 'calc';
+my $iterations = -1;
+my $verbose = 1;
-##################################################################
+=head1 FUNCTIONS
+
+=over 4
-=head1 DESCRIPTIONS
+=item C<verbose($level, $message)>
-=head2 Functions
+Report the C<$message> is C<$level> E<lt>= C<$verbose>.
-=over 4
+=cut
+
+sub verbose($$)
+{
+ my ($level, $message) = @_;
+ print STDERR $message
+ if $level <= $verbose;
+}
+
+
+######################################################################
=item C<directives($bench, @directive)>
Format the list of directives for Bison for bench named C<$bench>.
-The special fake C<%variant> directive requests the use of
-Boost.Variants instead of a regular union. So don't pass it, it is
-not a valid directive.
-
=cut
sub directives($@)
{
- my ($bench, @directives) = @_;
+ my ($bench, @directive) = @_;
my $res = "/* Directives for bench `$bench'. */\n";
- for my $d (@directives)
- {
- $res .= $d . "\n"
- unless $d eq '%variant';
- }
+ $res .= join ("\n", @directive) . "\n";
$res .= "/* End of directives for bench `$bench'. */\n";
return $res;
}
-=item C<triangular_grammar ($base, $max, @directives)>
+######################################################################
+
+=item C<generate_grammar_triangular ($base, $max, @directive)>
Create a large triangular grammar which looks like :
C<$base> is the base name for the file to create (F<$base.y>).
C<$max> is the number of such rules (here, 5). You may pass
-additional Bison C<@directives>.
+additional Bison C<@directive>.
The created parser is self contained: it includes its scanner, and
source of input.
=cut
-sub triangular_grammar ($$$)
+sub generate_grammar_triangular ($$@)
{
- my ($base, $max, @directives) = @_;
- my $directives = directives ($base, @directives);
+ my ($base, $max, @directive) = @_;
+ my $directives = directives ($base, @directive);
my $out = new IO::File ">$base.y"
or die;
}
##################################################################
-=item C<calc_grammar ($base, $max, @directives)>
+
+=item C<generate_grammar_calc ($base, $max, @directive)>
Generate a Bison file F<$base.y> for a calculator parser in C. Pass
-the additional Bison C<@directives>. C<$max> is ignored, but left to
+the additional Bison C<@directive>. C<$max> is ignored, but left to
have the same interface as C<triangular_grammar>.
=cut
-sub calc_grammar ($$$)
+sub generate_grammar_calc ($$@)
{
- my ($base, $max, @directives) = @_;
- my $directives = directives ($base, @directives);
+ my ($base, $max, @directive) = @_;
+ my $directives = directives ($base, @directive);
+
+ # Putting this request here is stupid, since the input will be
+ # generated each time we generate a grammar.
+ calc_input ('calc', 200);
my $out = new IO::File ">$base.y"
or die;
##################################################################
-=item C<variant_grammar ($base, $max, @directives)>
+=item C<generate_grammar_list ($base, $max, @directive)>
-Generate a Bison file F<$base.y> that uses, or not, the Boost.Variants
-depending on the C<@directives>.
+Generate a Bison file F<$base.y> for a C++ parser that uses C++
+objects (std::string, std::list). Tailored for using %define variant.
=cut
-sub variant_grammar ($$$)
+sub generate_grammar_list ($$@)
{
- my ($base, $max, @directives) = @_;
- my $directives = directives ($base, @directives);
- my $variant = grep { '%variant' } @directives;
-
+ my ($base, $max, @directive) = @_;
+ my $directives = directives ($base, @directive);
+ my $variant = grep { /%define "?variant"?/ } @directive;
my $out = new IO::File ">$base.y"
or die;
print $out <<EOF;
-%debug
%language "C++"
%defines
+$directives
-%code requires // code for the .hh file
+%code requires // *.h
{
#include <string>
}
-%code // code for the .cc file
+%code // *.c
{
#include <algorithm>
#include <iostream>
// Prototype of the yylex function providing subsequent tokens.
static yy::parser::token_type yylex(yy::parser::semantic_type* yylval);
-#define STAGE_MAX ($max * 10)
+#define STAGE_MAX ($max * 10) // max = $max
+
#define USE_VARIANTS $variant
#if USE_VARIANTS
# define IF_VARIANTS(True, False) True
#else
# define IF_VARIANTS(True, False) False
#endif
+
+#ifdef ONE_STAGE_BUILD
+# define IF_ONE_STAGE_BUILD(True, False) True
+#else
+# define IF_ONE_STAGE_BUILD(True, False) False
+#endif
}
EOF
if ($variant)
{
print $out <<'EOF';
-%code variant {int,std::string}
%token <std::string> TEXT
%token <int> NUMBER
%printer { std::cerr << "Number: " << $$; } <int>
}
else
{
- # Not using Boost variants.
+ # Not using Bison variants.
print $out <<'EOF';
%union {int ival; std::string* sval;}
%token <sval> TEXT
return yy::parser::token::END_OF_FILE;
else if (stage % 2)
{
- IF_VARIANTS(*yylval, yylval->ival) = stage;
+#if USE_VARIANTS
+# ifdef ONE_STAGE_BUILD
+ yylval->build(stage);
+# else
+ yylval->build<int>() = stage;
+# endif
+#else
+ yylval->ival = stage;
+#endif
return yy::parser::token::NUMBER;
}
else
{
- IF_VARIANTS(*yylval =, yylval->sval = new) std::string("A string.");
+#if USE_VARIANTS
+# ifdef ONE_STAGE_BUILD
+ yylval->build(std::string("A string."));
+# else
+ yylval->build<std::string>() = std::string("A string.");
+# endif
+#else
+ yylval->sval = new std::string("A string.");
+#endif
return yy::parser::token::TEXT;
}
abort();
int main(int argc, char *argv[])
{
yy::parser p;
+#if YYDEBUG
p.set_debug_level(!!getenv("YYDEBUG"));
+#endif
p.parse();
return 0;
}
##################################################################
+=item C<generate_grammar ($name, $base, @directive)>
+
+Generate F<$base.y> by calling C<&generate_grammar_$name>.
+
+=cut
+
+sub generate_grammar ($$@)
+{
+ my ($name, $base, @directive) = @_;
+ verbose 3, "Generating $base.y\n";
+ my %generator =
+ (
+ "calc" => \&generate_grammar_calc,
+ "list" => \&generate_grammar_list,
+ "triangular" => \&generate_grammar_triangular,
+ );
+ &{$generator{$name}}($base, 200, @directive);
+}
+
+##################################################################
+
+=item C<run ($command)>
+
+Run, possibly verbosely, the shell C<$command>.
+
+=cut
+
+sub run ($)
+{
+ my ($command) = @_;
+ verbose 3, "$command\n";
+ system ("$command") == 0
+ or die "$command failed";
+}
+
+##################################################################
+
=item C<compile ($base)>
Compile C<$base.y> to an executable C, Using the C or C++ compiler
my $compiler = $language eq 'C++' ? $cxx : $cc;
- system ("$bison $base.y -o $base.c") == 0
- or die;
- system ("$compiler -o $base $cflags $base.c") == 0
- or die;
+ run "$bison $base.y -o $base.c";
+ run "$compiler -o $base $cflags $base.c";
}
-=item C<bench_grammar ($gram, %bench)>
-
-Generate benches for C<$gram>. C<$gram> should be C<calc> or
-C<triangle>. C<%bench> is a hash of the form:
+######################################################################
- $name => @directives
+=item C<bench ($grammar, @token)>
-where C<$name> is the name of the bench, and C<@directives> are the
-Bison directive to use for this bench. All the benches are compared
-against each other, repeated 50 times.
+Generate benches for the C<$grammar> and the directive specification
+given in the list of C<@token>.
=cut
-sub bench_grammar ($%)
+sub bench ($@)
{
- my ($gram, %test) = @_;
+ my ($grammar, @token) = @_;
+ use Benchmark qw (:all :hireswallclock);
+
+ my @directive = parse (@token);
# Set up the benches as expected by timethese.
my %bench;
- while (my ($name, $directives) = each %test)
+ # A counter of directive sets.
+ my $count = 1;
+ for my $d (@directive)
+ {
+ $bench{$count} = $d;
+ printf " %2d. %s\n", $count, join (' ', split ("\n", $d));
+ $count++;
+ };
+
+ # For each bench, capture the size.
+ my %size;
+
+ while (my ($name, $directives) = each %bench)
{
- print STDERR "$name\n";
- # Call the Bison input file generator.
- my $generator = "$gram" . "_grammar";
- &$generator ($name, 200, @$directives);
+ generate_grammar ($grammar, $name, $directives);
+ # Compile the executable.
compile ($name);
$bench{$name} = "system ('./$name');";
+ chop($size{$name} = `wc -c <$name`);
}
- print "$gram:\n";
# Run the benches.
+ #
+ # STYLE can be any of 'all', 'none', 'noc', 'nop' or 'auto'. 'all'
+ # shows each of the 5 times available ('wallclock' time, user time,
+ # system time, user time of children, and system time of
+ # children). 'noc' shows all except the two children times. 'nop'
+ # shows only wallclock and the two children times. 'auto' (the
+ # default) will act as 'all' unless the children times are both
+ # zero, in which case it acts as 'noc'. 'none' prevents output.
+ verbose 3, "Running the benches for $grammar\n";
my $res = timethese ($iterations, \%bench, 'nop');
- # Output the result.
+
+ # Output the speed result.
cmpthese ($res, 'nop');
+
+ # Display the sizes.
+ print "Sizes (decreasing):\n";
+ my $width = 10;
+ for my $bench (keys %size)
+ {
+ $width = length $bench
+ if $width < length $bench;
+ }
+ # Benches sorted by decreasing size.
+ my @benches_per_size = sort {$size{$b} <=> $size{$a}} keys %size;
+ for my $bench (@benches_per_size)
+ {
+ printf "%${width}s: %5.2fkB\n", $bench, $size{$bench} / 1024;
+ }
}
+######################################################################
=item C<bench_push_parser ()>
sub bench_push_parser ()
{
- calc_input ('calc', 200);
- bench_grammar
- ('calc',
- (
- "pull-impure" => [],
- "pull-pure" => ['%define api.pure'],
- "push-impure" => ['%define api.push_pull "both"'],
- "push-pure" => ['%define api.push_pull "both"', '%define api.pure'],
- )
- );
+ bench ('calc',
+ (
+ '[', '%define api.pure', ']',
+ '&',
+ '[', '%define api.push_pull "both"', ']'
+ ));
}
+######################################################################
+
=item C<bench_variant_parser ()>
Bench the C++ lalr1.cc parser using Boost.Variants or %union.
sub bench_variant_parser ()
{
- bench_grammar
- ('variant',
- (
- "union" => [],
- "variant" => ['%variant'],
- )
+ bench ('list',
+ qw(
+ %s lalr1.cc
+ &
+ [ %debug ]
+ &
+ [ %define variant
+ &
+ [ #d VARIANT_DESTROY ]
+ &
+ [ #d ONE_STAGE_BUILD ]
+ ]
+ )
+ );
+}
+
+######################################################################
+
+=item C<bench_fusion_parser ()>
+
+Bench the C++ lalr1.cc parser using Boost.Variants or %union.
+
+=cut
+
+sub bench_fusion_parser ()
+{
+ bench ('list',
+ qw(
+ %s lalr1-split.cc
+ |
+ %s lalr1.cc
+ )
);
}
-output => \*STDOUT });
}
+######################################################################
+
+# The list of tokens parsed by the following functions.
+my @token;
+
+# Parse directive specifications:
+# expr: term (| term)*
+# term: fact (& fact)*
+# fact: ( expr ) | [ expr ] | dirs
+# dirs: %s SKELETON | #d DEFINE | directive
+sub parse (@)
+{
+ @token = @_;
+ verbose 3, "Parsing: @token\n";
+ my @res = parse_expr ();
+ die "expected end of directives, unexpected: @token"
+ if defined $token[0];
+ return @res;
+}
+
+sub parse_expr ()
+{
+ my @res = parse_term ();
+ while (defined $token[0] && $token[0] eq '|')
+ {
+ shift @token;
+ # Alternation.
+ push @res, parse_term ();
+ }
+ return @res;
+}
+
+sub parse_term ()
+{
+ my @res = parse_fact ();
+ while (defined $token[0] && $token[0] eq '&')
+ {
+ shift @token;
+ # Cartesian product.
+ my @lhs = @res;
+ @res = ();
+ for my $rhs (parse_fact ())
+ {
+ for my $lhs (@lhs)
+ {
+ push @res, "$lhs\n$rhs";
+ }
+ }
+ }
+ return @res;
+}
+
+sub parse_fact ()
+{
+ my @res;
+ die "unexpected end of expression"
+ unless defined $token[0];
+
+ if ($token[0] eq '(')
+ {
+ shift @token;
+ @res = parse_expr ();
+ die "unexpected $token[0], expected )"
+ unless $token[0] eq ')';
+ shift @token;
+ }
+ elsif ($token[0] eq '[')
+ {
+ shift @token;
+ @res = (parse_expr (), '');
+ die "unexpected $token[0], expected ]"
+ unless $token[0] eq ']';
+ shift @token;
+ }
+ else
+ {
+ @res = parse_dirs ();
+ }
+ return @res;
+}
+
+sub parse_dirs ()
+{
+ my @res;
+ die "unexpected end of expression"
+ unless defined $token[0];
+
+ if ($token[0] eq '#d')
+ {
+ shift @token;
+ @res = ("%code {\n#define\n}");
+ shift @token;
+ }
+ elsif ($token[0] eq '%s')
+ {
+ shift @token;
+ @res = ("%skeleton \"$token[0]\"");
+ shift @token;
+ }
+ else
+ {
+ @res = $token[0];
+ shift @token;
+ }
+
+ return @res;
+}
+
+######################################################################
+
sub getopt ()
{
use Getopt::Long;
- my $verbose = 0;
- %option = ("h|help" => sub { help ($verbose) },
- "v|verbose" => sub { ++$verbose },
- "c|cflags=s" => \$cflags,
- "i|iterations=i" => \$iterations);
+ my %option = (
+ "b|bench=s" => \$bench,
+ "c|cflags=s" => \$cflags,
+ "d|directive=s" => \@directive,
+ "g|grammar=s" => \$grammar,
+ "h|help" => sub { help ($verbose) },
+ "i|iterations=i" => \$iterations,
+ "q|quiet" => sub { --$verbose },
+ "v|verbose" => sub { ++$verbose },
+ );
Getopt::Long::Configure ("bundling", "pass_through");
GetOptions (%option)
or exit 1;
######################################################################
getopt;
-print STDERR "Using bison=$bison, cc=$cc, cxx=$cxx, cflags=$cflags.\n";
-# bench_push_parser();
-bench_variant_parser();
+
+# Create the directory we work in.
+mkdir "benches" or die "cannot create benches"
+ unless -d "benches";
+my $count = 1;
+++$count
+ while -d "benches/$count";
+my $dir = "benches/$count";
+mkdir $dir
+ or die "cannot create $dir";
+chdir $dir
+ or die "cannot chdir $dir";
+
+# The following message is tailored to please Emacs' compilation-mode.
+verbose 1, "Entering directory `$dir'\n";
+verbose 1, "Using bison=$bison.\n";
+verbose 2, "Using cc=$cc.\n";
+verbose 2, "Using cxx=$cxx.\n";
+verbose 2, "Using cflags=$cflags.\n";
+verbose 2, "Grammar: $grammar\n";
+
+
+# Support -b: predefined benches.
+my %bench =
+ (
+ "fusion" => \&bench_fusion_parser,
+ "push" => \&bench_push_parser,
+ "variant" => \&bench_variant_parser,
+ );
+
+if (defined $bench)
+{
+ die "invalid argument for --bench: $bench"
+ unless defined $bench{$bench};
+ &{$bench{$bench}}();
+ exit 0;
+}
+else
+{
+ # Launch the bench marking.
+ bench ($grammar, @ARGV);
+}
### Setup "GNU" style for perl-mode and cperl-mode.
## Local Variables: