=head1 NAME
-bench.pl - perform benches on Bison parsers.
+bench.pl - bench marks for Bison parsers.
=head1 SYNOPSIS
- ./bench.pl [OPTIONS]... BENCHES
+ ./bench.pl [OPTIONS]... DIRECTIVES
-=head1 BENCHES
+=head1 DIRECTIVES
-Specify the set of benches to run. I<bench-name> should be one of:
+Specify the set of benches to run. The following grammar defines the
+I<directives>:
+
+ directives ::=
+ directives | directives -- Alternation
+ | directives & directives -- Concatenation
+ | [ directives> ] -- Optional
+ | ( directives> ) -- Parentheses
+ | #d NAME[=VALUE] -- %code { #define NAME [VALUE] }
+ | %d NAME[=VALUE] -- %define NAME ["VALUE"]
+ | %s skeleton -- %skeleton "skeleton"
+ | directive
+
+Parentheses only group to override precedence. For instance:
+
+ [ %debug ] & [ %error-verbose ] & [ %define variant ]
+
+will generate eight different cases.
+
+=head1 OPTIONS
+
+=over 4
+
+=item B<-b>, B<--bench>
+
+Predefined benches, that is, combimation between a grammar and a I<directives>
+request.
=over 4
=back
-=head1 OPTIONS
-
=item B<-c>, B<--cflags>=I<flags>
Flags to pass to the C or C++ compiler. Defaults to -O2.
+=item B<-d>, B<--directive>=I<directives>
+
+Add a set of Bison directives to bench against each other.
+
+=item B<-g>, B<--grammar>=I<grammar>
+
+Select the base I<grammar> to use. Defaults to I<calc>.
+
+=over 4
+
+=item I<calc>
+
+Traditional calculator.
+
+=item I<list>
+
+C++ grammar that uses std::string and std::list. Can be used with
+or without %define variant.
+
+=item I<triangular>
+
+Artificial grammar with very long rules.
+
+=back
+
=item B<-h>, B<--help>
Display this message and exit succesfully. The more verbose, the more
Compiler flags (C or C++).
+=item C<@directive>
+
+A list of directive sets to measure against each other.
+
=item C<$iterations>
The number of times the parser is run for a bench.
=cut
+my $bench;
my $bison = $ENV{'BISON'} || '@abs_top_builddir@/tests/bison';
my $cc = $ENV{'CC'} || 'gcc';
my $cxx = $ENV{'CXX'} || 'g++';
my $cflags = '-O2';
+my @directive = ();
+my $grammar = 'calc';
my $iterations = -1;
my $verbose = 1;
my ($base, $max, @directive) = @_;
my $directives = directives ($base, @directive);
+ # Putting this request here is stupid, since the input will be
+ # generated each time we generate a grammar.
+ calc_input ('calc', 200);
+
my $out = new IO::File ">$base.y"
or die;
print $out <<EOF;
##################################################################
-=item C<generate_grammar_variant ($base, $max, @directive)>
+=item C<generate_grammar_list ($base, $max, @directive)>
-Generate a Bison file F<$base.y> that uses, or not, the Boost.Variants
-depending on the C<@directive>.
+Generate a Bison file F<$base.y> for a C++ parser that uses C++
+objects (std::string, std::list). Tailored for using %define variant.
=cut
-sub generate_grammar_variant ($$@)
+sub generate_grammar_list ($$@)
{
my ($base, $max, @directive) = @_;
my $directives = directives ($base, @directive);
- my $variant = grep { $_ eq '%define variant' } @directive;
-
+ my $variant = grep { /%define "?variant"?/ } @directive;
+ my $lex_symbol = grep { /%define "?lex_symbol"?/ } @directive;
my $out = new IO::File ">$base.y"
or die;
print $out <<EOF;
%language "C++"
%defines
+%locations
$directives
-%code requires // variant.h
+%code requires // *.h
{
#include <string>
}
-%code // variant.c
+%code // *.c
{
#include <algorithm>
#include <iostream>
#include <sstream>
-// Prototype of the yylex function providing subsequent tokens.
-static yy::parser::token_type yylex(yy::parser::semantic_type* yylval);
-
#define STAGE_MAX ($max * 10) // max = $max
+
+#define USE_LEX_SYMBOL $lex_symbol
#define USE_VARIANTS $variant
-#if USE_VARIANTS
-# define IF_VARIANTS(True, False) True
+
+ // Prototype of the yylex function providing subsequent tokens.
+ static
+#if USE_LEX_SYMBOL
+ yy::parser::symbol_type yylex();
#else
-# define IF_VARIANTS(True, False) False
+ yy::parser::token_type yylex(yy::parser::semantic_type* yylval,
+ yy::parser::location_type* yylloc);
#endif
+
+ // Conversion to string.
+ template <typename T>
+ inline
+ std::string
+ string_cast (const T& t)
+ {
+ std::ostringstream o;
+ o << t;
+ return o.str ();
+ }
}
+
+%token END_OF_FILE 0
EOF
if ($variant)
%token <int> NUMBER
%printer { std::cerr << "Number: " << $$; } <int>
%printer { std::cerr << "Text: " << $$; } <std::string>
-%token END_OF_FILE 0
%type <std::string> text result
%%
text:
/* nothing */ { /* This will generate an empty string */ }
-| text TEXT { std::swap($$,$1); $$.append($2); }
-| text NUMBER {
- std::swap($$,$1);
- std::ostringstream ss;
- ss << ' ' << $2;
- $$.append(ss.str());
- }
+| text TEXT { std::swap ($$, $2); }
+| text NUMBER { $$ = string_cast($2); }
;
EOF
}
else
{
- # Not using Boost variants.
+ # Not using Bison variants.
print $out <<'EOF';
%union {int ival; std::string* sval;}
%token <sval> TEXT
%token <ival> NUMBER
%printer { std::cerr << "Number: " << $$; } <ival>
%printer { std::cerr << "Text: " << *$$; } <sval>
-%token END_OF_FILE 0
%type <sval> text result
%%
text:
/* nothing */ { $$ = new std::string; }
-| text TEXT { $$->append(*$2); delete $2; }
-| text NUMBER {
- std::ostringstream ss;
- ss << ' ' << $2;
- $$->append(ss.str());
- }
+| text TEXT { delete $1; $$ = $2; }
+| text NUMBER { delete $1; $$ = new std::string (string_cast ($2)); }
;
EOF
}
print $out <<'EOF';
%%
+#
+
static
-yy::parser::token_type
-yylex(yy::parser::semantic_type* yylval)
+#if USE_LEX_SYMBOL
+yy::parser::symbol_type yylex()
+#else
+yy::parser::token_type yylex(yy::parser::semantic_type* yylval,
+ yy::parser::location_type* yylloc)
+#endif
{
+ typedef yy::parser::location_type location_type;
+ typedef yy::parser::token token;
static int stage = -1;
++stage;
if (stage == STAGE_MAX)
- return yy::parser::token::END_OF_FILE;
+ {
+#if USE_LEX_SYMBOL
+ return yy::parser::make_END_OF_FILE (yy::location());
+#else
+ *yylloc = location_type ();
+ return token::END_OF_FILE;
+#endif
+ }
else if (stage % 2)
{
- IF_VARIANTS(yylval->build<int>(), yylval->ival) = stage;
- return yy::parser::token::NUMBER;
+#if USE_LEX_SYMBOL
+ return yy::parser::make_NUMBER (stage, yy::location());
+#else
+# if defined ONE_STAGE_BUILD
+ yylval->build(stage);
+# elif USE_VARIANTS
+ yylval->build<int>() = stage;
+# else
+ yylval->ival = stage;
+# endif
+ *yylloc = location_type ();
+ return token::NUMBER;
+#endif
}
else
{
- IF_VARIANTS(yylval->build<std::string>() =, yylval->sval = new) std::string("A string.");
- return yy::parser::token::TEXT;
+#if USE_LEX_SYMBOL
+ return yy::parser::make_TEXT ("A string.", yy::location());
+#else
+# if defined ONE_STAGE_BUILD
+ yylval->build(std::string("A string."));
+# elif USE_VARIANTS
+ yylval->build<std::string>() = std::string("A string.");
+# else
+ yylval->sval = new std::string("A string.");
+# endif
+ *yylloc = location_type ();
+ return token::TEXT;
+#endif
}
abort();
}
sub generate_grammar ($$@)
{
my ($name, $base, @directive) = @_;
- verbose 2, "Generating $base.y\n";
+ verbose 3, "Generating $base.y\n";
my %generator =
(
"calc" => \&generate_grammar_calc,
+ "list" => \&generate_grammar_list,
"triangular" => \&generate_grammar_triangular,
- "variant" => \&generate_grammar_variant,
);
&{$generator{$name}}($base, 200, @directive);
}
sub run ($)
{
my ($command) = @_;
- verbose 2, "$command\n";
+ verbose 3, "$command\n";
system ("$command") == 0
or die "$command failed";
}
######################################################################
-=item C<bench_grammar ($gram, %bench)>
-
-Generate benches for C<$gram>. C<$gram> should be C<calc> or
-C<triangle>. C<%bench> is a hash of the form:
+=item C<bench ($grammar, @token)>
- $name => @directive
-
-where C<$name> is the name of the bench, and C<@directive> are the
-Bison directive to use for this bench. All the benches are compared
-against each other, repeated 50 times.
+Generate benches for the C<$grammar> and the directive specification
+given in the list of C<@token>.
=cut
-sub bench_grammar ($%)
+sub bench ($@)
{
- my ($gram, %test) = @_;
-
+ my ($grammar, @token) = @_;
use Benchmark qw (:all :hireswallclock);
+ my @directive = parse (@token);
+
# Set up the benches as expected by timethese.
my %bench;
+ # A counter of directive sets.
+ my $count = 1;
+ for my $d (@directive)
+ {
+ $bench{$count} = $d;
+ printf " %2d. %s\n", $count, join (' ', split ("\n", $d));
+ $count++;
+ };
+
# For each bench, capture the size.
my %size;
- while (my ($name, $directives) = each %test)
+
+ while (my ($name, $directives) = each %bench)
{
- generate_grammar ($gram, $name, @$directives);
+ generate_grammar ($grammar, $name, $directives);
# Compile the executable.
compile ($name);
$bench{$name} = "system ('./$name');";
# shows only wallclock and the two children times. 'auto' (the
# default) will act as 'all' unless the children times are both
# zero, in which case it acts as 'noc'. 'none' prevents output.
- verbose 2, "Running the benches for $gram\n";
+ verbose 3, "Running the benches for $grammar\n";
my $res = timethese ($iterations, \%bench, 'nop');
# Output the speed result.
sub bench_push_parser ()
{
- calc_input ('calc', 200);
- bench_grammar
- ('calc',
- (
- "pull-impure" => [],
- "pull-pure" => ['%define api.pure'],
- "push-impure" => ['%define api.push_pull "both"'],
- "push-pure" => ['%define api.push_pull "both"', '%define api.pure'],
- )
- );
+ bench ('calc',
+ qw(
+ [ %d api.pure ]
+ &
+ [ %d api.push_pull=both ]
+ ));
}
######################################################################
=item C<bench_variant_parser ()>
-Bench the C++ lalr1.cc parser using Boost.Variants or %union.
+Bench the C++ lalr1.cc parser using variants or %union.
=cut
sub bench_variant_parser ()
{
- bench_grammar
- ('variant',
- (
- "f-union" => ['%skeleton "lalr1-fusion.cc"'],
- "f-uni-deb" => ['%skeleton "lalr1-fusion.cc"', '%debug'],
- "f-var" => ['%skeleton "lalr1-fusion.cc"', '%define variant'],
- "f-var-deb" => ['%skeleton "lalr1-fusion.cc"', '%debug', '%define variant'],
- "f-var-dtr" => ['%skeleton "lalr1-fusion.cc"', '%define variant', "%code {\n#define VARIANT_DESTROY\n}"],
- "f-var-deb-dtr" => ['%skeleton "lalr1-fusion.cc"', '%debug', '%define variant', "%code {\n#define VARIANT_DESTROY\n}"],
- )
+ bench ('list',
+ qw(
+ [
+ %d variant
+ &
+ [ #d ONE_STAGE_BUILD | %d lex_symbol ]
+ ]
+ )
);
}
sub bench_fusion_parser ()
{
- bench_grammar
- ('variant',
- (
- "split" => [],
- "fused" => ['%skeleton "lalr1-fusion.cc"'],
- )
+ bench ('list',
+ qw(
+ %s lalr1-split.cc
+ |
+ %s lalr1.cc
+ )
);
}
######################################################################
+# The end of the directives to parse.
+my $eod = "end of directives";
+# The list of tokens parsed by the following functions.
+my @token;
+
+# eat ($EXPECTED)
+# ---------------
+# Check that the current token is $EXPECTED, and move to the next.
+sub eat ($)
+{
+ my ($expected) = @_;
+ die "expected $expected, unexpected: $token[0] (@token)\n"
+ unless $token[0] eq $expected;
+ shift @token;
+}
+
+# Parse directive specifications:
+# expr: term (| term)*
+# term: fact (& fact)*
+# fact: ( expr ) | [ expr ] | dirs
+# dirs: %s SKELETON | #d NAME[=VALUE] | %d NAME[=VALUE] | directive
+sub parse (@)
+{
+ @token = (@_, $eod);
+ verbose 3, "Parsing: @token\n";
+ my @res = parse_expr ();
+ eat ($eod);
+ return @res;
+}
+
+sub parse_expr ()
+{
+ my @res = parse_term ();
+ while ($token[0] eq '|')
+ {
+ eat ('|');
+ # Alternation.
+ push @res, parse_term ();
+ }
+ return @res;
+}
+
+sub parse_term ()
+{
+ my @res = parse_fact ();
+ while ($token[0] eq '&')
+ {
+ eat ('&');
+ # Cartesian product.
+ my @lhs = @res;
+ @res = ();
+ for my $rhs (parse_fact ())
+ {
+ for my $lhs (@lhs)
+ {
+ push @res, $lhs . ($lhs && $rhs ? "\n" : "") . $rhs;
+ }
+ }
+ }
+ return @res;
+}
+
+sub parse_fact ()
+{
+ my @res;
+ die "unexpected end of expression"
+ unless defined $token[0];
+
+ if ($token[0] eq '(')
+ {
+ eat ('(');
+ @res = parse_expr ();
+ eat (')');
+ }
+ elsif ($token[0] eq '[')
+ {
+ eat ('[');
+ @res = (parse_expr (), '');
+ eat (']');
+ }
+ else
+ {
+ @res = parse_dirs ();
+ }
+ return @res;
+}
+
+sub parse_dirs ()
+{
+ my @res;
+ die "unexpected end of expression"
+ unless defined $token[0];
+
+ if ($token[0] eq '#d')
+ {
+ eat ('#d');
+ $token[0] =~ s/(.*?)=(.*)/$1 $2/;
+ @res = ("%code {\n#define $token[0]\n}");
+ shift @token;
+ }
+ elsif ($token[0] eq '%d')
+ {
+ shift @token;
+ $token[0] =~ s/(.*?)=(.*)/$1 "$2"/;
+ @res = ("%define $token[0]");
+ shift @token;
+ }
+ elsif ($token[0] eq '%s')
+ {
+ shift @token;
+ @res = ("%skeleton \"$token[0]\"");
+ shift @token;
+ }
+ else
+ {
+ @res = $token[0];
+ shift @token;
+ }
+
+ return @res;
+}
+
+######################################################################
+
sub getopt ()
{
use Getopt::Long;
my %option = (
+ "b|bench=s" => \$bench,
"c|cflags=s" => \$cflags,
+ "d|directive=s" => \@directive,
+ "g|grammar=s" => \$grammar,
"h|help" => sub { help ($verbose) },
"i|iterations=i" => \$iterations,
"q|quiet" => sub { --$verbose },
######################################################################
getopt;
+
+# Create the directory we work in.
+mkdir "benches" or die "cannot create benches"
+ unless -d "benches";
+my $count = 1;
+++$count
+ while -d "benches/$count";
+my $dir = "benches/$count";
+mkdir $dir
+ or die "cannot create $dir";
+chdir $dir
+ or die "cannot chdir $dir";
+
+# The following message is tailored to please Emacs' compilation-mode.
+verbose 1, "Entering directory `$dir'\n";
verbose 1, "Using bison=$bison.\n";
-verbose 1, "Using cc=$cc.\n";
-verbose 1, "Using cxx=$cxx.\n";
-verbose 1, "Using cflags=$cflags.\n";
+verbose 2, "Using cc=$cc.\n";
+verbose 2, "Using cxx=$cxx.\n";
+verbose 2, "Using cflags=$cflags.\n";
+verbose 2, "Grammar: $grammar\n";
+
-for my $b (@ARGV)
+# Support -b: predefined benches.
+my %bench =
+ (
+ "fusion" => \&bench_fusion_parser,
+ "push" => \&bench_push_parser,
+ "variant" => \&bench_variant_parser,
+ );
+
+if (defined $bench)
+{
+ die "invalid argument for --bench: $bench"
+ unless defined $bench{$bench};
+ &{$bench{$bench}}();
+ exit 0;
+}
+else
{
- verbose 1, "Running benchmark $b.\n";
- bench_fusion_parser() if $b eq "fusion";
- bench_push_parser() if $b eq "push";
- bench_variant_parser() if $b eq "variant";
+ # Launch the bench marking.
+ bench ($grammar, @ARGV);
}
### Setup "GNU" style for perl-mode and cperl-mode.