X-Git-Url: https://git.saurik.com/bison.git/blobdiff_plain/3a2803df74bf43b384db752e87be94a1f64fa847..4fc55348e59ab81573d188c8fd529ed9faf81444:/etc/bench.pl.in diff --git a/etc/bench.pl.in b/etc/bench.pl.in index 22328cfa..a95e646b 100755 --- a/etc/bench.pl.in +++ b/etc/bench.pl.in @@ -19,15 +19,41 @@ =head1 NAME -bench.pl - perform benches on Bison parsers. +bench.pl - bench marks for Bison parsers. =head1 SYNOPSIS - ./bench.pl [OPTIONS]... BENCHES + ./bench.pl [OPTIONS]... DIRECTIVES -=head1 BENCHES +=head1 DIRECTIVES -Specify the set of benches to run. I should be one of: +Specify the set of benches to run. The following grammar defines the +I: + + directives ::= + directives | directives -- Alternation + | directives & directives -- Concatenation + | [ directives> ] -- Optional + | ( directives> ) -- Parentheses + | #d NAME[=VALUE] -- %code { #define NAME [VALUE] } + | %d NAME[=VALUE] -- %define NAME ["VALUE"] + | %s skeleton -- %skeleton "skeleton" + | directive + +Parentheses only group to override precedence. For instance: + + [ %debug ] & [ %error-verbose ] & [ %define variant ] + +will generate eight different cases. + +=head1 OPTIONS + +=over 4 + +=item B<-b>, B<--bench> + +Predefined benches, that is, combimation between a grammar and a I +request. =over 4 @@ -46,12 +72,35 @@ Test the use of variants instead of union in the C++ parser. =back -=head1 OPTIONS - =item B<-c>, B<--cflags>=I Flags to pass to the C or C++ compiler. Defaults to -O2. +=item B<-d>, B<--directive>=I + +Add a set of Bison directives to bench against each other. + +=item B<-g>, B<--grammar>=I + +Select the base I to use. Defaults to I. + +=over 4 + +=item I + +Traditional calculator. + +=item I + +C++ grammar that uses std::string and std::list. Can be used with +or without %define variant. + +=item I + +Artificial grammar with very long rules. + +=back + =item B<-h>, B<--help> Display this message and exit succesfully. The more verbose, the more @@ -104,6 +153,10 @@ The C++ compiler. Compiler flags (C or C++). +=item C<@directive> + +A list of directive sets to measure against each other. + =item C<$iterations> The number of times the parser is run for a bench. @@ -116,10 +169,13 @@ Verbosity level. =cut +my $bench; my $bison = $ENV{'BISON'} || '@abs_top_builddir@/tests/bison'; my $cc = $ENV{'CC'} || 'gcc'; my $cxx = $ENV{'CXX'} || 'g++'; my $cflags = '-O2'; +my @directive = (); +my $grammar = 'calc'; my $iterations = -1; my $verbose = 1; @@ -147,21 +203,13 @@ sub verbose($$) Format the list of directives for Bison for bench named C<$bench>. -The special fake C<%variant> directive requests the use of -Boost.Variants instead of a regular union. So don't pass it, it is -not a valid directive. - =cut sub directives($@) { my ($bench, @directive) = @_; my $res = "/* Directives for bench `$bench'. */\n"; - for my $d (@directive) - { - $res .= $d . "\n" - unless $d eq '%variant'; - } + $res .= join ("\n", @directive) . "\n"; $res .= "/* End of directives for bench `$bench'. */\n"; return $res; } @@ -319,6 +367,10 @@ sub generate_grammar_calc ($$@) my ($base, $max, @directive) = @_; my $directives = directives ($base, @directive); + # Putting this request here is stupid, since the input will be + # generated each time we generate a grammar. + calc_input ('calc', 200); + my $out = new IO::File ">$base.y" or die; print $out < +=item C -Generate a Bison file F<$base.y> that uses, or not, the Boost.Variants -depending on the C<@directive>. +Generate a Bison file F<$base.y> for a C++ parser that uses C++ +objects (std::string, std::list). Tailored for using %define variant. =cut -sub generate_grammar_variant ($$@) +sub generate_grammar_list ($$@) { my ($base, $max, @directive) = @_; my $directives = directives ($base, @directive); - my $variant = grep { $_ eq '%variant' } @directive; - + my $variant = grep { /%define "?variant"?/ } @directive; + my $lex_symbol = grep { /%define "?lex_symbol"?/ } @directive; my $out = new IO::File ">$base.y" or die; print $out < } -%code // variant.c +%code // *.c { #include #include #include -// Prototype of the yylex function providing subsequent tokens. -static yy::parser::token_type yylex(yy::parser::semantic_type* yylval); - #define STAGE_MAX ($max * 10) // max = $max + +#define USE_LEX_SYMBOL $lex_symbol #define USE_VARIANTS $variant -#if USE_VARIANTS -# define IF_VARIANTS(True, False) True + + // Prototype of the yylex function providing subsequent tokens. + static +#if USE_LEX_SYMBOL + yy::parser::symbol_type yylex(); #else -# define IF_VARIANTS(True, False) False + yy::parser::token_type yylex(yy::parser::semantic_type* yylval, + yy::parser::location_type* yylloc); #endif + + // Conversion to string. + template + inline + std::string + string_cast (const T& t) + { + std::ostringstream o; + o << t; + return o.str (); + } } + +%token END_OF_FILE 0 EOF if ($variant) { print $out <<'EOF'; -%code variant {int,std::string} %token TEXT %token NUMBER %printer { std::cerr << "Number: " << $$; } %printer { std::cerr << "Text: " << $$; } -%token END_OF_FILE 0 %type text result %% @@ -578,26 +645,20 @@ result: text: /* nothing */ { /* This will generate an empty string */ } -| text TEXT { std::swap($$,$1); $$.append($2); } -| text NUMBER { - std::swap($$,$1); - std::ostringstream ss; - ss << ' ' << $2; - $$.append(ss.str()); - } +| text TEXT { std::swap ($$, $2); } +| text NUMBER { $$ = string_cast($2); } ; EOF } else { - # Not using Boost variants. + # Not using Bison variants. print $out <<'EOF'; %union {int ival; std::string* sval;} %token TEXT %token NUMBER %printer { std::cerr << "Number: " << $$; } %printer { std::cerr << "Text: " << *$$; } -%token END_OF_FILE 0 %type text result %% @@ -607,35 +668,68 @@ result: text: /* nothing */ { $$ = new std::string; } -| text TEXT { $$->append(*$2); delete $2; } -| text NUMBER { - std::ostringstream ss; - ss << ' ' << $2; - $$->append(ss.str()); - } +| text TEXT { delete $1; $$ = $2; } +| text NUMBER { delete $1; $$ = new std::string (string_cast ($2)); } ; EOF } print $out <<'EOF'; %% +# + static -yy::parser::token_type -yylex(yy::parser::semantic_type* yylval) +#if USE_LEX_SYMBOL +yy::parser::symbol_type yylex() +#else +yy::parser::token_type yylex(yy::parser::semantic_type* yylval, + yy::parser::location_type* yylloc) +#endif { + typedef yy::parser::location_type location_type; + typedef yy::parser::token token; static int stage = -1; ++stage; if (stage == STAGE_MAX) - return yy::parser::token::END_OF_FILE; + { +#if USE_LEX_SYMBOL + return yy::parser::make_END_OF_FILE (yy::location()); +#else + *yylloc = location_type (); + return token::END_OF_FILE; +#endif + } else if (stage % 2) { - IF_VARIANTS(yylval->build(), yylval->ival) = stage; - return yy::parser::token::NUMBER; +#if USE_LEX_SYMBOL + return yy::parser::make_NUMBER (stage, yy::location()); +#else +# if defined ONE_STAGE_BUILD + yylval->build(stage); +# elif USE_VARIANTS + yylval->build() = stage; +# else + yylval->ival = stage; +# endif + *yylloc = location_type (); + return token::NUMBER; +#endif } else { - IF_VARIANTS(yylval->build() =, yylval->sval = new) std::string("A string."); - return yy::parser::token::TEXT; +#if USE_LEX_SYMBOL + return yy::parser::make_TEXT ("A string.", yy::location()); +#else +# if defined ONE_STAGE_BUILD + yylval->build(std::string("A string.")); +# elif USE_VARIANTS + yylval->build() = std::string("A string."); +# else + yylval->sval = new std::string("A string."); +# endif + *yylloc = location_type (); + return token::TEXT; +#endif } abort(); } @@ -671,12 +765,12 @@ Generate F<$base.y> by calling C<&generate_grammar_$name>. sub generate_grammar ($$@) { my ($name, $base, @directive) = @_; - verbose 2, "Generating $base.y\n"; + verbose 3, "Generating $base.y\n"; my %generator = ( "calc" => \&generate_grammar_calc, + "list" => \&generate_grammar_list, "triangular" => \&generate_grammar_triangular, - "variant" => \&generate_grammar_variant, ); &{$generator{$name}}($base, 200, @directive); } @@ -692,7 +786,7 @@ Run, possibly verbosely, the shell C<$command>. sub run ($) { my ($command) = @_; - verbose 2, "$command\n"; + verbose 3, "$command\n"; system ("$command") == 0 or die "$command failed"; } @@ -720,32 +814,37 @@ sub compile ($) ###################################################################### -=item C - -Generate benches for C<$gram>. C<$gram> should be C or -C. C<%bench> is a hash of the form: - - $name => @directive +=item C -where C<$name> is the name of the bench, and C<@directive> are the -Bison directive to use for this bench. All the benches are compared -against each other, repeated 50 times. +Generate benches for the C<$grammar> and the directive specification +given in the list of C<@token>. =cut -sub bench_grammar ($%) +sub bench ($@) { - my ($gram, %test) = @_; - + my ($grammar, @token) = @_; use Benchmark qw (:all :hireswallclock); + my @directive = parse (@token); + # Set up the benches as expected by timethese. my %bench; + # A counter of directive sets. + my $count = 1; + for my $d (@directive) + { + $bench{$count} = $d; + printf " %2d. %s\n", $count, join (' ', split ("\n", $d)); + $count++; + }; + # For each bench, capture the size. my %size; - while (my ($name, $directives) = each %test) + + while (my ($name, $directives) = each %bench) { - generate_grammar ($gram, $name, @$directives); + generate_grammar ($grammar, $name, $directives); # Compile the executable. compile ($name); $bench{$name} = "system ('./$name');"; @@ -761,7 +860,7 @@ sub bench_grammar ($%) # shows only wallclock and the two children times. 'auto' (the # default) will act as 'all' unless the children times are both # zero, in which case it acts as 'noc'. 'none' prevents output. - verbose 2, "Running the benches for $gram\n"; + verbose 3, "Running the benches for $grammar\n"; my $res = timethese ($iterations, \%bench, 'nop'); # Output the speed result. @@ -794,38 +893,32 @@ interfaces. sub bench_push_parser () { - calc_input ('calc', 200); - bench_grammar - ('calc', - ( - "pull-impure" => [], - "pull-pure" => ['%define api.pure'], - "push-impure" => ['%define api.push_pull "both"'], - "push-pure" => ['%define api.push_pull "both"', '%define api.pure'], - ) - ); + bench ('calc', + qw( + [ %d api.pure ] + & + [ %d api.push_pull=both ] + )); } ###################################################################### =item C -Bench the C++ lalr1.cc parser using Boost.Variants or %union. +Bench the C++ lalr1.cc parser using variants or %union. =cut sub bench_variant_parser () { - bench_grammar - ('variant', - ( - "f-union" => ['%skeleton "lalr1-fusion.cc"'], - "f-uni-deb" => ['%skeleton "lalr1-fusion.cc"', '%debug'], - "f-var" => ['%skeleton "lalr1-fusion.cc"', '%variant'], - "f-var-deb" => ['%skeleton "lalr1-fusion.cc"', '%debug', '%variant'], - "f-var-dtr" => ['%skeleton "lalr1-fusion.cc"', '%variant', "%code {\n#define VARIANT_DESTROY\n}"], - "f-var-deb-dtr" => ['%skeleton "lalr1-fusion.cc"', '%debug', '%variant', "%code {\n#define VARIANT_DESTROY\n}"], - ) + bench ('list', + qw( + [ + %d variant + & + [ #d ONE_STAGE_BUILD | %d lex_symbol ] + ] + ) ); } @@ -839,12 +932,12 @@ Bench the C++ lalr1.cc parser using Boost.Variants or %union. sub bench_fusion_parser () { - bench_grammar - ('variant', - ( - "split" => [], - "fused" => ['%skeleton "lalr1-fusion.cc"'], - ) + bench ('list', + qw( + %s lalr1-split.cc + | + %s lalr1.cc + ) ); } @@ -863,11 +956,138 @@ sub help ($) ###################################################################### +# The end of the directives to parse. +my $eod = "end of directives"; +# The list of tokens parsed by the following functions. +my @token; + +# eat ($EXPECTED) +# --------------- +# Check that the current token is $EXPECTED, and move to the next. +sub eat ($) +{ + my ($expected) = @_; + die "expected $expected, unexpected: $token[0] (@token)\n" + unless $token[0] eq $expected; + shift @token; +} + +# Parse directive specifications: +# expr: term (| term)* +# term: fact (& fact)* +# fact: ( expr ) | [ expr ] | dirs +# dirs: %s SKELETON | #d NAME[=VALUE] | %d NAME[=VALUE] | directive +sub parse (@) +{ + @token = (@_, $eod); + verbose 3, "Parsing: @token\n"; + my @res = parse_expr (); + eat ($eod); + return @res; +} + +sub parse_expr () +{ + my @res = parse_term (); + while ($token[0] eq '|') + { + eat ('|'); + # Alternation. + push @res, parse_term (); + } + return @res; +} + +sub parse_term () +{ + my @res = parse_fact (); + while ($token[0] eq '&') + { + eat ('&'); + # Cartesian product. + my @lhs = @res; + @res = (); + for my $rhs (parse_fact ()) + { + for my $lhs (@lhs) + { + push @res, $lhs . ($lhs && $rhs ? "\n" : "") . $rhs; + } + } + } + return @res; +} + +sub parse_fact () +{ + my @res; + die "unexpected end of expression" + unless defined $token[0]; + + if ($token[0] eq '(') + { + eat ('('); + @res = parse_expr (); + eat (')'); + } + elsif ($token[0] eq '[') + { + eat ('['); + @res = (parse_expr (), ''); + eat (']'); + } + else + { + @res = parse_dirs (); + } + return @res; +} + +sub parse_dirs () +{ + my @res; + die "unexpected end of expression" + unless defined $token[0]; + + if ($token[0] eq '#d') + { + eat ('#d'); + $token[0] =~ s/(.*?)=(.*)/$1 $2/; + @res = ("%code {\n#define $token[0]\n}"); + shift @token; + } + elsif ($token[0] eq '%d') + { + shift @token; + $token[0] =~ s/(.*?)=(.*)/$1 "$2"/; + @res = ("%define $token[0]"); + shift @token; + } + elsif ($token[0] eq '%s') + { + shift @token; + @res = ("%skeleton \"$token[0]\""); + shift @token; + } + else + { + @res = $token[0]; + shift @token; + } + + return @res; +} + +###################################################################### + sub getopt () { use Getopt::Long; my %option = ( + "b|bench=s" => \$bench, "c|cflags=s" => \$cflags, + "d|directive=s" => \@directive, + "g|grammar=s" => \$grammar, "h|help" => sub { help ($verbose) }, "i|iterations=i" => \$iterations, "q|quiet" => sub { --$verbose }, @@ -881,17 +1101,47 @@ sub getopt () ###################################################################### getopt; + +# Create the directory we work in. +mkdir "benches" or die "cannot create benches" + unless -d "benches"; +my $count = 1; +++$count + while -d "benches/$count"; +my $dir = "benches/$count"; +mkdir $dir + or die "cannot create $dir"; +chdir $dir + or die "cannot chdir $dir"; + +# The following message is tailored to please Emacs' compilation-mode. +verbose 1, "Entering directory `$dir'\n"; verbose 1, "Using bison=$bison.\n"; -verbose 1, "Using cc=$cc.\n"; -verbose 1, "Using cxx=$cxx.\n"; -verbose 1, "Using cflags=$cflags.\n"; +verbose 2, "Using cc=$cc.\n"; +verbose 2, "Using cxx=$cxx.\n"; +verbose 2, "Using cflags=$cflags.\n"; +verbose 2, "Grammar: $grammar\n"; + -for my $b (@ARGV) +# Support -b: predefined benches. +my %bench = + ( + "fusion" => \&bench_fusion_parser, + "push" => \&bench_push_parser, + "variant" => \&bench_variant_parser, + ); + +if (defined $bench) +{ + die "invalid argument for --bench: $bench" + unless defined $bench{$bench}; + &{$bench{$bench}}(); + exit 0; +} +else { - verbose 1, "Running benchmark $b.\n"; - bench_fusion_parser() if $b eq "fusion"; - bench_push_parser() if $b eq "push"; - bench_variant_parser() if $b eq "variant"; + # Launch the bench marking. + bench ($grammar, @ARGV); } ### Setup "GNU" style for perl-mode and cperl-mode.