%define token.prefix.

[bison.git] / etc / bench.pl.in
diff --git a/etc/bench.pl.in b/etc/bench.pl.in

index 91c35b997aa218feeb3b06c22128e9ca30a0c012..f8fca8ac714f9f7a9cda4fb63dd4e97e073846e2 100755 (executable)
--- a/etc/bench.pl.in
+++ b/etc/bench.pl.in
@@ -19,15 +19,41 @@
  
  =head1 NAME
  
  
  =head1 NAME
  
-bench.pl - perform benches on Bison parsers.
+bench.pl - bench marks for Bison parsers.
  
  =head1 SYNOPSIS
  
  
  =head1 SYNOPSIS
  
-  ./bench.pl [OPTIONS]... BENCHES
+  ./bench.pl [OPTIONS]... DIRECTIVES
  
  
-=head1 BENCHES
+=head1 DIRECTIVES
  
  
-Specify the set of benches to run.  I<bench-name> should be one of:
+Specify the set of benches to run.  The following grammar defines the
+I<directives>:
+
+   directives ::=
+       directives | directives  -- Alternation
+     | directives & directives  -- Concatenation
+     | [ directives> ]          -- Optional
+     | ( directives> )          -- Parentheses
+     | #d NAME[=VALUE]          -- %code { #define NAME [VALUE] }
+     | %d NAME[=VALUE]          -- %define NAME ["VALUE"]
+     | %s skeleton              -- %skeleton "skeleton"
+     | directive
+
+Parentheses only group to override precedence.  For instance:
+
+  [ %debug ] & [ %error-verbose ] & [ %define variant ]
+
+will generate eight different cases.
+
+=head1 OPTIONS
+
+=over 4
+
+=item B<-b>, B<--bench>
+
+Predefined benches, that is, combimation between a grammar and a I<directives>
+request.
  
  =over 4
  
  
  =over 4
  
@@ -46,12 +72,35 @@ Test the use of variants instead of union in the C++ parser.
  
  =back
  
  
  =back
  
-=head1 OPTIONS
-
  =item B<-c>, B<--cflags>=I<flags>
  
  Flags to pass to the C or C++ compiler.  Defaults to -O2.
  
  =item B<-c>, B<--cflags>=I<flags>
  
  Flags to pass to the C or C++ compiler.  Defaults to -O2.
  
+=item B<-d>, B<--directive>=I<directives>
+
+Add a set of Bison directives to bench against each other.
+
+=item B<-g>, B<--grammar>=I<grammar>
+
+Select the base I<grammar> to use.  Defaults to I<calc>.
+
+=over 4
+
+=item I<calc>
+
+Traditional calculator.
+
+=item I<list>
+
+C++ grammar that uses std::string and std::list.  Can be used with
+or without %define variant.
+
+=item I<triangular>
+
+Artificial grammar with very long rules.
+
+=back
+
  =item B<-h>, B<--help>
  
  Display this message and exit succesfully.  The more verbose, the more
  =item B<-h>, B<--help>
  
  Display this message and exit succesfully.  The more verbose, the more
@@ -104,6 +153,10 @@ The C++ compiler.
  
  Compiler flags (C or C++).
  
  
  Compiler flags (C or C++).
  
+=item C<@directive>
+
+A list of directive sets to measure against each other.
+
  =item C<$iterations>
  
  The number of times the parser is run for a bench.
  =item C<$iterations>
  
  The number of times the parser is run for a bench.
@@ -116,10 +169,13 @@ Verbosity level.
  
  =cut
  
  
  =cut
  
+my $bench;
  my $bison = $ENV{'BISON'} || '@abs_top_builddir@/tests/bison';
  my $cc = $ENV{'CC'} || 'gcc';
  my $cxx = $ENV{'CXX'} || 'g++';
  my $cflags = '-O2';
  my $bison = $ENV{'BISON'} || '@abs_top_builddir@/tests/bison';
  my $cc = $ENV{'CC'} || 'gcc';
  my $cxx = $ENV{'CXX'} || 'g++';
  my $cflags = '-O2';
+my @directive = ();
+my $grammar = 'calc';
  my $iterations = -1;
  my $verbose = 1;
  
  my $iterations = -1;
  my $verbose = 1;
  
@@ -311,6 +367,10 @@ sub generate_grammar_calc ($$@)
    my ($base, $max, @directive) = @_;
    my $directives = directives ($base, @directive);
  
    my ($base, $max, @directive) = @_;
    my $directives = directives ($base, @directive);
  
+  # Putting this request here is stupid, since the input will be
+  # generated each time we generate a grammar.
+  calc_input ('calc', 200);
+
    my $out = new IO::File ">$base.y"
      or die;
    print $out <<EOF;
    my $out = new IO::File ">$base.y"
      or die;
    print $out <<EOF;
@@ -508,48 +568,65 @@ EOF
  
  ##################################################################
  
  
  ##################################################################
  
-=item C<generate_grammar_variant ($base, $max, @directive)>
+=item C<generate_grammar_list ($base, $max, @directive)>
  
  
-Generate a Bison file F<$base.y> that uses, or not, the Boost.Variants
-depending on the C<@directive>.
+Generate a Bison file F<$base.y> for a C++ parser that uses C++
+objects (std::string, std::list).  Tailored for using %define variant.
  
  =cut
  
  
  =cut
  
-sub generate_grammar_variant ($$@)
+sub generate_grammar_list ($$@)
  {
    my ($base, $max, @directive) = @_;
    my $directives = directives ($base, @directive);
  {
    my ($base, $max, @directive) = @_;
    my $directives = directives ($base, @directive);
-  my $variant = grep { $_ eq '%define variant' } @directive;
-
+  my $variant = grep { /%define "?variant"?/ } @directive;
+  my $lex_symbol = grep { /%define "?lex_symbol"?/ } @directive;
    my $out = new IO::File ">$base.y"
      or die;
    print $out <<EOF;
  %language "C++"
  %defines
    my $out = new IO::File ">$base.y"
      or die;
    print $out <<EOF;
  %language "C++"
  %defines
+%locations
  $directives
  
  $directives
  
-%code requires // variant.h
+%code requires // *.h
  {
  #include <string>
  }
  
  {
  #include <string>
  }
  
-%code // variant.c
+%code // *.c
  {
  #include <algorithm>
  #include <iostream>
  #include <sstream>
  
  {
  #include <algorithm>
  #include <iostream>
  #include <sstream>
  
-// Prototype of the yylex function providing subsequent tokens.
-static yy::parser::token_type yylex(yy::parser::semantic_type* yylval);
-
  #define STAGE_MAX    ($max * 10) // max = $max
  #define STAGE_MAX    ($max * 10) // max = $max
+
+#define USE_LEX_SYMBOL $lex_symbol
  #define USE_VARIANTS $variant
  #define USE_VARIANTS $variant
-#if USE_VARIANTS
-# define IF_VARIANTS(True, False) True
+
+  // Prototype of the yylex function providing subsequent tokens.
+  static
+#if USE_LEX_SYMBOL
+  yy::parser::symbol_type yylex();
  #else
  #else
-# define IF_VARIANTS(True, False) False
+  yy::parser::token_type yylex(yy::parser::semantic_type* yylval,
+                               yy::parser::location_type* yylloc);
  #endif
  #endif
+
+  // Conversion to string.
+  template <typename T>
+    inline
+    std::string
+    string_cast (const T& t)
+  {
+    std::ostringstream o;
+    o << t;
+    return o.str ();
+  }
  }
  }
+
+%token END_OF_FILE 0
  EOF
  
    if ($variant)
  EOF
  
    if ($variant)
@@ -559,7 +636,6 @@ EOF
  %token <int> NUMBER
  %printer { std::cerr << "Number: " << $$; } <int>
  %printer { std::cerr << "Text: " << $$; } <std::string>
  %token <int> NUMBER
  %printer { std::cerr << "Number: " << $$; } <int>
  %printer { std::cerr << "Text: " << $$; } <std::string>
-%token END_OF_FILE 0
  %type <std::string> text result
  
  %%
  %type <std::string> text result
  
  %%
@@ -569,13 +645,8 @@ result:
  
  text:
    /* nothing */                { /* This will generate an empty string */ }
  
  text:
    /* nothing */                { /* This will generate an empty string */ }
-| text TEXT            { std::swap($$,$1); $$.append($2); }
-| text NUMBER          {
-                         std::swap($$,$1);
-                          std::ostringstream ss;
-                         ss << ' ' << $2;
-                         $$.append(ss.str());
-                        }
+| text TEXT            { std::swap ($$, $2); }
+| text NUMBER          { $$ = string_cast($2); }
  ;
  EOF
      }
  ;
  EOF
      }
@@ -588,7 +659,6 @@ EOF
  %token <ival> NUMBER
  %printer { std::cerr << "Number: " << $$; } <ival>
  %printer { std::cerr << "Text: " << *$$; } <sval>
  %token <ival> NUMBER
  %printer { std::cerr << "Number: " << $$; } <ival>
  %printer { std::cerr << "Text: " << *$$; } <sval>
-%token END_OF_FILE 0
  %type <sval> text result
  
  %%
  %type <sval> text result
  
  %%
@@ -598,35 +668,71 @@ result:
  
  text:
    /* nothing */                { $$ = new std::string; }
  
  text:
    /* nothing */                { $$ = new std::string; }
-| text TEXT            { $$->append(*$2); delete $2; }
-| text NUMBER          {
-                         std::ostringstream ss;
-                         ss << ' ' << $2;
-                         $$->append(ss.str());
-                        }
+| text TEXT            { delete $1; $$ = $2; }
+| text NUMBER          { delete $1; $$ = new std::string (string_cast ($2)); }
  ;
  EOF
      }
  
    print $out <<'EOF';
  %%
  ;
  EOF
      }
  
    print $out <<'EOF';
  %%
+#
+
  static
  static
-yy::parser::token_type
-yylex(yy::parser::semantic_type* yylval)
+#if USE_LEX_SYMBOL
+yy::parser::symbol_type yylex()
+#else
+yy::parser::token_type yylex(yy::parser::semantic_type* yylval,
+                             yy::parser::location_type* yylloc)
+#endif
  {
  {
+  typedef yy::parser::token token;
    static int stage = -1;
    ++stage;
    if (stage == STAGE_MAX)
    static int stage = -1;
    ++stage;
    if (stage == STAGE_MAX)
-    return yy::parser::token::END_OF_FILE;
+    {
+#if USE_LEX_SYMBOL
+      return yy::parser::make_symbol <token::END_OF_FILE> (yy::location());
+#else
+      *yylloc = yy::location ();
+      return token::END_OF_FILE;
+#endif
+    }
    else if (stage % 2)
      {
    else if (stage % 2)
      {
-      IF_VARIANTS(yylval->build<int>(), yylval->ival) = stage;
-      return yy::parser::token::NUMBER;
+#if USE_LEX_SYMBOL
+      return yy::parser::make_symbol <token::NUMBER> (stage, yy::location());
+#elif defined ONE_STAGE_BUILD
+      yylval->build(stage);
+      *yylloc = yy::location ();
+      return token::NUMBER;
+#elif USE_VARIANTS
+      yylval->build<int>() = stage;
+      *yylloc = yy::location ();
+      return token::NUMBER;
+#else
+      yylval->ival = stage;
+      *yylloc = yy::location ();
+      return token::NUMBER;
+#endif
      }
    else
      {
      }
    else
      {
-      IF_VARIANTS(yylval->build<std::string>() =, yylval->sval = new) std::string("A string.");
-      return yy::parser::token::TEXT;
+#if USE_LEX_SYMBOL
+      return yy::parser::make_symbol <token::TEXT> ("A string.", yy::location());
+#elif defined ONE_STAGE_BUILD
+      yylval->build(std::string("A string."));
+      *yylloc = yy::location ();
+      return token::TEXT;
+#elif USE_VARIANTS
+      yylval->build<std::string>() = std::string("A string.");
+      *yylloc = yy::location ();
+      return token::TEXT;
+#else
+      yylval->sval = new std::string("A string.");
+      *yylloc = yy::location ();
+      return token::TEXT;
+#endif
      }
    abort();
  }
      }
    abort();
  }
@@ -662,12 +768,12 @@ Generate F<$base.y> by calling C<&generate_grammar_$name>.
  sub generate_grammar ($$@)
  {
    my ($name, $base, @directive) = @_;
  sub generate_grammar ($$@)
  {
    my ($name, $base, @directive) = @_;
-  verbose 2, "Generating $base.y\n";
+  verbose 3, "Generating $base.y\n";
    my %generator =
      (
        "calc"       => \&generate_grammar_calc,
    my %generator =
      (
        "calc"       => \&generate_grammar_calc,
+      "list"       => \&generate_grammar_list,
        "triangular" => \&generate_grammar_triangular,
        "triangular" => \&generate_grammar_triangular,
-      "variant"    => \&generate_grammar_variant,
      );
    &{$generator{$name}}($base, 200, @directive);
  }
      );
    &{$generator{$name}}($base, 200, @directive);
  }
@@ -683,7 +789,7 @@ Run, possibly verbosely, the shell C<$command>.
  sub run ($)
  {
    my ($command) = @_;
  sub run ($)
  {
    my ($command) = @_;
-  verbose 2, "$command\n";
+  verbose 3, "$command\n";
    system ("$command") == 0
      or die "$command failed";
  }
    system ("$command") == 0
      or die "$command failed";
  }
@@ -711,32 +817,37 @@ sub compile ($)
  
  ######################################################################
  
  
  ######################################################################
  
-=item C<bench_grammar ($gram, %bench)>
-
-Generate benches for C<$gram>.  C<$gram> should be C<calc> or
-C<triangle>.  C<%bench> is a hash of the form:
+=item C<bench ($grammar, @token)>
  
  
-  $name => @directive
-
-where C<$name> is the name of the bench, and C<@directive> are the
-Bison directive to use for this bench.  All the benches are compared
-against each other, repeated 50 times.
+Generate benches for the C<$grammar> and the directive specification
+given in the list of C<@token>.
  
  =cut
  
  
  =cut
  
-sub bench_grammar ($%)
+sub bench ($@)
  {
  {
-  my ($gram, %test) = @_;
-
+  my ($grammar, @token) = @_;
    use Benchmark qw (:all :hireswallclock);
  
    use Benchmark qw (:all :hireswallclock);
  
+  my @directive = parse (@token);
+
    # Set up the benches as expected by timethese.
    my %bench;
    # Set up the benches as expected by timethese.
    my %bench;
+  # A counter of directive sets.
+  my $count = 1;
+  for my $d (@directive)
+    {
+      $bench{$count} = $d;
+      printf " %2d. %s\n", $count, join (' ', split ("\n", $d));
+      $count++;
+    };
+
    # For each bench, capture the size.
    my %size;
    # For each bench, capture the size.
    my %size;
-  while (my ($name, $directives) = each %test)
+
+  while (my ($name, $directives) = each %bench)
      {
      {
-      generate_grammar ($gram, $name, @$directives);
+      generate_grammar ($grammar, $name, $directives);
        # Compile the executable.
        compile ($name);
        $bench{$name} = "system ('./$name');";
        # Compile the executable.
        compile ($name);
        $bench{$name} = "system ('./$name');";
@@ -752,7 +863,7 @@ sub bench_grammar ($%)
    # shows only wallclock and the two children times.  'auto' (the
    # default) will act as 'all' unless the children times are both
    # zero, in which case it acts as 'noc'.  'none' prevents output.
    # shows only wallclock and the two children times.  'auto' (the
    # default) will act as 'all' unless the children times are both
    # zero, in which case it acts as 'noc'.  'none' prevents output.
-  verbose 2, "Running the benches for $gram\n";
+  verbose 3, "Running the benches for $grammar\n";
    my $res = timethese ($iterations, \%bench, 'nop');
  
    # Output the speed result.
    my $res = timethese ($iterations, \%bench, 'nop');
  
    # Output the speed result.
@@ -785,39 +896,32 @@ interfaces.
  
  sub bench_push_parser ()
  {
  
  sub bench_push_parser ()
  {
-  calc_input ('calc', 200);
-  bench_grammar
-    ('calc',
-     (
-      "pull-impure" => [],
-      "pull-pure"   => ['%define api.pure'],
-      "push-impure" => ['%define api.push_pull "both"'],
-      "push-pure"   => ['%define api.push_pull "both"', '%define api.pure'],
-     )
-    );
+  bench ('calc',
+         qw(
+            [ %d api.pure ]
+            &
+            [ %d api.push_pull=both ]
+         ));
  }
  
  ######################################################################
  
  =item C<bench_variant_parser ()>
  
  }
  
  ######################################################################
  
  =item C<bench_variant_parser ()>
  
-Bench the C++ lalr1.cc parser using Boost.Variants or %union.
+Bench the C++ lalr1.cc parser using variants or %union.
  
  =cut
  
  sub bench_variant_parser ()
  {
  
  =cut
  
  sub bench_variant_parser ()
  {
-  bench_grammar
-    ('variant',
-     (
-      "f-union"         => ['%skeleton "lalr1.cc"'],
-      "f-uni-deb"   => ['%skeleton "lalr1.cc"', '%debug'],
-      "f-var"       => ['%skeleton "lalr1.cc"', '%define variant'],
-      "f-var-deb" => ['%skeleton "lalr1.cc"', '%debug', '%define variant'],
-      "f-var-dtr"       => ['%skeleton "lalr1.cc"', '%define variant', "%code {\n#define VARIANT_DESTROY\n}"],
-      "f-var-deb-dtr" => ['%skeleton "lalr1.cc"', '%debug', '%define variant', "%code {\n#define VARIANT_DESTROY\n}"],
-      "f-var-deb-dtr-ass" => ['%skeleton "lalr1.cc"', '%debug', '%define variant', "%code {\n#define VARIANT_DESTROY\n}", "%define assert"],
-     )
+  bench ('list',
+         qw(
+            [
+              %d variant
+              &
+              [ #d ONE_STAGE_BUILD | %d lex_symbol ]
+            ]
+         )
      );
  }
  
      );
  }
  
@@ -831,12 +935,12 @@ Bench the C++ lalr1.cc parser using Boost.Variants or %union.
  
  sub bench_fusion_parser ()
  {
  
  sub bench_fusion_parser ()
  {
-  bench_grammar
-    ('variant',
-     (
-      "split"         => ['%skeleton "lalr1-split.cc"'],
-      "fused"         => ['%skeleton "lalr1.cc"'],
-     )
+  bench ('list',
+         qw(
+             %s lalr1-split.cc
+           |
+             %s lalr1.cc
+         )
      );
  }
  
      );
  }
  
@@ -855,11 +959,138 @@ sub help ($)
  
  ######################################################################
  
  
  ######################################################################
  
+# The end of the directives to parse.
+my $eod = "end of directives";
+# The list of tokens parsed by the following functions.
+my @token;
+
+# eat ($EXPECTED)
+# ---------------
+# Check that the current token is $EXPECTED, and move to the next.
+sub eat ($)
+{
+  my ($expected) = @_;
+  die "expected $expected, unexpected: $token[0] (@token)\n"
+    unless $token[0] eq $expected;
+  shift @token;
+}
+
+# Parse directive specifications:
+#   expr: term (| term)*
+#   term: fact (& fact)*
+#   fact: ( expr ) | [ expr ] | dirs
+#   dirs: %s SKELETON | #d NAME[=VALUE] | %d NAME[=VALUE] | directive
+sub parse (@)
+{
+  @token = (@_, $eod);
+  verbose 3, "Parsing: @token\n";
+  my @res = parse_expr ();
+  eat ($eod);
+  return @res;
+}
+
+sub parse_expr ()
+{
+  my @res = parse_term ();
+  while ($token[0] eq '|')
+    {
+      eat ('|');
+      # Alternation.
+      push @res, parse_term ();
+    }
+  return @res;
+}
+
+sub parse_term ()
+{
+  my @res = parse_fact ();
+  while ($token[0] eq '&')
+    {
+      eat ('&');
+      # Cartesian product.
+      my @lhs = @res;
+      @res = ();
+      for my $rhs (parse_fact ())
+        {
+          for my $lhs (@lhs)
+            {
+              push @res, $lhs . ($lhs && $rhs ? "\n" : "") . $rhs;
+            }
+        }
+    }
+  return @res;
+}
+
+sub parse_fact ()
+{
+  my @res;
+  die "unexpected end of expression"
+    unless defined $token[0];
+
+  if ($token[0] eq '(')
+    {
+      eat ('(');
+      @res = parse_expr ();
+      eat (')');
+    }
+  elsif ($token[0] eq '[')
+    {
+      eat ('[');
+      @res = (parse_expr (), '');
+      eat (']');
+    }
+  else
+    {
+      @res = parse_dirs ();
+    }
+  return @res;
+}
+
+sub parse_dirs ()
+{
+  my @res;
+  die "unexpected end of expression"
+    unless defined $token[0];
+
+  if ($token[0] eq '#d')
+    {
+      eat ('#d');
+      $token[0] =~ s/(.*?)=(.*)/$1 $2/;
+      @res = ("%code {\n#define $token[0]\n}");
+      shift @token;
+    }
+  elsif ($token[0] eq '%d')
+    {
+      shift @token;
+      $token[0] =~ s/(.*?)=(.*)/$1 "$2"/;
+      @res = ("%define $token[0]");
+      shift @token;
+    }
+  elsif ($token[0] eq '%s')
+    {
+      shift @token;
+      @res = ("%skeleton \"$token[0]\"");
+      shift @token;
+    }
+  else
+    {
+      @res = $token[0];
+      shift @token;
+    }
+
+  return @res;
+}
+
+######################################################################
+
  sub getopt ()
  {
    use Getopt::Long;
    my %option = (
  sub getopt ()
  {
    use Getopt::Long;
    my %option = (
+    "b|bench=s"      => \$bench,
      "c|cflags=s"     => \$cflags,
      "c|cflags=s"     => \$cflags,
+    "d|directive=s"  => \@directive,
+    "g|grammar=s"    => \$grammar,
      "h|help"         => sub { help ($verbose) },
      "i|iterations=i" => \$iterations,
      "q|quiet"        => sub { --$verbose },
      "h|help"         => sub { help ($verbose) },
      "i|iterations=i" => \$iterations,
      "q|quiet"        => sub { --$verbose },
@@ -873,17 +1104,47 @@ sub getopt ()
  ######################################################################
  
  getopt;
  ######################################################################
  
  getopt;
+
+# Create the directory we work in.
+mkdir "benches" or die "cannot create benches"
+  unless -d "benches";
+my $count = 1;
+++$count
+  while -d "benches/$count";
+my $dir = "benches/$count";
+mkdir $dir
+  or die "cannot create $dir";
+chdir $dir
+  or die "cannot chdir $dir";
+
+# The following message is tailored to please Emacs' compilation-mode.
+verbose 1, "Entering directory `$dir'\n";
  verbose 1, "Using bison=$bison.\n";
  verbose 1, "Using bison=$bison.\n";
-verbose 1, "Using cc=$cc.\n";
-verbose 1, "Using cxx=$cxx.\n";
-verbose 1, "Using cflags=$cflags.\n";
+verbose 2, "Using cc=$cc.\n";
+verbose 2, "Using cxx=$cxx.\n";
+verbose 2, "Using cflags=$cflags.\n";
+verbose 2, "Grammar: $grammar\n";
+
  
  
-for my $b (@ARGV)
+# Support -b: predefined benches.
+my %bench =
+  (
+   "fusion"   => \&bench_fusion_parser,
+   "push"     => \&bench_push_parser,
+   "variant"  => \&bench_variant_parser,
+  );
+
+if (defined $bench)
+{
+  die "invalid argument for --bench: $bench"
+    unless defined $bench{$bench};
+  &{$bench{$bench}}();
+  exit 0;
+}
+else
  {
  {
-  verbose 1, "Running benchmark $b.\n";
-  bench_fusion_parser()  if $b eq "fusion";
-  bench_push_parser()    if $b eq "push";
-  bench_variant_parser() if $b eq "variant";
+  # Launch the bench marking.
+  bench ($grammar, @ARGV);
  }
  
  ### Setup "GNU" style for perl-mode and cperl-mode.
  }
  
  ### Setup "GNU" style for perl-mode and cperl-mode.