icuSources/test/perf/perldriver/Dataset.pm

   1 #!/usr/local/bin/perl
   2 #  ********************************************************************
   3 #  * COPYRIGHT:
   4 #  * Copyright (c) 2002, International Business Machines Corporation and
   5 #  * others. All Rights Reserved.
   6 #  ********************************************************************
   7
   8 package Dataset;
   9 use Statistics::Descriptive;
  10 use Statistics::Distributions;
  11 use strict;
  12
  13 # Create a new Dataset with the given data.
  14 sub new {
  15     my ($class) = shift;
  16     my $self = bless {
  17         _data => \@_,
  18         _scale => 1.0,
  19         _mean => 0.0,
  20         _error => 0.0,
  21     }, $class;
  22
  23     my $n = @_;
  24
  25     if ($n >= 1) {
  26         my $stats = Statistics::Descriptive::Full->new();
  27         $stats->add_data(@{$self->{_data}});
  28         $self->{_mean} = $stats->mean();
  29
  30         if ($n >= 2) {
  31             # Use a t distribution rather than Gaussian because (a) we
  32             # assume an underlying normal dist, (b) we do not know the
  33             # standard deviation -- we estimate it from the data, and (c)
  34             # we MAY have a small sample size (also works for large n).
  35             my $t = Statistics::Distributions::tdistr($n-1, 0.005);
  36             $self->{_error} = $t * $stats->standard_deviation();
  37         }
  38     }
  39
  40     $self;
  41 }
  42
  43 # Set a scaling factor for all data; 1.0 means no scaling.
  44 # Scale must be > 0.
  45 sub setScale {
  46     my ($self, $scale) = @_;
  47     $self->{_scale} = $scale;
  48 }
  49
  50 # Multiply the scaling factor by a value.
  51 sub scaleBy {
  52     my ($self, $a) = @_;
  53     $self->{_scale} *= $a;
  54 }
  55
  56 # Return the mean.
  57 sub getMean {
  58     my $self = shift;
  59     return $self->{_mean} * $self->{_scale};
  60 }
  61
  62 # Return a 99% error based on the t distribution.  The dataset
  63 # is desribed as getMean() +/- getError().
  64 sub getError {
  65     my $self = shift;
  66     return $self->{_error} * $self->{_scale};
  67 }
  68
  69 # Divide two Datasets and return a new one, maintaining the
  70 # mean+/-error.  The new Dataset has no data points.
  71 sub divide {
  72     my $self = shift;
  73     my $rhs = shift;
  74
  75     my $minratio = ($self->{_mean} - $self->{_error}) /
  76                    ($rhs->{_mean} + $rhs->{_error});
  77     my $maxratio = ($self->{_mean} + $self->{_error}) /
  78                    ($rhs->{_mean} - $rhs->{_error});
  79
  80     my $result = Dataset->new();
  81     $result->{_mean} = ($minratio + $maxratio) / 2;
  82     $result->{_error} = $result->{_mean} - $minratio;
  83     $result->{_scale} = $self->{_scale} / $rhs->{_scale};
  84     $result;
  85 }
  86
  87 # subtracts two Datasets and return a new one, maintaining the
  88 # mean+/-error.  The new Dataset has no data points.
  89 sub subtract {
  90     my $self = shift;
  91     my $rhs = shift;
  92
  93     my $result = Dataset->new();
  94     $result->{_mean} = $self->{_mean} - $rhs->{_mean};
  95     $result->{_error} = $self->{_error} + $rhs->{_error};
  96     $result->{_scale} = $self->{_scale};
  97     $result;
  98 }
  99
 100 # adds two Datasets and return a new one, maintaining the
 101 # mean+/-error.  The new Dataset has no data points.
 102 sub add {
 103     my $self = shift;
 104     my $rhs = shift;
 105
 106     my $result = Dataset->new();
 107     $result->{_mean} = $self->{_mean} + $rhs->{_mean};
 108     $result->{_error} = $self->{_error} + $rhs->{_error};
 109     $result->{_scale} = $self->{_scale};
 110     $result;
 111 }
 112
 113 # Divides a dataset by a scalar.
 114 # The new Dataset has no data points.
 115 sub divideByScalar {
 116     my $self = shift;
 117     my $s = shift;
 118
 119     my $result = Dataset->new();
 120     $result->{_mean} = $self->{_mean}/$s;
 121     $result->{_error} = $self->{_error}/$s;
 122     $result->{_scale} = $self->{_scale};
 123     $result;
 124 }
 125
 126 # Divides a dataset by a scalar.
 127 # The new Dataset has no data points.
 128 sub multiplyByScalar {
 129     my $self = shift;
 130     my $s = shift;
 131
 132     my $result = Dataset->new();
 133     $result->{_mean} = $self->{_mean}*$s;
 134     $result->{_error} = $self->{_error}*$s;
 135     $result->{_scale} = $self->{_scale};
 136     $result;
 137 }
 138
 139 1;