icuSources/test/perf/perldriver/Dataset.pm

   1 #!/usr/local/bin/perl
   2 #  ********************************************************************
   3 #  * Copyright (C) 2016 and later: Unicode, Inc. and others.
   4 #  * License & terms of use: http://www.unicode.org/copyright.html#License
   5 #  ********************************************************************
   6 #  ********************************************************************
   7 #  * COPYRIGHT:
   8 #  * Copyright (c) 2002, International Business Machines Corporation and
   9 #  * others. All Rights Reserved.
  10 #  ********************************************************************
  11
  12 package Dataset;
  13 use Statistics::Descriptive;
  14 use Statistics::Distributions;
  15 use strict;
  16
  17 # Create a new Dataset with the given data.
  18 sub new {
  19     my ($class) = shift;
  20     my $self = bless {
  21         _data => \@_,
  22         _scale => 1.0,
  23         _mean => 0.0,
  24         _error => 0.0,
  25     }, $class;
  26
  27     my $n = @_;
  28
  29     if ($n >= 1) {
  30         my $stats = Statistics::Descriptive::Full->new();
  31         $stats->add_data(@{$self->{_data}});
  32         $self->{_mean} = $stats->mean();
  33
  34         if ($n >= 2) {
  35             # Use a t distribution rather than Gaussian because (a) we
  36             # assume an underlying normal dist, (b) we do not know the
  37             # standard deviation -- we estimate it from the data, and (c)
  38             # we MAY have a small sample size (also works for large n).
  39             my $t = Statistics::Distributions::tdistr($n-1, 0.005);
  40             $self->{_error} = $t * $stats->standard_deviation();
  41         }
  42     }
  43
  44     $self;
  45 }
  46
  47 # Set a scaling factor for all data; 1.0 means no scaling.
  48 # Scale must be > 0.
  49 sub setScale {
  50     my ($self, $scale) = @_;
  51     $self->{_scale} = $scale;
  52 }
  53
  54 # Multiply the scaling factor by a value.
  55 sub scaleBy {
  56     my ($self, $a) = @_;
  57     $self->{_scale} *= $a;
  58 }
  59
  60 # Return the mean.
  61 sub getMean {
  62     my $self = shift;
  63     return $self->{_mean} * $self->{_scale};
  64 }
  65
  66 # Return a 99% error based on the t distribution.  The dataset
  67 # is desribed as getMean() +/- getError().
  68 sub getError {
  69     my $self = shift;
  70     return $self->{_error} * $self->{_scale};
  71 }
  72
  73 # Divide two Datasets and return a new one, maintaining the
  74 # mean+/-error.  The new Dataset has no data points.
  75 sub divide {
  76     my $self = shift;
  77     my $rhs = shift;
  78
  79     my $minratio = ($self->{_mean} - $self->{_error}) /
  80                    ($rhs->{_mean} + $rhs->{_error});
  81     my $maxratio = ($self->{_mean} + $self->{_error}) /
  82                    ($rhs->{_mean} - $rhs->{_error});
  83
  84     my $result = Dataset->new();
  85     $result->{_mean} = ($minratio + $maxratio) / 2;
  86     $result->{_error} = $result->{_mean} - $minratio;
  87     $result->{_scale} = $self->{_scale} / $rhs->{_scale};
  88     $result;
  89 }
  90
  91 # subtracts two Datasets and return a new one, maintaining the
  92 # mean+/-error.  The new Dataset has no data points.
  93 sub subtract {
  94     my $self = shift;
  95     my $rhs = shift;
  96
  97     my $result = Dataset->new();
  98     $result->{_mean} = $self->{_mean} - $rhs->{_mean};
  99     $result->{_error} = $self->{_error} + $rhs->{_error};
 100     $result->{_scale} = $self->{_scale};
 101     $result;
 102 }
 103
 104 # adds two Datasets and return a new one, maintaining the
 105 # mean+/-error.  The new Dataset has no data points.
 106 sub add {
 107     my $self = shift;
 108     my $rhs = shift;
 109
 110     my $result = Dataset->new();
 111     $result->{_mean} = $self->{_mean} + $rhs->{_mean};
 112     $result->{_error} = $self->{_error} + $rhs->{_error};
 113     $result->{_scale} = $self->{_scale};
 114     $result;
 115 }
 116
 117 # Divides a dataset by a scalar.
 118 # The new Dataset has no data points.
 119 sub divideByScalar {
 120     my $self = shift;
 121     my $s = shift;
 122
 123     my $result = Dataset->new();
 124     $result->{_mean} = $self->{_mean}/$s;
 125     $result->{_error} = $self->{_error}/$s;
 126     $result->{_scale} = $self->{_scale};
 127     $result;
 128 }
 129
 130 # Divides a dataset by a scalar.
 131 # The new Dataset has no data points.
 132 sub multiplyByScalar {
 133     my $self = shift;
 134     my $s = shift;
 135
 136     my $result = Dataset->new();
 137     $result->{_mean} = $self->{_mean}*$s;
 138     $result->{_error} = $self->{_error}*$s;
 139     $result->{_scale} = $self->{_scale};
 140     $result;
 141 }
 142
 143 1;