[apple/icu.git] / icuSources / test / perf / perldriver / Dataset.pm

#!/usr/local/bin/perl
#  ********************************************************************
#  * Copyright (C) 2016 and later: Unicode, Inc. and others.
#  * License & terms of use: http://www.unicode.org/copyright.html#License
#  ********************************************************************
#  ********************************************************************
#  * COPYRIGHT:
#  * Copyright (c) 2002, International Business Machines Corporation and
#  * others. All Rights Reserved.
#  ********************************************************************

package Dataset;
use Statistics::Descriptive;
use Statistics::Distributions;
use strict;

# Create a new Dataset with the given data.
sub new {
    # Constructor: Dataset->new(@values).  Every argument after the class
    # name is one data point.  Returns the new blessed Dataset, with
    # _mean and _error computed from the data and _scale set to 1.0.
    my ($class, @values) = @_;

    # Keep a private copy of the data points.  The elements of @_ are
    # aliases of the caller's variables, so storing a reference to @_
    # itself would let later changes in the caller leak into this object.
    my $self = bless {
        _data  => \@values,
        _scale => 1.0,
        _mean  => 0.0,
        _error => 0.0,
    }, $class;

    my $n = scalar @values;

    # No data at all: mean and error keep their 0.0 defaults.
    return $self if $n < 1;

    my $stats = Statistics::Descriptive::Full->new();
    $stats->add_data(@values);
    $self->{_mean} = $stats->mean();

    # A single point has a mean but no spread; the error stays 0.0.
    return $self if $n < 2;

    # Use a t distribution rather than Gaussian because (a) we
    # assume an underlying normal dist, (b) we do not know the
    # standard deviation -- we estimate it from the data, and (c)
    # we MAY have a small sample size (also works for large n).
    # The 0.005 argument is the tail probability passed to tdistr; the
    # file describes the result as a 99% error.
    # NOTE(review): the error is t * standard deviation, not
    # t * standard deviation / sqrt(n) -- confirm this is the intended
    # interval before relying on it as a confidence interval of the mean.
    my $t = Statistics::Distributions::tdistr($n - 1, 0.005);
    $self->{_error} = $t * $stats->standard_deviation();

    return $self;
}

# Set a scaling factor for all data; 1.0 means no scaling.
# Scale must be > 0.
sub setScale {
    # Replace the scaling factor stored in this Dataset.
    #   $factor - the new factor; it is stored exactly as given, no range
    #             check is performed here.
    # Evaluates to the stored factor.
    my $self   = shift;
    my $factor = shift;
    return $self->{_scale} = $factor;
}

# Multiply the scaling factor by a value.
sub scaleBy {
    # Multiply the current scaling factor in place.
    #   $factor - the multiplier applied to the stored scale
    # Evaluates to the updated scale.
    my $self = shift;
    my ($factor) = @_;
    return $self->{_scale} *= $factor;
}

# Return the mean.
sub getMean {
    # Scaled mean: the stored mean multiplied by the scaling factor.
    my ($self) = @_;
    my ($mean, $scale) = @{$self}{qw(_mean _scale)};
    return $mean * $scale;
}

# Return a 99% error based on the t distribution.  The dataset
# is described as getMean() +/- getError().
sub getError {
    # Scaled error: the stored error multiplied by the scaling factor.
    my ($self) = @_;
    my $raw_error = $self->{_error};
    return $raw_error * $self->{_scale};
}

# Divide two Datasets and return a new one, maintaining the
# mean+/-error.  The new Dataset has no data points.
sub divide {
    # Interval division: ($self mean +/- error) / ($rhs mean +/- error).
    #   $rhs - the Dataset to divide by
    # Returns a new Dataset (no data points) whose mean is the midpoint of
    # the smallest and largest possible quotient and whose error is half
    # the width of that range.  The result's scale is the quotient of the
    # two scales.
    #
    # If an end of the divisor's interval is exactly zero, Perl's own
    # division-by-zero error is raised.  If the divisor's interval spans
    # zero the true ratio is unbounded and the returned interval
    # understates the uncertainty.
    my ($self, $rhs) = @_;

    my @numerators = (
        $self->{_mean} - $self->{_error},
        $self->{_mean} + $self->{_error},
    );
    my @denominators = (
        $rhs->{_mean} - $rhs->{_error},
        $rhs->{_mean} + $rhs->{_error},
    );

    # Look at all four corner quotients.  Which corner is the minimum and
    # which the maximum depends on the signs of the bounds, so picking a
    # fixed pair is only right when both intervals are strictly positive.
    my ($minratio, $maxratio);
    for my $num (@numerators) {
        for my $den (@denominators) {
            my $ratio = $num / $den;
            $minratio = $ratio if !defined $minratio || $ratio < $minratio;
            $maxratio = $ratio if !defined $maxratio || $ratio > $maxratio;
        }
    }

    my $result = Dataset->new();
    $result->{_mean}  = ($minratio + $maxratio) / 2;
    $result->{_error} = $result->{_mean} - $minratio;
    $result->{_scale} = $self->{_scale} / $rhs->{_scale};
    return $result;
}

# Subtracts one Dataset from another and returns a new one, maintaining the
# mean+/-error.  The new Dataset has no data points.
sub subtract {
    # Difference of two Datasets: $self - $rhs.
    #   $rhs - the Dataset to subtract
    # Returns a new Dataset (no data points).  Its error is the sum of the
    # two errors, so that scaled mean +/- scaled error of the result covers
    # every difference of values taken from the two input intervals.
    my ($self, $rhs) = @_;

    my $result = Dataset->new();

    if ($self->{_scale} == $rhs->{_scale}) {
        # Same scale on both sides: work on the raw values and keep the
        # shared scale.
        $result->{_mean}  = $self->{_mean} - $rhs->{_mean};
        $result->{_error} = $self->{_error} + $rhs->{_error};
        $result->{_scale} = $self->{_scale};
    }
    else {
        # Different scales: raw values are not comparable, so combine the
        # scaled values and give the result a scale of 1.
        $result->{_mean}  = $self->{_mean} * $self->{_scale}
                          - $rhs->{_mean} * $rhs->{_scale};
        $result->{_error} = $self->{_error} * $self->{_scale}
                          + $rhs->{_error} * $rhs->{_scale};
        $result->{_scale} = 1.0;
    }

    return $result;
}

# Adds two Datasets and returns a new one, maintaining the
# mean+/-error.  The new Dataset has no data points.
sub add {
    # Sum of two Datasets: $self + $rhs.
    #   $rhs - the Dataset to add
    # Returns a new Dataset (no data points).  Its error is the sum of the
    # two errors, so that scaled mean +/- scaled error of the result covers
    # every sum of values taken from the two input intervals.
    my ($self, $rhs) = @_;

    my $result = Dataset->new();

    if ($self->{_scale} == $rhs->{_scale}) {
        # Same scale on both sides: work on the raw values and keep the
        # shared scale.
        $result->{_mean}  = $self->{_mean} + $rhs->{_mean};
        $result->{_error} = $self->{_error} + $rhs->{_error};
        $result->{_scale} = $self->{_scale};
    }
    else {
        # Different scales: raw values are not comparable, so combine the
        # scaled values and give the result a scale of 1.
        $result->{_mean}  = $self->{_mean} * $self->{_scale}
                          + $rhs->{_mean} * $rhs->{_scale};
        $result->{_error} = $self->{_error} * $self->{_scale}
                          + $rhs->{_error} * $rhs->{_scale};
        $result->{_scale} = 1.0;
    }

    return $result;
}

# Divides a dataset by a scalar.
# The new Dataset has no data points.
sub divideByScalar {
    # Divide this Dataset by a plain number.
    #   $s - the divisor; a value of zero raises Perl's own
    #        division-by-zero error
    # Returns a new Dataset (no data points) with the same scale.
    my ($self, $s) = @_;

    my $result = Dataset->new();
    $result->{_mean} = $self->{_mean} / $s;
    # The error is a magnitude: dividing by a negative number flips the
    # sign of the mean but must not make the error negative.
    $result->{_error} = $self->{_error} / abs($s);
    $result->{_scale} = $self->{_scale};
    return $result;
}

# Multiplies a dataset by a scalar.
# The new Dataset has no data points.
sub multiplyByScalar {
    # Multiply this Dataset by a plain number.
    #   $s - the multiplier
    # Returns a new Dataset (no data points) with the same scale.
    my ($self, $s) = @_;

    my $result = Dataset->new();
    $result->{_mean} = $self->{_mean} * $s;
    # The error is a magnitude: multiplying by a negative number flips the
    # sign of the mean but must not make the error negative.
    $result->{_error} = $self->{_error} * abs($s);
    $result->{_scale} = $self->{_scale};
    return $result;
}

1;
Commit	Line	Data
b75a7d8f A	1	#!/usr/local/bin/perl
b75a7d8f A	2	# ********************************************************************
f3c0d7a5 A	3	# * Copyright (C) 2016 and later: Unicode, Inc. and others.
	4	# * License & terms of use: http://www.unicode.org/copyright.html#License
	5	# ********************************************************************
	6	# ********************************************************************
b75a7d8f A	7	# * COPYRIGHT:
	8	# * Copyright (c) 2002, International Business Machines Corporation and
	9	# * others. All Rights Reserved.
	10	# ********************************************************************
	11
	12	package Dataset;
	13	use Statistics::Descriptive;
	14	use Statistics::Distributions;
	15	use strict;
	16
	17	# Create a new Dataset with the given data.
	18	sub new {
	19	my ($class) = shift;
	20	my $self = bless {
	21	_data => \@_,
	22	_scale => 1.0,
	23	_mean => 0.0,
	24	_error => 0.0,
	25	}, $class;
	26
	27	my $n = @_;
	28
	29	if ($n >= 1) {
	30	my $stats = Statistics::Descriptive::Full->new();
	31	$stats->add_data(@{$self->{_data}});
	32	$self->{_mean} = $stats->mean();
	33
	34	if ($n >= 2) {
	35	# Use a t distribution rather than Gaussian because (a) we
	36	# assume an underlying normal dist, (b) we do not know the
	37	# standard deviation -- we estimate it from the data, and (c)
	38	# we MAY have a small sample size (also works for large n).
	39	my $t = Statistics::Distributions::tdistr($n-1, 0.005);
	40	$self->{_error} = $t * $stats->standard_deviation();
	41	}
	42	}
	43
	44	$self;
	45	}
	46
	47	# Set a scaling factor for all data; 1.0 means no scaling.
	48	# Scale must be > 0.
	49	sub setScale {
	50	my ($self, $scale) = @_;
	51	$self->{_scale} = $scale;
	52	}
	53
	54	# Multiply the scaling factor by a value.
	55	sub scaleBy {
	56	my ($self, $a) = @_;
	57	$self->{_scale} *= $a;
	58	}
	59
	60	# Return the mean.
	61	sub getMean {
	62	my $self = shift;
	63	return $self->{_mean} * $self->{_scale};
	64	}
	65
	66	# Return a 99% error based on the t distribution. The dataset
	67	# is desribed as getMean() +/- getError().
	68	sub getError {
	69	my $self = shift;
	70	return $self->{_error} * $self->{_scale};
71	}
72
73	# Divide two Datasets and return a new one, maintaining the
74	# mean+/-error. The new Dataset has no data points.
75	sub divide {
76	my $self = shift;
77	my $rhs = shift;
78
79	my $minratio = ($self->{_mean} - $self->{_error}) /
80	($rhs->{_mean} + $rhs->{_error});
81	my $maxratio = ($self->{_mean} + $self->{_error}) /
82	($rhs->{_mean} - $rhs->{_error});
83
84	my $result = Dataset->new();
85	$result->{_mean} = ($minratio + $maxratio) / 2;
86	$result->{_error} = $result->{_mean} - $minratio;
87	$result->{_scale} = $self->{_scale} / $rhs->{_scale};
88	$result;
89	}
90
91	# subtracts two Datasets and return a new one, maintaining the
92	# mean+/-error. The new Dataset has no data points.
93	sub subtract {
94	my $self = shift;
95	my $rhs = shift;
96
97	my $result = Dataset->new();
98	$result->{_mean} = $self->{_mean} - $rhs->{_mean};
99	$result->{_error} = $self->{_error} + $rhs->{_error};
100	$result->{_scale} = $self->{_scale};
101	$result;
102	}
103
104	# adds two Datasets and return a new one, maintaining the
105	# mean+/-error. The new Dataset has no data points.
106	sub add {
107	my $self = shift;
108	my $rhs = shift;
109
110	my $result = Dataset->new();
111	$result->{_mean} = $self->{_mean} + $rhs->{_mean};
112	$result->{_error} = $self->{_error} + $rhs->{_error};
113	$result->{_scale} = $self->{_scale};
114	$result;
115	}
116
117	# Divides a dataset by a scalar.
118	# The new Dataset has no data points.
119	sub divideByScalar {
120	my $self = shift;
121	my $s = shift;
122
123	my $result = Dataset->new();
124	$result->{_mean} = $self->{_mean}/$s;
125	$result->{_error} = $self->{_error}/$s;
126	$result->{_scale} = $self->{_scale};
127	$result;
128	}
129
130	# Divides a dataset by a scalar.
131	# The new Dataset has no data points.
132	sub multiplyByScalar {
133	my $self = shift;
134	my $s = shift;
135
136	my $result = Dataset->new();
137	$result->{_mean} = $self->{_mean}*$s;
138	$result->{_error} = $self->{_error}*$s;
139	$result->{_scale} = $self->{_scale};
140	$result;
141	}
142
143	1;