[apple/icu.git] / icuSources / test / perf / perldriver / Dataset.pm

#!/usr/local/bin/perl
#  ********************************************************************
#  * COPYRIGHT:
#  * Copyright (c) 2002, International Business Machines Corporation and
#  * others. All Rights Reserved.
#  ********************************************************************

package Dataset;
use Statistics::Descriptive;
use Statistics::Distributions;
use strict;

# Create a new Dataset from a list of numeric samples.
#
# Arguments: the class name followed by zero or more data points.
# Returns:   a blessed hash reference with these fields:
#   _data  - array ref holding a private copy of the samples
#   _scale - multiplier applied by getMean()/getError(); starts at 1.0
#   _mean  - mean of the samples (0.0 when no samples are given)
#   _error - t * standard deviation when there are >= 2 samples, else 0.0
sub new {
    # Copy the samples into a lexical array rather than storing \@_.
    # The elements of @_ alias the caller's variables, so keeping a
    # reference to @_ would let later changes to those variables
    # silently alter this Dataset's stored data.
    my ($class, @samples) = @_;

    my $self = bless {
        _data  => \@samples,
        _scale => 1.0,
        _mean  => 0.0,
        _error => 0.0,
    }, $class;

    my $n = @samples;

    if ($n >= 1) {
        my $stats = Statistics::Descriptive::Full->new();
        $stats->add_data(@samples);
        $self->{_mean} = $stats->mean();

        if ($n >= 2) {
            # Use a t distribution rather than Gaussian because (a) we
            # assume an underlying normal dist, (b) we do not know the
            # standard deviation -- we estimate it from the data, and (c)
            # we MAY have a small sample size (also works for large n).
            # 0.005 is the upper-tail probability, i.e. a two-sided 99%
            # interval with n-1 degrees of freedom.
            my $t = Statistics::Distributions::tdistr($n - 1, 0.005);
            $self->{_error} = $t * $stats->standard_deviation();
        }
    }

    return $self;
}

# Replace the scaling factor that getMean() and getError() multiply by.
# A factor of 1.0 means no scaling.  Callers are expected to pass a
# value > 0; this method does not check it.
# Returns the scale that was just stored.
sub setScale {
    my $self      = shift;
    my $new_scale = shift;
    return $self->{_scale} = $new_scale;
}

# Multiply the current scaling factor by the given value, in place.
# Returns the updated scale.
sub scaleBy {
    my $self   = shift;
    my $factor = shift;
    return $self->{_scale} *= $factor;
}

# Return the stored mean multiplied by the current scaling factor.
sub getMean {
    my ($self) = @_;
    my ($mean, $scale) = @{$self}{qw(_mean _scale)};
    return $mean * $scale;
}

# Return the stored error multiplied by the current scaling factor.
# The error is a 99% bound based on the t distribution, so the dataset
# is described as getMean() +/- getError().
sub getError {
    my ($self) = @_;
    my ($error, $scale) = @{$self}{qw(_error _scale)};
    return $error * $scale;
}

# Divide this Dataset by another and return a new one, maintaining the
# mean+/-error.  The new Dataset has no data points.
#
# Each operand is treated as the interval [mean - error, mean + error].
# The result's interval runs from the smallest to the largest quotient
# of the endpoints: its mean is the midpoint and its error is the
# half-width.  The result's scale is this scale divided by rhs's scale.
#
# If either endpoint of the divisor interval is exactly zero, Perl
# raises "Illegal division by zero".  A divisor interval that straddles
# zero has no bounded quotient; that case is not detected here.
sub divide {
    my ($self, $rhs) = @_;

    my @numerators = ($self->{_mean} - $self->{_error},
                      $self->{_mean} + $self->{_error});
    my @divisors   = ($rhs->{_mean} - $rhs->{_error},
                      $rhs->{_mean} + $rhs->{_error});

    # Consider every endpoint combination.  Assuming lo/hi is the minimum
    # and hi/lo the maximum only holds for non-negative numerators; with
    # a negative numerator it can swap the bounds and yield a negative
    # error.
    my @quotients;
    for my $numerator (@numerators) {
        for my $divisor (@divisors) {
            push @quotients, $numerator / $divisor;
        }
    }
    my @ordered = sort { $a <=> $b } @quotients;
    my ($minratio, $maxratio) = @ordered[0, -1];

    my $result = Dataset->new();
    $result->{_mean} = ($minratio + $maxratio) / 2;
    $result->{_error} = $result->{_mean} - $minratio;
    $result->{_scale} = $self->{_scale} / $rhs->{_scale};
    return $result;
}

# Subtract another Dataset from this one and return a new one,
# maintaining the mean+/-error.  The new Dataset has no data points.
#
# The result keeps this Dataset's scale.  The errors of the two
# operands are added.  If rhs uses a different scale, its mean and
# error are first converted into this Dataset's units, so that
# result->getMean() equals self->getMean() - rhs->getMean().
sub subtract {
    my ($self, $rhs) = @_;

    # Conversion factor from rhs units to self units.  Equal scales are
    # special-cased so the common path does no division and yields
    # exactly the plain difference of the stored values.
    my $ratio = $self->{_scale} == $rhs->{_scale}
              ? 1
              : $rhs->{_scale} / $self->{_scale};

    my $result = Dataset->new();
    $result->{_mean}  = $self->{_mean}  - $rhs->{_mean}  * $ratio;
    $result->{_error} = $self->{_error} + $rhs->{_error} * $ratio;
    $result->{_scale} = $self->{_scale};
    return $result;
}

# Add another Dataset to this one and return a new one, maintaining
# the mean+/-error.  The new Dataset has no data points.
#
# The result keeps this Dataset's scale.  The errors of the two
# operands are added.  If rhs uses a different scale, its mean and
# error are first converted into this Dataset's units, so that
# result->getMean() equals self->getMean() + rhs->getMean().
sub add {
    my ($self, $rhs) = @_;

    # Conversion factor from rhs units to self units.  Equal scales are
    # special-cased so the common path does no division and yields
    # exactly the plain sum of the stored values.
    my $ratio = $self->{_scale} == $rhs->{_scale}
              ? 1
              : $rhs->{_scale} / $self->{_scale};

    my $result = Dataset->new();
    $result->{_mean}  = $self->{_mean}  + $rhs->{_mean}  * $ratio;
    $result->{_error} = $self->{_error} + $rhs->{_error} * $ratio;
    $result->{_scale} = $self->{_scale};
    return $result;
}

# Divide a Dataset by a scalar and return the result as a new Dataset.
# The new Dataset has no data points and keeps this Dataset's scale.
#
# The mean is divided by the scalar itself.  The error is divided by
# the scalar's magnitude, so a negative scalar flips the sign of the
# mean without producing a negative error.
# A scalar of zero makes Perl raise "Illegal division by zero".
sub divideByScalar {
    my ($self, $s) = @_;

    my $result = Dataset->new();
    $result->{_mean}  = $self->{_mean} / $s;
    $result->{_error} = $self->{_error} / abs($s);
    $result->{_scale} = $self->{_scale};
    return $result;
}

# Multiply a Dataset by a scalar and return the result as a new Dataset.
# The new Dataset has no data points and keeps this Dataset's scale.
#
# The mean is multiplied by the scalar itself.  The error is multiplied
# by the scalar's magnitude, so a negative scalar flips the sign of the
# mean without producing a negative error.
sub multiplyByScalar {
    my ($self, $s) = @_;

    my $result = Dataset->new();
    $result->{_mean}  = $self->{_mean} * $s;
    $result->{_error} = $self->{_error} * abs($s);
    $result->{_scale} = $self->{_scale};
    return $result;
}

1;
Commit	Line	Data
b75a7d8f A	1	#!/usr/local/bin/perl
	2	# ********************************************************************
	3	# * COPYRIGHT:
	4	# * Copyright (c) 2002, International Business Machines Corporation and
	5	# * others. All Rights Reserved.
	6	# ********************************************************************
	7
	8	package Dataset;
	9	use Statistics::Descriptive;
	10	use Statistics::Distributions;
	11	use strict;
	12
	13	# Create a new Dataset with the given data.
	14	sub new {
	15	my ($class) = shift;
	16	my $self = bless {
	17	_data => \@_,
	18	_scale => 1.0,
	19	_mean => 0.0,
	20	_error => 0.0,
	21	}, $class;
	22
	23	my $n = @_;
	24
	25	if ($n >= 1) {
	26	my $stats = Statistics::Descriptive::Full->new();
	27	$stats->add_data(@{$self->{_data}});
	28	$self->{_mean} = $stats->mean();
	29
	30	if ($n >= 2) {
	31	# Use a t distribution rather than Gaussian because (a) we
	32	# assume an underlying normal dist, (b) we do not know the
	33	# standard deviation -- we estimate it from the data, and (c)
	34	# we MAY have a small sample size (also works for large n).
	35	my $t = Statistics::Distributions::tdistr($n-1, 0.005);
	36	$self->{_error} = $t * $stats->standard_deviation();
	37	}
	38	}
	39
	40	$self;
	41	}
	42
	43	# Set a scaling factor for all data; 1.0 means no scaling.
	44	# Scale must be > 0.
	45	sub setScale {
	46	my ($self, $scale) = @_;
	47	$self->{_scale} = $scale;
	48	}
	49
	50	# Multiply the scaling factor by a value.
	51	sub scaleBy {
	52	my ($self, $a) = @_;
	53	$self->{_scale} *= $a;
	54	}
	55
	56	# Return the mean.
	57	sub getMean {
	58	my $self = shift;
	59	return $self->{_mean} * $self->{_scale};
	60	}
	61
	62	# Return a 99% error based on the t distribution. The dataset
	63	# is desribed as getMean() +/- getError().
	64	sub getError {
65	my $self = shift;
66	return $self->{_error} * $self->{_scale};
67	}
68
69	# Divide two Datasets and return a new one, maintaining the
70	# mean+/-error. The new Dataset has no data points.
71	sub divide {
72	my $self = shift;
73	my $rhs = shift;
74
75	my $minratio = ($self->{_mean} - $self->{_error}) /
76	($rhs->{_mean} + $rhs->{_error});
77	my $maxratio = ($self->{_mean} + $self->{_error}) /
78	($rhs->{_mean} - $rhs->{_error});
79
80	my $result = Dataset->new();
81	$result->{_mean} = ($minratio + $maxratio) / 2;
82	$result->{_error} = $result->{_mean} - $minratio;
83	$result->{_scale} = $self->{_scale} / $rhs->{_scale};
84	$result;
85	}
86
87	# subtracts two Datasets and return a new one, maintaining the
88	# mean+/-error. The new Dataset has no data points.
89	sub subtract {
90	my $self = shift;
91	my $rhs = shift;
92
93	my $result = Dataset->new();
94	$result->{_mean} = $self->{_mean} - $rhs->{_mean};
95	$result->{_error} = $self->{_error} + $rhs->{_error};
96	$result->{_scale} = $self->{_scale};
97	$result;
98	}
99
100	# adds two Datasets and return a new one, maintaining the
101	# mean+/-error. The new Dataset has no data points.
102	sub add {
103	my $self = shift;
104	my $rhs = shift;
105
106	my $result = Dataset->new();
107	$result->{_mean} = $self->{_mean} + $rhs->{_mean};
108	$result->{_error} = $self->{_error} + $rhs->{_error};
109	$result->{_scale} = $self->{_scale};
110	$result;
111	}
112
113	# Divides a dataset by a scalar.
114	# The new Dataset has no data points.
115	sub divideByScalar {
116	my $self = shift;
117	my $s = shift;
118
119	my $result = Dataset->new();
120	$result->{_mean} = $self->{_mean}/$s;
121	$result->{_error} = $self->{_error}/$s;
122	$result->{_scale} = $self->{_scale};
123	$result;
124	}
125
126	# Divides a dataset by a scalar.
127	# The new Dataset has no data points.
128	sub multiplyByScalar {
129	my $self = shift;
130	my $s = shift;
131
132	my $result = Dataset->new();
133	$result->{_mean} = $self->{_mean}*$s;
134	$result->{_error} = $self->{_error}*$s;
135	$result->{_scale} = $self->{_scale};
136	$result;
137	}
138
139	1;