]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | #!/usr/local/bin/perl |
2 | # ******************************************************************** | |
3 | # * COPYRIGHT: | |
4 | # * Copyright (c) 2002, International Business Machines Corporation and | |
5 | # * others. All Rights Reserved. | |
6 | # ******************************************************************** | |
7 | ||
8 | package Dataset; | |
9 | use Statistics::Descriptive; | |
10 | use Statistics::Distributions; | |
11 | use strict; | |
12 | ||
# Create a new Dataset from a list of numeric samples.
#
# Usage:   my $d = Dataset->new(@samples);
#
# The returned object is a blessed hash with these fields:
#   _data  - array ref holding a private copy of the samples
#   _scale - multiplier applied by getMean()/getError(); starts at 1.0
#   _mean  - mean of the samples (0.0 when no samples are given)
#   _error - t critical value times the sample standard deviation
#            (0.0 when fewer than two samples are given)
sub new {
    # Copying the arguments matters: a reference to @_ itself would keep
    # aliases to the caller's variables, so a later change on the caller's
    # side would silently alter the stored data.
    my ($class, @samples) = @_;

    my $self = bless {
        _data  => \@samples,
        _scale => 1.0,
        _mean  => 0.0,
        _error => 0.0,
    }, $class;

    my $n = scalar @samples;

    if ($n >= 1) {
        my $stats = Statistics::Descriptive::Full->new();
        $stats->add_data(@samples);
        $self->{_mean} = $stats->mean();

        if ($n >= 2) {
            # Use a t distribution rather than Gaussian because (a) we
            # assume an underlying normal dist, (b) we do not know the
            # standard deviation -- we estimate it from the data, and (c)
            # we MAY have a small sample size (also works for large n).
            # An upper-tail probability of 0.005 corresponds to a
            # two-sided 99% interval with n-1 degrees of freedom.
            my $t = Statistics::Distributions::tdistr($n - 1, 0.005);

            # NOTE(review): this multiplies by the sample standard
            # deviation, not by the standard error (sd / sqrt(n)) --
            # confirm which interval is intended before changing it.
            $self->{_error} = $t * $stats->standard_deviation();
        }
    }

    return $self;
}
42 | ||
# Replace the scaling factor that getMean() and getError() apply to the
# stored values; 1.0 means no scaling.
# The scale is expected to be > 0; this is not checked here.
# Returns the scale that was stored.
sub setScale {
    my $self  = shift;
    my $scale = shift;

    return $self->{_scale} = $scale;
}
49 | ||
# Multiply the current scaling factor by the given value.
# Returns the updated scale.
sub scaleBy {
    my $self   = shift;
    my $factor = shift;

    return $self->{_scale} *= $factor;
}
55 | ||
# Return the mean with the current scaling factor applied.
sub getMean {
    my ($self) = @_;
    my ($mean, $scale) = @{$self}{qw(_mean _scale)};

    return $mean * $scale;
}
61 | ||
# Return the 99% error (based on the t distribution) with the current
# scaling factor applied.  The dataset is described as
# getMean() +/- getError().
sub getError {
    my ($self) = @_;
    my ($error, $scale) = @{$self}{qw(_error _scale)};

    return $error * $scale;
}
68 | ||
# Divide this Dataset by another and return a new Dataset that carries
# the resulting mean +/- error.  The new Dataset has no data points.
#
# The bounds of the quotient are taken as
#   lowest  = (mean - error) / (rhs mean + rhs error)
#   highest = (mean + error) / (rhs mean - rhs error)
# and the result is centred between them.  The result's scale is this
# Dataset's scale divided by the other's.
#
# NOTE(review): those two ratios are the true extremes only when both
# intervals are strictly positive; a zero denominator raises Perl's
# division-by-zero error.
sub divide {
    my ($self, $rhs) = @_;

    my ($num_mean, $num_err) = @{$self}{qw(_mean _error)};
    my ($den_mean, $den_err) = @{$rhs}{qw(_mean _error)};

    my $lowest  = ($num_mean - $num_err) / ($den_mean + $den_err);
    my $highest = ($num_mean + $num_err) / ($den_mean - $den_err);

    my $quotient = Dataset->new();
    my $midpoint = ($lowest + $highest) / 2;

    $quotient->{_mean}  = $midpoint;
    $quotient->{_error} = $midpoint - $lowest;
    $quotient->{_scale} = $self->{_scale} / $rhs->{_scale};

    return $quotient;
}
86 | ||
# Subtract another Dataset from this one and return a new Dataset that
# carries the resulting mean +/- error.  The new Dataset has no data
# points.  The error of the difference is the sum of both errors.
#
# When both operands share the same scale the result keeps that scale.
# When the scales differ, each operand's scale is folded into its values
# first and the result gets a scale of 1.0, so that getMean()/getError()
# on the result agree with the operands' own getMean()/getError().
sub subtract {
    my ($self, $rhs) = @_;

    my $result = Dataset->new();

    if ($self->{_scale} == $rhs->{_scale}) {
        $result->{_mean}  = $self->{_mean} - $rhs->{_mean};
        $result->{_error} = $self->{_error} + $rhs->{_error};
        $result->{_scale} = $self->{_scale};
    }
    else {
        # Copying only this Dataset's scale would silently apply it to
        # the other operand's values as well.
        $result->{_mean}  = $self->{_mean} * $self->{_scale}
                          - $rhs->{_mean} * $rhs->{_scale};
        $result->{_error} = $self->{_error} * $self->{_scale}
                          + $rhs->{_error} * $rhs->{_scale};
        $result->{_scale} = 1.0;
    }

    return $result;
}
99 | ||
# Add another Dataset to this one and return a new Dataset that carries
# the resulting mean +/- error.  The new Dataset has no data points.
# The error of the sum is the sum of both errors.
#
# When both operands share the same scale the result keeps that scale.
# When the scales differ, each operand's scale is folded into its values
# first and the result gets a scale of 1.0, so that getMean()/getError()
# on the result agree with the operands' own getMean()/getError().
sub add {
    my ($self, $rhs) = @_;

    my $result = Dataset->new();

    if ($self->{_scale} == $rhs->{_scale}) {
        $result->{_mean}  = $self->{_mean} + $rhs->{_mean};
        $result->{_error} = $self->{_error} + $rhs->{_error};
        $result->{_scale} = $self->{_scale};
    }
    else {
        # Copying only this Dataset's scale would silently apply it to
        # the other operand's values as well.
        $result->{_mean}  = $self->{_mean} * $self->{_scale}
                          + $rhs->{_mean} * $rhs->{_scale};
        $result->{_error} = $self->{_error} * $self->{_scale}
                          + $rhs->{_error} * $rhs->{_scale};
        $result->{_scale} = 1.0;
    }

    return $result;
}
112 | ||
# Divide a Dataset by a scalar and return a new Dataset that carries the
# resulting mean +/- error.  The new Dataset has no data points and
# keeps this Dataset's scale.
#
# The error is a half-width, so its magnitude is taken: dividing by a
# negative scalar flips the sign of the mean but not of the error.
# A zero scalar raises Perl's division-by-zero error.
sub divideByScalar {
    my ($self, $s) = @_;

    my $result = Dataset->new();
    $result->{_mean}  = $self->{_mean} / $s;
    $result->{_error} = abs($self->{_error} / $s);
    $result->{_scale} = $self->{_scale};

    return $result;
}
125 | ||
# Multiply a Dataset by a scalar and return a new Dataset that carries
# the resulting mean +/- error.  The new Dataset has no data points and
# keeps this Dataset's scale.
#
# The error is a half-width, so its magnitude is taken: multiplying by a
# negative scalar flips the sign of the mean but not of the error.
sub multiplyByScalar {
    my ($self, $s) = @_;

    my $result = Dataset->new();
    $result->{_mean}  = $self->{_mean} * $s;
    $result->{_error} = abs($self->{_error} * $s);
    $result->{_scale} = $self->{_scale};

    return $result;
}
138 | ||
139 | 1; |