]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | #!/usr/local/bin/perl |
2 | # ******************************************************************** | |
f3c0d7a5 A |
3 | # * Copyright (C) 2016 and later: Unicode, Inc. and others. |
4 | # * License & terms of use: http://www.unicode.org/copyright.html#License | |
5 | # ******************************************************************** | |
6 | # ******************************************************************** | |
b75a7d8f A |
7 | # * COPYRIGHT: |
8 | # * Copyright (c) 2002, International Business Machines Corporation and | |
9 | # * others. All Rights Reserved. | |
10 | # ******************************************************************** | |
11 | ||
12 | package Dataset; | |
13 | use Statistics::Descriptive; | |
14 | use Statistics::Distributions; | |
15 | use strict; | |
16 | ||
# Create a new Dataset from the given list of numeric samples.
#
# Usage: Dataset->new(@samples)
#
# The samples are copied into the object, so later changes to the
# caller's variables cannot leak into the stored data.  The scale
# starts at 1.0.  With no samples, mean and error stay at 0.0; with a
# single sample the mean is set and the error stays at 0.0.
#
# Returns the new blessed object.
sub new {
    my ($class, @samples) = @_;

    my $self = bless {
        _data  => \@samples,    # private copy, not an alias of @_
        _scale => 1.0,
        _mean  => 0.0,
        _error => 0.0,
    }, $class;

    my $n = @samples;

    if ($n >= 1) {
        my $stats = Statistics::Descriptive::Full->new();
        $stats->add_data(@samples);
        $self->{_mean} = $stats->mean();

        if ($n >= 2) {
            # Use a t distribution rather than Gaussian because (a) we
            # assume an underlying normal dist, (b) we do not know the
            # standard deviation -- we estimate it from the data, and (c)
            # we MAY have a small sample size (also works for large n).
            # An upper-tail probability of 0.005 gives a two-sided 99%
            # critical value.
            # NOTE(review): the error is t * standard deviation, not
            # t * standard deviation / sqrt(n) -- confirm that the
            # spread of individual samples (rather than the confidence
            # interval of the mean) is what is intended.
            my $t = Statistics::Distributions::tdistr($n - 1, 0.005);
            $self->{_error} = $t * $stats->standard_deviation();
        }
    }

    return $self;
}
46 | ||
# Replace the scaling factor applied to every reported value.
# A factor of 1.0 means no scaling.  Callers are expected to pass a
# factor greater than 0; this is not checked here.
# Returns the factor that was stored.
sub setScale {
    my $self   = shift;
    my $factor = shift;
    return $self->{_scale} = $factor;
}
53 | ||
# Multiply the current scaling factor by the given value.
# Returns the updated scaling factor.
sub scaleBy {
    my $self       = shift;
    my $multiplier = shift;
    return $self->{_scale} *= $multiplier;
}
59 | ||
# Return the mean, multiplied by the current scaling factor.
sub getMean {
    my ($self) = @_;
    my ($raw_mean, $scale) = @{$self}{qw(_mean _scale)};
    return $raw_mean * $scale;
}
65 | ||
# Return the 99% error margin (derived from the t distribution),
# multiplied by the current scaling factor.  The dataset is described
# as getMean() +/- getError().
sub getError {
    my ($self) = @_;
    my ($raw_error, $scale) = @{$self}{qw(_error _scale)};
    return $raw_error * $scale;
}
72 | ||
# Divide this Dataset by another and return a new Dataset that keeps
# the mean+/-error description.  The new Dataset has no data points.
#
# The smallest ratio is (lowest numerator) / (highest denominator) and
# the largest is (highest numerator) / (lowest denominator); the result
# is centred between the two.  These are the true bounds only when both
# mean-error intervals are strictly positive.  The result's scale is
# the quotient of the two scales.
sub divide {
    my ($self, $rhs) = @_;

    my ($num, $num_err) = @{$self}{qw(_mean _error)};
    my ($den, $den_err) = @{$rhs}{qw(_mean _error)};

    my $lowest  = ($num - $num_err) / ($den + $den_err);
    my $highest = ($num + $num_err) / ($den - $den_err);
    my $centre  = ($lowest + $highest) / 2;

    my $quotient = Dataset->new();
    $quotient->{_mean}  = $centre;
    $quotient->{_error} = $centre - $lowest;
    $quotient->{_scale} = $self->{_scale} / $rhs->{_scale};
    return $quotient;
}
90 | ||
# Subtract another Dataset from this one and return a new Dataset that
# keeps the mean+/-error description.  The new Dataset has no data
# points.
#
# The result carries this Dataset's scale.  The right-hand operand is
# first converted into that scale, so operands with different scaling
# factors are combined correctly.  The error margins add.
sub subtract {
    my ($self, $rhs) = @_;

    # Factor that re-expresses $rhs's raw values in $self's scale.
    # Equal scales skip the division so the raw values are used as-is.
    my $rescale = $rhs->{_scale} == $self->{_scale}
                ? 1
                : $rhs->{_scale} / $self->{_scale};

    my $result = Dataset->new();
    $result->{_mean}  = $self->{_mean}  - $rhs->{_mean}  * $rescale;
    $result->{_error} = $self->{_error} + $rhs->{_error} * $rescale;
    $result->{_scale} = $self->{_scale};
    return $result;
}
103 | ||
# Add another Dataset to this one and return a new Dataset that keeps
# the mean+/-error description.  The new Dataset has no data points.
#
# The result carries this Dataset's scale.  The right-hand operand is
# first converted into that scale, so operands with different scaling
# factors are combined correctly.  The error margins add.
sub add {
    my ($self, $rhs) = @_;

    # Factor that re-expresses $rhs's raw values in $self's scale.
    # Equal scales skip the division so the raw values are used as-is.
    my $rescale = $rhs->{_scale} == $self->{_scale}
                ? 1
                : $rhs->{_scale} / $self->{_scale};

    my $result = Dataset->new();
    $result->{_mean}  = $self->{_mean}  + $rhs->{_mean}  * $rescale;
    $result->{_error} = $self->{_error} + $rhs->{_error} * $rescale;
    $result->{_scale} = $self->{_scale};
    return $result;
}
116 | ||
# Divide a Dataset by a scalar and return a new Dataset.
# The new Dataset has no data points and keeps this Dataset's scale.
#
# The error margin is divided by the magnitude of the scalar so that it
# stays non-negative when the scalar is negative.
sub divideByScalar {
    my ($self, $s) = @_;

    my $result = Dataset->new();
    $result->{_mean}  = $self->{_mean} / $s;
    $result->{_error} = $self->{_error} / abs($s);
    $result->{_scale} = $self->{_scale};
    return $result;
}
129 | ||
# Multiply a Dataset by a scalar and return a new Dataset.
# The new Dataset has no data points and keeps this Dataset's scale.
#
# The error margin is multiplied by the magnitude of the scalar so that
# it stays non-negative when the scalar is negative.
sub multiplyByScalar {
    my ($self, $s) = @_;

    my $result = Dataset->new();
    $result->{_mean}  = $self->{_mean} * $s;
    $result->{_error} = $self->{_error} * abs($s);
    $result->{_scale} = $self->{_scale};
    return $result;
}
142 | ||
143 | 1; |