3 * Compute running mean, variance, and extrema of a stream of numbers.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
// Named constant for negative infinity. It is declared through define()
// because of the PHP behaviour reported (and closed as "not a bug") at
// <https://bugs.php.net/bug.php?id=49828>.
define( 'NEGATIVE_INF', -INF );
/**
 * Represents a running summary of a stream of numbers.
 *
 * RunningStat instances are accumulator-like objects that provide a set of
 * continuously-updated summary statistics for a stream of numbers, without
 * requiring that each value be stored. The measures it provides are the
 * arithmetic mean, variance, standard deviation, and extrema (min and max);
 * together they describe the central tendency and statistical dispersion of a
 * set of values.
 *
 * One RunningStat instance can be merged into another; the resultant
 * RunningStat has the state it would have had if it had accumulated each
 * individual point. This allows data to be summarized in parallel and in
 * stages without loss of fidelity.
 *
 * Based on a C++ implementation by John D. Cook:
 *  <http://www.johndcook.com/standard_deviation.html>
 *  <http://www.johndcook.com/skewness_kurtosis.html>
 *
 * The in-line documentation for this class incorporates content from the
 * English Wikipedia articles "Variance", "Algorithms for calculating
 * variance", and "Standard deviation".
 */
class RunningStat implements Countable {

	/** @var int Number of samples. */
	public $n = 0;

	/** @var float The first moment (or mean, or expected value). */
	public $m1 = 0.0;

	/**
	 * @var float Running sum of squared deviations from the mean (the
	 *  unnormalized second central moment). getVariance() divides it by
	 *  n - 1 to obtain the estimated variance.
	 */
	public $m2 = 0.0;

	/** @var float The least value in the set (INF while the set is empty). */
	public $min = INF;

	/**
	 * @var float The greatest value in the set (-INF while the set is empty).
	 *
	 * Written as a literal -INF rather than the global NEGATIVE_INF constant
	 * so the class does not depend on that define() having been executed;
	 * constant expressions are valid property defaults since PHP 5.6.
	 */
	public $max = -INF;

	/**
	 * Count the number of accumulated values.
	 *
	 * Implements Countable, so count( $stat ) works as well.
	 *
	 * @return int Number of values
	 */
	// The attribute below silences the PHP 8.1+ tentative-return-type
	// deprecation for Countable::count(); older PHP versions parse the
	// line as a comment.
	#[\ReturnTypeWillChange]
	public function count() {
		return $this->n;
	}

	/**
	 * Add a number to the data set.
	 *
	 * @param int|float $x Value to add
	 */
	public function push( $x ) {
		// Normalize to float so that min/max keep their documented type.
		$x = (float)$x;

		$this->min = min( $this->min, $x );
		$this->max = max( $this->max, $x );

		// Single-pass (Welford-style) update: $n1 is the sample count
		// *before* this value, and $this->n must be incremented before it
		// is used as the divisor, otherwise the first push divides by zero.
		$n1 = $this->n;
		$this->n += 1;
		$delta = $x - $this->m1;
		$delta_n = $delta / $this->n;
		$this->m1 += $delta_n;
		$this->m2 += $delta * $delta_n * $n1;
	}

	/**
	 * Get the mean, or expected value.
	 *
	 * The arithmetic mean is the sum of all measurements divided by the number
	 * of observations in the data set.
	 *
	 * @return float Mean; 0.0 if no values have been accumulated
	 */
	public function getMean() {
		return $this->m1;
	}

	/**
	 * Get the estimated variance.
	 *
	 * Variance measures how far a set of numbers is spread out. A small
	 * variance indicates that the data points tend to be very close to the
	 * mean (and hence to each other), while a high variance indicates that the
	 * data points are very spread out from the mean and from each other.
	 *
	 * @return float Estimated variance (NAN for an empty set, 0.0 for a
	 *  single value)
	 */
	public function getVariance() {
		if ( $this->n === 0 ) {
			// The variance of the empty set is undefined.
			return NAN;
		}

		if ( $this->n === 1 ) {
			// A lone sample has no spread; this also avoids the 0 / 0 that
			// the n - 1 divisor below would produce.
			return 0.0;
		}

		// Sample (n - 1) estimator.
		return $this->m2 / ( $this->n - 1.0 );
	}

	/**
	 * Get the estimated standard deviation.
	 *
	 * The standard deviation of a statistical population is the square root of
	 * its variance. It shows how much variation from the mean exists. In
	 * addition to expressing the variability of a population, the standard
	 * deviation is commonly used to measure confidence in statistical conclusions.
	 *
	 * @return float Estimated standard deviation (NAN for an empty set)
	 */
	public function getStdDev() {
		return sqrt( $this->getVariance() );
	}

	/**
	 * Merge another RunningStat instance into this instance.
	 *
	 * This instance then has the state it would have had if all the data had
	 * been accumulated by it alone. The other instance is not modified.
	 *
	 * @param RunningStat $other RunningStat instance to merge into this one
	 */
	public function merge( RunningStat $other ) {
		// If the other RunningStat is empty, there's nothing to do.
		if ( $other->n === 0 ) {
			return;
		}

		// If this RunningStat is empty, copy values from other RunningStat.
		if ( $this->n === 0 ) {
			$this->n = $other->n;
			$this->m1 = $other->m1;
			$this->m2 = $other->m2;
			$this->min = $other->min;
			$this->max = $other->max;
			return;
		}

		$n = $this->n + $other->n;
		$delta = $other->m1 - $this->m1;
		$delta2 = $delta * $delta;

		// Both formulas need the pre-merge sample counts, so $this->n is
		// only overwritten once they have been evaluated.
		$this->m1 = ( ( $this->n * $this->m1 ) + ( $other->n * $other->m1 ) ) / $n;
		$this->m2 = $this->m2 + $other->m2 + ( $delta2 * $this->n * $other->n / $n );
		$this->min = min( $this->min, $other->min );
		$this->max = max( $this->max, $other->max );
		$this->n = $n;
	}
}