lexer: Add comments and update style in lex_source_get__().
[pspp.git] / src / math / moments.c
blob40180f7b7ecdb461c46eaa31b42c1927835c7b66
1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2010, 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 #include <config.h>
19 #include "math/moments.h"
21 #include <assert.h>
22 #include <math.h>
23 #include <stdlib.h>
25 #include "data/val-type.h"
26 #include "data/value.h"
27 #include "libpspp/misc.h"
29 #include "gl/xalloc.h"
31 #include "gettext.h"
32 #define _(msgid) gettext (msgid)
34 /* Calculates variance, skewness, and kurtosis into *VARIANCE,
35 *SKEWNESS, and *KURTOSIS if they are non-null and not greater
36 moments than MAX_MOMENT. Accepts W as the total weight, D1 as
37 the total deviation from the estimated mean, and D2, D3, and
38 D4 as the sum of the squares, cubes, and 4th powers,
39 respectively, of the deviation from the estimated mean. */
40 static void
41 calc_moments (enum moment max_moment,
42 double w, double d1, double d2, double d3, double d4,
43 double *variance, double *skewness, double *kurtosis)
45 assert (w > 0.);
47 if (max_moment >= MOMENT_VARIANCE && w > 1.)
49 double s2 = (d2 - pow2 (d1) / w) / (w - 1.);
50 if (variance != NULL)
51 *variance = s2;
53 /* From _SPSS Statistical Algorithms, 2nd ed.,
54 0-918469-89-9, section "DESCRIPTIVES". */
55 if (fabs (s2) >= 1e-20)
57 if (max_moment >= MOMENT_SKEWNESS && skewness != NULL && w > 2.)
59 double s3 = s2 * sqrt (s2);
60 double g1 = (w * d3) / ((w - 1.0) * (w - 2.0) * s3);
61 if (isfinite (g1))
62 *skewness = g1;
64 if (max_moment >= MOMENT_KURTOSIS && kurtosis != NULL && w > 3.)
66 double den = (w - 2.) * (w - 3.) * pow2 (s2);
67 double g2 = (w * (w + 1) * d4 / (w - 1.) / den
68 - 3. * pow2 (d2) / den);
69 if (isfinite (g2))
70 *kurtosis = g2;
76 /* Two-pass moments. */
78 /* A set of two-pass moments. */
79 struct moments
81 enum moment max_moment; /* Highest-order moment we're computing. */
82 int pass; /* Current pass (1 or 2). */
84 /* Pass one. */
85 double w1; /* Total weight for pass 1, so far. */
86 double sum; /* Sum of values so far. */
87 double mean; /* Mean = sum / w1. */
89 /* Pass two. */
90 double w2; /* Total weight for pass 2, so far. */
91 double d1; /* Sum of deviations from the mean. */
92 double d2; /* Sum of squared deviations from the mean. */
93 double d3; /* Sum of cubed deviations from the mean. */
94 double d4; /* Sum of (deviations from the mean)**4. */
97 /* Initializes moments M for calculating moment MAX_MOMENT and
98 lower moments. */
99 static void
100 init_moments (struct moments *m, enum moment max_moment)
102 assert (m != NULL);
103 assert (max_moment == MOMENT_MEAN || max_moment == MOMENT_VARIANCE
104 || max_moment == MOMENT_SKEWNESS || max_moment == MOMENT_KURTOSIS);
105 m->max_moment = max_moment;
106 moments_clear (m);
109 /* Clears out a set of moments so that it can be reused for a new
110 set of values. The moments to be calculated are not changed. */
111 void
112 moments_clear (struct moments *m)
114 m->pass = 1;
115 m->w1 = m->w2 = 0.;
116 m->sum = 0.;
119 /* Creates and returns a data structure for calculating moment
120 MAX_MOMENT and lower moments on a data series. The user
121 should call moments_pass_one() for each value in the series,
122 then call moments_pass_two() for the same set of values in the
123 same order, then call moments_calculate() to obtain the
124 moments. The user may ask for the mean at any time during the
125 first pass (using moments_calculate()), but otherwise no
126 statistics may be requested until the end of the second pass.
127 Call moments_destroy() when the moments are no longer
128 needed. */
129 struct moments *
130 moments_create (enum moment max_moment)
132 struct moments *m = xmalloc (sizeof *m);
133 init_moments (m, max_moment);
134 return m;
137 /* Adds VALUE with the given WEIGHT to the calculation of
138 moments for the first pass. */
139 void
140 moments_pass_one (struct moments *m, double value, double weight)
142 assert (m != NULL);
143 assert (m->pass == 1);
145 if (value != SYSMIS && weight > 0.)
147 m->sum += value * weight;
148 m->w1 += weight;
152 /* Adds VALUE with the given WEIGHT to the calculation of
153 moments for the second pass. */
154 void
155 moments_pass_two (struct moments *m, double value, double weight)
157 assert (m != NULL);
159 if (m->pass == 1)
161 m->pass = 2;
162 m->mean = m->w1 != 0. ? m->sum / m->w1 : 0.;
163 m->d1 = m->d2 = m->d3 = m->d4 = 0.;
166 if (value != SYSMIS && weight >= 0.)
168 double d = value - m->mean;
169 double d1_delta = d * weight;
170 m->d1 += d1_delta;
171 if (m->max_moment >= MOMENT_VARIANCE)
173 double d2_delta = d1_delta * d;
174 m->d2 += d2_delta;
175 if (m->max_moment >= MOMENT_SKEWNESS)
177 double d3_delta = d2_delta * d;
178 m->d3 += d3_delta;
179 if (m->max_moment >= MOMENT_KURTOSIS)
181 double d4_delta = d3_delta * d;
182 m->d4 += d4_delta;
186 m->w2 += weight;
190 /* Calculates moments based on the input data. Stores the total
191 weight in *WEIGHT, the mean in *MEAN, the variance in
192 *VARIANCE, the skewness in *SKEWNESS, and the kurtosis in
193 *KURTOSIS. Any of these result parameters may be null
194 pointers, in which case the values are not calculated. If any
195 result cannot be calculated, either because they are undefined
196 based on the input data or because their moments are higher
197 than the maximum requested on moments_create(), then SYSMIS is
198 stored into that result. */
199 void
200 moments_calculate (const struct moments *m,
201 double *weight,
202 double *mean, double *variance,
203 double *skewness, double *kurtosis)
205 assert (m != NULL);
207 if (mean != NULL)
208 *mean = SYSMIS;
209 if (variance != NULL)
210 *variance = SYSMIS;
211 if (skewness != NULL)
212 *skewness = SYSMIS;
213 if (kurtosis != NULL)
214 *kurtosis = SYSMIS;
216 if (weight != NULL)
217 *weight = m->w1;
219 /* How many passes so far? */
220 if (m->pass == 1)
222 /* In the first pass we can only calculate the mean. */
223 if (mean != NULL && m->w1 > 0.)
224 *mean = m->sum / m->w1;
226 else
228 /* After the second pass we can calculate any stat. */
229 assert (m->pass == 2);
231 if (m->w2 > 0.)
233 if (mean != NULL)
234 *mean = m->mean;
235 calc_moments (m->max_moment,
236 m->w2, m->d1, m->d2, m->d3, m->d4,
237 variance, skewness, kurtosis);
242 /* Destroys a set of moments. */
243 void
244 moments_destroy (struct moments *m)
246 free (m);
249 /* Calculates the requested moments on the CNT values in ARRAY.
250 Each value is given a weight of 1. The total weight is stored
251 into *WEIGHT (trivially) and the mean, variance, skewness, and
252 kurtosis are stored into *MEAN, *VARIANCE, *SKEWNESS, and
253 *KURTOSIS, respectively. Any of the result pointers may be
254 null, in which case no value is stored. */
255 void
256 moments_of_doubles (const double *array, size_t cnt,
257 double *weight,
258 double *mean, double *variance,
259 double *skewness, double *kurtosis)
261 enum moment max_moment;
262 struct moments m;
263 size_t idx;
265 if (kurtosis != NULL)
266 max_moment = MOMENT_KURTOSIS;
267 else if (skewness != NULL)
268 max_moment = MOMENT_SKEWNESS;
269 else if (variance != NULL)
270 max_moment = MOMENT_VARIANCE;
271 else
272 max_moment = MOMENT_MEAN;
274 init_moments (&m, max_moment);
275 for (idx = 0; idx < cnt; idx++)
276 moments_pass_one (&m, array[idx], 1.);
277 for (idx = 0; idx < cnt; idx++)
278 moments_pass_two (&m, array[idx], 1.);
279 moments_calculate (&m, weight, mean, variance, skewness, kurtosis);
282 /* Calculates the requested moments on the CNT numeric values in
283 ARRAY. Each value is given a weight of 1. The total weight
284 is stored into *WEIGHT (trivially) and the mean, variance,
285 skewness, and kurtosis are stored into *MEAN, *VARIANCE,
286 *SKEWNESS, and *KURTOSIS, respectively. Any of the result
287 pointers may be null, in which case no value is stored. */
288 void
289 moments_of_values (const union value *array, size_t cnt,
290 double *weight,
291 double *mean, double *variance,
292 double *skewness, double *kurtosis)
294 enum moment max_moment;
295 struct moments m;
296 size_t idx;
298 if (kurtosis != NULL)
299 max_moment = MOMENT_KURTOSIS;
300 else if (skewness != NULL)
301 max_moment = MOMENT_SKEWNESS;
302 else if (variance != NULL)
303 max_moment = MOMENT_VARIANCE;
304 else
305 max_moment = MOMENT_MEAN;
307 init_moments (&m, max_moment);
308 for (idx = 0; idx < cnt; idx++)
309 moments_pass_one (&m, array[idx].f, 1.);
310 for (idx = 0; idx < cnt; idx++)
311 moments_pass_two (&m, array[idx].f, 1.);
312 moments_calculate (&m, weight, mean, variance, skewness, kurtosis);
315 /* One-pass moments. */
317 /* A set of one-pass moments. */
318 struct moments1
320 enum moment max_moment; /* Highest-order moment we're computing. */
321 double w; /* Total weight so far. */
322 double d1; /* Sum of deviations from the mean. */
323 double d2; /* Sum of squared deviations from the mean. */
324 double d3; /* Sum of cubed deviations from the mean. */
325 double d4; /* Sum of (deviations from the mean)**4. */
328 /* Initializes one-pass moments M for calculating moment
329 MAX_MOMENT and lower moments. */
330 static void
331 init_moments1 (struct moments1 *m, enum moment max_moment)
333 assert (m != NULL);
334 assert (max_moment == MOMENT_MEAN || max_moment == MOMENT_VARIANCE
335 || max_moment == MOMENT_SKEWNESS || max_moment == MOMENT_KURTOSIS);
336 m->max_moment = max_moment;
337 moments1_clear (m);
340 /* Clears out a set of one-pass moments so that it can be reused
341 for a new set of values. The moments to be calculated are not
342 changed. */
343 void
344 moments1_clear (struct moments1 *m)
346 m->w = 0.;
347 m->d1 = m->d2 = m->d3 = m->d4 = 0.;
350 /* Creates and returns a data structure for calculating moment
351 MAX_MOMENT and lower moments on a data series in a single
352 pass. The user should call moments1_add() for each value in
353 the series. The user may call moments1_calculate() to obtain
354 the current moments at any time. Call moments1_destroy() when
355 the moments are no longer needed.
357 One-pass moments should only be used when two passes over the
358 data are impractical. */
359 struct moments1 *
360 moments1_create (enum moment max_moment)
362 struct moments1 *m = xmalloc (sizeof *m);
363 init_moments1 (m, max_moment);
364 return m;
367 /* Adds VALUE with the given WEIGHT to the calculation of
368 one-pass moments. */
369 void
370 moments1_add (struct moments1 *m, double value, double weight)
372 assert (m != NULL);
374 if (value != SYSMIS && weight > 0.)
376 double prev_w, v1;
378 prev_w = m->w;
379 m->w += weight;
380 v1 = (weight / m->w) * (value - m->d1);
381 m->d1 += v1;
383 if (m->max_moment >= MOMENT_VARIANCE)
385 double v2 = v1 * v1;
386 double w_prev_w = m->w * prev_w;
387 double prev_m2 = m->d2;
389 m->d2 += w_prev_w / weight * v2;
390 if (m->max_moment >= MOMENT_SKEWNESS)
392 double w2 = weight * weight;
393 double v3 = v2 * v1;
394 double prev_m3 = m->d3;
396 m->d3 += (-3. * v1 * prev_m2
397 + w_prev_w / w2 * (m->w - 2. * weight) * v3);
398 if (m->max_moment >= MOMENT_KURTOSIS)
400 double w3 = w2 * weight;
401 double v4 = v2 * v2;
403 m->d4 += (-4. * v1 * prev_m3
404 + 6. * v2 * prev_m2
405 + ((pow2 (m->w) - 3. * weight * prev_w)
406 * v4 * w_prev_w / w3));
413 /* Calculates one-pass moments based on the input data. Stores
414 the total weight in *WEIGHT, the mean in *MEAN, the variance
415 in *VARIANCE, the skewness in *SKEWNESS, and the kurtosis in
416 *KURTOSIS. Any of these result parameters may be null
417 pointers, in which case the values are not calculated. If any
418 result cannot be calculated, either because they are undefined
419 based on the input data or because their moments are higher
420 than the maximum requested on moments_create(), then SYSMIS is
421 stored into that result. */
422 void
423 moments1_calculate (const struct moments1 *m,
424 double *weight,
425 double *mean, double *variance,
426 double *skewness, double *kurtosis)
428 assert (m != NULL);
430 if (mean != NULL)
431 *mean = SYSMIS;
432 if (variance != NULL)
433 *variance = SYSMIS;
434 if (skewness != NULL)
435 *skewness = SYSMIS;
436 if (kurtosis != NULL)
437 *kurtosis = SYSMIS;
439 if (weight != NULL)
440 *weight = m->w;
442 if (m->w > 0.)
444 if (mean != NULL)
445 *mean = m->d1;
447 calc_moments (m->max_moment,
448 m->w, 0., m->d2, m->d3, m->d4,
449 variance, skewness, kurtosis);
453 /* Destroy one-pass moments M. */
454 void
455 moments1_destroy (struct moments1 *m)
457 free (m);
461 double
462 calc_semean (double var, double W)
464 return sqrt (var / W);
467 /* Returns the standard error of the skewness for the given total
468 weight W.
470 From _SPSS Statistical Algorithms, 2nd ed., 0-918469-89-9,
471 section "DESCRIPTIVES". */
472 double
473 calc_seskew (double W)
475 return sqrt ((6. * W * (W - 1.)) / ((W - 2.) * (W + 1.) * (W + 3.)));
478 /* Returns the standard error of the kurtosis for the given total
479 weight W.
481 From _SPSS Statistical Algorithms, 2nd ed., 0-918469-89-9,
482 section "DESCRIPTIVES", except that the sqrt symbol is omitted
483 there. */
484 double
485 calc_sekurt (double W)
487 return sqrt ((4. * (pow2 (W) - 1.) * pow2 (calc_seskew (W)))
488 / ((W - 3.) * (W + 5.)));