Tries to use 65 or less columns in all input and output lines of the examples.
[maxima.git] / share / stats / numstats.lisp
blob71781a207667670b2e114c0fa9fed8c3725509b8
1 ;; COPYRIGHT NOTICE
2 ;;
3 ;; Copyright (C) 2006 Mario Rodriguez Riotorto
4 ;;
5 ;; This program is free software; you can redistribute
6 ;; it and/or modify it under the terms of the
7 ;; GNU General Public License as published by
8 ;; the Free Software Foundation; either version 2
9 ;; of the License, or (at your option) any later version.
10 ;;
11 ;; This program is distributed in the hope that it
12 ;; will be useful, but WITHOUT ANY WARRANTY;
13 ;; without even the implied warranty of MERCHANTABILITY
14 ;; or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details at
16 ;; http://www.gnu.org/copyleft/gpl.html
18 ;; This is a set of numerical routines used by package 'stats'
20 ;; For questions, suggestions, bugs and the like, feel free
21 ;; to contact me at
22 ;; mario @@@ edu DOT xunta DOT es
23 ;; www.biomates.net
26 (in-package :maxima)
28 ;; para traducir funciones de usuario a lisp:
29 ;; (meval '($translate $random_normal))
33 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
34 ;; ;;
35 ;; Wilcoxon-Mann-Whitney recursion ;;
36 ;; ;;
37 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
39 ;; This recursion is used when computing exact probabilities
40 ;; for the distribution of the rank statistic in the Wilcoxon-
41 ;; Mann-Whitney test, specially for small samples. It's defined as
42 ;; a(i,j,k) = a(i,j-1,j-k) + a(i-k,j-1,j-k-1)
43 ;; Reference:
44 ;; Klotz, Jerome (2006) 'A Computational Approach to Statistics',
45 ;; available at www.stat.wisc.edu/~klotz/Book.pdf
46 (defun rank_sum_recursion (i j k)
47 (cond ((or (< i 0) (< j 0) (< k 0)) 0)
48 ((= i 0) 1)
49 (t (+ (rank_sum_recursion i (- j 1) (- j k))
50 (rank_sum_recursion (- i k) (- j 1) (+ j (- k) (- 1)) )))))
54 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
55 ;; ;;
56 ;; Signed rank recursion ;;
57 ;; ;;
58 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
60 ;; This recursion is used when computing exact probabilities
61 ;; for the true distribution of statistic in the signed rank test,
62 ;; specially for small samples. It's defined as
63 ;; a(i,j) = a(i,j-1) + a(i-j,j-1)
64 ;; Reference:
65 ;; Klotz, Jerome (2006) 'A Computational Approach to Statistics',
66 ;; available at www.stat.wisc.edu/~klotz/Book.pdf
67 ;; R statistical package. File signrank.c
68 (defun signed_rank_recursion (i j)
69 (let* ((u (/ (* j (+ j 1)) 2))
70 (c (floor (/ u 4))))
71 (if (> i c) (setf i (- u i)))
72 (cond ((or (< i 0) (< j 0)) 0)
73 ((= i 0) 1)
74 (t (+ (signed_rank_recursion i (- j 1))
75 (signed_rank_recursion (- i j) (- j 1)))))))
80 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
81 ;; ;;
82 ;; Shapiro-Wilk test for normality ;;
83 ;; ;;
84 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
86 ;; Calculates the algebraic polynomial of order (- (lengh cc) 1)
87 ;; with array of coefficients cc. Zero order coefficient is (aref cc 0).
88 (defun swpoly (cc x)
89 (let ( (ret-val (aref cc 0))
90 (nord (length cc))
92 (when (> nord 1)
93 (setf p (* x (aref cc (1- nord))))
94 (do ((j (- nord 2) (1- j)))
95 ((= j 0) 'done)
96 (setf p (* x (+ p (aref cc j)))))
97 (setf ret-val (+ ret-val p)) )
98 ret-val ) )
101 ;; Calculates the Shapiro-Wilk test and its significance level
102 ;; This is a translation from Fortran to Lisp of swilk.f, which translation
103 ;; to C is used by the R statistical package. This is
104 ;; Algorithm AS R94, Applied Statistics (1995), vol.44, no.4, 547-551
105 ;; Argument 'x' is a Maxima list containing numbers. Optional argument 'n1'
106 ;; is the number of uncensored observations; this option is not documented
107 ;; in Maxima at this moment, since I haven't checked it; in fact, the R
108 ;; function 'shapiro.test' doesn't make use of this second argument (I don't know why).
109 (defun $test_normality (x &optional n1)
110 (declare (special $stats_numer))
111 (unless (or ($listp x)
112 (and ($matrixp x) (= ($length ($first x)) 1)))
113 (merror "First argument of 'test_normality' must be a Maxima list"))
114 (when ($matrixp x)
115 (setq x ($first ($transpose x))))
116 (setf x (sort (map 'list #'$float (rest x)) #'<))
117 (if (null n1) (setf n1 (length x)))
118 (let* (($numer $stats_numer)
119 (n (length x))
120 (n2 (floor (/ n 2)))
121 w pw ; W statistic and its p-value
122 ; initialized data
123 (z90 1.2816) (z95 1.6449)
124 (z99 2.3263) (zm 1.7509) (zss 0.56268)
125 (bf1 0.8378) (xx90 0.556) (xx95 0.622)
126 (sqrth 0.70711) (small 1e-19) (pi6 1.909859) (stqr 1.047198)
127 ; polynomial coefficients
128 (g (make-array 2 :element-type 'flonum
129 :initial-contents '(-2.273 0.459)))
130 (c1 (make-array 6 :element-type 'flonum
131 :initial-contents '(0.0 0.221157 -0.147981 -2.07119 4.434685 -2.706056)))
132 (c2 (make-array 6 :element-type 'flonum
133 :initial-contents '(0.0 0.042981 -0.293762 -1.752461 5.682633 -3.582633)))
134 (c3 (make-array 4 :element-type 'flonum
135 :initial-contents '(0.544 -0.39978 0.025054 -6.714e-4)))
136 (c4 (make-array 4 :element-type 'flonum
137 :initial-contents '(1.3822 -0.77857 0.062767 -0.0020322)))
138 (c5 (make-array 4 :element-type 'flonum
139 :initial-contents '(-1.5861 -0.31082 -0.083751 0.0038915)))
140 (c6 (make-array 3 :element-type 'flonum
141 :initial-contents '(-0.4803 -0.082676 0.0030302)))
142 (c7 (make-array 2 :element-type 'flonum
143 :initial-contents '(0.164 0.533)))
144 (c8 (make-array 2 :element-type 'flonum
145 :initial-contents '(0.1736 0.315)))
146 (c9 (make-array 2 :element-type 'flonum
147 :initial-contents '(0.256 -0.00635)))
148 ncens nn2 i1 range delta a2 a1 an25 an rsn fac ssumm2 summ2 xi xx y w1
149 ssassx xsx asa sax ssa ssx sx sa zbar zsd zfm z99f z95f
150 z90f bf ld s m gamma a)
152 (setf an (coerce n 'flonum))
153 (setf ncens (- n n1)) ; number of censored observations
154 (setf nn2 (truncate n 2))
155 (if (or (< n 3) (> n 5000))
156 (merror "Sample size must be between 3 and 5000"))
157 (setf range (- (car (last x)) (first x)))
158 (if (< range small)
159 (merror "All observations are identical"))
160 (if (< n1 3) (merror "Number of uncensored observations is too small"))
161 (if (< ncens 0)
162 (merror "Number of uncensored observations is greater than total sample size!!"))
163 (if (and (> ncens 0) (< n 20))
164 (merror "Sample size is too small, and censored data are not allowed"))
165 (setf delta (/ ncens an))
166 (if (> delta 0.8)
167 (merror "Ratio of censored observations is too great (>80%)"))
169 ; calculate coefficients for statistic w
170 (setf a (make-array (1+ n2) :element-type 'flonum :initial-element 0.0))
171 (cond ((= n 3)
172 (setf (aref a 1) sqrth))
173 (t (setf an25 (+ 0.25 an))
174 (setf summ2 0.0)
175 (do ((i 1 (1+ i)))
176 ((> i n2) 'done)
177 (setf (aref a i)
178 (coerce (mul* 1.414213562373095
179 (simplify
180 (list '(%inverse_erf)
181 (add* (mul* 2.0 (/ (- i 0.375) an25)) -1))))
182 'flonum))
183 (setf summ2 (+ summ2 (* (aref a i) (aref a i)))))
184 (setf summ2 (* summ2 2.0))
185 (setf ssumm2 (sqrt summ2))
186 (setf rsn (/ 1.0 (sqrt an)))
187 (setf a1 (- (swpoly c1 rsn) (/ (aref a 1) ssumm2)))
188 ; normalize a
189 (cond ((> n 5)
190 (setf i1 3)
191 (setf a2 (- (swpoly c2 rsn) (/ (aref a 2) ssumm2) ))
192 (setf fac (sqrt (/ (+ summ2
193 (* (aref a 1) (aref a 1) -2.0)
194 (* (aref a 2) (aref a 2) -2.0))
195 (+ 1.0 (* a1 a1 -2.0) (* a2 a2 -2.0)))))
196 (setf (aref a 2) a2) )
197 (t (setf i1 2)
198 (setf fac (sqrt (/ (- summ2 (* 2.0 (aref a 1) (aref a 1)))
199 (- 1.0 (* 2.0 a1 a1))))) ))
200 (setf (aref a 1) a1)
201 (do ((i i1 (1+ i)))
202 ((> i nn2) 'done)
203 (setf (aref a i) (/ (aref a i) (- fac)))) ))
205 ; check for correct sort order on range - scaled X
206 (setf xx (/ (first x) range))
207 (setf sx xx)
208 (setf sa (- (aref a 1)))
209 (do ((j (1- n) (1- j))
210 (i 1 i))
211 ((>= i n1) 'done)
212 (setf xi (/ (nth i x) range))
213 (setf sx (+ sx xi))
214 (setf i (1+ i))
215 (if (/= i j)
216 (setf sa (+ sa (* (signum (- i j)) (aref a (min i j))))))
217 (setf xx xi) )
219 ; Calculate W statistic as squared correlation
220 ; between data and coefficients
221 (setf sa (/ sa n1))
222 (setf sx (/ sx n1))
223 (setf ssa 0.0
224 ssx 0.0
225 sax 0.0)
226 (do ((j (1- n) (1- j))
227 (i 0 (1+ i)))
228 ((>= i n1) 'done)
229 (if (/= i j)
230 (setf asa (- (* (signum (- i j)) (aref a (+ 1 (min i j)))) sa))
231 (setf asa (- sa)))
232 (setf xsx (- (/ (nth i x) range) sx))
233 (setf ssa (+ ssa (* asa asa)))
234 (setf ssx (+ ssx (* xsx xsx)))
235 (setf sax (+ sax (* asa xsx))) )
237 ; w1 equals (1-w) calculated to avoid excessive rounding error
238 ; for W very near 1 (a potential problem in very large samples)
239 (setf ssassx (sqrt (* ssa ssx)))
240 (setf w1 (/ (* (- ssassx sax) (+ ssassx sax))
241 (* ssa ssx)))
242 (setf w (- 1 w1))
244 ; calculate significance level for w
245 (tagbody
246 (when (= n 3)
247 (setf pw (* pi6 (- ($asin (sqrt w)) stqr)))
248 (go fin))
249 (setf y (log w1)
250 xx (log an))
251 (cond ((<= n 11)
252 (setf gamma (swpoly g an))
253 (when (>= y gamma)
254 (setf pw small)
255 (go fin))
256 (setf y (- (log (- gamma y)))
257 m (swpoly c3 an)
258 s (exp (swpoly c4 an))))
259 (t ; n >= 12
260 (setf m (swpoly c5 xx)
261 s (exp (swpoly c6 xx)))))
263 ; censoring by proportion ncens/n
264 ; calculate mean and sd of normal equivalent deviate of w
265 (when (> ncens 0)
266 (setf ld (- (log delta))
267 bf (+ 1.0 (* xx bf1)))
268 (setf z90f (+ z90 (* bf (expt (swpoly c7 (expt xx90 xx)) ld))))
269 (setf z95f (+ z95 (* bf (expt (swpoly c8 (expt xx95 xx)) ld))))
270 (setf z99f (+ z99 (* bf (expt (swpoly c9 xx) ld))))
271 ; regress z90F,...,z99F on normal deviates z90,...,z99 to get
272 ; pseudo-mean and pseudo-sd of z as the slope and intercept
273 (setf zfm (/ (+ z90f z95f z99f) 3.0))
274 (setf zsd (/ (+ (* z90 (- z90f zfm))
275 (* z95 (- z95f zfm))
276 (* z99 (- z99f zfm)))
277 zss))
278 (setf zbar (- zfm (* zsd zm)))
279 (setf m (+ m (* zbar s)))
280 (setf s (* s zsd)) )
282 (setf pw (- 0.5 (* 0.5 (mfuncall '%erf (/ (- y m)
283 (* 1.41421356 s))))))
284 fin)
286 ; the result is an 'inference_result' Maxima object
287 ($inference_result
288 "SHAPIRO - WILK TEST"
289 (list '(mlist) (list '(mlist) '$statistic w)
290 (list '(mlist) '$p_value pw))
291 (list '(mlist) 1 2) ) ))