3 ;; Copyright (C) 2006 Mario Rodriguez Riotorto
5 ;; This program is free software; you can redistribute
6 ;; it and/or modify it under the terms of the
7 ;; GNU General Public License as published by
8 ;; the Free Software Foundation; either version 2
9 ;; of the License, or (at your option) any later version.
11 ;; This program is distributed in the hope that it
12 ;; will be useful, but WITHOUT ANY WARRANTY;
13 ;; without even the implied warranty of MERCHANTABILITY
14 ;; or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details at
16 ;; http://www.gnu.org/copyleft/gpl.html
18 ;; This is a set of numerical routines used by package 'stats'
20 ;; For questions, suggestions, bugs and the like, feel free
22 ;; mario @@@ edu DOT xunta DOT es
28 ;; para traducir funciones de usuario a lisp:
29 ;; (meval '($translate $random_normal))
33 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
35 ;; Wilcoxon-Mann-Whitney recursion ;;
37 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
39 ;; This recursion is used when computing exact probabilities
40 ;; for the distribution of the rank statistic in the Wilcoxon-
41 ;; Mann-Whitney test, specially for small samples. It's defined as
42 ;; a(i,j,k) = a(i,j-1,j-k) + a(i-k,j-1,j-k-1)
44 ;; Klotz, Jerome (2006) 'A Computational Approach to Statistics',
45 ;; available at www.stat.wisc.edu/~klotz/Book.pdf
46 (defun rank_sum_recursion (i j k
)
47 (cond ((or (< i
0) (< j
0) (< k
0)) 0)
49 (t (+ (rank_sum_recursion i
(- j
1) (- j k
))
50 (rank_sum_recursion (- i k
) (- j
1) (+ j
(- k
) (- 1)) )))))
54 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
56 ;; Signed rank recursion ;;
58 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
60 ;; This recursion is used when computing exact probabilities
61 ;; for the true distribution of statistic in the signed rank test,
62 ;; specially for small samples. It's defined as
63 ;; a(i,j) = a(i,j-1) + a(i-j,j-1)
65 ;; Klotz, Jerome (2006) 'A Computational Approach to Statistics',
66 ;; available at www.stat.wisc.edu/~klotz/Book.pdf
67 ;; R statistical package. File signrank.c
68 (defun signed_rank_recursion (i j
)
69 (let* ((u (/ (* j
(+ j
1)) 2))
71 (if (> i c
) (setf i
(- u i
)))
72 (cond ((or (< i
0) (< j
0)) 0)
74 (t (+ (signed_rank_recursion i
(- j
1))
75 (signed_rank_recursion (- i j
) (- j
1)))))))
80 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
82 ;; Shapiro-Wilk test for normality ;;
84 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
86 ;; Calculates the algebraic polynomial of order (- (lengh cc) 1)
87 ;; with array of coefficients cc. Zero order coefficient is (aref cc 0).
89 (let ( (ret-val (aref cc
0))
93 (setf p
(* x
(aref cc
(1- nord
))))
94 (do ((j (- nord
2) (1- j
)))
96 (setf p
(* x
(+ p
(aref cc j
)))))
97 (setf ret-val
(+ ret-val p
)) )
101 ;; Calculates the Shapiro-Wilk test and its significance level
102 ;; This is a translation from Fortran to Lisp of swilk.f, which translation
103 ;; to C is used by the R statistical package. This is
104 ;; Algorithm AS R94, Applied Statistics (1995), vol.44, no.4, 547-551
105 ;; Argument 'x' is a Maxima list containing numbers. Optional argument 'n1'
106 ;; is the number of uncensored observations; this option is not documented
107 ;; in Maxima at this moment, since I haven't checked it; in fact, the R
108 ;; function 'shapiro.test' doesn't make use of this second argument (I don't know why).
109 (defun $test_normality
(x &optional n1
)
110 (declare (special $stats_numer
))
111 (unless (or ($listp x
)
112 (and ($matrixp x
) (= ($length
($first x
)) 1)))
113 (merror "First argument of 'test_normality' must be a Maxima list"))
115 (setq x
($first
($transpose x
))))
116 (setf x
(sort (map 'list
#'$float
(rest x
)) #'<))
117 (if (null n1
) (setf n1
(length x
)))
118 (let* (($numer $stats_numer
)
121 w pw
; W statistic and its p-value
123 (z90 1.2816) (z95 1.6449)
124 (z99 2.3263) (zm 1.7509) (zss 0.56268)
125 (bf1 0.8378) (xx90 0.556) (xx95 0.622)
126 (sqrth 0.70711) (small 1e-19) (pi6 1.909859) (stqr 1.047198)
127 ; polynomial coefficients
128 (g (make-array 2 :element-type
'flonum
129 :initial-contents
'(-2.273
0.459)))
130 (c1 (make-array 6 :element-type
'flonum
131 :initial-contents
'(0.0
0.221157 -
0.147981 -
2.07119 4.434685 -
2.706056)))
132 (c2 (make-array 6 :element-type
'flonum
133 :initial-contents
'(0.0
0.042981 -
0.293762 -
1.752461 5.682633 -
3.582633)))
134 (c3 (make-array 4 :element-type
'flonum
135 :initial-contents
'(0.544 -
0.39978 0.025054 -
6.714e-4)))
136 (c4 (make-array 4 :element-type
'flonum
137 :initial-contents
'(1.3822 -
0.77857 0.062767 -
0.0020322)))
138 (c5 (make-array 4 :element-type
'flonum
139 :initial-contents
'(-1.5861 -
0.31082 -
0.083751 0.0038915)))
140 (c6 (make-array 3 :element-type
'flonum
141 :initial-contents
'(-0.4803 -
0.082676 0.0030302)))
142 (c7 (make-array 2 :element-type
'flonum
143 :initial-contents
'(0.164
0.533)))
144 (c8 (make-array 2 :element-type
'flonum
145 :initial-contents
'(0.1736
0.315)))
146 (c9 (make-array 2 :element-type
'flonum
147 :initial-contents
'(0.256 -
0.00635)))
148 ncens nn2 i1 range delta a2 a1 an25 an rsn fac ssumm2 summ2 xi xx y w1
149 ssassx xsx asa sax ssa ssx sx sa zbar zsd zfm z99f z95f
150 z90f bf ld s m gamma a
)
152 (setf an
(coerce n
'flonum
))
153 (setf ncens
(- n n1
)) ; number of censored observations
154 (setf nn2
(truncate n
2))
155 (if (or (< n
3) (> n
5000))
156 (merror "Sample size must be between 3 and 5000"))
157 (setf range
(- (car (last x
)) (first x
)))
159 (merror "All observations are identical"))
160 (if (< n1
3) (merror "Number of uncensored observations is too small"))
162 (merror "Number of uncensored observations is greater than total sample size!!"))
163 (if (and (> ncens
0) (< n
20))
164 (merror "Sample size is too small, and censored data are not allowed"))
165 (setf delta
(/ ncens an
))
167 (merror "Ratio of censored observations is too great (>80%)"))
169 ; calculate coefficients for statistic w
170 (setf a
(make-array (1+ n2
) :element-type
'flonum
:initial-element
0.0))
172 (setf (aref a
1) sqrth
))
173 (t (setf an25
(+ 0.25 an
))
178 (coerce (mul* 1.414213562373095
180 (list '(%inverse_erf
)
181 (add* (mul* 2.0 (/ (- i
0.375) an25
)) -
1))))
183 (setf summ2
(+ summ2
(* (aref a i
) (aref a i
)))))
184 (setf summ2
(* summ2
2.0))
185 (setf ssumm2
(sqrt summ2
))
186 (setf rsn
(/ 1.0 (sqrt an
)))
187 (setf a1
(- (swpoly c1 rsn
) (/ (aref a
1) ssumm2
)))
191 (setf a2
(- (swpoly c2 rsn
) (/ (aref a
2) ssumm2
) ))
192 (setf fac
(sqrt (/ (+ summ2
193 (* (aref a
1) (aref a
1) -
2.0)
194 (* (aref a
2) (aref a
2) -
2.0))
195 (+ 1.0 (* a1 a1 -
2.0) (* a2 a2 -
2.0)))))
196 (setf (aref a
2) a2
) )
198 (setf fac
(sqrt (/ (- summ2
(* 2.0 (aref a
1) (aref a
1)))
199 (- 1.0 (* 2.0 a1 a1
))))) ))
203 (setf (aref a i
) (/ (aref a i
) (- fac
)))) ))
205 ; check for correct sort order on range - scaled X
206 (setf xx
(/ (first x
) range
))
208 (setf sa
(- (aref a
1)))
209 (do ((j (1- n
) (1- j
))
212 (setf xi
(/ (nth i x
) range
))
216 (setf sa
(+ sa
(* (signum (- i j
)) (aref a
(min i j
))))))
219 ; Calculate W statistic as squared correlation
220 ; between data and coefficients
226 (do ((j (1- n
) (1- j
))
230 (setf asa
(- (* (signum (- i j
)) (aref a
(+ 1 (min i j
)))) sa
))
232 (setf xsx
(- (/ (nth i x
) range
) sx
))
233 (setf ssa
(+ ssa
(* asa asa
)))
234 (setf ssx
(+ ssx
(* xsx xsx
)))
235 (setf sax
(+ sax
(* asa xsx
))) )
237 ; w1 equals (1-w) calculated to avoid excessive rounding error
238 ; for W very near 1 (a potential problem in very large samples)
239 (setf ssassx
(sqrt (* ssa ssx
)))
240 (setf w1
(/ (* (- ssassx sax
) (+ ssassx sax
))
244 ; calculate significance level for w
247 (setf pw
(* pi6
(- ($asin
(sqrt w
)) stqr
)))
252 (setf gamma
(swpoly g an
))
256 (setf y
(- (log (- gamma y
)))
258 s
(exp (swpoly c4 an
))))
260 (setf m
(swpoly c5 xx
)
261 s
(exp (swpoly c6 xx
)))))
263 ; censoring by proportion ncens/n
264 ; calculate mean and sd of normal equivalent deviate of w
266 (setf ld
(- (log delta
))
267 bf
(+ 1.0 (* xx bf1
)))
268 (setf z90f
(+ z90
(* bf
(expt (swpoly c7
(expt xx90 xx
)) ld
))))
269 (setf z95f
(+ z95
(* bf
(expt (swpoly c8
(expt xx95 xx
)) ld
))))
270 (setf z99f
(+ z99
(* bf
(expt (swpoly c9 xx
) ld
))))
271 ; regress z90F,...,z99F on normal deviates z90,...,z99 to get
272 ; pseudo-mean and pseudo-sd of z as the slope and intercept
273 (setf zfm
(/ (+ z90f z95f z99f
) 3.0))
274 (setf zsd
(/ (+ (* z90
(- z90f zfm
))
276 (* z99
(- z99f zfm
)))
278 (setf zbar
(- zfm
(* zsd zm
)))
279 (setf m
(+ m
(* zbar s
)))
282 (setf pw
(- 0.5 (* 0.5 (mfuncall '%erf
(/ (- y m
)
283 (* 1.41421356 s
))))))
286 ; the result is an 'inference_result' Maxima object
288 "SHAPIRO - WILK TEST"
289 (list '(mlist) (list '(mlist) '$statistic w
)
290 (list '(mlist) '$p_value pw
))
291 (list '(mlist) 1 2) ) ))