modified: src1/input.c
[GalaxyCodeBases.git] / c_cpp / lib / htslib / kfunc.c
blob323e70fcb7bfa05b842168d20b09f589c189673b
1 /* The MIT License
3 Copyright (C) 2010, 2013 Genome Research Ltd.
4 Copyright (C) 2011 Attractive Chaos <attractor@live.co.uk>
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice shall be
15 included in all copies or substantial portions of the Software.
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
21 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
22 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 SOFTWARE.
27 #include <config.h>
29 #include <math.h>
30 #include <stdlib.h>
31 #include "htslib/kfunc.h"
33 /* Log gamma function
34 * \log{\Gamma(z)}
35 * AS245, 2nd algorithm, http://lib.stat.cmu.edu/apstat/245
37 double kf_lgamma(double z)
39 double x = 0;
40 x += 0.1659470187408462e-06 / (z+7);
41 x += 0.9934937113930748e-05 / (z+6);
42 x -= 0.1385710331296526 / (z+5);
43 x += 12.50734324009056 / (z+4);
44 x -= 176.6150291498386 / (z+3);
45 x += 771.3234287757674 / (z+2);
46 x -= 1259.139216722289 / (z+1);
47 x += 676.5203681218835 / z;
48 x += 0.9999999999995183;
49 return log(x) - 5.58106146679532777 - z + (z-0.5) * log(z+6.5);
52 /* complementary error function
53 * \frac{2}{\sqrt{\pi}} \int_x^{\infty} e^{-t^2} dt
54 * AS66, 2nd algorithm, http://lib.stat.cmu.edu/apstat/66
56 double kf_erfc(double x)
58 const double p0 = 220.2068679123761;
59 const double p1 = 221.2135961699311;
60 const double p2 = 112.0792914978709;
61 const double p3 = 33.912866078383;
62 const double p4 = 6.37396220353165;
63 const double p5 = .7003830644436881;
64 const double p6 = .03526249659989109;
65 const double q0 = 440.4137358247522;
66 const double q1 = 793.8265125199484;
67 const double q2 = 637.3336333788311;
68 const double q3 = 296.5642487796737;
69 const double q4 = 86.78073220294608;
70 const double q5 = 16.06417757920695;
71 const double q6 = 1.755667163182642;
72 const double q7 = .08838834764831844;
73 double expntl, z, p;
74 z = fabs(x) * M_SQRT2;
75 if (z > 37.) return x > 0.? 0. : 2.;
76 expntl = exp(z * z * - .5);
77 if (z < 10. / M_SQRT2) // for small z
78 p = expntl * ((((((p6 * z + p5) * z + p4) * z + p3) * z + p2) * z + p1) * z + p0)
79 / (((((((q7 * z + q6) * z + q5) * z + q4) * z + q3) * z + q2) * z + q1) * z + q0);
80 else p = expntl / 2.506628274631001 / (z + 1. / (z + 2. / (z + 3. / (z + 4. / (z + .65)))));
81 return x > 0.? 2. * p : 2. * (1. - p);
84 /* The following computes regularized incomplete gamma functions.
85 * Formulas are taken from Wiki, with additional input from Numerical
86 * Recipes in C (for modified Lentz's algorithm) and AS245
87 * (http://lib.stat.cmu.edu/apstat/245).
89 * A good online calculator is available at:
91 * http://www.danielsoper.com/statcalc/calc23.aspx
93 * It calculates upper incomplete gamma function, which equals
94 * kf_gammaq(s,z)*tgamma(s).
97 #define KF_GAMMA_EPS 1e-14
98 #define KF_TINY 1e-290
100 // regularized lower incomplete gamma function, by series expansion
101 static double _kf_gammap(double s, double z)
103 double sum, x;
104 int k;
105 for (k = 1, sum = x = 1.; k < 100; ++k) {
106 sum += (x *= z / (s + k));
107 if (x / sum < KF_GAMMA_EPS) break;
109 return exp(s * log(z) - z - kf_lgamma(s + 1.) + log(sum));
111 // regularized upper incomplete gamma function, by continued fraction
112 static double _kf_gammaq(double s, double z)
114 int j;
115 double C, D, f;
116 f = 1. + z - s; C = f; D = 0.;
117 // Modified Lentz's algorithm for computing continued fraction
118 // See Numerical Recipes in C, 2nd edition, section 5.2
119 for (j = 1; j < 100; ++j) {
120 double a = j * (s - j), b = (j<<1) + 1 + z - s, d;
121 D = b + a * D;
122 if (D < KF_TINY) D = KF_TINY;
123 C = b + a / C;
124 if (C < KF_TINY) C = KF_TINY;
125 D = 1. / D;
126 d = C * D;
127 f *= d;
128 if (fabs(d - 1.) < KF_GAMMA_EPS) break;
130 return exp(s * log(z) - z - kf_lgamma(s) - log(f));
133 double kf_gammap(double s, double z)
135 return z <= 1. || z < s? _kf_gammap(s, z) : 1. - _kf_gammaq(s, z);
138 double kf_gammaq(double s, double z)
140 return z <= 1. || z < s? 1. - _kf_gammap(s, z) : _kf_gammaq(s, z);
143 /* Regularized incomplete beta function. The method is taken from
144 * Numerical Recipe in C, 2nd edition, section 6.4. The following web
145 * page calculates the incomplete beta function, which equals
146 * kf_betai(a,b,x) * gamma(a) * gamma(b) / gamma(a+b):
148 * http://www.danielsoper.com/statcalc/calc36.aspx
150 static double kf_betai_aux(double a, double b, double x)
152 double C, D, f;
153 int j;
154 if (x == 0.) return 0.;
155 if (x == 1.) return 1.;
156 f = 1.; C = f; D = 0.;
157 // Modified Lentz's algorithm for computing continued fraction
158 for (j = 1; j < 200; ++j) {
159 double aa, d;
160 int m = j>>1;
161 aa = (j&1)? -(a + m) * (a + b + m) * x / ((a + 2*m) * (a + 2*m + 1))
162 : m * (b - m) * x / ((a + 2*m - 1) * (a + 2*m));
163 D = 1. + aa * D;
164 if (D < KF_TINY) D = KF_TINY;
165 C = 1. + aa / C;
166 if (C < KF_TINY) C = KF_TINY;
167 D = 1. / D;
168 d = C * D;
169 f *= d;
170 if (fabs(d - 1.) < KF_GAMMA_EPS) break;
172 return exp(kf_lgamma(a+b) - kf_lgamma(a) - kf_lgamma(b) + a * log(x) + b * log(1.-x)) / a / f;
174 double kf_betai(double a, double b, double x)
176 return x < (a + 1.) / (a + b + 2.)? kf_betai_aux(a, b, x) : 1. - kf_betai_aux(b, a, 1. - x);
179 #ifdef KF_MAIN
180 #include <stdio.h>
181 int main(int argc, char *argv[])
183 double x = 5.5, y = 3;
184 double a, b;
185 printf("erfc(%lg): %lg, %lg\n", x, erfc(x), kf_erfc(x));
186 printf("upper-gamma(%lg,%lg): %lg\n", x, y, kf_gammaq(y, x)*tgamma(y));
187 a = 2; b = 2; x = 0.5;
188 printf("incomplete-beta(%lg,%lg,%lg): %lg\n", a, b, x, kf_betai(a, b, x) / exp(kf_lgamma(a+b) - kf_lgamma(a) - kf_lgamma(b)));
189 return 0;
191 #endif
194 // log\binom{n}{k}
195 static double lbinom(int n, int k)
197 if (k == 0 || n == k) return 0;
198 return lgamma(n+1) - lgamma(k+1) - lgamma(n-k+1);
201 // n11 n12 | n1_
202 // n21 n22 | n2_
203 //-----------+----
204 // n_1 n_2 | n
206 // hypergeometric distribution
207 static double hypergeo(int n11, int n1_, int n_1, int n)
209 return exp(lbinom(n1_, n11) + lbinom(n-n1_, n_1-n11) - lbinom(n, n_1));
212 typedef struct {
213 int n11, n1_, n_1, n;
214 double p;
215 } hgacc_t;
217 // incremental version of hypergenometric distribution
218 static double hypergeo_acc(int n11, int n1_, int n_1, int n, hgacc_t *aux)
220 if (n1_ || n_1 || n) {
221 aux->n11 = n11; aux->n1_ = n1_; aux->n_1 = n_1; aux->n = n;
222 } else { // then only n11 changed; the rest fixed
223 if (n11%11 && n11 + aux->n - aux->n1_ - aux->n_1) {
224 if (n11 == aux->n11 + 1) { // incremental
225 aux->p *= (double)(aux->n1_ - aux->n11) / n11
226 * (aux->n_1 - aux->n11) / (n11 + aux->n - aux->n1_ - aux->n_1);
227 aux->n11 = n11;
228 return aux->p;
230 if (n11 == aux->n11 - 1) { // incremental
231 aux->p *= (double)aux->n11 / (aux->n1_ - n11)
232 * (aux->n11 + aux->n - aux->n1_ - aux->n_1) / (aux->n_1 - n11);
233 aux->n11 = n11;
234 return aux->p;
237 aux->n11 = n11;
239 aux->p = hypergeo(aux->n11, aux->n1_, aux->n_1, aux->n);
240 return aux->p;
243 double kt_fisher_exact(int n11, int n12, int n21, int n22, double *_left, double *_right, double *two)
245 int i, j, max, min;
246 double p, q, left, right;
247 hgacc_t aux;
248 int n1_, n_1, n;
250 n1_ = n11 + n12; n_1 = n11 + n21; n = n11 + n12 + n21 + n22; // calculate n1_, n_1 and n
251 max = (n_1 < n1_) ? n_1 : n1_; // max n11, for right tail
252 min = n1_ + n_1 - n; // not sure why n11-n22 is used instead of min(n_1,n1_)
253 if (min < 0) min = 0; // min n11, for left tail
254 *two = *_left = *_right = 1.;
255 if (min == max) return 1.; // no need to do test
256 q = hypergeo_acc(n11, n1_, n_1, n, &aux); // the probability of the current table
257 // left tail
258 p = hypergeo_acc(min, 0, 0, 0, &aux);
259 for (left = 0., i = min + 1; p < 0.99999999 * q && i<=max; ++i) // loop until underflow
260 left += p, p = hypergeo_acc(i, 0, 0, 0, &aux);
261 --i;
262 if (p < 1.00000001 * q) left += p;
263 else --i;
264 // right tail
265 p = hypergeo_acc(max, 0, 0, 0, &aux);
266 for (right = 0., j = max - 1; p < 0.99999999 * q && j>=0; --j) // loop until underflow
267 right += p, p = hypergeo_acc(j, 0, 0, 0, &aux);
268 ++j;
269 if (p < 1.00000001 * q) right += p;
270 else ++j;
271 // two-tail
272 *two = left + right;
273 if (*two > 1.) *two = 1.;
274 // adjust left and right
275 if (abs(i - n11) < abs(j - n11)) right = 1. - left + q;
276 else left = 1.0 - right + q;
277 *_left = left; *_right = right;
278 return q;