modified: SpatialOmicsCoord.py
[GalaxyCodeBases.git] / c_cpp / etc / KaKs_Calculator / src / base.cpp
blobf02560692acdbfcb5ca7cf5e3015d414d934b0ef
1 /************************************************************
2 * Copyright (C) 2005, BGI of Chinese Academy of Sciences
3 * All rights reserved.
5 * Filename: base.cpp
6 * Abstract: Definition of base class for KaKs methods.
8 * Version: 1.0
9 * Author: Zhang Zhang (zhanghzhang@genomics.org.cn)
10 * Date: Feb.2, 2005
12 *************************************************************/
14 #include "base.h"
17 /******** Global variables ********/
20 /* The Genetic Codes
21 http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=c
22 Last update of the Genetic Codes: October 05, 2000 */
23 int genetic_code=1; //from 1 to 23
24 /* Genetic standard codon table, !=stop codon */
25 const char* transl_table[] = {
26 "FFLLSSSSYY!!CC!WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "1-Standard Code",
27 "FFLLSSSSYY!!CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS!!VVVVAAAADDEEGGGG", "2-Vertebrate Mitochondrial Code",
28 "FFLLSSSSYY!!CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "3-Yeast Mitochondrial Code",
29 "FFLLSSSSYY!!CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "4-Mold Mitochondrial Code",
30 "FFLLSSSSYY!!CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG", "5-Invertebrate Mitochondrial Code",
31 "FFLLSSSSYYQQCC!WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "6-Ciliate, Dasycladacean and Hexamita Code",
32 "", "7-",
33 "", "8-",
34 "FFLLSSSSYY!!CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG", "9-Echinoderm and Flatworm Mitochondrial Code",
35 "FFLLSSSSYY!!CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "10-Euplotid Nuclear Code",
36 "FFLLSSSSYY!!CC!WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "11-Bacterial and Plant Plastid Code",
37 "FFLLSSSSYY!!CC!WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "12-Alternative Yeast Nuclear Code",
38 "FFLLSSSSYY!!CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG", "13-Ascidian Mitochondrial Code",
39 "FFLLSSSSYYY!CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG", "14-Alternative Flatworm Mitochondrial Code",
40 "FFLLSSSSYY!QCC!WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "15-Blepharisma Nuclear Code",
41 "FFLLSSSSYY!LCC!WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "16-Chlorophycean Mitochondrial Code",
42 "", "17-",
43 "", "18-",
44 "", "19-",
45 "", "20-",
46 "FFLLSSSSYY!!CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG", "21-Trematode Mitochondrial Code",
47 "FFLLSS!SYY!LCC!WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "22-Scenedesmus obliquus mitochondrial Code",
48 "FF!LSSSSYY!!CC!WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "23-Thraustochytrium Mitochondrial Code"
51 string seq_name; //sequences' name
52 unsigned long length; //sequences' length
53 double GC[4]; //GC Content
54 //********End of Global variables**********
56 //Constructor function
57 Base::Base() {
59 int i;
60 for(i=0; i<5; i++) {
61 Si[i] = Vi[i] = L[i] = NULL;
63 for (i=0; i<NUMBER_OF_RATES; i++) {
64 KAPPA[i] = 1;
67 SEKa = SEKs = AICc = lnL = AkaikeWeight = NA;
68 Ka = Ks = Sd = Nd = S = N = snp = t = kappa = NULL;
70 model = "";
73 /********************************************
74 * Function: addString
75 * Input Parameter: string, string, string
76 * Output: result = result + str + flag
77 * Return Value: void
78 * Note: flag = "\t" (default) or "\n"
79 *********************************************/
80 void Base::addString(string &result, string str, string flag) {
81 result += str;
82 result += flag;
85 /**********************************************************************
86 * Function: getAminoAcid
87 * Input Parameter: codon or codon's id
88 * Output: Calculate the amino acid according to codon or codon's id.
89 * Return Value: char
90 ***********************************************************************/
91 char Base::getAminoAcid(string codon) {
92 return transl_table[2*(genetic_code-1)][getID(codon)];
94 char Base::getAminoAcid(int id) {
95 return transl_table[2*(genetic_code-1)][id];
98 /**********************************
99 * Function: getNumNonsense
100 * Input Parameter: int
101 * Output: get the number of nonsense codons
102 * Return Value: int
103 ***********************************/
104 int Base::getNumNonsense(int genetic_code) {
106 int num, i;
107 for(num=i=0; i<CODON; i++) {
108 if(getAminoAcid(i)=='!') num++;
111 return num;
114 /********************************************
115 * Function: getID
116 * Input Parameter: codon
117 * Output: Get codon's id in array of codon_table.
118 * Return Value: int
119 *********************************************/
120 int Base::getID(string codon) {
121 return (convertChar(codon[0])*XSIZE + convertChar(codon[1])*DNASIZE + convertChar(codon[2]));
124 /********************************************
125 * Function: getCodon
126 * Input Parameter: int
127 * Output: Get the codon according to id;
128 a reverse funtion of getID.
129 * Return Value: string
130 *********************************************/
131 string Base::getCodon(int IDcodon) {
133 string codon = "TTT";
135 if (IDcodon>=0 && IDcodon<64) {
136 codon[0]=convertInt(IDcodon/16);
137 codon[1]=convertInt((IDcodon%16)/4);
138 codon[2]=convertInt(IDcodon%4);
141 return codon;
144 /*********************************************
145 * Function: convertChar
146 * Input Parameter: ch as char
147 * Output: Convert a char-T,C,A,G into a digit
148 * 0,1,2,3, respectively.
149 * Return Value: int.
150 **********************************************/
151 int Base::convertChar(char ch) {
152 int ret = -1;
153 switch(ch) {
154 case 'T':case 'U':
155 ret = 0;
156 break;
157 case 'C':
158 ret = 1;
159 break;
160 case 'A':
161 ret = 2;
162 break;
163 case 'G':
164 ret = 3;
165 break;
167 return ret;
170 /********************************************
171 * Function: convertInt
172 * Input Parameter: int
173 * Output: Convert a digit- 0,1,2,3 into a
174 * char-T,C,A,G, respectively.
175 * Return Value: char
176 *********************************************/
177 char Base::convertInt(int i) {
178 char ch = '-';
179 switch(i) {
180 case 0:
181 ch = 'T';
182 break;
183 case 1:
184 ch = 'C';
185 break;
186 case 2:
187 ch = 'A';
188 break;
189 case 3:
190 ch = 'G';
191 break;
193 return ch;
196 /********************************************
197 * Function: stringtoUpper
198 * Input Parameter: string
199 * Output: upper string
200 * Return Value: string
201 *********************************************/
202 string Base::stringtoUpper(string str) {
203 int j;
204 for(j=0; str[j] = toupper(str[j]), j<str.length(); j++);
205 return str;
208 /********************************************
209 * Function: getRandom
210 * Input Parameter: void
211 * Output: Generate a radnom integer
212 * Return Value: int
213 *********************************************/
214 int Base::getRandom() {
215 srand((unsigned)time(NULL));
216 return rand();
219 /********************************************
220 * Function: initArray
221 * Input Parameter: array of int/double, int, int/double(default=0)
222 * Output: Init the array x[0...n-1]=value
223 * Return Value: int
224 *********************************************/
225 int Base::initArray(double x[], int n, double value) {
226 int i;
227 for(i=0; i<n; i++) x[i] = value;
228 return 0;
231 int Base::initArray(int x[], int n, int value) {
232 int i;
233 for(i=0; i<n; i++) x[i] = value;
234 return 0;
237 /********************************************
238 * Function: sumArray
239 * Input Parameter: double/int, int, int(default=0)
240 * Output: Sum of array x[]
241 * Return Value: double/int
242 *********************************************/
243 double Base::sumArray(double x[], int end, int begin) {
244 int i;
245 double sum=0.;
246 for(i=begin; i<end; sum += x[i], i++);
247 return sum;
250 int Base::sumArray(int x[], int end, int begin) {
251 int i, sum=0;
252 for(i=begin; i<end; sum += x[i], i++);
253 return sum;
256 /********************************************
257 * Function: norm
258 * Input Parameter: array of double, int
259 * Output: Sqrt of the sum of the elements' square
260 sqrt(x0*x0 + x1*x1 + ...)
261 * Return Value: double
262 *********************************************/
263 double Base::norm(double x[], int n) {
264 int i;
265 double t=0;
267 for(i=0; i<n; t += square(x[i]), i++);
269 return sqrt(t);
272 /********************************************
273 * Function: scaleArray
274 * Input Parameter: double, array of double, int
275 * Output: Elements in array are mutipled by scale
276 * Return Value: int
277 *********************************************/
278 int Base::scaleArray(double scale, double x[], int n) {
279 int i;
280 for (i=0; i<n; i++) x[i] *= scale;
282 return 1;
285 /********************************************
286 * Function: copyArray
287 * Input Parameter: array, array, int
288 * Output: Copy array's values one by one: to[] = from[]
289 * Return Value: int
290 *********************************************/
291 int Base::copyArray(double from[], double to[], int n) {
292 int i;
293 for(i=0; i<n; i++) to[i] = from[i];
295 return 1;
298 /********************************************
299 * Function: innerp
300 * Input Parameter: array, array, int
301 * Output: Sum of 'n' products multiplied by
302 two elements in x[], y[].
303 * Return Value: int
304 *********************************************/
305 double Base::innerp(double x[], double y[], int n) {
307 int i;
308 double t=0;
310 for(i=0; i<n; t += x[i]*y[i], i++);
312 return t;
315 /********************************************
316 * Function: initIdentityMatrix
317 * Input Parameter: array of double, int
318 * Output: Set x[i,j]=0 when x!=j and
319 x[i,j]=1 when x=j
320 * Return Value: int
321 *********************************************/
322 int Base::initIdentityMatrix(double x[], int n) {
324 int i,j;
326 for (i=0; i<n; i++) {
327 for(j=0; j<n; x[i*n+j]=0, j++);
328 x[i*n+i] = 1;
331 return 0;
335 /************************************************
336 * Function: writeFile
337 * Input Parameter: string, string
338 * Output: Write content into the given file.
339 * Return Value: True if succeed, otherwise false.
340 *************************************************/
341 bool Base::writeFile(string output_filename, const char* result) {
343 bool flag = true;
344 try {
345 //file name is ok
346 if (output_filename!="" && output_filename.length()>0) {
347 ofstream os(output_filename.c_str());
348 if (!os.is_open()) throw 1;
350 os<<result;
351 os.close();
354 catch (...) {
355 cout<<"Error in writing to file..."<<endl;
356 flag = false;
359 return flag;
363 /*****************************************************
364 * Function: parseOutput
365 * Input Parameter: void
366 * Output: Parse estimated results for outputing
367 * Return Value: string
369 Order: "Sequence", "Method", "Ka", "Ks", "Ka/Ks",
370 "P-Value(Fisher)", "Length", "S-Sites", "N-Sites", "Fold-Sites(0:2:4)",
371 "Substitutions", "S-Substitutions", "N-Substitutions", "Fold-S-Substitutions(0:2:4)", "Fold-N-Substitutions(0:2:4)",
372 "Divergence-Time", "Substitution-Rate-Ratio(rTC:rAG:rTA:rCG:rTG:rCA/rCA)", "GC(1:2:3)", "ML-Score", "AICc",
373 "Model"
374 ******************************************************/
375 string Base::parseOutput() {
377 int i;
378 string result = "", tmp;
380 //Sequence name
381 addString(result, seq_name);
382 //Method name
383 addString(result, name);
385 //Ka
386 if (Ka<SMALLVALUE) {
387 tmp = "NA";
389 else {
390 tmp = CONVERT<string>(Ka);
392 addString(result, tmp);
394 //Ks
395 if (Ks<SMALLVALUE) {
396 tmp = "NA";
398 else {
399 tmp = CONVERT<string>(Ks);
401 addString(result, tmp);
403 //Ka/Ks
404 if(Ks<SMALLVALUE || Ks==NA || Ka==NA) {
405 tmp = "NA";
407 else {
408 tmp = CONVERT<string>(Ka/Ks);
410 addString(result, tmp);
412 //Fisher's test: p_value
413 if (Sd<SMALLVALUE || Nd<SMALLVALUE || S<SMALLVALUE || N<SMALLVALUE)
414 tmp = "NA";
415 else {
416 tmp = CONVERT<string>(fisher(Sd,Nd,S-Sd,N-Nd));
418 addString(result, tmp);
420 //Length of compared pairwise sequences
421 addString(result, CONVERT<string>(length));
423 //Synonymous(S) sites
424 if (S<SMALLVALUE) {
425 tmp = "NA";
427 else {
428 tmp = CONVERT<string>(S);
430 addString(result, tmp);
432 //Nonsynonymous(N) sites
433 if (N<SMALLVALUE) {
434 tmp = "NA";
436 else {
437 tmp = CONVERT<string>(N);
439 addString(result, tmp);
441 //L[0], L[2], L[4] only for Prof.Li's series(LWL85, LPB93...)
442 if (L[0]<SMALLVALUE && L[2]<SMALLVALUE && L[4]<SMALLVALUE) {
443 tmp = "NA";
445 else {
446 tmp = CONVERT<string>(L[0]); tmp += ":";
447 tmp += CONVERT<string>(L[2]); tmp += ":";
448 tmp += CONVERT<string>(L[4]);
450 addString(result, tmp);
453 //Substitutions
454 addString(result, CONVERT<string>(snp));
456 //Sysnonymous(Sd) Substitutions(Nd)
457 if (Sd>SMALLVALUE) {
458 tmp = CONVERT<string>(Sd);
460 else {
461 tmp = "NA";
463 addString(result, tmp);
465 //Nonsysnonymous Substitutions(Nd)
466 if (Nd>SMALLVALUE) {
467 tmp = CONVERT<string>(Nd);
469 else {
470 tmp = "NA";
472 addString(result, tmp);
474 //Si for Li's series' methods(LWL85, LPB93...)
475 if (Si[0]!=NULL || Si[2]!=NULL || Si[4]!=NULL) { //Si[0], Si[2], Si[4]
476 tmp = CONVERT<string>(Si[0]); tmp += ":";
477 tmp += CONVERT<string>(Si[2]); tmp += ":";
478 tmp += CONVERT<string>(Si[4]);
480 else {
481 tmp = "NA";
483 addString(result, tmp);
485 //Vi for Li's series' methods(LWL85, LPB93...)
486 if (Vi[0]!=NULL || Vi[2]!=NULL || Vi[4]!=NULL) { //Vi[0], Vi[2], Vi[4]
487 tmp = CONVERT<string>(Vi[0]); tmp += ":";
488 tmp += CONVERT<string>(Vi[2]); tmp += ":";
489 tmp += CONVERT<string>(Vi[4]);
491 else {
492 tmp = "NA";
494 addString(result, tmp);
497 //Divergence time or distance t = (S*Ks+N*Ka)/(S+N)
498 if(t<SMALLVALUE) {
499 tmp = "NA";
501 else {
502 tmp = CONVERT<string>(t);
504 addString(result, tmp);
506 //Substitution-Rate-Ratio(rTC:rAG:rTA:rCG:rTG:rCA/rCA)
507 for(i=0, tmp=""; i<NUMBER_OF_RATES-1; i++) {
508 tmp += CONVERT<string>(KAPPA[i]);
509 tmp += ":";
511 tmp += CONVERT<string>(KAPPA[i]);
512 addString(result, tmp);
514 //GC Content
515 tmp = CONVERT<string>(GC[0]); tmp += "(";
516 tmp += CONVERT<string>(GC[1]); tmp += ":";
517 tmp += CONVERT<string>(GC[2]); tmp += ":";
518 tmp += CONVERT<string>(GC[3]); tmp += ")";
519 addString(result, tmp);
521 //Maximum Likelihood Value
522 if (lnL==NA) tmp = "NA";
523 else tmp = CONVERT<string>(lnL);
524 addString(result, tmp);
526 //AICc
527 if (AICc==NA) tmp = "NA";
528 else tmp = CONVERT<string>(AICc);
529 addString(result, tmp);
531 //Akaike weight in model selection
532 if (AkaikeWeight==NA) tmp = "NA";
533 else tmp = CONVERT<string>(AkaikeWeight);
534 addString(result, tmp);
536 //Selected Model according to AICc
537 if (model==""||model.length()==0) tmp = "NA";
538 else tmp = model;
539 addString(result, tmp, "\n");
542 //Standard Errors
543 if (SEKa==NA) tmp = "NA";
544 else tmp = CONVERT<string>(SEKa);
545 addString(result, tmp, "\t");
547 if (SEKs==NA) tmp = "NA";
548 else tmp = CONVERT<string>(SEKs);
549 addString(result, tmp, "\n");
552 return result;
555 /**************************************************
556 * Function: fisher
557 * Input Parameter: double, double, double, double
558 * Output: Compute p-value by Fisher exact test
559 * Return Value: double
560 ***************************************************/
561 double Base::fisher(double sd, double nd, double s, double n) {
562 double denominator, numerator, prob_total, prob_current, sum, fac_sum;
563 double matrix[4], R[2], C[2];
564 int i, j;
566 denominator = numerator = prob_total = prob_current = sum = fac_sum = 0.0;
568 matrix[0]=sd; matrix[1]=s; matrix[2]=nd; matrix[3]=n;
569 //Row & Column
570 R[0]=matrix[0]+matrix[2]; R[1]=matrix[1]+matrix[3];
571 C[0]=matrix[0]+matrix[1]; C[1]=matrix[2]+matrix[3];
572 sum = R[0]+R[1];
574 //Calculate the numberator that is a constant
575 numerator += factorial(R[0]);
576 numerator += factorial(R[1]);
577 numerator += factorial(C[0]);
578 numerator += factorial(C[1]);
580 //Log of Factorial of N
581 fac_sum = factorial(sum);
582 for(i=0, denominator=fac_sum; i<4; i++) {
583 denominator += factorial(matrix[i]);
585 //Probability of current situtation
586 prob_current = exp(numerator-denominator);
588 //Two-tail probabilities if less than prob_current
589 for(i=0; i<=R[0]; i++) {
590 matrix[0] = i;
591 matrix[1] = C[0]-i;
592 matrix[2] = R[0]-i;
593 matrix[3] = R[1]-C[0]+i;
594 if (matrix[0]>=0 && matrix[1]>=0 && matrix[2]>=0 && matrix[3]>=0) {
595 for(j=0, denominator=fac_sum; j<4; j++) {
596 denominator += factorial(matrix[j]);
598 double temp = numerator-denominator;
599 temp = exp(numerator-denominator);
600 if (temp<=prob_current) {
601 prob_total += temp;
606 return prob_total;
609 /**************************************************
610 * Function: factorial
611 * Input Parameter: n
612 * Output: Compute the factorial of 'n', then return
613 the log of it.
614 * Return Value: double
615 ***************************************************/
616 double Base::factorial(double n) {
617 double temp=1.0;
618 if (n>0) {
619 n = n + 1;
620 double x = 0;
621 x += 0.1659470187408462e-06/(n+7);
622 x += 0.9934937113930748e-05/(n+6);
623 x -= 0.1385710331296526 /(n+5);
624 x += 12.50734324009056 /(n+4);
625 x -= 176.6150291498386 /(n+3);
626 x += 771.3234287757674 /(n+2);
627 x -= 1259.139216722289 /(n+1);
628 x += 676.5203681218835 /(n);
629 x += 0.9999999999995183;
630 temp = log(x) - 5.58106146679532777 - n +(n - 0.5)*log(n + 6.5);
633 return (temp);
639 int matby (double a[], double b[], double c[], int n,int m,int k)
640 // a[n*m], b[m*k], c[n*k] ...... c = a*b
643 int i,j,i1;
644 double t;
645 FOR (i,n) FOR(j,k) {
646 for (i1=0,t=0; i1<m; i1++) t+=a[i*m+i1]*b[i1*k+j];
647 c[i*k+j] = t;
649 return (0);
656 void PMatrixTaylor(double P[], double n, double t) {
658 // Get approximate PMat using polynomial approximation
659 // P(t) = I + Qt + (Qt)^2/2 + (Qt)^3/3!
661 int nterms=1000, i,j,k, c[2],ndiff,pos=0,from[3],to[3];
662 double *Q=space, *Q1=Q+n*n, *Q2=Q1+n*n, mr, div;
664 FOR (i,n*n) Q[i]=0;
665 for (i=0; i<n; i++) FOR (j,i) {
666 from[0]=i/16; from[1]=(i/4)%4; from[2]=i%4;
667 to[0]=j/16; to[1]=(j/4)%4; to[2]=j%4;
668 c[0]=GenetCode[com.icode][i];
669 c[1]=GenetCode[com.icode][j];
670 if (c[0]==-1 || c[1]==-1) continue;
671 for (k=0,ndiff=0; k<3; k++) if (from[k]!=to[k]) { ndiff++; pos=k; }
672 if (ndiff!=1) continue;
673 Q[i*n+j]=1;
674 if ((from[pos]+to[pos]-1)*(from[pos]+to[pos]-5)==0) Q[i*n+j]*=kappa;
675 if(c[0]!=c[1]) Q[i*n+j]*=omega;
676 Q[j*n+i]=Q[i*n+j];
678 FOR(i,n) FOR(j,n) Q[i*n+j]*=pi[j];
679 for (i=0,mr=0;i<n;i++) {
680 Q[i*n+i]=-sum(Q+i*n,n); mr-=pi[i]*Q[i*n+i];
682 FOR(i,n*n) Q[i]*=t/mr;
684 xtoy(Q,P,n*n); FOR(i,n) P[i*n+i]++; // I + Qt
685 xtoy(Q,Q1,n*n);
686 for (i=2,div=2;i<nterms;i++,div*=i) { // k is divisor
687 matby(Q, Q1, Q2, n, n, n);
688 for(j=0,mr=0;j<n*n;j++) { P[j]+=Q2[j]/div; mr+=fabs(Q2[j]); }
689 mr/=div;
690 // if(debug) printf("Pmat term %d: norm=%.6e\n", i, mr);
691 if (mr<e) break;
692 xtoy(Q2,Q1,n*n);
695 FOR(i,n*n) if(P[i]<0) P[i]=0;