1 /******************************************************************
2 * Copyright (c) 2006, BGI of Chinese Academy of Sciences
6 * Abstract: Declaration of base class for KaKs methods.
9 * Author: Zhang Zhang (zhanghzhang@genomics.org.cn)
12 * Modified Version: 1.2
14 * Modified Date: Apr. 2006
15 ******************************************************************/
16 #pragma warning(disable:4786) //Disable warning when using vector
21 #define VERSION "1.2, Apr. 2006"
23 #define CODONLENGTH 3 //Length of codon
24 #define DNASIZE 4 //A C G T
25 #define XSIZE DNASIZE*DNASIZE //Size of one group AXX (X=A,C,G,T)
26 #define CODON 64 //Codon Size
28 #define NA -1 //Not Available
29 #define NCODE 23 //Number of genetic codes
30 #define NNCODE NCODE*2 //Double of the number genetic codes
31 #define SMALLVALUE 1e-6 //Value near to zero
32 #define NUMBER_OF_RATES 6 //Number of substitution rates
33 #define MODELCOUNT 14 //Number of candidate models
35 #define gammap(x,alpha) (alpha*(1-pow(x,-1/alpha)))
36 #define square(a) ((a)*(a))
38 #define min2(a,b) ((a)<(b)?(a):(b))
39 #define max2(a,b) ((a)>(b)?(a):(b))
40 #define SIGN(a,b) ((b) >= 0.0 ? fabs(a) : -fabs(a))
43 /* Stanard lib of C++ */
56 /*****Global variables*****/
57 extern const char* transl_table
[]; //Genetic codon table
58 extern int genetic_code
; //ID of codon table from 1 to 23
60 extern string seq_name
; //Pairwise sequences' name
61 extern unsigned long length
; //Length of compared sequences
62 extern double GC
[4]; //GC Content of entire sequences(GC[0]) and three codon positions (GC[1--3])
63 //End of Global variables
66 /* Convert one type to any other type */
67 template<class out_type
,class in_value
>
68 out_type
CONVERT(const in_value
& t
) {
70 //Put the value 't' into the stream
73 //Put the stream into the 'result'
84 /* Write the content into file */
85 bool writeFile(string output_filename
, const char* result
);
89 /* Format string for outputing into file */
90 void addString(string
&result
, string str
, string flag
="\t");
92 /* Generate a radnom integer */
95 /* Convert a char-T,C,A,G into a digit 0,1,2,3, respectively */
96 int convertChar(char ch
);
97 /* Convert a digit-0,1,2,3 into a char T,C,A,G, respectively */
98 char convertInt(int ch
);
99 /* Convert a string to uppercase */
100 string
stringtoUpper(string str
);
102 /* Calculate the amino acid of codon by codon */
103 char getAminoAcid(string codon
);
104 /* Calculate the amino acid of codon by codon's id*/
105 char getAminoAcid(int id
);
106 /* Get the number of stop codon in a given genetic code table */
107 int getNumNonsense(int genetic_code
);
109 /* Return the codon's id from codon table */
110 int getID(string codon
);
111 /* Return a codon according to the id */
112 string
getCodon(int IDcodon
);
114 /* Sum array's elements */
115 double sumArray(double x
[], int end
, int begin
=0);
116 int sumArray(int x
[], int end
, int begin
=0);
118 /* Init value to array */
119 int initArray(double x
[], int n
, double value
=0.0);
120 int initArray(int x
[], int n
, int value
=0);
122 /* Elements in array are mutipled by scale */
123 int scaleArray(double scale
, double x
[], int n
);
124 /* Sqrt of the sum of the elements' square */
125 double norm(double x
[], int n
);
126 /* Copy array's values one by one: to[] = from[] */
127 int copyArray(double from
[], double to
[], int n
);
128 /* Sum of 'n' products multiplied by two elements x[], y[] */
129 double innerp(double x
[], double y
[], int n
);
130 /* Set x[i,j]=0 when x!=j and x[i,j]=1 when x=j */
131 int initIdentityMatrix(double x
[], int n
);
134 /* Compute p-value by Fisher exact test to justify the validity of ka/ks */
135 double fisher(double cs
, double us
, double cn
, double un
);
137 double factorial(double n
);
141 /* Name of method for calculating ka/ks */
143 /* Sysnonymous sites(S) and nonsysnonymous sites(N) */
145 /* Number of sysnonymous differences(Sd) and nonsysnonymous differences(Nd), snp=Sd+Nd */
147 /* Number of sysnonymous substitutions(ks) and nonsysnonymous substitutions(ka) per site */
149 /* Standard Errors for Ka and Ks */
151 /* Transitional(Si) and transversional(Vi) substitutions */
153 /* 0-fold, 2-fold, 4-fold */
155 /* Total Numbers of substitutions per i-th type site: K[i]=A[i]+B[i] */
163 /* Transition/transversion rate ratio, tc: between pyrimidines, ag: between purines */
164 double kappa
, kappatc
, kappaag
;
167 /* Divergence time, substitutions per site: t = (S*Ks+N*Ka)/(S+N)*/
169 /* Maximum Likelihood value */
171 /* Akaike Information Criterion (AICc) */
173 /* Akaike Weight in Model Selection */
175 /* Name of mutation model according to AICc */
177 /* Transition/transversion mutation ratio(s) or substitution rates */
178 double KAPPA
[NUMBER_OF_RATES
];
181 /* Store Maximum Likilhood results for Model Selection and Model Averaging */
183 string result
; //estimated results
184 double AICc
; //the value of a modified AIC
185 double freq
[CODON
]; //Codon frequency
186 double rate
[NUMBER_OF_RATES
]; //Six subsitution rates
188 double t
; //divergence time