modified: SpatialOmicsCoord.py
[GalaxyCodeBases.git] / c_cpp / etc / KaKs_Calculator / src / LWL85.cpp
bloba7033d179954ee3f6e35e5945cff2c4d5200aaed
1 /*********************************************************
2 * Copyright (C) 2005, BGI of Chinese Academy of Sciences
3 * All rights reserved.
5 * Filename: LWL85.cpp
6 * Abstract: Definition of LWL85 and Modified LWL85 class.
8 * Version: 1.0
9 * Author: Zhang Zhang (zhanghzhang@genomics.org.cn)
10 * Date: Feb., 2005
12 References:
13 Li WH, Wu CI, Luo CC (1985) A new method for
14 estimating synonymous and nonsynonymous rates of nucleotide
15 substitution considering the relative likelihood of nucleotide
16 and codon changes. Mol. Biol. Evol. 2:150-174.
18 Tzeng Y-H, Pan R, Li W-H (2004) Comparison of Three Methods
19 for Estimating Rates of Synonymous and Nonsynonymous Nucleotide
20 Substitutions. Mol. Biol. Evol. 21:2290-2298.
21 **********************************************************/
23 #include "LWL85.h"
26 LWL85::LWL85() {
27 int i;
29 name = "LWL";
30 for(i=0; i<5; i++) K[i] = A[i] = B[i] = Pi[i] = Qi[i] = 0;
33 /************************************************
34 * Function: CountSiteAndDiff
35 * Input Parameter: codon1, codon2
36 * Output: Calculate synonymous and nonsynonymous
37 sites and differences between two codons.
38 * Return Value: void
39 *************************************************/
40 void LWL85::CountSiteAndDiff(string codon1, string codon2) {
42 int i,j,k;
43 int num=0, diff[5];
44 string temp1 = "", temp2="";
45 int sum=1, stop=0;
47 //Count sites
48 for(i=0; i<CODONLENGTH; i++) {
49 L[getCodonClass(codon1,i)]++;
50 L[getCodonClass(codon2,i)]++;
53 //Count differences
54 for (i=0; i<3; i++) {
55 diff[i] = -1;
56 if (codon1[i]!=codon2[i]) {
57 diff[num] = i;
58 num++;
62 //Two codons are identical
63 if(num==0) return;
65 //Sum is the number of evolution pathway.
66 for (i=1; i<=num; i++) sum *= i;
68 snp += num;
70 for(i=0; i<CODONLENGTH; i++)
71 Si_temp[2*i] = Vi_temp[2*i] =0.0;
73 //One difference
74 if(num==1) {
75 TransitionTransversion(codon1, codon2, diff[0]);
78 //Two differences: 2 pathways for evolution (i,j)
79 if(num==2) {
80 for(i=0;i<num;i++) {
81 for(j=0;j<num;j++) {
82 if(i!=j) {
83 temp1 = codon1;
84 temp1[diff[i]] = codon2[diff[i]];
85 if (getAminoAcid(temp1)!='!') {
86 //pathway: codon1 <-> temp1 <-> codon2
87 TransitionTransversion(codon1, temp1, diff[i]);
88 TransitionTransversion(temp1, codon2, diff[j]);
90 else {
91 stop++;
98 //Three differences: 6 pathways for evolution (i,j,k)
99 if(num==3) {
100 for (i=0;i<3;i++) {
101 for (j=0;j<3;j++) {
102 for (k=0;k<3;k++) {
103 if ((i!=j) && (i!=k) && (j!=k)) {
104 temp1 = codon1;
105 temp1[diff[i]] = codon2[diff[i]];
106 temp2 = temp1;
107 temp2[diff[j]] = codon2[diff[j]];
108 if (getAminoAcid(temp1)!='!' && getAminoAcid(temp2)!='!') {
109 //pathway: codon1 <-> temp1 <-> temp2 <-> codon2
110 TransitionTransversion(codon1, temp1, diff[i]);
111 TransitionTransversion(temp1, temp2, diff[j]);
112 TransitionTransversion(temp2, codon2, diff[k]);
114 else
115 stop++;
122 //Add pair-codon's differences to Si and Vi
123 for(i=0; i<CODONLENGTH && (sum-stop)>0; i++) {
124 Si[2*i] += (Si_temp[2*i]/(sum-stop));
125 Vi[2*i] += (Vi_temp[2*i]/(sum-stop));
129 /************************************************
130 * Function: getCodonClass
131 * Input Parameter: codon, position(0,1,2)
132 * Output: return 0,2,4-fold of codon at a given position.
133 * Return Value: 0 or 2 or 4
134 *************************************************/
135 int LWL85::getCodonClass(string codon, int pos) {
136 int i;
137 int codonClass = 0;
139 for(i=0; i<4; i++) {
140 if (i!=convertChar(codon[pos])) {
141 string temp = codon;
142 temp[pos] = convertInt(i);
143 if (getAminoAcid(temp)!='!' && getAminoAcid(temp)==getAminoAcid(codon)) {
144 codonClass++;
148 if (codonClass>0 && codonClass<3) {
149 codonClass = 2;
151 else if (codonClass==3) {
152 codonClass = 4;
155 return codonClass;
158 /************************************************
159 * Function: TransitionTransversion
160 * Input Parameter: codon1, codon2, position(0,1,2)
161 * Output: Calculate synonymous and nonsynonymous differences
162 of two codons at a given position.
163 * Return Value: int
164 * Note: Follow kakstools.py sent from Prof.Li
165 *************************************************/
166 int LWL85::TransitionTransversion(string codon1, string codon2, int pos) {
168 //1:synonymous, 0:nonsynonymous
169 int isSyn = 0;
171 /* Follow kakstools.py sent from Prof.Li WH */
172 //CGA, CGG, AGA, AGG
173 if ((codon1=="CGA" && codon2=="AGA" && pos==0)||(codon1=="AGA" && codon2=="CGA" && pos==0)) {
174 isSyn = 1;
176 if ((codon1=="CGG" && codon2=="AGG" && pos==0)||(codon1=="AGG" && codon2=="CGG" && pos==0)) {
177 isSyn = 1;
179 if (isSyn==1) {
180 int c1=getCodonClass(codon1,pos);
181 int c2=getCodonClass(codon2,pos);
182 Si_temp[c1] += 0.5; //different from the following
183 Vi_temp[c2] += 0.5;
185 return 1;
188 /* Normal situation */
189 //Synonymous: T(0)<->C(1), A(2)<->G(3)
190 int sum = convertChar(codon1[pos]) + convertChar(codon2[pos]);
191 if(sum==5 || sum==1)
192 isSyn = 1;
194 int class1=getCodonClass(codon1,pos);
195 int class2=getCodonClass(codon2,pos);
196 if (isSyn==1) {
197 Si_temp[class1] += 0.5;
198 Si_temp[class2] += 0.5;
200 if (isSyn==0) {
201 Vi_temp[class1] += 0.5;
202 Vi_temp[class2] += 0.5;
205 return 1;
208 /************************************************
209 * Function: preProcess
210 * Input Parameter: seq1, seq2
211 * Output: preprocess for Run
212 * Return Value: void
213 *************************************************/
214 void LWL85::preProcess(string seq1, string seq2) {
216 long i;
217 double ts=0, tv=0;
218 double ai[5], bi[5];
220 for(i=0; i<seq1.length(); i+=3) {
221 CountSiteAndDiff(seq1.substr(i,3), seq2.substr(i,3));
224 for(i=0; i<5; i+=2) {
226 ai[i] = bi[i] = 0.0;
228 ts+=Si[i];
229 tv+=Vi[i];
231 L[i] = L[i]/2.0;
232 Pi[i] = Si[i]/L[i];
233 Qi[i] = Vi[i]/L[i];
235 if ((1 - 2*Pi[i] - Qi[i])>0 && (1 - 2*Qi[i])>0) {
236 ai[i] = 1/(1 - 2*Pi[i] - Qi[i]);
237 bi[i] = 1/(1 - 2*Qi[i]);
240 if (ai[i]>0 && bi[i]>0) {
242 if (log(bi[i])>=0) {
243 B[i] = 0.5*log(bi[i]);
246 if ((0.5*log(ai[i]) - 0.25*log(bi[i]))>=0) {
247 A[i] = 0.5*log(ai[i]) - 0.25*log(bi[i]);
250 K[i] = A[i] + B[i];
255 if(tv>0)
256 kappa = 2*ts/tv;
257 else
258 kappa = 2;
260 //For output formatting
261 KAPPA[0] = KAPPA[1] = kappa;
263 return;
266 /************************************************
267 * Function: Run
268 * Input Parameter: seq1, seq2
269 * Output: Main function for calculating Ka&Ks.
270 * Return Value: void
271 *************************************************/
272 string LWL85::Run(string seq1, string seq2) {
274 preProcess(seq1, seq2);
276 S = L[2]/3 + L[4];
277 N = L[0] + 2*L[2]/3;
279 Sd = L[2]*A[2] + L[4]*K[4];
280 Ks = Sd / S;
282 Nd = L[0]*K[0] + L[2]*B[2];
283 Ka = Nd/N;
285 t = (S*Ks+N*Ka)/(S+N);
287 return parseOutput();
292 MLWL85::MLWL85() {
293 name = "MLWL";
297 /*For more detail see reference: Tzeng Y-H, Pan R, Li W-H (2004) Mol. Biol. Evol.*/
298 int MLWL85::TransitionTransversion(string codon1, string codon2, int pos) {
300 //1:synonymous, 0:nonsynonymous, -1:uncalculate
301 int isSyn=-1;
303 //Ile: ATT, ATC, ATA; Met: ATA
304 if ((codon1=="ATA" && codon2=="ATG" && pos==2)||(codon1=="ATG" && codon2=="ATA" && pos==2)) {
305 isSyn = 0;
307 if ((codon1=="ATA" && (codon2=="ATC"||codon2=="ATT") && pos==2) || ( (codon1=="ATC"||codon1=="ATT") && codon2=="ATA" && pos==2)) {
308 isSyn = 1;
311 //Arg: CGA, CGG, AGA, AGG
312 if ((codon1=="CGA" && codon2=="AGA" && pos==0)||(codon1=="AGA" && codon2=="CGA" && pos==0)) {
313 isSyn = 1;
315 if ((codon1=="CGG" && codon2=="AGG" && pos==0)||(codon1=="AGG" && codon2=="CGG" && pos==0)) {
316 isSyn = 1;
319 //Synonymous: A<->G, C<->T
320 //Normal situation
321 if (isSyn==-1) {
322 int sum = convertChar(codon1[pos]) + convertChar(codon2[pos]);
323 if (sum==5 || sum==1)
324 isSyn = 1;
325 else
326 isSyn = 0;
329 int class1=getCodonClass(codon1,pos);
330 int class2=getCodonClass(codon2,pos);
331 if (isSyn==1) {
332 Si_temp[class1] += 0.5;
333 Si_temp[class2] += 0.5;
335 if (isSyn==0) {
336 Vi_temp[class1] += 0.5;
337 Vi_temp[class2] += 0.5;
340 return 0;
343 /* One of differences between MLWL85 and LWL85 is allowing for kappa in S and N */
344 string MLWL85::Run(string stra, string strb) {
346 long i;
347 double ts=0.0, tv=0.0; //Transition, Transversion
348 double ai[5], bi[5];
350 for(i=0; i<stra.length(); i+=3) {
351 this->CountSiteAndDiff(stra.substr(i,3), strb.substr(i,3));
354 for(i=0; i<5; i+=2) {
356 ai[i] = bi[i] = 0;
358 ts+=Si[i];
359 tv+=Vi[i];
361 L[i] = L[i]/2.0;
362 Pi[i] = Si[i]/L[i];
363 Qi[i] = Vi[i]/L[i];
365 if ((1 - 2*Pi[i] - Qi[i])>0 && (1 - 2*Qi[i])>0) {
366 ai[i] = 1/(1 - 2*Pi[i] - Qi[i]);
367 bi[i] = 1/(1 - 2*Qi[i]);
370 if (ai[i]>0 && bi[i]>0) {
372 if (log(bi[i])>=0) {
373 B[i] = 0.5*log(bi[i]);
376 if ((0.5*log(ai[i]) - 0.25*log(bi[i]))>=0) {
377 A[i] = 0.5*log(ai[i]) - 0.25*log(bi[i]);
380 K[i] = A[i] + B[i];
384 kappa = 2.0*ts/tv;
385 if (ts<SMALLVALUE || tv<SMALLVALUE) kappa = 1;
387 KAPPA[0] = KAPPA[1] = kappa;
389 if (kappa>2.0) {
390 S = (kappa-1)*L[2]/(kappa+1) + L[4];
391 N = L[0] + 2*L[2]/(kappa+1);
393 else {
394 if(kappa>0.5) {
395 S = (kappa-0.5)*L[2]/(kappa+1.5) + L[4];
396 N = L[0] + 2*L[2]/(kappa+1.5);
398 else {
399 S = L[2]/3 + L[4];
400 N = 2*L[2]/3 + L[0];
404 Sd = L[2]*A[2] + L[4]*K[4];
405 Ks = Sd/S;
407 Nd = L[0]*K[0] + L[2]*B[2];
408 Ka = Nd/N;
410 t = (S*Ks+N*Ka)/(S+N);
412 return parseOutput();