1 /************************************************************
2 * Copyright (C) 2005, BGI of Chinese Academy of Sciences
6 * Abstract: Declaration of KAKS class including several methods.
9 * Author: Zhang Zhang (zhanghzhang@genomics.org.cn)
12 *************************************************************/
18 string items
[] = {"Sequence", "Method", "Ka", "Ks", "Ka/Ks",
19 "P-Value(Fisher)", "Length", "S-Sites", "N-Sites", "Fold-Sites(0:2:4)",
20 "Substitutions", "S-Substitutions", "N-Substitutions", "Fold-S-Substitutions(0:2:4)", "Fold-N-Substitutions(0:2:4)",
21 "Divergence-Time", "Substitution-Rate-Ratio(rTC:rAG:rTA:rCG:rTG:rCA/rCA)", "GC(1:2:3)", "ML-Score", "AICc",
22 "Akaike-Weight", "Model"}; //21 outputing items
24 //Format of outputing parameters
26 for (i
=0; i
<sizeof(items
)/sizeof(string
); i
++) titleInfo
.push_back(items
[i
]);
28 //Load Methods' Names and References for -h in linux, also for windows' tool tip
29 method_name
.push_back("NG"); method_ref
.push_back("Nei, M. and Gojobori, T. (1986)");
30 method_name
.push_back("LWL"); method_ref
.push_back( "Li, W.H., Wu, C.I. and Luo, C.C. (1985)");
31 method_name
.push_back("LPB"); method_ref
.push_back("Li, W.H. (1993) and Pamilo, P. and Bianchi, N.O. (1993)");
32 method_name
.push_back("MLWL"); method_ref
.push_back("Tzeng, Y.H., Pan, R. and Li, W.H. (2004)");
33 method_name
.push_back("MLPB"); method_ref
.push_back("Tzeng, Y.H., Pan, R. and Li, W.H. (2004)");
34 method_name
.push_back("GY"); method_ref
.push_back("Goldman, N. and Yang, Z. (1994)");
35 method_name
.push_back("YN"); method_ref
.push_back("Yang, Z. and Nielsen, R. (2000)");
36 method_name
.push_back("MYN"); method_ref
.push_back("Zhang Z., Jun L. and Jun Y. (2006)");
37 method_name
.push_back("MS"); method_ref
.push_back("Model Selection according to the AICc");
38 method_name
.push_back("MA"); method_ref
.push_back("Model Averaging on a set of candidate models");
42 method_ref
.push_back("Nei, M. and Gojobori, T. (1986) Mol. Biol. Evol., 3, 418-426.");
43 method_ref
.push_back("Li, W.H., Wu, C.I. and Luo, C.C. (1985) Mol. Biol. Evol., 2, 150-174.");
44 method_ref
.push_back("Li, W.H. (1993) J. Mol. Evol., 36, 96-99. Pamilo, P. and Bianchi, N.O. (1993) Mol. Biol. Evol., 10, 271-281.");
45 method_ref
.push_back("Tzeng, Y.H., Pan, R. and Li, W.H. (2004) Mol. Biol. Evol., 21, 2290-2298.");
46 method_ref
.push_back("Tzeng, Y.H., Pan, R. and Li, W.H. (2004) Mol. Biol. Evol., 21, 2290-2298.");
47 method_ref
.push_back("Goldman, N. and Yang, Z. (1994) Mol. Biol. Evol., 11, 725-736.");
48 method_ref
.push_back("Yang, Z. and Nielsen, R. (2000) Mol. Biol. Evol., 17, 32-43.");
49 method_ref
.push_back("Zhang, Z., Li, J. and Yu, J. (2006) BMC Evolutionary Biology, 6, 44.");
50 method_ref
.push_back("Model Selection according to the AICc");
51 method_ref
.push_back("Model Averaging on a set of candidate models");
68 int KAKS::Initialize() {
70 none
= ng86
= lpb93
= lwl85
= mlwl85
= mlpb93
= yn00
= gy94
= myn
= ms
= ma
= false;
71 result4Win
= result
= seq_name
= seq1
= seq2
= "";
72 seq_filename
= output_filename
= detail_filename
= "";
73 result
= details
= "";
80 int KAKS::Uninitialize() {
89 /* Get GCC of entire sequences GC[0] and of three codon positions GC[1,2,3] */
90 void KAKS::getGCContent(string str
) {
94 for(i
=0; i
<str
.length(); i
+=3) {
95 string codon
= str
.substr(i
, 3);
97 if(codon
[j
]=='G' || codon
[j
]=='C') GC
[j
+1]++;
101 GC
[0] = sumArray(GC
, 4, 1) / str
.length();
102 for(i
=1; i
<4; i
++) GC
[i
] /= (str
.length()/3);
107 /****************************************************
108 * Function: ReadCalculateSeq
109 * Input Parameter: string
110 * Output: Read sequences, check sequences' validity
111 and calculate Ka and Ks.
112 * Return Value: True if succeed, otherwise false.
114 * Note: Using axt file for low memory
115 *****************************************************/
116 bool KAKS::ReadCalculateSeq(string filename
) {
122 ifstream
is(filename
.c_str());
124 cout
<<"Error in opening file..."<<endl
;
128 showParaInfo(); //Show information on display
130 result
= getTitleInfo();
133 if (output_filename
!="" && output_filename
.length()>0) {
134 os
.open(output_filename
.c_str());
137 string temp
="", name
="", str
="";
139 while (getline(is
, temp
, '\n')) {
143 getline(is
, temp
, '\n');
146 getline(is
, temp
, '\n');
149 //Get GCC at three codon positions
151 //Check str's validility and calculate
152 if (checkValid(name
, str
.substr(0, str
.length()/2), str
.substr(str
.length()/2, str
.length()/2))) {
153 cout
<<++number
<<" "<<name
<<"\t";
154 if(!calculateKaKs()) throw 1; //calculate Ka&Ks using selected method(s)
170 /**************************************************
171 * Function: checkValid
172 * Input Parameter: string, string, string
173 * Output: Check validity of pairwise sequences
174 * Return Value: True if succeed, otherwise false.
175 ***************************************************/
176 bool KAKS::checkValid(string name
, string str1
, string str2
) {
181 //Check whether (sequence length)/3==0
182 if (str1
.length()!=str1
.length() || str1
.length()%3!=0 || str2
.length()%3!=0) {
183 cout
<<endl
<<"Error. The size of two sequences in "<<"'"<<name
<<"' is not equal."<<endl
;
187 //Delete gap and stop codon
190 for(i
=0; i
<str1
.length(); i
+=3) {
191 for(found
=false, j
=0; j
<3 && !found
; j
++) {
192 if (str1
[j
+i
]=='-' || str2
[j
+i
]=='-') {
195 str1
[i
+j
] = toupper(str1
[i
+j
]);
196 str2
[i
+j
] = toupper(str2
[i
+j
]);
197 if (convertChar(str1
[i
+j
])==-1 || convertChar(str2
[i
+j
])==-1) {
202 if ((getAminoAcid(str1
.substr(i
,3))=='!') || (getAminoAcid(str2
.substr(i
,3))=='!')) {
207 str1
= str1
.replace(i
, 3, "");
208 str2
= str2
.replace(i
, 3, "");
213 //pass value into private variables
216 //pass value into extern variables
218 length
= str1
.length();
227 /**************************************************
229 * Input Parameter: int, const char* []
230 * Output: Calculate Ka/Ks and output.
232 ***************************************************/
233 bool KAKS::Run(int argc
, const char* argv
[]) {
238 //Judge whether input parameters are legal
239 if (!parseParameter(argc
, argv
)) {
243 //Record the start time
244 static time_t time_start
= time(NULL
);
246 //Read sequences and calculate Ka & Ks
247 if (!ReadCalculateSeq(seq_filename
)) {
251 //Write details for model-selected method
252 if(!writeFile(detail_filename
, (getTitleInfo()+details
).c_str())) {
256 //Time used for running
257 time_t t
= time(NULL
)-time_start
;
258 int h
=t
/3600, m
=(t
%3600)/60, s
=t
-(t
/60)*60;
261 cout
<<"Mission accomplished. (Time elapsed: ";
262 if(h
) cout
<<h
<<":"<<m
<<":"<<s
<<")"<<endl
;
263 else cout
<<m
<<":"<<s
<<")"<<endl
;
272 /**************************************************
273 * Function: parseParameter
274 * Input Parameter: int, const char* []
275 * Output: Parse the input parameters
277 ***************************************************/
278 bool KAKS::parseParameter(int argc
, const char* argv
[]) {
288 if (seq_filename
=="") throw 1;
289 //else for only one parameter - seq_filename
291 else if (argc
==2) {//help information
293 if(temp
=="-H"||temp
=="-h") {
303 int inputflag
=0, outputflag
=0, codeflag
=0;
304 for (i
=1; i
<argc
; i
++) {
306 temp
= stringtoUpper(argv
[i
]);
309 if ((i
+1)<argc
&& inputflag
==0) {
310 seq_filename
= argv
[++i
];
318 else if (temp
=="-O") {
319 if ((i
+1)<argc
&& outputflag
==0) {
320 output_filename
= argv
[++i
];
328 else if (temp
=="-C") {
329 if ((i
+1)<argc
&& codeflag
==0) {
330 genetic_code
= CONVERT
<int>(argv
[++i
]);
331 if (genetic_code
<1 || genetic_code
>NCODE
|| strlen(transl_table
[2*(genetic_code
-1)])<1)
339 //Details for Model Selection
340 else if (temp
=="-D") {
341 if ((i
+1)>argc
) throw 1;
342 detail_filename
= argv
[++i
];
345 //Algorithm(s) selected
346 else if (temp
=="-M") {
347 if ((i
+1)>argc
) throw 1;
348 temp
= stringtoUpper(argv
[++i
]);
349 if (temp
=="NONE") none
= true;
350 else if(temp
=="NG") ng86
= true;
351 else if(temp
=="LWL") lwl85
= true;
352 else if(temp
=="LPB") lpb93
= true;
353 else if(temp
=="MLPB") mlpb93
= true;
354 else if(temp
=="MLWL") mlwl85
= true;
355 else if(temp
=="GY") gy94
= true;
356 else if(temp
=="YN") yn00
= true;
357 else if(temp
=="MYN") myn
= true;
358 else if(temp
=="MS") ms
= true;
359 else if(temp
=="MA") ma
= true;
360 else if(temp
=="ALL") {
361 ng86
= lpb93
= lwl85
= mlwl85
= mlpb93
= gy94
= yn00
= myn
= ms
= ma
= true;
368 //If no input or output file, report error
369 if(inputflag
==0 || outputflag
==0) throw 1;
371 //Default: use ma to to calculate Ka and Ks
372 if (!(none
+ng86
+lpb93
+lwl85
+mlwl85
+mlpb93
+gy94
+yn00
+myn
+ms
+ma
)) {
378 cout
<<"Input parameter(s) error."<<endl
;
379 cout
<<"For help information: Kaks_Calculator -h"<<endl
;
387 /*******************************************************
388 * Function: Calculate
389 * Input Parameter: void
390 * Output: Calculate kaks and output results.
394 ********************************************************/
395 bool KAKS::calculateKaKs() {
402 if (none
) start_NONE();
403 if (ng86
) start_NG86();
404 if (lwl85
) start_LWL85();
405 if (mlwl85
) start_MLWL85();
406 if (lpb93
) start_LPB93();
407 if (mlpb93
) start_MLPB93();
408 if (gy94
) start_GY94();
409 if (yn00
) start_YN00();
410 if (myn
) start_MYN();
411 if (ms
||ma
) start_MSMA();
413 result4Win
+= result
;
414 //Write into the file
415 if (output_filename
.length()>0 && os
.is_open()) {
428 //NONE: NG without correction for multiple substitution
429 void KAKS::start_NONE() {
432 result
+= zz
.Run(seq1
, seq2
);
436 void KAKS::start_NG86() {
439 result
+= zz
.Run(seq1
, seq2
);
443 void KAKS::start_LWL85() {
446 result
+= zz
.Run(seq1
, seq2
);
450 void KAKS::start_MLWL85() {
453 result
+= zz
.Run(seq1
, seq2
);
457 void KAKS::start_LPB93() {
460 result
+= zz
.Run(seq1
, seq2
);
464 void KAKS::start_MLPB93() {
467 result
+= zz
.Run(seq1
, seq2
);
471 void KAKS::start_GY94() {
474 result
+= zz
.Run(seq1
.c_str(), seq2
.c_str());
478 void KAKS::start_YN00() {
481 result
+= zz
.Run(seq1
, seq2
);
485 void KAKS::start_MYN() {
488 result
+= zz
.Run(seq1
, seq2
);
491 /************************************************
492 * Function: start_MSMA
493 * Input Parameter: void
494 * Output: Calculate Ka and Ks using the method of
495 model selection or model averaging.
497 *************************************************/
498 void KAKS::start_MSMA() {
500 vector
<MLResult
> result4MA
; //generated by MS and used by MA
504 string tmp
= zz1
.Run(seq1
.c_str(), seq2
.c_str(), result4MA
, details
);
512 result
+= zz2
.Run(seq1
.c_str(), seq2
.c_str(), result4MA
);
516 /************************************************
517 * Function: showParaInfo
519 * Output: print on display
521 *************************************************/
522 void KAKS::showParaInfo() {
525 if (none
) cout
<<"NONE"<<" ";
526 if (ng86
) cout
<<"NG"<<" ";
527 if (lwl85
) cout
<<"LWL"<<" ";
528 if (mlwl85
) cout
<<"MLWL"<<" ";
529 if (lpb93
) cout
<<"LPB"<<" ";
530 if (mlpb93
) cout
<<"MLPB"<<" ";
531 if (gy94
) cout
<<"GY"<<" ";
532 if (yn00
) cout
<<"YN"<<" ";
533 if (myn
) cout
<<"MYN"<<" ";
534 if (ms
) cout
<<"MS"<<" ";
537 cout
<<endl
<<"Genetic code: "<<transl_table
[2*(genetic_code
-1)+1]<<endl
;
538 cout
<<"Please wait while reading sequences and calculating..."<<endl
;
541 /************************************************
542 * Function: getTitleInfo
544 * Output: get title information of outputing file
545 * Return Value: string
546 *************************************************/
547 string
KAKS::getTitleInfo() {
552 if (titleInfo
.size()>0) {
553 //Add "\t" to items except the last one
554 for (; i
<titleInfo
.size()-1; i
++) addString(title
, titleInfo
[i
]);
555 //The last item is added by "\n"
556 addString(title
, titleInfo
[i
], "\n");
562 /***********************************
565 * Output: Display help infomation.
567 ************************************/
568 void KAKS::helpInfo() {
572 cout
<<"******************************************************************"<<endl
;
573 cout
<<" Program: KaKs_Calculator"<<endl
;
574 cout
<<" Version: "<<VERSION
<<endl
;
575 cout
<<" Description: Calculate Ka and Ks through model selection and model averaging."<<endl
;
576 cout
<<"******************************************************************"<<endl
;
579 cout
<<"Usage: Kaks_Calculator <options>"<<endl
;
580 cout
<<"\t-i\tAxt file name for calculating Ka & Ks."<<endl
;
581 cout
<<"\t-o\tOutput file name for saving results."<<endl
;
582 cout
<<"\t-c\tGenetic code table (Default = 1-Standard Code)."<<endl
;
583 for(i
=0,j
=0; i
<NNCODE
; i
+=2) {
584 if(strlen(transl_table
[i
])>0) {
585 cout
<<"\t\t "<<transl_table
[i
+1]<<"\t";
594 cout
<<"\t\t (More information about the Genetic Codes: http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=c)"<<endl
;
596 cout
<<"\t-m\tMethods for estimating Ka and Ks and theirs references(Default = MA)"<<endl
;
597 for (i
=0; i
<method_name
.size(); i
++) {
598 cout
<<"\t\t "<<method_name
[i
].c_str();
599 cout
<<"\t\t"<<method_ref
[i
].c_str()<<endl
;
601 cout
<<"\t\t ALL(including all above methods)"<<endl
;
603 cout
<<"\t-d\tFile name for details about each candidate model when using the method of MS"<<endl
;
606 cout
<<"Example:"<<endl
;
607 cout
<<"\tKaKs_Calculator -i test.axt -o test.axt.kaks\t//use MA method based on a more suitable model and standard code"<<endl
;
608 cout
<<"\tKaKs_Calculator -i test.axt -o test.axt.kaks -c 2\t//use MA method and vertebrate mitochondrial code"<<endl
;
609 cout
<<"\tKaKs_Calculator -i test.axt -o test.axt.kaks -m LWL -m MYN\t//use LWL and MYN methods, and standard Code"<<endl
;
612 cout
<<"For help information: KaKs_Calculator -h"<<endl
;
613 cout
<<"Please send bugs or advice to Zhang Zhang at zhangzhang@genomics.org.cn."<<endl
;