modified: SpatialOmicsCoord.py
[GalaxyCodeBases.git] / c_cpp / etc / KaKs_Calculator / src / AXTConvertor.cpp
blob4794b4276c295808b0330b98c2fce508dcf2ffdc
2 #pragma warning(disable:4786)
3 #include<fstream>
4 #include<string>
5 #include<iostream>
6 #include<sstream>
7 #include<vector>
9 #include <stdlib.h>
11 using namespace std;
/*
 * Convert a value of one type to another by writing it to a string stream
 * and reading it back as the destination type.
 *
 *   out_type  destination type; must be default-constructible and readable with operator>>
 *   in_value  source type; must be writable with operator<<
 *   t         the value to convert
 *
 * Returns the extracted value, or a value-initialized out_type (0, '\0',
 * empty string, ...) when the text cannot be read as out_type.
 */
template<class out_type,class in_value>
out_type CONVERT(const in_value & t) {
    std::stringstream stream;
    //Put the value 't' into the stream
    stream << t;

    //Value-initialize so a failed extraction never yields an indeterminate value
    out_type result = out_type();
    //Put the stream into the 'result'
    if (!(stream >> result)) {
        return out_type();
    }

    return result;
}
/* One named sequence of the alignment being converted */
struct Seq {
    std::string name;   // sequence identifier
    std::string seq;    // sequence characters collected for this identifier
};
32 vector<Seq> sequence;
33 vector<string> FileContent;
35 string input_filename;
36 string output_filename;
39 string convertNum(int i);
40 int convertFile(string input_filename);
41 void pushintoVector(Seq temp);
42 bool readClustal();
43 bool readMsf();
44 bool readNexus();
45 bool readPhylip();
46 bool readPir();
47 string trim(string str);
48 string stringtoUpper(string str);
49 bool isBlank(string str);
51 int main(int argc, char* argv[]) {
53 if (argc!=3) {
54 cout<<"Error(s) in parameters..."<<endl;
55 cout<<"Description: Convert Clustal/Msf/Nexus/Phylip/Pir format sequences to AXT ones."<<endl;
56 cout<<"Usage: AXTConvertor [Clustal/Msf/Nexus/Phylip/Pir] [AXT]"<<endl;
57 return 1;
60 input_filename = argv[1];
61 output_filename = argv[2];
63 FileContent.clear();
64 sequence.clear();
66 return convertFile(input_filename);
71 int convertFile(string input_filename) {
73 int i,j, flag=1;;
75 try {
76 ifstream is(input_filename.c_str());
77 if (!is) {
78 cout<<"Error in opening file..."<<endl;
79 throw 1;
82 //Read the file's content saved in the vector of FileContent
83 cout<<"Reading sequences..."<<endl;
84 string temp = "";
85 FileContent.clear();
86 while (getline(is, temp, '\n')) {
87 FileContent.push_back(temp);
88 temp = "";
90 is.close();
91 is.clear();
93 //Parse FileContent and convert to axt file
94 cout<<"Converting..."<<endl;
95 if (readClustal() || readPhylip() || readMsf() || readNexus() || readPir()) {
97 FileContent.clear();
99 ofstream os(output_filename.c_str());
100 if(!os || !os.is_open()) {
101 cout<<"No permission to file. Please check it."<<endl;
104 //Write pairwise sequences
105 for(i=0; i<sequence.size(); i++) {
106 for(j=i+1; j<sequence.size(); j++) {
107 temp = sequence[i].name + "&";
108 temp += sequence[j].name;
109 os<<temp<<endl;
111 os<<sequence[i].seq<<endl;
112 os<<sequence[j].seq<<endl;
113 os<<endl;
116 os.close();
117 cout<<"Mission accomplished."<<endl;
120 else {
121 cout<<"The sequence format can not be recognized. Please check it."<<endl;
124 FileContent.clear();
125 sequence.clear();
127 catch (...) {
128 cout<<"Error(s) in converting sequences into AXT format..."<<endl;
129 flag = 0;
132 return flag;
135 bool readClustal() {
137 CLUSTAL W (1.7) multiple sequence alignment
139 AK1 ACACCCGTGCTTGGCAATACCGATCCAAGCGCCGTGATGCTTGAGGCGGTTGACAATAAT
140 AK2 ATACCAGTACTCGGCAAGACCGATCCAAACGCCGAGATGCTCGAGGCCGATGACAATAAT
141 AK3 ACACCCGTGCTTGGCAATACCGATCCAAGCGCCGTGATGCTTGAGGCGGTTGACAATAAT
142 AK4 ATACCAGTACTCGGCAAGACCGATCCAAACGCCGAGATGCTCGAGGCCGATGACAATAAT
145 AK1 AAGGGCGTAGAGATCAGGGGCGAGTCTCGATTTAGAATTTTCCCCCCGTTCTCAAATGAG
146 AK2 AAGGGAGTAGAGATCATGGGCGAGTCACGATTCAAAATTTTTCCCCCGTTGTCAAAGGAG
147 AK3 AAGGGCGTAGAGATCAGGGGCGAGTCTCGATTTAGAATTTTCCCCCCGTTCTCAAATGAG
148 AK4 AAGGGAGTAGAGATCATGGGCGAGTCACGATTCAAAATTTTTCCCCCGTTGTCAAAGGAG
150 int j, i = stringtoUpper(FileContent[0]).find("CLUSTAL");
152 if (i<0) {
153 return false;
156 sequence.clear();
157 for (i=1; i<FileContent.size(); i++) {
158 if (isBlank(FileContent[i])) {
159 continue;
161 j = FileContent[i].find(" ", 0);
162 if (FileContent[i].substr(0,j).empty()) {
163 j = FileContent[i].find(" ", 1 );
165 Seq temp;
166 temp.name = FileContent[i].substr(0, j);
167 temp.seq = FileContent[i].substr(j+1, FileContent[i].length()-1);
168 pushintoVector(temp);
172 return true;
175 bool readPhylip() {
176 /* 4 50
177 AK1 ACACCCGTGC TTGGCAATAC CGATCCAAGC GCCGTGATGC TTGAGGCGGT
178 AK2 ATACCAGTAC TCGGCAAGAC CGATCCAAAC GCCGAGATGC TCGAGGCCGA
179 AK3 ACACCCGTGC TTGGCAATAC CGATCCAAGC GCCGTGATGC TTGAGGCGGT
180 AK4 ATACCAGTAC TCGGCAAGAC CGATCCAAAC GCCGAGATGC TCGAGGCCGA
182 TGACAATAAT AAGGGCGTAG AGATCAGGGG CGAGTCTCGA TTTAGAATTT
183 TGACAATAAT AAGGGAGTAG AGATCATGGG CGAGTCACGA TTCAAAATTT
184 TGACAATAAT AAGGGCGTAG AGATCAGGGG CGAGTCTCGA TTTAGAATTT
185 TGACAATAAT AAGGGAGTAG AGATCATGGG CGAGTCACGA TTCAAAATTT
187 int i,j;
188 string num = "", firstline = FileContent[0];
190 for (i=0; i<firstline.length() && num==""; i++) {
191 while (isdigit(firstline[i])) {
192 num += firstline[i];
193 i++;
197 if (num=="") {
198 return false;
201 sequence.clear();
202 for(i=1; !isBlank(FileContent[i]); i++) {
204 j = FileContent[i].find(' ');
206 Seq temp;
207 temp.name = FileContent[i].substr(0, j);
208 temp.seq = FileContent[i].substr(j+1, FileContent[i].length()-1);
209 pushintoVector(temp);
212 if (atoi(num.c_str())!=sequence.size()){
213 return false;
216 for (j=0; i<FileContent.size(); i++) {
217 if (isBlank(FileContent[i])) {
218 j = 0;
219 continue;
221 Seq temp;
222 temp.name = sequence[j++].name;
223 temp.seq = FileContent[i];
224 pushintoVector(temp);
227 return true;
230 bool readMsf() {
232 .....
235 AK1 ACACCCGTGCTTGGCAATACCGATCCAAGCGCCGTGATGCTTGAGGCGGTTGACAATAAT
236 AK2 ATACCAGTACTCGGCAAGACCGATCCAAACGCCGAGATGCTCGAGGCCGATGACAATAAT
237 AK3 ACACCCGTGCTTGGCAATACCGATCCAAGCGCCGTGATGCTTGAGGCGGTTGACAATAAT
238 AK4 ATACCAGTACTCGGCAAGACCGATCCAAACGCCGAGATGCTCGAGGCCGATGACAATAAT
241 AK1 AAGGGCGTAGAGATCAGGGGCGAGTCTCGATTTAGAATTTTCCCCCCGTTCTCAAATGAG
242 AK2 AAGGGAGTAGAGATCATGGGCGAGTCACGATTCAAAATTTTTCCCCCGTTGTCAAAGGAG
243 AK3 AAGGGCGTAGAGATCAGGGGCGAGTCTCGATTTAGAATTTTCCCCCCGTTCTCAAATGAG
244 AK4 AAGGGAGTAGAGATCATGGGCGAGTCACGATTCAAAATTTTTCCCCCGTTGTCAAAGGAG
247 int i;
248 for (i=0; i<FileContent.size(); i++) {
249 if (trim(FileContent[i])=="//") {
250 break;
254 if (i==FileContent.size()) {
255 return false;
258 sequence.clear();
259 for (i++; i<FileContent.size(); i++) {
260 if (isBlank(FileContent[i])) {
261 continue;
263 int j = FileContent[i].find(" ", 0);
264 Seq temp;
265 temp.name = FileContent[i].substr(0, j);
266 temp.seq = FileContent[i].substr(j+1, FileContent[i].length()-1);
267 pushintoVector(temp);
270 return true;
273 bool readNexus() {
275 #nexus
276 ......
277 ...dimensions ntax=4...
278 ......
279 matrix
280 AK1 ACACCCGTGCTTGGCAATACCGATCCAAGCGCCGTGATGCTTGAGGCGGTTGACAATAAT
281 AK2 ATACCAGTACTCGGCAAGACCGATCCAAACGCCGAGATGCTCGAGGCCGATGACAATAAT
282 AK3 ACACCCGTGCTTGGCAATACCGATCCAAGCGCCGTGATGCTTGAGGCGGTTGACAATAAT
283 AK4 ATACCAGTACTCGGCAAGACCGATCCAAACGCCGAGATGCTCGAGGCCGATGACAATAAT
285 AK1 AAGGGCGTAGAGATCAGGGGCGAGTCTCGATTTAGAATTTTCCCCCCGTTCTCAAATGAG
286 AK2 AAGGGAGTAGAGATCATGGGCGAGTCACGATTCAAAATTTTTCCCCCGTTGTCAAAGGAG
287 AK3 AAGGGCGTAGAGATCAGGGGCGAGTCTCGATTTAGAATTTTCCCCCCGTTCTCAAATGAG
288 AK4 AAGGGAGTAGAGATCATGGGCGAGTCACGATTCAAAATTTTTCCCCCGTTGTCAAAGGAG
291 int i, j;
292 if (trim(stringtoUpper(FileContent[0]))!="#NEXUS") {
293 return false;
296 string num = "";
297 for (i=1; i<FileContent.size(); i++) {
299 string temp = trim(FileContent[i]);
301 //Get the number of sequence
302 if (num=="") {
303 j = (stringtoUpper(temp)).find("NTAX=");
304 if (j>0) {
305 j+=5;
306 while (isdigit(temp[j])) {
307 num += temp[j++];
311 //Find the sequence start line
312 temp = stringtoUpper(temp);
313 j = temp.find("MATRIX");
314 if (j>-1) {
315 break;
319 if (i==FileContent.size()) {
320 return false;
323 sequence.clear();
324 for (i++; i<FileContent.size(); i++) {
325 if (isBlank(FileContent[i])) {
326 continue;
328 //end of sequence
329 if (trim(FileContent[i])==";") {
330 break;
333 int j = FileContent[i].find(" ", 0);
334 Seq temp;
335 temp.name = trim(FileContent[i].substr(0, j));
336 temp.seq = trim(FileContent[i].substr(j+1, FileContent[i].length()-1));
337 pushintoVector(temp);
340 if (sequence.size()!=atoi(num.c_str())) {
341 return false;
344 return true;
348 bool readPir() {
350 >RL;Homo sapiens
351 Homo sapiens RNA sequence
352 AGUCGAGUC---GCAGAAACGCAUGAC-GACCACAUUUU-CCUUGCAAAG*
353 >RL;Pan paniscus
354 Pan paniscus RNA sequence
355 AGUCGCGUCG--GCAGAAACGCAUGACGGACCACAUCAU-CCUUGCAAAG*
356 >RL;Gorilla gorilla
357 Gorilla gorilla RNA sequence
358 AGUCGCGUCG--GCAGAUACGCAUCACGGAC-ACAUCAUCCCUCGCAGAG*
360 if (FileContent[0][0]!='>') {
361 return false;
364 sequence.clear();
365 int i, j;
366 for (i=0; i<FileContent.size(); i++) {
367 if(FileContent[i][0]=='>') {
368 j = FileContent[i].find(";");
369 if (j<0) {
370 return false;
373 Seq temp;
374 temp.name = trim(FileContent[i].substr(j+1, FileContent[i].length()-1));
376 temp.seq = "";
377 i+=2;
378 while (!isBlank(FileContent[i])) {
379 temp.seq = temp.seq + FileContent[i];
380 if (FileContent[i][FileContent[i].length()-1]=='*')
381 break;
382 i++;
385 pushintoVector(temp);
389 return true;
393 void pushintoVector(Seq temp) {
395 int i;
397 temp.name = trim(temp.name);
399 if (!isalpha(temp.seq[temp.seq.length()-1])) {
400 temp.seq = temp.seq.replace(temp.seq.length()-1, 1, "");
402 temp.seq = trim(temp.seq);
403 //Push into vector
404 for(i=0; i<sequence.size(); i++) {
405 string kdjf = sequence[i].name;
406 if (temp.name==sequence[i].name) {
407 sequence[i].seq += temp.seq;
408 break;
411 if (i==sequence.size()) {
412 sequence.push_back(temp);
/*
 * Return a copy of 'str' with every space and every control character
 * removed, wherever they occur in the string (not only at the ends).
 */
std::string trim(std::string str) {
    std::string kept;
    kept.reserve(str.length());
    for (std::string::size_type k = 0; k < str.length(); k++) {
        // <cctype> functions require a value representable as unsigned char
        const unsigned char c = static_cast<unsigned char>(str[k]);
        if (c != ' ' && !std::iscntrl(c)) {
            kept += str[k];
        }
    }
    return kept;
}
/*
 * Return a copy of 'str' with every lower-case letter converted to upper
 * case; all other characters are left unchanged.
 */
std::string stringtoUpper(std::string str) {
    for (std::string::size_type k = 0; k < str.length(); k++) {
        // <cctype> functions require a value representable as unsigned char
        const unsigned char c = static_cast<unsigned char>(str[k]);
        str[k] = static_cast<char>(std::toupper(c));
    }
    return str;
}
/*
 * Return the decimal text of 'i', with a leading '-' for negative values.
 */
std::string convertNum(int i) {
    // Use a wider type so that negating the most negative int cannot overflow
    long long magnitude = i;
    const bool negative = magnitude < 0;
    if (negative) {
        magnitude = -magnitude;
    }

    std::string digits;
    do {
        digits.insert(digits.begin(), static_cast<char>('0' + magnitude % 10));
        magnitude /= 10;
    } while (magnitude > 0);

    if (negative) {
        digits.insert(digits.begin(), '-');
    }
    return digits;
}
/*
 * Return true when 'str' contains no letter and no digit (this includes
 * the empty string and lines made only of spaces or punctuation).
 */
bool isBlank(std::string str) {
    for (std::string::size_type k = 0; k < str.length(); k++) {
        // <cctype> functions require a value representable as unsigned char
        const unsigned char c = static_cast<unsigned char>(str[k]);
        if (std::isalpha(c) || std::isdigit(c)) {
            return false;
        }
    }
    return true;
}