2 #pragma warning(disable:4786)
13 /* Convert one type to any other type */
14 template<class out_type
,class in_value
>
15 out_type
CONVERT(const in_value
& t
) {
17 //Put the value 't' into the stream
20 //Put the stream into the 'result'
33 vector
<string
> FileContent
;
35 string input_filename
;
36 string output_filename
;
39 string
convertNum(int i
);
40 int convertFile(string input_filename
);
41 void pushintoVector(Seq temp
);
47 string
trim(string str
);
48 string
stringtoUpper(string str
);
49 bool isBlank(string str
);
51 int main(int argc
, char* argv
[]) {
54 cout
<<"Error(s) in parameters..."<<endl
;
55 cout
<<"Description: Convert Clustal/Msf/Nexus/Phylip/Pir format sequences to AXT ones."<<endl
;
56 cout
<<"Usage: AXTConvertor [Clustal/Msf/Nexus/Phylip/Pir] [AXT]"<<endl
;
60 input_filename
= argv
[1];
61 output_filename
= argv
[2];
66 return convertFile(input_filename
);
71 int convertFile(string input_filename
) {
76 ifstream
is(input_filename
.c_str());
78 cout
<<"Error in opening file..."<<endl
;
82 //Read the file's content saved in the vector of FileContent
83 cout
<<"Reading sequences..."<<endl
;
86 while (getline(is
, temp
, '\n')) {
87 FileContent
.push_back(temp
);
93 //Parse FileContent and convert to axt file
94 cout
<<"Converting..."<<endl
;
95 if (readClustal() || readPhylip() || readMsf() || readNexus() || readPir()) {
99 ofstream
os(output_filename
.c_str());
100 if(!os
|| !os
.is_open()) {
101 cout
<<"No permission to file. Please check it."<<endl
;
104 //Write pairwise sequences
105 for(i
=0; i
<sequence
.size(); i
++) {
106 for(j
=i
+1; j
<sequence
.size(); j
++) {
107 temp
= sequence
[i
].name
+ "&";
108 temp
+= sequence
[j
].name
;
111 os
<<sequence
[i
].seq
<<endl
;
112 os
<<sequence
[j
].seq
<<endl
;
117 cout
<<"Mission accomplished."<<endl
;
121 cout
<<"The sequence format can not be recognized. Please check it."<<endl
;
128 cout
<<"Error(s) in converting sequences into AXT format..."<<endl
;
137 CLUSTAL W (1.7) multiple sequence alignment
139 AK1 ACACCCGTGCTTGGCAATACCGATCCAAGCGCCGTGATGCTTGAGGCGGTTGACAATAAT
140 AK2 ATACCAGTACTCGGCAAGACCGATCCAAACGCCGAGATGCTCGAGGCCGATGACAATAAT
141 AK3 ACACCCGTGCTTGGCAATACCGATCCAAGCGCCGTGATGCTTGAGGCGGTTGACAATAAT
142 AK4 ATACCAGTACTCGGCAAGACCGATCCAAACGCCGAGATGCTCGAGGCCGATGACAATAAT
145 AK1 AAGGGCGTAGAGATCAGGGGCGAGTCTCGATTTAGAATTTTCCCCCCGTTCTCAAATGAG
146 AK2 AAGGGAGTAGAGATCATGGGCGAGTCACGATTCAAAATTTTTCCCCCGTTGTCAAAGGAG
147 AK3 AAGGGCGTAGAGATCAGGGGCGAGTCTCGATTTAGAATTTTCCCCCCGTTCTCAAATGAG
148 AK4 AAGGGAGTAGAGATCATGGGCGAGTCACGATTCAAAATTTTTCCCCCGTTGTCAAAGGAG
150 int j
, i
= stringtoUpper(FileContent
[0]).find("CLUSTAL");
157 for (i
=1; i
<FileContent
.size(); i
++) {
158 if (isBlank(FileContent
[i
])) {
161 j
= FileContent
[i
].find(" ", 0);
162 if (FileContent
[i
].substr(0,j
).empty()) {
163 j
= FileContent
[i
].find(" ", 1 );
166 temp
.name
= FileContent
[i
].substr(0, j
);
167 temp
.seq
= FileContent
[i
].substr(j
+1, FileContent
[i
].length()-1);
168 pushintoVector(temp
);
177 AK1 ACACCCGTGC TTGGCAATAC CGATCCAAGC GCCGTGATGC TTGAGGCGGT
178 AK2 ATACCAGTAC TCGGCAAGAC CGATCCAAAC GCCGAGATGC TCGAGGCCGA
179 AK3 ACACCCGTGC TTGGCAATAC CGATCCAAGC GCCGTGATGC TTGAGGCGGT
180 AK4 ATACCAGTAC TCGGCAAGAC CGATCCAAAC GCCGAGATGC TCGAGGCCGA
182 TGACAATAAT AAGGGCGTAG AGATCAGGGG CGAGTCTCGA TTTAGAATTT
183 TGACAATAAT AAGGGAGTAG AGATCATGGG CGAGTCACGA TTCAAAATTT
184 TGACAATAAT AAGGGCGTAG AGATCAGGGG CGAGTCTCGA TTTAGAATTT
185 TGACAATAAT AAGGGAGTAG AGATCATGGG CGAGTCACGA TTCAAAATTT
188 string num
= "", firstline
= FileContent
[0];
190 for (i
=0; i
<firstline
.length() && num
==""; i
++) {
191 while (isdigit(firstline
[i
])) {
202 for(i
=1; !isBlank(FileContent
[i
]); i
++) {
204 j
= FileContent
[i
].find(' ');
207 temp
.name
= FileContent
[i
].substr(0, j
);
208 temp
.seq
= FileContent
[i
].substr(j
+1, FileContent
[i
].length()-1);
209 pushintoVector(temp
);
212 if (atoi(num
.c_str())!=sequence
.size()){
216 for (j
=0; i
<FileContent
.size(); i
++) {
217 if (isBlank(FileContent
[i
])) {
222 temp
.name
= sequence
[j
++].name
;
223 temp
.seq
= FileContent
[i
];
224 pushintoVector(temp
);
235 AK1 ACACCCGTGCTTGGCAATACCGATCCAAGCGCCGTGATGCTTGAGGCGGTTGACAATAAT
236 AK2 ATACCAGTACTCGGCAAGACCGATCCAAACGCCGAGATGCTCGAGGCCGATGACAATAAT
237 AK3 ACACCCGTGCTTGGCAATACCGATCCAAGCGCCGTGATGCTTGAGGCGGTTGACAATAAT
238 AK4 ATACCAGTACTCGGCAAGACCGATCCAAACGCCGAGATGCTCGAGGCCGATGACAATAAT
241 AK1 AAGGGCGTAGAGATCAGGGGCGAGTCTCGATTTAGAATTTTCCCCCCGTTCTCAAATGAG
242 AK2 AAGGGAGTAGAGATCATGGGCGAGTCACGATTCAAAATTTTTCCCCCGTTGTCAAAGGAG
243 AK3 AAGGGCGTAGAGATCAGGGGCGAGTCTCGATTTAGAATTTTCCCCCCGTTCTCAAATGAG
244 AK4 AAGGGAGTAGAGATCATGGGCGAGTCACGATTCAAAATTTTTCCCCCGTTGTCAAAGGAG
248 for (i
=0; i
<FileContent
.size(); i
++) {
249 if (trim(FileContent
[i
])=="//") {
254 if (i
==FileContent
.size()) {
259 for (i
++; i
<FileContent
.size(); i
++) {
260 if (isBlank(FileContent
[i
])) {
263 int j
= FileContent
[i
].find(" ", 0);
265 temp
.name
= FileContent
[i
].substr(0, j
);
266 temp
.seq
= FileContent
[i
].substr(j
+1, FileContent
[i
].length()-1);
267 pushintoVector(temp
);
277 ...dimensions ntax=4...
280 AK1 ACACCCGTGCTTGGCAATACCGATCCAAGCGCCGTGATGCTTGAGGCGGTTGACAATAAT
281 AK2 ATACCAGTACTCGGCAAGACCGATCCAAACGCCGAGATGCTCGAGGCCGATGACAATAAT
282 AK3 ACACCCGTGCTTGGCAATACCGATCCAAGCGCCGTGATGCTTGAGGCGGTTGACAATAAT
283 AK4 ATACCAGTACTCGGCAAGACCGATCCAAACGCCGAGATGCTCGAGGCCGATGACAATAAT
285 AK1 AAGGGCGTAGAGATCAGGGGCGAGTCTCGATTTAGAATTTTCCCCCCGTTCTCAAATGAG
286 AK2 AAGGGAGTAGAGATCATGGGCGAGTCACGATTCAAAATTTTTCCCCCGTTGTCAAAGGAG
287 AK3 AAGGGCGTAGAGATCAGGGGCGAGTCTCGATTTAGAATTTTCCCCCCGTTCTCAAATGAG
288 AK4 AAGGGAGTAGAGATCATGGGCGAGTCACGATTCAAAATTTTTCCCCCGTTGTCAAAGGAG
292 if (trim(stringtoUpper(FileContent
[0]))!="#NEXUS") {
297 for (i
=1; i
<FileContent
.size(); i
++) {
299 string temp
= trim(FileContent
[i
]);
301 //Get the number of sequence
303 j
= (stringtoUpper(temp
)).find("NTAX=");
306 while (isdigit(temp
[j
])) {
311 //Find the sequence start line
312 temp
= stringtoUpper(temp
);
313 j
= temp
.find("MATRIX");
319 if (i
==FileContent
.size()) {
324 for (i
++; i
<FileContent
.size(); i
++) {
325 if (isBlank(FileContent
[i
])) {
329 if (trim(FileContent
[i
])==";") {
333 int j
= FileContent
[i
].find(" ", 0);
335 temp
.name
= trim(FileContent
[i
].substr(0, j
));
336 temp
.seq
= trim(FileContent
[i
].substr(j
+1, FileContent
[i
].length()-1));
337 pushintoVector(temp
);
340 if (sequence
.size()!=atoi(num
.c_str())) {
351 Homo sapiens RNA sequence
352 AGUCGAGUC---GCAGAAACGCAUGAC-GACCACAUUUU-CCUUGCAAAG*
354 Pan paniscus RNA sequence
355 AGUCGCGUCG--GCAGAAACGCAUGACGGACCACAUCAU-CCUUGCAAAG*
357 Gorilla gorilla RNA sequence
358 AGUCGCGUCG--GCAGAUACGCAUCACGGAC-ACAUCAUCCCUCGCAGAG*
360 if (FileContent
[0][0]!='>') {
366 for (i
=0; i
<FileContent
.size(); i
++) {
367 if(FileContent
[i
][0]=='>') {
368 j
= FileContent
[i
].find(";");
374 temp
.name
= trim(FileContent
[i
].substr(j
+1, FileContent
[i
].length()-1));
378 while (!isBlank(FileContent
[i
])) {
379 temp
.seq
= temp
.seq
+ FileContent
[i
];
380 if (FileContent
[i
][FileContent
[i
].length()-1]=='*')
385 pushintoVector(temp
);
393 void pushintoVector(Seq temp
) {
397 temp
.name
= trim(temp
.name
);
399 if (!isalpha(temp
.seq
[temp
.seq
.length()-1])) {
400 temp
.seq
= temp
.seq
.replace(temp
.seq
.length()-1, 1, "");
402 temp
.seq
= trim(temp
.seq
);
404 for(i
=0; i
<sequence
.size(); i
++) {
405 string kdjf
= sequence
[i
].name
;
406 if (temp
.name
==sequence
[i
].name
) {
407 sequence
[i
].seq
+= temp
.seq
;
411 if (i
==sequence
.size()) {
412 sequence
.push_back(temp
);
/**
 * Remove every space and control character from a string.
 *
 * Despite the name, this is not limited to leading/trailing whitespace:
 * each ' ' and each character for which iscntrl() is true (tab, CR, LF, ...)
 * is dropped wherever it occurs, so "A C\tG" becomes "ACG".
 *
 * @param str  Input text (taken by value, so the caller's string is untouched).
 * @return     Copy of str holding only the surviving characters, in their
 *             original order.
 */
std::string trim(std::string str) {
	// Compact the kept characters towards the front in a single pass instead
	// of erasing one character at a time (which is quadratic and makes the
	// loop index easy to get wrong after each removal).
	std::string::size_type keep = 0;
	for (std::string::size_type i = 0; i < str.length(); i++) {
		// <cctype> predicates need a value representable as unsigned char;
		// a plain char can be negative for bytes >= 0x80.
		const unsigned char ch = static_cast<unsigned char>(str[i]);
		if (ch != ' ' && !iscntrl(ch)) {
			str[keep++] = str[i];
		}
	}
	str.resize(keep);
	return str;
}
/**
 * Return an upper-cased copy of a string.
 *
 * Alphabetic characters (per isalpha() in the current C locale) are mapped
 * through toupper(); every other character is left exactly as it was.
 *
 * @param str  Input text (taken by value, so the caller's string is untouched).
 * @return     Copy of str with its letters converted to upper case.
 */
std::string stringtoUpper(std::string str) {
	for (std::string::size_type i = 0; i < str.length(); i++) {
		// <cctype> functions need a value representable as unsigned char;
		// a plain char can be negative for bytes >= 0x80.
		const unsigned char ch = static_cast<unsigned char>(str[i]);
		if (isalpha(ch)) {
			str[i] = static_cast<char>(toupper(ch));
		}
	}
	return str;
}
437 string
convertNum(int i
) {
442 str
= (char)(k
+48) + str
;
449 bool isBlank(string str
) {
454 if (str
.length()==0 || str
=="") {
458 for(i
=0,num
=0; i
<str
.length(); i
++) {
459 if (!isalpha(str
[i
]) && !isdigit(str
[i
])) num
++;
461 if(num
==str
.length())