3 # a quick&dirty script to convert "yass -d 1 " output files into
7 # usage: yass2blast.pl [-blast|-fasta|-axt] {yassoutputfiles}\n";
12 # Print an alignment according to $format = {"fasta",blast","blast header","axt"}
17 my ($nbAlignments,$nbSSequences,$pos_q_str,$pos_q_end,$pos_s_str,$pos_s_end,$name_q,$name_s,$size_q,$size_s,$reverse,$evalue,$score,$bitscore,$bias,$ts,$tv,$proba,$entropy,$fasta1,$middle,$fasta2,$format,$newchunk_q,$newchunk_s) = @_ ;
19 # 2) reverse and complement sequences to keep the "query order"
20 if ($reverse eq "-") {
21 # change first file positions (increasing order)
23 $pos_q_str = $pos_q_end;
26 $fasta1 =~ tr
/ATUGCRYKMSWVBDHatugcrykmswvbdh/TAACGYRMKWSBVHDtaacgyrmkwsbvhd
/;
27 $fasta1 = reverse($fasta1);
28 $fasta2 =~ tr
/ATUGCRYKMSWVBDHatugcrykmswvbdh/TAACGYRMKWSBVHDtaacgyrmkwsbvhd
/;
29 $fasta2 = reverse($fasta2);
30 $middle = reverse($middle);
36 if ($format eq "fasta") {
37 # 3.1) fasta but with "-" to keep alignments
38 print ">".$nbAlignments."a||".$name_q.": [".$pos_q_str."-".$pos_q_end ."]".$reverse."\n";
40 print ">".$nbAlignments."b||".$name_s.": [".$pos_s_str."-".$pos_s_end ."]".$reverse."\n";
43 } elsif ($format eq "axt") {
44 # 3.2) axt (blastz) format
45 print $nbAlignments."\t".$name_q."\t".$pos_q_str."\t".$pos_q_end ."\t".$name_s."\t".$pos_s_str."\t".$pos_s_end ."\t".$reverse."\t".$score."\n";
49 } elsif ($format eq "blast") {
50 # 3.3) blast like format
51 $middle =~ tr
/:\./ /;
52 my $nbMatches = ($middle =~ tr/\|//);
53 # a) print Sequence Header
55 print ">".$nbSSequences."_0 ".$name_s."\n";
56 print " Length = ".$size_s."\n";
59 # b) print Alignment Header
60 print " Score = ".$bitscore." bits (".$score."), Expect = ".$evalue."\n";
61 print " Identities = ".$nbMatches."/".(length($middle))." (".(int(100*$nbMatches/length($middle)))."%)\n";
62 print " Strand = Plus / ";
73 my $i_fasta1 = $pos_q_str;
75 if ($reverse eq "-") {
76 $i_fasta2 = $pos_s_end;
78 $i_fasta2 = $pos_s_str;
80 while ($c < length($middle)){
81 my $subfasta1 = substr($fasta1,$c,60);
82 my $submiddle = substr($middle,$c,60);
83 my $subfasta2 = substr($fasta2,$c,60);
84 my $letters_subfasta1 = $subfasta1; $letters_subfasta1 =~ s/-//g;
85 my $letters_subfasta2 = $subfasta2; $letters_subfasta2 =~ s/-//g;
86 my $nbletters_subfasta1 = length($letters_subfasta1);
87 my $nbletters_subfasta2 = length($letters_subfasta2);
88 printf ("Query: %-9d ",$i_fasta1); $i_fasta1 += $nbletters_subfasta1-1;
90 printf (" %d\n", $i_fasta1); $i_fasta1 += 1;
96 if ($reverse eq "-") {
97 printf ("Sbjct: %-9d ",$i_fasta2); $i_fasta2 -= $nbletters_subfasta2-1;
99 printf (" %d\n", $i_fasta2); $i_fasta2 -= 1;
101 printf ("Sbjct: %-9d ",$i_fasta2); $i_fasta2 += $nbletters_subfasta2-1;
103 printf (" %d\n", $i_fasta2); $i_fasta2 += 1;
108 } elsif ($format eq "blastheader") {
109 # 3.4) just the header of blast
111 if (length ($name_s) > 60) {
112 print "".$nbSSequences."_0 ".(substr($name_s,0,60))."... ".$bitscore." ".$evalue."\n";
114 print "".$nbSSequences."_0 ".$name_s;
115 for(my $e = length ($name_s); $e <= 63 ;$e ++) {
118 print " ".$bitscore." ".$evalue."\n";
120 }#1_0 embl|AF417609|AF417609 Colpidium campylum telomerase RNA gen... 32 0.063
127 my ($yassOutputFile,$format) = @_ ;
129 open(FIC
,$yassOutputFile) or die print "cant open ".$yassOutputFile."\n";
131 my $nbAlignments = 0;
132 my $nbSSequences = 0;
134 my $previous_name_s = "";
135 my $previous_name_q = "";
181 while (my $line = <FIC
>){
183 # potential header (for alignemnts)
184 if ($format eq "blastheader" && !($name_q eq $previous_name_q)) {
185 print "Query= ".$name_q."\n";
186 print " (".$size_q." letters)\n\n\n";
188 print "Sequences producing significant alignments: (bits) Value\n\n";
189 $previous_name_q = $name_q;
192 # lines begining with "*"
195 # lines begining with "*("
196 if($line =~ /^\*\(/ ) {
198 # print previous alignments
199 if (!($middle eq "")){
200 &PrintAlign
($nbAlignments,$nbSSequences,
201 $pos_q_str,$pos_q_end,$pos_s_str,$pos_s_end,
202 $name_q,$name_s,$size_q,$size_s,
203 $reverse,$evalue,$score,$bitscore,
204 $bias,$ts,$tv,$proba,$entropy,
205 $fasta1,$middle,$fasta2,$format,
206 ($previous_name_q eq "") || !($previous_name_q eq $name_q),
207 ($previous_name_s eq "") || !($previous_name_s eq $name_s)
215 # count number of new alignments (and new sequences)
217 if (($previous_name_s eq "") ||
218 !($previous_name_s eq $name_s)){
224 #*(546486-566813)(515659-536310) Ev: 0 s: 20328/20652 f
225 $line =~ m/\(([0-9]+)-([0-9]+)\)\(([0-9]+)-([0-9]+)\) Ev: (.+) s: ([0-9]+)\/([0-9]+) ([rf
])$/;
235 if ($reverse =~ /^f/) {
240 } elsif ($line =~ /^\* score/ ){
242 $line =~ m/([0-9]+).*bitscore = (.*)/;
245 } elsif ($line =~ /^\* mutations/){
247 #* mutations per triplet 129, 103, 124 (4.54e-04) | ts : 190 tv : 166 | entropy : 5.88743
248 $line =~ m/^\* mutations per triplet ([0-9]+, [0-9]+, [0-9]+) \((.*)\) \| ts : ([0-9]+) tv : ([0-9]+) \| entropy : (.*)/;
256 $line =~ m/\*[ ]*"(.+)" \(([0-9]+) bp\)[ ]*\/[ ]*"(.+)" \
(([0-9]+) bp\
)[ ]*$/;
257 $previous_name_q = $name_q;
260 $previous_name_s = $name_s;
266 if($line =~ /^[A-Za-z-]+$/){
274 } elsif ($line =~ /^[| :.]+$/ && (length($fasta1) > length($fasta2)) ){
281 if (!($middle eq "")){
282 &PrintAlign
($nbAlignments,$nbSSequences,
283 $pos_q_str,$pos_q_end,$pos_s_str,$pos_s_end,
284 $name_q,$name_s,$size_q,$size_s,
285 $reverse,$evalue,$score,$bitscore,
286 $bias,$ts,$tv,$proba,$entropy,
287 $fasta1,$middle,$fasta2,$format,
288 ($previous_name_q eq "") &&!($previous_name_q eq $name_q),
289 ($previous_name_s eq "") &&!($previous_name_s eq $name_s)
296 return $nbAlignments;
307 my $outputformat = "blast";
309 ($#ARGV >= 0) or die "yass2blast:\n".
310 " a quick&dirty script to convert \"yass -d 1 \" output files into\n".
315 " usage: yass2blast.pl [-blast|-fasta|-axt] {yassoutputfiles}\n".
319 for (my $i = 0 ; $i <= $#ARGV ; $i++) {
320 #a) select on the fly parameters ...
321 if (($ARGV[$i]) eq "-blast") {
322 $outputformat = "blast";
323 } elsif (($ARGV[$i]) eq "-fasta") {
324 $outputformat = "fasta";
325 } elsif (($ARGV[$i]) eq "-axt") {
326 $outputformat = "axt";
329 if ($outputformat eq "blast") {
330 &ScanFile
($ARGV[$i],"blastheader");
333 &ScanFile
($ARGV[$i],$outputformat);