2 # BioPerl module for Bio::AlignIO::emboss
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Jason Stajich <jason@bioperl.org>
8 # Copyright Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::AlignIO::emboss - Parse EMBOSS alignment output (from applications water and needle)
20 # do not use the object directly
22 # read in an alignment from the EMBOSS program water
23 my $in = Bio::AlignIO->new(-format => 'emboss',
24 -file => 'seq.water');
25 while( my $aln = $in->next_aln ) {
26 # do something with the alignment
31 This object handles parsing and writing pairwise sequence alignments
32 from the EMBOSS suite.
38 User feedback is an integral part of the evolution of this and other
39 Bioperl modules. Send your comments and suggestions preferably to
40 the Bioperl mailing list. Your participation is much appreciated.
42 bioperl-l@bioperl.org - General discussion
43 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
47 Please direct usage questions or support issues to the mailing list:
49 I<bioperl-l@bioperl.org>
51 rather than to the module maintainer directly. Many experienced and
52 reponsive experts will be able look at the problem and quickly
53 address it. Please include a thorough description of the problem
54 with code and data examples if at all possible.
58 Report bugs to the Bioperl bug tracking system to help us keep track
59 of the bugs and their resolution. Bug reports can be submitted via the
62 https://github.com/bioperl/bioperl-live/issues
64 =head1 AUTHOR - Jason Stajich
66 Email jason@bioperl.org
70 The rest of the documentation details each of the object methods.
71 Internal methods are usually preceded with a _
76 # Let the code begin...
79 package Bio
::AlignIO
::emboss
;
80 use vars
qw($EMBOSSTitleLen $EMBOSSLineLen);
83 use Bio::LocatableSeq;
85 use base qw(Bio::AlignIO);
94 $self->SUPER::_initialize
(@args);
95 $self->{'_type'} = undef;
101 Usage : $aln = $stream->next_aln()
102 Function: returns the next alignment in the stream.
103 Returns : L<Bio::Align::AlignI> object - returns 0 on end of file
112 my %data = ( 'seq1' => {
123 'type' => $self->{'_type'}, # to restore type from
124 # previous aln if possible
127 while( defined($_ = $self->_readline) ) {
128 next if( /^\#?\s+$/ || /^\#+\s*$/ );
129 if( /^\#(\=|\-)+\s*$/) {
130 last if( $seenbegin);
131 } elsif( /(Local|Global):\s*(\S+)\s+vs\s+(\S+)/ ||
132 /^\#\s+Program:\s+(\S+)/ )
134 my ($name1,$name2) = ($2,$3);
135 if( ! defined $name1 ) { # Handle EMBOSS 2.2.X
137 $name1 = $name2 = '';
139 $data{'type'} = $1 eq 'Local' ?
'water' : 'needle';
141 $data{'seq1'}->{'name'} = $name1;
142 $data{'seq2'}->{'name'} = $name2;
144 $self->{'_type'} = $data{'type'};
146 } elsif( /Score:\s+(\S+)/ ) {
148 } elsif( /^\#\s+(1|2):\s+(\S+)/ && ! $data{"seq$1"}->{'name'} ) {
150 $nm = substr($nm,0,$EMBOSSTitleLen); # emboss has a max seq length
152 $nm .= "-". $names{$nm};
155 $data{"seq$1"}->{'name'} = $nm;
156 } elsif( $data{'seq1'}->{'name'} &&
157 /^\Q$data{'seq1'}->{'name'}/ ) {
161 while( defined ($_) ) {
162 my $align_other = '';
164 if($count == 0 || $count == 2 ) {
166 my ($seq,$align,$start,$end);
167 if( $count == 2 && $data{'seq2'}->{'name'} eq '' ) {
168 # weird boundary condition
169 ($start,$align,$end) = @l;
172 ($seq,$start,$end) = @l
174 ($seq,$start,$align,$end) = @l;
177 my $seqname = sprintf("seq%d", ($count == 0) ?
'1' : '2');
178 $data{$seqname}->{'data'} .= $align;
179 $data{$seqname}->{'start'} ||= $start;
180 $data{$seqname}->{'end'} = $end;
181 $current[$count] = [ $start,$align || ''];
185 $data{'align'} .= $_;
189 last if( $count++ == 2);
190 $_ = $self->_readline();
193 if( $data{'type'} eq 'needle' ) {
194 # which ever one is shorter we want to bring it up to
195 # length. Man this stinks.
196 my ($s1,$s2) = ($data{'seq1'}, $data{'seq2'});
198 my $d = length($current[0]->[1]) - length($current[2]->[1]);
199 if( $d < 0 ) { # s1 is smaller, need to add some
200 # compare the starting points for this alignment line
201 if( $current[0]->[0] <= 1 ) {
202 $s1->{'data'} = ('-' x
abs($d)) . $s1->{'data'};
203 $data{'align'} = (' 'x
abs($d)).$data{'align'};
205 $s1->{'data'} .= '-' x
abs($d);
206 $data{'align'} .= ' 'x
abs($d);
208 } elsif( $d > 0) { # s2 is smaller, need to add some
209 if( $current[2]->[0] <= 1 ) {
210 $s2->{'data'} = ('-' x
abs($d)) . $s2->{'data'};
211 $data{'align'} = (' 'x
abs($d)).$data{'align'};
213 $s2->{'data'} .= '-' x
abs($d);
214 $data{'align'} .= ' 'x
abs($d);
221 return unless $seenbegin;
222 my $aln = Bio
::SimpleAlign
->new(-verbose
=> $self->verbose(),
223 -score
=> $data{'score'},
224 -source
=> "EMBOSS-".$data{'type'});
226 foreach my $seqname ( qw(seq1 seq2) ) {
227 return unless ( defined $data{$seqname} );
228 $data{$seqname}->{'name'} ||= $seqname;
229 my $seq = Bio
::LocatableSeq
->new
230 ('-seq' => $data{$seqname}->{'data'},
231 '-display_id' => $data{$seqname}->{'name'},
232 '-start' => $data{$seqname}->{'start'},
233 '-end' => $data{$seqname}->{'end'},
234 '-alphabet' => $self->alphabet,
244 Usage : $stream->write_aln(@aln)
245 Function: writes the $aln object into the stream in emboss format
246 Returns : 1 for success and 0 for error
247 Args : L<Bio::Align::AlignI> object
253 my ($self,@aln) = @_;
255 $self->throw("Sorry: writing emboss output is not currently available! \n");