2 # BioPerl module for Bio::AlignIO::proda
4 # based on the Bio::SeqIO modules
5 # by Ewan Birney <birney@ebi.ac.uk>
6 # and Lincoln Stein <lstein@cshl.org>
7 # and the Bio::SimpleAlign module of Ewan Birney
9 # Copyright Albert Vilella
11 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::AlignIO::proda - proda sequence input/output stream
18 This provides the basic capabilities to parse the output alignments
19 from the ProDA multiple sequence alignment program
20 (http://proda.stanford.edu)
24 Do not use this module directly. Use it via the Bio::AlignIO class.
28 This object can transform Bio::Align::AlignI objects to and from proda
35 User feedback is an integral part of the evolution of this and other
36 Bioperl modules. Send your comments and suggestions preferably to one
37 of the Bioperl mailing lists. Your participation is much appreciated.
39 bioperl-l@bioperl.org - General discussion
40 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
44 Please direct usage questions or support issues to the mailing list:
46 I<bioperl-l@bioperl.org>
48 rather than to the module maintainer directly. Many experienced and
49 reponsive experts will be able look at the problem and quickly
50 address it. Please include a thorough description of the problem
51 with code and data examples if at all possible.
55 Report bugs to the Bioperl bug tracking system to help us keep track
56 the bugs and their resolution. Bug reports can be submitted via the
59 https://github.com/bioperl/bioperl-live/issues
61 =head1 AUTHORS - Albert Vilella
63 Email: avilella-at-gmail-dot-com
68 The rest of the documentation details each of the object
69 methods. Internal methods are usually preceded with a _
73 # Let the code begin...
75 package Bio
::AlignIO
::proda
;
76 use vars
qw($LINELENGTH);
81 use base qw(Bio::AlignIO);
86 Usage : $alignio = Bio::AlignIO->new(-format => 'proda',
88 Function: returns a new Bio::AlignIO object to handle proda files
89 Returns : Bio::AlignIO::proda object
90 Args : -verbose => verbosity setting (-1, 0, 1, 2)
91 -file => name of file to read in or to write, with ">"
92 -fh => alternative to -file param - provide a filehandle
93 to read from or write to
94 -format => alignment format to process or produce
95 -percentages => display a percentage of identity
96 in each line of the alignment (proda only)
97 -linelength=> alignment output line length (default 60)
102 my ( $self, @args ) = @_;
103 $self->SUPER::_initialize
(@args);
104 my ( $percentages, $ll ) =
105 $self->_rearrange( [qw(PERCENTAGES LINELENGTH)], @args );
106 defined $percentages && $self->percentages($percentages);
107 $self->line_length( $ll || $LINELENGTH );
113 Usage : $aln = $stream->next_aln()
114 Function: returns the next alignment in the stream
115 Returns : Bio::Align::AlignI object
118 See L<Bio::Align::AlignI> for details
126 while ( $first_line = $self->_readline ) {
127 last if $first_line !~ /^$/;
129 $self->_pushback($first_line);
130 if ( defined( $first_line = $self->_readline )
131 && $first_line !~ /\(/ )
134 "trying to parse a file which does not start with proda headers"
137 # use it inside the loop
138 $self->_pushback($first_line);
141 my $aln = Bio
::SimpleAlign
->new(
143 -verbose
=> $self->verbose
147 $self->{_lastline
} = '';
148 my ($first_block, $seen_block, $seen_header) = (0,0,0);
149 my @ids; my @ids_copy;
150 while ( defined( $_ = $self->_readline ) ) {
151 next if (/^\s+$/ && !$first_block);
152 if (/^\s$/) { # line contains no description
158 # break the loop if we come to the end of the current alignment
159 # and push back the proda header
160 if (/\(/ && $seen_header) {
161 $self->_pushback($_);
165 if (/\(/ && !$seen_header) {
166 @ids = split(' ', $_);
171 my ( $seqname, $aln_line ) = ( '', '' );
172 if (/^\s*(\S+)\s*\/\s
*(\d
+)-(\d
+)\s
+(\S
+)\s
*$/ox
) {
175 ( $seqname, $aln_line ) = ( "$1:$2-$3", $4 );
177 # } elsif( /^\s*(\S+)\s+(\S+)\s*$/ox ) { without trailing numbers
179 elsif (/^\s*(\S+)\s+(\S+)\s*\d*\s*$/ox) { # with numbers
180 ( $seqname, $aln_line ) = ( $1, $2 );
181 if ( $seqname =~ /^[\*\.\+\:]+$/ ) {
182 $self->{_lastline
} = $_;
187 $self->{_lastline
} = $_;
191 # we ended up the first block and now are on the second
192 @ids_copy = @ids unless(defined($ids_copy[0])); #FIXME - hacky
193 my $seqname_with_coords = shift(@ids_copy);
194 if ($seqname_with_coords !~ /$seqname/) {
196 $self->throw("header and body of the alignment dont match");
199 $alignments{$seqname_with_coords} .= $aln_line;
201 if ( !$seen_block ) {
202 if (exists $order{$seqname_with_coords}) {
203 $self->warn("Duplicate sequence : $seqname\n".
204 "Can't guarantee alignment quality");
207 $order{$seqname_with_coords} = $order++;
213 my ( $sname, $start, $end );
214 foreach my $name ( sort { $order{$a} <=> $order{$b} } keys %alignments ) {
215 if ( $name =~ /(\S+):(\d+)-(\d+)/ ) {
216 ( $sname, $start, $end ) = ( $1, $2, $3 );
219 ( $sname, $start ) = ( $name, 1 );
220 my $str = $alignments{$name};
221 $str =~ s/[^A-Za-z]//g;
224 my $seq = Bio
::LocatableSeq
->new(
225 -seq
=> $alignments{$name},
229 -alphabet
=> $self->alphabet,
234 return $aln if $aln->num_sequences;
241 Usage : $stream->write_aln(@aln)
242 Function: writes the proda-format object (.aln) into the stream
243 Returns : 1 for success and 0 for error
244 Args : Bio::Align::AlignI object
249 my ($self,@aln) = @_;
250 $self->throw_not_implemented();
256 Usage : $obj->percentages($newval)
257 Function: Set the percentages flag - whether or not to show percentages in
259 Returns : value of percentages
260 Args : newvalue (optional)
266 my ( $self, $value ) = @_;
267 if ( defined $value ) {
268 $self->{'_percentages'} = $value;
270 return $self->{'_percentages'};
276 Usage : $obj->line_length($newval)
277 Function: Set the alignment output line length
278 Returns : value of line_length
279 Args : newvalue (optional)
285 my ( $self, $value ) = @_;
286 if ( defined $value ) {
287 $self->{'_line_length'} = $value;
289 return $self->{'_line_length'};
295 Usage : $obj->no_header($newval)
296 Function: Set if the alignment input has a CLUSTAL header or not
297 Returns : value of no_header
298 Args : newvalue (optional)
304 my ( $self, $value ) = @_;
305 if ( defined $value ) {
306 $self->{'_no_header'} = $value;
308 return $self->{'_no_header'};