2 # BioPerl module for Bio::AlignIO::clustalw
4 # based on the Bio::SeqIO modules
5 # by Ewan Birney <birney@ebi.ac.uk>
6 # and Lincoln Stein <lstein@cshl.org>
7 # and the Bio::SimpleAlign module of Ewan Birney
9 # Copyright Peter Schattner
11 # You may distribute this module under the same terms as perl itself
14 # POD documentation - main docs before the code
18 Bio::AlignIO::clustalw - clustalw sequence input/output stream
22 Do not use this module directly. Use it via the Bio::AlignIO class.
26 This object can transform Bio::Align::AlignI objects to and from clustalw
33 User feedback is an integral part of the evolution of this and other
34 Bioperl modules. Send your comments and suggestions preferably to one
35 of the Bioperl mailing lists. Your participation is much appreciated.
37 bioperl-l@bioperl.org - General discussion
38 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
42 Please direct usage questions or support issues to the mailing list:
44 I<bioperl-l@bioperl.org>
46 rather than to the module maintainer directly. Many experienced and
47 reponsive experts will be able look at the problem and quickly
48 address it. Please include a thorough description of the problem
49 with code and data examples if at all possible.
53 Report bugs to the Bioperl bug tracking system to help us keep track
54 the bugs and their resolution. Bug reports can be submitted via the
57 https://github.com/bioperl/bioperl-live/issues
59 =head1 AUTHORS - Peter Schattner
61 Email: schattner@alum.mit.edu
66 The rest of the documentation details each of the object
67 methods. Internal methods are usually preceded with a _
71 # Let the code begin...
73 package Bio
::AlignIO
::clustalw
;
74 use vars
qw($LINELENGTH $CLUSTALPRINTVERSION);
79 $CLUSTALPRINTVERSION = '1.81';
80 use base qw(Bio::AlignIO);
85 Usage : $alignio = Bio::AlignIO->new(-format => 'clustalw',
87 Function: returns a new Bio::AlignIO object to handle clustalw files
88 Returns : Bio::AlignIO::clustalw object
89 Args : -verbose => verbosity setting (-1, 0, 1, 2)
90 -file => name of file to read in or to write, with ">"
91 -fh => alternative to -file param - provide a filehandle
92 to read from or write to
93 -format => alignment format to process or produce
94 -percentages => display a percentage of identity
95 in each line of the alignment (clustalw only)
96 -linelength=> alignment output line length (default 60)
101 my ( $self, @args ) = @_;
102 $self->SUPER::_initialize
(@args);
103 my ( $percentages, $ll ) =
104 $self->_rearrange( [qw(PERCENTAGES LINELENGTH)], @args );
105 defined $percentages && $self->percentages($percentages);
106 $self->line_length( $ll || $LINELENGTH );
112 Usage : $aln = $stream->next_aln()
113 Function: returns the next alignment in the stream
114 Returns : Bio::Align::AlignI object
117 See L<Bio::Align::AlignI> for details
125 while ( $first_line = $self->_readline ) {
126 last if $first_line !~ /^$/;
128 $self->_pushback($first_line);
129 if ( defined( $first_line = $self->_readline )
130 && $first_line !~ /CLUSTAL/ )
133 "trying to parse a file which does not start with a CLUSTAL header"
137 my $aln = Bio
::SimpleAlign
->new(
138 -source
=> 'clustalw',
139 -verbose
=> $self->verbose
143 $self->{_lastline
} = '';
144 my ($first_block, $seen_block) = (0,0);
145 while ( defined( $_ = $self->_readline ) ) {
146 next if (/^\s+$/ && !$first_block);
147 if (/^\s$/) { # line contains no description
152 # break the loop if we come to the end of the current alignment
153 # and push back the CLUSTAL header
155 $self->_pushback($_);
159 my ( $seqname, $aln_line ) = ( '', '' );
160 if (/^\s*(\S+)\s*\/\s
*(\d
+)-(\d
+)\s
+(\S
+)\s
*$/ox
) {
163 ( $seqname, $aln_line ) = ( "$1:$2-$3", $4 );
165 # } elsif( /^\s*(\S+)\s+(\S+)\s*$/ox ) { without trailing numbers
167 elsif (/^\s*(\S+)\s+(\S+)\s*\d*\s*$/ox) { # with numbers
168 ( $seqname, $aln_line ) = ( $1, $2 );
169 if ( $seqname =~ /^[\*\.\+\:]+$/ ) {
170 $self->{_lastline
} = $_;
175 $self->{_lastline
} = $_;
179 if ( !$seen_block ) {
180 if (exists $order{$seqname}) {
181 $self->warn("Duplicate sequence : $seqname\n".
182 "Can't guarantee alignment quality");
185 $order{$seqname} = $order++;
189 $alignments{$seqname} .= $aln_line;
192 my ( $sname, $start, $end );
193 foreach my $name ( sort { $order{$a} <=> $order{$b} } keys %alignments ) {
194 if ( $name =~ /(\S+):(\d+)-(\d+)/ ) {
195 ( $sname, $start, $end ) = ( $1, $2, $3 );
198 ( $sname, $start ) = ( $name, 1 );
199 my $str = $alignments{$name};
200 $str =~ s/[^A-Za-z]//g;
203 my $seq = Bio
::LocatableSeq
->new
205 '-seq' => $alignments{$name},
206 '-display_id' => $sname,
209 '-alphabet' => $self->alphabet,
214 # not sure if this should be a default option - or we can pass in
215 # an option to do this in the future? --jason stajich
216 # $aln->map_chars('\.','-');
218 # no sequences added, so just return
219 return $aln if $aln->num_sequences;
226 Usage : $stream->write_aln(@aln)
227 Function: writes the clustalw-format object (.aln) into the stream
228 Returns : 1 for success and 0 for error
229 Args : Bio::Align::AlignI object
234 my ( $self, @aln ) = @_;
235 my ( $count, $length, $seq, @seq, $tempcount, $line_len );
236 $line_len = $self->line_length || $LINELENGTH;
237 foreach my $aln (@aln) {
238 if ( !$aln || !$aln->isa('Bio::Align::AlignI') ) {
240 "Must provide a Bio::Align::AlignI object when calling write_aln"
244 my $matchline = $aln->match_line;
245 if ( $self->force_displayname_flat ) {
246 $aln->set_displayname_flat(1);
249 sprintf( "CLUSTAL W (%s) multiple sequence alignment\n\n\n",
250 $CLUSTALPRINTVERSION )
252 $length = $aln->length();
253 $count = $tempcount = 0;
254 @seq = $aln->each_seq();
256 foreach $seq (@seq) {
257 $max = length( $aln->displayname( $seq->get_nse() ) )
258 if ( length( $aln->displayname( $seq->get_nse() ) ) > $max );
261 while ( $count < $length ) {
262 my ( $linesubstr, $first ) = ( '', 1 );
263 foreach $seq (@seq) {
266 # Following lines are to suppress warnings
267 # if some sequences in the alignment are much longer than others.
270 my $seqchars = $seq->seq();
272 if ( length($seqchars) >= ( $count + $line_len ) ) {
273 $substring = substr( $seqchars, $count, $line_len );
276 substr( $matchline, $count, $line_len );
281 elsif ( length($seqchars) >= $count ) {
282 $substring = substr( $seqchars, $count );
284 $linesubstr = substr( $matchline, $count );
293 "%-" . $max . "s %s\n",
294 $aln->displayname( $seq->get_nse() ), $substring
299 my $percentages = '';
300 if ( $self->percentages ) {
301 my ($strcpy) = ($linesubstr);
302 my $count = ( $strcpy =~ tr/\*// );
304 sprintf( "\t%d%%", 100 * ( $count / length($linesubstr) ) );
308 "%-" . $max . "s %s%s\n",
309 '', $linesubstr, $percentages
312 $self->_print( sprintf("\n\n") ) or return;
316 $self->flush if $self->_flush_on_write && defined $self->_fh;
323 Usage : $obj->percentages($newval)
324 Function: Set the percentages flag - whether or not to show percentages in
326 Returns : value of percentages
327 Args : newvalue (optional)
333 my ( $self, $value ) = @_;
334 if ( defined $value ) {
335 $self->{'_percentages'} = $value;
337 return $self->{'_percentages'};
343 Usage : $obj->line_length($newval)
344 Function: Set the alignment output line length
345 Returns : value of line_length
346 Args : newvalue (optional)
352 my ( $self, $value ) = @_;
353 if ( defined $value ) {
354 $self->{'_line_length'} = $value;
356 return $self->{'_line_length'};