2 # BioPerl module for Bio::AlignIO::largemultifasta
4 # based on the Bio::SeqIO::largefasta module
5 # by Ewan Birney <birney@ebi.ac.uk>
6 # and Lincoln Stein <lstein@cshl.org>
8 # and the SimpleAlign.pm module of Ewan Birney
10 # Copyright Albert Vilella
12 # You may distribute this module under the same terms as perl itself
15 # POD documentation - main docs before the code
19 Bio::AlignIO::largemultifasta - Largemultifasta MSA Sequence
24 Do not use this module directly. Use it via the L<Bio::AlignIO> class.
28 This object can transform L<Bio::SimpleAlign> objects to and from
29 largemultifasta flat file databases. This is for the fasta sequence
30 format NOT FastA analysis program. To process the pairwise alignments
31 from a FastA (FastX, FastN, FastP, tFastA, etc) use the Bio::SearchIO
34 Reimplementation of Bio::AlignIO::fasta modules so that creates
35 temporary files instead of keeping the whole sequences in memory.
41 Please direct usage questions or support issues to the mailing list:
43 I<bioperl-l@bioperl.org>
45 rather than to the module maintainer directly. Many experienced and
46 reponsive experts will be able look at the problem and quickly
47 address it. Please include a thorough description of the problem
48 with code and data examples if at all possible.
52 Report bugs to the Bioperl bug tracking system to help us keep track
53 the bugs and their resolution. Bug reports can be submitted via the
56 https://github.com/bioperl/bioperl-live/issues
58 =head1 AUTHORS - Albert Vilella, Heikki Lehvaslaiho
60 Email: avilella-at-gmail-dot-com, heikki-at-bioperl-dot-org
65 The rest of the documentation details each of the object
66 methods. Internal methods are usually preceded with a _
70 # Let the code begin...
72 package Bio
::AlignIO
::largemultifasta
;
75 use Bio
::Seq
::LargeLocatableSeq
;
76 use Bio
::Seq
::SeqFactory
;
78 use base
qw(Bio::AlignIO Bio::SeqIO Bio::SimpleAlign);
83 $self->SUPER::_initialize
(@args);
84 if( ! defined $self->sequence_factory ) {
85 $self->sequence_factory(Bio
::Seq
::SeqFactory
->new(
86 -verbose
=> $self->verbose(),
87 -type
=> 'Bio::Seq::LargeLocatableSeq'
95 Usage : $seq = $stream->next_seq()
96 Function: returns the next sequence in the stream while taking care
98 Returns : Bio::Seq object
105 my $largeseq = $self->sequence_factory->create(-alphabet
=>$self->alphabet);
106 my ($id,$fulldesc,$entry);
109 while( defined ($entry = $self->_readline) ) {
110 if( $seen == 1 && $entry =~ /^\s*>/ ) {
111 $self->_pushback($entry);
114 if ( $entry eq '>' ) {
116 } elsif( $entry =~ /\s*>(.+?)$/ ) {
118 ($id,$fulldesc) = ($1 =~ /^\s*(\S+)\s*(.*)$/)
119 or $self->warn("Can't parse fasta header");
120 $largeseq->display_id($id);
121 $largeseq->primary_id($id);
122 $largeseq->desc($fulldesc);
125 $largeseq->add_sequence_as_string($entry);
127 (++$count % 1000 == 0 && $self->verbose() > 0) && print "line $count\n";
129 if( ! $seen ) { return; }
137 Usage : $aln = $stream->next_aln()
138 Function: returns the next alignment in the stream.
139 Returns : L<Bio::Align::AlignI> object - returns 0 on end of file
148 my $aln = Bio
::SimpleAlign
->new();
149 while (defined ($largeseq = $self->next_seq) ) {
150 $aln->add_seq($largeseq);
151 $self->debug("sequence readed\n");
154 my $alnlen = $aln->length;
155 foreach my $largeseq ( $aln->each_seq ) {
156 if( $largeseq->length < $alnlen ) {
157 my ($diff) = ($alnlen - $largeseq->length);
158 $largeseq->seq("-" x
$diff);
162 return $aln if $aln->num_sequences;
170 Usage : $stream->write_aln(@aln)
171 Function: writes the $aln object into the stream in largemultifasta format
172 Returns : 1 for success and 0 for error
173 Args : L<Bio::Align::AlignI> object
179 my ($self,@aln) = @_;
180 my ($seq,$desc,$rseq,$name,$count,$length,$seqsub);
182 foreach my $aln (@aln) {
183 if( ! $aln || ! $aln->isa('Bio::Align::AlignI') ) {
184 $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
187 foreach $rseq ( $aln->each_seq() ) {
188 $name = $aln->displayname($rseq->get_nse());
190 $desc = $rseq->description || '';
191 $self->_print (">$name $desc\n") or return ;
193 $length = length($seq);
194 while( ($count * 60 ) < $length ) {
195 $seqsub = substr($seq,$count*60,60);
196 $self->_print ("$seqsub\n") or return ;
201 $self->flush if $self->_flush_on_write && defined $self->_fh;