t/SeqFeature/Generic.t: fix typo on required module for testing
[bioperl-live.git] / lib / Bio / AlignIO / largemultifasta.pm
blob56dc0e098731831ef12a1a3c9d775ccad87c62d7
2 # BioPerl module for Bio::AlignIO::largemultifasta
4 # based on the Bio::SeqIO::largefasta module
5 # by Ewan Birney <birney@ebi.ac.uk>
6 # and Lincoln Stein <lstein@cshl.org>
8 # and the SimpleAlign.pm module of Ewan Birney
10 # Copyright Albert Vilella
12 # You may distribute this module under the same terms as perl itself
13 # _history
14 # January 20, 2004
15 # POD documentation - main docs before the code
17 =head1 NAME
19 Bio::AlignIO::largemultifasta - Largemultifasta MSA Sequence
20 input/output stream
22 =head1 SYNOPSIS
24 Do not use this module directly. Use it via the L<Bio::AlignIO> class.
26 =head1 DESCRIPTION
28 This object can transform L<Bio::SimpleAlign> objects to and from
29 largemultifasta flat file databases. This is for the fasta sequence
30 format NOT FastA analysis program. To process the pairwise alignments
31 from a FastA (FastX, FastN, FastP, tFastA, etc) use the Bio::SearchIO
32 module.
34 Reimplementation of Bio::AlignIO::fasta modules so that creates
35 temporary files instead of keeping the whole sequences in memory.
37 =head1 FEEDBACK
39 =head2 Support
41 Please direct usage questions or support issues to the mailing list:
43 I<bioperl-l@bioperl.org>
45 rather than to the module maintainer directly. Many experienced and
46 reponsive experts will be able look at the problem and quickly
47 address it. Please include a thorough description of the problem
48 with code and data examples if at all possible.
50 =head2 Reporting Bugs
52 Report bugs to the Bioperl bug tracking system to help us keep track
53 the bugs and their resolution. Bug reports can be submitted via the
54 web:
56 https://github.com/bioperl/bioperl-live/issues
58 =head1 AUTHORS - Albert Vilella, Heikki Lehvaslaiho
60 Email: avilella-at-gmail-dot-com, heikki-at-bioperl-dot-org
63 =head1 APPENDIX
65 The rest of the documentation details each of the object
66 methods. Internal methods are usually preceded with a _
68 =cut
70 # Let the code begin...
72 package Bio::AlignIO::largemultifasta;
73 use strict;
75 use Bio::Seq::LargeLocatableSeq;
76 use Bio::Seq::SeqFactory;
78 use base qw(Bio::AlignIO Bio::SeqIO Bio::SimpleAlign);
81 sub _initialize {
82 my($self,@args) = @_;
83 $self->SUPER::_initialize(@args);
84 if( ! defined $self->sequence_factory ) {
85 $self->sequence_factory(Bio::Seq::SeqFactory->new(
86 -verbose => $self->verbose(),
87 -type => 'Bio::Seq::LargeLocatableSeq'
88 ));
92 =head2 next_seq
94 Title : next_seq
95 Usage : $seq = $stream->next_seq()
96 Function: returns the next sequence in the stream while taking care
97 of the length
98 Returns : Bio::Seq object
99 Args : NONE
101 =cut
103 sub next_seq {
104 my ($self) = @_;
105 my $largeseq = $self->sequence_factory->create(-alphabet=>$self->alphabet);
106 my ($id,$fulldesc,$entry);
107 my $count = 0;
108 my $seen = 0;
109 while( defined ($entry = $self->_readline) ) {
110 if( $seen == 1 && $entry =~ /^\s*>/ ) {
111 $self->_pushback($entry);
112 return $largeseq;
114 if ( $entry eq '>' ) {
115 $seen = 1; next;
116 } elsif( $entry =~ /\s*>(.+?)$/ ) {
117 $seen = 1;
118 ($id,$fulldesc) = ($1 =~ /^\s*(\S+)\s*(.*)$/)
119 or $self->warn("Can't parse fasta header");
120 $largeseq->display_id($id);
121 $largeseq->primary_id($id);
122 $largeseq->desc($fulldesc);
123 } else {
124 $entry =~ s/\s+//g;
125 $largeseq->add_sequence_as_string($entry);
127 (++$count % 1000 == 0 && $self->verbose() > 0) && print "line $count\n";
129 if( ! $seen ) { return; }
130 return $largeseq;
134 =head2 next_aln
136 Title : next_aln
137 Usage : $aln = $stream->next_aln()
138 Function: returns the next alignment in the stream.
139 Returns : L<Bio::Align::AlignI> object - returns 0 on end of file
140 or on error
141 Args : NONE
143 =cut
145 sub next_aln {
146 my $self = shift;
147 my $largeseq;
148 my $aln = Bio::SimpleAlign->new();
149 while (defined ($largeseq = $self->next_seq) ) {
150 $aln->add_seq($largeseq);
151 $self->debug("sequence readed\n");
154 my $alnlen = $aln->length;
155 foreach my $largeseq ( $aln->each_seq ) {
156 if( $largeseq->length < $alnlen ) {
157 my ($diff) = ($alnlen - $largeseq->length);
158 $largeseq->seq("-" x $diff);
162 return $aln if $aln->num_sequences;
163 return;
167 =head2 write_aln
169 Title : write_aln
170 Usage : $stream->write_aln(@aln)
171 Function: writes the $aln object into the stream in largemultifasta format
172 Returns : 1 for success and 0 for error
173 Args : L<Bio::Align::AlignI> object
176 =cut
178 sub write_aln {
179 my ($self,@aln) = @_;
180 my ($seq,$desc,$rseq,$name,$count,$length,$seqsub);
182 foreach my $aln (@aln) {
183 if( ! $aln || ! $aln->isa('Bio::Align::AlignI') ) {
184 $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
185 next;
187 foreach $rseq ( $aln->each_seq() ) {
188 $name = $aln->displayname($rseq->get_nse());
189 $seq = $rseq->seq();
190 $desc = $rseq->description || '';
191 $self->_print (">$name $desc\n") or return ;
192 $count =0;
193 $length = length($seq);
194 while( ($count * 60 ) < $length ) {
195 $seqsub = substr($seq,$count*60,60);
196 $self->_print ("$seqsub\n") or return ;
197 $count++;
201 $self->flush if $self->_flush_on_write && defined $self->_fh;
202 return 1;