1 # BioPerl module for Bio::SeqIO::metafasta
3 # Please direct questions and support issues to <bioperl-l@bioperl.org>
5 # Cared for by Heikki Lehvaslaiho
7 # Copyright Heikki Lehvaslaiho
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
15 Bio::SeqIO::metafasta - metafasta sequence input/output stream
19 Do not use this module directly. Use it via the Bio::SeqIO class.
23 # read the metafasta file
24 $io = Bio::SeqIO->new(-file => "test.metafasta",
25 -format => "metafasta" );
31 This object can transform Bio::Seq::Meta objects to and from metafasta
For the sequence part, the code is an exact copy of the Bio::SeqIO::fasta
35 module. The only added bits deal with meta data IO.
37 The format of a metafasta file is
40 ABCDEFHIJKLMNOPQRSTUVWXYZ
42 NBNAANCNJCNNNONNCNNUNNXNZ
44 LBSAARCLJCLSMOIMCHHULRXRZ
46 where the sequence block is followed by one or several meta blocks.
47 Each meta block starts with the ampersand character '&' in the first
48 column and is immediately followed by the name of the meta data which
49 continues until the new line. The meta data follows it. All
50 characters, except new line, are important in meta data.
56 User feedback is an integral part of the evolution of this and other
57 Bioperl modules. Send your comments and suggestions preferably to one
58 of the Bioperl mailing lists. Your participation is much appreciated.
60 bioperl-l@bioperl.org - General discussion
61 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
65 Please direct usage questions or support issues to the mailing list:
67 I<bioperl-l@bioperl.org>
69 rather than to the module maintainer directly. Many experienced and
responsive experts will be able to look at the problem and quickly
71 address it. Please include a thorough description of the problem
72 with code and data examples if at all possible.
Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution. Bug reports can be submitted via the
80 https://github.com/bioperl/bioperl-live/issues
82 =head1 AUTHOR - Heikki Lehvaslaiho
84 Email heikki-at-bioperl-dot-org
88 The rest of the documentation details each of the object
89 methods. Internal methods are usually preceded with a _
93 # Let the code begin...
95 package Bio
::SeqIO
::metafasta
;
100 use Bio::Seq::SeqFactory;
101 use Bio::Seq::SeqFastaSpeedFactory;
104 use base qw(Bio::SeqIO);
109 my($self,@args) = @_;
110 $self->SUPER::_initialize
(@args);
111 my ($width) = $self->_rearrange([qw(WIDTH)], @args);
112 $width && $self->width($width);
113 unless ( defined $self->sequence_factory ) {
114 $self->sequence_factory(Bio
::Seq
::SeqFastaSpeedFactory
->new());
121 Usage : $seq = $stream->next_seq()
122 Function: returns the next sequence in the stream
123 Returns : Bio::Seq object
133 return unless my $entry = $self->_readline;
136 if ($entry =~ m/\A\s*\Z/s) { # very first one
137 return unless $entry = $self->_readline;
142 my ($top,$sequence) = split(/\n/,$entry,2);
143 defined $sequence && $sequence =~ s/>//g;
146 ($sequence, @metas) = split /\n&/, $sequence;
149 if( $top =~ /^\s*(\S+)\s*(.*)/ ) {
150 ($id,$fulldesc) = ($1,$2);
153 if (defined $id && $id eq '') {$id=$fulldesc;} # FIX incase no space
154 # between > and name \AE
155 defined $sequence && $sequence =~ s/\s//g; # Remove whitespace
157 # for empty sequences we need to know the mol.type
158 $alphabet = $self->alphabet();
159 if(defined $sequence && length($sequence) == 0) {
160 if(! defined($alphabet)) {
161 # let's default to dna
165 # we don't need it really, so disable
169 $seq = $self->sequence_factory->create(
172 # Ewan's note - I don't think this healthy
173 # but obviously to taste.
176 -alphabet
=> $alphabet,
180 $seq = $seq->primary_seq;
181 bless $seq, 'Bio::Seq::Meta';
# Attach every meta block that followed the sequence to the new object.
# Each element of @metas is "<name>\n<data, possibly wrapped over lines>".
foreach my $meta (@metas) {
    # Split into exactly two fields: the name line and everything after it.
    # Without an explicit LIMIT, assigning split() to two variables makes
    # Perl stop after three fields and throw the third away, so any meta
    # data wrapped over more than one line (as write_seq produces for
    # strings longer than the line width) lost all but its first line.
    my ($name,$string) = split /\n/, $meta, 2;
    # A block that has a name but no data line leaves $string undefined;
    # skip the substitution then to avoid an "uninitialized value" warning.
    $string =~ s/\n//g if defined $string; # Remove newlines, spaces are important
    $seq->named_meta($name, $string);
190 # if there wasn't one before, set the guessed type
191 unless ( defined $alphabet ) {
192 $self->alphabet($seq->alphabet());
200 Usage : $stream->write_seq(@seq)
201 Function: writes the $seq object into the stream
202 Returns : 1 for success and 0 for error
203 Args : array of 1 to n Bio::PrimarySeqI objects
208 my ($self,@seq) = @_;
209 my $width = $self->width;
210 foreach my $seq (@seq) {
211 $self->throw("Did not provide a valid Bio::PrimarySeqI object")
212 unless defined $seq && ref($seq) && $seq->isa('Bio::PrimarySeqI');
215 my $top = $seq->display_id();
216 if ($seq->can('desc') and my $desc = $seq->desc()) {
220 if(length($str) > 0) {
221 $str =~ s/(.{1,$width})/$1\n/g;
225 $self->_print (">",$top,"\n",$str) or return;
226 if ($seq->isa('Bio::Seq::MetaI')) {
227 foreach my $meta ($seq->meta_names) {
228 my $str = $seq->named_meta($meta);
229 $str =~ s/(.{1,$width})/$1\n/g;
230 $self->_print ("&",$meta,"\n",$str);
235 $self->flush if $self->_flush_on_write && defined $self->_fh;
242 Usage : $obj->width($newval)
243 Function: Get/Set the line width for METAFASTA output
244 Returns : value of width
245 Args : newvalue (optional)
251 my ($self,$value) = @_;
252 if( defined $value) {
253 $self->{'width'} = $value;
255 return $self->{'width'} || $WIDTH;