Bio::Tools::CodonTable::is_start_codon: check in case of ambiguous codons (#266)
[bioperl-live.git] / lib / Bio / SeqIO / metafasta.pm
blob34fde002b7e23c6c2c85d8ca92a77aa60db9bd06
1 # BioPerl module for Bio::SeqIO::metafasta
3 # Please direct questions and support issues to <bioperl-l@bioperl.org>
5 # Cared for by Heikki Lehvaslaiho
7 # Copyright Heikki Lehvaslaiho
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 =head1 NAME
15 Bio::SeqIO::metafasta - metafasta sequence input/output stream
17 =head1 SYNOPSIS
19 Do not use this module directly. Use it via the Bio::SeqIO class.
21 use Bio::SeqIO;
23 # read the metafasta file
24 $io = Bio::SeqIO->new(-file => "test.metafasta",
25 -format => "metafasta" );
27 $seq = $io->next_seq;
29 =head1 DESCRIPTION
31 This object can transform Bio::Seq::Meta objects to and from metafasta
32 flat file databases.
34 For sequence part the code is an exact copy of Bio::SeqIO::fasta
35 module. The only added bits deal with meta data IO.
37 The format of a metafasta file is
39 >test
40 ABCDEFHIJKLMNOPQRSTUVWXYZ
41 &charge
42 NBNAANCNJCNNNONNCNNUNNXNZ
43 &chemical
44 LBSAARCLJCLSMOIMCHHULRXRZ
46 where the sequence block is followed by one or several meta blocks.
47 Each meta block starts with the ampersand character '&' in the first
48 column and is immediately followed by the name of the meta data which
49 continues until the new line. The meta data follows it. All
50 characters, except new line, are important in meta data.
52 =head1 FEEDBACK
54 =head2 Mailing Lists
56 User feedback is an integral part of the evolution of this and other
57 Bioperl modules. Send your comments and suggestions preferably to one
58 of the Bioperl mailing lists. Your participation is much appreciated.
60 bioperl-l@bioperl.org - General discussion
61 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
63 =head2 Support
65 Please direct usage questions or support issues to the mailing list:
67 I<bioperl-l@bioperl.org>
69 rather than to the module maintainer directly. Many experienced and
70 reponsive experts will be able look at the problem and quickly
71 address it. Please include a thorough description of the problem
72 with code and data examples if at all possible.
74 =head2 Reporting Bugs
76 Report bugs to the Bioperl bug tracking system to help us keep track
77 the bugs and their resolution. Bug reports can be submitted via the
78 web:
80 https://github.com/bioperl/bioperl-live/issues
82 =head1 AUTHOR - Heikki Lehvaslaiho
84 Email heikki-at-bioperl-dot-org
86 =head1 APPENDIX
88 The rest of the documentation details each of the object
89 methods. Internal methods are usually preceded with a _
91 =cut
93 # Let the code begin...
95 package Bio::SeqIO::metafasta;
97 use vars qw($WIDTH);
98 use strict;
100 use Bio::Seq::SeqFactory;
101 use Bio::Seq::SeqFastaSpeedFactory;
102 use Bio::Seq::Meta;
104 use base qw(Bio::SeqIO);
106 BEGIN { $WIDTH = 60}
108 sub _initialize {
109 my($self,@args) = @_;
110 $self->SUPER::_initialize(@args);
111 my ($width) = $self->_rearrange([qw(WIDTH)], @args);
112 $width && $self->width($width);
113 unless ( defined $self->sequence_factory ) {
114 $self->sequence_factory(Bio::Seq::SeqFastaSpeedFactory->new());
118 =head2 next_seq
120 Title : next_seq
121 Usage : $seq = $stream->next_seq()
122 Function: returns the next sequence in the stream
123 Returns : Bio::Seq object
124 Args : NONE
126 =cut
128 sub next_seq {
129 my( $self ) = @_;
130 my $seq;
131 my $alphabet;
132 local $/ = "\n>";
133 return unless my $entry = $self->_readline;
135 chomp($entry);
136 if ($entry =~ m/\A\s*\Z/s) { # very first one
137 return unless $entry = $self->_readline;
138 chomp($entry);
140 $entry =~ s/^>//;
142 my ($top,$sequence) = split(/\n/,$entry,2);
143 defined $sequence && $sequence =~ s/>//g;
145 my @metas;
146 ($sequence, @metas) = split /\n&/, $sequence;
148 my ($id,$fulldesc);
149 if( $top =~ /^\s*(\S+)\s*(.*)/ ) {
150 ($id,$fulldesc) = ($1,$2);
153 if (defined $id && $id eq '') {$id=$fulldesc;} # FIX incase no space
154 # between > and name \AE
155 defined $sequence && $sequence =~ s/\s//g; # Remove whitespace
157 # for empty sequences we need to know the mol.type
158 $alphabet = $self->alphabet();
159 if(defined $sequence && length($sequence) == 0) {
160 if(! defined($alphabet)) {
161 # let's default to dna
162 $alphabet = "dna";
164 } else {
165 # we don't need it really, so disable
166 $alphabet = undef;
169 $seq = $self->sequence_factory->create(
170 -seq => $sequence,
171 -id => $id,
172 # Ewan's note - I don't think this healthy
173 # but obviously to taste.
174 #-primary_id => $id,
175 -desc => $fulldesc,
176 -alphabet => $alphabet,
177 -direct => 1,
180 $seq = $seq->primary_seq;
181 bless $seq, 'Bio::Seq::Meta';
183 foreach my $meta (@metas) {
184 my ($name,$string) = split /\n/, $meta;
185 # $split ||= '';
186 $string =~ s/\n//g; # Remove newlines, spaces are important
187 $seq->named_meta($name, $string);
190 # if there wasn't one before, set the guessed type
191 unless ( defined $alphabet ) {
192 $self->alphabet($seq->alphabet());
194 return $seq;
197 =head2 write_seq
199 Title : write_seq
200 Usage : $stream->write_seq(@seq)
201 Function: writes the $seq object into the stream
202 Returns : 1 for success and 0 for error
203 Args : array of 1 to n Bio::PrimarySeqI objects
205 =cut
207 sub write_seq {
208 my ($self,@seq) = @_;
209 my $width = $self->width;
210 foreach my $seq (@seq) {
211 $self->throw("Did not provide a valid Bio::PrimarySeqI object")
212 unless defined $seq && ref($seq) && $seq->isa('Bio::PrimarySeqI');
214 my $str = $seq->seq;
215 my $top = $seq->display_id();
216 if ($seq->can('desc') and my $desc = $seq->desc()) {
217 $desc =~ s/\n//g;
218 $top .= " $desc";
220 if(length($str) > 0) {
221 $str =~ s/(.{1,$width})/$1\n/g;
222 } else {
223 $str = "\n";
225 $self->_print (">",$top,"\n",$str) or return;
226 if ($seq->isa('Bio::Seq::MetaI')) {
227 foreach my $meta ($seq->meta_names) {
228 my $str = $seq->named_meta($meta);
229 $str =~ s/(.{1,$width})/$1\n/g;
230 $self->_print ("&",$meta,"\n",$str);
235 $self->flush if $self->_flush_on_write && defined $self->_fh;
236 return 1;
239 =head2 width
241 Title : width
242 Usage : $obj->width($newval)
243 Function: Get/Set the line width for METAFASTA output
244 Returns : value of width
245 Args : newvalue (optional)
248 =cut
250 sub width{
251 my ($self,$value) = @_;
252 if( defined $value) {
253 $self->{'width'} = $value;
255 return $self->{'width'} || $WIDTH;