Bio::DB::TFBS namespace has been moved to its own distribution named after itself
[bioperl-live.git] / Bio / AlignIO / metafasta.pm
blobec3a851276544ccc40f39b646dda0d123fed747d
2 # BioPerl module for Bio::AlignIO::metafasta
4 # Copyright Heikki Lehvaslaiho
6 # You may distribute this module under the same terms as perl itself
8 # POD documentation - main docs before the code
10 =head1 NAME
12 Bio::AlignIO::metafasta - Metafasta MSA Sequence input/output stream
14 =head1 SYNOPSIS
16 Do not use this module directly. Use it via the L<Bio::AlignIO> class.
18 =head1 DESCRIPTION
20 This object can transform L<Bio::SimpleAlign> objects to and from
21 metafasta flat file databases.
23 The format of a metafasta file is
25 >test/1-25
26 ABCDEFHIJKLMNOPQRSTUVWXYZ
27 &charge
28 NBNAANCNJCNNNONNCNNUNNXNZ
29 &chemical
30 LBSAARCLJCLSMOIMCHHULRXRZ
32 where the sequence block is followed by one or several meta blocks.
33 Each meta block starts with the ampersand character '&' in the first
34 column and is immediately followed by the name of the meta data which
35 continues until the new line. The meta data follows it. All
36 characters, except new line, are important in meta data.
38 =head1 SEE ALSO
40 L<Bio::SeqIO::metafasta>
42 =head1 FEEDBACK
44 =head2 Support
46 Please direct usage questions or support issues to the mailing list:
48 I<bioperl-l@bioperl.org>
50 rather than to the module maintainer directly. Many experienced and
51 reponsive experts will be able look at the problem and quickly
52 address it. Please include a thorough description of the problem
53 with code and data examples if at all possible.
55 =head2 Reporting Bugs
57 Report bugs to the Bioperl bug tracking system to help us keep track
58 the bugs and their resolution. Bug reports can be submitted via the
59 web:
61 https://github.com/bioperl/bioperl-live/issues
63 =head1 AUTHOR - Heikki Lehvaslaiho
65 Email heikki-at-bioperl-dot-org
67 =head1 APPENDIX
69 The rest of the documentation details each of the object
70 methods. Internal methods are usually preceded with a _
72 =cut
74 # Let the code begin...
76 package Bio::AlignIO::metafasta;
77 use vars qw($WIDTH);
78 use strict;
80 use Bio::SimpleAlign;
81 use Bio::Seq::Meta;
82 use Bio::Seq::SeqFactory;
83 use Bio::Seq::SeqFastaSpeedFactory;
85 use base qw(Bio::AlignIO);
87 BEGIN { $WIDTH = 60}
89 sub _initialize {
90 my($self,@args) = @_;
91 $self->SUPER::_initialize(@args);
92 my ($width) = $self->_rearrange([qw(WIDTH)], @args);
93 $width && $self->width($width);
96 =head2 next_aln
98 Title : next_aln
99 Usage : $aln = $stream->next_aln()
100 Function: returns the next alignment in the stream.
101 Returns : L<Bio::Align::AlignI> object - returns 0 on end of file
102 or on error
103 Args : NONE
105 =cut
107 sub next_aln {
108 my( $self ) = @_;
109 my $seq;
110 my $alphabet;
111 local $/ = "\n>";
113 my $aln = Bio::SimpleAlign->new();
115 while(defined (my $entry = $self->_readline)) {
116 chomp($entry);
117 if ($entry =~ m/\A\s*\Z/s) { # very first one
118 return unless $entry = $self->_readline;
119 chomp($entry);
121 $entry =~ s/^>//;
123 my ($top,$sequence) = split(/\n/,$entry,2);
124 defined $sequence && $sequence =~ s/>//g;
126 my @metas;
127 ($sequence, @metas) = split /\n&/, $sequence;
129 my ($id, $start, $end);
130 if ( $top =~ /(\S+)\/(\d+)-(\d+)/ ) {
131 $id = $1;
132 $start = $2;
133 $end = $3;
135 elsif ($top =~ /(\S+)/) {
136 $id = $1;
137 $start = 1;
138 $end = length($sequence);
141 defined $sequence && $sequence =~ s/\s//g; # Remove whitespace
143 $seq = Bio::Seq::Meta->new('-seq' => $sequence,
144 '-display_id' => $id,
145 '-start' => $start,
146 '-end' => $end,
147 '-alphabet' => $self->alphabet,
150 foreach my $meta (@metas) {
151 my ($name,$string) = split /\n/, $meta;
152 $string =~ s/\n//g; # Remove newlines, spaces are important
153 $seq->named_meta($name, $string);
156 $aln->add_seq($seq);
158 # alignment needs seqs all the same length, pad with gaps
159 my $alnlen = $aln->length;
160 foreach my $seq ( $aln->each_seq ) {
161 if ( $seq->length < $alnlen ) {
162 my ($diff) = ($alnlen - $seq->length);
163 $seq->seq( $seq->seq() . "-" x $diff);
167 return $aln if $aln->num_sequences;
168 return;
171 =head2 write_aln
173 Title : write_aln
174 Usage : $stream->write_aln(@aln)
175 Function: writes the $aln object into the stream in fasta format
176 Returns : 1 for success and 0 for error
177 Args : L<Bio::Align::AlignI> object
179 =cut
181 sub write_aln {
182 my ($self,@aln) = @_;
183 my $width = $self->width;
185 foreach my $aln (@aln) {
186 if( ! $aln || ! $aln->isa('Bio::Align::AlignI') ) {
187 $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
188 next;
190 foreach my $seq ( $aln->each_seq() ) {
191 my $name = $aln->displayname($seq->get_nse);
193 my $str = $seq->seq();
194 if(length($str) > 0) {
195 $str =~ s/(.{1,$width})/$1\n/g;
196 } else {
197 $str = "\n";
199 $self->_print (">",$name,"\n",$str) or return;
200 if ($seq->isa('Bio::Seq::MetaI')) {
201 foreach my $meta ($seq->meta_names) {
202 my $str = $seq->named_meta($meta);
203 $str =~ s/(.{1,$width})/$1\n/g;
204 $self->_print ("&",$meta,"\n",$str);
209 $self->flush if $self->_flush_on_write && defined $self->_fh;
210 return 1;
214 =head2 width
216 Title : width
217 Usage : $obj->width($newval)
218 Function: Get/Set the line width for METAFASTA output
219 Returns : value of width
220 Args : newvalue (optional)
223 =cut
225 sub width{
226 my ($self,$value) = @_;
227 if( defined $value) {
228 $self->{'width'} = $value;
230 return $self->{'width'} || $WIDTH;