2 # BioPerl module for Bio::AlignIO::xmfa
4 # Copyright Chris Fields
6 # You may distribute this module under the same terms as perl itself
7 # POD documentation - main docs before the code
11 Bio::AlignIO::xmfa - XMFA MSA Sequence input/output stream
15 Do not use this module directly. Use it via the L<Bio::AlignIO>
20 This object can transform L<Bio::SimpleAlign> objects from
21 XMFA flat file databases. For more information, see:
23 http://asap.ahabs.wisc.edu/mauve-aligner/mauve-user-guide/mauve-output-file-formats.html
25 This module is based on the AlignIO::fasta parser written by
30 Finish write_aln(), clean up code, allow LargeLocatableSeq (e.g. for
31 very large sequences from Mauve).
37 Please direct usage questions or support issues to the mailing list:
39 I<bioperl-l@bioperl.org>
41 rather than to the module maintainer directly. Many experienced and
42 reponsive experts will be able look at the problem and quickly
43 address it. Please include a thorough description of the problem
44 with code and data examples if at all possible.
48 Report bugs to the Bioperl bug tracking system to help us keep track
49 the bugs and their resolution. Bug reports can be submitted via the
52 https://github.com/bioperl/bioperl-live/issues
60 The rest of the documentation details each of the object
61 methods. Internal methods are usually preceded with a _
65 # Let the code begin...
67 package Bio
::AlignIO
::xmfa
;
70 use base
qw(Bio::AlignIO);
76 Usage : $aln = $stream->next_aln
77 Function: returns the next alignment in the stream.
78 Returns : Bio::Align::AlignI object - returns 0 on end of file
80 Args : -width => optional argument to specify the width sequence
81 will be written (60 chars by default)
83 See L<Bio::Align::AlignI>
89 my ($width) = $self->_rearrange([qw(WIDTH)],@_);
90 $self->width($width || $WIDTH);
92 my ($name, $tempname, $seqchar);
93 my $aln = Bio
::SimpleAlign
->new();
96 while (defined (my $entry = $self->_readline) ) {
98 if ( index($entry, '=') == 0 ) {
99 if (defined $name && $seqchar) {
100 my $seq = $self->_process_seq($name, $seqchar);
103 if ($aln && $entry =~ m{score\s*=\s*(\d+)}) {
109 } elsif ( $entry =~ m{^>.+$}xms) {
110 if ( defined $name ) {
111 my $seq = $self->_process_seq($name, $seqchar);
121 # this catches last sequence if '=' is not present (Mauve)
122 if ( defined $name ) {
123 my $seq = $self->_process_seq($name, $seqchar);
126 $aln->num_sequences ?
return $aln : return;
132 Usage : $stream->write_aln(@aln)
133 Function: writes the $aln object into the stream in xmfa format
134 Returns : 1 for success and 0 for error
135 Args : L<Bio::Align::AlignI> object
137 See L<Bio::Align::AlignI>
142 my ($self,@aln) = @_;
143 my $width = $self->width;
144 my ($seq,$desc,$rseq,$name,$count,$length,$seqsub,$start,$end,$strand,$id);
146 foreach my $aln (@aln) {
147 if( ! $aln || ! $aln->isa('Bio::Align::AlignI') ) {
148 $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
151 #if( $self->force_displayname_flat ) {
152 # $aln->set_displayname_flat(1);
155 foreach $rseq ( $aln->each_seq() ) {
156 ($start, $end, $strand, $id) = ($rseq->start, $rseq->end, $rseq->strand || 0,
158 $strand = ($strand == 1) ?
'+' :
159 ($strand == -1) ?
'-' :
161 $name = sprintf("%d:%d-%d %s %s",$seqct,$start,$end,$strand,$id);
163 $desc = $rseq->description || '';
164 $self->_print (">$name $desc\n") or return ;
166 $length = length($seq);
167 if(defined $seq && $length > 0) {
168 $seq =~ s/(.{1,$width})/$1\n/g;
172 $self->_print($seq) || return 0;
176 $alndesc = "score = ".$aln->score if ($aln->score);
177 $self->_print("= $alndesc\n") || return 0;
180 $self->flush if $self->_flush_on_write && defined $self->_fh;
188 Function: determine number of alphabetic chars
190 Args : sequence string
195 my ($self,$seq) = @_;
196 $seq =~ s/[^A-Z]//gi;
197 return CORE
::length($seq);
203 Usage : $obj->width($newwidth)
204 $width = $obj->width;
205 Function: Get/set width of alignment
206 Returns : integer value of width
207 Args : on set, new value (a scalar or undef, optional)
215 return $self->{'_width'} = shift if @_;
216 return $self->{'_width'} || $WIDTH;
219 ####### PRIVATE #######
222 my ($self, $entry, $seq) = @_;
223 my ($start, $end, $strand, $seqname, $desc, $all);
224 # put away last name and sequence
225 if ( $entry =~ m{^>\s*\d+:(\d+)-(\d+)\s+([+-]{1})(?
:\s
+(\S
+)\s
*(\S\
.*)?
)?
} ) {
226 ($start, $end, $seqname, $desc) = ($1, $2, $4, $5);
227 $strand = ($3 eq '+') ?
1 : -1;
228 } elsif ( $entry !~ /^>\s*\d+:/ ) {
229 $self->throw("First field after '>' must be a number:\n$entry");
231 $self->throw("Line does not comform to XMFA format:\n$entry");
233 my $seqobj = Bio
::LocatableSeq
->new(
237 -display_id
=> $seqname,
238 -description
=> $desc || $all,
241 -alphabet
=> $self->alphabet,
243 $self->debug("Reading $seqname\n");