maint: remove Travis stuff which has been replaced with Github actions (#325)
[bioperl-live.git] / lib / Bio / AlignIO / mega.pm
blobcd90973fa3f3d4e32f7f9554cacd0418bced1cbc
2 # BioPerl module for Bio::AlignIO::mega
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Jason Stajich <jason-at-bioperl.org>
8 # Copyright Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::AlignIO::mega - Parse and Create MEGA format data files
18 =head1 SYNOPSIS
20 use Bio::AlignIO;
21 my $alignio = Bio::AlignIO->new(-format => 'mega',
22 -file => 't/data/hemoglobinA.meg');
24 while( my $aln = $alignio->next_aln ) {
25 # process each alignment or convert to another format like NEXUS
28 =head1 DESCRIPTION
30 This object handles reading and writing data streams in the MEGA
31 format (Kumar and Nei).
34 =head1 FEEDBACK
36 =head2 Mailing Lists
38 User feedback is an integral part of the evolution of this and other
39 Bioperl modules. Send your comments and suggestions preferably to
40 the Bioperl mailing list. Your participation is much appreciated.
42 bioperl-l@bioperl.org - General discussion
43 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
45 =head2 Support
47 Please direct usage questions or support issues to the mailing list:
49 I<bioperl-l@bioperl.org>
51 rather than to the module maintainer directly. Many experienced and
52 responsive experts will be able to look at the problem and quickly
53 address it. Please include a thorough description of the problem
54 with code and data examples if at all possible.
56 =head2 Reporting Bugs
58 Report bugs to the Bioperl bug tracking system to help us keep track
59 of the bugs and their resolution. Bug reports can be submitted via the
60 web:
62 https://github.com/bioperl/bioperl-live/issues
64 =head1 AUTHOR - Jason Stajich
66 Email jason-at-bioperl.org
68 =head1 APPENDIX
70 The rest of the documentation details each of the object methods.
71 Internal methods are usually preceded with a _
73 =cut
76 # Let the code begin...
79 package Bio::AlignIO::mega;
81 use vars qw($MEGANAMELEN %VALID_TYPES $LINELEN $BLOCKLEN);
82 use strict;
84 use Bio::SimpleAlign;
85 use Bio::LocatableSeq;
87 # symbols are changed due to MEGA's use of '.' for redundant sequences
# Package-wide layout settings, assigned at compile time.
BEGIN {
    # $MEGANAMELEN : width of the leading "#name" column of a sequence line
    # $LINELEN     : number of alignment columns written per output line
    # $BLOCKLEN    : number of columns in each space-separated block
    ( $MEGANAMELEN, $LINELEN, $BLOCKLEN ) = ( 10, 60, 10 );

    # Lookup table keyed by datatype name.
    %VALID_TYPES = map { ( $_ => 1 ) } qw(dna rna protein standard);
}
95 use base qw(Bio::AlignIO);
=head2 next_aln

 Title   : next_aln
 Usage   : $aln = $stream->next_aln()
 Function: returns the next alignment in the stream.
           Supports the following MEGA format features:
           - The file has to start with '#mega'
           - Reads in the name of the alignment from a comment
             (anything after '!Title: ') .
           - Reads in the format parameters datatype, identical
             and indel
 Returns : L<Bio::Align::AlignI> object, or undef (an empty list in
           list context) at the end of the stream or when no
           sequences were read
 Throws  : if the first non-blank line does not start with '#mega'
 Args    : NONE


=cut

sub next_aln {
    my ($self) = @_;
    my ( $alphabet, %seqs );

    # Symbols are changed due to MEGA's use of '.' for redundant sequences;
    # the overrides only last for the duration of this call.
    local $Bio::LocatableSeq::OTHER_SYMBOLS = '\*\?\.';
    local $Bio::LocatableSeq::GAP_SYMBOLS   = '\-';
    my $aln = Bio::SimpleAlign->new( -source => 'mega' );

    # Skip any blank lines in front of the '#mega' marker.
    my $entry;
    while ( defined( $entry = $self->_readline() ) && $entry =~ /^\s+$/ ) { }

    # Nothing left to read: report end of stream instead of matching
    # against undef and throwing, so "while ($aln = $in->next_aln)" ends.
    return unless defined $entry;

    $self->throw("Not a valid MEGA file! [#mega] not starting the file!")
        unless $entry =~ /^#mega/i;

    # Header section: '!Title' and '!Format' lines, terminated by the first
    # line starting with '#', which is the first sequence line.
  HEADER:
    while ( defined( $entry = $self->_readline() ) ) {
        if ( $entry =~ /\!Title:\s*([^\;]+)\s*/i ) {
            $aln->id($1);
        }
        elsif ( $entry =~ /\!Format\s+([^\;]+)\s*/ ) {
            foreach my $field ( split /\s+/, $1 ) {
                my ( $name, $value ) = split /\=/, $field;
                next unless defined $name;    # empty field, nothing to set
                if ( $name eq 'datatype' ) {
                    $alphabet = $value;
                }
                elsif ( $name eq 'identical' ) {
                    $aln->match_char($value);
                }
                elsif ( $name eq 'indel' ) {
                    $aln->gap_char($value);
                }
            }
        }
        elsif ( $entry =~ /^\#/ ) {
            last HEADER;
        }
    }

    # Sequence section: $entry still holds the line that ended the header.
    # Lines for the same name are concatenated; @order remembers the order
    # in which names were first seen.
    my @order;
    while ( defined $entry ) {
        if ( $entry !~ /^\s+$/ ) {

            # this is to skip the leading '#'
            my $seqname = substr( $entry, 1, $MEGANAMELEN - 1 );
            $seqname =~ s/(\S+)\s+$/$1/g;
            my $line = substr( $entry, $MEGANAMELEN );
            $line =~ s/\s+//g;
            push @order, $seqname unless defined $seqs{$seqname};
            $seqs{$seqname} .= $line;
        }
        $entry = $self->_readline();
    }

    foreach my $seqname (@order) {

        # The end coordinate counts residues only, so gaps are removed
        # from a copy before measuring its length.
        my $ungapped = $seqs{$seqname};
        $ungapped =~ s/[$Bio::LocatableSeq::GAP_SYMBOLS]+//g;
        my $seq = Bio::LocatableSeq->new(
            '-alphabet'   => $alphabet,
            '-display_id' => $seqname,
            '-seq'        => $seqs{$seqname},
            '-start'      => 1,
            '-end'        => length($ungapped),
        );

        $aln->add_seq($seq);
    }
    $aln->unmatch;
    return $aln if $aln->num_sequences;
    return;
}
=head2 write_aln

 Title   : write_aln
 Usage   : $stream->write_aln(@aln)
 Function: writes the $aln object into the stream in MEGA format
 Returns : 1 for success and 0 for error. On error a warning is issued
           and writing stops at the offending alignment; alignments
           earlier in the argument list have already been written.
 Args    : L<Bio::Align::AlignI> object
 Note    : match() and set_displayname_flat() are called on every
           alignment passed in.


=cut

sub write_aln {
    my ( $self, @aln ) = @_;

    # Sequence line layout: '#', the name left-justified in a field of
    # $MEGANAMELEN-1 characters, then the blocks. Built once, as it does
    # not change between lines.
    my $line_format = "#%-" . ( $MEGANAMELEN - 1 ) . "s%s\n";

    foreach my $aln (@aln) {
        if ( !$aln || !$aln->isa('Bio::Align::AlignI') ) {
            $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
            return 0;
        }
        elsif ( !$aln->is_flush( $self->verbose ) ) {
            $self->warn("All Sequences in the alignment must be the same length");
            return 0;
        }

        $aln->match();
        my $format = sprintf(
            'datatype=%s identical=%s indel=%s;',
            $aln->get_seq_by_pos(1)->alphabet(),
            $aln->match_char, $aln->gap_char
        );

        $self->_print(
            sprintf( "#mega\n!Title: %s;\n!Format %s\n\n\n", $aln->id, $format )
        );

        my $length = $aln->length();
        $aln->set_displayname_flat();

        # Interleaved output: for every window of $LINELEN columns, write
        # one line per sequence, followed by a blank separator line.
        for ( my $offset = 0 ; $offset < $length ; $offset += $LINELEN ) {
            foreach my $seq ( $aln->each_seq ) {
                my $window = substr( $seq->seq(), $offset, $LINELEN );

                # Cut the window into blocks of $BLOCKLEN characters.
                my @blocks;
                for ( my $pos = 0 ; $pos < length($window) ; $pos += $BLOCKLEN ) {
                    push @blocks, substr( $window, $pos, $BLOCKLEN );
                }

                # The name is cut to $MEGANAMELEN-2 characters, one less
                # than its field, so padding always separates it from the
                # sequence data.
                my $name = substr( $aln->displayname( $seq->get_nse() ),
                    0, $MEGANAMELEN - 2 );
                $self->_print( sprintf( $line_format, $name, join( ' ', @blocks ) ) );
            }
            $self->_print("\n");
        }
    }
    $self->flush if $self->_flush_on_write && defined $self->_fh;
    return 1;
}