Bio::Tools::Analysis::Protein::Scansite: move to separate distribution
[bioperl-live.git] / lib / Bio / Tools / Phylo / Molphy.pm
blobd695ea588ac4dc3615d7431b10a4a54a2d910007
2 # BioPerl module for Bio::Tools::Phylo::Molphy
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Jason Stajich <jason-at-bioperl.org>
8 # Copyright Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::Tools::Phylo::Molphy - parser for Molphy output
18 =head1 SYNOPSIS
20 use Bio::Tools::Phylo::Molphy;
21 my $parser = Bio::Tools::Phylo::Molphy->new(-file => 'output.protml');
22 while( my $r = $parser->next_result ) {
23 # r is a Bio::Tools::Phylo::Molphy::Result object
25 # print the model name
26 print $r->model, "\n";
28 # get the substitution matrix
29 # this is a hash of 3letter aa codes -> 3letter aa codes representing
30 # substitution rate
31 my $smat = $r->substitution_matrix;
32 print "Arg -> Gln substitution rate is %d\n",
33 $smat->{'Arg'}->{'Gln'}, "\n";
35 # get the transition probablity matrix
36 # this is a hash of 3letter aa codes -> 3letter aa codes representing
37 # transition probabilty
38 my $tmat = $r->transition_probability_matrix;
39 print "Arg -> Gln transition probablity is %.2f\n",
40 $tmat->{'Arg'}->{'Gln'}, "\n";
42 # get the frequency for each of the residues
43 my $rfreqs = $r->residue_frequencies;
45 foreach my $residue ( keys %{$rfreqs} ) {
46 printf "residue %s expected freq: %.2f observed freq: %.2f\n",
47 $residue,$rfreqs->{$residue}->[0], $rfreqs->{$residue}->[1];
50 my @trees;
51 while( my $t = $r->next_tree ) {
52 push @trees, $t;
55 print "search space is ", $r->search_space, "\n",
56 "1st tree score is ", $trees[0]->score, "\n";
58 # writing to STDOUT, use -file => '>filename' to specify a file
59 my $out = Bio::TreeIO->new(-format => "newick");
60 $out->write_tree($trees[0]); # writing only the 1st tree
63 =head1 DESCRIPTION
65 A parser for Molphy output (protml,dnaml)
67 =head1 FEEDBACK
69 =head2 Mailing Lists
71 User feedback is an integral part of the evolution of this and other
72 Bioperl modules. Send your comments and suggestions preferably to
73 the Bioperl mailing list. Your participation is much appreciated.
75 bioperl-l@bioperl.org - General discussion
76 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
78 =head2 Support
80 Please direct usage questions or support issues to the mailing list:
82 I<bioperl-l@bioperl.org>
84 rather than to the module maintainer directly. Many experienced and
85 reponsive experts will be able look at the problem and quickly
86 address it. Please include a thorough description of the problem
87 with code and data examples if at all possible.
89 =head2 Reporting Bugs
91 Report bugs to the Bioperl bug tracking system to help us keep track
92 of the bugs and their resolution. Bug reports can be submitted via the
93 web:
95 https://github.com/bioperl/bioperl-live/issues
97 =head1 AUTHOR - Jason Stajich
99 Email jason-at-bioperl.org
101 =head1 APPENDIX
103 The rest of the documentation details each of the object methods.
104 Internal methods are usually preceded with a _
106 =cut
109 # Let the code begin...
112 package Bio::Tools::Phylo::Molphy;
113 use strict;
115 use Bio::Tools::Phylo::Molphy::Result;
116 use Bio::TreeIO;
117 use IO::String;
119 use base qw(Bio::Root::Root Bio::Root::IO);
121 =head2 new
123 Title : new
124 Usage : my $obj = Bio::Tools::Phylo::Molphy->new();
125 Function: Builds a new Bio::Tools::Phylo::Molphy object
126 Returns : Bio::Tools::Phylo::Molphy
127 Args : -fh/-file => $val, # for initing input, see Bio::Root::IO
130 =cut
132 sub new {
133 my($class,@args) = @_;
135 my $self = $class->SUPER::new(@args);
136 $self->_initialize_io(@args);
138 return $self;
141 =head2 next_result
143 Title : next_result
144 Usage : my $r = $molphy->next_result
145 Function: Get the next result set from parser data
146 Returns : Bio::Tools::Phylo::Molphy::Result object
147 Args : none
150 =cut
152 sub next_result{
153 my ($self) = @_;
155 # A little statemachine for the parser here
156 my ($state,$transition_ct,
157 @transition_matrix, %transition_mat, @resloc,) = ( 0,0);
158 my ( %subst_matrix, @treelines, @treedata, %frequencies);
159 my ( $treenum,$possible_trees, $model);
160 my ($trans_type,$trans_amount);
161 my $parsed = 0;
162 while( defined ( $_ = $self->_readline()) ) {
163 $parsed = 1;
164 if( /^Relative Substitution Rate Matrix/ ) {
165 if( %subst_matrix ) {
166 $self->_pushback($_);
167 last;
169 $state = 0;
170 my ( @tempdata);
171 @resloc = ();
172 while( defined ($_ = $self->_readline) ) {
173 last if (/^\s+$/);
174 # remove leading/trailing spaces
175 s/^\s+//;
176 s/\s+$//;
177 my @data = split;
178 my $i = 0;
179 for my $l ( @data ) {
180 if( $l =~ /\D+/ ) {
181 push @resloc, $l;
183 $i++;
185 push @tempdata, \@data;
187 my $i = 0;
188 for my $row ( @tempdata ) {
189 my $j = 0;
190 for my $col ( @$row ) {
191 if( $i == $j ) {
192 # empty string for diagonals
193 $subst_matrix{$resloc[$i]}->{$resloc[$j]} = '';
194 } else {
195 $subst_matrix{$resloc[$i]}->{$resloc[$j]} = $col;
197 $j++;
199 $i++;
201 } elsif( /^Transition Probability Matrix/ ) {
202 if( /(1\.0e(5|7))\)\s+(\S+)/ ) {
203 $state = 1;
204 my $newtrans_type = "$3-$1";
205 $trans_amount = $1;
206 if( defined $trans_type ) {
207 # finish processing the transition_matrix
208 my $i =0;
209 foreach my $row ( @transition_matrix ) {
210 my $j = 0;
211 foreach my $col ( @$row ) {
212 $transition_mat{$trans_type}->{$resloc[$i]}->{$resloc[$j]} = $col;
213 $j++;
215 $i++;
218 $trans_type = $newtrans_type;
219 $transition_ct = 0;
220 @transition_matrix = ();
222 } elsif ( /Acid Frequencies/ ) {
223 $state = 0;
224 $self->_readline(); # skip the next line
225 while( defined( $_ = $self->_readline) ) {
226 unless( /^\s+/) {
227 $self->_pushback($_);
228 last;
230 s/^\s+//;
231 s/\s+$//;
232 my ($index,$res,$model,$data) = split;
233 $frequencies{$res} = [ $model,$data];
235 } elsif( /^(\d+)\s*\/\s*(\d+)\s+(.+)\s+model/ ) {
236 my @save = ($1,$2,$3);
237 # finish processing the transition_matrix
238 my $i =0;
239 foreach my $row ( @transition_matrix ) {
240 my $j = 0;
241 foreach my $col ( @$row ) {
242 $transition_mat{$trans_type}->{$resloc[$i]}->{$resloc[$j]} = $col;
243 $j++;
245 $i++;
247 if( defined $treenum ) {
248 $self->_pushback($_);
249 last;
252 $state = 2;
253 ($treenum,$possible_trees, $model) = @save;
254 $model =~ s/\s+/ /g;
255 } elsif( $state == 1 ) {
256 next if( /^\s+$/ || /^\s+Ala/);
257 s/^\s+//;
258 s/\s+$//;
259 if( $trans_type eq '1PAM-1.0e7' ) {
260 # because the matrix is split up into 2-10 column sets
261 push @{$transition_matrix[$transition_ct++]}, split ;
262 $transition_ct = 0 if $transition_ct % 20 == 0;
263 } elsif( $trans_type eq '1PAM-1.0e5' ) {
264 # because the matrix is split up into 2-10 column sets
265 my ($res,@row) = split;
266 next if $transition_ct >= 20; # skip last
267 push @{$transition_matrix[$transition_ct++]}, @row;
269 } elsif( $state == 2 ) {
270 if( s/^(\d+)\s+(\-?\d+(\.\d+)?)\s+// ) {
271 push @treedata, [ $1,$2];
273 # save this for the end so that we can
274 # be efficient and only open one tree parser
275 push @treelines, $_;
278 # waiting till the end to do this, is it better
279 my @trees;
280 if( @treelines ) {
281 my $strdat = IO::String->new(join('',@treelines));
282 my $treeio = Bio::TreeIO->new(-fh => $strdat,
283 -format => 'newick');
284 while( my $tree = $treeio->next_tree ) {
285 if( @treedata ) {
286 my $dat = shift @treedata;
287 # set the associated information
288 $tree->id($dat->[0]);
289 $tree->score($dat->[1]);
291 push @trees, $tree;
294 return unless( $parsed );
295 my $result = Bio::Tools::Phylo::Molphy::Result->new
296 (-trees => \@trees,
297 -substitution_matrix => \%subst_matrix,
298 -frequencies => \%frequencies,
299 -model => $model,
300 -search_space => $possible_trees,
302 while( my ($type,$mat) = each %transition_mat ) {
303 $result->transition_probability_matrix( $type,$mat);
305 $result;