2 # BioPerl module for Bio::Tools::Phylo::Molphy
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Jason Stajich <jason-at-bioperl.org>
8 # Copyright Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::Tools::Phylo::Molphy - parser for Molphy output
20 use Bio::Tools::Phylo::Molphy;
21 my $parser = Bio::Tools::Phylo::Molphy->new(-file => 'output.protml');
22 while( my $r = $parser->next_result ) {
23 # r is a Bio::Tools::Phylo::Molphy::Result object
25 # print the model name
26 print $r->model, "\n";
28 # get the substitution matrix
29 # this is a hash of 3letter aa codes -> 3letter aa codes representing
31 my $smat = $r->substitution_matrix;
32 print "Arg -> Gln substitution rate is %d\n",
33 $smat->{'Arg'}->{'Gln'}, "\n";
35 # get the transition probablity matrix
36 # this is a hash of 3letter aa codes -> 3letter aa codes representing
37 # transition probabilty
38 my $tmat = $r->transition_probability_matrix;
39 print "Arg -> Gln transition probablity is %.2f\n",
40 $tmat->{'Arg'}->{'Gln'}, "\n";
42 # get the frequency for each of the residues
43 my $rfreqs = $r->residue_frequencies;
45 foreach my $residue ( keys %{$rfreqs} ) {
46 printf "residue %s expected freq: %.2f observed freq: %.2f\n",
47 $residue,$rfreqs->{$residue}->[0], $rfreqs->{$residue}->[1];
51 while( my $t = $r->next_tree ) {
55 print "search space is ", $r->search_space, "\n",
56 "1st tree score is ", $trees[0]->score, "\n";
58 # writing to STDOUT, use -file => '>filename' to specify a file
59 my $out = Bio::TreeIO->new(-format => "newick");
60 $out->write_tree($trees[0]); # writing only the 1st tree
65 A parser for Molphy output (protml,dnaml)
71 User feedback is an integral part of the evolution of this and other
72 Bioperl modules. Send your comments and suggestions preferably to
73 the Bioperl mailing list. Your participation is much appreciated.
75 bioperl-l@bioperl.org - General discussion
76 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
80 Please direct usage questions or support issues to the mailing list:
82 I<bioperl-l@bioperl.org>
84 rather than to the module maintainer directly. Many experienced and
85 reponsive experts will be able look at the problem and quickly
86 address it. Please include a thorough description of the problem
87 with code and data examples if at all possible.
91 Report bugs to the Bioperl bug tracking system to help us keep track
92 of the bugs and their resolution. Bug reports can be submitted via the
95 https://github.com/bioperl/bioperl-live/issues
97 =head1 AUTHOR - Jason Stajich
99 Email jason-at-bioperl.org
103 The rest of the documentation details each of the object methods.
104 Internal methods are usually preceded with a _
109 # Let the code begin...
112 package Bio
::Tools
::Phylo
::Molphy
;
116 use Bio
::Tools
::Phylo
::Molphy
::Result
;
120 use base
qw(Bio::Root::Root Bio::Root::IO);
125 Usage : my $obj = Bio::Tools::Phylo::Molphy->new();
126 Function: Builds a new Bio::Tools::Phylo::Molphy object
127 Returns : Bio::Tools::Phylo::Molphy
128 Args : -fh/-file => $val, # for initing input, see Bio::Root::IO
134 my($class,@args) = @_;
136 my $self = $class->SUPER::new
(@args);
137 $self->_initialize_io(@args);
145 Usage : my $r = $molphy->next_result
146 Function: Get the next result set from parser data
147 Returns : Bio::Tools::Phylo::Molphy::Result object
156 # A little statemachine for the parser here
157 my ($state,$transition_ct,
158 @transition_matrix, %transition_mat, @resloc,) = ( 0,0);
159 my ( %subst_matrix, @treelines, @treedata, %frequencies);
160 my ( $treenum,$possible_trees, $model);
161 my ($trans_type,$trans_amount);
163 while( defined ( $_ = $self->_readline()) ) {
165 if( /^Relative Substitution Rate Matrix/ ) {
166 if( %subst_matrix ) {
167 $self->_pushback($_);
173 while( defined ($_ = $self->_readline) ) {
175 # remove leading/trailing spaces
180 for my $l ( @data ) {
186 push @tempdata, \
@data;
189 for my $row ( @tempdata ) {
191 for my $col ( @
$row ) {
193 # empty string for diagonals
194 $subst_matrix{$resloc[$i]}->{$resloc[$j]} = '';
196 $subst_matrix{$resloc[$i]}->{$resloc[$j]} = $col;
202 } elsif( /^Transition Probability Matrix/ ) {
203 if( /(1\.0e(5|7))\)\s+(\S+)/ ) {
205 my $newtrans_type = "$3-$1";
207 if( defined $trans_type ) {
208 # finish processing the transition_matrix
210 foreach my $row ( @transition_matrix ) {
212 foreach my $col ( @
$row ) {
213 $transition_mat{$trans_type}->{$resloc[$i]}->{$resloc[$j]} = $col;
219 $trans_type = $newtrans_type;
221 @transition_matrix = ();
223 } elsif ( /Acid Frequencies/ ) {
225 $self->_readline(); # skip the next line
226 while( defined( $_ = $self->_readline) ) {
228 $self->_pushback($_);
233 my ($index,$res,$model,$data) = split;
234 $frequencies{$res} = [ $model,$data];
236 } elsif( /^(\d+)\s*\/\s
*(\d
+)\s
+(.+)\s
+model
/ ) {
237 my @save = ($1,$2,$3);
238 # finish processing the transition_matrix
240 foreach my $row ( @transition_matrix ) {
242 foreach my $col ( @
$row ) {
243 $transition_mat{$trans_type}->{$resloc[$i]}->{$resloc[$j]} = $col;
248 if( defined $treenum ) {
249 $self->_pushback($_);
254 ($treenum,$possible_trees, $model) = @save;
256 } elsif( $state == 1 ) {
257 next if( /^\s+$/ || /^\s+Ala/);
260 if( $trans_type eq '1PAM-1.0e7' ) {
261 # because the matrix is split up into 2-10 column sets
262 push @
{$transition_matrix[$transition_ct++]}, split ;
263 $transition_ct = 0 if $transition_ct % 20 == 0;
264 } elsif( $trans_type eq '1PAM-1.0e5' ) {
265 # because the matrix is split up into 2-10 column sets
266 my ($res,@row) = split;
267 next if $transition_ct >= 20; # skip last
268 push @
{$transition_matrix[$transition_ct++]}, @row;
270 } elsif( $state == 2 ) {
271 if( s/^(\d+)\s+(\-?\d+(\.\d+)?)\s+// ) {
272 push @treedata, [ $1,$2];
274 # save this for the end so that we can
275 # be efficient and only open one tree parser
279 # waiting till the end to do this, is it better
282 my $strdat = IO
::String
->new(join('',@treelines));
283 my $treeio = Bio
::TreeIO
->new(-fh
=> $strdat,
284 -format
=> 'newick');
285 while( my $tree = $treeio->next_tree ) {
287 my $dat = shift @treedata;
288 # set the associated information
289 $tree->id($dat->[0]);
290 $tree->score($dat->[1]);
295 return unless( $parsed );
296 my $result = Bio
::Tools
::Phylo
::Molphy
::Result
->new
298 -substitution_matrix
=> \
%subst_matrix,
299 -frequencies
=> \
%frequencies,
301 -search_space
=> $possible_trees,
303 while( my ($type,$mat) = each %transition_mat ) {
304 $result->transition_probability_matrix( $type,$mat);