2 # BioPerl module for Bio::Tools::Phylo::Molphy
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Jason Stajich <jason-at-bioperl.org>
8 # Copyright Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::Tools::Phylo::Molphy - parser for Molphy output
20 use Bio::Tools::Phylo::Molphy;
21 my $parser = Bio::Tools::Phylo::Molphy->new(-file => 'output.protml');
22 while( my $r = $parser->next_result ) {
23 # r is a Bio::Tools::Phylo::Molphy::Result object
25 # print the model name
26 print $r->model, "\n";
28 # get the substitution matrix
29 # this is a hash of 3letter aa codes -> 3letter aa codes representing
31 my $smat = $r->substitution_matrix;
32 print "Arg -> Gln substitution rate is %d\n",
33 $smat->{'Arg'}->{'Gln'}, "\n";
35 # get the transition probablity matrix
36 # this is a hash of 3letter aa codes -> 3letter aa codes representing
37 # transition probabilty
38 my $tmat = $r->transition_probability_matrix;
39 print "Arg -> Gln transition probablity is %.2f\n",
40 $tmat->{'Arg'}->{'Gln'}, "\n";
42 # get the frequency for each of the residues
43 my $rfreqs = $r->residue_frequencies;
45 foreach my $residue ( keys %{$rfreqs} ) {
46 printf "residue %s expected freq: %.2f observed freq: %.2f\n",
47 $residue,$rfreqs->{$residue}->[0], $rfreqs->{$residue}->[1];
51 while( my $t = $r->next_tree ) {
55 print "search space is ", $r->search_space, "\n",
56 "1st tree score is ", $trees[0]->score, "\n";
58 # writing to STDOUT, use -file => '>filename' to specify a file
59 my $out = Bio::TreeIO->new(-format => "newick");
60 $out->write_tree($trees[0]); # writing only the 1st tree
65 A parser for Molphy output (protml,dnaml)
71 User feedback is an integral part of the evolution of this and other
72 Bioperl modules. Send your comments and suggestions preferably to
73 the Bioperl mailing list. Your participation is much appreciated.
75 bioperl-l@bioperl.org - General discussion
76 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
80 Please direct usage questions or support issues to the mailing list:
82 I<bioperl-l@bioperl.org>
84 rather than to the module maintainer directly. Many experienced and
85 reponsive experts will be able look at the problem and quickly
86 address it. Please include a thorough description of the problem
87 with code and data examples if at all possible.
91 Report bugs to the Bioperl bug tracking system to help us keep track
92 of the bugs and their resolution. Bug reports can be submitted via the
95 https://github.com/bioperl/bioperl-live/issues
97 =head1 AUTHOR - Jason Stajich
99 Email jason-at-bioperl.org
103 The rest of the documentation details each of the object methods.
104 Internal methods are usually preceded with a _
109 # Let the code begin...
112 package Bio
::Tools
::Phylo
::Molphy
;
115 use Bio
::Tools
::Phylo
::Molphy
::Result
;
119 use base
qw(Bio::Root::Root Bio::Root::IO);
124 Usage : my $obj = Bio::Tools::Phylo::Molphy->new();
125 Function: Builds a new Bio::Tools::Phylo::Molphy object
126 Returns : Bio::Tools::Phylo::Molphy
127 Args : -fh/-file => $val, # for initing input, see Bio::Root::IO
133 my($class,@args) = @_;
135 my $self = $class->SUPER::new
(@args);
136 $self->_initialize_io(@args);
144 Usage : my $r = $molphy->next_result
145 Function: Get the next result set from parser data
146 Returns : Bio::Tools::Phylo::Molphy::Result object
155 # A little statemachine for the parser here
156 my ($state,$transition_ct,
157 @transition_matrix, %transition_mat, @resloc,) = ( 0,0);
158 my ( %subst_matrix, @treelines, @treedata, %frequencies);
159 my ( $treenum,$possible_trees, $model);
160 my ($trans_type,$trans_amount);
162 while( defined ( $_ = $self->_readline()) ) {
164 if( /^Relative Substitution Rate Matrix/ ) {
165 if( %subst_matrix ) {
166 $self->_pushback($_);
172 while( defined ($_ = $self->_readline) ) {
174 # remove leading/trailing spaces
179 for my $l ( @data ) {
185 push @tempdata, \
@data;
188 for my $row ( @tempdata ) {
190 for my $col ( @
$row ) {
192 # empty string for diagonals
193 $subst_matrix{$resloc[$i]}->{$resloc[$j]} = '';
195 $subst_matrix{$resloc[$i]}->{$resloc[$j]} = $col;
201 } elsif( /^Transition Probability Matrix/ ) {
202 if( /(1\.0e(5|7))\)\s+(\S+)/ ) {
204 my $newtrans_type = "$3-$1";
206 if( defined $trans_type ) {
207 # finish processing the transition_matrix
209 foreach my $row ( @transition_matrix ) {
211 foreach my $col ( @
$row ) {
212 $transition_mat{$trans_type}->{$resloc[$i]}->{$resloc[$j]} = $col;
218 $trans_type = $newtrans_type;
220 @transition_matrix = ();
222 } elsif ( /Acid Frequencies/ ) {
224 $self->_readline(); # skip the next line
225 while( defined( $_ = $self->_readline) ) {
227 $self->_pushback($_);
232 my ($index,$res,$model,$data) = split;
233 $frequencies{$res} = [ $model,$data];
235 } elsif( /^(\d+)\s*\/\s
*(\d
+)\s
+(.+)\s
+model
/ ) {
236 my @save = ($1,$2,$3);
237 # finish processing the transition_matrix
239 foreach my $row ( @transition_matrix ) {
241 foreach my $col ( @
$row ) {
242 $transition_mat{$trans_type}->{$resloc[$i]}->{$resloc[$j]} = $col;
247 if( defined $treenum ) {
248 $self->_pushback($_);
253 ($treenum,$possible_trees, $model) = @save;
255 } elsif( $state == 1 ) {
256 next if( /^\s+$/ || /^\s+Ala/);
259 if( $trans_type eq '1PAM-1.0e7' ) {
260 # because the matrix is split up into 2-10 column sets
261 push @
{$transition_matrix[$transition_ct++]}, split ;
262 $transition_ct = 0 if $transition_ct % 20 == 0;
263 } elsif( $trans_type eq '1PAM-1.0e5' ) {
264 # because the matrix is split up into 2-10 column sets
265 my ($res,@row) = split;
266 next if $transition_ct >= 20; # skip last
267 push @
{$transition_matrix[$transition_ct++]}, @row;
269 } elsif( $state == 2 ) {
270 if( s/^(\d+)\s+(\-?\d+(\.\d+)?)\s+// ) {
271 push @treedata, [ $1,$2];
273 # save this for the end so that we can
274 # be efficient and only open one tree parser
278 # waiting till the end to do this, is it better
281 my $strdat = IO
::String
->new(join('',@treelines));
282 my $treeio = Bio
::TreeIO
->new(-fh
=> $strdat,
283 -format
=> 'newick');
284 while( my $tree = $treeio->next_tree ) {
286 my $dat = shift @treedata;
287 # set the associated information
288 $tree->id($dat->[0]);
289 $tree->score($dat->[1]);
294 return unless( $parsed );
295 my $result = Bio
::Tools
::Phylo
::Molphy
::Result
->new
297 -substitution_matrix
=> \
%subst_matrix,
298 -frequencies
=> \
%frequencies,
300 -search_space
=> $possible_trees,
302 while( my ($type,$mat) = each %transition_mat ) {
303 $result->transition_probability_matrix( $type,$mat);