2 # BioPerl module for Bio::PopGen::Simulation::Coalescent
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Jason Stajich <jason-at-bioperl-dot-org>
8 # Copyright Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::PopGen::Simulation::Coalescent - A Coalescent simulation factory
20 use Bio::PopGen::Simulation::Coalescent;
21 my @taxonnames = qw(SpeciesA SpeciesB SpeciesC SpeciesD);
22 my $sim1 = Bio::PopGen::Simulation::Coalescent->new(-samples => \@taxonnames);
24 my $tree = $sim1->next_tree;
26 # add 20 mutations randomly to the tree
27 $sim1->add_Mutations($tree,20);
29 # or for anonymous samples
31 my $sim2 = Bio::PopGen::Simulation::Coalescent->new( -sample_size => 6,
33 my $tree2 = $sim2->next_tree;
34 # add 20 mutations randomly to the tree
35 $sim2->add_Mutations($tree2,20);
39 Builds a random tree every time next_tree is called or up to -maxcount
40 times with branch lengths and provides the ability to randomly add
41 mutations onto the tree with a probability proportional to the branch
44 This algorithm is based on the make_tree algorithm from Richard Hudson 1990.
46 Hudson, R. R. 1990. Gene genealogies and the coalescent
47 process. Pp. 1-44 in D. Futuyma and J. Antonovics, eds. Oxford
48 surveys in evolutionary biology. Vol. 7. Oxford University
51 This module was previously named Bio::Tree::RandomTree
57 User feedback is an integral part of the evolution of this and other
58 Bioperl modules. Send your comments and suggestions preferably to
59 the Bioperl mailing list. Your participation is much appreciated.
61 bioperl-l@bioperl.org - General discussion
62 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
66 Please direct usage questions or support issues to the mailing list:
68 I<bioperl-l@bioperl.org>
70 rather than to the module maintainer directly. Many experienced and
71 responsive experts will be able to look at the problem and quickly
72 address it. Please include a thorough description of the problem
73 with code and data examples if at all possible.
77 Report bugs to the Bioperl bug tracking system to help us keep track
78 of the bugs and their resolution. Bug reports can be submitted via
81 https://github.com/bioperl/bioperl-live/issues
83 =head1 AUTHOR - Jason Stajich, Matthew Hahn
85 Email jason-at-bioperl-dot-org
86 Email matthew-dot-hahn-at-duke-dot-edu
90 The rest of the documentation details each of the object methods.
91 Internal methods are usually preceded with a _
96 # Let the code begin...
99 package Bio
::PopGen
::Simulation
::Coalescent
;
100 use vars
qw($PRECISION_DIGITS);
103 $PRECISION_DIGITS = 3; # Precision for the branchlength
105 use Bio::Tree::AlleleNode;
106 use Bio::PopGen::Genotype;
109 use base qw(Bio::Root::Root Bio::Factory::TreeFactoryI);
115 Usage : my $obj = Bio::PopGen::Simulation::Coalescent->new();
116 Function: Builds a new Bio::PopGen::Simulation::Coalescent object
117 Returns : an instance of Bio::PopGen::Simulation::Coalescent
118 Args : -samples => arrayref of sample names
120 -sample_size=> number of samples (samps will get a systematic name)
121 -maxcount => [optional] maximum number of trees to provide
126 my ($class,@args) = @_;
127 my $self = $class->SUPER::new
(@args);
129 $self->{'_treecounter'} = 0;
130 $self->{'_maxcount'} = 0;
131 my ($maxcount, $samps,$samplesize ) = $self->_rearrange([qw(MAXCOUNT
137 if( ! defined $samps ) {
138 if( ! defined $samplesize || $samplesize <= 0 ) {
139 $self->throw("Must specify a valid samplesize if parameter -SAMPLE is not specified (sampsize is $samplesize)");
141 foreach ( 1..$samplesize ) { push @samples, "Samp$_"; }
143 if( ref($samps) !~ /ARRAY/i ) {
144 $self->throw("Must specify a valid ARRAY reference to the parameter -SAMPLES, did you forget a leading '\\'?");
149 $self->samples(\
@samples);
150 $self->sample_size(scalar @samples);
151 defined $maxcount && $self->maxcount($maxcount);
158 Usage : my $tree = $factory->next_tree
159 Function: Returns a random tree based on the initialized number of nodes
160 NOTE: if maxcount is not specified on initialization, or is
161 not set to a non-zero integer, subsequent calls to next_tree will
162 continue to return random trees and never return undef
163 Returns : Bio::Tree::TreeI object
170 # If maxcount is set to something non-zero then next tree will
171 # continue to return valid trees until maxcount is reached
172 # otherwise will always return trees
173 return if( $self->maxcount &&
174 $self->{'_treecounter'}++ >= $self->maxcount );
175 my $size = $self->sample_size;
181 for($in=0;$in < 2*$size -1; $in++ ) {
182 push @tree, { 'nodenum' => "Node$in" };
184 # in C we would have 2 arrays
185 # an array of nodes (tree)
186 # and array of pointers to these nodes (list)
187 # and we just shuffle the list items to do the
188 # tree topology generation
189 # instead in perl, we will have a list of hashes (nodes) called @tree
190 # and a list of integers representing the indexes in tree called @list
192 for($in=0;$in < $size;$in++) {
193 $tree[$in]->{'time'} = 0;
194 $tree[$in]->{'desc1'} = undef;
195 $tree[$in]->{'desc2'} = undef;
200 # generate times for the nodes
201 for($in = $size; $in > 1; $in-- ) {
202 $t+= -2.0 * log(1 - $self->random(1)) / ( $in * ($in-1) );
203 $tree[2 * $size - $in]->{'time'} =$t;
205 # topology generation
206 for ($in = $size; $in > 1; $in-- ) {
207 my $pick = int $self->random($in);
208 my $nodeindex = $list[$pick];
209 my $swap = 2 * $size - $in;
210 $tree[$swap]->{'desc1'} = $nodeindex;
211 $list[$pick] = $list[$in-1];
212 $pick = int rand($in - 1);
213 $nodeindex = $list[$pick];
214 $tree[$swap]->{'desc2'} = $nodeindex;
215 $list[$pick] = $swap;
217 # Let's convert the hashes into nodes
220 foreach my $n ( @tree ) {
222 Bio
::Tree
::AlleleNode
->new(-id
=> $n->{'nodenum'},
223 -branch_length
=> $n->{'time'});
226 foreach my $node ( @nodes ) {
227 my $n = $tree[$ct++];
228 if( defined $n->{'desc1'} ) {
229 $node->add_Descendent($nodes[$n->{'desc1'}]);
231 if( defined $n->{'desc2'} ) {
232 $node->add_Descendent($nodes[$n->{'desc2'}]);
235 my $T = Bio
::Tree
::Tree
->new(-root
=> pop @nodes );
241 Title : add_Mutations
242 Usage : $factory->add_Mutations($tree, $mutcount);
243 Function: Adds mutations to a tree via a random process weighted by
244 branch length (it is a Poisson distribution
245 as part of a coalescent process)
247 Args : $tree - Bio::Tree::TreeI
248 $nummut - number of mutations
249 $precision - optional # of digits for precision
255 my ($self,$tree, $nummut,$precision) = @_;
256 $precision ||= $PRECISION_DIGITS;
257 $precision = 10**$precision;
263 my @nodes = $tree->get_nodes();
266 # Jason's somewhat simplistic way of doing a Poisson
267 # distribution for a fixed number of mutations
268 # build an array and put the node number in a slot
269 # representing the branch to put a mutation on
270 # but weight the number of slots per branch by the
271 # length of the branch ( ancestor's time - node time)
273 foreach my $node ( @nodes ) {
274 if( $node->ancestor ) {
275 my $len = int ( ($node->ancestor->branch_length -
276 $node->branch_length) * $precision);
278 for( my $j =0;$j < $len;$j++) {
285 if( ! $node->isa('Bio::Tree::AlleleNode') ) {
286 bless $node, 'Bio::Tree::AlleleNode'; # rebless it to the right node
288 # This lets us reset the stored genotypes so we can keep reusing the
289 # same tree topology, but throw down mutations multiple times
290 $node->reset_Genotypes;
294 $self->throw("branch len is $branchlen arraylen is $last")
295 unless ( $branchlen == $last );
297 for( my $j = 0; $j < $nummut; $j++) {
298 my $index = int(rand($branchlen));
299 my $branch = $branches[$index];
301 # We're using an infinite sites model so every new
302 # mutation is a new site
303 my $g = Bio
::PopGen
::Genotype
->new(-marker_name
=> "Mutation$j",
305 $nodes[$branch]->add_Genotype($g);
306 push @mutations, "Mutation$j";
307 # Let's add this mutation to all the children (push it down
308 # the branches to the tips)
309 foreach my $child ( $nodes[$branch]->get_all_Descendents ) {
310 $child->add_Genotype($g);
313 # Ensure that everyone who doesn't have the mutation
314 # has the ancestral state, which is '0'
315 foreach my $node ( @nodes ) {
316 foreach my $m ( @mutations ) {
317 if( ! $node->has_Marker($m) ) {
318 my $emptyg = Bio
::PopGen
::Genotype
->new(-marker_name
=> $m,
320 $node->add_Genotype($emptyg);
329 Usage : $obj->maxcount($newval)
331 Returns : Maxcount value
332 Args : newvalue (optional)
338 my ($self,$value) = @_;
339 if( defined $value) {
340 if( $value =~ /^(\d+)/ ) {
341 $self->{'maxcount'} = $1;
343 $self->warn("Must specify a valid Positive integer to maxcount");
344 $self->{'maxcount'} = 0;
347 return $self->{'_maxcount'};
353 Usage : $obj->samples($newval)
356 Returns : value of samples
357 Args : newvalue (optional)
363 my ($self,$value) = @_;
364 if( defined $value) {
365 if( ref($value) !~ /ARRAY/i ) {
366 $self->warn("Must specify a valid array ref to the method 'samples'");
369 $self->{'samples'} = $value;
371 return $self->{'samples'};
378 Usage : $obj->sample_size($newval)
381 Returns : value of sample_size
382 Args : newvalue (optional)
388 my ($self,$value) = @_;
389 if( defined $value) {
390 $self->{'sample_size'} = $value;
392 return $self->{'sample_size'};
399 Usage : my $rfloat = $node->random($size)
400 Function: Generates a random number between 0 and $size
401 This is abstracted so that someone can override and provide their
402 own special RNG. This is expected to be a uniform RNG.
403 Returns : Floating point random
404 Args : $maximum size for random number (defaults to 1)
410 my ($self,$max) = @_;