4 CXGN::MixedModels - a package to run user-specified mixed models
8 my $mm = CXGN::MixedModels->new();
9 my $mm->phenotype_file("t/data/phenotype_data.csv");
10 my $mm->dependent_variables(qw | |);
11 my $mm->fixed_factors( qw | | );
12 my $mm->random_factors( qw| | );
13 my $mm->traits( qw| | );
17 Lukas Mueller <lam87@cornell.edu>
23 package CXGN
::MixedModels
;
27 use File
::Slurp qw
| slurp
|;
30 use CXGN
::Phenotypes
::File
;
32 =head2 dependent_variables()
34 sets the dependent variables (a listref of traits)
38 has
'dependent_variables' => (is
=> 'rw', isa
=> 'ArrayRef[Str]|Undef');
40 =head2 fixed_factors()
42 sets the fixed factors (listref)
46 has
'fixed_factors' => (is
=> 'rw', isa
=> 'Ref', default => sub {[]});
48 =head2 fixed_factors_interaction()
50 sets the fixed factors with interaction (listref)
54 has
'fixed_factors_interaction' => (is
=> 'rw', isa
=> 'Ref', default => sub{[]});
56 =head2 variable_slope_factors()
60 has
'variable_slope_factors' => (is
=> 'rw', isa
=> 'Ref', default => sub{[]});
62 =head2 random_factors()
66 has
'random_factors' => (is
=> 'rw', isa
=> 'Ref', default => sub {[]});
68 =head2 variable_slop_intersects
72 has
'variable_slope_intersects' => (is
=> 'rw', isa
=> 'Ref', default => sub {[]});
80 has
'traits' => (is
=> 'rw', isa
=> 'Ref');
88 has
'levels' => (is
=> 'rw', isa
=> 'HashRef' );
90 has
'phenotype_file' => (is
=> 'rw', isa
=> 'CXGN::Phenotypes::File|Undef');
94 the tempfile that contains the phenotypic information.
98 has
'tempfile' => (is
=> 'rw', isa
=> 'Str|Undef');
105 if ($self->tempfile()) {
106 $phenotype_file = CXGN
::Phenotypes
::File
->new( { file
=> $self->tempfile() } );
108 $self->phenotype_file($phenotype_file);
112 =head2 generate_models()
114 generates the model string, in lme4 format, from the current parameters
121 my $tempfile = $self->tempfile();
122 my $dependent_variables = $self->dependent_variables();
123 my $fixed_factors = $self->fixed_factors();
124 my $fixed_factors_interaction = $self->fixed_factors_interaction();
125 my $variable_slope_intersects = $self->variable_slope_intersects();
126 my $random_factors = $self->random_factors();
132 print STDERR
join("\n", ("DV", Dumper
($dependent_variables), "FF", Dumper
($fixed_factors), "RF", Dumper
($random_factors), "TF", $tempfile, "FFI", Dumper
($fixed_factors_interaction), "VSI: ", Dumper
($variable_slope_intersects)));
134 print STDERR Dumper
($fixed_factors);
137 if (! $dependent_variables || scalar(@
$dependent_variables)==0) {
138 die "Need a dependent variable(s) set in CXGN::MixedModels... Ciao!";
141 my $formatted_fixed_factors = "";
142 if (@
$fixed_factors) {
143 $formatted_fixed_factors = join(" + ", @
$fixed_factors);
144 push @addends, $formatted_fixed_factors;
147 my $formatted_fixed_factors_interaction = "";
148 foreach my $interaction (@
$fixed_factors_interaction) {
150 if (ref($interaction)) {
151 $terms = join("*", @
$interaction);
152 push @addends, $terms;
155 $error = "Interaction terms are not correctly defined.";
159 my $formatted_variable_slope_intersects = "";
160 foreach my $variable_slope_groups (@
$variable_slope_intersects) {
161 if (exists($variable_slope_groups->[0]) && exists($variable_slope_groups->[1])) {
162 my $term = " (1+$variable_slope_groups->[0] \| $variable_slope_groups->[1]) ";
163 print STDERR
"TERM: $term\n";
164 $formatted_variable_slope_intersects .= $term;
165 push @addends, $formatted_variable_slope_intersects;
170 my $formatted_random_factors = "";
171 # if ($random_factors_random_slope) {
172 # $formatted_random_factors = " (1 + $random_factors->[0] | $random_factors->[1]) ";
176 if (@
$random_factors) {
177 $formatted_random_factors = join(" + ", map { "(1|$_)" } @
$random_factors);
178 push @addends, $formatted_random_factors;
182 $model .= join(" + ", @addends);
187 sub generate_model_sommer
{
190 my $tempfile = $self->tempfile();
191 my $dependent_variables = $self->dependent_variables();
192 my $fixed_factors = $self->fixed_factors();
193 my $fixed_factors_interaction = $self->fixed_factors_interaction();
194 my $variable_slope_intersects = $self->variable_slope_intersects();
195 my $random_factors = $self->random_factors();
199 ## generate the fixed factor formula
201 my $mmer_fixed_factors = "";
203 if (scalar(@
$dependent_variables) > 1) { die "Works only with one trait for now! :-("; }
204 if (scalar(@
$dependent_variables) > 0) {
205 if (scalar(@
$fixed_factors) == 0) { $mmer_fixed_factors = "1"; }
206 else { $mmer_fixed_factors = join(" + ", @
$fixed_factors); }
208 $mmer_fixed_factors = $dependent_variables->[0] ." ~ ". $mmer_fixed_factors;
211 print STDERR
"mmer_fixed_factors = $mmer_fixed_factors\n";
213 return $mmer_fixed_factors;
219 runs the model along with the data provided in the phenotyping file.
221 Produces two results files. If germplasm was a random factor, is will produce files
222 with the tempfiles stem and the extensions .adjustedBLUPs and .BLUPs, if it was a
223 fixed factor, the extension are .adjustedBLUEs and .BLUEs. The files have the
224 accession identifiers in the first column, with the remaining columns being the
225 adjusted BLUEs, BLUEs, adjusted BLUPs or BLUPs for all the initially selected
232 my $backend = shift || 'Slurm';
233 my $cluster_host = shift || "localhost";
235 my $random_factors = '"'.join('","', @
{$self->random_factors()}).'"';
236 my $fixed_factors = '"'.join('","',@
{$self->fixed_factors()}).'"';
237 my $dependent_variables = '"'.join('","',@
{$self->dependent_variables()}).'"';
239 my $model = $self->generate_model();
241 # generate params_file
243 my $param_file = $self->tempfile().".params";
244 open(my $F, ">", $param_file) || die "Can't open $param_file for writing.";
245 print $F "dependent_variables <- c($dependent_variables)\n";
246 print $F "random_factors <- c($random_factors)\n";
247 print $F "fixed_factors <- c($fixed_factors)\n";
249 print $F "model <- \"$model\"\n";
252 # run r script to create model
254 my $cmd = "R CMD BATCH '--args datafile=\"".$self->tempfile()."\" paramfile=\"".$self->tempfile().".params\"' " . " R/mixed_models.R ".$self->tempfile().".out";
255 print STDERR
"running R command $cmd...\n";
257 print STDERR
"running R command ".$self->tempfile()."...\n";
259 my $ctr = CXGN
::Tools
::Run
->new( { backend
=> $backend, working_dir
=> dirname
($self->tempfile()), backend
=> $backend, submit_host
=> $cluster_host } );
261 $ctr->run_cluster($cmd);
263 while ($ctr->alive()) {