4 Isaak Y Tecle <iyt2@cornell.edu>
8 SGN::Controller::solGS::Anova - a controller for ANOVA. For now, this implements a one-way
9 single trial ANOVA with a possibility for simultanously running anova for multiple traits.
13 package SGN
::Controller
::solGS
::Anova
;
16 use namespace
::autoclean
;
18 use Carp qw
/ carp confess croak /;
24 use File
::Slurp qw
/write_file read_file/;
25 use File
::Spec
::Functions
;
26 use File
::Path qw
/mkpath/;
28 use List
::Util qw
/any uniq all/;
29 use List
::MoreUtils qw
/firstidx/;
30 use Scalar
::Util qw
/weaken reftype looks_like_number/;
31 use Storable qw
/nstore retrieve/;
32 use URI
::FromHash
'uri';
34 BEGIN { extends
'Catalyst::Controller::REST' }
37 default => 'application/json',
39 map => { 'application/json' => 'JSON' },
42 sub anova_check_design
: Path
('/anova/check/design/') Args
(0) {
43 my ( $self, $c ) = @_;
45 my $args = $c->req->param('arguments');
46 $c->controller('solGS::Utils')->stash_json_args( $c, $args );
48 my $design = $self->get_trial_design($c);
49 my $supported = $self->check_design_support($design) if $design;
52 $c->stash->{rest
}{'Error'} = 'This trial has no design to apply ANOVA.';
54 elsif ( $design && !$supported ) {
55 $c->stash->{rest
}{'Error'} = $design
56 . ' design is not supported yet. Please report this to the database team. ';
59 $c->stash->{rest
}{'Design'} = $design;
64 sub anova_traits_list
: Path
('/anova/traits/list/') Args
(0) {
65 my ( $self, $c ) = @_;
67 my $args = $c->req->param('arguments');
68 $c->controller('solGS::Utils')->stash_json_args( $c, $args );
70 my $traits = $self->anova_traits($c);
71 $c->stash->{rest
}{anova_traits
} = $traits;
75 sub anova_phenotype_data
: Path
('/anova/phenotype/data/') Args
(0) {
76 my ( $self, $c ) = @_;
78 my $args = $c->req->param('arguments');
79 $c->controller('solGS::Utils')->stash_json_args( $c, $args );
81 my $trial_id = $c->stash->{trial_id
};
82 my $trait_id = $c->stash->{trait_id
};
84 $c->controller('solGS::Trait')->get_trait_details( $c, $trait_id );
86 my $pheno_file = $self->trial_phenotype_file($c);
88 if ( !-s
$pheno_file ) {
90 $pheno_file = $self->create_anova_phenodata_file($c);
92 if ( !-s
$pheno_file ) {
93 $c->stash->{rest
}{'Error'} =
94 'There is no phenotype data for this trial.';
97 if ( @
{ $c->error } ) {
98 $c->stash->{rest
}{'Error'} =
99 'There was error querying for the phenotype data.';
103 my $categorical = $self->check_categorical_dependent_variable($c);
105 $c->stash->{rest
}{'Error'} =
106 "The trait data is not all numeric. Some or all of the trait values are text. ";
109 $c->stash->{rest
}{'success'} = 'Success.';
113 my $traits_abbrs = $self->get_traits_abbrs($c);
114 $c->stash->{rest
}{trial_id
} = $trial_id;
115 $c->stash->{rest
}{traits_abbrs
} = $traits_abbrs;
119 sub anova_analyis
: Path
('/anova/analysis/') Args
(0) {
120 my ( $self, $c ) = @_;
122 my $args = $c->req->param('arguments');
123 $c->controller('solGS::Utils')->stash_json_args( $c, $args );
125 my $anova_result = $self->check_anova_output($c);
127 $c->controller('solGS::Trait')
128 ->get_trait_details( $c, $c->stash->{trait_id
} );
130 if ( !$anova_result ) {
131 $self->run_anova($c);
134 $self->prepare_response($c);
139 my ( $self, $c ) = @_;
141 my $trial_id = $c->stash->{trial_id
};
144 $c->controller('solGS::Search')->model($c)->trial_traits($trial_id);
145 my $clean_traits = $c->controller('solGS::Utils')->remove_ontology($traits);
147 return $clean_traits;
151 sub create_anova_phenodata_file
{
152 my ( $self, $c ) = @_;
154 my $cached = $c->controller('solGS::CachedResult')
155 ->check_cached_phenotype_data( $c, $c->stash->{trial_id
} );
158 $self->anova_query_jobs_file($c);
159 my $queries = $c->stash->{anova_query_jobs_file
};
161 $c->stash->{dependent_jobs
} = $queries;
162 $c->controller('solGS::AsyncJob')->run_async($c);
165 my $pheno_file = $self->trial_phenotype_file($c);
171 sub trial_phenotype_file
{
172 my ( $self, $c ) = @_;
174 $c->controller('solGS::Files')
175 ->phenotype_file_name( $c, $c->stash->{trial_id
} );
176 return $c->stash->{phenotype_file_name
};
180 sub get_trial_design
{
181 my ( $self, $c ) = @_;
183 my $trial_id = $c->stash->{trial_id
};
185 my $trial = CXGN
::Trial
->new(
187 bcs_schema
=> $self->schema($c),
188 trial_id
=> $trial_id
192 my $design = $trial->get_design_type();
198 sub check_design_support
{
199 my ( $self, $design ) = @_;
201 my $supported_designs = $self->supported_designs();
203 my ($match) = grep( /$design/, @
$supported_designs );
209 sub supported_designs
{
212 my $supported_designs = [qw(Alpha, Augmented, RCBD, CRD)];
214 return $supported_designs;
218 sub get_traits_abbrs
{
219 my ( $self, $c ) = @_;
221 my $trial_id = $c->stash->{trial_id
};
222 my $trait_id = $c->stash->{trait_id
};
224 $c->stash->{pop_id
} = $trial_id;
225 $c->controller('solGS::Trait')->get_all_traits( $c, $trial_id );
226 $c->controller('solGS::Files')->all_traits_file( $c, $trial_id );
228 $c->controller('solGs::Trait')->get_trait_details( $c, $trait_id );
229 my $trait_abbr = $c->stash->{trait_abbr
};
231 my $id_abbr = { 'trait_id' => $trait_id, 'trait_abbr' => $trait_abbr };
232 my $json = JSON
->new();
233 my $traits_abbrs = $json->encode($id_abbr);
235 return $traits_abbrs;
238 sub check_categorical_dependent_variable
{
239 my ( $self, $c ) = @_;
241 my $pheno_file = $self->trial_phenotype_file($c);
242 my $header = ( read_file
( $pheno_file, { binmode => ':utf8' } ) )[0];
243 my @headers = split( /\t/, $header );
245 $c->controller('solGS::Trait')
246 ->get_trait_details( $c, $c->stash->{trait_id
} );
247 my $trait_abbr = $c->stash->{trait_abbr
};
249 my $trait_idx = firstidx
{ $_ eq $trait_abbr } @headers;
250 my $trait_col = $trait_idx + 1;
252 my $trait_values = `cut -f $trait_col $pheno_file 2>&1`;
253 $trait_values =~ s/$trait_abbr|\n//g;
254 my @trait_values = split( /\t/, $trait_values );
256 my $categorical = all
{ $_ =~ /[A-Za-z]/ } @trait_values;
262 sub check_anova_output
{
263 my ( $self, $c ) = @_;
265 $self->anova_table_file($c);
266 my $html_file = $c->stash->{anova_table_html_file
};
268 my $exists = -s
$html_file ?
1 : 0;
274 sub prepare_response
{
275 my ( $self, $c ) = @_;
277 $self->anova_table_file($c);
278 my $anova_txt_file = $c->stash->{anova_table_txt_file
};
279 my $anova_html_file = $c->stash->{anova_table_html_file
};
281 if ( -s
$anova_html_file ) {
282 $self->anova_model_file($c);
283 my $model_file = $c->stash->{anova_model_file
};
285 $self->adj_means_file($c);
286 my $means_file = $c->stash->{adj_means_file
};
288 $self->anova_diagnostics_file($c);
289 my $diagnostics_file = $c->stash->{anova_diagnostics_file
};
292 $anova_txt_file = $c->controller('solGS::Files')
293 ->copy_to_tempfiles_subdir( $c, $anova_txt_file, $dir );
294 $model_file = $c->controller('solGS::Files')
295 ->copy_to_tempfiles_subdir( $c, $model_file, $dir );
296 $means_file = $c->controller('solGS::Files')
297 ->copy_to_tempfiles_subdir( $c, $means_file, $dir );
298 $diagnostics_file = $c->controller('solGS::Files')
299 ->copy_to_tempfiles_subdir( $c, $diagnostics_file, $dir );
301 $c->stash->{rest
}{anova_table_html_file
} =
302 read_file
( $anova_html_file, { binmode => ':utf8' } );
303 $c->stash->{rest
}{anova_table_txt_file
} = $anova_txt_file;
304 $c->stash->{rest
}{anova_model_file
} = $model_file;
305 $c->stash->{rest
}{adj_means_file
} = $means_file;
306 $c->stash->{rest
}{anova_diagnostics_file
} = $diagnostics_file;
309 $self->anova_error_file($c);
310 my $error_file = $c->stash->{anova_error_file
};
312 my $error = read_file
( $error_file, { binmode => ':utf8' } );
313 $c->stash->{rest
}{Error
} = $error;
319 my ( $self, $c ) = @_;
321 $self->anova_query_jobs_file($c);
322 $c->stash->{prerequisite_jobs
} = $c->stash->{anova_query_jobs_file
};
324 $self->anova_r_jobs_file($c);
325 $c->stash->{dependent_jobs
} = $c->stash->{anova_r_jobs_file
};
327 $c->controller('solGS::AsyncJob')->run_async($c);
331 sub run_anova_single_core
{
332 my ( $self, $c ) = @_;
334 $self->anova_query_jobs($c);
335 my $queries = $c->stash->{anova_query_jobs
};
337 $self->anova_r_jobs($c);
338 my $r_jobs = $c->stash->{anova_r_jobs
};
340 foreach my $job (@
$queries) {
341 $c->controller('solGS::AsyncJob')->submit_job_cluster( $c, $job );
344 foreach my $job (@
$r_jobs) {
345 $c->controller('solGS::AsyncJob')->submit_job_cluster( $c, $job );
350 sub run_anova_multi_cores
{
351 my ( $self, $c ) = @_;
353 $self->anova_query_jobs_file($c);
354 $c->stash->{prerequisite_jobs
} = $c->stash->{anova_query_jobs_file
};
356 $self->anova_r_jobs_file($c);
357 $c->stash->{dependent_jobs
} = $c->stash->{anova_r_jobs_file
};
359 $c->controller('solGS::AsyncJob')->run_async($c);
364 my ( $self, $c ) = @_;
366 my $trial_id = $c->stash->{trial_id
};
367 my $trait_id = $c->stash->{trait_id
};
369 $self->anova_input_files($c);
370 my $input_file = $c->stash->{anova_input_files
};
372 $self->anova_output_files($c);
373 my $output_file = $c->stash->{anova_output_files
};
375 $c->stash->{analysis_tempfiles_dir
} = $c->stash->{anova_temp_dir
};
377 $c->stash->{input_files
} = $input_file;
378 $c->stash->{output_files
} = $output_file;
379 $c->stash->{r_temp_file
} = "anova-${trial_id}-${trait_id}";
380 $c->stash->{r_script
} = 'R/solGS/anova.r';
382 $c->controller('solGS::AsyncJob')->get_cluster_r_job_args($c);
383 my $jobs = $c->stash->{cluster_r_job_args
};
385 if ( reftype
$jobs ne 'ARRAY' ) {
389 $c->stash->{anova_r_jobs
} = $jobs;
393 sub anova_r_jobs_file
{
394 my ( $self, $c ) = @_;
396 $self->anova_r_jobs($c);
397 my $jobs = $c->stash->{anova_r_jobs
};
399 my $temp_dir = $c->stash->{anova_temp_dir
};
400 my $jobs_file = $c->controller('solGS::Files')
401 ->create_tempfile( $temp_dir, 'anova-r-jobs-file' );
403 nstore
$jobs, $jobs_file
404 or croak
"anova r jobs : $! serializing anova r jobs to $jobs_file";
406 $c->stash->{anova_r_jobs_file
} = $jobs_file;
410 sub anova_query_jobs
{
411 my ( $self, $c ) = @_;
413 $self->create_anova_phenotype_data_query_jobs($c);
414 my $jobs = $c->stash->{anova_pheno_query_jobs
};
416 if ( reftype
$jobs ne 'ARRAY' ) {
420 $c->stash->{anova_query_jobs
} = $jobs;
423 sub anova_query_jobs_file
{
424 my ( $self, $c ) = @_;
426 $self->anova_query_jobs($c);
427 my $jobs = $c->stash->{anova_query_jobs
};
430 my $temp_dir = $c->stash->{anova_temp_dir
};
431 my $jobs_file = $c->controller('solGS::Files')
432 ->create_tempfile( $temp_dir, 'anova-query-jobs-file' );
434 nstore
$jobs, $jobs_file
436 "anova query jobs : $! serializing anova query jobs to $jobs_file";
438 $c->stash->{anova_query_jobs_file
} = $jobs_file;
443 sub create_anova_phenotype_data_query_jobs
{
444 my ( $self, $c ) = @_;
446 my $trial_id = $c->stash->{pop_id
} || $c->stash->{trial_id
};
447 $c->controller('solGS::AsyncJob')
448 ->get_trials_phenotype_query_jobs_args( $c, [$trial_id] );
449 my $jobs = $c->stash->{trials_phenotype_query_jobs_args
};
451 if ( reftype
$jobs ne 'ARRAY' ) {
455 $c->stash->{anova_pheno_query_jobs
} = $jobs;
459 sub copy_pheno_file_to_anova_dir
{
460 my ( $self, $c ) = @_;
462 my $pheno_file = $self->trial_phenotype_file($c);
463 my $anova_cache = $c->stash->{anova_cache_dir
};
465 $c->stash->{phenotype_file
} =
466 $c->controller('solGS::Files')->copy_file( $pheno_file, $anova_cache );
470 sub anova_input_files
{
471 my ( $self, $c ) = @_;
473 my $trial_id = $c->stash->{trial_id
};
474 my $trait_id = $c->stash->{trait_id
};
476 my $pheno_file = $self->trial_phenotype_file($c);
478 $self->anova_traits_file($c);
479 my $traits_file = $c->stash->{anova_traits_file
};
481 $c->controller("solGS::Files")->phenotype_metadata_file($c);
482 my $metadata_file = $c->stash->{phenotype_metadata_file
};
484 my $file_list = join( "\t", $pheno_file, $traits_file, $metadata_file );
486 my $tmp_dir = $c->stash->{anova_temp_dir
};
487 my $name = "anova_input_files_${trial_id}_${trait_id}";
489 $c->controller('solGS::Files')->create_tempfile( $tmp_dir, $name );
490 write_file
( $tempfile, { binmode => ':utf8' }, $file_list );
492 $c->stash->{anova_input_files
} = $tempfile;
496 sub anova_traits_file
{
497 my ( $self, $c ) = @_;
499 my $trial_id = $c->stash->{trial_id
};
500 my $traits = $c->stash->{trait_abbr
};
502 my $tmp_dir = $c->stash->{anova_temp_dir
};
503 my $name = "anova_traits_file_${trial_id}";
505 $c->controller('solGS::Files')->create_tempfile( $tmp_dir, $name );
506 write_file
( $traits_file, { binmode => ':utf8' }, $traits );
508 $c->stash->{anova_traits_file
} = $traits_file;
512 sub anova_output_files
{
513 my ( $self, $c ) = @_;
515 my $trial_id = $c->stash->{trial_id
};
516 my $trait_id = $c->stash->{trait_id
};
518 $self->anova_table_file($c);
519 $self->anova_model_file($c);
520 $self->adj_means_file($c);
521 $self->anova_diagnostics_file($c);
522 $self->anova_error_file($c);
524 my @files = $c->stash->{anova_table_file
};
526 my $file_list = join( "\t",
527 $c->stash->{anova_model_file
},
528 $c->stash->{anova_table_html_file
},
529 $c->stash->{anova_table_txt_file
},
530 $c->stash->{adj_means_file
},
531 $c->stash->{anova_diagnostics_file
},
532 $c->stash->{anova_error_file
},
535 my $tmp_dir = $c->stash->{anova_temp_dir
};
536 my $name = "anova_output_files_${trial_id}_${trait_id}";
538 $c->controller('solGS::Files')->create_tempfile( $tmp_dir, $name );
539 write_file
( $tempfile, { binmode => ':utf8' }, $file_list );
541 $c->stash->{anova_output_files
} = $tempfile;
545 sub anova_table_file
{
546 my ( $self, $c ) = @_;
548 my $trial_id = $c->stash->{trial_id
};
549 my $trait_id = $c->stash->{trait_id
};
551 $c->stash->{cache_dir
} = $c->stash->{anova_cache_dir
};
554 key
=> "anova_table_html_${trial_id}_${trait_id}",
555 file
=> "anova_table_html_${trial_id}_${trait_id}",
557 stash_key
=> "anova_table_html_file"
560 $c->controller('solGS::Files')->cache_file( $c, $cache_data );
562 $c->stash->{cache_dir
} = $c->stash->{anova_cache_dir
};
565 key
=> "anova_table_txt_${trial_id}_${trait_id}",
566 file
=> "anova_table_txt_${trial_id}_${trait_id}",
568 stash_key
=> "anova_table_txt_file"
571 $c->controller('solGS::Files')->cache_file( $c, $cache_data );
575 sub anova_diagnostics_file
{
576 my ( $self, $c ) = @_;
578 my $trial_id = $c->stash->{trial_id
};
579 my $trait_id = $c->stash->{trait_id
};
581 $c->stash->{cache_dir
} = $c->stash->{anova_cache_dir
};
584 key
=> "anova_diagnosics_${trial_id}_${trait_id}",
585 file
=> "anova_diagnostics_${trial_id}_${trait_id}",
587 stash_key
=> "anova_diagnostics_file"
590 $c->controller('solGS::Files')->cache_file( $c, $cache_data );
594 sub anova_model_file
{
595 my ( $self, $c ) = @_;
597 my $trial_id = $c->stash->{trial_id
};
598 my $trait_id = $c->stash->{trait_id
};
600 $c->stash->{cache_dir
} = $c->stash->{anova_cache_dir
};
603 key
=> "anova_model_${trial_id}_${trait_id}",
604 file
=> "anova_model_${trial_id}_${trait_id}",
605 stash_key
=> "anova_model_file"
608 $c->controller('solGS::Files')->cache_file( $c, $cache_data );
612 sub anova_error_file
{
613 my ( $self, $c ) = @_;
615 $c->stash->{file_id
} = $c->stash->{trial_id
} . '_' . $c->stash->{trait_id
};
616 $c->stash->{cache_dir
} = $c->stash->{anova_cache_dir
};
617 $c->stash->{analysis_type
} = 'anova';
619 $c->controller('solGS::Files')->analysis_error_file($c);
624 my ( $self, $c ) = @_;
626 my $trial_id = $c->stash->{trial_id
};
627 my $trait_id = $c->stash->{trait_id
};
629 $c->stash->{cache_dir
} = $c->stash->{anova_cache_dir
};
632 key
=> "adj_means_${trial_id}_${trait_id}",
633 file
=> "adj_means_${trial_id}_${trait_id}",
634 stash_key
=> "adj_means_file"
637 $c->controller('solGS::Files')->cache_file( $c, $cache_data );
642 my ( $self, $c ) = @_;
644 return $c->dbic_schema("Bio::Chado::Schema");
648 sub begin
: Private
{
649 my ( $self, $c ) = @_;
651 $c->controller('solGS::Files')->get_solgs_dirs($c);
655 __PACKAGE__
->meta->make_immutable;