Merge pull request #5248 from solgenomics/topic/batch_update_trials
[sgn.git] / lib / SGN / Controller / solGS / Anova.pm
blob9d1c65246689f22525fa86f023b04744db7bcceb
2 =head1 AUTHOR
4 Isaak Y Tecle <iyt2@cornell.edu>
6 =head1 Name
8 SGN::Controller::solGS::Anova - a controller for ANOVA. For now, this implements a one-way
9 single trial ANOVA with a possibility for simultanously running anova for multiple traits.
11 =cut
13 package SGN::Controller::solGS::Anova;
15 use Moose;
16 use namespace::autoclean;
18 use Carp qw/ carp confess croak /;
20 use CXGN::Trial;
21 use Data::Dumper;
22 use File::Basename;
23 use File::Copy;
24 use File::Slurp qw /write_file read_file/;
25 use File::Spec::Functions;
26 use File::Path qw /mkpath/;
27 use JSON;
28 use List::Util qw/any uniq all/;
29 use List::MoreUtils qw/firstidx/;
30 use Scalar::Util qw /weaken reftype looks_like_number/;
31 use Storable qw/nstore retrieve/;
32 use URI::FromHash 'uri';
34 BEGIN { extends 'Catalyst::Controller::REST' }
36 __PACKAGE__->config(
37 default => 'application/json',
38 stash_key => 'rest',
39 map => { 'application/json' => 'JSON' },
42 sub anova_check_design : Path('/anova/check/design/') Args(0) {
43 my ( $self, $c ) = @_;
45 my $args = $c->req->param('arguments');
46 $c->controller('solGS::Utils')->stash_json_args( $c, $args );
48 my $design = $self->get_trial_design($c);
49 my $supported = $self->check_design_support($design) if $design;
51 if ( !$design ) {
52 $c->stash->{rest}{'Error'} = 'This trial has no design to apply ANOVA.';
54 elsif ( $design && !$supported ) {
55 $c->stash->{rest}{'Error'} = $design
56 . ' design is not supported yet. Please report this to the database team. ';
58 else {
59 $c->stash->{rest}{'Design'} = $design;
64 sub anova_traits_list : Path('/anova/traits/list/') Args(0) {
65 my ( $self, $c ) = @_;
67 my $args = $c->req->param('arguments');
68 $c->controller('solGS::Utils')->stash_json_args( $c, $args );
70 my $traits = $self->anova_traits($c);
71 $c->stash->{rest}{anova_traits} = $traits;
75 sub anova_phenotype_data : Path('/anova/phenotype/data/') Args(0) {
76 my ( $self, $c ) = @_;
78 my $args = $c->req->param('arguments');
79 $c->controller('solGS::Utils')->stash_json_args( $c, $args );
81 my $trial_id = $c->stash->{trial_id};
82 my $trait_id = $c->stash->{trait_id};
84 $c->controller('solGS::Trait')->get_trait_details( $c, $trait_id );
86 my $pheno_file = $self->trial_phenotype_file($c);
88 if ( !-s $pheno_file ) {
90 $pheno_file = $self->create_anova_phenodata_file($c);
92 if ( !-s $pheno_file ) {
93 $c->stash->{rest}{'Error'} =
94 'There is no phenotype data for this trial.';
97 if ( @{ $c->error } ) {
98 $c->stash->{rest}{'Error'} =
99 'There was error querying for the phenotype data.';
102 else {
103 my $categorical = $self->check_categorical_dependent_variable($c);
104 if ($categorical) {
105 $c->stash->{rest}{'Error'} =
106 "The trait data is not all numeric. Some or all of the trait values are text. ";
108 else {
109 $c->stash->{rest}{'success'} = 'Success.';
113 my $traits_abbrs = $self->get_traits_abbrs($c);
114 $c->stash->{rest}{trial_id} = $trial_id;
115 $c->stash->{rest}{traits_abbrs} = $traits_abbrs;
119 sub anova_analyis : Path('/anova/analysis/') Args(0) {
120 my ( $self, $c ) = @_;
122 my $args = $c->req->param('arguments');
123 $c->controller('solGS::Utils')->stash_json_args( $c, $args );
125 my $anova_result = $self->check_anova_output($c);
127 $c->controller('solGS::Trait')
128 ->get_trait_details( $c, $c->stash->{trait_id} );
130 if ( !$anova_result ) {
131 $self->run_anova($c);
134 $self->prepare_response($c);
138 sub anova_traits {
139 my ( $self, $c ) = @_;
141 my $trial_id = $c->stash->{trial_id};
143 my $traits =
144 $c->controller('solGS::Search')->model($c)->trial_traits($trial_id);
145 my $clean_traits = $c->controller('solGS::Utils')->remove_ontology($traits);
147 return $clean_traits;
151 sub create_anova_phenodata_file {
152 my ( $self, $c ) = @_;
154 my $cached = $c->controller('solGS::CachedResult')
155 ->check_cached_phenotype_data( $c, $c->stash->{trial_id} );
157 if ( !$cached ) {
158 $self->anova_query_jobs_file($c);
159 my $queries = $c->stash->{anova_query_jobs_file};
161 $c->stash->{dependent_jobs} = $queries;
162 $c->controller('solGS::AsyncJob')->run_async($c);
165 my $pheno_file = $self->trial_phenotype_file($c);
167 return $pheno_file;
171 sub trial_phenotype_file {
172 my ( $self, $c ) = @_;
174 $c->controller('solGS::Files')
175 ->phenotype_file_name( $c, $c->stash->{trial_id} );
176 return $c->stash->{phenotype_file_name};
180 sub get_trial_design {
181 my ( $self, $c ) = @_;
183 my $trial_id = $c->stash->{trial_id};
185 my $trial = CXGN::Trial->new(
187 bcs_schema => $self->schema($c),
188 trial_id => $trial_id
192 my $design = $trial->get_design_type();
194 return $design;
198 sub check_design_support {
199 my ( $self, $design ) = @_;
201 my $supported_designs = $self->supported_designs();
203 my ($match) = grep( /$design/, @$supported_designs );
205 return $match;
209 sub supported_designs {
210 my $self = shift;
212 my $supported_designs = [qw(Alpha, Augmented, RCBD, CRD)];
214 return $supported_designs;
218 sub get_traits_abbrs {
219 my ( $self, $c ) = @_;
221 my $trial_id = $c->stash->{trial_id};
222 my $trait_id = $c->stash->{trait_id};
224 $c->stash->{pop_id} = $trial_id;
225 $c->controller('solGS::Trait')->get_all_traits( $c, $trial_id );
226 $c->controller('solGS::Files')->all_traits_file( $c, $trial_id );
228 $c->controller('solGs::Trait')->get_trait_details( $c, $trait_id );
229 my $trait_abbr = $c->stash->{trait_abbr};
231 my $id_abbr = { 'trait_id' => $trait_id, 'trait_abbr' => $trait_abbr };
232 my $json = JSON->new();
233 my $traits_abbrs = $json->encode($id_abbr);
235 return $traits_abbrs;
238 sub check_categorical_dependent_variable {
239 my ( $self, $c ) = @_;
241 my $pheno_file = $self->trial_phenotype_file($c);
242 my $header = ( read_file( $pheno_file, { binmode => ':utf8' } ) )[0];
243 my @headers = split( /\t/, $header );
245 $c->controller('solGS::Trait')
246 ->get_trait_details( $c, $c->stash->{trait_id} );
247 my $trait_abbr = $c->stash->{trait_abbr};
249 my $trait_idx = firstidx { $_ eq $trait_abbr } @headers;
250 my $trait_col = $trait_idx + 1;
252 my $trait_values = `cut -f $trait_col $pheno_file 2>&1`;
253 $trait_values =~ s/$trait_abbr|\n//g;
254 my @trait_values = split( /\t/, $trait_values );
256 my $categorical = all { $_ =~ /[A-Za-z]/ } @trait_values;
258 return $categorical;
262 sub check_anova_output {
263 my ( $self, $c ) = @_;
265 $self->anova_table_file($c);
266 my $html_file = $c->stash->{anova_table_html_file};
268 my $exists = -s $html_file ? 1 : 0;
270 return $exists;
274 sub prepare_response {
275 my ( $self, $c ) = @_;
277 $self->anova_table_file($c);
278 my $anova_txt_file = $c->stash->{anova_table_txt_file};
279 my $anova_html_file = $c->stash->{anova_table_html_file};
281 if ( -s $anova_html_file ) {
282 $self->anova_model_file($c);
283 my $model_file = $c->stash->{anova_model_file};
285 $self->adj_means_file($c);
286 my $means_file = $c->stash->{adj_means_file};
288 $self->anova_diagnostics_file($c);
289 my $diagnostics_file = $c->stash->{anova_diagnostics_file};
291 my $dir = 'anova';
292 $anova_txt_file = $c->controller('solGS::Files')
293 ->copy_to_tempfiles_subdir( $c, $anova_txt_file, $dir );
294 $model_file = $c->controller('solGS::Files')
295 ->copy_to_tempfiles_subdir( $c, $model_file, $dir );
296 $means_file = $c->controller('solGS::Files')
297 ->copy_to_tempfiles_subdir( $c, $means_file, $dir );
298 $diagnostics_file = $c->controller('solGS::Files')
299 ->copy_to_tempfiles_subdir( $c, $diagnostics_file, $dir );
301 $c->stash->{rest}{anova_table_html_file} =
302 read_file( $anova_html_file, { binmode => ':utf8' } );
303 $c->stash->{rest}{anova_table_txt_file} = $anova_txt_file;
304 $c->stash->{rest}{anova_model_file} = $model_file;
305 $c->stash->{rest}{adj_means_file} = $means_file;
306 $c->stash->{rest}{anova_diagnostics_file} = $diagnostics_file;
308 else {
309 $self->anova_error_file($c);
310 my $error_file = $c->stash->{anova_error_file};
312 my $error = read_file( $error_file, { binmode => ':utf8' } );
313 $c->stash->{rest}{Error} = $error;
318 sub run_anova {
319 my ( $self, $c ) = @_;
321 $self->anova_query_jobs_file($c);
322 $c->stash->{prerequisite_jobs} = $c->stash->{anova_query_jobs_file};
324 $self->anova_r_jobs_file($c);
325 $c->stash->{dependent_jobs} = $c->stash->{anova_r_jobs_file};
327 $c->controller('solGS::AsyncJob')->run_async($c);
331 sub run_anova_single_core {
332 my ( $self, $c ) = @_;
334 $self->anova_query_jobs($c);
335 my $queries = $c->stash->{anova_query_jobs};
337 $self->anova_r_jobs($c);
338 my $r_jobs = $c->stash->{anova_r_jobs};
340 foreach my $job (@$queries) {
341 $c->controller('solGS::AsyncJob')->submit_job_cluster( $c, $job );
344 foreach my $job (@$r_jobs) {
345 $c->controller('solGS::AsyncJob')->submit_job_cluster( $c, $job );
350 sub run_anova_multi_cores {
351 my ( $self, $c ) = @_;
353 $self->anova_query_jobs_file($c);
354 $c->stash->{prerequisite_jobs} = $c->stash->{anova_query_jobs_file};
356 $self->anova_r_jobs_file($c);
357 $c->stash->{dependent_jobs} = $c->stash->{anova_r_jobs_file};
359 $c->controller('solGS::AsyncJob')->run_async($c);
363 sub anova_r_jobs {
364 my ( $self, $c ) = @_;
366 my $trial_id = $c->stash->{trial_id};
367 my $trait_id = $c->stash->{trait_id};
369 $self->anova_input_files($c);
370 my $input_file = $c->stash->{anova_input_files};
372 $self->anova_output_files($c);
373 my $output_file = $c->stash->{anova_output_files};
375 $c->stash->{analysis_tempfiles_dir} = $c->stash->{anova_temp_dir};
377 $c->stash->{input_files} = $input_file;
378 $c->stash->{output_files} = $output_file;
379 $c->stash->{r_temp_file} = "anova-${trial_id}-${trait_id}";
380 $c->stash->{r_script} = 'R/solGS/anova.r';
382 $c->controller('solGS::AsyncJob')->get_cluster_r_job_args($c);
383 my $jobs = $c->stash->{cluster_r_job_args};
385 if ( reftype $jobs ne 'ARRAY' ) {
386 $jobs = [$jobs];
389 $c->stash->{anova_r_jobs} = $jobs;
393 sub anova_r_jobs_file {
394 my ( $self, $c ) = @_;
396 $self->anova_r_jobs($c);
397 my $jobs = $c->stash->{anova_r_jobs};
399 my $temp_dir = $c->stash->{anova_temp_dir};
400 my $jobs_file = $c->controller('solGS::Files')
401 ->create_tempfile( $temp_dir, 'anova-r-jobs-file' );
403 nstore $jobs, $jobs_file
404 or croak "anova r jobs : $! serializing anova r jobs to $jobs_file";
406 $c->stash->{anova_r_jobs_file} = $jobs_file;
410 sub anova_query_jobs {
411 my ( $self, $c ) = @_;
413 $self->create_anova_phenotype_data_query_jobs($c);
414 my $jobs = $c->stash->{anova_pheno_query_jobs};
416 if ( reftype $jobs ne 'ARRAY' ) {
417 $jobs = [$jobs];
420 $c->stash->{anova_query_jobs} = $jobs;
423 sub anova_query_jobs_file {
424 my ( $self, $c ) = @_;
426 $self->anova_query_jobs($c);
427 my $jobs = $c->stash->{anova_query_jobs};
429 if ( $jobs->[0] ) {
430 my $temp_dir = $c->stash->{anova_temp_dir};
431 my $jobs_file = $c->controller('solGS::Files')
432 ->create_tempfile( $temp_dir, 'anova-query-jobs-file' );
434 nstore $jobs, $jobs_file
435 or croak
436 "anova query jobs : $! serializing anova query jobs to $jobs_file";
438 $c->stash->{anova_query_jobs_file} = $jobs_file;
443 sub create_anova_phenotype_data_query_jobs {
444 my ( $self, $c ) = @_;
446 my $trial_id = $c->stash->{pop_id} || $c->stash->{trial_id};
447 $c->controller('solGS::AsyncJob')
448 ->get_trials_phenotype_query_jobs_args( $c, [$trial_id] );
449 my $jobs = $c->stash->{trials_phenotype_query_jobs_args};
451 if ( reftype $jobs ne 'ARRAY' ) {
452 $jobs = [$jobs];
455 $c->stash->{anova_pheno_query_jobs} = $jobs;
459 sub copy_pheno_file_to_anova_dir {
460 my ( $self, $c ) = @_;
462 my $pheno_file = $self->trial_phenotype_file($c);
463 my $anova_cache = $c->stash->{anova_cache_dir};
465 $c->stash->{phenotype_file} =
466 $c->controller('solGS::Files')->copy_file( $pheno_file, $anova_cache );
470 sub anova_input_files {
471 my ( $self, $c ) = @_;
473 my $trial_id = $c->stash->{trial_id};
474 my $trait_id = $c->stash->{trait_id};
476 my $pheno_file = $self->trial_phenotype_file($c);
478 $self->anova_traits_file($c);
479 my $traits_file = $c->stash->{anova_traits_file};
481 $c->controller("solGS::Files")->phenotype_metadata_file($c);
482 my $metadata_file = $c->stash->{phenotype_metadata_file};
484 my $file_list = join( "\t", $pheno_file, $traits_file, $metadata_file );
486 my $tmp_dir = $c->stash->{anova_temp_dir};
487 my $name = "anova_input_files_${trial_id}_${trait_id}";
488 my $tempfile =
489 $c->controller('solGS::Files')->create_tempfile( $tmp_dir, $name );
490 write_file( $tempfile, { binmode => ':utf8' }, $file_list );
492 $c->stash->{anova_input_files} = $tempfile;
496 sub anova_traits_file {
497 my ( $self, $c ) = @_;
499 my $trial_id = $c->stash->{trial_id};
500 my $traits = $c->stash->{trait_abbr};
502 my $tmp_dir = $c->stash->{anova_temp_dir};
503 my $name = "anova_traits_file_${trial_id}";
504 my $traits_file =
505 $c->controller('solGS::Files')->create_tempfile( $tmp_dir, $name );
506 write_file( $traits_file, { binmode => ':utf8' }, $traits );
508 $c->stash->{anova_traits_file} = $traits_file;
512 sub anova_output_files {
513 my ( $self, $c ) = @_;
515 my $trial_id = $c->stash->{trial_id};
516 my $trait_id = $c->stash->{trait_id};
518 $self->anova_table_file($c);
519 $self->anova_model_file($c);
520 $self->adj_means_file($c);
521 $self->anova_diagnostics_file($c);
522 $self->anova_error_file($c);
524 my @files = $c->stash->{anova_table_file};
526 my $file_list = join( "\t",
527 $c->stash->{anova_model_file},
528 $c->stash->{anova_table_html_file},
529 $c->stash->{anova_table_txt_file},
530 $c->stash->{adj_means_file},
531 $c->stash->{anova_diagnostics_file},
532 $c->stash->{anova_error_file},
535 my $tmp_dir = $c->stash->{anova_temp_dir};
536 my $name = "anova_output_files_${trial_id}_${trait_id}";
537 my $tempfile =
538 $c->controller('solGS::Files')->create_tempfile( $tmp_dir, $name );
539 write_file( $tempfile, { binmode => ':utf8' }, $file_list );
541 $c->stash->{anova_output_files} = $tempfile;
545 sub anova_table_file {
546 my ( $self, $c ) = @_;
548 my $trial_id = $c->stash->{trial_id};
549 my $trait_id = $c->stash->{trait_id};
551 $c->stash->{cache_dir} = $c->stash->{anova_cache_dir};
553 my $cache_data = {
554 key => "anova_table_html_${trial_id}_${trait_id}",
555 file => "anova_table_html_${trial_id}_${trait_id}",
556 ext => 'html',
557 stash_key => "anova_table_html_file"
560 $c->controller('solGS::Files')->cache_file( $c, $cache_data );
562 $c->stash->{cache_dir} = $c->stash->{anova_cache_dir};
564 $cache_data = {
565 key => "anova_table_txt_${trial_id}_${trait_id}",
566 file => "anova_table_txt_${trial_id}_${trait_id}",
567 ext => 'txt',
568 stash_key => "anova_table_txt_file"
571 $c->controller('solGS::Files')->cache_file( $c, $cache_data );
575 sub anova_diagnostics_file {
576 my ( $self, $c ) = @_;
578 my $trial_id = $c->stash->{trial_id};
579 my $trait_id = $c->stash->{trait_id};
581 $c->stash->{cache_dir} = $c->stash->{anova_cache_dir};
583 my $cache_data = {
584 key => "anova_diagnosics_${trial_id}_${trait_id}",
585 file => "anova_diagnostics_${trial_id}_${trait_id}",
586 ext => '.png',
587 stash_key => "anova_diagnostics_file"
590 $c->controller('solGS::Files')->cache_file( $c, $cache_data );
594 sub anova_model_file {
595 my ( $self, $c ) = @_;
597 my $trial_id = $c->stash->{trial_id};
598 my $trait_id = $c->stash->{trait_id};
600 $c->stash->{cache_dir} = $c->stash->{anova_cache_dir};
602 my $cache_data = {
603 key => "anova_model_${trial_id}_${trait_id}",
604 file => "anova_model_${trial_id}_${trait_id}",
605 stash_key => "anova_model_file"
608 $c->controller('solGS::Files')->cache_file( $c, $cache_data );
612 sub anova_error_file {
613 my ( $self, $c ) = @_;
615 $c->stash->{file_id} = $c->stash->{trial_id} . '_' . $c->stash->{trait_id};
616 $c->stash->{cache_dir} = $c->stash->{anova_cache_dir};
617 $c->stash->{analysis_type} = 'anova';
619 $c->controller('solGS::Files')->analysis_error_file($c);
623 sub adj_means_file {
624 my ( $self, $c ) = @_;
626 my $trial_id = $c->stash->{trial_id};
627 my $trait_id = $c->stash->{trait_id};
629 $c->stash->{cache_dir} = $c->stash->{anova_cache_dir};
631 my $cache_data = {
632 key => "adj_means_${trial_id}_${trait_id}",
633 file => "adj_means_${trial_id}_${trait_id}",
634 stash_key => "adj_means_file"
637 $c->controller('solGS::Files')->cache_file( $c, $cache_data );
641 sub schema {
642 my ( $self, $c ) = @_;
644 return $c->dbic_schema("Bio::Chado::Schema");
648 sub begin : Private {
649 my ( $self, $c ) = @_;
651 $c->controller('solGS::Files')->get_solgs_dirs($c);
655 __PACKAGE__->meta->make_immutable;