Added eval; site now shows clean dataset missing message instead of server error...
[sgn.git] / lib / SGN / Controller / solGS / pca.pm
blob3cb8e35d468bee59868d092a58a8021264e6c4fb
1 package SGN::Controller::solGS::pca;
3 use Moose;
4 use namespace::autoclean;
6 use Carp qw/ carp confess croak /;
7 use File::Spec::Functions qw / catfile catdir/;
8 use File::Path qw / mkpath /;
9 use File::Temp qw / tempfile tempdir /;
10 use File::Slurp qw /write_file read_file :edit prepend_file/;
11 use JSON;
12 use Scalar::Util qw /weaken reftype/;
13 use Storable qw/ nstore retrieve /;
15 use CXGN::List;
17 BEGIN { extends 'Catalyst::Controller' }
19 sub pca_analysis : Path('/pca/analysis/') Args() {
20 my ( $self, $c, $id ) = @_;
22 if ( $id && !$c->user ) {
23 $c->controller('solGS::Utils')->require_login($c);
26 $c->stash->{template} = '/solgs/tools/pca/analysis.mas';
30 sub run_pca_analysis : Path('/run/pca/analysis') Args() {
31 my ( $self, $c ) = @_;
33 my $args = $c->req->param('arguments');
34 $c->controller('solGS::Utils')->stash_json_args( $c, $args );
36 $c->stash->{data_type} = 'genotype' if !$c->stash->{data_type};
38 my $file_id = $c->controller('solGS::Files')->create_file_id($c);
39 $c->stash->{file_id} = $file_id;
41 my $list_id = $c->stash->{list_id};
42 if ($list_id) {
43 $c->controller('solGS::List')
44 ->create_list_population_metadata_file( $c, $file_id );
45 $c->controller('solGS::List')->stash_list_metadata( $c, $list_id );
48 my $combo_pops_id = $c->stash->{combo_pops_id};
49 if ($combo_pops_id) {
50 $c->controller('solGS::combinedTrials')
51 ->get_combined_pops_list( $c, $combo_pops_id );
52 $c->stash->{pops_ids_list} = $c->stash->{combined_pops_list};
55 my $cached =
56 $c->controller('solGS::CachedResult')->check_pca_output( $c, $file_id );
57 if ( !$cached->{scores_file} ) {
58 $self->run_pca($c);
61 $self->prepare_pca_output_response($c);
62 my $ret = $c->stash->{pca_output_response};
64 $ret = to_json($ret);
65 $c->res->content_type('application/json');
66 $c->res->body($ret);
70 sub download_pca_scores : Path('/download/pca/scores/population') Args(1) {
71 my ( $self, $c, $file_id ) = @_;
73 $c->stash->{file_id} = $file_id;
74 $self->pca_scores_file($c);
75 my $file = $c->stash->{pca_scores_file};
76 my $pca_data = $c->controller('solGS::Utils')
77 ->structure_downloadable_data( $file, 'Individuals' );
79 $c->res->content_type("text/plain");
80 $c->res->body( join "", map { $_->[0] } @$pca_data );
83 sub download_pca_loadings : Path('/download/pca/loadings/population') Args(1) {
84 my ( $self, $c, $file_id ) = @_;
86 $c->stash->{file_id} = $file_id;
87 $self->pca_loadings_file($c);
88 my $file = $c->stash->{pca_loadings_file};
89 my $pca_data = $c->controller('solGS::Utils')
90 ->structure_downloadable_data( $file, 'Variables' );
92 $c->res->content_type("text/plain");
93 $c->res->body( join "", map { $_->[0] } @$pca_data );
97 sub download_pca_variances : Path('/download/pca/variances/population') Args(1)
99 my ( $self, $c, $file_id ) = @_;
101 $c->stash->{file_id} = $file_id;
102 $self->pca_variances_file($c);
103 my $file = $c->stash->{pca_variances_file};
105 my $pca_data = $c->controller('solGS::Utils')
106 ->structure_downloadable_data( $file, 'PCs' );
108 $c->res->content_type("text/plain");
109 $c->res->body( join "", map { $_->[0] } @$pca_data );
113 sub prepare_pca_output_response {
114 my ( $self, $c ) = @_;
116 my $file_id = $c->stash->{file_id};
117 my $ret->{status} = undef;
119 if ($file_id) {
120 $self->pca_scores_file($c);
121 my $scores_file = $c->stash->{pca_scores_file};
123 $self->pca_variances_file($c);
124 my $variances_file = $c->stash->{pca_variances_file};
126 if ( -s $scores_file ) {
127 my $scores =
128 $c->controller('solGS::Utils')->read_file_data($scores_file);
129 my $variances =
130 $c->controller('solGS::Utils')->read_file_data($variances_file);
132 $self->prep_pca_download_files($c);
133 my $scree_plot_file = $c->stash->{download_scree_plot};
134 my $scree_data_file = $c->stash->{download_scree_data};
135 my $loadings_file = $c->stash->{download_loadings};
136 my $variances_file = $c->stash->{download_variances};
137 my $scores_file = $c->stash->{download_scores};
139 my $output_link = '/pca/analysis/' . $file_id;
140 my $trials_names;
142 my $tr_pop_id = $c->stash->{training_pop_id};
143 my $sel_pop_id = $c->stash->{selection_pop_id};
144 if ( $tr_pop_id && $sel_pop_id ) {
145 $trials_names = {
146 $tr_pop_id => 'Training population',
147 $sel_pop_id => 'Selection population'
150 else {
151 $c->controller('solGS::combinedTrials')
152 ->process_trials_list_details($c);
153 $trials_names = $c->stash->{trials_names};
156 if ($scores) {
157 $ret->{scores} = $scores;
158 $ret->{variances} = $variances;
159 $ret->{scores_file} = $scores_file;
160 $ret->{variances_file} = $variances_file;
161 $ret->{loadings_file} = $loadings_file;
162 $ret->{scree_data_file} = $scree_data_file;
163 $ret->{scree_plot_file} = $scree_plot_file;
164 $ret->{status} = 'success';
165 $ret->{cached} = 1;
166 $ret->{pca_pop_id} = $c->stash->{pca_pop_id};
167 $ret->{file_id} = $file_id;
168 $ret->{list_id} = $c->stash->{list_id};
169 $ret->{trials_names} = $trials_names;
170 $ret->{output_link} = $output_link;
171 $ret->{data_type} = $c->stash->{data_type};
174 $c->stash->{pca_output_response} = $ret;
176 else {
177 $ret->{status} = $self->error_message($c);
178 $c->stash->{formatted_pca_output} = $ret;
181 else {
182 die "Required file id argument missing.";
187 sub error_message {
188 my ( $self, $c ) = @_;
190 $self->pca_scores_file($c);
191 my $pca_scores_file = $c->stash->{pca_scores_file};
193 $self->pca_input_files($c);
194 my $files = $c->stash->{pca_input_files};
196 my $error_message;
198 my @data_exists;
199 my @data_files = split( /\s/, read_file( $files, { binmode => ':utf8' } ) );
201 foreach my $file (@data_files) {
202 push @data_exists, 1 if -s $file;
205 if ( !@data_exists ) {
206 my $data_type = $c->stash->{data_type};
207 $error_message = "There is no $data_type for this dataset.";
209 elsif ( @data_exists && !-s $pca_scores_file ) {
210 $error_message = 'The PCA R Script failed.';
213 return $error_message;
216 sub format_pca_scores {
217 my ( $self, $c ) = @_;
219 my $file = $c->stash->{pca_scores_file};
220 my $data = $c->controller('solGS::Utils')->read_file_data($file);
222 $c->stash->{pca_scores} = $data;
226 sub pca_query_jobs {
227 my ( $self, $c ) = @_;
229 my $data_type = $c->stash->{data_type};
230 my $pca_pop_id = $c->stash->{pca_pop_id};
231 my $protocol_id = $c->stash->{genotyping_protocol_id};
233 my $jobs = [];
235 if ( $data_type =~ /phenotype/i ) {
236 $jobs = $c->controller('solGS::AsyncJob')
237 ->create_phenotype_data_query_jobs( $c, $pca_pop_id );
239 elsif ( $data_type =~ /genotype/i ) {
240 $jobs = $c->controller('solGS::AsyncJob')
241 ->create_genotype_data_query_jobs( $c, $pca_pop_id, $protocol_id );
244 if ( reftype $jobs ne 'ARRAY' ) {
245 $jobs = [$jobs];
248 $c->stash->{pca_query_jobs} = $jobs;
251 sub pca_scores_file {
252 my ( $self, $c ) = @_;
254 my $file_id = $c->stash->{file_id};
255 $c->stash->{cache_dir} = $c->stash->{pca_cache_dir};
257 my $cache_data = {
258 key => "pca_scores_${file_id}",
259 file => "pca_scores_${file_id}",
260 stash_key => 'pca_scores_file'
263 $c->controller('solGS::Files')->cache_file( $c, $cache_data );
267 sub pca_scree_data_file {
268 my ( $self, $c ) = @_;
270 my $file_id = $c->stash->{file_id};
271 $c->stash->{cache_dir} = $c->stash->{pca_cache_dir};
273 my $cache_data = {
274 key => "pca_scree_data_${file_id}",
275 file => "pca_scree_data_${file_id}",
276 stash_key => 'pca_scree_data_file'
279 $c->controller('solGS::Files')->cache_file( $c, $cache_data );
283 sub pca_scree_plot_file {
284 my ( $self, $c ) = @_;
286 my $file_id = $c->stash->{file_id};
287 $c->stash->{cache_dir} = $c->stash->{pca_cache_dir};
289 my $cache_data = {
290 key => "pca_scree_plot_${file_id}",
291 file => "pca_scree_plot_${file_id}",
292 ext => 'png',
293 stash_key => 'pca_scree_plot_file'
296 $c->controller('solGS::Files')->cache_file( $c, $cache_data );
300 sub pca_variances_file {
301 my ( $self, $c ) = @_;
303 my $file_id = $c->stash->{file_id};
304 $c->stash->{cache_dir} = $c->stash->{pca_cache_dir};
306 my $cache_data = {
307 key => "pca_variances_${file_id}",
308 file => "pca_variances_${file_id}",
309 stash_key => 'pca_variances_file'
312 $c->controller('solGS::Files')->cache_file( $c, $cache_data );
316 sub pca_loadings_file {
317 my ( $self, $c ) = @_;
319 my $file_id = $c->stash->{file_id};
320 $c->stash->{cache_dir} = $c->stash->{pca_cache_dir};
322 my $cache_data = {
323 key => "pca_loadings_${file_id}",
324 file => "pca_loadings_${file_id}",
325 stash_key => 'pca_loadings_file'
328 $c->controller('solGS::Files')->cache_file( $c, $cache_data );
332 sub pca_output_files {
333 my ( $self, $c ) = @_;
335 my $file_id = $c->stash->{file_id};
337 $self->pca_scores_file($c);
338 $self->pca_loadings_file($c);
339 $self->pca_variances_file($c);
340 $self->pca_scree_data_file($c);
341 $self->pca_scree_plot_file($c);
342 $self->combined_pca_trials_data_file($c);
344 my $file_list = join( "\t",
345 $c->stash->{pca_scores_file}, $c->stash->{pca_loadings_file},
346 $c->stash->{pca_scree_data_file}, $c->stash->{pca_scree_plot_file},
347 $c->stash->{pca_variances_file}, $c->stash->{combined_pca_data_file},
350 my $tmp_dir = $c->stash->{pca_temp_dir};
351 my $name = "pca_output_files_${file_id}";
352 my $tempfile =
353 $c->controller('solGS::Files')->create_tempfile( $tmp_dir, $name );
354 write_file( $tempfile, { binmode => ':utf8' }, $file_list );
356 $c->stash->{pca_output_files} = $tempfile;
360 sub combined_pca_trials_data_file {
361 my ( $self, $c ) = @_;
363 my $file_id = $c->stash->{file_id};
364 my $tmp_dir = $c->stash->{pca_temp_dir};
365 my $name = "combined_pca_data_file_${file_id}";
366 my $tempfile =
367 $c->controller('solGS::Files')->create_tempfile( $tmp_dir, $name );
369 $c->stash->{combined_pca_data_file} = $tempfile;
373 sub pca_data_input_files {
374 my ( $self, $c ) = @_;
376 my $pop_id = $c->stash->{pca_pop_id};
377 my $data_type = $c->stash->{data_type};
378 my $protocol_id = $c->stash->{genotyping_protocol_id};
380 my $input_file;
381 if ( $data_type =~ /genotype/i ) {
382 $c->controller('solGS::Files')
383 ->genotype_file_name( $c, $pop_id, $protocol_id );
384 $input_file = $c->stash->{genotype_file_name};
386 elsif ( $data_type =~ /phenotype/i ) {
387 $c->controller('solGS::Files')->phenotype_file_name( $c, $pop_id );
388 $input_file = $c->stash->{phenotype_file_name};
391 return $input_file;
394 sub pca_input_files {
395 my ( $self, $c ) = @_;
397 my $file_id = $c->stash->{file_id};
398 my $tmp_dir = $c->stash->{pca_temp_dir};
400 my $name = "pca_input_files_${file_id}";
401 my $tempfile =
402 $c->controller('solGS::Files')->create_tempfile( $tmp_dir, $name );
404 my $files;
405 my $data_type = $c->stash->{data_type};
406 if ( $data_type =~ /genotype/i ) {
407 $self->pca_geno_input_files($c);
408 $files = $c->stash->{pca_geno_input_files};
410 elsif ( $data_type =~ /phenotype/i ) {
411 $self->pca_pheno_input_files($c);
412 $files = $c->stash->{pca_pheno_input_files};
415 write_file( $tempfile, { binmode => ':utf8' }, $files );
416 $c->stash->{pca_input_files} = $tempfile;
420 sub pca_geno_input_files {
421 my ( $self, $c ) = @_;
423 my $data_type = $c->stash->{data_type};
424 my $files;
426 if ( $data_type =~ /genotype/i ) {
427 if ( $c->req->referer =~
428 /solgs\/selection\/|solgs\/combined\/model\/\d+\/selection\// )
430 $self->training_selection_geno_files($c);
433 $files =
434 $c->stash->{genotype_files_list} || $c->stash->{genotype_file_name};
437 $files = join( "\t", @$files ) if reftype($files) eq 'ARRAY';
438 $c->stash->{pca_geno_input_files} = $files;
441 sub training_selection_geno_files {
442 my ( $self, $c ) = @_;
444 my $tr_pop = $c->stash->{training_pop_id};
445 my $sel_pop = $c->stash->{selection_pop_id};
447 my @files;
448 foreach my $id ( ( $tr_pop, $sel_pop ) ) {
449 $c->controller('solGS::Files')->genotype_file_name( $c, $id );
450 push @files, $c->stash->{genotype_file_name};
453 my $files = join( "\t", @files );
454 $c->stash->{genotype_files_list} = $files;
457 sub pca_pheno_input_files {
458 my ( $self, $c ) = @_;
460 my $data_type = $c->stash->{data_type};
461 my $files;
463 if ( $data_type =~ /phenotype/i ) {
464 $files = $c->stash->{phenotype_files_list}
465 || $c->stash->{phenotype_file_name};
467 $files = join( "\t", @$files ) if reftype($files) eq 'ARRAY';
469 $c->controller('solGS::Files')->phenotype_metadata_file($c);
470 my $metadata_file = $c->stash->{phenotype_metadata_file};
472 $files .= "\t" . $metadata_file;
475 $c->stash->{pca_pheno_input_files} = $files;
479 sub run_pca {
480 my ( $self, $c ) = @_;
482 $self->pca_query_jobs_file($c);
483 $c->stash->{prerequisite_jobs} = $c->stash->{pca_query_jobs_file};
485 $self->pca_r_jobs_file($c);
486 $c->stash->{dependent_jobs} = $c->stash->{pca_r_jobs_file};
488 $c->controller('solGS::AsyncJob')->run_async($c);
492 sub run_pca_single_core {
493 my ( $self, $c ) = @_;
495 $self->pca_query_jobs($c);
496 my $queries = $c->stash->{pca_query_jobs};
498 $self->pca_r_jobs($c);
499 my $r_jobs = $c->stash->{pca_r_jobs};
501 foreach my $job (@$queries) {
502 $c->controller('solGS::AsyncJob')->submit_job_cluster( $c, $job );
505 foreach my $job (@$r_jobs) {
506 $c->controller('solGS::AsyncJob')->submit_job_cluster( $c, $job );
511 sub run_pca_multi_cores {
512 my ( $self, $c ) = @_;
514 $self->pca_query_jobs_file($c);
515 $c->stash->{prerequisite_jobs} = $c->stash->{pca_query_jobs_file};
517 $self->pca_r_jobs_file($c);
518 $c->stash->{dependent_jobs} = $c->stash->{pca_r_jobs_file};
520 $c->controller('solGS::AsyncJob')->run_async($c);
524 sub pca_r_jobs {
525 my ( $self, $c ) = @_;
527 my $file_id = $c->stash->{file_id};
529 $self->pca_output_files($c);
530 my $output_file = $c->stash->{pca_output_files};
532 $self->pca_input_files($c);
533 my $input_file = $c->stash->{pca_input_files};
535 $c->stash->{analysis_tempfiles_dir} = $c->stash->{pca_temp_dir};
537 $c->stash->{input_files} = $input_file;
538 $c->stash->{output_files} = $output_file;
539 $c->stash->{r_temp_file} = "pca-${file_id}";
540 $c->stash->{r_script} = 'R/solGS/pca.r';
542 $c->controller('solGS::AsyncJob')->get_cluster_r_job_args($c);
543 my $jobs = $c->stash->{cluster_r_job_args};
545 if ( reftype $jobs ne 'ARRAY' ) {
546 $jobs = [$jobs];
549 $c->stash->{pca_r_jobs} = $jobs;
553 sub pca_r_jobs_file {
554 my ( $self, $c ) = @_;
556 $self->pca_r_jobs($c);
557 my $jobs = $c->stash->{pca_r_jobs};
559 my $temp_dir = $c->stash->{pca_temp_dir};
560 my $jobs_file = $c->controller('solGS::Files')
561 ->create_tempfile( $temp_dir, 'pca-r-jobs-file' );
563 nstore $jobs, $jobs_file
564 or croak "pca r jobs : $! serializing pca r jobs to $jobs_file";
566 $c->stash->{pca_r_jobs_file} = $jobs_file;
570 sub pca_query_jobs_file {
571 my ( $self, $c ) = @_;
573 $self->pca_query_jobs($c);
574 my $jobs = $c->stash->{pca_query_jobs};
576 my $temp_dir = $c->stash->{pca_temp_dir};
577 my $jobs_file = $c->controller('solGS::Files')
578 ->create_tempfile( $temp_dir, 'pca-query-jobs-file' );
580 nstore $jobs, $jobs_file
581 or croak "pca query jobs : $! serializing pca query jobs to $jobs_file";
583 $c->stash->{pca_query_jobs_file} = $jobs_file;
587 sub prep_pca_download_files {
588 my ( $self, $c ) = @_;
590 my $analysis_type = $c->stash->{analysis_type};
591 $self->pca_scores_file($c);
592 $self->pca_loadings_file($c);
593 $self->pca_variances_file($c);
594 $self->pca_scree_data_file($c);
595 $self->pca_scree_plot_file($c);
597 my $scores_file = $c->stash->{pca_scores_file};
598 my $loadings_file = $c->stash->{pca_loadings_file};
599 my $scree_data_file = $c->stash->{pca_scree_data_file};
600 my $scree_plot_file = $c->stash->{pca_scree_plot_file};
601 my $variances_file = $c->stash->{pca_variances_file};
603 $scores_file = $c->controller('solGS::Files')
604 ->copy_to_tempfiles_subdir( $c, $scores_file, 'pca' );
605 $loadings_file = $c->controller('solGS::Files')
606 ->copy_to_tempfiles_subdir( $c, $loadings_file, 'pca' );
607 $scree_data_file = $c->controller('solGS::Files')
608 ->copy_to_tempfiles_subdir( $c, $scree_data_file, 'pca' );
609 $scree_plot_file = $c->controller('solGS::Files')
610 ->copy_to_tempfiles_subdir( $c, $scree_plot_file, 'pca' );
611 $variances_file = $c->controller('solGS::Files')
612 ->copy_to_tempfiles_subdir( $c, $variances_file, 'pca' );
614 $c->stash->{download_scores} = $scores_file;
615 $c->stash->{download_loadings} = $loadings_file;
616 $c->stash->{download_scree_data} = $scree_data_file;
617 $c->stash->{download_scree_plot} = $scree_plot_file;
618 $c->stash->{download_variances} = $variances_file;
622 sub begin : Private {
623 my ( $self, $c ) = @_;
625 $c->controller('solGS::Files')->get_solgs_dirs($c);
629 __PACKAGE__->meta->make_immutable;
631 ####
633 ####