1 package SGN
::Controller
::solGS
::pca
;
4 use namespace
::autoclean
;
6 use Carp qw
/ carp confess croak /;
7 use File
::Spec
::Functions qw
/ catfile catdir/;
8 use File
::Path qw
/ mkpath /;
9 use File
::Temp qw
/ tempfile tempdir /;
10 use File
::Slurp qw
/write_file read_file :edit prepend_file/;
12 use Scalar
::Util qw
/weaken reftype/;
13 use Storable qw
/ nstore retrieve /;
17 BEGIN { extends
'Catalyst::Controller' }
19 sub pca_analysis
: Path
('/pca/analysis/') Args
() {
20 my ( $self, $c, $id ) = @_;
22 if ( $id && !$c->user ) {
23 $c->controller('solGS::Utils')->require_login($c);
26 $c->stash->{template
} = '/solgs/tools/pca/analysis.mas';
30 sub run_pca_analysis
: Path
('/run/pca/analysis') Args
() {
31 my ( $self, $c ) = @_;
33 my $args = $c->req->param('arguments');
34 $c->controller('solGS::Utils')->stash_json_args( $c, $args );
36 $c->stash->{data_type
} = 'genotype' if !$c->stash->{data_type
};
38 my $file_id = $c->controller('solGS::Files')->create_file_id($c);
39 $c->stash->{file_id
} = $file_id;
41 my $list_id = $c->stash->{list_id
};
43 $c->controller('solGS::List')
44 ->create_list_population_metadata_file( $c, $file_id );
45 $c->controller('solGS::List')->stash_list_metadata( $c, $list_id );
48 my $combo_pops_id = $c->stash->{combo_pops_id
};
50 $c->controller('solGS::combinedTrials')
51 ->get_combined_pops_list( $c, $combo_pops_id );
52 $c->stash->{pops_ids_list
} = $c->stash->{combined_pops_list
};
56 $c->controller('solGS::CachedResult')->check_pca_output( $c, $file_id );
57 if ( !$cached->{scores_file
} ) {
61 $self->prepare_pca_output_response($c);
62 my $ret = $c->stash->{pca_output_response
};
65 $c->res->content_type('application/json');
70 sub download_pca_scores
: Path
('/download/pca/scores/population') Args
(1) {
71 my ( $self, $c, $file_id ) = @_;
73 $c->stash->{file_id
} = $file_id;
74 $self->pca_scores_file($c);
75 my $file = $c->stash->{pca_scores_file
};
76 my $pca_data = $c->controller('solGS::Utils')
77 ->structure_downloadable_data( $file, 'Individuals' );
79 $c->res->content_type("text/plain");
80 $c->res->body( join "", map { $_->[0] } @
$pca_data );
83 sub download_pca_loadings
: Path
('/download/pca/loadings/population') Args
(1) {
84 my ( $self, $c, $file_id ) = @_;
86 $c->stash->{file_id
} = $file_id;
87 $self->pca_loadings_file($c);
88 my $file = $c->stash->{pca_loadings_file
};
89 my $pca_data = $c->controller('solGS::Utils')
90 ->structure_downloadable_data( $file, 'Variables' );
92 $c->res->content_type("text/plain");
93 $c->res->body( join "", map { $_->[0] } @
$pca_data );
97 sub download_pca_variances
: Path
('/download/pca/variances/population') Args
(1)
99 my ( $self, $c, $file_id ) = @_;
101 $c->stash->{file_id
} = $file_id;
102 $self->pca_variances_file($c);
103 my $file = $c->stash->{pca_variances_file
};
105 my $pca_data = $c->controller('solGS::Utils')
106 ->structure_downloadable_data( $file, 'PCs' );
108 $c->res->content_type("text/plain");
109 $c->res->body( join "", map { $_->[0] } @
$pca_data );
113 sub prepare_pca_output_response
{
114 my ( $self, $c ) = @_;
116 my $file_id = $c->stash->{file_id
};
117 my $ret->{status
} = undef;
120 $self->pca_scores_file($c);
121 my $scores_file = $c->stash->{pca_scores_file
};
123 $self->pca_variances_file($c);
124 my $variances_file = $c->stash->{pca_variances_file
};
126 if ( -s
$scores_file ) {
128 $c->controller('solGS::Utils')->read_file_data($scores_file);
130 $c->controller('solGS::Utils')->read_file_data($variances_file);
132 $self->prep_pca_download_files($c);
133 my $scree_plot_file = $c->stash->{download_scree_plot
};
134 my $scree_data_file = $c->stash->{download_scree_data
};
135 my $loadings_file = $c->stash->{download_loadings
};
136 my $variances_file = $c->stash->{download_variances
};
137 my $scores_file = $c->stash->{download_scores
};
139 my $output_link = '/pca/analysis/' . $file_id;
142 my $tr_pop_id = $c->stash->{training_pop_id
};
143 my $sel_pop_id = $c->stash->{selection_pop_id
};
144 if ( $tr_pop_id && $sel_pop_id ) {
146 $tr_pop_id => 'Training population',
147 $sel_pop_id => 'Selection population'
151 $c->controller('solGS::combinedTrials')
152 ->process_trials_list_details($c);
153 $trials_names = $c->stash->{trials_names
};
157 $ret->{scores
} = $scores;
158 $ret->{variances
} = $variances;
159 $ret->{scores_file
} = $scores_file;
160 $ret->{variances_file
} = $variances_file;
161 $ret->{loadings_file
} = $loadings_file;
162 $ret->{scree_data_file
} = $scree_data_file;
163 $ret->{scree_plot_file
} = $scree_plot_file;
164 $ret->{status
} = 'success';
166 $ret->{pca_pop_id
} = $c->stash->{pca_pop_id
};
167 $ret->{file_id
} = $file_id;
168 $ret->{list_id
} = $c->stash->{list_id
};
169 $ret->{trials_names
} = $trials_names;
170 $ret->{output_link
} = $output_link;
171 $ret->{data_type
} = $c->stash->{data_type
};
174 $c->stash->{pca_output_response
} = $ret;
177 $ret->{status
} = $self->error_message($c);
178 $c->stash->{formatted_pca_output
} = $ret;
182 die "Required file id argument missing.";
188 my ( $self, $c ) = @_;
190 $self->pca_scores_file($c);
191 my $pca_scores_file = $c->stash->{pca_scores_file
};
193 $self->pca_input_files($c);
194 my $files = $c->stash->{pca_input_files
};
199 my @data_files = split( /\s/, read_file
( $files, { binmode => ':utf8' } ) );
201 foreach my $file (@data_files) {
202 push @data_exists, 1 if -s
$file;
205 if ( !@data_exists ) {
206 my $data_type = $c->stash->{data_type
};
207 $error_message = "There is no $data_type for this dataset.";
209 elsif ( @data_exists && !-s
$pca_scores_file ) {
210 $error_message = 'The PCA R Script failed.';
213 return $error_message;
216 sub format_pca_scores
{
217 my ( $self, $c ) = @_;
219 my $file = $c->stash->{pca_scores_file
};
220 my $data = $c->controller('solGS::Utils')->read_file_data($file);
222 $c->stash->{pca_scores
} = $data;
227 my ( $self, $c ) = @_;
229 my $data_type = $c->stash->{data_type
};
230 my $pca_pop_id = $c->stash->{pca_pop_id
};
231 my $protocol_id = $c->stash->{genotyping_protocol_id
};
235 if ( $data_type =~ /phenotype/i ) {
236 $jobs = $c->controller('solGS::AsyncJob')
237 ->create_phenotype_data_query_jobs( $c, $pca_pop_id );
239 elsif ( $data_type =~ /genotype/i ) {
240 $jobs = $c->controller('solGS::AsyncJob')
241 ->create_genotype_data_query_jobs( $c, $pca_pop_id, $protocol_id );
244 if ( reftype
$jobs ne 'ARRAY' ) {
248 $c->stash->{pca_query_jobs
} = $jobs;
251 sub pca_scores_file
{
252 my ( $self, $c ) = @_;
254 my $file_id = $c->stash->{file_id
};
255 $c->stash->{cache_dir
} = $c->stash->{pca_cache_dir
};
258 key
=> "pca_scores_${file_id}",
259 file
=> "pca_scores_${file_id}",
260 stash_key
=> 'pca_scores_file'
263 $c->controller('solGS::Files')->cache_file( $c, $cache_data );
267 sub pca_scree_data_file
{
268 my ( $self, $c ) = @_;
270 my $file_id = $c->stash->{file_id
};
271 $c->stash->{cache_dir
} = $c->stash->{pca_cache_dir
};
274 key
=> "pca_scree_data_${file_id}",
275 file
=> "pca_scree_data_${file_id}",
276 stash_key
=> 'pca_scree_data_file'
279 $c->controller('solGS::Files')->cache_file( $c, $cache_data );
283 sub pca_scree_plot_file
{
284 my ( $self, $c ) = @_;
286 my $file_id = $c->stash->{file_id
};
287 $c->stash->{cache_dir
} = $c->stash->{pca_cache_dir
};
290 key
=> "pca_scree_plot_${file_id}",
291 file
=> "pca_scree_plot_${file_id}",
293 stash_key
=> 'pca_scree_plot_file'
296 $c->controller('solGS::Files')->cache_file( $c, $cache_data );
300 sub pca_variances_file
{
301 my ( $self, $c ) = @_;
303 my $file_id = $c->stash->{file_id
};
304 $c->stash->{cache_dir
} = $c->stash->{pca_cache_dir
};
307 key
=> "pca_variances_${file_id}",
308 file
=> "pca_variances_${file_id}",
309 stash_key
=> 'pca_variances_file'
312 $c->controller('solGS::Files')->cache_file( $c, $cache_data );
316 sub pca_loadings_file
{
317 my ( $self, $c ) = @_;
319 my $file_id = $c->stash->{file_id
};
320 $c->stash->{cache_dir
} = $c->stash->{pca_cache_dir
};
323 key
=> "pca_loadings_${file_id}",
324 file
=> "pca_loadings_${file_id}",
325 stash_key
=> 'pca_loadings_file'
328 $c->controller('solGS::Files')->cache_file( $c, $cache_data );
332 sub pca_output_files
{
333 my ( $self, $c ) = @_;
335 my $file_id = $c->stash->{file_id
};
337 $self->pca_scores_file($c);
338 $self->pca_loadings_file($c);
339 $self->pca_variances_file($c);
340 $self->pca_scree_data_file($c);
341 $self->pca_scree_plot_file($c);
342 $self->combined_pca_trials_data_file($c);
344 my $file_list = join( "\t",
345 $c->stash->{pca_scores_file
}, $c->stash->{pca_loadings_file
},
346 $c->stash->{pca_scree_data_file
}, $c->stash->{pca_scree_plot_file
},
347 $c->stash->{pca_variances_file
}, $c->stash->{combined_pca_data_file
},
350 my $tmp_dir = $c->stash->{pca_temp_dir
};
351 my $name = "pca_output_files_${file_id}";
353 $c->controller('solGS::Files')->create_tempfile( $tmp_dir, $name );
354 write_file
( $tempfile, { binmode => ':utf8' }, $file_list );
356 $c->stash->{pca_output_files
} = $tempfile;
360 sub combined_pca_trials_data_file
{
361 my ( $self, $c ) = @_;
363 my $file_id = $c->stash->{file_id
};
364 my $tmp_dir = $c->stash->{pca_temp_dir
};
365 my $name = "combined_pca_data_file_${file_id}";
367 $c->controller('solGS::Files')->create_tempfile( $tmp_dir, $name );
369 $c->stash->{combined_pca_data_file
} = $tempfile;
373 sub pca_data_input_files
{
374 my ( $self, $c ) = @_;
376 my $pop_id = $c->stash->{pca_pop_id
};
377 my $data_type = $c->stash->{data_type
};
378 my $protocol_id = $c->stash->{genotyping_protocol_id
};
381 if ( $data_type =~ /genotype/i ) {
382 $c->controller('solGS::Files')
383 ->genotype_file_name( $c, $pop_id, $protocol_id );
384 $input_file = $c->stash->{genotype_file_name
};
386 elsif ( $data_type =~ /phenotype/i ) {
387 $c->controller('solGS::Files')->phenotype_file_name( $c, $pop_id );
388 $input_file = $c->stash->{phenotype_file_name
};
394 sub pca_input_files
{
395 my ( $self, $c ) = @_;
397 my $file_id = $c->stash->{file_id
};
398 my $tmp_dir = $c->stash->{pca_temp_dir
};
400 my $name = "pca_input_files_${file_id}";
402 $c->controller('solGS::Files')->create_tempfile( $tmp_dir, $name );
405 my $data_type = $c->stash->{data_type
};
406 if ( $data_type =~ /genotype/i ) {
407 $self->pca_geno_input_files($c);
408 $files = $c->stash->{pca_geno_input_files
};
410 elsif ( $data_type =~ /phenotype/i ) {
411 $self->pca_pheno_input_files($c);
412 $files = $c->stash->{pca_pheno_input_files
};
415 write_file
( $tempfile, { binmode => ':utf8' }, $files );
416 $c->stash->{pca_input_files
} = $tempfile;
420 sub pca_geno_input_files
{
421 my ( $self, $c ) = @_;
423 my $data_type = $c->stash->{data_type
};
426 if ( $data_type =~ /genotype/i ) {
427 if ( $c->req->referer =~
428 /solgs\/selection\
/|solgs\/combined\
/model\/\d
+\
/selection\// )
430 $self->training_selection_geno_files($c);
434 $c->stash->{genotype_files_list
} || $c->stash->{genotype_file_name
};
437 $files = join( "\t", @
$files ) if reftype
($files) eq 'ARRAY';
438 $c->stash->{pca_geno_input_files
} = $files;
441 sub training_selection_geno_files
{
442 my ( $self, $c ) = @_;
444 my $tr_pop = $c->stash->{training_pop_id
};
445 my $sel_pop = $c->stash->{selection_pop_id
};
448 foreach my $id ( ( $tr_pop, $sel_pop ) ) {
449 $c->controller('solGS::Files')->genotype_file_name( $c, $id );
450 push @files, $c->stash->{genotype_file_name
};
453 my $files = join( "\t", @files );
454 $c->stash->{genotype_files_list
} = $files;
457 sub pca_pheno_input_files
{
458 my ( $self, $c ) = @_;
460 my $data_type = $c->stash->{data_type
};
463 if ( $data_type =~ /phenotype/i ) {
464 $files = $c->stash->{phenotype_files_list
}
465 || $c->stash->{phenotype_file_name
};
467 $files = join( "\t", @
$files ) if reftype
($files) eq 'ARRAY';
469 $c->controller('solGS::Files')->phenotype_metadata_file($c);
470 my $metadata_file = $c->stash->{phenotype_metadata_file
};
472 $files .= "\t" . $metadata_file;
475 $c->stash->{pca_pheno_input_files
} = $files;
480 my ( $self, $c ) = @_;
482 $self->pca_query_jobs_file($c);
483 $c->stash->{prerequisite_jobs
} = $c->stash->{pca_query_jobs_file
};
485 $self->pca_r_jobs_file($c);
486 $c->stash->{dependent_jobs
} = $c->stash->{pca_r_jobs_file
};
488 $c->controller('solGS::AsyncJob')->run_async($c);
492 sub run_pca_single_core
{
493 my ( $self, $c ) = @_;
495 $self->pca_query_jobs($c);
496 my $queries = $c->stash->{pca_query_jobs
};
498 $self->pca_r_jobs($c);
499 my $r_jobs = $c->stash->{pca_r_jobs
};
501 foreach my $job (@
$queries) {
502 $c->controller('solGS::AsyncJob')->submit_job_cluster( $c, $job );
505 foreach my $job (@
$r_jobs) {
506 $c->controller('solGS::AsyncJob')->submit_job_cluster( $c, $job );
511 sub run_pca_multi_cores
{
512 my ( $self, $c ) = @_;
514 $self->pca_query_jobs_file($c);
515 $c->stash->{prerequisite_jobs
} = $c->stash->{pca_query_jobs_file
};
517 $self->pca_r_jobs_file($c);
518 $c->stash->{dependent_jobs
} = $c->stash->{pca_r_jobs_file
};
520 $c->controller('solGS::AsyncJob')->run_async($c);
525 my ( $self, $c ) = @_;
527 my $file_id = $c->stash->{file_id
};
529 $self->pca_output_files($c);
530 my $output_file = $c->stash->{pca_output_files
};
532 $self->pca_input_files($c);
533 my $input_file = $c->stash->{pca_input_files
};
535 $c->stash->{analysis_tempfiles_dir
} = $c->stash->{pca_temp_dir
};
537 $c->stash->{input_files
} = $input_file;
538 $c->stash->{output_files
} = $output_file;
539 $c->stash->{r_temp_file
} = "pca-${file_id}";
540 $c->stash->{r_script
} = 'R/solGS/pca.r';
542 $c->controller('solGS::AsyncJob')->get_cluster_r_job_args($c);
543 my $jobs = $c->stash->{cluster_r_job_args
};
545 if ( reftype
$jobs ne 'ARRAY' ) {
549 $c->stash->{pca_r_jobs
} = $jobs;
553 sub pca_r_jobs_file
{
554 my ( $self, $c ) = @_;
556 $self->pca_r_jobs($c);
557 my $jobs = $c->stash->{pca_r_jobs
};
559 my $temp_dir = $c->stash->{pca_temp_dir
};
560 my $jobs_file = $c->controller('solGS::Files')
561 ->create_tempfile( $temp_dir, 'pca-r-jobs-file' );
563 nstore
$jobs, $jobs_file
564 or croak
"pca r jobs : $! serializing pca r jobs to $jobs_file";
566 $c->stash->{pca_r_jobs_file
} = $jobs_file;
570 sub pca_query_jobs_file
{
571 my ( $self, $c ) = @_;
573 $self->pca_query_jobs($c);
574 my $jobs = $c->stash->{pca_query_jobs
};
576 my $temp_dir = $c->stash->{pca_temp_dir
};
577 my $jobs_file = $c->controller('solGS::Files')
578 ->create_tempfile( $temp_dir, 'pca-query-jobs-file' );
580 nstore
$jobs, $jobs_file
581 or croak
"pca query jobs : $! serializing pca query jobs to $jobs_file";
583 $c->stash->{pca_query_jobs_file
} = $jobs_file;
587 sub prep_pca_download_files
{
588 my ( $self, $c ) = @_;
590 my $analysis_type = $c->stash->{analysis_type
};
591 $self->pca_scores_file($c);
592 $self->pca_loadings_file($c);
593 $self->pca_variances_file($c);
594 $self->pca_scree_data_file($c);
595 $self->pca_scree_plot_file($c);
597 my $scores_file = $c->stash->{pca_scores_file
};
598 my $loadings_file = $c->stash->{pca_loadings_file
};
599 my $scree_data_file = $c->stash->{pca_scree_data_file
};
600 my $scree_plot_file = $c->stash->{pca_scree_plot_file
};
601 my $variances_file = $c->stash->{pca_variances_file
};
603 $scores_file = $c->controller('solGS::Files')
604 ->copy_to_tempfiles_subdir( $c, $scores_file, 'pca' );
605 $loadings_file = $c->controller('solGS::Files')
606 ->copy_to_tempfiles_subdir( $c, $loadings_file, 'pca' );
607 $scree_data_file = $c->controller('solGS::Files')
608 ->copy_to_tempfiles_subdir( $c, $scree_data_file, 'pca' );
609 $scree_plot_file = $c->controller('solGS::Files')
610 ->copy_to_tempfiles_subdir( $c, $scree_plot_file, 'pca' );
611 $variances_file = $c->controller('solGS::Files')
612 ->copy_to_tempfiles_subdir( $c, $variances_file, 'pca' );
614 $c->stash->{download_scores
} = $scores_file;
615 $c->stash->{download_loadings
} = $loadings_file;
616 $c->stash->{download_scree_data
} = $scree_data_file;
617 $c->stash->{download_scree_plot
} = $scree_plot_file;
618 $c->stash->{download_variances
} = $variances_file;
622 sub begin
: Private
{
623 my ( $self, $c ) = @_;
625 $c->controller('solGS::Files')->get_solgs_dirs($c);
629 __PACKAGE__
->meta->make_immutable;