Merge pull request #5230 from solgenomics/topic/open_pollinated
[sgn.git] / lib / SGN / Controller / solGS / Correlation.pm
blob17fec539fe161413526c91801fa8128d6791c2b3
1 package SGN::Controller::solGS::Correlation;
3 use Moose;
4 use namespace::autoclean;
6 use Carp qw/ carp confess croak /;
7 use Cache::File;
8 use CXGN::Tools::Run;
9 use File::Temp qw / tempfile tempdir /;
10 use File::Spec::Functions qw / catfile catdir/;
11 use File::Slurp qw /write_file read_file/;
12 use File::Path qw / mkpath /;
13 use File::Copy;
14 use File::Basename;
15 use CXGN::Phenome::Population;
16 use JSON;
17 use Try::Tiny;
18 use Scalar::Util qw /weaken reftype/;
19 use Storable qw/ nstore retrieve /;
22 BEGIN { extends 'Catalyst::Controller' }
24 sub cluster_analysis : Path('/correlation/analysis/') Args() {
25 my ( $self, $c, $id ) = @_;
27 if ( $id && !$c->user ) {
28 $c->controller('solGS::Utils')->require_login($c);
31 $c->stash->{template} = '/solgs/tools/correlation/analysis.mas';
36 sub pheno_correlation_analysis :Path('/phenotypic/correlation/analysis') Args(0) {
37 my ($self, $c) = @_;
39 my $args = $c->req->param('arguments');
40 $c->controller('solGS::Utils')->stash_json_args($c, $args);
42 $self->cache_pheno_corr_output_files($c);
43 my $corre_json_file = $c->stash->{pheno_corr_json_file};
45 my $ret->{status} = 'Correlation analysis failed.';
47 if (!-s $corre_json_file)
49 $c->controller('solGS::Utils')->save_metadata($c);
50 $self->run_correlation_analysis($c);
53 if (-s $corre_json_file)
55 $ret->{status} = 'success';
56 $ret->{data} = read_file($corre_json_file, {binmode => ':utf8'});
57 $ret->{corre_table_file} = $self->download_pheno_correlation_file($c);
60 $ret = to_json($ret);
62 $c->res->content_type('application/json');
63 $c->res->body($ret);
68 sub genetic_correlation_analysis :Path('/genetic/correlation/analysis') Args() {
69 my ($self, $c) = @_;
71 my $args = $c->req->param('arguments');
72 $c->controller('solGS::Utils')->stash_json_args($c, $args);
74 my $corr_pop_id = $c->stash->{corr_pop_id};
75 my $pop_type = $c->stash->{pop_type};
76 $c->stash->{selection_pop_id} = $corr_pop_id if $pop_type =~ /selection/;
78 $self->cache_genetic_corr_output_files($c);
79 my $corre_json_file = $c->stash->{genetic_corr_json_file};
81 if (!-s $corre_json_file)
83 $c->controller('solGS::Gebvs')->run_combine_traits_gebvs($c);
86 $c->controller('solGS::Gebvs')->combined_gebvs_file($c);
87 my $combined_gebvs_file = $c->stash->{combined_gebvs_file};
89 my $ret->{status} = undef;
90 my $json = JSON->new();
91 if ( !-s $combined_gebvs_file )
93 $ret->{status} = "There is no GEBVs input. Error occured combining the GEBVs of the traits.";
95 else
97 $self->run_correlation_analysis($c);
100 if (-s $corre_json_file)
102 $ret->{status} = 'success';
103 $ret->{data} = read_file($corre_json_file, {binmode => ':utf8'});
104 $ret->{corre_table_file} = $self->download_genetic_correlation_file($c);
106 else
108 $ret->{status} = 'There is no correlation output. Error occured running the correlation. ';
111 $ret = $json->encode($ret);
112 $c->res->content_type('application/json');
113 $c->res->body($ret);
117 sub cache_pheno_corr_output_files {
118 my ($self, $c) = @_;
120 my $pop_id = $c->stash->{corr_pop_id};
121 my $corre_cache_dir = $c->stash->{correlation_cache_dir};
123 my $table_cache_data = {key => 'pheno_corr_table_' . $pop_id,
124 file => "pheno_corr_table_${pop_id}" . '.txt',
125 stash_key => 'pheno_corr_table_file',
126 cache_dir => $corre_cache_dir
129 $c->controller('solGS::Files')->cache_file($c, $table_cache_data);
131 my $json_cache_data = {key => 'pheno_corr_json_' . $pop_id,
132 file => "pheno_corr_json_${pop_id}" . '.txt',
133 stash_key => 'pheno_corr_json_file',
134 cache_dir => $corre_cache_dir
137 $c->controller('solGS::Files')->cache_file($c, $json_cache_data);
142 sub cache_genetic_corr_output_files {
143 my ($self, $c) = @_;
145 my $corr_pop_id = $c->stash->{corr_pop_id};
146 my $pop_type = $c->stash->{pop_type};
147 my $traits_code = $c->stash->{training_traits_code};
148 my $sindex_name = $c->stash->{sindex_name};
150 my $model_id = $c->stash->{training_pop_id};
151 my $identifier;
152 if ($sindex_name)
154 $identifier = $sindex_name;
156 else
158 $identifier = $pop_type =~ /selection/ ? "$model_id-${corr_pop_id}-${traits_code}" : "${corr_pop_id}-${traits_code}";
161 my $corre_cache_dir = $c->stash->{correlation_cache_dir};
163 my $table_cache_data = {key => 'genetic_corr_table_' . $identifier,
164 file => "genetic_corr_table_${identifier}",
165 stash_key => 'genetic_corr_table_file',
166 cache_dir => $corre_cache_dir
169 $c->controller('solGS::Files')->cache_file($c, $table_cache_data);
171 my $json_cache_data = {key => 'genetic_corr_json_' . $identifier,
172 file => "genetic_corr_json_${identifier}",
173 stash_key => 'genetic_corr_json_file',
174 cache_dir => $corre_cache_dir
177 $c->controller('solGS::Files')->cache_file($c, $json_cache_data);
182 sub download_pheno_correlation_file {
183 my ($self, $c) = @_;
185 $self->cache_pheno_corr_output_files($c);
186 my $file = $c->stash->{pheno_corr_table_file};
188 $file = $c->controller('solGS::Files')->copy_to_tempfiles_subdir($c, $file, 'correlation');
190 return $file;
194 sub download_genetic_correlation_file {
195 my ($self, $c) = @_;
197 $self->cache_genetic_corr_output_files($c);
198 my $file = $c->stash->{genetic_corr_table_file};
200 $file = $c->controller('solGS::Files')->copy_to_tempfiles_subdir($c, $file, 'correlation');
202 return $file;
206 sub pheno_corr_output_files {
207 my ($self, $c) = @_;
209 my $pop_id = $c->stash->{corr_pop_id};
210 $self->cache_pheno_corr_output_files($c);
212 my $output_files = join ("\t",
213 $c->stash->{pheno_corr_table_file},
214 $c->stash->{pheno_corr_json_file},
217 my $tmp_dir = $c->stash->{correlation_temp_dir};
218 my $name = "pheno_corr_output_files_${pop_id}";
219 my $tempfile = $c->controller('solGS::Files')->create_tempfile($tmp_dir, $name);
220 write_file($tempfile, {binmode => ':utf8'}, $output_files);
222 $c->stash->{pheno_corr_output_files} = $tempfile;
227 sub pheno_corr_input_files {
228 my ($self, $c) = @_;
230 my $pop_id = $c->stash->{corr_pop_id};
231 my $data_type = $c->stash->{data_type} || 'phenotype';
232 my $input_files;
234 if ( $data_type =~ /phenotype/i )
236 $input_files = $c->stash->{phenotype_files_list}
237 || $c->stash->{phenotype_file_name};
239 if (!$input_files)
241 if ($c->stash->{data_set_type} =~ /combined_populations/)
243 $c->controller('solGS::combinedTrials')->get_combined_pops_list( $c, $pop_id );
244 $c->controller('solGS::combinedTrials')->multi_pops_pheno_files($c, $c->stash->{combined_pops_list});
246 else
248 $c->controller('solGS::Files')->phenotype_file_name( $c, $pop_id);
251 $c->controller('solGS::Files')->phenotype_metadata_file($c);
252 my $metadata_file = $c->stash->{phenotype_metadata_file};
253 $input_files .= "\t" . $metadata_file;
256 my $tmp_dir = $c->stash->{correlation_temp_dir};
257 my $name = "pheno_corr_input_files_${pop_id}";
258 my $tempfile = $c->controller('solGS::Files')->create_tempfile($tmp_dir, $name);
259 write_file($tempfile, {binmode => ':utf8'}, $input_files);
260 $c->stash->{pheno_corr_input_files} = $tempfile;
265 sub geno_corr_output_files {
266 my ($self, $c) = @_;
268 my $pop_id = $c->stash->{corr_pop_id};
269 $self->cache_genetic_corr_output_files($c);
271 my $output_files = join ("\t",
272 $c->stash->{genetic_corr_table_file},
273 $c->stash->{genetic_corr_json_file},
276 my $tmp_dir = $c->stash->{correlation_temp_dir};
277 my $name = "genetic_corr_output_files_${pop_id}";
278 my $tempfile = $c->controller('solGS::Files')->create_tempfile($tmp_dir, $name);
279 write_file($tempfile, {binmode => ':utf8'}, $output_files);
281 $c->stash->{geno_corr_output_files} = $tempfile;
286 sub geno_corr_input_files {
287 my ($self, $c) = @_;
289 my $pop_id = $c->stash->{corr_pop_id};
290 my $gebvs_file = $c->stash->{combined_gebvs_file};
291 my $index_file = $c->stash->{selection_index_file};
293 my $input_files = join ("\t",
294 $gebvs_file,
295 $index_file
298 my $tmp_dir = $c->stash->{correlation_temp_dir};
299 my $name = "genetic_corr_input_files_${pop_id}";
300 my $tempfile = $c->controller('solGS::Files')->create_tempfile($tmp_dir, $name);
301 write_file($tempfile, {binmode => ':utf8'}, $input_files);
303 $c->stash->{geno_corr_input_files} = $tempfile;
307 sub corr_input_files {
308 my ($self, $c) = @_;
310 $c->stash->{correlation_script} = "R/solGS/correlation.r";
311 if ($c->stash->{correlation_type} =~ /pheno/)
313 $self->pheno_corr_input_files($c);
314 $c->stash->{corre_input_files} = $c->stash->{pheno_corr_input_files};
316 elsif ($c->stash->{correlation_type} =~ /genetic/)
318 $self->geno_corr_input_files($c);
319 $c->stash->{corre_input_files} = $c->stash->{geno_corr_input_files};
324 sub corr_output_files {
325 my ($self, $c) = @_;
327 if ($c->stash->{correlation_type} =~ /pheno/)
329 $self->pheno_corr_output_files($c);
330 $c->stash->{corre_output_files} = $c->stash->{pheno_corr_output_files};
332 elsif ($c->stash->{correlation_type} =~ /genetic/)
334 $self->geno_corr_output_files($c);
335 $c->stash->{corre_output_files} = $c->stash->{geno_corr_output_files};
340 sub run_correlation_analysis {
341 my ($self, $c) = @_;
343 my $queries_file;
344 if ($c->stash->{correlation_type} =~ /pheno/)
346 $self->corr_query_jobs_file($c);
347 $queries_file = $c->stash->{corr_query_jobs_file};
350 $self->corr_r_jobs_file($c);
351 my $r_jobs_file = $c->stash->{corr_r_jobs_file};
352 $c->stash->{prerequisite_jobs} = $queries_file if $queries_file;
353 $c->stash->{dependent_jobs} = $r_jobs_file;
355 $c->controller('solGS::AsyncJob')->run_async($c);
360 sub corr_r_jobs {
361 my ($self, $c) = @_;
363 $self->corr_input_files($c);
364 $c->stash->{input_files} = $c->stash->{corre_input_files};
366 $self->corr_output_files($c);
367 $c->stash->{output_files} = $c->stash->{corre_output_files};
369 my $corre_type = $c->stash->{correlation_type};
370 my $pop_id = $c->stash->{corr_pop_id};
372 $c->stash->{r_temp_file} = "${corre_type}-${pop_id}";
373 $c->stash->{r_script} = $c->stash->{correlation_script};
375 $c->stash->{analysis_tempfiles_dir} = $c->stash->{correlation_temp_dir};
377 $c->controller('solGS::AsyncJob')->get_cluster_r_job_args($c);
378 my $jobs = $c->stash->{cluster_r_job_args};
380 if (reftype $jobs ne 'ARRAY')
382 $jobs = [$jobs];
385 $c->stash->{corr_r_jobs} = $jobs;
390 sub corr_r_jobs_file {
391 my ($self, $c) = @_;
393 $self->corr_r_jobs($c);
394 my $jobs = $c->stash->{corr_r_jobs};
396 my $temp_dir = $c->stash->{correlation_temp_dir};
397 my $jobs_file = $c->controller('solGS::Files')->create_tempfile($temp_dir, 'corre-r-jobs-file');
399 nstore $jobs, $jobs_file
400 or croak "correlation r jobs : $! serializing correlation r jobs to $jobs_file";
402 $c->stash->{corr_r_jobs_file} = $jobs_file;
406 sub corr_query_jobs {
407 my ($self, $c) = @_;
409 my $corr_pop_id = $c->stash->{corr_pop_id};
410 my $data_set_type = $c->stash->{data_set_type};
411 my $data_str = $c->stash->{data_structure};
412 my $trials_ids = [];
414 my $jobs = $c->controller('solGS::AsyncJob')->create_phenotype_data_query_jobs($c, $corr_pop_id);
416 if (reftype $jobs ne 'ARRAY')
418 $jobs = [$jobs];
421 $c->stash->{corr_query_jobs} = $jobs;
426 sub corr_query_jobs_file {
427 my ($self, $c) = @_;
429 $self->corr_query_jobs($c);
430 my $jobs = $c->stash->{corr_query_jobs};
432 my $corr_type = $c->stash->{correlation_type};
433 my $jobs_file;
435 if ($jobs->[0])
437 my $temp_dir = $c->stash->{correlation_temp_dir};
438 $jobs_file = $c->controller('solGS::Files')->create_tempfile($temp_dir, "${corr_type}-query-jobs-file");
440 nstore $jobs, $jobs_file
441 or croak "correlation pheno query jobs : $! serializing correlation ${corr_type} query jobs to $jobs_file";
444 $c->stash->{corr_query_jobs_file} = $jobs_file;
449 sub begin : Private {
450 my ($self, $c) = @_;
452 $c->controller('solGS::Files')->get_solgs_dirs($c);
458 ####
460 ####