can download plant phenotype data in the same way as plot phenotype data
[sgn.git] / lib / SGN / Controller / solGS / Correlation.pm
blob0c35dd6fa6901b3ec328424930f4585dd39ae158
1 package SGN::Controller::solGS::Correlation;
3 use Moose;
4 use namespace::autoclean;
6 use Cache::File;
7 use CXGN::Tools::Run;
8 use File::Temp qw / tempfile tempdir /;
9 use File::Spec::Functions qw / catfile catdir/;
10 use File::Slurp qw /write_file read_file/;
11 use File::Path qw / mkpath /;
12 use File::Copy;
13 use File::Basename;
14 use CXGN::Phenome::Population;
15 use JSON;
16 use Try::Tiny;
18 BEGIN { extends 'Catalyst::Controller' }
21 sub check_pheno_corr_result :Path('/phenotype/correlation/check/result/') Args(1) {
22 my ($self, $c, $pop_id) = @_;
24 $c->stash->{pop_id} = $pop_id;
26 $self->pheno_correlation_output_files($c);
27 my $corre_output_file = $c->stash->{corre_coefficients_json_file};
29 my $ret->{result} ='No';
31 if (-s $corre_output_file && $pop_id =~ /\d+/)
33 $ret->{result} = 'yes';
36 $ret = to_json($ret);
38 $c->res->content_type('application/json');
39 $c->res->body($ret);
44 sub correlation_phenotype_data :Path('/correlation/phenotype/data/') Args(0) {
45 my ($self, $c) = @_;
47 my $pop_id = $c->req->param('population_id');
48 $c->stash->{pop_id} = $pop_id;
49 my $referer = $c->req->referer;
51 my $phenotype_file;
53 if( $pop_id =~ /uploaded/)
55 my $phenotype_dir = $c->stash->{solgs_prediction_upload_dir};
56 my $userid = $c->user->id;
57 $phenotype_file = "phenotype_data_${userid}_${pop_id}";
58 $phenotype_file = $c->controller('solGS::solGS')->grep_file($phenotype_dir, $phenotype_file);
60 elsif ($referer =~ /qtl/)
62 $self->create_correlation_phenodata_file($c);
63 $phenotype_file = $c->stash->{phenotype_file};
65 else
67 my $phenotype_dir = $c->stash->{solgs_cache_dir};
68 $phenotype_file = 'phenotype_data_' . $pop_id;
69 $phenotype_file = $c->controller('solGS::solGS')->grep_file($phenotype_dir, '\'^' . $phenotype_file . '\'');
72 unless ($phenotype_file)
74 $self->create_correlation_phenodata_file($c);
75 $phenotype_file = $c->stash->{phenotype_file};
78 my $ret->{status} = 'failed';
80 if (-s $phenotype_file)
82 $ret->{status} = 'success';
84 else
86 $ret->{status} = 'This population set has no phenotype data.';
89 $ret = to_json($ret);
91 $c->res->content_type('application/json');
92 $c->res->body($ret);
97 sub correlation_genetic_data :Path('/correlation/genetic/data/') Args(0) {
98 my ($self, $c) = @_;
100 my $corr_pop_id = $c->req->param('corr_population_id');
101 my $pop_type = $c->req->param('type');
102 my $model_id = $c->req->param('model_id');
104 my $index_file = $c->req->param('index_file');
106 $c->stash->{model_id} = $model_id;
107 $c->stash->{pop_id} = $model_id;
109 $c->stash->{prediction_pop_id} = $corr_pop_id if $pop_type =~ /selection/;
111 $c->stash->{selection_index_file} = $index_file;
112 $self->combine_gebvs_of_traits($c);
113 my $combined_gebvs_file = $c->stash->{combined_gebvs_file};
115 my $ret->{status} = 'failed';
117 if ( -s $combined_gebvs_file )
119 $ret->{status} = 'success';
120 $ret->{gebvs_file} = $combined_gebvs_file;
123 $ret = to_json($ret);
125 $c->res->content_type('application/json');
126 $c->res->body($ret);
131 sub combine_gebvs_of_traits {
132 my ($self, $c) = @_;
134 $c->controller("solGS::solGS")->get_gebv_files_of_traits($c);
135 my $gebvs_files = $c->stash->{gebv_files_of_valid_traits};
137 if (!-s $gebvs_files)
139 $gebvs_files = $c->stash->{gebv_files_of_traits};
142 my $index_file = $c->stash->{selection_index_file};
144 my @files_no = map { split(/\t/) } read_file($gebvs_files);
146 if (scalar(@files_no) > 1 )
149 if ($index_file)
151 write_file($gebvs_files, {append => 1}, "\t". $index_file )
154 my $pred_pop_id = $c->stash->{prediction_pop_id};
155 my $model_id = $c->stash->{model_id};
156 my $identifier = $pred_pop_id ? $model_id . "_" . $pred_pop_id : $model_id;
158 my $combined_gebvs_file = $c->controller("solGS::solGS")->create_tempfile($c, "combined_gebvs_${identifier}");
160 $c->stash->{input_files} = $gebvs_files;
161 $c->stash->{output_files} = $combined_gebvs_file;
162 $c->stash->{r_temp_file} = "combining-gebvs-${identifier}";
163 $c->stash->{r_script} = 'R/combine_gebvs_files.r';
165 $c->controller("solGS::solGS")->run_r_script($c);
166 $c->stash->{combined_gebvs_file} = $combined_gebvs_file;
168 else
170 $c->stash->{combined_gebvs_files} = 0;
175 sub create_correlation_phenodata_file {
176 my ($self, $c) = @_;
177 my $referer = $c->req->referer;
179 if ($referer =~ /qtl/)
181 my $pop_id = $c->stash->{pop_id};
183 my $pheno_exp = "phenodata_${pop_id}";
184 my $dir = catdir($c->config->{solqtl}, 'cache');
186 my $phenotype_file = $c->controller("solGS::solGS")->grep_file($dir, $pheno_exp);
188 unless ($phenotype_file)
190 my $pop = CXGN::Phenome::Population->new($c->dbc->dbh, $pop_id);
191 $phenotype_file = $pop->phenotype_file($c);
194 my $new_file = catfile($c->stash->{correlation_dir}, "phenotype_data_${pop_id}.csv");
196 copy($phenotype_file, $new_file)
197 or die "could not copy $phenotype_file to $new_file";
199 $c->stash->{phenotype_file} = $new_file;
201 else
203 $c->controller("solGS::solGS")->phenotype_file($c);
209 sub create_correlation_dir {
210 my ($self, $c) = @_;
212 $c->controller("solGS::solGS")->get_solgs_dirs($c);
217 sub pheno_correlation_output_files {
218 my ($self, $c) = @_;
220 my $pop_id = $c->stash->{pop_id};
222 $self->create_correlation_dir($c);
223 my $corre_dir = $c->stash->{correlation_dir};
225 my $file_cache = Cache::File->new(cache_root => $corre_dir);
226 $file_cache->purge();
228 my $key_table = 'corre_coefficients_table_' . $pop_id;
229 my $key_json = 'corre_coefficients_json_' . $pop_id;
230 my $corre_coefficients_file = $file_cache->get($key_table);
231 my $corre_coefficients_json_file = $file_cache->get($key_json);
233 unless ($corre_coefficients_file && $corre_coefficients_json_file )
235 $corre_coefficients_file= catfile($corre_dir, "corre_coefficients_table_${pop_id}");
237 write_file($corre_coefficients_file);
238 $file_cache->set($key_table, $corre_coefficients_file, '30 days');
240 $corre_coefficients_json_file = catfile($corre_dir, "corre_coefficients_json_${pop_id}");
242 write_file($corre_coefficients_json_file);
243 $file_cache->set($key_json, $corre_coefficients_json_file, '30 days');
246 $c->stash->{corre_coefficients_table_file} = $corre_coefficients_file;
247 $c->stash->{corre_coefficients_json_file} = $corre_coefficients_json_file;
251 sub genetic_correlation_output_files {
252 my ($self, $c) = @_;
254 my $corre_pop_id = $c->stash->{corre_pop_id};
255 my $model_id = $c->stash->{model_id};
256 my $type = $c->stash->{type};
258 my $pred_pop_id = $c->stash->{prediction_pop_id};
259 $model_id = $c->stash->{model_id};
260 my $identifier = $type =~ /selection/ ? $model_id . "_" . $corre_pop_id : $corre_pop_id;
262 my $solgs_controller = $c->controller("solGS::solGS");
263 my $corre_json_file = $solgs_controller->create_tempfile($c, "genetic_corre_json_${identifier}");
264 my $corre_table_file = $solgs_controller->create_tempfile($c, "genetic_corre_table_${identifier}");
266 $c->stash->{genetic_corre_table_file} = $corre_table_file;
267 $c->stash->{genetic_corre_json_file} = $corre_json_file;
271 sub pheno_correlation_analysis_output :Path('/phenotypic/correlation/analysis/output') Args(0) {
272 my ($self, $c) = @_;
274 my $pop_id = $c->req->param('population_id');
275 $c->stash->{pop_id} = $pop_id;
277 $self->pheno_correlation_output_files($c);
278 my $corre_json_file = $c->stash->{corre_coefficients_json_file};
280 my $ret->{status} = 'failed';
282 if (!-s $corre_json_file)
284 $self->run_pheno_correlation_analysis($c);
285 $corre_json_file = $c->stash->{corre_coefficients_json_file};
288 if (-s $corre_json_file)
290 $ret->{status} = 'success';
291 $ret->{data} = read_file($corre_json_file);
294 $ret = to_json($ret);
296 $c->res->content_type('application/json');
297 $c->res->body($ret);
302 sub genetic_correlation_analysis_output :Path('/genetic/correlation/analysis/output') Args(0) {
303 my ($self, $c) = @_;
305 $c->stash->{corre_pop_id} = $c->req->param('corr_population_id');
306 $c->stash->{model_id} = $c->req->param('model_id');
307 $c->stash->{type} = $c->req->param('type');
309 my $corr_pop_id = $c->req->param('corr_population_id');
310 my $model_id = $c->req->param('model_id');
311 my $type = $c->req->param('type');
313 my $gebvs_file = $c->req->param('gebvs_file');
314 $c->stash->{data_input_file} = $gebvs_file;
316 $self->genetic_correlation_output_files($c);
318 if (-s $gebvs_file)
320 $self->run_genetic_correlation_analysis($c);
323 my $ret->{status} = 'failed';
324 my $corre_json_file = $c->stash->{genetic_corre_json_file};
326 if (-s $corre_json_file)
328 $ret->{status} = 'success';
329 $ret->{data} = read_file($corre_json_file);
332 $ret = to_json($ret);
334 $c->res->content_type('application/json');
335 $c->res->body($ret);
340 sub run_pheno_correlation_analysis {
341 my ($self, $c) = @_;
343 my $pop_id = $c->stash->{pop_id};
345 $self->create_correlation_phenodata_file($c);
346 $c->stash->{data_input_file} = $c->stash->{phenotype_file};
348 $self->pheno_correlation_output_files($c);
349 $c->stash->{corre_table_output_file} = $c->stash->{corre_coefficients_table_file};
350 $c->stash->{corre_json_output_file} = $c->stash->{corre_coefficients_json_file};
352 $c->controller("solGS::solGS")->formatted_phenotype_file($c);
354 $c->stash->{referer} = $c->req->referer;
356 $c->stash->{correlation_type} = "pheno_correlation_${pop_id}";
357 $c->stash->{correlation_script} = "R/phenotypic_correlation.r";
359 $self->run_correlation_analysis($c);
364 sub run_genetic_correlation_analysis {
365 my ($self, $c) = @_;
367 my $pop_id = $c->stash->{corre_pop_id};
369 $self->genetic_correlation_output_files($c);
370 $c->stash->{corre_table_output_file} = $c->stash->{genetic_corre_table_file};
371 $c->stash->{corre_json_output_file} = $c->stash->{genetic_corre_json_file};
373 $c->stash->{referer} = $c->req->referer;
375 $c->stash->{correlation_type} = "genetic_correlation_${pop_id}";
376 $c->stash->{correlation_script} = "R/genetic_correlation.r";
377 $self->run_correlation_analysis($c);
382 sub download_phenotypic_correlation : Path('/download/phenotypic/correlation/population') Args(1) {
383 my ($self, $c, $id) = @_;
385 $self->create_correlation_dir($c);
386 my $corr_dir = $c->stash->{correlation_dir};
387 my $corr_file = catfile($corr_dir, "corre_coefficients_table_${id}");
389 unless (!-e $corr_file || -s $corr_file <= 1)
391 my @corr_data;
392 my $count=1;
394 foreach my $row ( read_file($corr_file) )
396 if ($count==1) { $row = 'Traits,' . $row;}
397 $row =~ s/NA//g;
398 $row = join(",", split(/\s/, $row));
399 $row .= "\n";
401 push @corr_data, [ $row ];
402 $count++;
405 $c->res->content_type("text/plain");
406 $c->res->body(join "", map{ $_->[0] } @corr_data);
413 sub run_correlation_analysis {
414 my ($self, $c) = @_;
416 my $pop_id = $c->stash->{pop_id};
418 $self->create_correlation_dir($c);
419 my $corre_dir = $c->stash->{correlation_dir};
421 my $data_input_file = $c->stash->{data_input_file};
423 my $corre_table_file = $c->stash->{corre_table_output_file};
424 my $corre_json_file = $c->stash->{corre_json_output_file};
426 my $formatted_phenotype_file = $c->stash->{formatted_phenotype_file};
428 my $referer = $c->stash->{referer};
429 my $corre_analysis = $c->stash->{correlation_type};
430 my $corre_script = $c->stash->{correlation_script};
432 if (-s $data_input_file)
434 CXGN::Tools::Run->temp_base($corre_dir);
436 my ( $corre_commands_temp, $corre_output_temp ) =
439 my (undef, $filename ) =
440 tempfile(
441 catfile(
442 CXGN::Tools::Run->temp_base(),
443 "$corre_analysis-$_-XXXXXX",
446 $filename
447 } qw / in out /;
450 my $corre_commands_file = $c->path_to($corre_script);
451 copy( $corre_commands_file, $corre_commands_temp )
452 or die "could not copy '$corre_commands_file' to '$corre_commands_temp'";
455 try
457 print STDERR "\nsubmitting correlation job to the cluster..\n";
458 my $r_process = CXGN::Tools::Run->run_cluster(
459 'R', 'CMD', 'BATCH',
460 '--slave',
461 "--args $formatted_phenotype_file $referer $corre_table_file $corre_json_file $data_input_file",
462 $corre_commands_temp,
463 $corre_output_temp,
465 working_dir => $corre_dir,
466 max_cluster_jobs => 1_000_000_000,
470 $r_process->wait;
471 print STDERR "\ndone with correlation analysis..\n";
473 catch
476 my $err = $_;
477 $err =~ s/\n at .+//s; #< remove any additional backtrace
478 # # try to append the R output
482 $err .= "\n=== R output ===\n".file($corre_output_temp)->slurp."\n=== end R output ===\n"
485 $c->stash->{script_error} = "Correlation analysis failed.";
492 sub begin : Private {
493 my ($self, $c) = @_;
495 $c->controller("solGS::solGS")->get_solgs_dirs($c);
501 ####
503 ####