Merge pull request #5248 from solgenomics/topic/batch_update_trials
[sgn.git] / lib / SGN / Controller / solGS / Gebvs.pm
blob744340e0f1e6f1b175e12cc7001f9fad9022987e
1 package SGN::Controller::solGS::Gebvs;
3 use Moose;
4 use namespace::autoclean;
6 use Array::Utils qw(:all);
7 use Cache::File;
8 use File::Temp qw / tempfile tempdir /;
9 use File::Spec::Functions qw / catfile catdir/;
10 use File::Slurp qw /write_file read_file/;
11 use File::Path qw / mkpath /;
12 use File::Copy;
13 use File::Basename;
14 use JSON;
15 use List::MoreUtils qw /uniq/;
16 use Scalar::Util qw /weaken reftype/;
17 use String::CRC;
18 use Try::Tiny;
20 BEGIN { extends 'Catalyst::Controller' }
24 sub gebvs_data :Path('/solgs/trait/gebvs/data') Args(0) {
25 my ($self, $c) = @_;
27 my $args = $c->req->param('arguments');
28 $c->controller('solGS::Utils')->stash_json_args($c, $args);
30 my $trait_id = $c->stash->{'trait_id'};
31 $c->controller('solGS::Trait')->get_trait_details($c, $trait_id);
33 my $gebvs_file;
34 my $page = $c->req->referer();
36 my $training_pop_id = $c->stash->{training_pop_id};
37 my $selection_pop_id = $c->stash->{selection_pop_id};
39 my $type;
40 if ($page =~ /solgs\/selection\//)
42 $type = 'selection';
43 $c->controller('solGS::Files')->rrblup_selection_gebvs_file($c, $training_pop_id, $selection_pop_id, $trait_id);
44 $gebvs_file = $c->stash->{rrblup_selection_gebvs_file};
46 else
48 $type = 'training';
49 $c->controller('solGS::Files')->rrblup_training_gebvs_file($c);
50 $gebvs_file = $c->stash->{rrblup_training_gebvs_file};
53 my $gebvs_data = $c->controller("solGS::Utils")->read_file_data($gebvs_file);
54 my $gebvs_file_id = $c->controller('solGS::Files')->gebvs_file_id($c, $type);
56 my $ret->{status} = 'failed';
58 if (@$gebvs_data)
60 $ret->{status} = 'success';
61 $ret->{gebvs_data} = $gebvs_data;
62 $ret->{gebvs_file_id} = $gebvs_file_id;
65 $ret = to_json($ret);
67 $c->res->content_type('application/json');
68 $c->res->body($ret);
73 sub get_traits_selection_id :Path('/solgs/get/traits/selection/id') Args(0) {
74 my ($self, $c) = @_;
76 my @traits_ids = $c->req->param('trait_ids[]');
78 my $ret->{status} = 0;
80 if (@traits_ids > 1)
82 $self->catalogue_traits_selection($c, \@traits_ids);
84 my $traits_selection_id = $self->create_traits_selection_id(\@traits_ids);
85 $ret->{traits_selection_id} = $traits_selection_id;
86 $ret->{status} = 1;
89 $ret = to_json($ret);
91 $c->res->content_type('application/json');
92 $c->res->body($ret);
97 sub combine_gebvs_jobs_args {
98 my ($self, $c) = @_;
100 $self->get_gebv_files_of_traits($c);
101 my $gebvs_files = $c->stash->{gebv_files_of_valid_traits};
103 if (!-s $gebvs_files)
105 $gebvs_files = $c->stash->{gebv_files_of_traits};
108 my $index_file = $c->stash->{selection_index_file};
110 my @files_no = map { split(/\t/) } read_file($gebvs_files, {binmode => ':utf8'});
112 if (scalar(@files_no) > 1 )
114 if ($index_file)
116 write_file($gebvs_files, {append => 1, binmode => ':utf8'}, "\t". $index_file)
119 my $identifier = $self->combined_gebvs_file_id($c);
120 my $tmp_dir = $c->stash->{solgs_tempfiles_dir};
122 #my $combined_gebvs_file = $c->controller('solGS::Files')->create_tempfile($tmp_dir, "combined_gebvs_${identifier}");
123 $self->combined_gebvs_file($c);
124 my $combined_gebvs_file = $c->stash->{combined_gebvs_file};
125 $c->stash->{input_files} = $gebvs_files;
126 $c->stash->{output_files} = $combined_gebvs_file;
127 $c->stash->{r_temp_file} = "combining-gebvs-${identifier}";
128 $c->stash->{r_script} = 'R/solGS/combine_gebvs_files.r';
129 $c->stash->{analysis_tempfiles_dir} = $tmp_dir;
131 else
133 $c->stash->{combined_gebvs_files} = 0;
139 sub combined_gebvs_file_id {
140 my ($self, $c) = @_;
142 my $selection_pop_id = $c->stash->{selection_pop_id};
143 my $training_pop_id = $c->stash->{training_pop_id};
144 my $traits_code = $c->stash->{training_traits_code};
146 my $file_id = $selection_pop_id ? "${training_pop_id}-${selection_pop_id}-${traits_code}" : "${training_pop_id}-${traits_code}";
148 return $file_id;
153 sub combined_gebvs_file {
154 my ($self, $c) = @_;
156 my $identifier = $self->combined_gebvs_file_id($c);
158 my $cache_data = {
159 key => "combined_gebvs_${identifier}",
160 file => "combined_gebvs_${identifier}" . '.txt',
161 stash_key => 'combined_gebvs_file',
162 cache_dir => $c->stash->{solgs_cache_dir}
165 $c->controller('solGS::Files')->cache_file($c, $cache_data);
170 sub combine_gebvs_jobs {
171 my ($self, $c) = @_;
173 $self->combine_gebvs_jobs_args($c);
175 $c->controller('solGS::AsyncJob')->get_cluster_r_job_args($c);
176 my $jobs = $c->stash->{cluster_r_job_args};
178 if (reftype $jobs ne 'ARRAY')
180 $jobs = [$jobs];
183 $c->stash->{combine_gebvs_jobs} = $jobs;
187 sub run_combine_traits_gebvs {
188 my ($self, $c) = @_;
190 $self->combine_gebvs_jobs_args($c);
191 $c->controller("solGS::AsyncJob")->run_r_script($c);
195 #creates and writes a list of GEBV files of
196 #traits selected for ranking genotypes.
197 sub get_gebv_files_of_traits {
198 my ($self, $c) = @_;
200 my $training_pop_id = $c->stash->{training_pop_id} || $c->stash->{combo_pops_id} || $c->stash->{corre_pop_id};
201 $c->stash->{model_id} = $training_pop_id;
202 my $selection_pop_id = $c->stash->{selection_pop_id};
203 my $dir = $c->stash->{solgs_cache_dir};
205 my $gebv_files;
206 my $valid_gebv_files;
208 if ($selection_pop_id)
210 $self->selection_pop_analyzed_traits($c, $training_pop_id, $selection_pop_id);
211 $gebv_files = join("\t", @{$c->stash->{selection_pop_analyzed_traits_files}});
213 else
215 $self->training_pop_analyzed_traits($c);
216 $gebv_files = join("\t", @{$c->stash->{training_pop_analyzed_traits_files}});
217 $valid_gebv_files = join("\t", @{$c->stash->{training_pop_analyzed_valid_traits_files}});
220 my $pred_file_suffix = '_' . $selection_pop_id if $selection_pop_id;
221 my $name = "gebv_files_of_traits_${training_pop_id}${pred_file_suffix}";
222 my $temp_dir = $c->stash->{solgs_tempfiles_dir};
223 my $file = $c->controller('solGS::Files')->create_tempfile($temp_dir, $name);
225 write_file($file, {binmode => ':utf8'}, $gebv_files);
226 $c->stash->{gebv_files_of_traits} = $file;
228 my $name2 = "gebv_files_of_valid_traits_${training_pop_id}${pred_file_suffix}";
229 my $file2 = $c->controller('solGS::Files')->create_tempfile($temp_dir, $name2);
231 write_file($file2, {binmode => ':utf8'}, $valid_gebv_files);
233 $c->stash->{gebv_files_of_valid_traits} = $file2;
238 sub traits_selection_catalogue_file {
239 my ($self, $c) = @_;
241 my $cache_data = {key => 'traits_selection_catalogue_file',
242 file => 'traits_selection_catalogue_file.txt',
243 stash_key => 'traits_selection_catalogue_file',
244 cache_dir => $c->stash->{solgs_cache_dir}
247 $c->controller('solGS::Files')->cache_file($c, $cache_data);
252 sub catalogue_traits_selection {
253 my ($self, $c, $traits_ids) = @_;
255 $self->traits_selection_catalogue_file($c);
256 my $file = $c->stash->{traits_selection_catalogue_file};
258 my $traits_selection_id = $self->create_traits_selection_id($traits_ids);
259 my $ids = join(',', @$traits_ids);
260 my $entry = $traits_selection_id . "\t" . $ids;
262 if (!-s $file)
264 my $header = 'traits_selection_id' . "\t" . 'traits_ids' . "\n";
265 write_file($file, {binmode => ':utf8'}, ($header, $entry));
267 else
269 my @combo = ($entry);
271 my @entries = map{ $_ =~ s/\n// ? $_ : undef } read_file($file, {binmode => ':utf8'});
272 my @intersect = intersect(@combo, @entries);
274 unless( @intersect )
276 write_file($file, {append => 1, binmode => ':utf8'}, "\n" . $entry);
283 sub get_traits_selection_list {
284 my ($self, $c, $id) = @_;
286 $id = $c->stash->{traits_selection_id} if !$id;
288 $self->traits_selection_catalogue_file($c);
289 my $traits_selection_catalogue_file = $c->stash->{traits_selection_catalogue_file};
291 my @combos = uniq(read_file($traits_selection_catalogue_file, {binmode => ':utf8'}));
293 foreach my $entry (@combos)
295 if ($entry =~ m/$id/)
297 chomp($entry);
298 my ($traits_selection_id, $traits) = split(/\t/, $entry);
300 if ($id == $traits_selection_id)
302 my @traits_list = split(',', $traits);
303 $c->stash->{traits_selection_list} = \@traits_list;
311 sub create_traits_selection_id {
312 my ($self, $traits_ids) = @_;
314 if ($traits_ids)
316 return crc(join('', sort(uniq(@$traits_ids))));
318 else
320 return 0;
326 sub training_pop_analyzed_traits {
327 my ($self, $c) = @_;
329 my $training_pop_id = $c->stash->{model_id} || $c->stash->{training_pop_id};
330 my @selected_analyzed_traits = @{$c->stash->{training_traits_ids}} if $c->stash->{training_traits_ids};
332 my @traits;
333 my @traits_ids;
334 my @si_traits;
335 my @valid_traits_files;
336 my @analyzed_traits_files;
338 foreach my $trait_id (@selected_analyzed_traits)
340 $c->stash->{trait_id} = $trait_id;
341 $c->controller('solGS::Trait')->get_trait_details($c);
342 my $trait = $c->stash->{trait_abbr};
344 $c->controller('solGS::modelAccuracy')->get_model_accuracy_value($c, $training_pop_id, $trait);
345 my $av = $c->stash->{accuracy_value};
347 my $trait_file;
348 if ($av && $av =~ m/\d+/ && $av > 0)
350 $c->controller('solGS::Files')->rrblup_training_gebvs_file($c, $training_pop_id, $trait_id);
351 $trait_file = $c->stash->{rrblup_training_gebvs_file};
352 push @valid_traits_files, $trait_file;
353 push @si_traits, $trait;
357 push @traits, $trait;
358 push @analyzed_traits_files, $trait_file;
361 @traits = uniq(@traits);
362 @si_traits = uniq(@si_traits);
363 $c->stash->{training_pop_analyzed_traits} = \@traits;
364 $c->stash->{training_pop_analyzed_traits_ids} = \@selected_analyzed_traits;
365 $c->stash->{training_pop_analyzed_traits_files} = \@analyzed_traits_files;
366 $c->stash->{selection_index_traits} = \@si_traits;
367 $c->stash->{training_pop_analyzed_valid_traits_files} = \@valid_traits_files;
371 sub selection_pop_analyzed_traits {
372 my ($self, $c, $training_pop_id, $selection_pop_id) = @_;
374 my @selected_analyzed_traits = @{$c->stash->{training_traits_ids}} if $c->stash->{training_traits_ids};
376 no warnings 'uninitialized';
378 my $dir = $c->stash->{solgs_cache_dir};
379 opendir my $dh, $dir or die "can't open $dir: $!\n";
381 my @files;
382 my @trait_ids;
383 my @trait_abbrs;
384 my @selected_trait_abbrs;
385 my @selected_files;
387 if (@selected_analyzed_traits)
390 foreach my $trait_id (@selected_analyzed_traits)
392 $c->stash->{trait_id} = $trait_id;
393 $c->controller('solGS::Trait')->get_trait_details($c);
394 push @selected_trait_abbrs, $c->stash->{trait_abbr};
396 $c->controller('solGS::Files')->rrblup_selection_gebvs_file($c, $training_pop_id, $selection_pop_id, $trait_id);
397 my $file = $c->stash->{rrblup_selection_gebvs_file};
399 if ( -s $c->stash->{rrblup_selection_gebvs_file})
401 push @selected_files, $c->stash->{rrblup_selection_gebvs_file};
402 push @trait_ids, $trait_id;
407 @trait_abbrs = @selected_trait_abbrs if @selected_trait_abbrs;
408 @files = @selected_files if @selected_files;
410 $c->stash->{selection_pop_analyzed_traits} = \@trait_abbrs;
411 $c->stash->{selection_pop_analyzed_traits_ids} = \@trait_ids;
412 $c->stash->{selection_pop_analyzed_traits_files} = \@files;
417 sub begin : Private {
418 my ($self, $c) = @_;
420 $c->controller('solGS::Files')->get_solgs_dirs($c);
426 ####
428 ####