Merge pull request #3024 from solgenomics/topic/solgs/clusters-si
[sgn.git] / lib / SGN / Controller / solGS / solGS.pm
blob447d8c4b1d40b1740260a4f01ec3a2e237f71be4
1 package SGN::Controller::solGS::solGS;
3 use Moose;
4 use namespace::autoclean;
6 use String::CRC;
7 use URI::FromHash 'uri';
8 use File::Path qw / mkpath /;
9 use File::Spec::Functions qw / catfile catdir/;
10 use File::Temp qw / tempfile tempdir /;
11 use File::Slurp qw /write_file read_file/;
12 use File::Copy;
13 use File::Basename;
14 use Cache::File;
15 use Try::Tiny;
16 use List::MoreUtils qw /uniq/;
17 use Scalar::Util qw /weaken reftype/;
18 use Statistics::Descriptive;
19 use Math::Round::Var;
20 use Algorithm::Combinatorics qw /combinations/;
21 use Array::Utils qw(:all);
22 use CXGN::Tools::Run;
23 use JSON;
24 use Storable qw/ nstore retrieve /;
25 use Carp qw/ carp confess croak /;
26 use SGN::Controller::solGS::Utils;
29 BEGIN { extends 'Catalyst::Controller' }
32 # Sets the actions in this controller to be registered with no prefix
33 # so they function identically to actions created in MyApp.pm
36 #__PACKAGE__->config(namespace => '');
38 =head1 NAME
40 solGS::Controller::Root - Root Controller for solGS
42 =head1 DESCRIPTION
44 [enter your description here]
46 =head1 METHODS
48 =head2 index
50 The root page (/)
52 =cut
55 # sub index :Path :Args(0) {
56 # my ($self, $c) = @_;
57 # $c->forward('search');
58 # }
60 sub solgs : Path('/solgs'){
61 my ($self, $c) = @_;
62 $c->forward('search');
66 sub solgs_breeder_search :Path('/solgs/breeder_search') Args(0) {
67 my ($self, $c) = @_;
68 $c->stash->{referer} = $c->req->referer();
69 $c->stash->{template} = '/solgs/breeder_search_solgs.mas';
73 sub submit :Path('/solgs/submit/intro') Args(0) {
74 my ($self, $c) = @_;
76 $c->stash->{template} = $c->controller('solGS::Files')->template('/submit/intro.mas');
80 sub solgs_login_message :Path('/solgs/login/message') Args(0) {
81 my ($self, $c) = @_;
83 my $page = $c->req->param('page');
85 my $message = "This is a private data. If you are the owner, "
86 . "please <a href=\"/user/login?goto_url=$page\">login</a> to view it.";
88 $c->stash->{message} = $message;
90 $c->stash->{template} = "/generic_message.mas";
95 sub search : Path('/solgs/search') Args() {
96 my ($self, $c) = @_;
98 #$self->gs_traits_index($c);
99 #my $gs_traits_index = $c->stash->{gs_traits_index};
101 $c->stash(template => $c->controller('solGS::Files')->template('/search/solgs.mas'),
102 # gs_traits_index => $gs_traits_index,
108 sub search_trials : Path('/solgs/search/trials') Args() {
109 my ($self, $c) = @_;
111 my $show_result = $c->req->param('show_result');
113 my $limit = $show_result =~ /all/ ? undef : 10;
115 my $projects_ids = $c->model('solGS::solGS')->all_gs_projects($limit);
117 my $ret->{status} = 'failed';
119 my $formatted_trials = [];
121 if (@$projects_ids)
123 my $projects_rs = $c->model('solGS::solGS')->project_details($projects_ids);
125 $self->get_projects_details($c, $projects_rs);
126 my $projects = $c->stash->{projects_details};
128 $self->format_gs_projects($c, $projects);
129 $formatted_trials = $c->stash->{formatted_gs_projects};
131 $ret->{status} = 'success';
134 $ret->{trials} = $formatted_trials;
135 $ret = to_json($ret);
137 $c->res->content_type('application/json');
138 $c->res->body($ret);
143 sub projects_links {
144 my ($self, $c, $pr_rs) = @_;
146 my $protocol_id = $c->stash->{genotyping_protocol_id};
148 $self->get_projects_details($c, $pr_rs);
149 my $projects = $c->stash->{projects_details};
151 my @projects_pages;
152 my $update_marker_count;
154 foreach my $pr_id (keys %$projects)
156 my $pr_name = $projects->{$pr_id}{project_name};
157 my $pr_desc = $projects->{$pr_id}{project_desc};
158 my $pr_year = $projects->{$pr_id}{project_year};
159 my $pr_location = $projects->{$pr_id}{project_location};
161 my $dummy_name = $pr_name =~ /test\w*/ig;
162 #my $dummy_desc = $pr_desc =~ /test\w*/ig;
164 $self->check_population_has_genotype($c);
165 my $has_genotype = $c->stash->{population_has_genotype};
167 no warnings 'uninitialized';
169 unless ($dummy_name || !$pr_name )
171 #$self->trial_compatibility_table($c, $has_genotype);
172 #my $match_code = $c->stash->{trial_compatibility_code};
174 my $checkbox = qq |<form> <input type="checkbox" name="project" value="$pr_id" onclick="solGS.combinedTrials.getPopIds()"/> </form> |;
176 #$match_code = qq | <div class=trial_code style="color: $match_code; background-color: $match_code; height: 100%; width:30px">code</div> |;
178 push @projects_pages, [$checkbox, qq|<a href="/solgs/population/$pr_id/gp/$protocol_id" onclick="solGS.waitPage(this.href); return false;">$pr_name</a>|,
179 $pr_desc, $pr_location, $pr_year
187 $c->stash->{projects_pages} = \@projects_pages;
191 sub search_trials_trait : Path('/solgs/search/trials/trait') Args() {
192 my ($self, $c, $trait_id, $gp, $protocol_id) = @_;
194 $self->get_trait_details($c, $trait_id);
195 $c->stash->{genotyping_protocol_id} = $protocol_id;
197 $c->stash->{template} = $c->controller('solGS::Files')->template('/search/trials/trait.mas');
202 sub show_search_result_pops : Path('/solgs/search/result/populations') Args() {
203 my ($self, $c, $trait_id, $gp, $protocol_id) = @_;
205 my $combine = $c->req->param('combine');
206 my $page = $c->req->param('page') || 1;
208 my $projects_ids = $c->model('solGS::solGS')->search_trait_trials($trait_id, $protocol_id);
210 my $ret->{status} = 'failed';
211 my $formatted_projects = [];
213 if (@$projects_ids)
215 my $projects_rs = $c->model('solGS::solGS')->project_details($projects_ids);
216 my $trait = $c->model('solGS::solGS')->trait_name($trait_id);
218 $self->get_projects_details($c, $projects_rs);
219 my $projects = $c->stash->{projects_details};
221 $self->format_trait_gs_projects($c, $trait_id, $projects, $protocol_id);
222 $formatted_projects = $c->stash->{formatted_gs_projects};
224 $ret->{status} = 'success';
227 $ret->{trials} = $formatted_projects;
229 $ret = to_json($ret);
231 $c->res->content_type('application/json');
232 $c->res->body($ret);
237 sub format_trait_gs_projects {
238 my ($self, $c, $trait_id, $projects, $protocol_id) = @_;
240 my @formatted_projects;
241 $c->stash->{genotyping_protocol_id} = $protocol_id;
242 foreach my $pr_id (keys %$projects)
244 my $pr_name = $projects->{$pr_id}{project_name};
245 my $pr_desc = $projects->{$pr_id}{project_desc};
246 my $pr_year = $projects->{$pr_id}{project_year};
247 my $pr_location = $projects->{$pr_id}{project_location};
249 $c->stash->{pop_id} = $pr_id;
250 $self->check_population_has_genotype($c);
251 my $has_genotype = $c->stash->{population_has_genotype};
253 if ($has_genotype)
255 #my $trial_compatibility_file = $self->trial_compatibility_file($c);
257 #$self->trial_compatibility_table($c, $has_genotype);
258 #my $match_code = $c->stash->{trial_compatibility_code};
260 my $checkbox = qq |<form> <input type="checkbox" name="project" value="$pr_id" onclick="solGS.combinedTrials.getPopIds()"/> </form> |;
261 #$match_code = qq | <div class=trial_code style="color: $match_code; background-color: $match_code; height: 100%; width:100%">code</div> |;
263 push @formatted_projects, [ $checkbox, qq|<a href="/solgs/trait/$trait_id/population/$pr_id/gp/$protocol_id" onclick="solGS.waitPage(this.href); return false;">$pr_name</a>|, $pr_desc, $pr_location, $pr_year];
267 $c->stash->{formatted_gs_projects} = \@formatted_projects;
272 sub format_gs_projects {
273 my ($self, $c, $projects) = @_;
275 my @formatted_projects;
277 my $protocol_id = $c->stash->{genotyping_protocol_id};
279 foreach my $pr_id (keys %$projects)
281 my $pr_name = $projects->{$pr_id}{project_name};
282 my $pr_desc = $projects->{$pr_id}{project_desc};
283 my $pr_year = $projects->{$pr_id}{project_year};
284 my $pr_location = $projects->{$pr_id}{project_location};
286 # $c->stash->{pop_id} = $pr_id;
287 # $self->check_population_has_genotype($c);
288 # my $has_genotype = $c->stash->{population_has_genotype};
289 my $has_genotype = $c->config->{default_genotyping_protocol};
291 if ($has_genotype)
293 my $trial_compatibility_file = $self->trial_compatibility_file($c);
295 $self->trial_compatibility_table($c, $has_genotype);
296 my $match_code = $c->stash->{trial_compatibility_code};
298 my $checkbox = qq |<form> <input type="checkbox" name="project" value="$pr_id" onclick="solGS.combinedTrials.getPopIds()"/> </form> |;
299 $match_code = qq | <div class=trial_code style="color: $match_code; background-color: $match_code; height: 100%; width:100%">code</div> |;
301 push @formatted_projects, [ $checkbox, qq|<a href="/solgs/population/$pr_id/gp/$protocol_id" onclick="solGS.waitPage(this.href); return false;">$pr_name</a>|, $pr_desc, $pr_location, $pr_year, $match_code];
305 $c->stash->{formatted_gs_projects} = \@formatted_projects;
310 sub trial_compatibility_table {
311 my ($self, $c, $markers) = @_;
313 $self->trial_compatibility_file($c);
314 my $compatibility_file = $c->stash->{trial_compatibility_file};
316 my $color;
318 if (-s $compatibility_file)
320 my @line = read_file($compatibility_file);
321 my ($entry) = grep(/$markers/, @line);
322 chomp($entry);
324 if($entry)
326 ($markers, $color) = split(/\t/, $entry);
327 $c->stash->{trial_compatibility_code} = $color;
331 if (!$color)
333 my ($red, $blue, $green) = map { int(rand(255)) } 1..3;
334 $color = 'rgb' . '(' . "$red,$blue,$green" . ')';
336 my $color_code = $markers . "\t" . $color . "\n";
338 $c->stash->{trial_compatibility_code} = $color;
339 write_file($compatibility_file,{append => 1}, $color_code);
344 sub trial_compatibility_file {
345 my ($self, $c) = @_;
347 my $cache_data = {key => 'trial_compatibility',
348 file => 'trial_compatibility_codes',
349 stash_key => 'trial_compatibility_file'
352 $c->controller('solGS::Files')->cache_file($c, $cache_data);
357 sub get_projects_details {
358 my ($self, $c, $pr_rs) = @_;
360 my ($year, $location, $pr_id, $pr_name, $pr_desc);
361 my %projects_details = ();
363 while (my $pr = $pr_rs->next)
365 $pr_id = $pr->get_column('project_id');
366 $pr_name = $pr->get_column('name');
367 $pr_desc = $pr->get_column('description');
369 my $pr_yr_rs = $c->model('solGS::solGS')->project_year($pr_id);
371 while (my $pr = $pr_yr_rs->next)
373 $year = $pr->value;
376 my $location = $c->model('solGS::solGS')->project_location($pr_id);
378 $projects_details{$pr_id} = {
379 project_name => $pr_name,
380 project_desc => $pr_desc,
381 project_year => $year,
382 project_location => $location,
386 $c->stash->{projects_details} = \%projects_details;
391 sub store_project_marker_count {
392 my ($self, $c) = @_;
394 my $pop_id = $c->stash->{pop_id};
395 my $marker_count = $c->stash->{marker_count};
397 unless ($marker_count)
399 my $markers = $c->model("solGS::solGS")->get_project_genotyping_markers($pop_id);
400 my @markers = split('\t', $markers);
401 $marker_count = scalar(@markers);
404 my $genoprop = {'project_id' => $pop_id, 'marker_count' => $marker_count};
405 $c->model("solGS::solGS")->set_project_genotypeprop($genoprop);
410 sub search_traits : Path('/solgs/search/traits/') Args() {
411 my ($self, $c, $query, $gp, $protocol_id) = @_;
413 my $traits = $c->model('solGS::solGS')->search_trait($query);
414 my $result = $c->model('solGS::solGS')->trait_details($traits);
416 my $ret->{status} = 0;
417 if ($result->first)
419 $ret->{status} = 1;
420 $ret->{genotyping_protocol_id} = $protocol_id;
423 $ret = to_json($ret);
425 $c->res->content_type('application/json');
426 $c->res->body($ret);
431 sub show_search_result_traits : Path('/solgs/search/result/traits') Args() {
432 my ($self, $c, $query, $gp, $protocol_id) = @_;
434 my $traits = $c->model('solGS::solGS')->search_trait($query);
435 my $result = $c->model('solGS::solGS')->trait_details($traits);
437 my @rows;
438 while (my $row = $result->next)
440 my $id = $row->cvterm_id;
441 my $name = $row->name;
442 my $def = $row->definition;
444 push @rows, [ qq |<a href="/solgs/search/trials/trait/$id/gp/$protocol_id" onclick="solGS.waitPage()">$name</a>|, $def];
447 if (@rows)
449 $c->stash(template => $c->controller('solGS::Files')->template('/search/result/traits.mas'),
450 result => \@rows,
451 query => $query,
452 genotyping_protocol_id => $protocol_id
459 sub population : Path('/solgs/population') Args() {
460 my ($self, $c, $pop_id, $gp, $protocol_id) = @_;
462 if (!$pop_id)
464 $c->stash->{message} = "You can not access this page with out population id.";
465 $c->stash->{template} = "/generic_message.mas";
468 $c->controller('solGS::genotypingProtocol')->stash_protocol_id($c, $protocol_id);
470 $c->stash->{training_pop_id} = $pop_id;
471 #$c->stash->{pop_id} = $pop_id;
473 if ($pop_id =~ /dataset/)
475 $c->stash->{dataset_id} = $pop_id =~ s/\w+_//r;
477 elsif ($pop_id =~ /list/)
479 $c->stash->{list_id} = $pop_id =~ s/\w+_//r;
482 my $cached = $c->controller('solGS::CachedResult')->check_single_trial_training_data($c, $pop_id, $protocol_id);
484 if (!$cached)
486 $c->stash->{message} = "Cached output for this training population does not exist anymore.\n"
487 . "Please go to <a href=\"/solgs/search/\">the search page</a>"
488 . " and create the training population data.";
490 $c->stash->{template} = "/generic_message.mas";
492 else
494 $c->controller('solGS::Utils')->save_metadata($c);
495 $self->get_all_traits($c);
496 $self->project_description($c, $pop_id);
498 $c->stash->{template} = $c->controller('solGS::Files')->template('/population.mas');
500 my $acronym = $self->get_acronym_pairs($c, $pop_id);
501 $c->stash->{acronym} = $acronym;
507 sub get_project_details {
508 my ($self, $c, $pr_id) = @_;
510 my $pr_rs = $c->model('solGS::solGS')->project_details($pr_id);
512 while (my $row = $pr_rs->next)
514 $c->stash(project_id => $row->id,
515 project_name => $row->name,
516 project_desc => $row->description
523 sub get_markers_count {
524 my ($self, $c, $pop_hash) = @_;
526 my $filtered_geno_file;
527 my $markers_cnt;
529 my $protocol_id = $c->stash->{genotyping_protocol_id};
531 if ($pop_hash->{training_pop})
533 my $training_pop_id = $pop_hash->{training_pop_id};
534 $c->stash->{pop_id} = $training_pop_id;
535 $c->controller('solGS::Files')->filtered_training_genotype_file($c, $protocol_id);
536 $filtered_geno_file = $c->stash->{filtered_training_genotype_file};
538 if (-s $filtered_geno_file) {
539 my @geno_lines = read_file($filtered_geno_file);
540 $markers_cnt = scalar(split('\t', $geno_lines[0]));
542 else
544 $c->controller('solGS::Files')->genotype_file_name($c, $training_pop_id, $protocol_id);
545 my $geno_file = $c->stash->{genotype_file_name};
546 my @geno_lines = read_file($geno_file);
547 $markers_cnt= scalar(split ('\t', $geno_lines[0]));
551 elsif ($pop_hash->{selection_pop})
553 my $selection_pop_id = $pop_hash->{selection_pop_id};
554 $c->stash->{pop_id} = $selection_pop_id;
555 $c->controller('solGS::Files')->filtered_selection_genotype_file($c);
556 $filtered_geno_file = $c->stash->{filtered_selection_genotype_file};
558 if (-s $filtered_geno_file) {
559 my @geno_lines = read_file($filtered_geno_file);
560 $markers_cnt = scalar(split('\t', $geno_lines[0]));
562 else
564 $c->controller('solGS::Files')->genotype_file_name($c, $selection_pop_id, $protocol_id);
565 my $geno_file = $c->stash->{genotype_file_name};
566 my @geno_lines = read_file($geno_file);
567 $markers_cnt= scalar(split ('\t', $geno_lines[0]));
571 return $markers_cnt;
576 sub project_description {
577 my ($self, $c, $pr_id) = @_;
579 $c->stash->{pop_id} = $pr_id;
580 $c->stash->{training_pop_id} = $pr_id;
581 my $protocol_id = $c->stash->{genotyping_protocol_id};
583 if ($c->stash->{list_id})
585 $c->controller('solGS::List')->list_population_summary($c);
587 elsif ($c->stash->{dataset_id})
589 $c->controller('solGS::Dataset')->dataset_population_summary($c);
591 else
593 my $pr_rs = $c->model('solGS::solGS')->project_details($pr_id);
595 while (my $row = $pr_rs->next)
597 $c->stash(project_id => $row->id,
598 project_name => $row->name,
599 project_desc => $row->description
603 $self->get_project_owners($c, $pr_id);
604 $c->stash->{owner} = $c->stash->{project_owners};
607 $c->controller('solGS::Files')->filtered_training_genotype_file($c, $pr_id, $protocol_id);
608 my $filtered_geno_file = $c->stash->{filtered_training_genotype_file};
610 my $markers_no;
611 my @geno_lines;
613 if (-s $filtered_geno_file) {
614 @geno_lines = read_file($filtered_geno_file);
615 $markers_no = scalar(split('\t', $geno_lines[0])) - 1;
617 else
619 $c->controller('solGS::Files')->genotype_file_name($c, $pr_id, $protocol_id);
620 my $geno_file = $c->stash->{genotype_file_name};
621 @geno_lines = read_file($geno_file);
622 $markers_no = scalar(split ('\t', $geno_lines[0])) - 1;
625 my $stocks_no = $self->training_pop_member_count($c, $pr_id, $protocol_id);
627 $c->controller('solGS::Files')->traits_acronym_file($c, $pr_id);
628 my $traits_file = $c->stash->{traits_acronym_file};
629 my @traits_lines = read_file($traits_file);
630 my $traits_no = scalar(@traits_lines) - 1;
632 my $protocol_url = $c->controller('solGS::genotypingProtocol')->create_protocol_url($c, $protocol_id);
634 $c->stash(markers_no => $markers_no,
635 traits_no => $traits_no,
636 stocks_no => $stocks_no,
637 protocol_url => $protocol_url,
643 sub training_pop_member_count {
644 my ($self, $c, $pop_id, $protocol_id) = @_;
646 $c->controller('solGS::Files')->genotype_file_name($c, $pop_id, $protocol_id);
647 my $geno_file = $c->stash->{genotype_file_name};
648 my $geno = qx /wc -l $geno_file/;
649 my ($geno_lines, $g_file) = split(" ", $geno);
651 my $count = $geno_lines > 1 ? $geno_lines - 1 : 0;
653 return $count;
657 sub check_training_pop_size : Path('/solgs/check/training/pop/size') Args(0) {
658 my ($self, $c) = @_;
660 my $pop_id = $c->req->param('training_pop_id');
661 my $type = $c->req->param('data_set_type');
662 my $protocol_id = $c->req->param('genotyping_protocol_id');
664 $c->controller('solGS::genotypingProtocol')->stash_protocol_id($c, $protocol_id);
666 my $count;
667 if ($type =~ /single/)
669 $count = $self->training_pop_member_count($c, $pop_id, $protocol_id);
671 elsif ($type =~ /combined/)
673 $c->stash->{combo_pops_id} = $pop_id;
674 $count = $c->controller('solGS::combinedTrials')->count_combined_trials_members($c, $pop_id, $protocol_id);
677 my $ret->{status} = 'failed';
679 if ($count)
681 $ret->{status} = 'success';
682 $ret->{member_count} = $count;
685 $ret = to_json($ret);
687 $c->res->content_type('application/json');
688 $c->res->body($ret);
694 sub selection_trait :Path('/solgs/selection/') Args() {
695 my ($self, $c, $selection_pop_id,
696 $model_key, $training_pop_id,
697 $trait_key, $trait_id, $gp, $protocol_id) = @_;
699 $self->get_trait_details($c, $trait_id);
700 $c->stash->{training_pop_id} = $training_pop_id;
701 $c->stash->{selection_pop_id} = $selection_pop_id;
702 $c->stash->{data_set_type} = 'single population';
703 $c->controller('solGS::genotypingProtocol')->stash_protocol_id($c, $protocol_id);
704 $protocol_id = $c->stash->{genotyping_protocol_id};
706 my $identifier = $training_pop_id . '_' . $selection_pop_id;
708 $c->controller('solGS::Files')->rrblup_selection_gebvs_file($c, $identifier, $trait_id);
709 my $gebvs_file = $c->stash->{rrblup_selection_gebvs_file};
711 if (!-s $gebvs_file)
713 my $model_page = qq | <a href="/solgs/trait/$trait_id/population/$training_pop_id">training model page</a> |;
715 $c->stash->{message} = "No cached output was found for this trait.\n" .
716 " Please go to the $model_page and run the prediction.";
718 $c->stash->{template} = "/generic_message.mas";
720 else
722 if ($training_pop_id =~ /list/)
724 $c->stash->{list_id} = $training_pop_id =~ s/\w+_//r;
725 $c->controller('solGS::List')->list_population_summary($c);
726 $c->stash->{training_pop_id} = $c->stash->{project_id};
727 $c->stash->{training_pop_name} = $c->stash->{project_name};
728 $c->stash->{training_pop_desc} = $c->stash->{project_desc};
729 $c->stash->{training_pop_owner} = $c->stash->{owner};
731 elsif ($training_pop_id =~ /dataset/)
733 $c->stash->{dataset_id} = $training_pop_id =~ s/\w+_//r;
734 $c->controller('solGS::Dataset')->dataset_population_summary($c);
735 $c->stash->{training_pop_id} = $c->stash->{project_id};
736 $c->stash->{training_pop_name} = $c->stash->{project_name};
737 $c->stash->{training_pop_desc} = $c->stash->{project_desc};
738 $c->stash->{training_pop_owner} = $c->stash->{owner};
740 else
742 $self->get_project_details($c, $training_pop_id);
743 $c->stash->{training_pop_id} = $c->stash->{project_id};
744 $c->stash->{training_pop_name} = $c->stash->{project_name};
745 $c->stash->{training_pop_desc} = $c->stash->{project_desc};
747 $self->get_project_owners($c, $training_pop_id);
748 $c->stash->{training_pop_owner} = $c->stash->{project_owners};
751 if ($selection_pop_id =~ /list/)
753 $c->stash->{list_id} = $selection_pop_id =~ s/\w+_//r;
755 $c->controller('solGS::List')->list_population_summary($c);
756 $c->stash->{selection_pop_id} = $c->stash->{project_id};
757 $c->stash->{selection_pop_name} = $c->stash->{project_name};
758 $c->stash->{selection_pop_desc} = $c->stash->{project_desc};
759 $c->stash->{selection_pop_owner} = $c->stash->{owner};
761 elsif ($selection_pop_id =~ /dataset/)
763 $c->stash->{dataset_id} = $selection_pop_id =~ s/\w+_//r;
764 $c->controller('solGS::Dataset')->dataset_population_summary($c);
765 $c->stash->{selection_pop_id} = $c->stash->{project_id};
766 $c->stash->{selection_pop_name} = $c->stash->{project_name};
767 $c->stash->{selection_pop_desc} = $c->stash->{project_desc};
768 $c->stash->{selection_pop_owner} = $c->stash->{owner};
770 else
772 $self->get_project_details($c, $selection_pop_id);
773 $c->stash->{selection_pop_id} = $c->stash->{project_id};
774 $c->stash->{selection_pop_name} = $c->stash->{project_name};
775 $c->stash->{selection_pop_desc} = $c->stash->{project_desc};
777 $self->get_project_owners($c, $selection_pop_id);
778 $c->stash->{selection_pop_owner} = $c->stash->{project_owners};
781 my $tr_pop_mr_cnt = $self->get_markers_count($c, {'training_pop' => 1, 'training_pop_id' => $training_pop_id});
782 my $sel_pop_mr_cnt = $self->get_markers_count($c, {'selection_pop' => 1, 'selection_pop_id' => $selection_pop_id});
784 $c->stash->{training_markers_cnt} = $tr_pop_mr_cnt;
785 $c->stash->{selection_markers_cnt} = $sel_pop_mr_cnt;
787 my $protocol_url = $c->controller('solGS::genotypingProtocol')->create_protocol_url($c, $protocol_id);
788 $c->stash->{protocol_url} = $protocol_url;
790 my @stock_rows = read_file($gebvs_file);
791 $c->stash->{selection_stocks_cnt} = scalar(@stock_rows) - 1;
793 $self->top_blups($c, $gebvs_file);
795 $c->stash->{blups_download_url} = qq | <a href="/solgs/download/prediction/model/$training_pop_id/prediction/$selection_pop_id/$trait_id/gp/$protocol_id">Download all GEBVs</a>|;
797 $c->stash->{template} = $c->controller('solGS::Files')->template('/population/selection_trait.mas');
803 sub build_single_trait_model {
804 my ($self, $c) = @_;
806 my $trait_id = $c->stash->{trait_id};
807 $self->get_trait_details($c, $trait_id);
809 $self->get_rrblup_output($c);
814 sub trait :Path('/solgs/trait') Args() {
815 my ($self, $c, $trait_id, $key, $pop_id, $gp, $protocol_id) = @_;
818 if ($pop_id =~ /dataset/)
820 $c->stash->{dataset_id} = $pop_id =~ s/\w+_//r;
822 elsif ($pop_id =~ /list/)
824 $c->stash->{list_id} = $pop_id =~ s/\w+_//r;
827 $c->controller('solGS::genotypingProtocol')->stash_protocol_id($c, $protocol_id);
828 $c->stash->{pop_id} = $pop_id;
829 $c->stash->{training_pop_id} = $pop_id;
830 $c->stash->{trait_id} = $trait_id;
832 if ($pop_id && $trait_id)
834 #$c->controller('solGS::Files')->rrblup_training_gebvs_file($c);
835 #my $gebv_file = $c->stash->{rrblup_training_gebvs_file};
836 $self->project_description($c, $pop_id);
838 my $cached = $c->controller('solGS::CachedResult')->check_single_trial_model_output($c, $pop_id, $trait_id, $protocol_id);
840 if (!$cached)
842 my $training_pop_name = $c->stash->{project_name};
843 #my $training_pop_desc = $c->stash->{project_desc};
844 my $training_pop_page = qq | <a href="/solgs/population/$pop_id">$training_pop_name</a> |;
846 $c->stash->{message} = "Cached output for this model does not exist anymore.\n" .
847 " Please go to $training_pop_page and run the analysis.";
849 $c->stash->{template} = "/generic_message.mas";
851 else
853 $self->get_trait_details($c, $trait_id);
854 $self->gs_modeling_files($c);
856 $c->controller('solGS::modelAccuracy')->cross_validation_stat($c, $pop_id, $c->stash->{trait_abbr});
857 $c->controller('solGS::Files')->traits_acronym_file($c, $pop_id);
858 my $acronym_file = $c->stash->{traits_acronym_file};
860 if (!-e $acronym_file || !-s $acronym_file)
862 $self->get_all_traits($c);
865 $self->trait_phenotype_stat($c);
866 $c->stash->{template} = $c->controller('solGS::Files')->template("/population/trait.mas");
873 sub gs_modeling_files {
874 my ($self, $c) = @_;
876 $self->output_files($c);
877 $self->input_files($c);
878 $c->controller('solGS::modelAccuracy')->model_accuracy_report($c);
879 $self->top_blups($c, $c->stash->{rrblup_training_gebvs_file});
880 $c->controller('solGS::Download')->training_prediction_download_urls($c);
881 $self->top_markers($c, $c->stash->{marker_effects_file});
882 $self->model_parameters($c);
887 sub trait_info_file {
888 my ($self, $c) = @_;
890 my $pop_id = $c->stash->{pop_id} || $c->stash->{combo_pops_id};
891 my $trait_id = $c->stash->{trait_id};
892 my $trait_abbr = $c->stash->{trait_abbr};
893 my $name = "trait_info_${trait_id}_pop_${pop_id}";
894 my $temp_dir = $c->stash->{solgs_tempfiles_dir};
895 my $trait_info = $trait_id . "\t" . $trait_abbr;
896 my $file = $c->controller('solGS::Files')->create_tempfile($temp_dir, $name);
898 write_file($file, $trait_info);
900 $c->stash->{trait_info_file} = $file;
904 sub input_files {
905 my ($self, $c) = @_;
907 if ($c->stash->{data_set_type} =~ /combined populations/i)
909 $c->controller('solGS::combinedTrials')->combined_pops_gs_input_files($c);
910 my $input_file = $c->stash->{combined_pops_gs_input_files};
911 $c->stash->{input_files} = $input_file;
913 else
915 my $pop_id = $c->stash->{pop_id};
916 my $protocol_id = $c->stash->{genotyping_protocol_id};
918 $c->controller('solGS::Files')->genotype_file_name($c, $pop_id, $protocol_id);
919 $c->controller('solGS::Files')->phenotype_file_name($c, $pop_id);
920 $self->trait_info_file($c);
922 $c->controller('solGS::Files')->formatted_phenotype_file($c);
923 my $formatted_phenotype_file = $c->stash->{formatted_phenotype_file};
925 my $selection_pop_id = $c->stash->{prediction_pop_id} ||$c->stash->{selection_pop_id} ;
926 my ($selection_population_file, $filtered_pred_geno_file);
928 if ($selection_pop_id)
930 $selection_population_file = $c->stash->{selection_population_file};
933 my $pheno_file = $c->stash->{phenotype_file_name};
934 my $geno_file = $c->stash->{genotype_file_name};
935 my $traits_file = $c->stash->{selected_traits_file};
936 my $trait_file = $c->stash->{trait_info_file};
938 no warnings 'uninitialized';
940 my $input_files = join ("\t",
941 $pheno_file,
942 $formatted_phenotype_file,
943 $geno_file,
944 $traits_file,
945 $trait_file,
946 $selection_population_file,
949 my $name = "input_files_${pop_id}";
950 my $temp_dir = $c->stash->{solgs_tempfiles_dir};
951 my $tempfile = $c->controller('solGS::Files')->create_tempfile($temp_dir, $name);
952 write_file($tempfile, $input_files);
953 $c->stash->{input_files} = $tempfile;
958 sub output_files {
959 my ($self, $c) = @_;
961 my $pop_id = $c->stash->{pop_id};
962 my $trait = $c->stash->{trait_abbr};
963 my $trait_id = $c->stash->{trait_id};
965 $c->controller('solGS::Files')->marker_effects_file($c);
966 $c->controller('solGS::Files')->rrblup_training_gebvs_file($c);
967 $c->controller('solGS::Files')->validation_file($c);
968 $c->controller("solGS::Files")->trait_phenodata_file($c);
969 $c->controller("solGS::Files")->variance_components_file($c);
970 $c->controller('solGS::Files')->relationship_matrix_file($c);
971 $c->controller('solGS::Files')->filtered_training_genotype_file($c);
973 my $selection_pop_id = $c->stash->{prediction_pop_id} || $c->stash->{selection_pop_id};
974 if (!$pop_id) {$pop_id = $c->stash->{model_id};}
976 no warnings 'uninitialized';
978 if ($selection_pop_id)
980 my $identifier = $pop_id . '_' . $selection_pop_id;
981 $c->controller('solGS::Files')->rrblup_selection_gebvs_file($c, $identifier, $trait_id);
984 my $file_list = join ("\t",
985 $c->stash->{rrblup_training_gebvs_file},
986 $c->stash->{marker_effects_file},
987 $c->stash->{validation_file},
988 $c->stash->{trait_phenodata_file},
989 $c->stash->{selected_traits_gebv_file},
990 $c->stash->{variance_components_file},
991 $c->stash->{relationship_matrix_file},
992 $c->stash->{filtered_training_genotype_file},
993 $c->stash->{rrblup_selection_gebvs_file}
996 my $name = "output_files_${trait}_$pop_id";
997 my $temp_dir = $c->stash->{solgs_tempfiles_dir};
998 my $tempfile = $c->controller('solGS::Files')->create_tempfile($temp_dir, $name);
999 write_file($tempfile, $file_list);
1001 $c->stash->{output_files} = $tempfile;
1006 sub top_markers {
1007 my ($self, $c, $markers_file) = @_;
1009 $c->stash->{top_marker_effects} = $c->controller('solGS::Utils')->top_10($markers_file);
1013 sub top_blups {
1014 my ($self, $c, $gebv_file) = @_;
1016 $c->stash->{top_blups} = $c->controller('solGS::Utils')->top_10($gebv_file);
1020 sub predict_selection_pop_single_trait {
1021 my ($self, $c) = @_;
1023 if ($c->stash->{data_set_type} =~ /single population/)
1025 $self->predict_selection_pop_single_pop_model($c)
1027 else
1029 $c->controller('solGS::combinedTrials')->predict_selection_pop_combined_pops_model($c);
1035 sub predict_selection_pop_multi_traits {
1036 my ($self, $c) = @_;
1038 my $data_set_type = $c->stash->{data_set_type};
1039 my $training_pop_id = $c->stash->{training_pop_id};
1040 my $selection_pop_id = $c->stash->{selection_pop_id};
1041 my $protocol_id = $c->stash->{genotyping_protocol_id};
1043 $c->stash->{pop_id} = $training_pop_id;
1045 my @traits = @{$c->stash->{training_traits_ids}} if $c->stash->{training_traits_ids};
1047 $self->traits_with_valid_models($c);
1048 my @traits_with_valid_models = @{$c->stash->{traits_ids_with_valid_models}};
1050 $c->stash->{training_traits_ids} = \@traits_with_valid_models;
1052 my @unpredicted_traits;
1053 foreach my $trait_id (@{$c->stash->{training_traits_ids}})
1055 my $identifier = $training_pop_id .'_' . $selection_pop_id;
1056 $c->controller('solGS::Files')->rrblup_selection_gebvs_file($c, $identifier, $trait_id);
1058 push @unpredicted_traits, $trait_id if !-s $c->stash->{rrblup_selection_gebvs_file};
1061 if (@unpredicted_traits)
1063 $c->stash->{training_traits_ids} = \@unpredicted_traits;
1065 $c->controller('solGS::Files')->genotype_file_name($c, $selection_pop_id, $protocol_id);
1067 if (!-s $c->stash->{genotype_file_name})
1069 $self->get_selection_pop_query_args_file($c);
1070 $c->stash->{prerequisite_jobs} = $c->stash->{selection_pop_query_args_file};
1073 $c->controller('solGS::Files')->selection_population_file($c, $selection_pop_id, $protocol_id);
1075 $self->get_gs_modeling_jobs_args_file($c);
1076 $c->stash->{dependent_jobs} = $c->stash->{gs_modeling_jobs_args_file};
1079 #$c->stash->{prerequisite_type} = 'selection_pop_download_data';
1081 $self->run_async($c);
1083 else
1085 croak "No traits to predict: $!\n";
1091 sub predict_selection_pop_single_pop_model {
1092 my ($self, $c) = @_;
1094 my $trait_id = $c->stash->{trait_id};
1095 my $training_pop_id = $c->stash->{training_pop_id};
1096 my $prediction_pop_id = $c->stash->{prediction_pop_id} || $c->stash->{selection_pop_id};
1097 my $protocol_id = $c->stash->{genotyping_protocol_id};
1099 $self->get_trait_details($c, $trait_id);
1100 my $trait_abbr = $c->stash->{trait_abbr};
1102 my $identifier = $training_pop_id . '_' . $prediction_pop_id;
1103 $c->controller('solGS::Files')->rrblup_selection_gebvs_file($c, $identifier, $trait_id);
1105 my $rrblup_selection_gebvs_file = $c->stash->{rrblup_selection_gebvs_file};
1106 $c->stash->{selection_pop_id} = $prediction_pop_id;
1108 if (!-s $rrblup_selection_gebvs_file)
1110 $c->stash->{pop_id} = $training_pop_id;
1111 $c->controller('solGS::Files')->phenotype_file_name($c, $training_pop_id);
1112 my $pheno_file = $c->stash->{phenotype_file_name};
1114 $c->controller('solGS::Files')->genotype_file_name($c, $training_pop_id, $protocol_id);
1115 my $geno_file = $c->stash->{genotype_file_name};
1117 $c->stash->{pheno_file} = $pheno_file;
1118 $c->stash->{geno_file} = $geno_file;
1120 $c->controller('solGS::Files')->selection_population_file($c, $prediction_pop_id, $protocol_id);
1122 $self->get_rrblup_output($c);
1128 sub selection_prediction :Path('/solgs/model') Args() {
1129 my ($self, $c, $training_pop_id, $pop, $selection_pop_id, $gp, $protocol_id) = @_;
1131 my $referer = $c->req->referer;
1132 my $path = $c->req->path;
1133 my $base = $c->req->base;
1134 $referer =~ s/$base//;
1136 $c->stash->{training_pop_id} = $training_pop_id;
1137 $c->stash->{model_id} = $training_pop_id;
1138 $c->stash->{pop_id} = $training_pop_id;
1139 $c->stash->{prediction_pop_id} = $selection_pop_id;
1140 $c->stash->{selection_pop_id} = $selection_pop_id;
1141 $c->controller('solGS::genotypingProtocol')->stash_protocol_id($c, $protocol_id);
1143 if ($referer =~ /solgs\/model\/combined\/populations\//)
1145 my ($combo_pops_id, $trait_id) = $referer =~ m/(\d+)/g;
1147 $c->stash->{data_set_type} = "combined populations";
1148 $c->stash->{combo_pops_id} = $combo_pops_id;
1149 $c->stash->{trait_id} = $trait_id;
1151 $c->controller('solGS::combinedTrials')->predict_selection_pop_combined_pops_model($c);
1153 $c->controller('solGS::combinedTrials')->combined_pops_summary($c);
1154 $self->trait_phenotype_stat($c);
1155 $self->gs_modeling_files($c);
1157 $c->res->redirect("/solgs/model/combined/populations/$combo_pops_id/trait/$trait_id/gp/$protocol_id");
1158 $c->detach();
1160 elsif ($referer =~ /solgs\/trait\//)
1162 my ($trait_id, $pop_id) = $referer =~ m/(\d+)/g;
1164 $c->stash->{data_set_type} = "single population";
1165 $c->stash->{trait_id} = $trait_id;
1167 $self->predict_selection_pop_single_pop_model($c);
1169 $self->trait_phenotype_stat($c);
1170 $self->gs_modeling_files($c);
1172 $c->res->redirect("/solgs/trait/$trait_id/population/$training_pop_id/gp/$protocol_id");
1173 $c->detach();
1175 elsif ($referer =~ /solgs\/models\/combined\/trials/)
1177 $c->stash->{data_set_type} = "combined populations";
1178 $c->stash->{combo_pops_id} = $training_pop_id;
1180 $self->traits_with_valid_models($c);
1181 my @traits_abbrs = @ {$c->stash->{traits_with_valid_models}};
1183 foreach my $trait_abbr (@traits_abbrs)
1185 $c->stash->{trait_abbr} = $trait_abbr;
1186 $self->get_trait_details_of_trait_abbr($c);
1187 $c->controller('solGS::combinedTrials')->predict_selection_pop_combined_pops_model($c);
1190 $c->res->redirect("/solgs/models/combined/trials/$training_pop_id/gp/$protocol_id");
1191 $c->detach();
1193 elsif ($referer =~ /solgs\/traits\/all\/population\//)
1195 $c->stash->{data_set_type} = "single population";
1197 $self->predict_selection_pop_multi_traits($c);
1199 $c->res->redirect("/solgs/traits/all/population/$training_pop_id/gp/$protocol_id");
1200 $c->detach();
1206 sub list_predicted_selection_pops {
1207 my ($self, $c, $model_id) = @_;
1209 my $dir = $c->stash->{solgs_cache_dir};
1211 opendir my $dh, $dir or die "can't open $dir: $!\n";
1213 my @files = grep { /rrblup_selection_gebvs_\w+_${model_id}_/ && -f "$dir/$_" }
1214 readdir($dh);
1216 closedir $dh;
1218 my @pred_pops;
1220 foreach (@files)
1222 unless ($_ =~ /list/) {
1223 my ($model_id2, $pred_pop_id) = $_ =~ m/\d+/g;
1225 push @pred_pops, $pred_pop_id;
1229 @pred_pops = uniq(@pred_pops);
1231 $c->stash->{list_of_predicted_selection_pops} = \@pred_pops;
1236 sub prediction_pop_analyzed_traits {
1237 my ($self, $c, $training_pop_id, $selection_pop_id) = @_;
1239 my @selected_analyzed_traits = @{$c->stash->{training_traits_ids}} if $c->stash->{training_traits_ids};
1241 no warnings 'uninitialized';
1243 my $dir = $c->stash->{solgs_cache_dir};
1244 opendir my $dh, $dir or die "can't open $dir: $!\n";
1246 my @files;
1247 my @trait_ids;
1248 my @trait_abbrs;
1249 my @selected_trait_abbrs;
1250 my @selected_files;
1251 my $identifier = $training_pop_id . '_' . $selection_pop_id;
1253 if (@selected_analyzed_traits)
1255 @trait_ids;
1257 foreach my $trait_id (@selected_analyzed_traits)
1259 $c->stash->{trait_id} = $trait_id;
1260 $self->get_trait_details($c);
1261 push @selected_trait_abbrs, $c->stash->{trait_abbr};
1263 $c->controller('solGS::Files')->rrblup_selection_gebvs_file($c, $identifier, $trait_id);
1264 my $file = $c->stash->{rrblup_selection_gebvs_file};
1266 if ( -s $c->stash->{rrblup_selection_gebvs_file})
1268 push @selected_files, $c->stash->{rrblup_selection_gebvs_file};
1269 push @trait_ids, $trait_id;
1274 @trait_abbrs = @selected_trait_abbrs if @selected_trait_abbrs;
1275 @files = @selected_files if @selected_files;
1277 $c->stash->{prediction_pop_analyzed_traits} = \@trait_abbrs;
1278 $c->stash->{prediction_pop_analyzed_traits_ids} = \@trait_ids;
1279 $c->stash->{prediction_pop_analyzed_traits_files} = \@files;
1284 sub model_parameters {
1285 my ($self, $c) = @_;
1287 $c->controller("solGS::Files")->variance_components_file($c);
1288 my $file = $c->stash->{variance_components_file};
1290 my $params = $c->controller('solGS::Utils')->read_file_data($file);
1291 $c->stash->{model_parameters} = $params;
1296 sub solgs_details_trait :Path('/solgs/details/trait/') Args(1) {
1297 my ($self, $c, $trait_id) = @_;
1299 $trait_id = $c->req->param('trait_id') if !$trait_id;
1301 my $ret->{status} = undef;
1303 if ($trait_id)
1305 $self->get_trait_details($c, $trait_id);
1306 $ret->{name} = $c->stash->{trait_name};
1307 $ret->{def} = $c->stash->{trait_def};
1308 $ret->{abbr} = $c->stash->{trait_abbr};
1309 $ret->{id} = $c->stash->{trait_id};
1310 $ret->{status} = 1;
1313 $ret = to_json($ret);
1315 $c->res->content_type('application/json');
1316 $c->res->body($ret);
1321 sub get_trait_details {
1322 my ($self, $c, $trait) = @_;
1324 $trait = $c->stash->{trait_id} if !$trait;
1326 die "Can't get trait details with out trait id or name: $!\n" if !$trait;
1328 my ($trait_name, $trait_def, $trait_id, $trait_abbr);
1330 if ($trait =~ /^\d+$/)
1332 $trait = $c->model('solGS::solGS')->trait_name($trait);
1335 if ($trait)
1337 my $rs = $c->model('solGS::solGS')->trait_details($trait);
1339 while (my $row = $rs->next)
1341 $trait_id = $row->id;
1342 $trait_name = $row->name;
1343 $trait_def = $row->definition;
1344 $trait_abbr = $c->controller('solGS::Utils')->abbreviate_term($trait_name);
1348 my $abbr = $c->controller('solGS::Utils')->abbreviate_term($trait_name);
1350 $c->stash->{trait_id} = $trait_id;
1351 $c->stash->{trait_name} = $trait_name;
1352 $c->stash->{trait_def} = $trait_def;
1353 $c->stash->{trait_abbr} = $abbr;
1358 sub check_selection_pops_list :Path('/solgs/check/selection/populations') Args(1) {
1359 my ($self, $c, $tr_pop_id) = @_;
1361 my @traits_ids = $c->req->param('training_traits_ids[]');
1362 $c->stash->{training_traits_ids} = \@traits_ids;
1363 $c->stash->{training_pop_id} = $tr_pop_id;
1364 my $protocol_id = $c->req->param('genotyping_protocol_id');
1366 $c->controller('solGS::genotypingProtocol')->stash_protocol_id($c, $protocol_id);
1368 $c->controller('solGS::Files')->list_of_prediction_pops_file($c, $tr_pop_id);
1369 my $pred_pops_file = $c->stash->{list_of_prediction_pops_file};
1371 my $ret->{result} = 0;
1373 if (-s $pred_pops_file)
1375 $self->list_of_prediction_pops($c, $tr_pop_id);
1376 my $selection_pops_ids = $c->stash->{selection_pops_ids};
1377 my $formatted_selection_pops = $c->stash->{list_of_prediction_pops};
1379 $self->prediction_pop_analyzed_traits($c, $tr_pop_id, $selection_pops_ids->[0]);
1380 my $selection_pop_traits = $c->stash->{prediction_pop_analyzed_traits_ids};
1382 $ret->{selection_traits} = $selection_pop_traits;
1383 $ret->{data} = $formatted_selection_pops;
1386 $ret = to_json($ret);
1388 $c->res->content_type('application/json');
1389 $c->res->body($ret);
1394 sub selection_population_predicted_traits :Path('/solgs/selection/population/predicted/traits/') Args(0) {
1395 my ($self, $c) = @_;
1397 my $training_pop_id = $c->req->param('training_pop_id');
1398 my $selection_pop_id = $c->req->param('selection_pop_id');
1400 $c->stash->{genotyping_protocol_id} = $c->req->param('genotyping_protocol_id');
1401 $c->stash->{training_pop_id} = $training_pop_id;
1402 $c->stash->{selection_pop_id} = $selection_pop_id;
1404 my $ret->{selection_traits} = undef;
1405 if ($training_pop_id && $selection_pop_id)
1407 $self->prediction_pop_analyzed_traits($c, $training_pop_id, $selection_pop_id);
1408 my $selection_pop_traits = $c->stash->{prediction_pop_analyzed_traits_ids};
1409 $ret->{selection_traits} = $selection_pop_traits;
1412 $ret = to_json($ret);
1414 $c->res->content_type('application/json');
1415 $c->res->body($ret);
1420 sub check_genotype_data_population :Path('/solgs/check/genotype/data/population/') Args(1) {
1421 my ($self, $c, $pop_id) = @_;
1423 $c->stash->{pop_id} = $pop_id;
1424 $self->check_population_has_genotype($c);
1426 my $ret->{has_genotype} = $c->stash->{population_has_genotype};
1427 $ret = to_json($ret);
1429 $c->res->content_type('application/json');
1430 $c->res->body($ret);
1435 sub check_phenotype_data_population :Path('/solgs/check/phenotype/data/population/') Args(1) {
1436 my ($self, $c, $pop_id) = @_;
1438 $c->stash->{pop_id} = $pop_id;
1439 $self->check_population_has_phenotype($c);
1441 my $ret->{has_phenotype} = $c->stash->{population_has_phenotype};
1442 $ret = to_json($ret);
1444 $c->res->content_type('application/json');
1445 $c->res->body($ret);
1450 sub check_population_exists :Path('/solgs/check/population/exists/') Args(0) {
1451 my ($self, $c) = @_;
1453 my $name = $c->req->param('name');
1455 my $rs = $c->model("solGS::solGS")->project_details_by_name($name);
1457 my $pop_id;
1458 while (my $row = $rs->next)
1460 $pop_id = $row->id;
1463 my $ret->{population_id} = $pop_id;
1464 $ret = to_json($ret);
1466 $c->res->content_type('application/json');
1467 $c->res->body($ret);
1472 sub check_training_population :Path('/solgs/check/training/population/') Args() {
1473 my ($self, $c, $pop_id, $gp, $protocol_id) = @_;
1475 $c->controller('solGS::genotypingProtocol')->stash_protocol_id($c, $protocol_id);
1477 $c->stash->{pop_id} = $pop_id;
1478 $c->stash->{training_pop_id} = $pop_id;
1480 $self->check_population_is_training_population($c, $pop_id, $protocol_id);
1481 my $is_training_pop = $c->stash->{is_training_population};
1483 my $training_pop_data;
1484 if ($is_training_pop)
1486 my $pr_rs = $c->model('solGS::solGS')->project_details($pop_id);
1487 $self->projects_links($c, $pr_rs);
1488 $training_pop_data = $c->stash->{projects_pages};
1491 my $ret->{is_training_population} = $is_training_pop;
1492 $ret->{training_pop_data} = $training_pop_data;
1493 $ret = to_json($ret);
1495 $c->res->content_type('application/json');
1496 $c->res->body($ret);
1501 sub check_population_is_training_population {
1502 my ($self, $c, $pop_id, $protocol_id) = @_;
1504 $pop_id = $c->stash->{training_pop_id} if !$pop_id;
1505 $protocol_id = $c->stash->{genotyping_protocol_id} if !$protocol_id;
1507 my $is_gs = $c->model("solGS::solGS")->get_project_type($pop_id);
1509 my $has_phenotype;
1510 my $has_genotype;
1512 if ($is_gs !~ /genomic selection/)
1514 $self->check_population_has_phenotype($c);
1515 $has_phenotype = $c->stash->{population_has_phenotype};
1517 if ($has_phenotype)
1519 $self->check_population_has_genotype($c);
1520 $has_genotype = $c->stash->{population_has_genotype};
1524 if ($is_gs || ($has_phenotype && $has_genotype))
1526 $c->stash->{is_training_population} = 1;
1532 sub check_population_has_phenotype {
1533 my ($self, $c) = @_;
1535 my $pr_id = $c->stash->{pop_id};
1536 my $is_gs = $c->model("solGS::solGS")->get_project_type($pr_id);
1537 my $has_phenotype = 1 if $is_gs;
1539 if ($is_gs !~ /genomic selection/)
1541 $c->controller('solGS::Files')->phenotype_file_name($c, $pr_id);
1542 my $pheno_file = $c->stash->{phenotype_file_name};
1544 if (!-s $pheno_file)
1546 $has_phenotype = $c->model("solGS::solGS")->has_phenotype($pr_id);
1548 else
1550 $has_phenotype = 1;
1554 $c->stash->{population_has_phenotype} = $has_phenotype;
1559 sub check_population_has_genotype {
1560 my ($self, $c) = @_;
1562 my $pop_id = $c->stash->{pop_id};
1563 my $protocol_id = $c->stash->{genotyping_protocol_id};
1565 $c->controller('solGS::Files')->genotype_file_name($c, $pop_id, $protocol_id);
1566 my $geno_file = $c->stash->{genotype_file_name};
1568 my $has_genotype;
1570 if (-s $geno_file)
1572 $has_genotype = 1;
1574 else
1576 $c->controller('solGS::Files')->first_stock_genotype_file($c, $pop_id, $protocol_id);
1577 my $first_stock_file = $c->stash->{first_stock_genotype_file};
1579 $has_genotype = 1 if -s $first_stock_file;
1582 if (!$has_genotype)
1584 $has_genotype = $c->model('solGS::solGS')->has_genotype($pop_id, $protocol_id);
1587 $c->stash->{population_has_genotype} = $has_genotype;
1591 sub check_selection_population_relevance :Path('/solgs/check/selection/population/relevance') Args() {
1592 my ($self, $c) = @_;
1594 #my $data_set_type = $c->req->param('data_set_type');
1595 my $training_pop_id = $c->req->param('training_pop_id');
1596 my $selection_pop_name = $c->req->param('selection_pop_name');
1597 my $trait_id = $c->req->param('trait_id');
1598 my $protocol_id = $c->req->param('genotyping_protocol_id');
1600 $c->controller('solGS::genotypingProtocol')->stash_protocol_id($c, $protocol_id);
1602 my $referer = $c->req->referer;
1604 if ($referer =~ /combined\//)
1606 $c->stash->{data_set_type} = 'combined populations';
1607 $c->stash->{combo_pops_id} = $training_pop_id;
1610 my $pr_rs = $c->model("solGS::solGS")->project_details_by_exact_name($selection_pop_name);
1612 my $selection_pop_id;
1613 while (my $row = $pr_rs->next) {
1614 $selection_pop_id = $row->project_id;
1617 my $ret = {};
1619 if ($selection_pop_id !~ /$training_pop_id/)
1621 my $has_genotype;
1622 if ($selection_pop_id)
1624 $c->stash->{pop_id} = $selection_pop_id;
1625 $c->stash->{selection_pop_id} = $selection_pop_id;
1626 $self->check_population_has_genotype($c);
1627 $has_genotype = $c->stash->{population_has_genotype};
1630 my $similarity;
1631 if ($has_genotype)
1633 $self->first_stock_genotype_data($c, $selection_pop_id, $protocol_id);
1635 $c->controller('solGS::Files')->first_stock_genotype_file($c, $selection_pop_id, $protocol_id);
1636 my $selection_geno_file = $c->stash->{first_stock_genotype_file};
1638 $c->controller('solGS::Files')->genotype_file_name($c, $training_pop_id, $protocol_id);
1639 my $training_geno_file = $c->stash->{genotype_file_name};
1641 $similarity = $self->compare_marker_set_similarity([$selection_geno_file, $training_geno_file]);
1644 my $selection_pop_data;
1645 unless ($similarity < 0.5 )
1647 $c->stash->{training_pop_id} = $training_pop_id;
1648 $self->format_selection_pops($c, [$selection_pop_id]);
1649 $selection_pop_data = $c->stash->{selection_pops_list};
1650 $self->save_selection_pops($c, [$selection_pop_id]);
1653 $ret->{selection_pop_data} = $selection_pop_data;
1654 $ret->{similarity} = $similarity;
1655 $ret->{has_genotype} = $has_genotype;
1656 $ret->{selection_pop_id} = $selection_pop_id;
1658 else
1660 $ret->{selection_pop_id} = $selection_pop_id;
1663 $ret = to_json($ret);
1665 $c->res->content_type('application/json');
1666 $c->res->body($ret);
1671 sub save_selection_pops {
1672 my ($self, $c, $selection_pop_id) = @_;
1674 my $training_pop_id = $c->stash->{training_pop_id};
1676 $c->controller('solGS::Files')->list_of_prediction_pops_file($c, $training_pop_id);
1677 my $selection_pops_file = $c->stash->{list_of_prediction_pops_file};
1679 my @existing_pops_ids = read_file($selection_pops_file);
1681 my @uniq_ids = unique(@existing_pops_ids, @$selection_pop_id);
1682 my $formatted_ids = join("\n", @uniq_ids);
1684 write_file($selection_pops_file, $formatted_ids);
1689 sub search_selection_pops :Path('/solgs/search/selection/populations/') {
1690 my ($self, $c, $tr_pop_id) = @_;
1692 $c->stash->{training_pop_id} = $tr_pop_id;
1694 $self->search_all_relevant_selection_pops($c, $tr_pop_id);
1695 my $selection_pops_list = $c->stash->{all_relevant_selection_pops};
1697 my $ret->{selection_pops_list} = 0;
1698 if ($selection_pops_list)
1700 $ret->{data} = $selection_pops_list;
1703 $ret = to_json($ret);
1705 $c->res->content_type('application/json');
1706 $c->res->body($ret);
1711 sub list_of_prediction_pops {
1712 my ($self, $c, $training_pop_id) = @_;
1714 $c->controller('solGS::Files')->list_of_prediction_pops_file($c, $training_pop_id);
1715 my $pred_pops_file = $c->stash->{list_of_prediction_pops_file};
1717 my @pred_pops_ids = read_file($pred_pops_file);
1718 grep(s/\s//g, @pred_pops_ids);
1720 $c->stash->{selection_pops_ids} = \@pred_pops_ids;
1722 $self->format_selection_pops($c, \@pred_pops_ids);
1723 $c->stash->{list_of_prediction_pops} = $c->stash->{selection_pops_list};
1728 sub search_all_relevant_selection_pops {
1729 my ($self, $c, $training_pop_id) = @_;
1731 my @pred_pops_ids = @{$c->model('solGS::solGS')->prediction_pops($training_pop_id)};
1733 $self->save_selection_pops($c, \@pred_pops_ids);
1735 $self->format_selection_pops($c, \@pred_pops_ids);
1737 $c->stash->{all_relevant_selection_pops} = $c->stash->{selection_pops_list};
1742 sub format_selection_pops {
1743 my ($self, $c, $pred_pops_ids) = @_;
1745 my $training_pop_id = $c->stash->{training_pop_id};
1747 my @pred_pops_ids = @{$pred_pops_ids};
1748 my @data;
1750 if (@pred_pops_ids) {
1752 foreach my $prediction_pop_id (@pred_pops_ids)
1754 my $pred_pop_rs = $c->model('solGS::solGS')->project_details($prediction_pop_id);
1755 my $pred_pop_link;
1757 while (my $row = $pred_pop_rs->next)
1759 my $name = $row->name;
1760 my $desc = $row->description;
1762 # unless ($name =~ /test/ || $desc =~ /test/)
1764 my $id_pop_name->{id} = $prediction_pop_id;
1765 $id_pop_name->{name} = $name;
1766 $id_pop_name->{pop_type} = 'selection';
1767 $id_pop_name = to_json($id_pop_name);
1769 # $pred_pop_link = qq | <a href="/solgs/model/$training_pop_id/prediction/$prediction_pop_id"
1770 # onclick="solGS.waitPage(this.href); return false;"><input type="hidden" value=\'$id_pop_name\'>$name</data>
1771 # </a>
1772 # |;
1774 $pred_pop_link = qq | <data><input type="hidden" value=\'$id_pop_name\'>$name</data>|;
1777 my $pr_yr_rs = $c->model('solGS::solGS')->project_year($prediction_pop_id);
1778 my $project_yr;
1780 while ( my $yr_r = $pr_yr_rs->next )
1782 $project_yr = $yr_r->value;
1785 $c->controller('solGS::Download')->selection_prediction_download_urls($c, $training_pop_id, $prediction_pop_id);
1786 my $download_prediction = $c->stash->{selection_prediction_download};
1788 push @data, [$pred_pop_link, $desc, $project_yr, $download_prediction];
1793 $c->stash->{selection_pops_list} = \@data;
1798 sub get_trait_details_of_trait_abbr {
1799 my ($self, $c) = @_;
1801 my $trait_abbr = $c->stash->{trait_abbr};
1803 my $acronym_pairs = $self->get_acronym_pairs($c, $c->stash->{training_pop_id});
1805 if ($acronym_pairs)
1807 foreach my $r (@$acronym_pairs)
1809 if ($r->[0] eq $trait_abbr)
1811 my $trait_name = $r->[1];
1812 $trait_name =~ s/^\s+|\s+$//g;
1814 my $trait_id = $c->model('solGS::solGS')->get_trait_id($trait_name);
1815 $self->get_trait_details($c, $trait_id);
1823 sub build_multiple_traits_models {
1824 my ($self, $c) = @_;
1826 my $pop_id = $c->stash->{pop_id} || $c->stash->{training_pop_id};
1827 my @selected_traits = @{$c->stash->{training_traits_ids}};
1828 my $trait_id = $selected_traits[0] if scalar(@selected_traits) == 1;
1830 my $traits;
1832 for (my $i = 0; $i <= $#selected_traits; $i++)
1834 my $tr = $c->model('solGS::solGS')->trait_name($selected_traits[$i]);
1835 my $abbr = $c->controller('solGS::Utils')->abbreviate_term($tr);
1836 $traits .= $abbr;
1837 $traits .= "\t" unless ($i == $#selected_traits);
1841 my $name = "selected_traits_pop_${pop_id}";
1842 my $temp_dir = $c->stash->{solgs_tempfiles_dir};
1843 my $file = $c->controller('solGS::Files')->create_tempfile($temp_dir, $name);
1845 write_file($file, $traits);
1846 $c->stash->{selected_traits_file} = $file;
1848 $name = "trait_info_${trait_id}_pop_${pop_id}";
1849 my $file2 = $c->controller('solGS::Files')->create_tempfile($temp_dir, $name);
1851 $c->stash->{trait_file} = $file2;
1853 my $protocol_id = $c->stash->{genotyping_protocol_id};
1854 my $cached = $c->controller('solGS::CachedResult')->check_single_trial_training_data($c, $pop_id, $protocol_id);
1856 if (!$cached)
1858 $self->get_training_pop_data_query_job_args_file($c, [$pop_id], $protocol_id);
1859 $c->stash->{prerequisite_jobs} = $c->stash->{training_pop_data_query_job_args_file};
1863 $self->get_gs_modeling_jobs_args_file($c);
1864 $c->stash->{dependent_jobs} = $c->stash->{gs_modeling_jobs_args_file};
1865 $self->run_async($c);
1870 sub all_traits_output :Path('/solgs/traits/all/population') Args() {
1871 my ($self, $c, $training_pop_id, $tr_txt, $traits_selection_id, $gp, $protocol_id) = @_;
1873 $c->controller('solGS::genotypingProtocol')->stash_protocol_id($c, $protocol_id);
1875 my @traits_ids;
1877 if ($traits_selection_id =~ /^\d+$/)
1879 $c->controller('solGS::TraitsGebvs')->get_traits_selection_list($c, $traits_selection_id);
1880 @traits_ids = @{$c->stash->{traits_selection_list}} if $c->stash->{traits_selection_list};
1883 if ($training_pop_id =~ /list/)
1885 $c->stash->{list_id} = $training_pop_id =~ s/list_//r;
1888 $self->project_description($c, $training_pop_id);
1889 my $training_pop_name = $c->stash->{project_name};
1890 my $training_pop_desc = $c->stash->{project_desc};
1891 my $training_pop_page = qq | <a href="/solgs/population/$training_pop_id/gp/$protocol_id">$training_pop_name</a> |;
1893 my @select_analysed_traits;
1895 if(!@traits_ids)
1897 $c->stash->{message} = "Cached output for this page does not exist anymore.\n" .
1898 " Please go to $training_pop_page and run the analysis.";
1900 $c->stash->{template} = "/generic_message.mas";
1902 else
1904 my @traits_pages;
1905 if (scalar(@traits_ids) == 1)
1907 my $trait_id = $traits_ids[0];
1908 $c->res->redirect("/solgs/trait/$trait_id/population/$training_pop_id/gp/$protocol_id");
1909 $c->detach();
1911 else
1913 foreach my $trait_id (@traits_ids)
1915 $c->stash->{trait_id} = $trait_id;
1916 $c->stash->{model_id} = $training_pop_id;
1917 $self->create_model_summary($c);
1918 my $model_summary = $c->stash->{model_summary};
1920 push @traits_pages, $model_summary;
1924 $c->stash->{training_traits_ids} = \@traits_ids;
1925 $self->analyzed_traits($c);
1926 my $analyzed_traits = $c->stash->{analyzed_traits};
1928 $c->stash->{trait_pages} = \@traits_pages;
1930 my @training_pop_data = ([$training_pop_page, $training_pop_desc, \@traits_pages]);
1932 $c->stash->{model_data} = \@training_pop_data;
1933 $c->stash->{pop_id} = $training_pop_id;
1934 $c->controller('solGS::solGS')->get_acronym_pairs($c, $training_pop_id);
1936 $c->stash->{template} = '/solgs/population/multiple_traits_output.mas';
1942 sub create_model_summary {
1943 my ($self, $c) = @_;
1945 my $trait_id = $c->stash->{trait_id};
1946 my $model_id = $c->stash->{model_id};
1947 my $protocol_id = $c->stash->{genotyping_protocol_id};
1949 $c->controller("solGS::solGS")->get_trait_details($c, $trait_id);
1950 my $tr_abbr = $c->stash->{trait_abbr};
1952 my $path = $c->req->path;
1953 my $trait_page;
1955 if ($path =~ /solgs\/traits\/all\/population\//)
1957 $trait_page = qq | <a href="/solgs/trait/$trait_id/population/$model_id/gp/$protocol_id" onclick="solGS.waitPage()">$tr_abbr</a>|;
1959 elsif ($path =~ /solgs\/models\/combined\/trials\//)
1961 $trait_page = qq | <a href="/solgs/model/combined/populations/$model_id/trait/$trait_id/gp/$protocol_id" onclick="solGS.waitPage()">$tr_abbr</a>|;
1964 $c->controller("solGS::modelAccuracy")->get_model_accuracy_value($c, $model_id, $tr_abbr);
1965 my $accuracy_value = $c->stash->{accuracy_value};
1967 $c->controller("solGS::Heritability")->get_heritability($c);
1968 my $heritability = $c->stash->{heritability};
1970 my $model_summary = [$trait_page, $accuracy_value, $heritability];
1972 $c->stash->{model_summary} = $model_summary;
1978 sub traits_with_valid_models {
1979 my ($self, $c) = @_;
1981 my $pop_id = $c->stash->{pop_id} || $c->stash->{training_pop_id};
1983 $self->analyzed_traits($c);
1985 my @analyzed_traits = @{$c->stash->{analyzed_traits}};
1986 my @filtered_analyzed_traits;
1987 my @valid_traits_ids;
1989 foreach my $analyzed_trait (@analyzed_traits)
1991 $c->controller('solGS::modelAccuracy')->get_model_accuracy_value($c, $pop_id, $analyzed_trait);
1992 my $av = $c->stash->{accuracy_value};
1993 if ($av && $av =~ m/\d+/ && $av > 0)
1995 push @filtered_analyzed_traits, $analyzed_trait;
1998 $c->stash->{trait_abbr} = $analyzed_trait;
1999 $self->get_trait_details_of_trait_abbr($c);
2000 push @valid_traits_ids, $c->stash->{trait_id};
2004 @filtered_analyzed_traits = uniq(@filtered_analyzed_traits);
2005 @valid_traits_ids = uniq(@valid_traits_ids);
2007 $c->stash->{traits_with_valid_models} = \@filtered_analyzed_traits;
2008 $c->stash->{traits_ids_with_valid_models} = \@valid_traits_ids;
2013 sub get_project_owners {
2014 my ($self, $c, $pr_id) = @_;
2016 my $owners = $c->model("solGS::solGS")->get_stock_owners($pr_id);
2017 my $owners_names;
2019 if ($owners)
2021 for (my $i=0; $i < scalar(@$owners); $i++)
2023 my $owner_name = $owners->[$i]->{'first_name'} . "\t" . $owners->[$i]->{'last_name'} if $owners->[$i];
2025 unless (!$owner_name)
2027 $owners_names .= $owners_names ? ', ' . $owner_name : $owner_name;
2032 $c->stash->{project_owners} = $owners_names;
2036 sub compare_marker_set_similarity {
2037 my ($self, $marker_file_pair) = @_;
2039 my $file_1 = $marker_file_pair->[0];
2040 my $file_2 = $marker_file_pair->[1];
2042 my $first_markers = (read_file($marker_file_pair->[0]))[0];
2043 my $sec_markers = (read_file($marker_file_pair->[1]))[0];
2045 my @first_geno_markers = split(/\t/, $first_markers);
2046 my @sec_geno_markers = split(/\t/, $sec_markers);
2048 if ( @first_geno_markers && @sec_geno_markers)
2050 my $common_markers = scalar(intersect(@first_geno_markers, @sec_geno_markers));
2051 my $similarity = $common_markers / scalar(@first_geno_markers);
2053 return $similarity;
2055 else
2057 return 0;
2063 sub compare_genotyping_platforms {
2064 my ($self, $c, $g_files) = @_;
2066 my $combinations = combinations($g_files, 2);
2067 my $combo_cnt = combinations($g_files, 2);
2069 my $not_matching_pops;
2070 my $cnt = 0;
2071 my $cnt_pairs = 0;
2073 while ($combo_cnt->next)
2075 $cnt_pairs++;
2078 while (my $pair = $combinations->next)
2080 $cnt++;
2081 my $similarity = $self->compare_marker_set_similarity($pair);
2083 unless ($similarity > 0.5 )
2085 no warnings 'uninitialized';
2086 my $pop_id_1 = fileparse($pair->[0]);
2087 my $pop_id_2 = fileparse($pair->[1]);
2089 map { s/genotype_data_|\.txt//g } $pop_id_1, $pop_id_2;
2091 my $list_type_pop = $c->stash->{list_prediction};
2093 unless ($list_type_pop)
2095 my @pop_names;
2096 foreach ($pop_id_1, $pop_id_2)
2098 my $pr_rs = $c->model('solGS::solGS')->project_details($_);
2100 while (my $row = $pr_rs->next)
2102 push @pop_names, $row->name;
2106 $not_matching_pops .= '[ ' . $pop_names[0]. ' and ' . $pop_names[1] . ' ]';
2107 $not_matching_pops .= ', ' if $cnt != $cnt_pairs;
2109 # else
2110 # {
2111 # $not_matching_pops = 'not_matching';
2116 $c->stash->{pops_with_no_genotype_match} = $not_matching_pops;
2121 sub submit_cluster_compare_trials_markers {
2122 my ($self, $c, $geno_files) = @_;
2124 $c->stash->{r_temp_file} = 'compare-trials-markers';
2125 $self->create_cluster_accesible_tmp_files($c);
2126 my $out_temp_file = $c->stash->{out_file_temp};
2127 my $err_temp_file = $c->stash->{err_file_temp};
2129 my $temp_dir = $c->stash->{solgs_tempfiles_dir};
2130 my $background_job = $c->stash->{background_job};
2132 my $status;
2134 try
2136 my $compare_trials_job = CXGN::Tools::Run->run_cluster_perl({
2138 method => ["SGN::Controller::solGS::solGS" => "compare_genotyping_platforms"],
2139 args => ['SGN::Context', $geno_files],
2140 load_packages => ['SGN::Controller::solGS::solGS', 'SGN::Context'],
2141 run_opts => {
2142 out_file => $out_temp_file,
2143 err_file => $err_temp_file,
2144 working_dir => $temp_dir,
2145 max_cluster_jobs => 1_000_000_000,
2150 $c->stash->{r_job_tempdir} = $compare_trials_job->tempdir();
2152 $c->stash->{r_job_id} = $compare_trials_job->job_id();
2153 $c->stash->{cluster_job} = $compare_trials_job;
2155 unless ($background_job)
2157 $compare_trials_job->wait();
2161 catch
2163 $status = $_;
2164 $status =~ s/\n at .+//s;
2170 sub phenotype_graph :Path('/solgs/phenotype/graph') Args(0) {
2171 my ($self, $c) = @_;
2173 my $pop_id = $c->req->param('pop_id');
2174 my $trait_id = $c->req->param('trait_id');
2175 my $combo_pops_id = $c->req->param('combo_pops_id');
2177 $self->get_trait_details($c, $trait_id);
2179 $c->stash->{pop_id} = $pop_id;
2180 $c->stash->{combo_pops_id} = $combo_pops_id;
2182 $c->stash->{data_set_type} = 'combined populations' if $combo_pops_id;
2184 $c->controller("solGS::Files")->trait_phenodata_file($c);
2186 my $trait_pheno_file = $c->{stash}->{trait_phenodata_file};
2187 my $trait_data = $c->controller("solGS::Utils")->read_file_data($trait_pheno_file);
2189 my $ret->{status} = 'failed';
2191 if (@$trait_data)
2193 $ret->{status} = 'success';
2194 $ret->{trait_data} = $trait_data;
2197 $ret = to_json($ret);
2199 $c->res->content_type('application/json');
2200 $c->res->body($ret);
2205 sub trait_pheno_data_type {
2206 my ($self, $c, $trait_pheno_file) = @_;
2208 #$c->controller("solGS::Files")->trait_phenodata_file($c);
2209 #my $trait_pheno_file = $c->{stash}->{trait_phenodata_file};
2211 my $mean_type;
2212 if (-s $trait_pheno_file)
2214 my @trait_data = read_file($trait_pheno_file);
2215 $mean_type = shift(@trait_data);
2217 if ($mean_type =~ /fixed_effects/)
2219 $mean_type = 'Adjusted means, fixed (genotype) effects model';
2221 elsif ($mean_type =~ /random_effects/)
2223 $mean_type = 'Adjusted means, random (genotype) effects model';
2225 else
2227 if ($c->req->path =~ /combined\/populations\//)
2229 $mean_type = 'Average of adjusted means and/or arithmetic means across trials.';
2231 else
2233 $mean_type = 'Arithmetic means';
2238 return $mean_type;
2243 #generates descriptive stat for a trait phenotype data
2244 sub trait_phenotype_stat {
2245 my ($self, $c) = @_;
2247 $c->controller("solGS::Files")->trait_phenodata_file($c);
2248 my $trait_pheno_file = $c->{stash}->{trait_phenodata_file};
2250 my $trait_data = $c->controller("solGS::Utils")->read_file_data($trait_pheno_file);
2252 my @desc_stat;
2253 my $background_job = $c->stash->{background_job};
2255 my $pheno_type = $self->trait_pheno_data_type($c, $trait_pheno_file);
2257 if ($trait_data && !$background_job)
2259 my @pheno_data;
2260 foreach (@$trait_data)
2262 unless (!$_->[0])
2264 my $d = $_->[1];
2265 chomp($d);
2267 if ($d =~ /\d+/)
2269 push @pheno_data, $d;
2274 my $stat = Statistics::Descriptive::Full->new();
2275 $stat->add_data(@pheno_data);
2277 my $min = $stat->min;
2278 my $max = $stat->max;
2279 my $mean = $stat->mean;
2280 my $med = $stat->median;
2281 my $std = $stat->standard_deviation;
2282 my $cnt = scalar(@$trait_data);
2283 my $cv = ($std / $mean) * 100;
2284 my $na = scalar(@$trait_data) - scalar(@pheno_data);
2286 if ($na == 0) { $na = '--'; }
2288 my $round = Math::Round::Var->new(0.01);
2289 $std = $round->round($std);
2290 $mean = $round->round($mean);
2291 $cv = $round->round($cv);
2292 $cv = $cv . '%';
2294 @desc_stat = (
2295 [ 'Phenotype data type', $pheno_type],
2296 [ 'Total no. of genotypes', $cnt ],
2297 [ 'Genotypes missing data', $na ],
2298 [ 'Minimum', $min ],
2299 [ 'Maximum', $max ],
2300 [ 'Arithmetic mean', $mean ],
2301 [ 'Median', $med ],
2302 [ 'Standard deviation', $std ],
2303 [ 'Coefficient of variation', $cv ]
2306 else
2308 @desc_stat = ( [ 'Total no. of genotypes', 'None' ],
2309 [ 'Genotypes missing data', 'None' ],
2310 [ 'Minimum', 'None' ],
2311 [ 'Maximum', 'None' ],
2312 [ 'Arithmetic mean', 'None' ],
2313 [ 'Median', 'None'],
2314 [ 'Standard deviation', 'None' ],
2315 [ 'Coefficient of variation', 'None' ]
2320 $c->stash->{descriptive_stat} = \@desc_stat;
2322 #sends an array of trait gebv data to an ajax request
2323 #with a population id and trait id parameters
2324 sub gebv_graph :Path('/solgs/trait/gebv/graph') Args(0) {
2325 my ($self, $c) = @_;
2327 my $training_pop_id = $c->req->param('training_pop_id');
2328 my $trait_id = $c->req->param('trait_id');
2329 my $selection_pop_id = $c->req->param('selection_pop_id');
2330 my $combo_pops_id = $c->req->param('combo_pops_id');
2331 my $protocol_id = $c->req->param('genotyping_protocol_id');
2334 if ($combo_pops_id)
2336 $c->controller('solGS::combinedTrials')->get_combined_pops_list($c, $combo_pops_id);
2337 $c->stash->{data_set_type} = 'combined populations';
2338 $training_pop_id = $combo_pops_id;
2339 $c->stash->{combo_pops_id} = $combo_pops_id;
2342 $c->stash->{pop_id} = $training_pop_id;
2343 $c->stash->{training_pop_id} = $training_pop_id;
2344 $c->stash->{prediction_pop_id} = $selection_pop_id;
2345 $c->stash->{selectiion_pop_id} = $selection_pop_id;
2346 $c->controller('solGS::genotypingProtocol')->stash_protocol_id($c, $protocol_id);
2347 $self->get_trait_details($c, $trait_id);
2349 my $page = $c->req->referer();
2350 my $gebv_file;
2352 if ($page =~ /solgs\/selection\//)
2354 my $identifier = $training_pop_id . '_' . $selection_pop_id;
2355 $c->controller('solGS::Files')->rrblup_selection_gebvs_file($c, $identifier, $trait_id);
2356 $gebv_file = $c->stash->{rrblup_selection_gebvs_file};
2358 else
2360 $c->controller('solGS::Files')->rrblup_training_gebvs_file($c);
2361 $gebv_file = $c->stash->{rrblup_training_gebvs_file};
2364 my $gebv_data = $c->controller("solGS::Utils")->read_file_data($gebv_file);
2366 my $ret->{status} = 'failed';
2368 if (@$gebv_data)
2370 $ret->{status} = 'success';
2371 $ret->{gebv_data} = $gebv_data;
2374 $ret = to_json($ret);
2376 $c->res->content_type('application/json');
2377 $c->res->body($ret);
2382 sub save_single_trial_traits {
2383 my ($self, $c, $pop_id) = @_;
2385 $pop_id = $c->stash->{training_pop_id} if !$pop_id;
2387 $c->controller('solGS::Files')->traits_list_file($c, $pop_id);
2388 my $traits_file = $c->stash->{traits_list_file};
2389 print STDERR "\save single : pop_id: $pop_id -- file: $traits_file\n";
2390 if (!-s $traits_file)
2392 my $trait_names = $c->controller('solGS::Utils')->get_clean_trial_trait_names($c, $pop_id);
2394 $trait_names = join("\t", @$trait_names);
2395 write_file($traits_file, $trait_names);
2401 sub get_all_traits {
2402 my ($self, $c, $pop_id) = @_;
2404 $pop_id = $c->stash->{training_pop_id} if !$pop_id;
2406 $c->controller('solGS::Files')->traits_list_file($c, $pop_id);
2407 my $traits_file = $c->stash->{traits_list_file};
2409 if (!-s $traits_file)
2411 my $page = $c->req->path;
2413 if ($page =~ /solgs\/population\/|anova\// && $pop_id !~ /\D+/)
2415 $self->save_single_trial_traits($c, $pop_id);
2419 my $traits = read_file($traits_file);
2421 $c->controller('solGS::Files')->traits_acronym_file($c, $pop_id);
2422 my $acronym_file = $c->stash->{traits_acronym_file};
2424 unless (-s $acronym_file)
2426 my @filtered_traits = split(/\t/, $traits);
2427 my $acronymized_traits = $c->controller('solGS::Utils')->acronymize_traits(\@filtered_traits);
2428 my $acronym_table = $acronymized_traits->{acronym_table};
2430 $self->traits_acronym_table($c, $acronym_table, $pop_id);
2433 $self->create_trait_data($c, $pop_id);
2437 sub create_trait_data {
2438 my ($self, $c, $pop_id) = @_;
2440 my $acronym_pairs = $self->get_acronym_pairs($c, $pop_id);
2442 if (@$acronym_pairs)
2444 my $table = 'trait_id' . "\t" . 'trait_name' . "\t" . 'acronym' . "\n";
2445 foreach (@$acronym_pairs)
2447 my $trait_name = $_->[1];
2448 $trait_name =~ s/\n//g;
2450 my $trait_id = $c->model('solGS::solGS')->get_trait_id($trait_name);
2452 if ($trait_id)
2454 $table .= $trait_id . "\t" . $trait_name . "\t" . $_->[0] . "\n";
2458 $c->controller('solGS::Files')->all_traits_file($c, $pop_id);
2459 my $traits_file = $c->stash->{all_traits_file};
2460 write_file($traits_file, $table);
2465 sub get_acronym_pairs {
2466 my ($self, $c, $pop_id) = @_;
2468 $pop_id = $c->stash->{training_pop_id} if !$pop_id;
2469 #$pop_id = $c->stash->{combo_pops_id} if !$pop_id;
2471 my $dir = $c->stash->{solgs_cache_dir};
2472 opendir my $dh, $dir
2473 or die "can't open $dir: $!\n";
2475 no warnings 'uninitialized';
2477 my ($file) = grep(/traits_acronym_pop_${pop_id}/, readdir($dh));
2478 $dh->close;
2480 my $acronyms_file = catfile($dir, $file);
2482 my @acronym_pairs;
2483 if (-f $acronyms_file)
2485 @acronym_pairs = map { [ split(/\t/) ] } read_file($acronyms_file);
2486 shift(@acronym_pairs); # remove header;
2489 @acronym_pairs = sort {uc $a->[0] cmp uc $b->[0] } @acronym_pairs;
2491 $c->stash->{acronym} = \@acronym_pairs;
2493 return \@acronym_pairs;
2498 sub traits_acronym_table {
2499 my ($self, $c, $acronym_table, $pop_id) = @_;
2501 $pop_id = $c->stash->{training_pop_id} if !$pop_id;
2503 if (keys %$acronym_table)
2505 my $table = 'Acronym' . "\t" . 'Trait name' . "\n";
2507 foreach (keys %$acronym_table)
2509 $table .= $_ . "\t" . $acronym_table->{$_} . "\n";
2512 $c->controller('solGS::Files')->traits_acronym_file($c, $pop_id);
2513 my $acronym_file = $c->stash->{traits_acronym_file};
2515 write_file($acronym_file, $table);
2521 sub analyzed_traits {
2522 my ($self, $c) = @_;
2524 my $training_pop_id = $c->stash->{model_id} || $c->stash->{training_pop_id};
2525 my @selected_analyzed_traits = @{$c->stash->{training_traits_ids}} if $c->stash->{training_traits_ids};
2527 my @traits;
2528 my @traits_ids;
2529 my @si_traits;
2530 my @valid_traits_files;
2531 my @analyzed_traits_files;
2533 foreach my $trait_id (@selected_analyzed_traits)
2535 $c->stash->{trait_id} = $trait_id;
2536 $self->get_trait_details($c);
2537 my $trait = $c->stash->{trait_abbr};
2539 $c->controller('solGS::modelAccuracy')->get_model_accuracy_value($c, $training_pop_id, $trait);
2540 my $av = $c->stash->{accuracy_value};
2542 my $trait_file;
2543 if ($av && $av =~ m/\d+/ && $av > 0)
2545 $c->controller('solGS::Files')->rrblup_training_gebvs_file($c, $training_pop_id, $trait_id);
2546 $trait_file = $c->stash->{rrblup_training_gebvs_file};
2547 push @valid_traits_files, $trait_file;
2548 push @si_traits, $trait;
2552 push @traits, $trait;
2553 push @analyzed_traits_files, $trait_file;
2556 @traits = uniq(@traits);
2557 @si_traits = uniq(@si_traits);
2559 $c->stash->{analyzed_traits} = \@traits;
2560 $c->stash->{analyzed_traits_ids} = \@selected_analyzed_traits;
2561 $c->stash->{analyzed_traits_files} = \@analyzed_traits_files;
2562 $c->stash->{selection_index_traits} = \@si_traits;
2563 $c->stash->{analyzed_valid_traits_files} = \@valid_traits_files;
2567 sub all_gs_traits_list {
2568 my ($self, $c) = @_;
2570 $self->trial_compatibility_file($c);
2571 my $file = $c->stash->{trial_compatibility_file};
2573 my $traits;
2574 my $mv_name = 'all_gs_traits';
2576 my $matview = $c->model('solGS::solGS')->check_matview_exists($mv_name);
2578 if (!$matview)
2580 $c->model('solGS::solGS')->materialized_view_all_gs_traits();
2581 $c->model('solGS::solGS')->insert_matview_public($mv_name);
2583 else
2585 if (!-s $file)
2587 $c->model('solGS::solGS')->refresh_materialized_view_all_gs_traits();
2588 $c->model('solGS::solGS')->update_matview_public($mv_name);
2594 $traits = $c->model('solGS::solGS')->all_gs_traits();
2596 catch
2599 if ($_ =~ /materialized view \"all_gs_traits\" has not been populated/)
2603 $c->model('solGS::solGS')->refresh_materialized_view_all_gs_traits();
2604 $c->model('solGS::solGS')->update_matview_public($mv_name);
2605 $traits = $c->model('solGS::solGS')->all_gs_traits();
2610 $c->stash->{all_gs_traits} = $traits;
2615 sub gs_traits_index {
2616 my ($self, $c) = @_;
2618 $self->all_gs_traits_list($c);
2619 my $all_traits = $c->stash->{all_gs_traits};
2620 my @all_traits = sort{$a cmp $b} @$all_traits;
2622 my @indices = ('A'..'Z');
2623 my %traits_hash;
2624 my @valid_indices;
2626 foreach my $index (@indices)
2628 my @index_traits;
2629 foreach my $trait (@all_traits)
2631 if ($trait =~ /^$index/i)
2633 push @index_traits, $trait;
2636 if (@index_traits)
2638 $traits_hash{$index}=[ @index_traits ];
2642 foreach my $k ( keys(%traits_hash))
2644 push @valid_indices, $k;
2647 @valid_indices = sort( @valid_indices );
2649 my $trait_index;
2650 foreach my $v_i (@valid_indices)
2652 $trait_index .= qq | <a href=/solgs/traits/$v_i>$v_i</a> |;
2653 unless ($v_i eq $valid_indices[-1])
2655 $trait_index .= " | ";
2659 $c->stash->{gs_traits_index} = $trait_index;
2664 sub traits_starting_with {
2665 my ($self, $c, $index) = @_;
2667 $self->all_gs_traits_list($c);
2668 my $all_traits = $c->stash->{all_gs_traits};
2670 my $trait_gr = [
2671 sort { $a cmp $b }
2672 grep { /^$index/i }
2673 uniq @$all_traits
2676 $c->stash->{trait_subgroup} = $trait_gr;
2680 sub hyperlink_traits {
2681 my ($self, $c, $traits) = @_;
2683 if (ref($traits) eq 'ARRAY')
2685 my @traits_urls;
2686 foreach my $tr (@$traits)
2688 push @traits_urls, [ qq | <a href="/solgs/search/result/traits/$tr">$tr</a> | ];
2691 $c->stash->{traits_urls} = \@traits_urls;
2693 else
2695 $c->stash->{traits_urls} = qq | <a href="/solgs/search/result/traits/$traits">$traits</a> |;
2700 sub gs_traits : Path('/solgs/traits') Args(1) {
2701 my ($self, $c, $index) = @_;
2703 my @traits_list;
2705 if ($index =~ /^\w{1}$/)
2707 $self->traits_starting_with($c, $index);
2708 my $traits_gr = $c->stash->{trait_subgroup};
2710 foreach my $trait (@$traits_gr)
2712 $self->hyperlink_traits($c, $trait);
2713 my $trait_url = $c->stash->{traits_urls};
2715 $self->get_trait_details($c, $trait);
2716 push @traits_list, [$trait_url, $c->stash->{trait_def}];
2719 $c->stash( template => $c->controller('solGS::Files')->template('/search/traits/list.mas'),
2720 index => $index,
2721 traits_list => \@traits_list
2724 else
2726 $c->forward('search');
2731 sub get_cluster_phenotype_query_job_args {
2732 my ($self, $c, $trials) = @_;
2734 my @queries;
2736 $c->controller('solGS::combinedTrials')->multi_pops_pheno_files($c, $trials);
2737 $c->stash->{phenotype_files_list} = $c->stash->{multi_pops_pheno_files};
2739 foreach my $trial_id (@$trials)
2741 $c->controller('solGS::Files')->phenotype_file_name($c, $trial_id);
2743 if (!-s $c->stash->{phenotype_file_name})
2745 my $args = $self->phenotype_trial_query_args($c, $trial_id);
2747 $c->stash->{r_temp_file} = "phenotype-data-query-${trial_id}";
2748 $self->create_cluster_accesible_tmp_files($c);
2749 my $out_temp_file = $c->stash->{out_file_temp};
2750 my $err_temp_file = $c->stash->{err_file_temp};
2752 my $temp_dir = $c->stash->{solgs_tempfiles_dir};
2753 my $background_job = $c->stash->{background_job};
2755 my $args_file = $c->controller('solGS::Files')->create_tempfile($temp_dir, "pheno-data-args_file-${trial_id}");
2757 nstore $args, $args_file
2758 or croak "data query script: $! serializing phenotype data query details to $args_file ";
2760 my $cmd = 'mx-run solGS::queryJobs '
2761 . ' --data_type phenotype '
2762 . ' --population_type trial '
2763 . ' --args_file ' . $args_file;
2766 my $config_args = {
2767 'temp_dir' => $temp_dir,
2768 'out_file' => $out_temp_file,
2769 'err_file' => $err_temp_file,
2770 'cluster_host' => 'localhost'
2773 my $config = $self->create_cluster_config($c, $config_args);
2775 my $job_args = {
2776 'cmd' => $cmd,
2777 'config' => $config,
2778 'background_job'=> $background_job,
2779 'temp_dir' => $temp_dir,
2782 push @queries, $job_args;
2786 $c->stash->{cluster_phenotype_query_job_args} = \@queries;
2791 sub get_pheno_data_query_job_args_file {
2792 my ($self, $c, $trials) = @_;
2794 $self->get_cluster_phenotype_query_job_args($c, $trials);
2795 my $pheno_query_args = $c->stash->{cluster_phenotype_query_job_args};
2797 my $temp_dir = $c->stash->{solgs_tempfiles_dir};
2798 my $pheno_query_args_file = $c->controller('solGS::Files')->create_tempfile($temp_dir, 'phenotype_data_query_args_file');
2800 nstore $pheno_query_args, $pheno_query_args_file
2801 or croak "pheno data query job : $! serializing selection pop data query details to $pheno_query_args_file";
2803 $c->stash->{pheno_data_query_job_args_file} = $pheno_query_args_file;
2807 sub get_geno_data_query_job_args_file {
2808 my ($self, $c, $trials, $protocol_id) = @_;
2810 $self->get_cluster_genotype_query_job_args($c, $trials, $protocol_id);
2811 my $geno_query_args = $c->stash->{cluster_genotype_query_job_args};
2813 my $temp_dir = $c->stash->{solgs_tempfiles_dir};
2814 my $geno_query_args_file = $c->controller('solGS::Files')->create_tempfile($temp_dir, 'genotype_data_query_args_file');
2816 nstore $geno_query_args, $geno_query_args_file
2817 or croak "geno data query job : $! serializing selection pop data query details to $geno_query_args_file";
2819 $c->stash->{geno_data_query_job_args_file} = $geno_query_args_file;
2823 sub submit_cluster_phenotype_query {
2824 my ($self, $c, $trials) = @_;
2826 $self->get_pheno_data_query_job_args_file($c, $trials);
2827 $c->stash->{dependent_jobs} = $c->stash->{pheno_data_query_job_args_file};
2828 $self->run_async($c);
2832 sub submit_cluster_genotype_query {
2833 my ($self, $c, $trials, $protocol_id) = @_;
2835 $self->get_geno_data_query_job_args_file($c, $trials, $protocol_id);
2836 $c->stash->{dependent_jobs} = $c->stash->{geno_data_query_job_args_file};
2837 $self->run_async($c);
2841 sub submit_cluster_training_pop_data_query {
2842 my ($self, $c, $trials, $protocol_id) = @_;
2844 $self->get_training_pop_data_query_job_args_file($c, $trials, $protocol_id);
2845 $c->stash->{dependent_jobs} = $c->stash->{training_pop_data_query_job_args_file};
2846 $self->run_async($c);
2850 sub training_pop_data_query_job_args {
2851 my ($self, $c, $trials, $protocol_id) = @_;
2853 my @queries;
2855 foreach my $trial (@$trials)
2857 $c->controller('solGS::Files')->phenotype_file_name($c, $trial);
2859 if (!-s $c->stash->{phenotype_file_name})
2861 $self->get_cluster_phenotype_query_job_args($c, [$trial]);
2862 my $pheno_query = $c->stash->{cluster_phenotype_query_job_args};
2863 push @queries, @$pheno_query if $pheno_query;
2866 $c->controller('solGS::Files')->genotype_file_name($c, $trial, $protocol_id);
2868 if (!-s $c->stash->{genotype_file_name})
2870 $self->get_cluster_genotype_query_job_args($c, [$trial], $protocol_id);
2871 my $geno_query = $c->stash->{cluster_genotype_query_job_args};
2872 push @queries, @$geno_query if $geno_query;
2877 $c->stash->{training_pop_data_query_job_args} = \@queries;
2881 sub get_training_pop_data_query_job_args_file {
2882 my ($self, $c, $trials, $protocol_id) = @_;
2884 $self->training_pop_data_query_job_args($c, $trials, $protocol_id);
2885 my $training_query_args = $c->stash->{training_pop_data_query_job_args};
2887 my $temp_dir = $c->stash->{solgs_tempfiles_dir};
2888 my $training_query_args_file = $c->controller('solGS::Files')->create_tempfile($temp_dir, 'training_pop_data_query_args');
2890 nstore $training_query_args, $training_query_args_file
2891 or croak "training pop data query job : $! serializing selection pop data query details to $training_query_args_file";
2893 $c->stash->{training_pop_data_query_job_args_file} = $training_query_args_file;
2897 sub get_cluster_genotype_query_job_args {
2898 my ($self, $c, $trials, $protocol_id) = @_;
2900 my @queries;
2902 foreach my $trial_id (@$trials)
2904 my $geno_file;
2905 if ($c->stash->{check_data_exists})
2907 $c->controller('solGS::Files')->first_stock_genotype_file($c, $trial_id, $protocol_id);
2908 $geno_file = $c->stash->{first_stock_genotype_file};
2910 else
2912 $c->controller('solGS::Files')->genotype_file_name($c, $trial_id, $protocol_id);
2913 $geno_file = $c->stash->{genotype_file_name};
2916 if (!-s $geno_file)
2918 #my $pop_id = $args->{selection_pop_id} || $args->{selection_pop_id} || $args->{training_pop_id};
2919 my $args = $self->genotype_trial_query_args($c, $trial_id, $protocol_id);
2921 $c->stash->{r_temp_file} = "genotype-data-query-${trial_id}";
2922 $self->create_cluster_accesible_tmp_files($c);
2923 my $out_temp_file = $c->stash->{out_file_temp};
2924 my $err_temp_file = $c->stash->{err_file_temp};
2926 my $temp_dir = $c->stash->{solgs_tempfiles_dir};
2927 my $background_job = $c->stash->{background_job};
2929 my $args_file = $c->controller('solGS::Files')->create_tempfile($temp_dir, "geno-data-args_file-${trial_id}");
2931 nstore $args, $args_file
2932 or croak "data queryscript: $! serializing model details to $args_file ";
2934 my $check_data_exists = $c->stash->{check_data_exists} ? 1 : 0;
2936 my $cmd = 'mx-run solGS::queryJobs '
2937 . ' --data_type genotype '
2938 . ' --population_type trial '
2939 . ' --args_file ' . $args_file
2940 . ' --check_data_exists ' . $check_data_exists;
2942 my $config_args = {
2943 'temp_dir' => $temp_dir,
2944 'out_file' => $out_temp_file,
2945 'err_file' => $err_temp_file,
2946 'cluster_host' => 'localhost'
2949 my $config = $self->create_cluster_config($c, $config_args);
2951 my $job_args = {
2952 'cmd' => $cmd,
2953 'config' => $config,
2954 'background_job'=> $background_job,
2955 'temp_dir' => $temp_dir,
2958 push @queries, $job_args;
2962 $c->stash->{cluster_genotype_query_job_args} = \@queries;
2966 sub first_stock_genotype_data {
2967 my ($self, $c, $pr_id, $protocol_id) = @_;
2969 $c->stash->{check_data_exists} = 1;
2970 $self->submit_cluster_genotype_query($c, [$pr_id], $protocol_id);
2974 sub phenotype_file {
2975 my ($self, $c, $pop_id) = @_;
2977 if (!$pop_id) {
2978 $pop_id = $c->stash->{pop_id}
2979 || $c->stash->{training_pop_id}
2980 || $c->stash->{trial_id};
2983 $c->stash->{pop_id} = $pop_id;
2984 die "Population id must be provided to get the phenotype data set." if !$pop_id;
2985 $pop_id =~ s/combined_//;
2987 if ($c->stash->{list_reference} || $pop_id =~ /list/) {
2988 if (!$c->user) {
2990 my $page = "/" . $c->req->path;
2992 $c->res->redirect("/solgs/login/message?page=$page");
2993 $c->detach;
2997 $c->controller('solGS::Files')->phenotype_file_name($c, $pop_id);
2998 my $pheno_file = $c->stash->{phenotype_file_name};
3000 no warnings 'uninitialized';
3002 unless ( -s $pheno_file)
3004 if ($pop_id !~ /list/)
3006 #my $args = $self->phenotype_trial_query_args($c);
3007 $self->submit_cluster_phenotype_query($c, [$pop_id]);
3011 $self->get_all_traits($c);
3013 $c->stash->{phenotype_file} = $pheno_file;
3018 sub genotype_trial_query_args {
3019 my ($self, $c, $pop_id, $protocol_id) = @_;
3021 my $geno_file;
3022 my $check_data_exists = $c->stash->{check_data_exists};
3024 if ($c->stash->{check_data_exists})
3026 $c->controller('solGS::Files')->first_stock_genotype_file($c, $pop_id, $protocol_id);
3027 $geno_file = $c->stash->{first_stock_genotype_file};
3029 else
3031 $c->controller('solGS::Files')->genotype_file_name($c, $pop_id, $protocol_id);
3032 $geno_file = $c->stash->{genotype_file_name};
3035 my $args = {
3036 'trial_id' => $pop_id,
3037 'genotype_file' => $geno_file,
3038 'genotyping_protocol_id' => $protocol_id,
3039 'cache_dir' => $c->stash->{solgs_cache_dir},
3042 return $args;
3047 sub phenotype_trial_query_args {
3048 my ($self, $c, $pop_id) = @_;
3050 $pop_id = $c->stash->{pop_id} if !$pop_id;
3052 $c->controller('solGS::Files')->phenotype_file_name($c, $pop_id);
3053 my $pheno_file = $c->stash->{phenotype_file_name};
3055 $c->controller('solGS::Files')->phenotype_metadata_file($c);
3056 my $metadata_file = $c->stash->{phenotype_metadata_file};
3058 no warnings 'uninitialized';
3060 $c->controller('solGS::Files')->traits_list_file($c);
3061 my $traits_file = $c->stash->{traits_list_file};
3063 my $args = {
3064 'population_id' => $pop_id,
3065 'phenotype_file' => $pheno_file,
3066 'traits_list_file' => $traits_file,
3067 'metadata_file' => $metadata_file,
3070 return $args;
3074 sub format_phenotype_dataset {
3075 my ($self, $data_ref, $metadata, $traits_file) = @_;
3077 my $data = $$data_ref;
3078 my @rows = split (/\n/, $data);
3080 my $formatted_headers = $self->format_phenotype_dataset_headers($rows[0], $metadata, $traits_file);
3081 $rows[0] = $formatted_headers;
3083 my $formatted_dataset = $self->format_phenotype_dataset_rows(\@rows);
3085 return $formatted_dataset;
3089 sub format_phenotype_dataset_rows {
3090 my ($self, $data_rows) = @_;
3092 my $data = join("\n", @$data_rows);
3094 return $data;
3099 sub format_phenotype_dataset_headers {
3100 my ($self, $all_headers, $meta_headers, $traits_file) = @_;
3102 $all_headers = SGN::Controller::solGS::Utils->clean_traits($all_headers);
3104 my $traits = $all_headers;
3106 foreach my $mh (@$meta_headers) {
3107 $traits =~ s/($mh)//g;
3110 write_file($traits_file, $traits) if $traits_file && $traits_file =~ /pop_list/;
3112 my @filtered_traits = split(/\t/, $traits);
3114 my $acronymized_traits = SGN::Controller::solGS::Utils->acronymize_traits(\@filtered_traits);
3115 my $acronym_table = $acronymized_traits->{acronym_table};
3117 my $formatted_headers;
3118 my @headers = split("\t", $all_headers);
3120 foreach my $hd (@headers)
3122 my $acronym;
3123 foreach my $acr (keys %$acronym_table)
3125 $acronym = $acr if $acronym_table->{$acr} =~ /$hd/;
3126 last if $acronym;
3129 $formatted_headers .= $acronym ? $acronym : $hd;
3130 $formatted_headers .= "\t" unless ($headers[-1] eq $hd);
3133 return $formatted_headers;
3138 sub genotype_file {
3139 my ($self, $c, $pop_id, $protocol_id) = @_;
3141 $pop_id = $c->stash->{pop_id} if !$pop_id;
3143 my $training_pop_id = $c->stash->{training_pop_id};
3144 my $selection_pop_id = $c->stash->{selection_pop_id};
3146 $pop_id = $training_pop_id || $selection_pop_id if !$pop_id;
3147 die "Population id must be provided to get the genotype data set." if !$pop_id;
3149 if ($pop_id =~ /list/)
3151 if (!$c->user)
3153 my $path = "/" . $c->req->path;
3154 $c->res->redirect("/solgs/login/message?page=$path");
3155 $c->detach;
3159 $c->controller('solGS::Files')->genotype_file_name($c, $pop_id, $protocol_id);
3160 my $geno_file = $c->stash->{genotype_file_name};
3162 no warnings 'uninitialized';
3163 unless (-s $geno_file)
3165 my $args = $self->genotype_trial_query_args($c, $pop_id, $protocol_id);
3166 $self->submit_cluster_genotype_query($c, $args, $protocol_id);
3169 $c->stash->{genotype_file} = $geno_file;
3174 sub get_rrblup_output {
3175 my ($self, $c) = @_;
3177 $c->stash->{pop_id} = $c->stash->{combo_pops_id} if $c->stash->{combo_pops_id};
3179 my $pop_id = $c->stash->{pop_id} || $c->stash->{training_pop_id};
3180 my $trait_abbr = $c->stash->{trait_abbr};
3181 my $trait_name = $c->stash->{trait_name};
3182 my $trait_id = $c->stash->{trait_id};
3184 my $data_set_type = $c->stash->{data_set_type};
3185 my $selection_pop_id = $c->stash->{prediction_pop_id} || $c->stash->{selection_pop_id};
3187 my ($traits_file, @traits, @trait_pages);
3189 $c->stash->{selection_pop_id} = $selection_pop_id;
3190 if ($trait_id)
3192 $self->run_rrblup_trait($c, $trait_id);
3194 else
3196 $traits_file = $c->stash->{selected_traits_file};
3197 my $content = read_file($traits_file);
3199 if ($content =~ /\t/)
3201 @traits = split(/\t/, $content);
3203 else
3205 push @traits, $content;
3208 no warnings 'uninitialized';
3210 foreach my $tr (@traits)
3212 my $acronym_pairs = $self->get_acronym_pairs($c);
3213 my $trait_name;
3214 if ($acronym_pairs)
3216 foreach my $r (@$acronym_pairs)
3218 if ($r->[0] eq $tr)
3220 $trait_name = $r->[1];
3221 $trait_name =~ s/\n//g;
3222 $c->stash->{trait_name} = $trait_name;
3223 $c->stash->{trait_abbr} = $r->[0];
3228 my $trait_id = $c->model('solGS::solGS')->get_trait_id($trait_name);
3229 $self->run_rrblup_trait($c, $trait_id);
3232 push @trait_pages, [ qq | <a href="/solgs/trait/$trait_id/population/$pop_id" onclick="solGS.waitPage()">$tr</a>| ];
3236 $c->stash->{combo_pops_analysis_result} = 0;
3238 no warnings 'uninitialized';
3240 if ($data_set_type !~ /combined populations/)
3242 if (scalar(@traits) == 1)
3244 $self->gs_modeling_files($c);
3245 $c->stash->{template} = $c->controller('solGS::Files')->template('population/trait.mas');
3248 if (scalar(@traits) > 1)
3250 $c->stash->{model_id} = $pop_id;
3251 $self->analyzed_traits($c);
3252 $c->stash->{template} = $c->controller('solGS::Files')->template('/population/multiple_traits_output.mas');
3253 $c->stash->{trait_pages} = \@trait_pages;
3256 else
3258 $c->stash->{combo_pops_analysis_result} = 1;
3264 sub run_rrblup_trait {
3265 my ($self, $c, $trait_id) = @_;
3267 $trait_id = $c->stash->{trait_id} if !$trait_id;
3269 $c->stash->{trait_id} = $trait_id;
3270 $self->get_trait_details($c, $trait_id);
3272 my $training_pop_id = $c->stash->{training_pop_id} || $c->stash->{pop_id};
3273 my $selection_pop_id = $c->stash->{prediction_pop_id} || $c->stash->{selection_pop_id};
3275 $self->input_files($c);
3276 $self->output_files($c);
3277 $c->stash->{r_script} = 'R/solGS/gs.r';
3279 my $training_pop_gebvs_file = $c->stash->{rrblup_training_gebvs_file};
3280 my $selection_pop_gebvs_file = $c->stash->{rrblup_selection_gebvs_file};
3282 if ($training_pop_id && !-s $training_pop_gebvs_file)
3284 $self->run_r_script($c);
3286 elsif (($selection_pop_id && !-s $selection_pop_gebvs_file))
3289 $self->get_selection_pop_query_args_file($c);
3290 my $pre_req = $c->stash->{selection_pop_query_args_file};
3292 $self->get_gs_modeling_jobs_args_file($c);
3293 my $dependent_job = $c->stash->{gs_modeling_jobs_args_file};
3295 $c->stash->{prerequisite_jobs} = $pre_req;
3296 $c->stash->{dependent_jobs} = $dependent_job;
3298 $self->run_async($c);
3304 sub create_cluster_accesible_tmp_files {
3305 my ($self, $c, $template) = @_;
3307 my $temp_file_template = $template || $c->stash->{r_temp_file};
3309 my $temp_dir = $c->stash->{analysis_tempfiles_dir} || $c->stash->{solgs_tempfiles_dir};
3311 my $in_file = $c->controller('solGS::Files')->create_tempfile($temp_dir, "${temp_file_template}-in");
3312 my $out_file = $c->controller('solGS::Files')->create_tempfile($temp_dir, "${temp_file_template}-out");
3313 my $err_file = $c->controller('solGS::Files')->create_tempfile($temp_dir, "${temp_file_template}-err");
3315 $c->stash(
3316 in_file_temp => $in_file,
3317 out_file_temp => $out_file,
3318 err_file_temp => $err_file,
3324 sub run_async {
3325 my ($self, $c) = @_;
3327 my $prerequisite_jobs = $c->stash->{prerequisite_jobs} || 'none';
3328 my $background_job = $c->stash->{background_job};
3329 my $dependent_jobs = $c->stash->{dependent_jobs};
3331 my $temp_dir = $c->stash->{solgs_tempfiles_dir};
3333 $c->stash->{r_temp_file} = 'run-async';
3334 $self->create_cluster_accesible_tmp_files($c);
3335 my $err_temp_file = $c->stash->{err_file_temp};
3336 my $out_temp_file = $c->stash->{out_file_temp};
3338 my $referer = $c->req->referer;
3340 my $report_file = 'none';
3342 if ($background_job)
3344 $c->stash->{async} = 1;
3345 $c->controller('solGS::AnalysisQueue')->get_analysis_report_job_args_file($c, 2);
3346 $report_file = $c->stash->{analysis_report_job_args_file};
3349 my $config_args = {
3350 'temp_dir' => $temp_dir,
3351 'out_file' => $out_temp_file,
3352 'err_file' => $err_temp_file,
3353 'cluster_host' => 'localhost'
3356 my $job_config = $self->create_cluster_config($c, $config_args);
3357 my $job_config_file = $c->controller('solGS::Files')->create_tempfile($temp_dir, 'job_config_file');
3359 nstore $job_config, $job_config_file
3360 or croak "job config file: $! serializing job config to $job_config_file ";
3362 my $cmd = 'mx-run solGS::asyncJob'
3363 . ' --prerequisite_jobs ' . $prerequisite_jobs
3364 . ' --dependent_jobs ' . $dependent_jobs
3365 . ' --analysis_report_job ' . $report_file
3366 . ' --config_file ' . $job_config_file;
3369 my $cluster_job_args = {
3370 'cmd' => $cmd,
3371 'config' => $job_config,
3372 'background_job' => $background_job,
3373 'temp_dir' => $temp_dir,
3374 'async' => $c->stash->{async},
3377 my $job = $self->submit_job_cluster($c, $cluster_job_args);
3382 sub get_gs_r_temp_file {
3383 my ($self, $c) = @_;
3385 my $pop_id = $c->stash->{pop_id};
3386 my $trait_id = $c->stash->{trait_id};
3388 my $data_set_type = $c->stash->{data_set_type};
3390 my $selection_pop_id = $c->stash->{prediction_pop_id} || $c->stash->{selection_pop_id};
3391 $c->stash->{selection_pop_id} = $selection_pop_id;
3393 $pop_id = $c->stash->{combo_pops_id} if !$pop_id;
3394 my $identifier = $selection_pop_id ? $pop_id . '-' . $selection_pop_id : $pop_id;
3396 if ($data_set_type =~ /combined populations/)
3398 my $combo_identifier = $c->stash->{combo_pops_id};
3399 $c->stash->{r_temp_file} = "gs-rrblup-combo-${identifier}-${trait_id}";
3401 else
3403 $c->stash->{r_temp_file} = "gs-rrblup-${identifier}-${trait_id}";
3409 sub get_selection_pop_query_args {
3410 my ($self, $c) = @_;
3412 my $selection_pop_id = $c->stash->{selection_pop_id} || $c->stash->{prediction_pop_id};
3413 my $protocol_id = $c->stash->{genotyping_protocol_id};
3414 my $selection_pop_geno_file;
3415 my $pop_type;
3417 if ($selection_pop_id)
3419 $c->controller('solGS::Files')->genotype_file_name($c, $selection_pop_id, $protocol_id);
3420 $selection_pop_geno_file = $c->stash->{genotype_file_name};
3423 my $genotypes_ids;
3424 if ($selection_pop_id =~ /list/)
3426 $c->controller('solGS::List')->get_genotypes_list_details($c);
3427 $genotypes_ids = $c->stash->{genotypes_ids};
3428 $pop_type = 'list';
3430 elsif ($selection_pop_id =~ /dataset/)
3432 #$c->controller('solGS::Dataset')->get_dataset_genotypes_list($c);
3433 #$genotypes_ids = $c->stash->{genotypes_ids};
3435 $pop_type = 'dataset';
3437 else
3439 $pop_type = 'trial';
3442 $c->stash->{population_type} = $pop_type;
3443 my $temp_file_template = "genotype-data-query-${selection_pop_id}";
3444 $self->create_cluster_accesible_tmp_files($c, $temp_file_template);
3445 my $in_file = $c->stash->{in_file_temp};
3446 my $out_temp_file = $c->stash->{out_file_temp};
3447 my $err_temp_file = $c->stash->{err_file_temp};
3449 my $selection_pop_query_args = {
3450 'trial_id' => $selection_pop_id,
3451 'genotype_file' => $selection_pop_geno_file,
3452 'genotypes_ids' => $genotypes_ids,
3453 'dataset_id' => $c->stash->{dataset_id},
3454 'out_file' => $out_temp_file,
3455 'err_file' => $err_temp_file,
3456 'population_type' => $pop_type,
3457 'genotyping_protocol_id' => $protocol_id
3460 $c->stash->{selection_pop_query_args} = $selection_pop_query_args;
3465 sub get_cluster_query_job_args {
3466 my ($self, $c) = @_;
3468 my $pop_id = $c->stash->{selection_pop_id} || $c->stash->{prediction_pop_id};
3469 my $protocol_id = $c->stash->{genotyping_protocol_id};
3471 $c->controller('solGS::Files')->genotype_file_name($c, $pop_id, $protocol_id);
3472 my $geno_file = $c->stash->{genotype_file_name};
3474 my @queries;
3476 if (!-s $geno_file)
3478 $c->stash->{r_temp_file} = "genotype-data-query-${pop_id}";
3479 $self->create_cluster_accesible_tmp_files($c);
3480 my $out_temp_file = $c->stash->{out_file_temp};
3481 my $err_temp_file = $c->stash->{err_file_temp};
3483 my $temp_dir = $c->stash->{solgs_tempfiles_dir};
3484 my $background_job = $c->stash->{background_job};
3486 $self->get_selection_pop_query_args($c);
3487 my $query_args = $c->stash->{selection_pop_query_args};
3488 my $genotype_file = $query_args->{genotype_file};
3489 my $args_file = $c->controller('solGS::Files')->create_tempfile($temp_dir, "geno-data-args_file-${pop_id}");
3491 my $pop_type = $query_args->{population_type};
3492 my $data_type = 'genotype';
3494 nstore $query_args, $args_file
3495 or croak "data query script: $! serializing model details to $args_file ";
3497 my $cmd = 'mx-run solGS::queryJobs '
3498 . ' --data_type ' . $data_type
3499 . ' --population_type ' . $pop_type
3500 . ' --args_file ' . $args_file;
3502 my $config_args = {
3503 'temp_dir' => $temp_dir,
3504 'out_file' => $out_temp_file,
3505 'err_file' => $err_temp_file,
3506 'cluster_host' => 'localhost'
3509 my $config = $self->create_cluster_config($c, $config_args);
3511 my $job_args = {
3512 'cmd' => $cmd,
3513 'config' => $config,
3514 'background_job'=> $background_job,
3515 'temp_dir' => $temp_dir,
3516 'genotype_file' => $genotype_file
3519 push @queries, $job_args;
3523 $c->stash->{cluster_query_job_args} = \@queries;
3527 sub get_selection_pop_query_args_file {
3528 my ($self, $c) = @_;
3530 $self->get_cluster_query_job_args($c);
3531 my $selection_pop_query_args = $c->stash->{cluster_query_job_args};
3533 my $temp_dir = $c->stash->{solgs_tempfiles_dir};
3534 my $selection_pop_query_file = $c->controller('solGS::Files')->create_tempfile($temp_dir, 'selection_pop_query_args');
3536 nstore $selection_pop_query_args, $selection_pop_query_file
3537 or croak "selection pop query job : $! serializing selection pop data query details to $selection_pop_query_file";
3539 $c->stash->{selection_pop_query_args_file} = $selection_pop_query_file;
3543 sub modeling_jobs {
3544 my ($self, $c) = @_;
3546 my $modeling_traits = $c->stash->{training_traits_ids} || [$c->stash->{trait_id}];
3547 my $training_pop_id = $c->stash->{training_pop_id};
3548 my $selection_pop_id = $c->stash->{selection_pop_id};
3550 my @modeling_jobs;
3552 if ($modeling_traits) {
3554 foreach my $trait_id (@$modeling_traits)
3556 $c->stash->{trait_id} = $trait_id;
3557 $self->get_trait_details($c);
3559 $self->input_files($c);
3560 $self->output_files($c);
3562 my $selection_pop_gebvs_file = $c->stash->{rrblup_selection_gebvs_file};
3563 my $training_pop_gebvs_file = $c->stash->{rrblup_training_gebvs_file};
3565 if (($training_pop_id && !-s $training_pop_gebvs_file) ||
3566 ($selection_pop_id && !-s $selection_pop_gebvs_file))
3568 $self->get_gs_r_temp_file($c);
3569 $c->stash->{r_script} = 'R/solGS/gs.r';
3570 $self->get_cluster_r_job_args($c);
3572 push @modeling_jobs, $c->stash->{cluster_r_job_args};
3577 return \@modeling_jobs;
3581 sub get_gs_modeling_jobs_args_file {
3582 my ($self, $c) = @_;
3584 my $modeling_jobs = [];
3586 if ($c->stash->{training_traits_ids})
3588 $modeling_jobs = $self->modeling_jobs($c);
3591 if ($modeling_jobs)
3593 my $temp_dir = $c->stash->{solgs_tempfiles_dir};
3594 my $model_file = $c->controller('solGS::Files')->create_tempfile($temp_dir, 'gs_model_args');
3596 nstore $modeling_jobs, $model_file
3597 or croak "gs r script: $! serializing model details to $model_file";
3599 $c->stash->{gs_modeling_jobs_args_file} = $model_file;
3605 sub run_r_script {
3606 my ($self, $c) = @_;
3608 if ($c->stash->{background_job})
3610 $self->get_gs_modeling_jobs_args_file($c);
3611 $c->stash->{dependent_jobs} = $c->stash->{gs_modeling_jobs_args_file};
3612 $self->run_async($c);
3614 else
3616 $self->get_cluster_r_job_args($c);
3617 my $cluster_job_args = $c->stash->{cluster_r_job_args};
3618 $self->submit_job_cluster($c, $cluster_job_args);
3624 sub get_cluster_r_job_args {
3625 my ($self, $c) = @_;
3627 my $r_script = $c->stash->{r_script};
3628 my $input_files = $c->stash->{input_files};
3629 my $output_files = $c->stash->{output_files};
3631 if ($r_script =~ /gs/)
3633 $self->get_gs_r_temp_file($c);
3636 $self->create_cluster_accesible_tmp_files($c);
3637 my $in_file = $c->stash->{in_file_temp};
3638 my $out_temp_file = $c->stash->{out_file_temp};
3639 my $err_temp_file = $c->stash->{err_file_temp};
3641 my $temp_dir = $c->stash->{analysis_tempfiles_dir} || $c->stash->{solgs_tempfiles_dir};
3644 my $r_cmd_file = $c->path_to($r_script);
3645 copy($r_cmd_file, $in_file)
3646 or die "could not copy '$r_cmd_file' to '$in_file'";
3649 my $config_args = {
3650 'temp_dir' => $temp_dir,
3651 'out_file' => $out_temp_file,
3652 'err_file' => $err_temp_file
3655 my $config = $self->create_cluster_config($c, $config_args);
3657 my $cmd = 'Rscript --slave '
3658 . "$in_file $out_temp_file "
3659 . '--args ' . $input_files
3660 . ' ' . $output_files;
3662 my $job_args = {
3663 'cmd' => $cmd,
3664 'background_job' => $c->stash->{background_job},
3665 'config' => $config,
3668 $c->stash->{cluster_r_job_args} = $job_args;
3673 sub create_cluster_config {
3674 my ($self, $c, $args) = @_;
3676 my $config = {
3677 temp_base => $args->{temp_dir},
3678 queue => $c->config->{'web_cluster_queue'},
3679 max_cluster_jobs => 1_000_000_000,
3680 out_file => $args->{out_file},
3681 err_file => $args->{err_file},
3682 is_async => 0,
3683 do_cleanup => 0,
3684 sleep => $args->{sleep}
3687 if ($args->{cluster_host} =~ /localhost/ || !$c->config->{cluster_host})
3689 $config->{backend} = 'Slurm';
3690 $config->{submit_host} = 'localhost';
3692 else
3694 $config->{backend} = $c->config->{backend};
3695 $config->{submit_host} = $c->config->{cluster_host};
3698 return $config;
3702 sub submit_job_cluster {
3703 my ($self, $c, $args) = @_;
3705 my $job;
3707 eval
3709 $job = CXGN::Tools::Run->new($args->{config});
3710 $job->do_not_cleanup(1);
3712 if ($args->{background_job})
3714 $job->is_async(1);
3715 $job->run_async($args->{cmd});
3717 $c->stash->{r_job_tempdir} = $job->job_tempdir();
3718 $c->stash->{r_job_id} = $job->jobid();
3719 $c->stash->{cluster_job_id} = $job->cluster_job_id();
3720 $c->stash->{cluster_job} = $job;
3722 else
3724 $job->run_async($args->{cmd});
3725 $job->wait();
3729 if ($@)
3731 $c->stash->{Error} = 'Error occured submitting the job ' . $@ . "\nJob: " . $args->{cmd};
3732 $c->stash->{status} = 'Error occured submitting the job ' . $@ . "\nJob: " . $args->{cmd};
3735 return $job;
3739 # sub default :Path {
3740 # my ( $self, $c ) = @_;
3741 # $c->forward('search');
3746 =head2 end
3748 Attempt to render a view, if needed.
3750 =cut
3752 #sub render : ActionClass('RenderView') {}
3753 sub begin : Private {
3754 my ($self, $c) = @_;
3756 $c->controller('solGS::Files')->get_solgs_dirs($c);
3762 =head1 AUTHOR
3764 Isaak Y Tecle <iyt2@cornell.edu>
3766 =head1 LICENSE
3768 This library is free software. You can redistribute it and/or modify
3769 it under the same terms as Perl itself.
3771 =cut
3773 __PACKAGE__->meta->make_immutable;