move combined trials model selection prediction..
[sgn.git] / lib / SGN / Controller / solGS / solGS.pm
blob48d8894504669d024a16f3923f07b92285d344f6
1 package SGN::Controller::solGS::solGS;
3 use Moose;
4 use namespace::autoclean;
6 use String::CRC;
7 use URI::FromHash 'uri';
8 use File::Path qw / mkpath /;
9 use File::Spec::Functions qw / catfile catdir/;
10 use File::Temp qw / tempfile tempdir /;
11 use File::Slurp qw /write_file read_file :edit prepend_file append_file/;
12 use File::Copy;
13 use File::Basename;
14 use Cache::File;
15 use Try::Tiny;
16 use List::MoreUtils qw /uniq/;
17 use Scalar::Util qw /weaken reftype/;
18 use Statistics::Descriptive;
19 use Math::Round::Var;
20 use Algorithm::Combinatorics qw /combinations/;
21 use Array::Utils qw(:all);
22 use CXGN::Tools::Run;
23 use JSON;
24 use Storable qw/ nstore retrieve /;
25 use Carp qw/ carp confess croak /;
27 BEGIN { extends 'Catalyst::Controller' }
30 # Sets the actions in this controller to be registered with no prefix
31 # so they function identically to actions created in MyApp.pm
34 #__PACKAGE__->config(namespace => '');
36 =head1 NAME
38 solGS::Controller::Root - Root Controller for solGS
40 =head1 DESCRIPTION
42 [enter your description here]
44 =head1 METHODS
46 =head2 index
48 The root page (/)
50 =cut
53 # sub index :Path :Args(0) {
54 # my ($self, $c) = @_;
55 # $c->forward('search');
56 # }
58 sub solgs : Path('/solgs'){
59 my ($self, $c) = @_;
60 $c->forward('search');
64 sub solgs_breeder_search :Path('/solgs/breeder_search') Args(0) {
65 my ($self, $c) = @_;
66 $c->stash->{referer} = $c->req->referer();
67 $c->stash->{template} = '/solgs/breeder_search_solgs.mas';
71 sub submit :Path('/solgs/submit/intro') Args(0) {
72 my ($self, $c) = @_;
74 $c->stash->{template} = $self->template('/submit/intro.mas');
78 sub search : Path('/solgs/search') Args() {
79 my ($self, $c) = @_;
81 #$self->gs_traits_index($c);
82 #my $gs_traits_index = $c->stash->{gs_traits_index};
84 $c->stash(template => $self->template('/search/solgs.mas'),
85 # gs_traits_index => $gs_traits_index,
91 sub search_trials : Path('/solgs/search/trials') Args() {
92 my ($self, $c) = @_;
94 my $show_result = $c->req->param('show_result');
95 my $limit = $show_result =~ /all/ ? undef : 10;
97 my $projects_ids = $c->model('solGS::solGS')->all_gs_projects($limit);
99 my $ret->{status} = 'failed';
101 my $formatted_trials = [];
103 if (@$projects_ids)
105 my $projects_rs = $c->model('solGS::solGS')->project_details($projects_ids);
107 $self->get_projects_details($c, $projects_rs);
108 my $projects = $c->stash->{projects_details};
110 $self->format_gs_projects($c, $projects);
111 $formatted_trials = $c->stash->{formatted_gs_projects};
113 $ret->{status} = 'success';
116 $ret->{trials} = $formatted_trials;
117 $ret = to_json($ret);
119 $c->res->content_type('application/json');
120 $c->res->body($ret);
125 sub projects_links {
126 my ($self, $c, $pr_rs) = @_;
128 $self->get_projects_details($c, $pr_rs);
129 my $projects = $c->stash->{projects_details};
131 my @projects_pages;
132 my $update_marker_count;
134 foreach my $pr_id (keys %$projects)
136 my $pr_name = $projects->{$pr_id}{project_name};
137 my $pr_desc = $projects->{$pr_id}{project_desc};
138 my $pr_year = $projects->{$pr_id}{project_year};
139 my $pr_location = $projects->{$pr_id}{project_location};
141 my $dummy_name = $pr_name =~ /test\w*/ig;
142 #my $dummy_desc = $pr_desc =~ /test\w*/ig;
144 $self->check_population_has_genotype($c);
145 my $has_genotype = $c->stash->{population_has_genotype};
147 no warnings 'uninitialized';
149 unless ($dummy_name || !$pr_name )
151 #$self->trial_compatibility_table($c, $has_genotype);
152 #my $match_code = $c->stash->{trial_compatibility_code};
154 my $checkbox = qq |<form> <input type="checkbox" name="project" value="$pr_id" onclick="getPopIds()"/> </form> |;
156 #$match_code = qq | <div class=trial_code style="color: $match_code; background-color: $match_code; height: 100%; width:30px">code</div> |;
158 push @projects_pages, [$checkbox, qq|<a href="/solgs/population/$pr_id" onclick="solGS.waitPage(this.href); return false;">$pr_name</a>|,
159 $pr_desc, $pr_location, $pr_year
167 $c->stash->{projects_pages} = \@projects_pages;
171 sub search_trials_trait : Path('/solgs/search/trials/trait') Args(1) {
172 my ($self, $c, $trait_id) = @_;
174 $self->get_trait_details($c, $trait_id);
176 $c->stash->{template} = $self->template('/search/trials/trait.mas');
181 sub show_search_result_pops : Path('/solgs/search/result/populations') Args(1) {
182 my ($self, $c, $trait_id) = @_;
184 my $combine = $c->req->param('combine');
185 my $page = $c->req->param('page') || 1;
187 my $projects_ids = $c->model('solGS::solGS')->search_trait_trials($trait_id);
189 my $ret->{status} = 'failed';
190 my $formatted_projects = [];
192 if (@$projects_ids)
194 my $projects_rs = $c->model('solGS::solGS')->project_details($projects_ids);
195 my $trait = $c->model('solGS::solGS')->trait_name($trait_id);
197 $self->get_projects_details($c, $projects_rs);
198 my $projects = $c->stash->{projects_details};
200 $self->format_trait_gs_projects($c, $trait_id, $projects);
201 $formatted_projects = $c->stash->{formatted_gs_projects};
203 $ret->{status} = 'success';
206 $ret->{trials} = $formatted_projects;
208 $ret = to_json($ret);
210 $c->res->content_type('application/json');
211 $c->res->body($ret);
216 sub format_trait_gs_projects {
217 my ($self, $c, $trait_id, $projects) = @_;
219 my @formatted_projects;
221 foreach my $pr_id (keys %$projects)
223 my $pr_name = $projects->{$pr_id}{project_name};
224 my $pr_desc = $projects->{$pr_id}{project_desc};
225 my $pr_year = $projects->{$pr_id}{project_year};
226 my $pr_location = $projects->{$pr_id}{project_location};
228 $c->stash->{pop_id} = $pr_id;
229 $self->check_population_has_genotype($c);
230 my $has_genotype = $c->stash->{population_has_genotype};
232 if ($has_genotype)
234 my $trial_compatibility_file = $self->trial_compatibility_file($c);
236 $self->trial_compatibility_table($c, $has_genotype);
237 my $match_code = $c->stash->{trial_compatibility_code};
239 my $checkbox = qq |<form> <input type="checkbox" name="project" value="$pr_id" onclick="getPopIds()"/> </form> |;
240 $match_code = qq | <div class=trial_code style="color: $match_code; background-color: $match_code; height: 100%; width:100%">code</div> |;
242 push @formatted_projects, [ $checkbox, qq|<a href="/solgs/trait/$trait_id/population/$pr_id" onclick="solGS.waitPage(this.href); return false;">$pr_name</a>|, $pr_desc, $pr_location, $pr_year, $match_code];
246 $c->stash->{formatted_gs_projects} = \@formatted_projects;
251 sub format_gs_projects {
252 my ($self, $c, $projects) = @_;
254 my @formatted_projects;
256 foreach my $pr_id (keys %$projects)
258 my $pr_name = $projects->{$pr_id}{project_name};
259 my $pr_desc = $projects->{$pr_id}{project_desc};
260 my $pr_year = $projects->{$pr_id}{project_year};
261 my $pr_location = $projects->{$pr_id}{project_location};
263 # $c->stash->{pop_id} = $pr_id;
264 # $self->check_population_has_genotype($c);
265 # my $has_genotype = $c->stash->{population_has_genotype};
266 my $has_genotype = $c->config->{default_genotyping_protocol};
268 if ($has_genotype)
270 my $trial_compatibility_file = $self->trial_compatibility_file($c);
272 $self->trial_compatibility_table($c, $has_genotype);
273 my $match_code = $c->stash->{trial_compatibility_code};
275 my $checkbox = qq |<form> <input type="checkbox" name="project" value="$pr_id" onclick="getPopIds()"/> </form> |;
276 $match_code = qq | <div class=trial_code style="color: $match_code; background-color: $match_code; height: 100%; width:100%">code</div> |;
278 push @formatted_projects, [ $checkbox, qq|<a href="/solgs/population/$pr_id" onclick="solGS.waitPage(this.href); return false;">$pr_name</a>|, $pr_desc, $pr_location, $pr_year, $match_code];
282 $c->stash->{formatted_gs_projects} = \@formatted_projects;
287 sub trial_compatibility_table {
288 my ($self, $c, $markers) = @_;
290 $self->trial_compatibility_file($c);
291 my $compatibility_file = $c->stash->{trial_compatibility_file};
293 my $color;
295 if (-s $compatibility_file)
297 my @line = read_file($compatibility_file);
298 my ($entry) = grep(/$markers/, @line);
299 chomp($entry);
301 if($entry)
303 ($markers, $color) = split(/\t/, $entry);
304 $c->stash->{trial_compatibility_code} = $color;
308 if (!$color)
310 my ($red, $blue, $green) = map { int(rand(255)) } 1..3;
311 $color = 'rgb' . '(' . "$red,$blue,$green" . ')';
313 my $color_code = $markers . "\t" . $color . "\n";
315 $c->stash->{trial_compatibility_code} = $color;
316 write_file($compatibility_file,{append => 1}, $color_code);
321 sub trial_compatibility_file {
322 my ($self, $c) = @_;
324 my $cache_data = {key => 'trial_compatibility',
325 file => 'trial_compatibility_codes',
326 stash_key => 'trial_compatibility_file'
329 $self->cache_file($c, $cache_data);
334 sub get_projects_details {
335 my ($self, $c, $pr_rs) = @_;
337 my ($year, $location, $pr_id, $pr_name, $pr_desc);
338 my %projects_details = ();
340 while (my $pr = $pr_rs->next)
342 $pr_id = $pr->get_column('project_id');
343 $pr_name = $pr->get_column('name');
344 $pr_desc = $pr->get_column('description');
346 my $pr_yr_rs = $c->model('solGS::solGS')->project_year($pr_id);
348 while (my $pr = $pr_yr_rs->next)
350 $year = $pr->value;
353 my $location = $c->model('solGS::solGS')->project_location($pr_id);
355 $projects_details{$pr_id} = {
356 project_name => $pr_name,
357 project_desc => $pr_desc,
358 project_year => $year,
359 project_location => $location,
363 $c->stash->{projects_details} = \%projects_details;
368 sub store_project_marker_count {
369 my ($self, $c) = @_;
371 my $pop_id = $c->stash->{pop_id};
372 my $marker_count = $c->stash->{marker_count};
374 unless ($marker_count)
376 my $markers = $c->model("solGS::solGS")->get_project_genotyping_markers($pop_id);
377 my @markers = split('\t', $markers);
378 $marker_count = scalar(@markers);
381 my $genoprop = {'project_id' => $pop_id, 'marker_count' => $marker_count};
382 $c->model("solGS::solGS")->set_project_genotypeprop($genoprop);
387 sub search_traits : Path('/solgs/search/traits/') Args(1) {
388 my ($self, $c, $query) = @_;
390 my $traits = $c->model('solGS::solGS')->search_trait($query);
391 my $result = $c->model('solGS::solGS')->trait_details($traits);
393 my $ret->{status} = 0;
394 if ($result->first)
396 $ret->{status} = 1;
399 $ret = to_json($ret);
401 $c->res->content_type('application/json');
402 $c->res->body($ret);
407 sub show_search_result_traits : Path('/solgs/search/result/traits') Args(1) {
408 my ($self, $c, $query) = @_;
410 my $traits = $c->model('solGS::solGS')->search_trait($query);
411 my $result = $c->model('solGS::solGS')->trait_details($traits);
413 my @rows;
414 while (my $row = $result->next)
416 my $id = $row->cvterm_id;
417 my $name = $row->name;
418 my $def = $row->definition;
420 push @rows, [ qq |<a href="/solgs/search/trials/trait/$id" onclick="solGS.waitPage()">$name</a>|, $def];
423 if (@rows)
425 $c->stash(template => $self->template('/search/result/traits.mas'),
426 result => \@rows,
427 query => $query,
434 sub population : Regex('^solgs/population/([\w|\d]+)(?:/([\w+]+))?') {
435 my ($self, $c) = @_;
437 my ($pop_id, $action) = @{$c->req->captures};
439 my $uploaded_reference = $c->req->param('uploaded_reference');
440 $c->stash->{uploaded_reference} = $uploaded_reference;
442 if ($uploaded_reference)
444 $pop_id = $c->req->param('model_id');
446 $c->stash->{model_id} = $c->req->param('model_id'),
447 $c->stash->{list_name} = $c->req->param('list_name'),
450 if ($pop_id )
452 if($pop_id =~ /uploaded/)
454 $c->stash->{uploaded_reference} = 1;
455 $uploaded_reference = 1;
458 $c->stash->{pop_id} = $pop_id;
460 $self->phenotype_file($c);
461 $self->genotype_file($c);
462 $self->get_all_traits($c);
463 $self->project_description($c, $pop_id);
465 $c->stash->{template} = $self->template('/population.mas');
467 if ($action && $action =~ /selecttraits/ ) {
468 $c->stash->{no_traits_selected} = 'none';
470 else {
471 $c->stash->{no_traits_selected} = 'some';
474 my $acronym = $self->get_acronym_pairs($c);
475 $c->stash->{acronym} = $acronym;
478 my $pheno_data_file = $c->stash->{phenotype_file};
480 if ($uploaded_reference)
482 my $ret->{status} = 'failed';
483 if ( !-s $pheno_data_file )
485 $ret->{status} = 'failed';
487 $ret = to_json($ret);
489 $c->res->content_type('application/json');
490 $c->res->body($ret);
496 sub uploaded_population_summary {
497 my ($self, $c, $list_pop_id) = @_;
499 my $tmp_dir = $c->stash->{solgs_prediction_upload_dir};
501 if (!$c->user)
503 my $page = "/" . $c->req->path;
504 $c->res->redirect("/solgs/list/login/message?page=$page");
505 $c->detach;
507 else
509 my $user_name = $c->user->id;
511 #my $model_id = $c->stash->{model_id};
512 #my $selection_pop_id = $c->stash->{prediction_pop_id} || $c->stash->{selection_pop_id};
514 my $protocol = $c->config->{default_genotyping_protocol};
515 $protocol = 'N/A' if !$protocol;
517 if ($list_pop_id)
519 my $metadata_file_tr = catfile($tmp_dir, "metadata_${user_name}_${list_pop_id}");
521 my @metadata_tr = read_file($metadata_file_tr) if $list_pop_id;
523 my ($key, $list_name, $desc);
525 ($desc) = grep {/description/} @metadata_tr;
526 ($key, $desc) = split(/\t/, $desc);
528 ($list_name) = grep {/list_name/} @metadata_tr;
529 ($key, $list_name) = split(/\t/, $list_name);
531 $c->stash(project_id => $list_pop_id,
532 project_name => $list_name,
533 prediction_pop_name => $list_name,
534 project_desc => $desc,
535 owner => $user_name,
536 protocol => $protocol,
540 # if ($selection_pop_id =~ /uploaded/)
542 # my $metadata_file_sl = catfile($tmp_dir, "metadata_${user_name}_${selection_pop_id}");
543 # my @metadata_sl = read_file($metadata_file_sl) if $selection_pop_id;
545 # my ($list_name_sl) = grep {/list_name/} @metadata_sl;
546 # my ($key_sl, $list_name) = split(/\t/, $list_name_sl);
548 # $c->stash->{prediction_pop_name} = $list_name;
554 sub get_project_details {
555 my ($self, $c, $pr_id) = @_;
557 my $pr_rs = $c->model('solGS::solGS')->project_details($pr_id);
559 while (my $row = $pr_rs->next)
561 $c->stash(project_id => $row->id,
562 project_name => $row->name,
563 project_desc => $row->description
570 sub get_markers_count {
571 my ($self, $c, $pop_hash) = @_;
573 my $filtered_geno_file;
574 my $markers_cnt;
576 if ($pop_hash->{training_pop})
578 my $training_pop_id = $pop_hash->{training_pop_id};
579 $c->stash->{pop_id} = $training_pop_id;
580 $self->filtered_training_genotype_file($c);
581 $filtered_geno_file = $c->stash->{filtered_training_genotype_file};
583 if (-s $filtered_geno_file) {
584 my @geno_lines = read_file($filtered_geno_file);
585 $markers_cnt = scalar(split('\t', $geno_lines[0])) - 1;
587 else
589 $self->genotype_file_name($c, $training_pop_id);
590 my $geno_file = $c->stash->{genotype_file_name};
591 my @geno_lines = read_file($geno_file);
592 $markers_cnt= scalar(split ('\t', $geno_lines[0])) - 1;
596 elsif ($pop_hash->{selection_pop})
598 my $selection_pop_id = $pop_hash->{selection_pop_id};
599 $c->stash->{pop_id} = $selection_pop_id;
600 $self->filtered_selection_genotype_file($c);
601 $filtered_geno_file = $c->stash->{filtered_selection_genotype_file};
603 if (-s $filtered_geno_file) {
604 my @geno_lines = read_file($filtered_geno_file);
605 $markers_cnt = scalar(split('\t', $geno_lines[0])) - 1;
607 else
609 $self->genotype_file_name($c, $selection_pop_id);
610 my $geno_file = $c->stash->{genotype_file_name};
611 my @geno_lines = read_file($geno_file);
612 $markers_cnt= scalar(split ('\t', $geno_lines[0])) - 1;
616 return $markers_cnt;
621 sub project_description {
622 my ($self, $c, $pr_id) = @_;
624 $c->stash->{pop_id} = $pr_id;
625 $c->stash->{uploaded_reference} = 1 if ($pr_id =~ /uploaded/);
627 my $protocol = $c->config->{default_genotyping_protocol};
628 $protocol = 'N/A' if !$protocol;
630 if(!$c->stash->{uploaded_reference}) {
631 my $pr_rs = $c->model('solGS::solGS')->project_details($pr_id);
633 while (my $row = $pr_rs->next)
635 $c->stash(project_id => $row->id,
636 project_name => $row->name,
637 project_desc => $row->description
641 $self->get_project_owners($c, $pr_id);
642 $c->stash->{owner} = $c->stash->{project_owners};
645 else
647 $c->stash->{model_id} = $pr_id;
648 $self->uploaded_population_summary($c, $pr_id);
651 $self->filtered_training_genotype_file($c);
652 my $filtered_geno_file = $c->stash->{filtered_training_genotype_file};
654 my $markers_no;
655 my @geno_lines;
657 if (-s $filtered_geno_file) {
658 @geno_lines = read_file($filtered_geno_file);
659 $markers_no = scalar(split('\t', $geno_lines[0])) - 1;
661 else
663 $self->genotype_file($c);
664 my $geno_file = $c->stash->{genotype_file};
665 @geno_lines = read_file($geno_file);
666 $markers_no = scalar(split ('\t', $geno_lines[0])) - 1;
669 $self->trait_phenodata_file($c);
670 my $trait_pheno_file = $c->stash->{trait_phenodata_file};
671 my @trait_pheno_lines = read_file($trait_pheno_file) if $trait_pheno_file;
673 my $stocks_no = @trait_pheno_lines ? scalar(@trait_pheno_lines) - 1 : scalar(@geno_lines) - 1;
675 $self->traits_acronym_file($c);
676 my $traits_file = $c->stash->{traits_acronym_file};
677 my @lines = read_file($traits_file);
678 my $traits_no = scalar(@lines) - 1;
680 $c->stash(markers_no => $markers_no,
681 traits_no => $traits_no,
682 stocks_no => $stocks_no,
683 protocol => $protocol,
689 sub selection_trait :Path('/solgs/selection/') Args(5) {
690 my ($self, $c, $selection_pop_id,
691 $model_key, $training_pop_id,
692 $trait_key, $trait_id) = @_;
694 $self->get_trait_details($c, $trait_id);
695 $c->stash->{training_pop_id} = $training_pop_id;
696 $c->stash->{selection_pop_id} = $selection_pop_id;
697 $c->stash->{data_set_type} = 'single population';
699 if ($training_pop_id =~ /uploaded/)
701 $self->uploaded_population_summary($c, $training_pop_id);
702 $c->stash->{training_pop_id} = $c->stash->{project_id};
703 $c->stash->{training_pop_name} = $c->stash->{project_name};
704 $c->stash->{training_pop_desc} = $c->stash->{project_desc};
705 $c->stash->{training_pop_owner} = $c->stash->{owner};
707 else
709 $self->get_project_details($c, $training_pop_id);
710 $c->stash->{training_pop_id} = $c->stash->{project_id};
711 $c->stash->{training_pop_name} = $c->stash->{project_name};
712 $c->stash->{training_pop_desc} = $c->stash->{project_desc};
714 $self->get_project_owners($c, $training_pop_id);
715 $c->stash->{training_pop_owner} = $c->stash->{project_owners};
718 if ($selection_pop_id =~ /uploaded/)
720 $self->uploaded_population_summary($c, $selection_pop_id);
721 $c->stash->{selection_pop_id} = $c->stash->{project_id};
722 $c->stash->{selection_pop_name} = $c->stash->{project_name};
723 $c->stash->{selection_pop_desc} = $c->stash->{project_desc};
724 $c->stash->{selection_pop_owner} = $c->stash->{owner};
726 else
728 $self->get_project_details($c, $selection_pop_id);
729 $c->stash->{selection_pop_id} = $c->stash->{project_id};
730 $c->stash->{selection_pop_name} = $c->stash->{project_name};
731 $c->stash->{selection_pop_desc} = $c->stash->{project_desc};
733 $self->get_project_owners($c, $selection_pop_id);
734 $c->stash->{selection_pop_owner} = $c->stash->{project_owners};
737 my $tr_pop_mr_cnt = $self->get_markers_count($c, {'training_pop' => 1, 'training_pop_id' => $training_pop_id});
738 my $sel_pop_mr_cnt = $self->get_markers_count($c, {'selection_pop' => 1, 'selection_pop_id' => $selection_pop_id});
740 $c->stash->{training_markers_cnt} = $tr_pop_mr_cnt;
741 $c->stash->{selection_markers_cnt} = $sel_pop_mr_cnt;
743 my $protocol = $c->config->{default_genotyping_protocol};
744 $protocol = 'N/A' if !$protocol;
745 $c->stash->{protocol} = $protocol;
747 my $identifier = $training_pop_id . '_' . $selection_pop_id;
748 $self->prediction_pop_gebvs_file($c, $identifier, $trait_id);
749 my $gebvs_file = $c->stash->{prediction_pop_gebvs_file};
751 my @stock_rows = read_file($gebvs_file);
752 $c->stash->{selection_stocks_cnt} = scalar(@stock_rows) - 1;
754 $self->top_blups($c, $gebvs_file);
756 $c->stash->{blups_download_url} = qq | <a href="/solgs/download/prediction/model/$training_pop_id/prediction/$selection_pop_id/$trait_id">Download all GEBVs</a>|;
758 $c->stash->{template} = $self->template('/population/selection_trait.mas');
763 sub build_single_trait_model {
764 my ($self, $c) = @_;
766 my $trait_id = $c->stash->{trait_id};
767 $self->get_trait_details($c, $trait_id);
769 $self->get_rrblup_output($c);
774 sub trait :Path('/solgs/trait') Args(3) {
775 my ($self, $c, $trait_id, $key, $pop_id) = @_;
777 my $ajaxredirect = $c->req->param('source');
778 $c->stash->{ajax_request} = $ajaxredirect;
780 if ($pop_id && $trait_id)
782 $c->stash->{pop_id} = $pop_id;
783 $c->stash->{trait_id} = $trait_id;
785 $self->build_single_trait_model($c);
787 $self->gs_files($c);
789 unless ($ajaxredirect eq 'heritability')
791 my $script_error = $c->stash->{script_error};
793 if ($script_error)
795 my $trait_name = $c->stash->{trait_name};
796 $c->stash->{message} = "$script_error can't create a prediction model for <b>$trait_name</b>.
797 There is a problem with the trait dataset.";
799 $c->stash->{template} = "/generic_message.mas";
801 else
803 $self->traits_acronym_file($c);
804 my $acronym_file = $c->stash->{traits_acronym_file};
806 if (!-e $acronym_file || !-s $acronym_file)
808 $self->get_all_traits($c);
811 $self->project_description($c, $pop_id);
813 $self->trait_phenotype_stat($c);
815 $self->get_project_owners($c, $pop_id);
816 $c->stash->{owner} = $c->stash->{project_owners};
818 $c->stash->{template} = $self->template("/population/trait.mas");
823 if ($ajaxredirect)
825 my $trait_abbr = $c->stash->{trait_abbr};
826 my $cache_dir = $c->stash->{solgs_cache_dir};
827 my $gebv_file = "gebv_kinship_${trait_abbr}_${pop_id}";
828 $gebv_file = $self->grep_file($cache_dir, $gebv_file);
830 my $ret->{status} = 'failed';
832 if (-s $gebv_file)
834 $ret->{status} = 'success';
837 $ret = to_json($ret);
839 $c->res->content_type('application/json');
840 $c->res->body($ret);
847 sub gs_files {
848 my ($self, $c) = @_;
850 $self->output_files($c);
851 #$self->input_files($c);
852 $self->model_accuracy($c);
853 $self->blups_file($c);
854 $self->download_urls($c);
855 $self->top_markers($c);
856 $self->model_parameters($c);
861 sub input_files {
862 my ($self, $c) = @_;
864 $self->genotype_file($c);
865 $self->phenotype_file($c);
866 $self->formatted_phenotype_file($c);
868 my $pred_pop_id = $c->stash->{prediction_pop_id} ||$c->stash->{selection_pop_id} ;
869 my ($prediction_population_file, $filtered_pred_geno_file);
871 if ($pred_pop_id)
873 $prediction_population_file = $c->stash->{prediction_population_file};
876 my $formatted_phenotype_file = $c->stash->{formatted_phenotype_file};
878 my $pheno_file = $c->stash->{phenotype_file};
879 my $geno_file = $c->stash->{genotype_file};
880 my $traits_file = $c->stash->{selected_traits_file};
881 my $trait_file = $c->stash->{trait_file};
882 my $pop_id = $c->stash->{pop_id};
884 no warnings 'uninitialized';
886 my $input_files = join ("\t",
887 $pheno_file,
888 $formatted_phenotype_file,
889 $geno_file,
890 $traits_file,
891 $trait_file,
892 $prediction_population_file,
895 my $name = "input_files_${pop_id}";
896 my $temp_dir = $c->stash->{solgs_tempfiles_dir};
897 my $tempfile = $self->create_tempfile($temp_dir, $name);
898 write_file($tempfile, $input_files);
899 $c->stash->{input_files} = $tempfile;
904 sub output_files {
905 my ($self, $c) = @_;
907 my $pop_id = $c->stash->{pop_id};
908 my $trait = $c->stash->{trait_abbr};
909 my $trait_id = $c->stash->{trait_id};
911 $self->gebv_marker_file($c);
912 $self->gebv_kinship_file($c);
913 $self->validation_file($c);
914 $self->trait_phenodata_file($c);
915 $self->variance_components_file($c);
916 $self->relationship_matrix_file($c);
917 $self->filtered_training_genotype_file($c);
919 $self->filtered_training_genotype_file($c);
921 my $prediction_id = $c->stash->{prediction_pop_id} || $c->stash->{selection_pop_id};
922 if (!$pop_id) {$pop_id = $c->stash->{model_id};}
924 no warnings 'uninitialized';
926 #$prediction_id = "uploaded_${prediction_id" if $c->stash->{uploaded_prediction};
928 my $pred_pop_gebvs_file;
930 if ($prediction_id)
932 my $identifier = $pop_id . '_' . $prediction_id;
933 $self->prediction_pop_gebvs_file($c, $identifier, $trait_id);
934 $pred_pop_gebvs_file = $c->stash->{prediction_pop_gebvs_file};
937 my $file_list = join ("\t",
938 $c->stash->{gebv_kinship_file},
939 $c->stash->{gebv_marker_file},
940 $c->stash->{validation_file},
941 $c->stash->{trait_phenodata_file},
942 $c->stash->{selected_traits_gebv_file},
943 $c->stash->{variance_components_file},
944 $c->stash->{relationship_matrix_file},
945 $c->stash->{filtered_training_genotype_file},
946 $pred_pop_gebvs_file
949 my $name = "output_files_${trait}_$pop_id";
950 my $temp_dir = $c->stash->{solgs_tempfiles_dir};
951 my $tempfile = $self->create_tempfile($temp_dir, $name);
952 write_file($tempfile, $file_list);
954 $c->stash->{output_files} = $tempfile;
959 sub gebv_marker_file {
960 my ($self, $c) = @_;
962 my $pop_id = $c->stash->{pop_id};
963 my $trait = $c->stash->{trait_abbr};
965 no warnings 'uninitialized';
967 my $data_set_type = $c->stash->{data_set_type};
969 my $cache_data;
971 if ($data_set_type =~ /combined populations/)
973 my $combo_identifier = $c->stash->{combo_pops_id};
975 $cache_data = {key => 'gebv_marker_combined_pops_'. $trait . '_' . $combo_identifier,
976 file => 'gebv_marker_'. $trait . '_' . $combo_identifier . '_combined_pops',
977 stash_key => 'gebv_marker_file'
980 else
982 $cache_data = {key => 'gebv_marker_' . $pop_id . '_'. $trait,
983 file => 'gebv_marker_' . $trait . '_' . $pop_id,
984 stash_key => 'gebv_marker_file'
988 $self->cache_file($c, $cache_data);
993 sub variance_components_file {
994 my ($self, $c) = @_;
996 my $pop_id = $c->stash->{pop_id};
997 my $trait = $c->stash->{trait_abbr};
999 my $data_set_type = $c->stash->{data_set_type};
1001 my $cache_data;
1003 no warnings 'uninitialized';
1005 if ($data_set_type =~ /combined populations/)
1007 my $combo_identifier = $c->stash->{combo_pops_id};
1009 $cache_data = {key => 'variance_components_combined_pops_'. $trait . "_". $combo_identifier,
1010 file => 'variance_components_'. $trait . '_' . $combo_identifier. '_combined_pops',
1011 stash_key => 'variance_components_file'
1014 else
1016 $cache_data = {key => 'variance_components_' . $pop_id . '_'. $trait,
1017 file => 'variance_components_' . $trait . '_' . $pop_id,
1018 stash_key => 'variance_components_file'
1022 $self->cache_file($c, $cache_data);
1026 sub trait_phenodata_file {
1027 my ($self, $c) = @_;
1029 my $pop_id = $c->stash->{pop_id};
1030 my $trait = $c->stash->{trait_abbr};
1031 my $data_set_type = $c->stash->{data_set_type};
1033 my $cache_data;
1035 no warnings 'uninitialized';
1037 if ($data_set_type =~ /combined populations/)
1039 my $combo_identifier = $c->stash->{combo_pops_id};
1040 $cache_data = {key => 'phenotype_trait_combined_pops_'. $trait . "_". $combo_identifier,
1041 file => 'phenotype_trait_'. $trait . '_' . $combo_identifier. '_combined_pops',
1042 stash_key => 'trait_phenodata_file'
1045 else
1047 $cache_data = {key => 'phenotype_' . $pop_id . '_'. $trait,
1048 file => 'phenotype_trait_' . $trait . '_' . $pop_id,
1049 stash_key => 'trait_phenodata_file'
1053 $self->cache_file($c, $cache_data);
1057 sub filtered_training_genotype_file {
1058 my ($self, $c) = @_;
1060 my $pop_id = $c->stash->{pop_id};
1061 $pop_id = $c->{stash}->{combo_pops_id} if !$pop_id;
1063 my $cache_data = { key => 'filtered_genotype_data_' . $pop_id,
1064 file => 'filtered_genotype_data_' . $pop_id . '.txt',
1065 stash_key => 'filtered_training_genotype_file'
1068 $self->cache_file($c, $cache_data);
1072 sub filtered_selection_genotype_file {
1073 my ($self, $c) = @_;
1075 my $pop_id = $c->stash->{prediction_pop_id} || $c->stash->{selection_pop_id};
1077 my $cache_data = { key => 'filtered_genotype_data_' . $pop_id,
1078 file => 'filtered_genotype_data_' . $pop_id . '.txt',
1079 stash_key => 'filtered_selection_genotype_file'
1082 $self->cache_file($c, $cache_data);
1086 sub formatted_phenotype_file {
1087 my ($self, $c) = @_;
1089 my $pop_id = $c->stash->{pop_id};
1090 $pop_id = $c->{stash}->{combo_pops_id} if !$pop_id;
1092 my $cache_data = { key => 'formatted_phenotype_data_' . $pop_id,
1093 file => 'formatted_phenotype_data_' . $pop_id,
1094 stash_key => 'formatted_phenotype_file'
1097 $self->cache_file($c, $cache_data);
1101 sub phenotype_file_name {
1102 my ($self, $c, $pop_id) = @_;
1104 #my $pop_id = $c->stash->{pop_id};
1105 #$pop_id = $c->{stash}->{combo_pops_id} if !$pop_id;
1107 if ($pop_id =~ /uploaded/)
1109 my $tmp_dir = $c->stash->{solgs_prediction_upload_dir};
1110 my $file = catfile($tmp_dir, 'phenotype_data_' . $pop_id . '.txt');
1111 $c->stash->{phenotype_file_name} = $file;
1113 else
1116 my $cache_data = { key => 'phenotype_data_' . $pop_id,
1117 file => 'phenotype_data_' . $pop_id . '.txt',
1118 stash_key => 'phenotype_file_name'
1121 $self->cache_file($c, $cache_data);
1126 sub genotype_file_name {
1127 my ($self, $c, $pop_id) = @_;
1129 # my $pop_id = $c->stash->{pop_id};
1130 # $pop_id = $c->stash->{combo_pops_id} if !$pop_id;
1131 # my $pred_pop_id = $c->stash->{prediction_pop_id} || $c->stash->{selection_pop_id} ;
1133 if ($pop_id =~ /uploaded/)
1135 my $tmp_dir = $c->stash->{solgs_prediction_upload_dir};
1136 my $file = catfile($tmp_dir, 'genotype_data_' . $pop_id . '.txt');
1137 $c->stash->{genotype_file_name} = $file;
1139 else
1141 my $cache_data = { key => 'genotype_data_' . $pop_id,
1142 file => 'genotype_data_' . $pop_id . '.txt',
1143 stash_key => 'genotype_file_name'
1146 $self->cache_file($c, $cache_data);
1151 sub gebv_kinship_file {
1152 my ($self, $c) = @_;
1154 my $pop_id = $c->stash->{pop_id};
1155 my $trait = $c->stash->{trait_abbr};
1156 my $data_set_type = $c->stash->{data_set_type};
1158 my $cache_data;
1160 no warnings 'uninitialized';
1162 if ($data_set_type =~ /combined populations/)
1164 my $combo_identifier = $c->stash->{combo_pops_id};
1165 $cache_data = {key => 'gebv_kinship_combined_pops_'. $combo_identifier . "_" . $trait,
1166 file => 'gebv_kinship_'. $trait . '_' . $combo_identifier. '_combined_pops',
1167 stash_key => 'gebv_kinship_file'
1171 else
1174 $cache_data = {key => 'gebv_kinship_' . $pop_id . '_'. $trait,
1175 file => 'gebv_kinship_' . $trait . '_' . $pop_id,
1176 stash_key => 'gebv_kinship_file'
1180 $self->cache_file($c, $cache_data);
1185 sub relationship_matrix_file {
1186 my ($self, $c) = @_;
1188 my $pop_id = $c->stash->{pop_id};
1189 my $data_set_type = $c->stash->{data_set_type};
1191 my $cache_data;
1193 no warnings 'uninitialized';
1195 if ($data_set_type =~ /combined populations/)
1197 my $combo_identifier = $c->stash->{combo_pops_id};
1198 $cache_data = {key => 'relationship_matrix_combined_pops_'. $combo_identifier,
1199 file => 'relationship_matrix_combined_pops_' . $combo_identifier,
1200 stash_key => 'relationship_matrix_file'
1204 else
1207 $cache_data = {key => 'relationship_matrix_' . $pop_id,
1208 file => 'relationship_matrix_' . $pop_id,
1209 stash_key => 'relationship_matrix_file'
1213 $self->cache_file($c, $cache_data);
1218 sub blups_file {
1219 my ($self, $c) = @_;
1221 my $blups_file = $c->stash->{gebv_kinship_file};
1222 $self->top_blups($c, $blups_file);
1226 sub download_blups :Path('/solgs/download/blups/pop') Args(3) {
1227 my ($self, $c, $pop_id, $trait, $trait_id) = @_;
1229 $self->get_trait_details($c, $trait_id);
1230 my $trait_abbr = $c->stash->{trait_abbr};
1232 my $dir = $c->stash->{solgs_cache_dir};
1233 my $blup_exp = "gebv_kinship_${trait_abbr}_${pop_id}";
1234 my $blups_file = $self->grep_file($dir, $blup_exp);
1236 unless (!-e $blups_file || -s $blups_file == 0)
1238 my @blups = map { [ split(/\t/) ] } read_file($blups_file);
1240 $c->res->content_type("text/plain");
1241 $c->res->body(join "", map { $_->[0] . "\t" . $_->[1] } @blups);
1247 sub download_marker_effects :Path('/solgs/download/marker/pop') Args(3) {
1248 my ($self, $c, $pop_id, $trait, $trait_id) = @_;
1250 $self->get_trait_details($c, $trait_id);
1251 my $trait_abbr = $c->stash->{trait_abbr};
1253 my $dir = $c->stash->{solgs_cache_dir};
1254 my $marker_exp = "gebv_marker_${trait_abbr}_${pop_id}";
1255 my $markers_file = $self->grep_file($dir, $marker_exp);
1257 unless (!-e $markers_file || -s $markers_file == 0)
1259 my @effects = map { [ split(/\t/) ] } read_file($markers_file);
1261 $c->res->content_type("text/plain");
1262 $c->res->body(join "", map { $_->[0] . "\t" . $_->[1] } @effects);
1268 sub download_urls {
1269 my ($self, $c) = @_;
1270 my $data_set_type = $c->stash->{data_set_type};
1271 my $pop_id;
1273 no warnings 'uninitialized';
1275 if ($data_set_type =~ /combined populations/)
1277 $pop_id = $c->stash->{combo_pops_id};
1279 else
1281 $pop_id = $c->stash->{pop_id};
1284 my $trait_id = $c->stash->{trait_id};
1285 my $ranked_genos_file = $c->stash->{selection_index_file};
1287 if ($ranked_genos_file)
1289 ($ranked_genos_file) = fileparse($ranked_genos_file);
1292 my $blups_url = qq | <a href="/solgs/download/blups/pop/$pop_id/trait/$trait_id">Download all GEBVs</a> |;
1293 my $marker_url = qq | <a href="/solgs/download/marker/pop/$pop_id/trait/$trait_id">Download all marker effects</a> |;
1294 my $validation_url = qq | <a href="/solgs/download/validation/pop/$pop_id/trait/$trait_id">Download model accuracy report</a> |;
1295 my $ranked_genotypes_url = qq | <a href="/solgs/download/ranked/genotypes/pop/$pop_id/$ranked_genos_file">Download selection indices</a> |;
1297 $c->stash(blups_download_url => $blups_url,
1298 marker_effects_download_url => $marker_url,
1299 validation_download_url => $validation_url,
1300 ranked_genotypes_download_url => $ranked_genotypes_url,
1305 sub top_blups {
1306 my ($self, $c, $blups_file) = @_;
1308 my $blups = $self->convert_to_arrayref_of_arrays($c, $blups_file);
1310 my @top_blups = @$blups[0..9];
1312 $c->stash->{top_blups} = \@top_blups;
1316 sub top_markers {
1317 my ($self, $c) = @_;
1319 my $markers_file = $c->stash->{gebv_marker_file};
1321 my $markers = $self->convert_to_arrayref_of_arrays($c, $markers_file);
1323 my @top_markers = @$markers[0..9];
1325 $c->stash->{top_marker_effects} = \@top_markers;
1329 sub validation_file {
1330 my ($self, $c) = @_;
1332 my $pop_id = $c->stash->{pop_id};
1333 my $trait = $c->stash->{trait_abbr};
1335 my $data_set_type = $c->stash->{data_set_type};
1337 my $cache_data;
1339 no warnings 'uninitialized';
1341 if ($data_set_type =~ /combined populations/)
1343 my $combo_identifier = $c->stash->{combo_pops_id};
1344 $cache_data = {key => 'cross_validation_combined_pops_'. $trait . "_${combo_identifier}",
1345 file => 'cross_validation_'. $trait . '_' . $combo_identifier . '_combined_pops' ,
1346 stash_key => 'validation_file'
1349 else
1352 $cache_data = {key => 'cross_validation_' . $pop_id . '_' . $trait,
1353 file => 'cross_validation_' . $trait . '_' . $pop_id,
1354 stash_key => 'validation_file'
1358 $self->cache_file($c, $cache_data);
1362 sub combined_gebvs_file {
1363 my ($self, $c, $identifier) = @_;
1365 my $pop_id = $c->stash->{pop_id};
1367 my $cache_data = {key => 'selected_traits_gebv_' . $pop_id . '_' . $identifier,
1368 file => 'selected_traits_gebv_' . $pop_id . '_' . $identifier,
1369 stash_key => 'selected_traits_gebv_file'
1372 $self->cache_file($c, $cache_data);
1377 sub download_validation :Path('/solgs/download/validation/pop') Args(3) {
1378 my ($self, $c, $pop_id, $trait, $trait_id) = @_;
1380 $self->get_trait_details($c, $trait_id);
1381 my $trait_abbr = $c->stash->{trait_abbr};
1383 my $dir = $c->stash->{solgs_cache_dir};
1384 my $val_exp = "cross_validation_${trait_abbr}_${pop_id}";
1385 my $validation_file = $self->grep_file($dir, $val_exp);
1387 unless (!-e $validation_file || -s $validation_file == 0)
1389 my @validation = map { [ split(/\t/) ] } read_file($validation_file);
1391 $c->res->content_type("text/plain");
1392 $c->res->body(join "", map { $_->[0] . "\t" . $_->[1] } @validation);
1398 sub predict_selection_pop_single_trait {
1399 my ($self, $c) = @_;
1401 if ($c->stash->{data_set_type} =~ /single population/)
1403 $self->predict_selection_pop_single_pop_model($c)
1405 else
1407 $c->controller('solGS::combinedTrials')->predict_selection_pop_combined_pops_model($c);
1414 sub predict_selection_pop_multi_traits {
1415 my ($self, $c) = @_;
1417 my $data_set_type = $c->stash->{data_set_type};
1418 my $training_pop_id = $c->stash->{training_pop_id};
1419 my $selection_pop_id = $c->stash->{selection_pop_id};
1421 $c->stash->{pop_id} = $training_pop_id;
1422 $self->traits_with_valid_models($c);
1423 my @traits_with_valid_models = @{$c->stash->{traits_with_valid_models}};
1425 foreach my $trait_abbr (@traits_with_valid_models)
1427 $c->stash->{trait_abbr} = $trait_abbr;
1428 $self->get_trait_details_of_trait_abbr($c);
1429 $self->predict_selection_pop_single_trait($c);
1435 sub predict_selection_pop_single_pop_model {
1436 my ($self, $c) = @_;
1438 my $trait_id = $c->stash->{trait_id};
1439 my $training_pop_id = $c->stash->{training_pop_id};
1440 my $prediction_pop_id = $c->stash->{prediction_pop_id} || $c->stash->{selection_pop_id};
1442 $self->get_trait_details($c, $trait_id);
1443 my $trait_abbr = $c->stash->{trait_abbr};
1445 my $identifier = $training_pop_id . '_' . $prediction_pop_id;
1446 $self->prediction_pop_gebvs_file($c, $identifier, $trait_id);
1448 my $prediction_pop_gebvs_file = $c->stash->{prediction_pop_gebvs_file};
1450 if (!-s $prediction_pop_gebvs_file)
1452 my $dir = $c->stash->{solgs_cache_dir};
1454 my $exp = "phenotype_data_${training_pop_id}";
1455 my $pheno_file = $self->grep_file($dir, $exp);
1457 $exp = "genotype_data_${training_pop_id}";
1458 my $geno_file = $self->grep_file($dir, $exp);
1460 $c->stash->{pheno_file} = $pheno_file;
1461 $c->stash->{geno_file} = $geno_file;
1463 $self->prediction_population_file($c, $prediction_pop_id);
1464 $self->get_rrblup_output($c);
1470 sub selection_prediction :Path('/solgs/model') Args(3) {
1471 my ($self, $c, $training_pop_id, $pop, $selection_pop_id) = @_;
1473 my $referer = $c->req->referer;
1474 my $path = $c->req->path;
1475 my $base = $c->req->base;
1476 $referer =~ s/$base//;
1478 $c->stash->{training_pop_id} = $training_pop_id;
1479 $c->stash->{model_id} = $training_pop_id;
1480 $c->stash->{pop_id} = $training_pop_id;
1481 $c->stash->{prediction_pop_id} = $selection_pop_id;
1482 $c->stash->{selection_pop_id} = $selection_pop_id;
1484 if ($referer =~ /solgs\/model\/combined\/populations\//)
1486 my ($combo_pops_id, $trait_id) = $referer =~ m/(\d+)/g;
1488 $c->stash->{data_set_type} = "combined populations";
1489 $c->stash->{combo_pops_id} = $combo_pops_id;
1490 $c->stash->{trait_id} = $trait_id;
1492 $c->controller('solGS::combinedTrials')->predict_selection_pop_combined_pops_model($c);
1494 $c->controller('solGS::combinedTrials')->combined_pops_summary($c);
1495 $self->trait_phenotype_stat($c);
1496 $self->gs_files($c);
1498 $c->res->redirect("/solgs/model/combined/populations/$combo_pops_id/trait/$trait_id");
1499 $c->detach();
1501 elsif ($referer =~ /solgs\/trait\//)
1503 my ($trait_id, $pop_id) = $referer =~ m/(\d+)/g;
1505 $c->stash->{data_set_type} = "single population";
1506 $c->stash->{trait_id} = $trait_id;
1508 $self->predict_selection_pop_single_pop_model($c);
1510 $self->trait_phenotype_stat($c);
1511 $self->gs_files($c);
1513 $c->res->redirect("/solgs/trait/$trait_id/population/$training_pop_id");
1514 $c->detach();
1516 elsif ($referer =~ /solgs\/models\/combined\/trials/)
1518 $c->stash->{data_set_type} = "combined populations";
1519 $c->stash->{combo_pops_id} = $training_pop_id;
1521 $self->traits_with_valid_models($c);
1522 my @traits_abbrs = @ {$c->stash->{traits_with_valid_models}};
1524 foreach my $trait_abbr (@traits_abbrs)
1526 $c->stash->{trait_abbr} = $trait_abbr;
1527 $self->get_trait_details_of_trait_abbr($c);
1528 $c->controller('solGS::combinedTrials')->predict_selection_pop_combined_pops_model($c);
1531 $c->res->redirect("/solgs/models/combined/trials/$training_pop_id");
1532 $c->detach();
1534 elsif ($referer =~ /solgs\/traits\/all\/population\//)
1536 $c->stash->{data_set_type} = "single population";
1538 $self->predict_selection_pop_multi_traits($c);
1540 $c->res->redirect("/solgs/traits/all/population/$training_pop_id");
1541 $c->detach();
1547 sub prediction_pop_gebvs_file {
1548 my ($self, $c, $identifier, $trait_id) = @_;
1550 my $cache_data = {key => 'prediction_pop_gebvs_' . $identifier . '_' . $trait_id,
1551 file => 'prediction_pop_gebvs_' . $identifier . '_' . $trait_id,
1552 stash_key => 'prediction_pop_gebvs_file'
1555 $self->cache_file($c, $cache_data);
1560 sub list_predicted_selection_pops {
1561 my ($self, $c, $model_id) = @_;
1563 my $dir = $c->stash->{solgs_cache_dir};
1565 opendir my $dh, $dir or die "can't open $dir: $!\n";
1567 my @files = grep { /prediction_pop_gebvs_${model_id}_/ && -f "$dir/$_" }
1568 readdir($dh);
1570 closedir $dh;
1572 my @pred_pops;
1574 foreach (@files)
1577 unless ($_ =~ /uploaded/) {
1578 my ($model_id2, $pred_pop_id, $trait_id) = $_ =~ m/\d+/g;
1580 push @pred_pops, $pred_pop_id;
1584 @pred_pops = uniq(@pred_pops);
1586 $c->stash->{list_of_predicted_selection_pops} = \@pred_pops;
1591 sub download_prediction_GEBVs :Path('/solgs/download/prediction/model') Args(4) {
1592 my ($self, $c, $pop_id, $prediction, $prediction_id, $trait_id) = @_;
1594 $self->get_trait_details($c, $trait_id);
1595 $c->stash->{pop_id} = $pop_id;
1597 my $identifier = $pop_id . "_" . $prediction_id;
1598 $self->prediction_pop_gebvs_file($c, $identifier, $trait_id);
1599 my $prediction_gebvs_file = $c->stash->{prediction_pop_gebvs_file};
1601 unless (!-e $prediction_gebvs_file || -s $prediction_gebvs_file == 0)
1603 my @prediction_gebvs = map { [ split(/\t/) ] } read_file($prediction_gebvs_file);
1605 $c->res->content_type("text/plain");
1606 $c->res->body(join "", map { $_->[0] . "\t" . $_->[1] } @prediction_gebvs);
1612 sub prediction_pop_analyzed_traits {
1613 my ($self, $c, $training_pop_id, $prediction_pop_id) = @_;
1615 my $dir = $c->stash->{solgs_cache_dir};
1616 my @pred_files;
1618 opendir my $dh, $dir or die "can't open $dir: $!\n";
1620 no warnings 'uninitialized';
1622 my $prediction_is_uploaded = $c->stash->{uploaded_prediction};
1624 #$prediction_pop_id = "uploaded_${prediction_pop_id}" if $prediction_is_uploaded;
1626 if ($training_pop_id !~ /$prediction_pop_id/)
1628 my @files = grep { /prediction_pop_gebvs_${training_pop_id}_${prediction_pop_id}/ && -s "$dir/$_" > 0 }
1629 readdir($dh);
1631 closedir $dh;
1633 my @trait_ids;
1635 if ($files[0])
1637 my @copy_files = @files;
1639 @trait_ids = map { s/prediction_pop_gebvs_${training_pop_id}_${prediction_pop_id}_//g ? $_ : 0} @copy_files;
1641 my @traits = ();
1642 if(@trait_ids)
1644 foreach my $trait_id (@trait_ids)
1646 $trait_id =~ s/s+//g;
1647 $self->get_trait_details($c, $trait_id);
1648 push @traits, $c->stash->{trait_abbr};
1652 $c->stash->{prediction_pop_analyzed_traits} = \@traits;
1653 $c->stash->{prediction_pop_analyzed_traits_ids} = \@trait_ids;
1654 $c->stash->{prediction_pop_analyzed_traits_files} = \@files;
1661 sub download_prediction_urls {
1662 my ($self, $c, $training_pop_id, $prediction_pop_id) = @_;
1664 my $selection_traits_ids;
1665 my $selection_traits_files;
1666 my $download_url;# = $c->stash->{download_prediction};
1667 my $model_tr_id = $c->stash->{trait_id};
1669 my $page = $c->req->referer;
1670 my $base = $c->req->base;
1672 my $data_set_type = 'combined populations' if $page =~ /combined/;
1674 if ( $base !~ /localhost/)
1676 $base =~ s/:\d+//;
1677 $base =~ s/http\w?/https/;
1680 $page =~ s/$base//;
1682 no warnings 'uninitialized';
1684 if ($prediction_pop_id)
1686 $self->prediction_pop_analyzed_traits($c, $training_pop_id, $prediction_pop_id);
1687 $selection_traits_ids = $c->stash->{prediction_pop_analyzed_traits_ids};
1688 $selection_traits_files = $c->stash->{prediction_pop_analyzed_traits_files};
1691 if ($page =~ /solgs\/model\/combined\/populations\// )
1693 ($model_tr_id) = $page =~ /(\d+)$/;
1694 $model_tr_id =~ s/s+//g;
1697 if ($page =~ /solgs\/trait\// )
1699 $model_tr_id = (split '/', $page)[2];
1702 if ($page =~ /(\/uploaded\/prediction\/)/ && $page !~ /(\solgs\/traits\/all)/)
1704 ($model_tr_id) = $page =~ /(\d+)$/;
1705 $model_tr_id =~ s/s+//g;
1708 my ($trait_is_predicted) = grep {/$model_tr_id/ } @$selection_traits_ids;
1709 my @selection_traits_ids = uniq(@$selection_traits_ids);
1711 foreach my $trait_id (@selection_traits_ids)
1713 $trait_id =~ s/s+//g;
1714 $self->get_trait_details($c, $trait_id);
1716 my $trait_abbr = $c->stash->{trait_abbr};
1717 my $trait_name = $c->stash->{trait_name};
1720 if ($page =~ /solgs\/traits\/all\/|solgs\/models\/combined\//)
1722 $model_tr_id = $trait_id;
1723 $download_url .= " | " if $download_url;
1726 if ($selection_traits_files->[0] =~ $prediction_pop_id && $trait_id == $model_tr_id)
1728 if ($data_set_type =~ /combined populations/)
1730 $download_url .= qq |<a href="/solgs/selection/$prediction_pop_id/model/combined/$training_pop_id/trait/$trait_id">$trait_abbr</a> |;
1732 else
1734 $download_url .= qq |<a href="/solgs/selection/$prediction_pop_id/model/$training_pop_id/trait/$trait_id">$trait_abbr</a> |;
1739 if ($download_url)
1741 $c->stash->{download_prediction} = $download_url;
1743 else
1745 $c->stash->{download_prediction} = qq | <a href ="/solgs/model/$training_pop_id/prediction/$prediction_pop_id" onclick="solGS.waitPage(this.href); return false;">[ Predict ]</a> |;
1747 $c->stash->{download_prediction} = undef if $c->stash->{uploaded_prediction};
1753 sub model_accuracy {
1754 my ($self, $c) = @_;
1755 my $file = $c->stash->{validation_file};
1756 my @report =();
1758 if ( !-e $file) { @report = (["Validation file doesn't exist.", "None"]);}
1759 if ( -s $file == 0) { @report = (["There is no cross-validation output report.", "None"]);}
1761 if (!@report)
1763 @report = map { [ split(/\t/, $_) ]} read_file($file);
1766 shift(@report); #add condition
1768 $c->stash->{accuracy_report} = \@report;
1773 sub model_parameters {
1774 my ($self, $c) = @_;
1776 $self->variance_components_file($c);
1777 my $file = $c->stash->{variance_components_file};
1779 my @params = map { [ split(/\t/, $_) ]} read_file($file);
1781 shift(@params); #add condition
1783 $c->stash->{model_parameters} = \@params;
1788 sub solgs_details_trait :Path('/solgs/details/trait/') Args(1) {
1789 my ($self, $c, $trait_id) = @_;
1791 $trait_id = $c->req->param('trait_id') if !$trait_id;
1793 my $ret->{status} = undef;
1795 if ($trait_id)
1797 $self->get_trait_details($c, $trait_id);
1798 $ret->{name} = $c->stash->{trait_name};
1799 $ret->{def} = $c->stash->{trait_def};
1800 $ret->{abbr} = $c->stash->{trait_abbr};
1801 $ret->{id} = $c->stash->{trait_id};
1802 $ret->{status} = 1;
1805 $ret = to_json($ret);
1807 $c->res->content_type('application/json');
1808 $c->res->body($ret);
1813 sub get_trait_details {
1814 my ($self, $c, $trait) = @_;
1816 $trait = $c->stash->{trait_id} if !$trait;
1818 die "Can't get trait details with out trait id or name: $!\n" if !$trait;
1820 my ($trait_name, $trait_def, $trait_id, $trait_abbr);
1822 if ($trait =~ /^\d+$/)
1824 $trait = $c->model('solGS::solGS')->trait_name($trait);
1827 if ($trait)
1829 my $rs = $c->model('solGS::solGS')->trait_details($trait);
1831 while (my $row = $rs->next)
1833 $trait_id = $row->id;
1834 $trait_name = $row->name;
1835 $trait_def = $row->definition;
1836 $trait_abbr = $self->abbreviate_term($trait_name);
1840 my $abbr = $self->abbreviate_term($trait_name);
1842 $c->stash->{trait_id} = $trait_id;
1843 $c->stash->{trait_name} = $trait_name;
1844 $c->stash->{trait_def} = $trait_def;
1845 $c->stash->{trait_abbr} = $abbr;
1849 #creates and writes a list of GEBV files of
1850 #traits selected for ranking genotypes.
1851 sub get_gebv_files_of_traits {
1852 my ($self, $c) = @_;
1854 my $pop_id = $c->stash->{pop_id};
1855 $c->stash->{model_id} = $pop_id;
1856 my $pred_pop_id = $c->stash->{prediction_pop_id};
1858 my $dir = $c->stash->{solgs_cache_dir};
1860 my $gebv_files;
1861 my $valid_gebv_files;
1862 my $pred_gebv_files;
1864 if ($pred_pop_id && $pred_pop_id != $pop_id)
1866 $self->prediction_pop_analyzed_traits($c, $pop_id, $pred_pop_id);
1867 $pred_gebv_files = $c->stash->{prediction_pop_analyzed_traits_files};
1869 foreach (@$pred_gebv_files)
1871 my$gebv_file = catfile($dir, $_);
1872 $gebv_files .= $gebv_file;
1873 $gebv_files .= "\t" unless (@$pred_gebv_files[-1] eq $_);
1876 else
1878 $self->analyzed_traits($c);
1879 my @analyzed_traits_files = @{$c->stash->{analyzed_traits_files}};
1881 foreach my $tr_file (@analyzed_traits_files)
1883 $gebv_files .= $tr_file;
1884 $gebv_files .= "\t" unless ($analyzed_traits_files[-1] eq $tr_file);
1887 my @analyzed_valid_traits_files = @{$c->stash->{analyzed_valid_traits_files}};
1889 foreach my $tr_file (@analyzed_valid_traits_files)
1891 $valid_gebv_files .= $tr_file;
1892 $valid_gebv_files .= "\t" unless ($analyzed_valid_traits_files[-1] eq $tr_file);
1898 my $pred_file_suffix;
1899 $pred_file_suffix = '_' . $pred_pop_id if $pred_pop_id;
1901 my $name = "gebv_files_of_traits_${pop_id}${pred_file_suffix}";
1902 my $temp_dir = $c->stash->{solgs_tempfiles_dir};
1903 my $file = $self->create_tempfile($temp_dir, $name);
1905 write_file($file, $gebv_files);
1907 $c->stash->{gebv_files_of_traits} = $file;
1909 my $name2 = "gebv_files_of_valid_traits_${pop_id}${pred_file_suffix}";
1910 my $file2 = $self->create_tempfile($temp_dir, $name2);
1912 write_file($file2, $valid_gebv_files);
1914 $c->stash->{gebv_files_of_valid_traits} = $file2;
1919 sub gebv_rel_weights {
1920 my ($self, $c, $params, $pred_pop_id) = @_;
1922 my $pop_id = $c->stash->{pop_id};
1924 my $rel_wts = "trait" . "\t" . 'relative_weight' . "\n";
1925 foreach my $tr (keys %$params)
1927 my $wt = $params->{$tr};
1928 unless ($tr eq 'rank')
1930 $rel_wts .= $tr . "\t" . $wt;
1931 $rel_wts .= "\n";
1935 my $pred_file_suffix;
1936 $pred_file_suffix = '_' . $pred_pop_id if $pred_pop_id;
1938 my $name = "rel_weights_${pop_id}${pred_file_suffix}";
1939 my $temp_dir = $c->stash->{solgs_tempfiles_dir};
1940 my $file = $self->create_tempfile($temp_dir, $name);
1941 write_file($file, $rel_wts);
1943 $c->stash->{rel_weights_file} = $file;
1948 sub ranked_genotypes_file {
1949 my ($self, $c, $pred_pop_id) = @_;
1951 my $pop_id = $c->stash->{pop_id};
1953 my $pred_file_suffix;
1954 $pred_file_suffix = '_' . $pred_pop_id if $pred_pop_id;
1956 my $name = "ranked_genotypes_${pop_id}${pred_file_suffix}";
1957 my $temp_dir = $c->stash->{solgs_tempfiles_dir};
1958 my $file = $self->create_tempfile($temp_dir, $name);
1959 $c->stash->{ranked_genotypes_file} = $file;
1964 sub selection_index_file {
1965 my ($self, $c, $pred_pop_id) = @_;
1967 my $pop_id = $c->stash->{pop_id};
1969 my $pred_file_suffix;
1970 $pred_file_suffix = '_' . $pred_pop_id if $pred_pop_id;
1972 my $name = "selection_index_${pop_id}${pred_file_suffix}";
1973 my $temp_dir = $c->stash->{solgs_tempfiles_dir};
1974 my $file = $self->create_tempfile($temp_dir, $name);
1975 $c->stash->{selection_index_file} = $file;
1980 sub download_ranked_genotypes :Path('/solgs/download/ranked/genotypes/pop') Args(2) {
1981 my ($self, $c, $pop_id, $genotypes_file) = @_;
1983 $c->stash->{pop_id} = $pop_id;
1985 $genotypes_file = catfile($c->stash->{solgs_tempfiles_dir}, $genotypes_file);
1987 unless (!-e $genotypes_file || -s $genotypes_file == 0)
1989 my @ranks = map { [ split(/\t/) ] } read_file($genotypes_file);
1991 $c->res->content_type("text/plain");
1992 $c->res->body(join "", map { $_->[0] . "\t" . $_->[1] } @ranks);
1998 sub rank_genotypes : Private {
1999 my ($self, $c, $pred_pop_id) = @_;
2001 my $pop_id = $c->stash->{pop_id};
2002 $c->stash->{prediction_pop_id} = $pred_pop_id;
2004 my $input_files = join("\t",
2005 $c->stash->{rel_weights_file},
2006 $c->stash->{gebv_files_of_traits}
2009 $self->ranked_genotypes_file($c, $pred_pop_id);
2010 $self->selection_index_file($c, $pred_pop_id);
2012 my $output_files = join("\t",
2013 $c->stash->{ranked_genotypes_file},
2014 $c->stash->{selection_index_file}
2017 my $pred_file_suffix;
2018 $pred_file_suffix = '_' . $pred_pop_id if $pred_pop_id;
2020 my $name = "output_rank_genotypes_${pop_id}${pred_file_suffix}";
2021 my $temp_dir = $c->stash->{solgs_tempfiles_dir};
2022 my $output_file = $self->create_tempfile($temp_dir, $name);
2023 write_file($output_file, $output_files);
2025 $name = "input_rank_genotypes_${pop_id}${pred_file_suffix}";
2026 my $input_file = $self->create_tempfile($temp_dir, $name);
2027 write_file($input_file, $input_files);
2029 $c->stash->{output_files} = $output_file;
2030 $c->stash->{input_files} = $input_file;
2031 $c->stash->{r_temp_file} = "rank-gebv-genotypes-${pop_id}${pred_file_suffix}";
2032 $c->stash->{r_script} = 'R/solGS/selection_index.r';
2034 $self->run_r_script($c);
2035 $self->download_urls($c);
2036 $self->get_top_10_selection_indices($c);
2040 sub get_top_10_selection_indices {
2041 my ($self, $c) = @_;
2043 my $si_file = $c->stash->{selection_index_file};
2045 my $si_data = $self->convert_to_arrayref_of_arrays($c, $si_file);
2046 my @top_genotypes = @$si_data[0..9];
2048 $c->stash->{top_10_selection_indices} = \@top_genotypes;
2052 sub convert_to_arrayref_of_arrays {
2053 my ($self, $c, $file) = @_;
2055 open my $fh, $file or die "couldnot open $file: $!";
2057 my @data;
2058 while (<$fh>)
2060 push @data, map { [ split(/\t/) ] } $_ if $_;
2063 if (@data)
2065 shift(@data);
2066 return \@data;
2067 } else
2069 return;
2074 sub trait_phenotype_file {
2075 my ($self, $c, $pop_id, $trait) = @_;
2077 my $dir = $c->stash->{solgs_cache_dir};
2078 my $exp = "phenotype_trait_${trait}_${pop_id}";
2079 my $file = $self->grep_file($dir, $exp);
2081 $c->stash->{trait_phenotype_file} = $file;
2086 sub check_selection_pops_list :Path('/solgs/check/selection/populations') Args(1) {
2087 my ($self, $c, $tr_pop_id) = @_;
2089 $c->stash->{training_pop_id} = $tr_pop_id;
2091 $self->list_of_prediction_pops_file($c, $tr_pop_id);
2092 my $pred_pops_file = $c->stash->{list_of_prediction_pops_file};
2094 my $ret->{result} = 0;
2096 if (-s $pred_pops_file)
2098 $self->list_of_prediction_pops($c, $tr_pop_id);
2099 $ret->{data} = $c->stash->{list_of_prediction_pops};
2102 $ret = to_json($ret);
2104 $c->res->content_type('application/json');
2105 $c->res->body($ret);
2110 sub check_genotype_data_population :Path('/solgs/check/genotype/data/population/') Args(1) {
2111 my ($self, $c, $pop_id) = @_;
2113 $c->stash->{pop_id} = $pop_id;
2114 $self->check_population_has_genotype($c);
2116 my $ret->{has_genotype} = $c->stash->{population_has_genotype};
2117 $ret = to_json($ret);
2119 $c->res->content_type('application/json');
2120 $c->res->body($ret);
2125 sub check_phenotype_data_population :Path('/solgs/check/phenotype/data/population/') Args(1) {
2126 my ($self, $c, $pop_id) = @_;
2128 $c->stash->{pop_id} = $pop_id;
2129 $self->check_population_has_phenotype($c);
2131 my $ret->{has_phenotype} = $c->stash->{population_has_phenotype};
2132 $ret = to_json($ret);
2134 $c->res->content_type('application/json');
2135 $c->res->body($ret);
2140 sub check_population_exists :Path('/solgs/check/population/exists/') Args(0) {
2141 my ($self, $c) = @_;
2143 my $name = $c->req->param('name');
2145 my $rs = $c->model("solGS::solGS")->project_details_by_name($name);
2147 my $pop_id;
2148 while (my $row = $rs->next) {
2149 $pop_id = $row->id;
2152 my $ret->{population_id} = $pop_id;
2153 $ret = to_json($ret);
2155 $c->res->content_type('application/json');
2156 $c->res->body($ret);
2161 sub check_training_population :Path('/solgs/check/training/population/') Args(1) {
2162 my ($self, $c, $pop_id) = @_;
2164 $c->stash->{pop_id} = $pop_id;
2166 $self->check_population_is_training_population($c);
2167 my $is_training_pop = $c->stash->{is_training_population};
2169 my $training_pop_data;
2170 if ($is_training_pop)
2172 my $pr_rs = $c->model('solGS::solGS')->project_details($pop_id);
2173 $self->projects_links($c, $pr_rs);
2174 $training_pop_data = $c->stash->{projects_pages};
2177 my $ret->{is_training_population} = $is_training_pop;
2178 $ret->{training_pop_data} = $training_pop_data;
2179 $ret = to_json($ret);
2181 $c->res->content_type('application/json');
2182 $c->res->body($ret);
2187 sub check_population_is_training_population {
2188 my ($self, $c) = @_;
2190 my $pr_id = $c->stash->{pop_id};
2191 my $is_gs = $c->model("solGS::solGS")->get_project_type($pr_id);
2193 my $has_phenotype;
2194 my $has_genotype;
2196 if ($is_gs !~ /genomic selection/)
2198 $self->check_population_has_phenotype($c);
2199 $has_phenotype = $c->stash->{population_has_phenotype};
2201 if ($has_phenotype)
2203 $self->check_population_has_genotype($c);
2204 $has_genotype = $c->stash->{population_has_genotype};
2208 if ($is_gs || ($has_phenotype && $has_genotype))
2210 $c->stash->{is_training_population} = 1;
2216 sub check_population_has_phenotype {
2217 my ($self, $c) = @_;
2219 my $pr_id = $c->stash->{pop_id};
2220 my $is_gs = $c->model("solGS::solGS")->get_project_type($pr_id);
2221 my $has_phenotype = 1 if $is_gs;
2223 if ($is_gs !~ /genomic selection/)
2225 my $cache_dir = $c->stash->{solgs_cache_dir};
2226 my $pheno_file = $self->grep_file($cache_dir, "phenotype_data_${pr_id}.txt");
2228 if (!-s $pheno_file)
2230 $has_phenotype = $c->model("solGS::solGS")->has_phenotype($pr_id);
2232 else
2234 $has_phenotype = 1;
2238 $c->stash->{population_has_phenotype} = $has_phenotype;
2243 sub check_population_has_genotype {
2244 my ($self, $c) = @_;
2246 my $pop_id = $c->stash->{pop_id};
2248 my $has_genotype;
2250 my $geno_file;
2251 if ($pop_id =~ /upload/)
2253 my $dir = $c->stash->{solgs_prediction_upload_dir};
2254 my $user_id = $c->user->id;
2255 my $file_name = "genotype_data_${pop_id}";
2256 $geno_file = $self->grep_file($dir, $file_name);
2257 $has_genotype = 1 if -s $geno_file;
2260 unless ($has_genotype)
2262 $has_genotype = $c->model('solGS::solGS')->has_genotype($pop_id);
2265 $c->stash->{population_has_genotype} = $has_genotype;
2270 sub check_selection_population_relevance :Path('/solgs/check/selection/population/relevance') Args() {
2271 my ($self, $c) = @_;
2273 my $data_set_type = $c->req->param('data_set_type');
2274 my $training_pop_id = $c->req->param('training_pop_id');
2275 my $selection_pop_name = $c->req->param('selection_pop_name');
2276 my $trait_id = $c->req->param('trait_id');
2278 $c->stash->{data_set_type} = $data_set_type;
2280 my $pr_rs = $c->model("solGS::solGS")->project_details_by_exact_name($selection_pop_name);
2282 my $selection_pop_id;
2283 while (my $row = $pr_rs->next) {
2284 $selection_pop_id = $row->project_id;
2287 my $ret = {};
2289 if ($selection_pop_id !~ /$training_pop_id/)
2291 my $has_genotype;
2292 if ($selection_pop_id)
2294 $c->stash->{pop_id} = $selection_pop_id;
2295 $self->check_population_has_genotype($c);
2296 $has_genotype = $c->stash->{population_has_genotype};
2299 my $similarity;
2300 if ($has_genotype)
2302 $c->stash->{pop_id} = $selection_pop_id;
2304 $self->first_stock_genotype_data($c, $selection_pop_id);
2305 my $selection_pop_geno_file = $c->stash->{first_stock_genotype_file};
2307 my $training_pop_geno_file;
2309 if ($training_pop_id =~ /upload/)
2311 my $dir = $c->stash->{solgs_prediction_upload_dir};
2312 my $user_id = $c->user->id;
2313 my $tr_geno_file = "genotype_data_${training_pop_id}";
2314 $training_pop_geno_file = $self->grep_file($dir, $tr_geno_file);
2316 else
2318 my $dir = $c->stash->{solgs_cache_dir};
2319 my $tr_geno_file;
2321 if ($data_set_type =~ /combined populations/)
2323 $self->get_trait_details($c, $trait_id);
2324 my $trait_abbr = $c->stash->{trait_abbr};
2325 $tr_geno_file = "genotype_data_${training_pop_id}_${trait_abbr}";
2327 else
2329 $tr_geno_file = "genotype_data_${training_pop_id}";
2332 $training_pop_geno_file = $self->grep_file($dir, $tr_geno_file);
2335 $similarity = $self->compare_marker_set_similarity([$selection_pop_geno_file, $training_pop_geno_file]);
2338 my $selection_pop_data;
2339 if ($similarity >= 0.5 )
2341 $c->stash->{training_pop_id} = $training_pop_id;
2342 $self->format_selection_pops($c, [$selection_pop_id]);
2343 $selection_pop_data = $c->stash->{selection_pops_list};
2344 $self->save_selection_pops($c, [$selection_pop_id]);
2347 $ret->{selection_pop_data} = $selection_pop_data;
2348 $ret->{similarity} = $similarity;
2349 $ret->{has_genotype} = $has_genotype;
2350 $ret->{selection_pop_id} = $selection_pop_id;
2352 else
2354 $ret->{selection_pop_id} = $selection_pop_id;
2357 $ret = to_json($ret);
2359 $c->res->content_type('application/json');
2360 $c->res->body($ret);
2365 sub save_selection_pops {
2366 my ($self, $c, $selection_pop_id) = @_;
2368 my $training_pop_id = $c->stash->{training_pop_id};
2370 $self->list_of_prediction_pops_file($c, $training_pop_id);
2371 my $selection_pops_file = $c->stash->{list_of_prediction_pops_file};
2373 my @existing_pops_ids = split(/\n/, read_file($selection_pops_file));
2375 my @uniq_ids = unique(@existing_pops_ids, @$selection_pop_id);
2376 my $formatted_ids = join("\n", @uniq_ids);
2378 write_file($selection_pops_file, $formatted_ids);
2383 sub search_selection_pops :Path('/solgs/search/selection/populations/') {
2384 my ($self, $c, $tr_pop_id) = @_;
2386 $c->stash->{training_pop_id} = $tr_pop_id;
2388 $self->search_all_relevant_selection_pops($c, $tr_pop_id);
2389 my $selection_pops_list = $c->stash->{all_relevant_selection_pops};
2391 my $ret->{selection_pops_list} = 0;
2392 if ($selection_pops_list)
2394 $ret->{data} = $selection_pops_list;
2397 $ret = to_json($ret);
2399 $c->res->content_type('application/json');
2400 $c->res->body($ret);
2405 sub list_of_prediction_pops {
2406 my ($self, $c, $training_pop_id) = @_;
2408 $self->list_of_prediction_pops_file($c, $training_pop_id);
2409 my $pred_pops_file = $c->stash->{list_of_prediction_pops_file};
2411 my @pred_pops_ids = split(/\n/, read_file($pred_pops_file));
2413 $self->format_selection_pops($c, \@pred_pops_ids);
2415 $c->stash->{list_of_prediction_pops} = $c->stash->{selection_pops_list};
2420 sub search_all_relevant_selection_pops {
2421 my ($self, $c, $training_pop_id) = @_;
2423 my @pred_pops_ids = @{$c->model('solGS::solGS')->prediction_pops($training_pop_id)};
2425 $self->save_selection_pops($c, \@pred_pops_ids);
2427 $self->format_selection_pops($c, \@pred_pops_ids);
2429 $c->stash->{all_relevant_selection_pops} = $c->stash->{selection_pops_list};
2434 sub format_selection_pops {
2435 my ($self, $c, $pred_pops_ids) = @_;
2437 my $training_pop_id = $c->stash->{training_pop_id};
2439 my @pred_pops_ids = @{$pred_pops_ids};
2440 my @data;
2442 if (@pred_pops_ids) {
2444 foreach my $prediction_pop_id (@pred_pops_ids)
2446 my $pred_pop_rs = $c->model('solGS::solGS')->project_details($prediction_pop_id);
2447 my $pred_pop_link;
2449 while (my $row = $pred_pop_rs->next)
2451 my $name = $row->name;
2452 my $desc = $row->description;
2454 # unless ($name =~ /test/ || $desc =~ /test/)
2456 my $id_pop_name->{id} = $prediction_pop_id;
2457 $id_pop_name->{name} = $name;
2458 $id_pop_name->{pop_type} = 'selection';
2459 $id_pop_name = to_json($id_pop_name);
2461 $pred_pop_link = qq | <a href="/solgs/model/$training_pop_id/prediction/$prediction_pop_id"
2462 onclick="solGS.waitPage(this.href); return false;"><input type="hidden" value=\'$id_pop_name\'>$name</data>
2463 </a>
2466 my $pr_yr_rs = $c->model('solGS::solGS')->project_year($prediction_pop_id);
2467 my $project_yr;
2469 while ( my $yr_r = $pr_yr_rs->next )
2471 $project_yr = $yr_r->value;
2474 $self->download_prediction_urls($c, $training_pop_id, $prediction_pop_id);
2475 my $download_prediction = $c->stash->{download_prediction};
2477 push @data, [$pred_pop_link, $desc, $project_yr, $download_prediction];
2483 $c->stash->{selection_pops_list} = \@data;
2488 sub list_of_prediction_pops_file {
2489 my ($self, $c, $training_pop_id)= @_;
2491 my $cache_data = {key => 'list_of_prediction_pops' . $training_pop_id,
2492 file => 'list_of_prediction_pops_' . $training_pop_id,
2493 stash_key => 'list_of_prediction_pops_file'
2496 $self->cache_file($c, $cache_data);
2501 sub first_stock_genotype_file {
2502 my ($self, $c, $pop_id) = @_;
2504 my $cache_data = {key => 'first_stock_genotype_file'. $pop_id,
2505 file => 'first_stock_genotype_file_' . $pop_id . '.txt',
2506 stash_key => 'first_stock_genotype_file'
2509 $self->cache_file($c, $cache_data);
2514 sub prediction_population_file {
2515 my ($self, $c, $pred_pop_id) = @_;
2517 my $tmp_dir = $c->stash->{solgs_tempfiles_dir};
2519 my ($fh, $tempfile) = tempfile("prediction_population_${pred_pop_id}-XXXXX",
2520 DIR => $tmp_dir
2523 $c->stash->{prediction_pop_id} = $pred_pop_id;
2525 $self->filtered_selection_genotype_file($c);
2526 my $filtered_geno_file = $c->stash->{filtered_selection_genotype_file};
2528 my $geno_files = $filtered_geno_file;
2530 $self->genotype_file($c, $pred_pop_id);
2531 $geno_files .= "\t" . $c->stash->{pred_genotype_file};
2533 $fh->print($geno_files);
2534 $fh->close;
2536 $c->stash->{prediction_population_file} = $tempfile;
2542 sub get_trait_details_of_trait_abbr {
2543 my ($self, $c) = @_;
2545 my $trait_abbr = $c->stash->{trait_abbr};
2547 if (!$c->stash->{pop_id})
2549 $c->stash->{pop_id} = $c->stash->{training_pop_id} || $c->stash->{combo_pops_id};
2552 my $trait_id;
2554 my $acronym_pairs = $self->get_acronym_pairs($c);
2556 if ($acronym_pairs)
2558 foreach my $r (@$acronym_pairs)
2560 if ($r->[0] eq $trait_abbr)
2562 my $trait_name = $r->[1];
2563 $trait_name =~ s/^\s+|\s+$//g;
2565 $trait_id = $c->model('solGS::solGS')->get_trait_id($trait_name);
2566 $self->get_trait_details($c, $trait_id);
2574 sub build_multiple_traits_models {
2575 my ($self, $c) = @_;
2577 my $pop_id = $c->stash->{pop_id} || $c->stash->{training_pop_id};
2578 my $prediction_id = $c->stash->{prediction_pop_id} || $c->stash->{selection_pop_id};
2580 my @selected_traits = $c->req->param('trait_id[]');
2582 if (!@selected_traits && $c->stash->{background_job})
2584 @selected_traits = @{$c->stash->{selected_traits}};
2586 #$pop_id = $c->stash->{training_pop_id};
2588 # my $params = $c->stash->{analysis_profile};
2589 # my $args = $params->{arguments};
2591 # my $json = JSON->new();
2592 # $args = $json->decode($args);
2594 # if (keys %{$args})
2595 # {
2596 # foreach my $k ( keys %{$args} )
2598 # if ($k eq 'trait_id')
2600 # @selected_traits = @{ $args->{$k} };
2601 # }
2603 # if (!$pop_id)
2605 # if ($k eq 'population_id')
2607 # my @pop_ids = @{ $args->{$k} };
2608 # $c->stash->{pop_id} = $pop_ids[0];
2612 # if ($k eq 'selection_pop_id')
2614 # $prediction_id = $args->{$k};
2616 # }
2617 # }
2618 # }
2620 if (!@selected_traits)
2622 if ($prediction_id)
2624 $c->stash->{model_id} = $pop_id;
2626 $self->traits_with_valid_models($c);
2627 @selected_traits = @ {$c->stash->{traits_with_valid_models}};
2629 else
2631 $c->res->redirect("/solgs/population/$pop_id/selecttraits");
2632 $c->detach();
2635 else
2637 my $single_trait_id;
2639 if (scalar(@selected_traits) == 1)
2641 $single_trait_id = $selected_traits[0];
2642 if ($single_trait_id =~ /\D/)
2644 $c->stash->{trait_abbr} = $single_trait_id;
2645 $self->get_trait_details_of_trait_abbr($c);
2646 $single_trait_id = $c->stash->{trait_id};
2649 if (!$prediction_id)
2651 $c->res->redirect("/solgs/trait/$single_trait_id/population/$pop_id");
2652 $c->detach();
2654 else
2656 my $name = "trait_info_${single_trait_id}_pop_${pop_id}";
2657 my $temp_dir = $c->stash->{solgs_tempfiles_dir};
2658 my $file2 = $self->create_tempfile($temp_dir, $name);
2660 $c->stash->{trait_file} = $file2;
2661 $c->stash->{trait_abbr} = $selected_traits[0];
2662 $self->get_trait_details_of_trait_abbr($c);
2664 $self->get_rrblup_output($c);
2667 else
2669 my ($traits, $trait_ids);
2671 for (my $i = 0; $i <= $#selected_traits; $i++)
2673 if ($selected_traits[$i] =~ /\D/)
2675 $c->stash->{trait_abbr} = $selected_traits[$i];
2676 $self->get_trait_details_of_trait_abbr($c);
2677 $traits .= $c->stash->{trait_abbr};
2678 $traits .= "\t" unless ($i == $#selected_traits);
2679 $trait_ids .= $c->stash->{trait_id};
2681 else
2683 my $tr = $c->model('solGS::solGS')->trait_name($selected_traits[$i]);
2684 my $abbr = $self->abbreviate_term($tr);
2685 $traits .= $abbr;
2686 $traits .= "\t" unless ($i == $#selected_traits);
2688 foreach my $tr_id (@selected_traits)
2690 $trait_ids .= $tr_id;
2695 if ($c->stash->{data_set_type} =~ /combined populations/)
2697 my $identifier = crc($trait_ids);
2698 $self->combined_gebvs_file($c, $identifier);
2701 my $name = "selected_traits_pop_${pop_id}";
2702 my $temp_dir = $c->stash->{solgs_tempfiles_dir};
2703 my $file = $self->create_tempfile($temp_dir, $name);
2705 write_file($file, $traits);
2706 $c->stash->{selected_traits_file} = $file;
2708 $name = "trait_info_${single_trait_id}_pop_${pop_id}";
2709 my $file2 = $self->create_tempfile($temp_dir, $name);
2711 $c->stash->{trait_file} = $file2;
2712 $self->get_rrblup_output($c);
2719 sub traits_to_analyze :Regex('^solgs/analyze/traits/population/([\w|\d]+)(?:/([\d+]+))?') {
2720 my ($self, $c) = @_;
2722 my ($pop_id, $prediction_id) = @{$c->req->captures};
2724 my $req = $c->req->param('source');
2726 $c->stash->{pop_id} = $pop_id;
2727 $c->stash->{prediction_pop_id} = $prediction_id;
2729 $self->build_multiple_traits_models($c);
2731 my $referer = $c->req->referer;
2732 my $base = $c->req->base;
2733 $referer =~ s/$base//;
2734 my ($tr_id) = $referer =~ /(\d+)/;
2735 my $trait_page = "solgs/trait/$tr_id/population/$pop_id";
2737 my $error = $c->stash->{script_error};
2739 if ($error)
2741 $c->stash->{message} = "$error can't create prediction models for the selected traits.
2742 There are problems with the datasets of the traits.
2743 <p><a href=\"/solgs/population/$pop_id\">[ Go back ]</a></p>";
2745 $c->stash->{template} = "/generic_message.mas";
2747 elsif ($req =~ /AJAX/)
2749 my $ret->{status} = 'success';
2751 $ret = to_json($ret);
2753 $c->res->content_type('application/json');
2754 $c->res->body($ret);
2756 else
2758 if ($referer =~ m/$trait_page/)
2760 $c->res->redirect("/solgs/trait/$tr_id/population/$pop_id");
2761 $c->detach();
2763 else
2765 $c->res->redirect("/solgs/traits/all/population/$pop_id/$prediction_id");
2766 $c->detach();
2773 sub all_traits_output :Regex('^solgs/traits/all/population/([\w|\d]+)(?:/([\d+]+))?') {
2774 my ($self, $c) = @_;
2776 my ($pop_id, $pred_pop_id) = @{$c->req->captures};
2778 my @traits = $c->req->param;
2779 @traits = grep {$_ ne 'rank'} @traits;
2780 $c->stash->{training_pop_id} = $pop_id;
2781 $c->stash->{pop_id} = $pop_id;
2783 if ($pred_pop_id)
2785 $c->stash->{prediction_pop_id} = $pred_pop_id;
2786 $c->stash->{population_is} = 'prediction population';
2787 $self->prediction_population_file($c, $pred_pop_id);
2789 my $pr_rs = $c->model('solGS::solGS')->project_details($pred_pop_id);
2791 while (my $row = $pr_rs->next)
2793 $c->stash->{prediction_pop_name} = $row->name;
2796 else
2798 $c->stash->{prediction_pop_id} = undef;
2799 $c->stash->{population_is} = 'training population';
2802 $c->stash->{model_id} = $pop_id;
2803 $self->analyzed_traits($c);
2805 my @analyzed_traits = @{$c->stash->{analyzed_traits}};
2807 if (!@analyzed_traits)
2809 $c->res->redirect("/solgs/population/$pop_id/selecttraits/");
2810 $c->detach();
2813 my @trait_pages;
2814 foreach my $tr (@analyzed_traits)
2816 my $acronym_pairs = $self->get_acronym_pairs($c);
2817 my $trait_name;
2818 if ($acronym_pairs)
2820 foreach my $r (@$acronym_pairs)
2822 if ($r->[0] eq $tr)
2824 $trait_name = $r->[1];
2825 $trait_name =~ s/\n//g;
2826 $c->stash->{trait_name} = $trait_name;
2827 $c->stash->{trait_abbr} = $r->[0];
2832 my $trait_id = $c->model('solGS::solGS')->get_trait_id($trait_name);
2833 my $trait_abbr = $c->stash->{trait_abbr};
2835 $self->get_model_accuracy_value($c, $pop_id, $trait_abbr);
2836 my $accuracy_value = $c->stash->{accuracy_value};
2838 $c->controller("solGS::Heritability")->get_heritability($c);
2839 my $heritability = $c->stash->{heritability};
2841 push @trait_pages, [ qq | <a href="/solgs/trait/$trait_id/population/$pop_id">$trait_abbr</a>|, $accuracy_value, $heritability];
2845 $self->project_description($c, $pop_id);
2846 my $project_name = $c->stash->{project_name};
2847 my $project_desc = $c->stash->{project_desc};
2849 my @model_desc = ([qq | <a href="/solgs/population/$pop_id">$project_name</a> |, $project_desc, \@trait_pages]);
2851 $c->stash->{template} = $self->template('/population/multiple_traits_output.mas');
2852 $c->stash->{trait_pages} = \@trait_pages;
2853 $c->stash->{model_data} = \@model_desc;
2855 my $acronym = $self->get_acronym_pairs($c);
2856 $c->stash->{acronym} = $acronym;
2861 sub selection_index_form :Path('/solgs/selection/index/form') Args(0) {
2862 my ($self, $c) = @_;
2864 my $pred_pop_id = $c->req->param('pred_pop_id');
2865 my $training_pop_id = $c->req->param('training_pop_id');
2867 $c->stash->{model_id} = $training_pop_id;
2868 $c->stash->{prediction_pop_id} = $pred_pop_id;
2870 my @traits;
2871 if (!$pred_pop_id)
2873 $self->analyzed_traits($c);
2874 @traits = @{ $c->stash->{selection_index_traits} };
2876 else
2878 $self->prediction_pop_analyzed_traits($c, $training_pop_id, $pred_pop_id);
2879 @traits = @{ $c->stash->{prediction_pop_analyzed_traits} };
2882 my $ret->{status} = 'success';
2883 $ret->{traits} = \@traits;
2885 $ret = to_json($ret);
2887 $c->res->content_type('application/json');
2888 $c->res->body($ret);
2893 sub traits_with_valid_models {
2894 my ($self, $c) = @_;
2896 my $pop_id = $c->stash->{pop_id} || $c->stash->{training_pop_id};
2898 $self->analyzed_traits($c);
2900 my @analyzed_traits = @{$c->stash->{analyzed_traits}};
2901 my @filtered_analyzed_traits;
2902 my @valid_traits_ids;
2904 foreach my $analyzed_trait (@analyzed_traits)
2906 $self->get_model_accuracy_value($c, $pop_id, $analyzed_trait);
2907 my $av = $c->stash->{accuracy_value};
2908 if ($av && $av =~ m/\d+/ && $av > 0)
2910 push @filtered_analyzed_traits, $analyzed_trait;
2913 $c->stash->{trait_abbr} = $analyzed_trait;
2914 $self->get_trait_details_of_trait_abbr($c);
2915 push @valid_traits_ids, $c->stash->{trait_id};
2919 @filtered_analyzed_traits = uniq(@filtered_analyzed_traits);
2920 @valid_traits_ids = uniq(@valid_traits_ids);
2922 $c->stash->{traits_with_valid_models} = \@filtered_analyzed_traits;
2923 $c->stash->{traits_ids_with_valid_models} = \@valid_traits_ids;
2928 sub calculate_selection_index :Path('/solgs/calculate/selection/index') Args(2) {
2929 my ($self, $c, $model_id, $pred_pop_id) = @_;
2931 $c->stash->{pop_id} = $model_id;
2933 if ($pred_pop_id =~ /\d+/ && $model_id != $pred_pop_id)
2935 $c->stash->{prediction_pop_id} = $pred_pop_id;
2937 else
2939 $pred_pop_id = undef;
2940 $c->stash->{prediction_pop_id} = $pred_pop_id;
2943 my @traits = $c->req->param;
2944 @traits = grep {$_ ne 'rank'} @traits;
2946 my @values;
2947 foreach (@traits)
2949 push @values, $c->req->param($_);
2952 if (@values)
2954 $self->get_gebv_files_of_traits($c);
2956 my $params = $c->req->params;
2957 $self->gebv_rel_weights($c, $params, $pred_pop_id);
2959 $c->forward('rank_genotypes', [$pred_pop_id]);
2961 my $geno = $self->tohtml_genotypes($c);
2963 my $link = $c->stash->{ranked_genotypes_download_url};
2964 my $ranked_genos = $c->stash->{top_10_selection_indices};
2965 my $index_file = $c->stash->{selection_index_file};
2967 my $ret->{status} = 'No GEBV values to rank.';
2969 if (@$ranked_genos)
2971 $ret->{status} = 'success';
2972 $ret->{genotypes} = $geno;
2973 $ret->{link} = $link;
2974 $ret->{index_file} = $index_file;
2977 $ret = to_json($ret);
2979 $c->res->content_type('application/json');
2980 $c->res->body($ret);
2985 sub get_model_accuracy_value {
2986 my ($self, $c, $model_id, $trait_abbr) = @_;
2988 my $dir = $c->stash->{solgs_cache_dir};
2989 opendir my $dh, $dir or die "can't open $dir: $!\n";
2991 my ($validation_file) = grep { /cross_validation_${trait_abbr}_${model_id}/ && -f "$dir/$_" }
2992 readdir($dh);
2994 closedir $dh;
2996 $validation_file = catfile($dir, $validation_file);
2998 my ($row) = grep {/Average/} read_file($validation_file);
2999 my ($text, $accuracy_value) = split(/\t/, $row);
3001 $c->stash->{accuracy_value} = $accuracy_value;
3006 sub get_project_owners {
3007 my ($self, $c, $pr_id) = @_;
3009 my $owners = $c->model("solGS::solGS")->get_stock_owners($pr_id);
3010 my $owners_names;
3012 if ($owners)
3014 for (my $i=0; $i < scalar(@$owners); $i++)
3016 my $owner_name = $owners->[$i]->{'first_name'} . "\t" . $owners->[$i]->{'last_name'} if $owners->[$i];
3018 unless (!$owner_name)
3020 $owners_names .= $owners_names ? ', ' . $owner_name : $owner_name;
3025 $c->stash->{project_owners} = $owners_names;
3029 sub compare_marker_set_similarity {
3030 my ($self, $marker_file_pair) = @_;
3032 my $file_1 = $marker_file_pair->[0];
3033 my $file_2 = $marker_file_pair->[1];
3035 my $first_markers = (read_file($marker_file_pair->[0]))[0];
3036 my $sec_markers = (read_file($marker_file_pair->[1]))[0];
3038 my @first_geno_markers = split(/\t/, $first_markers);
3039 my @sec_geno_markers = split(/\t/, $sec_markers);
3041 if ( @first_geno_markers && @first_geno_markers)
3043 my $common_markers = scalar(intersect(@first_geno_markers, @sec_geno_markers));
3044 my $similarity = $common_markers / scalar(@first_geno_markers);
3046 return $similarity;
3048 else
3050 return 0;
3056 sub compare_genotyping_platforms {
3057 my ($self, $c, $g_files) = @_;
3059 my $combinations = combinations($g_files, 2);
3060 my $combo_cnt = combinations($g_files, 2);
3062 my $not_matching_pops;
3063 my $cnt = 0;
3064 my $cnt_pairs = 0;
3066 while ($combo_cnt->next)
3068 $cnt_pairs++;
3071 while (my $pair = $combinations->next)
3073 $cnt++;
3074 my $similarity = $self->compare_marker_set_similarity($pair);
3076 unless ($similarity > 0.5 )
3078 no warnings 'uninitialized';
3079 my $pop_id_1 = fileparse($pair->[0]);
3080 my $pop_id_2 = fileparse($pair->[1]);
3082 map { s/genotype_data_|\.txt//g } $pop_id_1, $pop_id_2;
3084 my $list_type_pop = $c->stash->{uploaded_prediction};
3086 unless ($list_type_pop)
3088 my @pop_names;
3089 foreach ($pop_id_1, $pop_id_2)
3091 my $pr_rs = $c->model('solGS::solGS')->project_details($_);
3093 while (my $row = $pr_rs->next)
3095 push @pop_names, $row->name;
3099 $not_matching_pops .= '[ ' . $pop_names[0]. ' and ' . $pop_names[1] . ' ]';
3100 $not_matching_pops .= ', ' if $cnt != $cnt_pairs;
3102 # else
3103 # {
3104 # $not_matching_pops = 'not_matching';
3109 $c->stash->{pops_with_no_genotype_match} = $not_matching_pops;
3115 sub submit_cluster_compare_trials_markers {
3116 my ($self, $c, $geno_files) = @_;
3118 $c->stash->{r_temp_file} = 'compare-trials-markers';
3119 $self->create_cluster_accesible_tmp_files($c);
3120 my $out_temp_file = $c->stash->{out_file_temp};
3121 my $err_temp_file = $c->stash->{err_file_temp};
3123 my $temp_dir = $c->stash->{solgs_tempfiles_dir};
3124 my $background_job = $c->stash->{background_job};
3126 my $status;
3130 # if ($dependency && $background_job)
3132 # my $dependent_job_script = $self->create_tempfile($c, "compare_trials_job", "pl");
3134 # my $cmd = '#!/usr/bin/env perl;' . "\n";
3135 # $cmd .= 'use strict;' . "\n";
3136 # $cmd .= 'use warnings;' . "\n\n\n";
3137 # $cmd .= 'system("Rscript --slave '
3138 # . $in_file_temp
3139 # . ' --args ' . $input_files . ' ' . $output_files
3140 # . ' | qsub -W ' . $dependency . '");';
3142 # write_file($dependent_job_script, $cmd);
3143 # chmod 0755, $dependent_job_script;
3145 # $r_job = CXGN::Tools::Run->run_cluster('perl',
3146 # $dependent_job_script,
3147 # $out_file_temp,
3149 # working_dir => $c->stash->{solgs_tempfiles_dir},
3150 # max_cluster_jobs => 1_000_000_000,
3151 # },
3152 # );
3153 # }
3156 try
3158 my $compare_trials_job = CXGN::Tools::Run->run_cluster_perl({
3160 method => ["SGN::Controller::solGS::solGS" => "compare_genotyping_platforms"],
3161 args => ['SGN::Context', $geno_files],
3162 load_packages => ['SGN::Controller::solGS::solGS', 'SGN::Context'],
3163 run_opts => {
3164 out_file => $out_temp_file,
3165 err_file => $err_temp_file,
3166 working_dir => $temp_dir,
3167 max_cluster_jobs => 1_000_000_000,
3172 $c->stash->{r_job_tempdir} = $compare_trials_job->job_tempdir();
3173 $c->stash->{r_job_id} = $compare_trials_job->job_id();
3174 $c->stash->{cluster_job} = $compare_trials_job;
3176 unless ($background_job)
3178 $compare_trials_job->wait();
3182 catch
3184 $status = $_;
3185 $status =~ s/\n at .+//s;
3191 sub create_tempfile {
3192 my ($self, $dir, $name, $ext) = @_;
3194 $ext = '.' . $ext if $ext;
3196 my ($fh, $file) = tempfile($name . "-XXXXX",
3197 SUFFIX => $ext,
3198 DIR => $dir,
3201 $fh->close;
3203 return $file;
3208 sub grep_file {
3209 my ($self, $dir, $exp) = @_;
3211 opendir my $dh, $dir
3212 or die "can't open $dir: $!\n";
3214 my ($file) = grep { /^$exp/ && -f "$dir/$_" } readdir($dh);
3215 close $dh;
3217 if ($file)
3219 $file = catfile($dir, $file);
3222 return $file;
3229 sub phenotype_graph :Path('/solgs/phenotype/graph') Args(0) {
3230 my ($self, $c) = @_;
3232 my $pop_id = $c->req->param('pop_id');
3233 my $trait_id = $c->req->param('trait_id');
3234 my $combo_pops_id = $c->req->param('combo_pops_id');
3236 $self->get_trait_details($c, $trait_id);
3238 $c->stash->{pop_id} = $pop_id;
3239 $c->stash->{combo_pops_id} = $combo_pops_id;
3241 $c->stash->{data_set_type} = 'combined populations' if $combo_pops_id;
3243 $self->trait_phenodata_file($c);
3245 my $trait_pheno_file = $c->{stash}->{trait_phenodata_file};
3246 my $trait_data = $self->convert_to_arrayref_of_arrays($c, $trait_pheno_file);
3248 my $ret->{status} = 'failed';
3250 if (@$trait_data)
3252 $ret->{status} = 'success';
3253 $ret->{trait_data} = $trait_data;
3256 $ret = to_json($ret);
3258 $c->res->content_type('application/json');
3259 $c->res->body($ret);
3264 #generates descriptive stat for a trait phenotype data
3265 sub trait_phenotype_stat {
3266 my ($self, $c) = @_;
3268 $self->trait_phenodata_file($c);
3270 my $trait_pheno_file = $c->{stash}->{trait_phenodata_file};
3272 my $trait_data = $self->convert_to_arrayref_of_arrays($c, $trait_pheno_file);
3274 my @desc_stat;
3275 my $background_job = $c->stash->{background_job};
3277 if ($trait_data && !$background_job)
3279 my @pheno_data;
3280 foreach (@$trait_data)
3282 unless (!$_->[0])
3284 my $d = $_->[1];
3285 chomp($d);
3287 if ($d =~ /\d+/)
3289 push @pheno_data, $d;
3294 my $stat = Statistics::Descriptive::Full->new();
3295 $stat->add_data(@pheno_data);
3297 my $min = $stat->min;
3298 my $max = $stat->max;
3299 my $mean = $stat->mean;
3300 my $med = $stat->median;
3301 my $std = $stat->standard_deviation;
3302 my $cnt = scalar(@$trait_data);
3303 my $cv = ($std / $mean) * 100;
3304 my $na = scalar(@$trait_data) - scalar(@pheno_data);
3306 if ($na == 0) { $na = '--'; }
3308 my $round = Math::Round::Var->new(0.01);
3309 $std = $round->round($std);
3310 $mean = $round->round($mean);
3311 $cv = $round->round($cv);
3312 $cv = $cv . '%';
3314 @desc_stat = ( [ 'Total no. of genotypes', $cnt ],
3315 [ 'Genotypes missing data', $na ],
3316 [ 'Minimum', $min ],
3317 [ 'Maximum', $max ],
3318 [ 'Arithmetic mean', $mean ],
3319 [ 'Median', $med ],
3320 [ 'Standard deviation', $std ],
3321 [ 'Coefficient of variation', $cv ]
3326 else
3328 @desc_stat = ( [ 'Total no. of genotypes', 'None' ],
3329 [ 'Genotypes missing data', 'None' ],
3330 [ 'Minimum', 'None' ],
3331 [ 'Maximum', 'None' ],
3332 [ 'Arithmetic mean', 'None' ],
3333 [ 'Median', 'None'],
3334 [ 'Standard deviation', 'None' ],
3335 [ 'Coefficient of variation', 'None' ]
3340 $c->stash->{descriptive_stat} = \@desc_stat;
3343 #sends an array of trait gebv data to an ajax request
3344 #with a population id and trait id parameters
3345 sub gebv_graph :Path('/solgs/trait/gebv/graph') Args(0) {
3346 my ($self, $c) = @_;
3348 my $pop_id = $c->req->param('pop_id');
3349 my $trait_id = $c->req->param('trait_id');
3350 my $prediction_pop_id = $c->req->param('selection_pop_id');
3351 my $combo_pops_id = $c->req->param('combo_pops_id');
3353 if ($combo_pops_id)
3355 $c->controller->('solGS::combinedTrials')get_combined_pops_list($c, $combo_pops_id);
3356 $c->stash->{data_set_type} = 'combined populations';
3357 $pop_id = $combo_pops_id;
3360 $c->stash->{pop_id} = $pop_id;
3361 $c->stash->{combo_pops_id} = $combo_pops_id;
3362 $c->stash->{prediction_pop_id} = $prediction_pop_id;
3364 $self->get_trait_details($c, $trait_id);
3366 my $page = $c->req->referer();
3367 my $gebv_file;
3369 if ($page =~ /solgs\/selection\//)
3371 my $identifier = $pop_id . '_' . $prediction_pop_id;
3372 $self->prediction_pop_gebvs_file($c, $identifier, $trait_id);
3374 $gebv_file = $c->stash->{prediction_pop_gebvs_file};
3376 else
3378 $self->gebv_kinship_file($c);
3379 $gebv_file = $c->stash->{gebv_kinship_file};
3383 my $gebv_data = $self->convert_to_arrayref_of_arrays($c, $gebv_file);
3385 my $ret->{status} = 'failed';
3387 if (@$gebv_data)
3389 $ret->{status} = 'success';
3390 $ret->{gebv_data} = $gebv_data;
3393 $ret = to_json($ret);
3395 $c->res->content_type('application/json');
3396 $c->res->body($ret);
3401 sub tohtml_genotypes {
3402 my ($self, $c) = @_;
3404 my $genotypes = $c->stash->{top_10_selection_indices};
3405 my %geno = ();
3407 foreach (@$genotypes)
3409 $geno{$_->[0]} = $_->[1];
3411 return \%geno;
3415 sub get_single_trial_traits {
3416 my ($self, $c) = @_;
3418 my $pop_id = $c->stash->{pop_id};
3420 $self->traits_list_file($c);
3421 my $traits_file = $c->stash->{traits_list_file};
3423 if (!-s $traits_file)
3425 my $traits_rs = $c->model('solGS::solGS')->project_traits($pop_id);
3427 my @traits_list;
3429 while (my $row = $traits_rs->next)
3431 push @traits_list, $row->name;
3434 my $traits = join("\t", @traits_list);
3435 write_file($traits_file, $traits);
3441 sub get_all_traits {
3442 my ($self, $c) = @_;
3444 my $pop_id = $c->stash->{pop_id};
3446 $self->traits_list_file($c);
3447 my $traits_file = $c->stash->{traits_list_file};
3449 if (!-s $traits_file)
3451 my $page = $c->req->path;
3453 if ($page =~ /solgs\/population\//)
3455 $self->get_single_trial_traits($c);
3459 my $traits = read_file($traits_file);
3461 $self->traits_acronym_file($c);
3462 my $acronym_file = $c->stash->{traits_acronym_file};
3464 unless (-s $acronym_file)
3466 my @filtered_traits = split(/\t/, $traits);
3467 my $count = scalar(@filtered_traits);
3469 my $acronymized_traits = $self->acronymize_traits(\@filtered_traits);
3470 my $acronym_table = $acronymized_traits->{acronym_table};
3472 $self->traits_acronym_table($c, $acronym_table);
3475 $self->create_trait_data($c);
3479 sub create_trait_data {
3480 my ($self, $c) = @_;
3482 my $acronym_pairs = $self->get_acronym_pairs($c);
3484 if (@$acronym_pairs)
3486 my $table = 'trait_id' . "\t" . 'trait_name' . "\t" . 'acronym' . "\n";
3487 foreach (@$acronym_pairs)
3489 my $trait_name = $_->[1];
3490 $trait_name =~ s/\n//g;
3492 my $trait_id = $c->model('solGS::solGS')->get_trait_id($trait_name);
3494 if ($trait_id)
3496 $table .= $trait_id . "\t" . $trait_name . "\t" . $_->[0] . "\n";
3500 $self->all_traits_file($c);
3501 my $traits_file = $c->stash->{all_traits_file};
3502 write_file($traits_file, $table);
3507 sub all_traits_file {
3508 my ($self, $c) = @_;
3510 my $pop_id = $c->stash->{pop_id};
3511 #$pop_id = $c->stash->{combo_pops_id} if !$pop_id;
3513 my $cache_data = {key => 'all_traits_pop' . $pop_id,
3514 file => 'all_traits_pop_' . $pop_id,
3515 stash_key => 'all_traits_file'
3518 $self->cache_file($c, $cache_data);
3523 sub traits_list_file {
3524 my ($self, $c) = @_;
3526 my $pop_id = $c->stash->{pop_id};
3527 # $pop_id = $c->stash->{combo_pops_id} if !$pop_id;
3529 my $cache_data = {key => 'traits_list_pop' . $pop_id,
3530 file => 'traits_list_pop_' . $pop_id,
3531 stash_key => 'traits_list_file'
3534 $self->cache_file($c, $cache_data);
3539 sub get_acronym_pairs {
3540 my ($self, $c) = @_;
3542 my $pop_id = $c->stash->{pop_id};
3543 #$pop_id = $c->stash->{combo_pops_id} if !$pop_id;
3545 my $dir = $c->stash->{solgs_cache_dir};
3546 opendir my $dh, $dir
3547 or die "can't open $dir: $!\n";
3549 no warnings 'uninitialized';
3551 my ($file) = grep(/traits_acronym_pop_${pop_id}/, readdir($dh));
3552 $dh->close;
3554 my $acronyms_file = catfile($dir, $file);
3556 my @acronym_pairs;
3557 if (-f $acronyms_file)
3559 @acronym_pairs = map { [ split(/\t/) ] } read_file($acronyms_file);
3560 shift(@acronym_pairs); # remove header;
3563 @acronym_pairs = sort {uc $a->[0] cmp uc $b->[0] } @acronym_pairs;
3565 $c->stash->{acronym} = \@acronym_pairs;
3567 return \@acronym_pairs;
3572 sub traits_acronym_table {
3573 my ($self, $c, $acronym_table) = @_;
3575 if (keys %$acronym_table)
3577 my $table = 'Acronym' . "\t" . 'Trait name' . "\n";
3579 foreach (keys %$acronym_table)
3581 $table .= $_ . "\t" . $acronym_table->{$_} . "\n";
3584 $self->traits_acronym_file($c);
3585 my $acronym_file = $c->stash->{traits_acronym_file};
3587 write_file($acronym_file, $table);
3593 sub traits_acronym_file {
3594 my ($self, $c) = @_;
3596 my $pop_id = $c->stash->{pop_id};
3597 #$pop_id = $c->stash->{combo_pops_id} if !$pop_id;
3599 my $cache_data = {key => 'traits_acronym_pop' . $pop_id,
3600 file => 'traits_acronym_pop_' . $pop_id,
3601 stash_key => 'traits_acronym_file'
3604 $self->cache_file($c, $cache_data);
3609 sub analyzed_traits {
3610 my ($self, $c) = @_;
3612 my $training_pop_id = $c->stash->{model_id} || $c->stash->{training_pop_id};
3614 my $dir = $c->stash->{solgs_cache_dir};
3615 opendir my $dh, $dir or die "can't open $dir: $!\n";
3617 my @all_files = grep { /gebv_kinship_[a-zA-Z0-9]/ && -f "$dir/$_" }
3618 readdir($dh);
3620 closedir $dh;
3622 my @traits_files = map { catfile($dir, $_)}
3623 grep {/($training_pop_id)/}
3624 @all_files;
3626 my @traits;
3627 my @traits_ids;
3628 my @si_traits;
3629 my @valid_traits_files;
3631 foreach my $trait_file (@traits_files)
3633 if (-s $trait_file > 1)
3635 my $trait = basename($trait_file);
3636 $trait =~ s/gebv_kinship_//;
3637 $trait =~ s/$training_pop_id|_|combined_pops//g;
3638 $trait =~ s/$dir|\///g;
3640 my $trait_id;
3642 my $acronym_pairs = $self->get_acronym_pairs($c);
3643 if ($acronym_pairs)
3645 foreach my $r (@$acronym_pairs)
3647 if ($r->[0] eq $trait)
3649 my $trait_name = $r->[1];
3650 $trait_name =~ s/\n//g;
3651 $trait_id = $c->model('solGS::solGS')->get_trait_id($trait_name);
3653 push @traits_ids, $trait_id;
3658 $self->get_model_accuracy_value($c, $training_pop_id, $trait);
3659 my $av = $c->stash->{accuracy_value};
3661 if ($av && $av =~ m/\d+/ && $av > 0)
3663 push @si_traits, $trait;
3664 push @valid_traits_files, $trait_file;
3667 push @traits, $trait;
3669 else
3671 @traits_files = grep { $_ ne $trait_file } @traits_files;
3675 $c->stash->{analyzed_traits} = \@traits;
3676 $c->stash->{analyzed_traits_ids} = \@traits_ids;
3677 $c->stash->{analyzed_traits_files} = \@traits_files;
3678 $c->stash->{selection_index_traits} = \@si_traits;
3679 $c->stash->{analyzed_valid_traits_files} = \@valid_traits_files;
3683 sub filter_phenotype_header {
3684 my ($self, $c) = @_;
3686 my @headers = ( 'studyYear', 'studyDbId', 'studyName', 'studyDesign', 'locationDbId', 'locationName', 'germplasmDbId', 'germplasmName', 'germplasmSynonyms', 'observationLevel', 'observationUnitDbId', 'observationUnitName', 'replicate', 'blockNumber', 'plotNumber' );
3688 my $meta_headers = join("\t", @headers);
3689 if ($c)
3691 $c->stash->{filter_phenotype_header} = $meta_headers;
3693 else
3695 return $meta_headers;
3701 sub abbreviate_term {
3702 my ($self, $term) = @_;
3704 my @words = split(/\s/, $term);
3706 my $acronym;
3708 if (scalar(@words) == 1)
3710 $acronym = shift(@words);
3712 else
3714 foreach my $word (@words)
3716 if ($word =~ /^\D/)
3718 my $l = substr($word,0,1,q{});
3719 $acronym .= $l;
3721 else
3723 $acronym .= $word;
3726 $acronym = uc($acronym);
3727 $acronym =~/(\w+)/;
3728 $acronym = $1;
3732 return $acronym;
3737 sub all_gs_traits_list {
3738 my ($self, $c) = @_;
3740 $self->trial_compatibility_file($c);
3741 my $file = $c->stash->{trial_compatibility_file};
3743 my $traits;
3744 my $mv_name = 'all_gs_traits';
3746 my $matview = $c->model('solGS::solGS')->check_matview_exists($mv_name);
3748 if (!$matview)
3750 $c->model('solGS::solGS')->materialized_view_all_gs_traits();
3751 $c->model('solGS::solGS')->insert_matview_public($mv_name);
3753 else
3755 if (!-s $file)
3757 $c->model('solGS::solGS')->refresh_materialized_view_all_gs_traits();
3758 $c->model('solGS::solGS')->update_matview_public($mv_name);
3764 $traits = $c->model('solGS::solGS')->all_gs_traits();
3766 catch
3769 if ($_ =~ /materialized view \"all_gs_traits\" has not been populated/)
3773 $c->model('solGS::solGS')->refresh_materialized_view_all_gs_traits();
3774 $c->model('solGS::solGS')->update_matview_public($mv_name);
3775 $traits = $c->model('solGS::solGS')->all_gs_traits();
3780 $c->stash->{all_gs_traits} = $traits;
3785 sub gs_traits_index {
3786 my ($self, $c) = @_;
3788 $self->all_gs_traits_list($c);
3789 my $all_traits = $c->stash->{all_gs_traits};
3790 my @all_traits = sort{$a cmp $b} @$all_traits;
3792 my @indices = ('A'..'Z');
3793 my %traits_hash;
3794 my @valid_indices;
3796 foreach my $index (@indices)
3798 my @index_traits;
3799 foreach my $trait (@all_traits)
3801 if ($trait =~ /^$index/i)
3803 push @index_traits, $trait;
3806 if (@index_traits)
3808 $traits_hash{$index}=[ @index_traits ];
3812 foreach my $k ( keys(%traits_hash))
3814 push @valid_indices, $k;
3817 @valid_indices = sort( @valid_indices );
3819 my $trait_index;
3820 foreach my $v_i (@valid_indices)
3822 $trait_index .= qq | <a href=/solgs/traits/$v_i>$v_i</a> |;
3823 unless ($v_i eq $valid_indices[-1])
3825 $trait_index .= " | ";
3829 $c->stash->{gs_traits_index} = $trait_index;
3834 sub traits_starting_with {
3835 my ($self, $c, $index) = @_;
3837 $self->all_gs_traits_list($c);
3838 my $all_traits = $c->stash->{all_gs_traits};
3840 my $trait_gr = [
3841 sort { $a cmp $b }
3842 grep { /^$index/i }
3843 uniq @$all_traits
3846 $c->stash->{trait_subgroup} = $trait_gr;
3850 sub hyperlink_traits {
3851 my ($self, $c, $traits) = @_;
3853 if (ref($traits) eq 'ARRAY')
3855 my @traits_urls;
3856 foreach my $tr (@$traits)
3858 push @traits_urls, [ qq | <a href="/solgs/search/result/traits/$tr">$tr</a> | ];
3861 $c->stash->{traits_urls} = \@traits_urls;
3863 else
3865 $c->stash->{traits_urls} = qq | <a href="/solgs/search/result/traits/$traits">$traits</a> |;
3870 sub gs_traits : Path('/solgs/traits') Args(1) {
3871 my ($self, $c, $index) = @_;
3873 my @traits_list;
3875 if ($index =~ /^\w{1}$/)
3877 $self->traits_starting_with($c, $index);
3878 my $traits_gr = $c->stash->{trait_subgroup};
3880 foreach my $trait (@$traits_gr)
3882 $self->hyperlink_traits($c, $trait);
3883 my $trait_url = $c->stash->{traits_urls};
3885 $self->get_trait_details($c, $trait);
3886 push @traits_list, [$trait_url, $c->stash->{trait_def}];
3889 $c->stash( template => $self->template('/search/traits/list.mas'),
3890 index => $index,
3891 traits_list => \@traits_list
3894 else
3896 $c->forward('search');
3901 sub submit_cluster_phenotype_query {
3902 my ($self, $c, $args) = @_;
3904 my $pop_id = $args->{population_id};
3906 $c->stash->{r_temp_file} = "phenotype-data-query-${pop_id}";
3907 $self->create_cluster_accesible_tmp_files($c);
3908 my $out_file = $c->stash->{out_file_temp};
3909 my $err_file = $c->stash->{err_file_temp};
3911 my $temp_dir = $c->stash->{solgs_tempfiles_dir};
3912 my $background_job = $c->stash->{background_job};
3914 my $config = $self->create_cluster_config($c, $temp_dir, $out_file, $err_file);
3916 my $args_file = $self->create_tempfile($temp_dir, "pheno-data-args_file-${pop_id}");
3918 nstore $args, $args_file
3919 or croak "data query script: $! serializing phenotype data query details to $args_file ";
3921 my $cmd = 'mx-run solGS::Cluster '
3922 . ' --data_type phenotype '
3923 . ' --population_type trial '
3924 . ' --args_file ' . $args_file;
3926 eval
3928 my $pheno_job = CXGN::Tools::Run->new($config);
3929 $pheno_job->do_not_cleanup(1);
3931 if ($background_job) {
3932 $pheno_job->is_async(1);
3933 $pheno_job->run_cluster($cmd);
3934 $c->stash->{r_job_tempdir} = $pheno_job->job_tempdir();
3935 $c->stash->{r_job_id} = $pheno_job->jobid();
3936 $c->stash->{cluster_job} = $pheno_job;
3937 } else {
3938 $pheno_job->is_cluster(1);
3939 $pheno_job->run_cluster($cmd);
3940 $pheno_job->wait;
3944 if ($@) {
3945 print STDERR "An error occurred! $@\n";
3946 $c->stash->{Error} = $@ ;
3952 sub submit_cluster_genotype_query {
3953 my ($self, $c, $args) = @_;
3955 my $pop_id = $args->{population_id};
3957 $c->stash->{r_temp_file} = "genotype-data-query-${pop_id}";
3958 $self->create_cluster_accesible_tmp_files($c);
3959 my $out_file = $c->stash->{out_file_temp};
3960 my $err_file = $c->stash->{err_file_temp};
3962 my $temp_dir = $c->stash->{solgs_tempfiles_dir};
3963 my $background_job = $c->stash->{background_job};
3965 my $config = $self->create_cluster_config($c, $temp_dir, $out_file, $err_file);
3967 my $args_file = $self->create_tempfile($temp_dir, "geno-data-args_file-${pop_id}");
3969 nstore $args, $args_file
3970 or croak "data queryscript: $! serializing model details to $args_file ";
3972 my $cmd = 'mx-run solGS::Cluster '
3973 . ' --data_type genotype '
3974 . ' --population_type trial '
3975 . ' --args_file ' . $args_file;
3977 eval
3979 my $geno_job = CXGN::Tools::Run->new($config);
3980 $geno_job->do_not_cleanup(1);
3982 if ($background_job) {
3983 $geno_job->is_async(1);
3984 $geno_job->run_cluster($cmd);
3986 $c->stash->{r_job_tempdir} = $geno_job->job_tempdir();
3987 $c->stash->{r_job_id} = $geno_job->jobid();
3988 $c->stash->{cluster_job} = $geno_job;
3989 } else {
3990 $geno_job->is_cluster(1);
3991 $geno_job->run_cluster($cmd);
3992 $geno_job->wait;
3996 if ($@) {
3997 print STDERR "An error occurred! $@\n";
3998 $c->stash->{Error} = $@;
4004 sub first_stock_genotype_data {
4005 my ($self, $c, $pr_id) = @_;
4007 $self->first_stock_genotype_file($c, $pr_id);
4008 my $geno_file = $c->stash->{first_stock_genotype_file};
4010 my $geno_data = $c->model('solGS::solGS')->first_stock_genotype_data($pr_id);
4012 if ($geno_data)
4014 write_file($geno_file, $geno_data);
4019 sub phenotype_file {
4020 my ($self, $c, $pop_id) = @_;
4022 if (!$pop_id) {
4023 $pop_id = $c->stash->{pop_id}
4024 || $c->stash->{training_pop_id}
4025 || $c->stash->{trial_id};
4028 die "Population id must be provided to get the phenotype data set." if !$pop_id;
4029 $pop_id =~ s/combined_//;
4031 if ($c->stash->{uploaded_reference} || $pop_id =~ /uploaded/) {
4032 if (!$c->user) {
4034 my $page = "/" . $c->req->path;
4036 $c->res->redirect("/solgs/list/login/message?page=$page");
4037 $c->detach;
4042 $self->phenotype_file_name($c, $pop_id);
4043 my $pheno_file = $c->stash->{phenotype_file_name};
4045 no warnings 'uninitialized';
4047 unless ( -s $pheno_file)
4049 $self->traits_list_file($c);
4050 my $traits_file = $c->stash->{traits_list_file};
4052 my $args = {
4053 'population_id' => $pop_id,
4054 'phenotype_file' => $pheno_file,
4055 'traits_list_file' => $traits_file,
4058 if (!$c->stash->{uploaded_reference})
4060 $self->submit_cluster_phenotype_query($c, $args);
4064 $self->get_all_traits($c);
4066 $c->stash->{phenotype_file} = $pheno_file;
4071 sub format_phenotype_dataset {
4072 my ($self, $data_ref, $traits_file) = @_;
4074 my $data = $$data_ref;
4075 my @rows = split (/\n/, $data);
4077 my $formatted_headers = $self->format_phenotype_dataset_headers($rows[0], $traits_file);
4078 $rows[0] = $formatted_headers;
4080 my $formatted_dataset = $self->format_phenotype_dataset_rows(\@rows);
4082 return $formatted_dataset;
4086 sub format_phenotype_dataset_rows {
4087 my ($self, $data_rows) = @_;
4089 my $data = join("\n", @$data_rows);
4091 return $data;
4096 sub format_phenotype_dataset_headers {
4097 my ($self, $raw_headers, $traits_file) = @_;
4099 $raw_headers =~ s/\|\w+:\d+//g;
4100 $raw_headers =~ s/\n//g;
4102 my $traits = $raw_headers;
4104 my $meta_headers= $self->filter_phenotype_header();
4105 my @mh = split("\t", $meta_headers);
4106 foreach my $mh (@mh) {
4107 $traits =~ s/($mh)//g;
4110 $traits =~ s/^\s+|\s+$//g;
4112 write_file($traits_file, $traits) if $traits_file;
4113 my @filtered_traits = split(/\t/, $traits);
4115 $raw_headers =~ s/$traits//g;
4116 my $acronymized_traits = $self->acronymize_traits(\@filtered_traits);
4117 my $formatted_headers = $raw_headers . $acronymized_traits->{acronymized_traits};
4119 return $formatted_headers;
4124 sub acronymize_traits {
4125 my ($self, $traits) = @_;
4127 my $acronym_table = {};
4128 my $cnt = 0;
4129 my $acronymized_traits;
4131 foreach my $trait_name (@$traits)
4133 $cnt++;
4134 my $abbr = $self->abbreviate_term($trait_name);
4136 $abbr = $abbr . '.2' if $cnt > 1 && $acronym_table->{$abbr};
4138 $acronymized_traits .= $abbr;
4139 $acronymized_traits .= "\t" unless $cnt == scalar(@$traits);
4141 $acronym_table->{$abbr} = $trait_name if $abbr;
4142 my $tr_h = $acronym_table->{$abbr};
4145 my $acronym_data = {
4146 'acronymized_traits' => $acronymized_traits,
4147 'acronym_table' => $acronym_table
4150 return $acronym_data;
4154 sub genotype_file {
4155 my ($self, $c, $pred_pop_id) = @_;
4157 my $pop_id = $c->stash->{pop_id};
4158 my $geno_file;
4160 if ($pred_pop_id)
4162 $pop_id = $c->stash->{prediction_pop_id} || $c->stash->{selection_pop_id};
4163 $geno_file = $c->stash->{user_selection_list_genotype_data_file};
4166 die "Population id must be provided to get the genotype data set." if !$pop_id;
4168 if ($c->stash->{uploaded_reference} || $pop_id =~ /uploaded/)
4170 if (!$c->user)
4172 my $path = "/" . $c->req->path;
4173 $c->res->redirect("/solgs/list/login/message?page=$path");
4174 $c->detach;
4178 unless ($geno_file)
4180 $self->genotype_file_name($c, $pop_id);
4181 $geno_file = $c->stash->{genotype_file_name};
4184 no warnings 'uninitialized';
4186 unless (-s $geno_file)
4188 my $model_id = $c->stash->{model_id};
4190 my $dir = ($model_id =~ /uploaded/)
4191 ? $c->stash->{solgs_prediction_upload_dir}
4192 : $c->stash->{solgs_cache_dir};
4194 my $trait_abbr = $c->stash->{trait_abbr};
4196 my $tr_file = ($c->stash->{data_set_type} =~ /combined/)
4197 ? "genotype_data_${model_id}_${trait_abbr}_combined"
4198 : "genotype_data_${model_id}.txt";
4200 my $tr_geno_file = catfile($dir, $tr_file);
4202 my $args = {
4203 'population_id' => $pop_id,
4204 'prediction_id' => $pred_pop_id,
4205 'model_id' => $model_id,
4206 'tr_geno_file' => $tr_geno_file,
4207 'genotype_file' => $geno_file,
4208 'cache_dir' => $c->stash->{solgs_cache_dir},
4211 $self->submit_cluster_genotype_query($c, $args);
4214 if ($pred_pop_id)
4216 $c->stash->{pred_genotype_file} = $geno_file;
4218 else
4220 $c->stash->{genotype_file} = $geno_file;
4226 sub get_rrblup_output {
4227 my ($self, $c) = @_;
4229 $c->stash->{pop_id} = $c->stash->{combo_pops_id} if $c->stash->{combo_pops_id};
4231 my $pop_id = $c->stash->{pop_id} || $c->stash->{training_pop_id};
4232 my $trait_abbr = $c->stash->{trait_abbr};
4233 my $trait_name = $c->stash->{trait_name};
4234 my $data_set_type = $c->stash->{data_set_type};
4235 my $prediction_id = $c->stash->{prediction_pop_id} || $c->stash->{selection_pop_id};
4237 my ($traits_file, @traits, @trait_pages);
4239 if ($trait_abbr)
4241 $self->run_rrblup_trait($c, $trait_abbr);
4243 else
4245 $traits_file = $c->stash->{selected_traits_file};
4246 my $content = read_file($traits_file);
4248 if ($content =~ /\t/)
4250 @traits = split(/\t/, $content);
4252 else
4254 push @traits, $content;
4257 no warnings 'uninitialized';
4259 foreach my $tr (@traits)
4261 my $acronym_pairs = $self->get_acronym_pairs($c);
4262 my $trait_name;
4263 if ($acronym_pairs)
4265 foreach my $r (@$acronym_pairs)
4267 if ($r->[0] eq $tr)
4269 $trait_name = $r->[1];
4270 $trait_name =~ s/\n//g;
4271 $c->stash->{trait_name} = $trait_name;
4272 $c->stash->{trait_abbr} = $r->[0];
4277 $self->run_rrblup_trait($c, $tr);
4279 my $trait_id = $c->model('solGS::solGS')->get_trait_id($trait_name);
4280 push @trait_pages, [ qq | <a href="/solgs/trait/$trait_id/population/$pop_id" onclick="solGS.waitPage()">$tr</a>| ];
4284 $c->stash->{combo_pops_analysis_result} = 0;
4286 no warnings 'uninitialized';
4288 if ($data_set_type !~ /combined populations/)
4290 if (scalar(@traits) == 1)
4292 $self->gs_files($c);
4293 $c->stash->{template} = $self->template('population/trait.mas');
4296 if (scalar(@traits) > 1)
4298 $c->stash->{model_id} = $pop_id;
4299 $self->analyzed_traits($c);
4300 $c->stash->{template} = $self->template('/population/multiple_traits_output.mas');
4301 $c->stash->{trait_pages} = \@trait_pages;
4304 else
4306 $c->stash->{combo_pops_analysis_result} = 1;
4312 sub run_rrblup_trait {
4313 my ($self, $c, $trait_abbr) = @_;
4315 my $pop_id = $c->stash->{pop_id};
4316 my $trait_name = $c->stash->{trait_name};
4317 my $data_set_type = $c->stash->{data_set_type};
4319 my $trait_id = $c->model('solGS::solGS')->get_trait_id($trait_name);
4320 $c->stash->{trait_id} = $trait_id;
4322 no warnings 'uninitialized';
4324 if ($data_set_type =~ /combined populations/i)
4326 my $prediction_id = $c->stash->{prediction_pop_id};
4328 $self->output_files($c);
4330 my $combined_pops_pheno_file = $c->stash->{trait_combined_pheno_file};
4331 my $combined_pops_geno_file = $c->stash->{trait_combined_geno_file};
4333 my $temp_dir = $c->stash->{solgs_tempfiles_dir};
4334 my $trait_info = $trait_id . "\t" . $trait_abbr;
4335 my $trait_file = $self->create_tempfile($temp_dir, "trait_info_${trait_id}");
4336 write_file($trait_file, $trait_info);
4338 my $dataset_file = $self->create_tempfile($temp_dir, "dataset_info_${trait_id}");
4339 write_file($dataset_file, $data_set_type);
4341 my $prediction_population_file = $c->stash->{prediction_population_file};
4343 my $input_files = join("\t",
4344 $c->stash->{trait_combined_pheno_file},
4345 $c->stash->{trait_combined_geno_file},
4346 $trait_file,
4347 $dataset_file,
4348 $prediction_population_file,
4351 my $input_file = $self->create_tempfile($temp_dir, "input_files_combo_${trait_abbr}");
4352 write_file($input_file, $input_files);
4354 if ($c->stash->{prediction_pop_id})
4356 $c->stash->{input_files} = $input_file;
4357 $self->run_rrblup($c);
4359 else
4361 if (-s $c->stash->{gebv_kinship_file} == 0 ||
4362 -s $c->stash->{gebv_marker_file} == 0 ||
4363 -s $c->stash->{validation_file} == 0
4366 $c->stash->{input_files} = $input_file;
4367 # $self->output_files($c);
4368 $self->run_rrblup($c);
4372 else
4374 my $name = "trait_info_${trait_id}_pop_${pop_id}";
4375 my $temp_dir = $c->stash->{solgs_tempfiles_dir};
4376 my $trait_info = $trait_id . "\t" . $trait_abbr;
4377 my $file = $self->create_tempfile($temp_dir, $name);
4378 $c->stash->{trait_file} = $file;
4379 write_file($file, $trait_info);
4381 my $prediction_id = $c->stash->{prediction_pop_id} || $c->stash->{selection_pop_id};
4382 $self->output_files($c);
4384 if ($prediction_id)
4386 #$prediction_id = "prediction_id} if $c->stash->{uploaded_prediction};
4387 my $identifier = $pop_id . '_' . $prediction_id;
4389 $self->prediction_pop_gebvs_file($c, $identifier, $trait_id);
4390 my $pred_pop_gebvs_file = $c->stash->{prediction_pop_gebvs_file};
4392 unless (-s $pred_pop_gebvs_file != 0)
4394 $self->input_files($c);
4395 $self->run_rrblup($c);
4398 else
4400 if (-s $c->stash->{gebv_kinship_file} == 0 ||
4401 -s $c->stash->{gebv_marker_file} == 0 ||
4402 -s $c->stash->{validation_file} == 0
4405 $self->input_files($c);
4406 $self->run_rrblup($c);
4414 sub run_rrblup {
4415 my ($self, $c) = @_;
4417 #get all input files & arguments for rrblup,
4418 #run rrblup and save output in solgs user dir
4419 my $pop_id = $c->stash->{pop_id};
4420 my $trait_id = $c->stash->{trait_id};
4421 my $input_files = $c->stash->{input_files};
4422 my $output_files = $c->stash->{output_files};
4423 my $data_set_type = $c->stash->{data_set_type};
4425 if ($data_set_type !~ /combined populations/)
4427 die "\nCan't run rrblup without a population id." if !$pop_id;
4431 die "\nCan't run rrblup without a trait id." if !$trait_id;
4433 die "\nCan't run rrblup without input files." if !$input_files;
4434 die "\nCan't run rrblup without output files." if !$output_files;
4436 if ($data_set_type !~ /combined populations/)
4439 $c->stash->{r_temp_file} = "gs-rrblup-${trait_id}-${pop_id}";
4441 else
4443 my $combo_pops = $c->stash->{trait_combo_pops};
4444 $combo_pops = join('', split(/,/, $combo_pops));
4445 my $combo_identifier = crc($combo_pops);
4447 $c->stash->{r_temp_file} = "gs-rrblup-combo-${trait_id}-${combo_identifier}";
4450 $c->stash->{r_script} = 'R/solGS/gs.r';
4451 $self->run_r_script($c);
4458 sub create_cluster_acccesible_tmp_files {
4459 my ($self, $c) = @_;
4461 my $temp_file_template = $c->stash->{r_temp_file};
4463 my $temp_dir = $c->stash->{analysis_tempfiles_dir} || $c->stash->{solgs_tempfiles_dir};
4465 my $in_file = $c->controller('solGS::Files')->create_tempfile($temp_dir, "${temp_file_template}-in");
4466 my $out_file = $c->controller('solGS::Files')->create_tempfile($temp_dir, "${temp_file_template}-out");
4467 my $err_file = $c->controller('solGS::Files')->create_tempfile($temp_dir, "${temp_file_template}-err");
4469 $c->stash(
4470 in_file_temp => $in_file,
4471 out_file_temp => $out_file,
4472 err_file_temp => $err_file,
4478 sub run_async {
4479 my ($self, $c) = @_;
4481 my $dependency = $c->stash->{dependency};
4482 my $dependency_type = $c->stash->{dependency_type};
4483 my $background_job = $c->stash->{background_job};
4484 my $dependent_job = $c->stash->{dependent_job};
4485 my $temp_file_template = $c->stash->{r_temp_file};
4486 my $job_type = $c->stash->{job_type};
4487 my $model_file = $c->stash->{gs_model_args_file};
4488 my $combine_pops_job_id = $c->stash->{combine_pops_job_id};
4489 my $solgs_tmp_dir = "'" . $c->stash->{solgs_tempfiles_dir} . "'";
4491 my $r_script = $c->stash->{r_commands_file};
4492 my $r_script_args = $c->stash->{r_script_args};
4494 if ($combine_pops_job_id)
4496 $dependency = $combine_pops_job_id;
4499 $dependency =~ s/^://;
4501 my $script_args;
4502 foreach my $arg (@$r_script_args)
4504 $script_args .= $arg;
4505 $script_args .= ' --script_args ' unless ($r_script_args->[-1] eq $arg);
4508 my $temp_dir = $c->stash->{solgs_tempfiles_dir};
4509 my $report_file = $self->create_tempfile($temp_dir, 'analysis_report_args');
4510 $c->stash->{report_file} = $report_file;
4512 my $job_config = {
4513 backend => $c->config->{backend},
4514 web_cluster_queue => $c->config->{web_cluster_queue}
4517 my $job_config_file = $self->create_tempfile($temp_dir, 'job_config_file');
4519 nstore $job_config, $job_config_file
4520 or croak "job config file: $! serializing job config to $job_config_file ";
4522 my $cmd = 'mx-run solGS::DependentJob'
4523 . ' --dependency_jobs ' . $dependency
4524 . ' --dependency_type ' . $dependency_type
4525 . ' --temp_dir ' . $solgs_tmp_dir
4526 . ' --temp_file_template ' . $temp_file_template
4527 . ' --analysis_report_args_file ' . $report_file
4528 . ' --dependent_type ' . $job_type
4529 . ' --job_config_file ' . $job_config_file;
4531 if ($r_script)
4533 $cmd .= ' --r_script ' . $r_script
4534 . ' --script_args ' . $script_args
4535 . ' --gs_model_args_file ' . $model_file;
4538 $c->stash->{r_temp_file} = 'run-async';
4539 $self->create_cluster_accesible_tmp_files($c);
4540 my $err_file = $c->stash->{err_file_temp};
4541 my $out_file = $c->stash->{out_file_temp};
4543 #my $config = $self->create_cluster_config($c, $temp_dir, $out_file, $err_file);
4545 eval
4547 my $job = CXGN::Tools::Run->new();
4548 $job->do_not_cleanup(1);
4549 #$job->is_async(1);
4550 $job->run_async($cmd);
4551 $c->stash->{r_job_tempdir} = $job->job_tempdir();
4552 $c->stash->{r_job_id} = $job->jobid();
4553 $c->stash->{cluster_job} = $job;
4556 if ($@) {
4557 print STDERR "An error occurred! $@\n";
4558 $c->stash->{Error} = $@ ;
4564 sub run_r_script {
4565 my ($self, $c) = @_;
4567 my $r_script = $c->stash->{r_script};
4568 my $input_files = $c->stash->{input_files};
4569 my $output_files = $c->stash->{output_files};
4571 $self->create_cluster_accesible_tmp_files($c);
4572 my $in_file = $c->stash->{in_file_temp};
4573 my $out_file = $c->stash->{out_file_temp};
4574 my $err_file = $c->stash->{err_file_temp};
4576 my $dependency = $c->stash->{dependency};
4577 my $dependency_type = $c->stash->{dependency_type};
4578 my $background_job = $c->stash->{background_job};
4581 my $temp_dir = $c->stash->{analysis_tempfiles_dir} || $c->stash->{solgs_tempfiles_dir};
4584 my $r_cmd_file = $c->path_to($r_script);
4585 copy($r_cmd_file, $in_file)
4586 or die "could not copy '$r_cmd_file' to '$in_file'";
4589 if ($dependency && $background_job)
4591 $c->stash->{r_commands_file} = $in_file;
4592 $c->stash->{r_script_args} = [$input_files, $output_files];
4594 $c->stash->{gs_model_args_file} = $self->create_tempfile($temp_dir, 'gs_model_args');
4596 if ($r_script =~ /combine_populations/)
4598 $c->stash->{job_type} = 'combine_populations';
4599 $self->run_async($c);
4601 elsif ($r_script =~ /gs/)
4603 $c->stash->{job_type} = 'model';
4605 my $model_job = {
4606 'r_command_file' => $in_file,
4607 'input_files' => $input_files,
4608 'output_files' => $output_files,
4609 'r_output_file' => $out_file,
4610 'err_temp_file' => $err_file,
4613 my $model_file = $c->stash->{gs_model_args_file};
4615 nstore $model_job, $model_file
4616 or croak "gs r script: $! serializing model details to '$model_file'";
4618 if ($dependency_type =~ /combine_populations|download_data/)
4620 $self->run_async($c);
4624 else
4626 my $config = $self->create_cluster_config($c, $temp_dir, $out_file, $err_file);
4628 my $cmd = 'Rscript --slave '
4629 . "$in_file $out_file "
4630 . '--args ' . $input_files
4631 . ' ' . $output_files;
4633 eval
4635 my $job = CXGN::Tools::Run->new($config);
4636 $job->do_not_cleanup(1);
4638 if ($r_script =~ /combine_populations/)
4640 #$c->stash->{job_type} = 'combine_populations';
4641 $c->stash->{combine_pops_job_id} = $job->jobid();
4643 my $temp_dir = $c->stash->{solgs_tempfiles_dir};
4644 $c->stash->{gs_model_args_file} = $self->create_tempfile($temp_dir, 'gs_model_args');
4645 #$self->run_async($c);
4648 if ($background_job) {
4649 $job->is_async(1);
4650 $job->run_cluster($cmd);
4652 $c->stash->{r_job_tempdir} = $job->job_tempdir();
4653 $c->stash->{r_job_id} = $job->jobid();
4654 $c->stash->{cluster_job} = $job;
4655 } else {
4656 $job->is_cluster(1);
4657 $job->run_cluster($cmd);
4658 $job->wait;
4662 if ($@) {
4663 print STDERR "An error occurred! $@\n";
4664 $c->stash->{Error} = $@ ;
4665 $c->stash->{script_error} = "$r_script";
4672 sub create_cluster_config {
4673 my ($self, $c, $temp_dir, $out_file, $err_file) = @_;
4675 my $config = {
4676 backend => $c->config->{backend},
4677 temp_base => $temp_dir,
4678 queue => $c->config->{'web_cluster_queue'},
4679 max_cluster_jobs => 1_000_000_000,
4680 out_file => $out_file,
4681 err_file => $err_file,
4682 is_async => 0,
4683 do_cleanup => 0,
4687 return $config;
4691 sub get_solgs_dirs {
4692 my ($self, $c) = @_;
4694 my $geno_version = $c->config->{default_genotyping_protocol};
4695 $geno_version = 'analysis-data' if ($geno_version =~ /undefined/) || !$geno_version;
4696 $geno_version =~ s/\s+//g;
4697 my $tmp_dir = $c->site_cluster_shared_dir;
4698 $tmp_dir = catdir($tmp_dir, $geno_version);
4699 my $solgs_dir = catdir($tmp_dir, "solgs");
4700 my $solgs_cache = catdir($tmp_dir, 'solgs', 'cache');
4702 my $solgs_tempfiles = catdir($tmp_dir, 'solgs', 'tempfiles');
4703 my $correlation_cache = catdir($tmp_dir, 'correlation', 'cache');
4704 my $correlation_temp = catdir($tmp_dir, 'correlation', 'tempfiles');
4705 my $solgs_upload = catdir($tmp_dir, 'solgs', 'tempfiles', 'prediction_upload');
4706 my $pca_dir = catdir($tmp_dir, 'pca', 'cache');
4707 my $histogram_dir = catdir($tmp_dir, 'histogram', 'cache');
4708 my $log_dir = catdir($tmp_dir, 'log', 'cache');
4709 my $anova_cache = catdir($tmp_dir, 'anova', 'cache');
4710 my $anova_temp = catdir($tmp_dir, 'anova', 'tempfiles');
4712 mkpath (
4714 $solgs_dir, $solgs_cache, $solgs_tempfiles, $solgs_upload,
4715 $correlation_cache, $correlation_temp, $pca_dir, $histogram_dir, $log_dir, $anova_cache,
4716 $anova_temp,
4718 0, 0755
4721 $c->stash(solgs_dir => $solgs_dir,
4722 solgs_cache_dir => $solgs_cache,
4723 solgs_tempfiles_dir => $solgs_tempfiles,
4724 solgs_prediction_upload_dir => $solgs_upload,
4725 correlation_temp_dir => $correlation_temp,
4726 correlation_cache_dir => $correlation_cache,
4727 pca_dir => $pca_dir,
4728 histogram_dir => $histogram_dir,
4729 analysis_log_dir => $log_dir,
4730 anova_cache_dir => $anova_cache,
4731 anova_temp_dir => $anova_temp,
4737 sub cache_file {
4738 my ($self, $c, $cache_data) = @_;
4740 my $cache_dir = $c->stash->{cache_dir};
4742 unless ($cache_dir)
4744 $cache_dir = $c->stash->{solgs_cache_dir};
4747 my $file_cache = Cache::File->new(cache_root => $cache_dir,
4748 lock_level => Cache::File::LOCK_NFS()
4751 $file_cache->purge();
4753 my $file = $file_cache->get($cache_data->{key});
4755 no warnings 'uninitialized';
4757 unless (-s $file > 1)
4759 $file = catfile($cache_dir, $cache_data->{file});
4760 write_file($file);
4761 $file_cache->set($cache_data->{key}, $file, '30 days');
4764 $c->stash->{$cache_data->{stash_key}} = $file;
4765 $c->stash->{cache_dir} = $c->stash->{solgs_cache_dir};
4769 sub template {
4770 my ($self, $file) = @_;
4772 $file =~ s/(^\/)//;
4773 my $dir = '/solgs';
4775 return catfile($dir, $file);
4780 # sub default :Path {
4781 # my ( $self, $c ) = @_;
4782 # $c->forward('search');
4787 =head2 end
4789 Attempt to render a view, if needed.
4791 =cut
4793 #sub render : ActionClass('RenderView') {}
4794 sub begin : Private {
4795 my ($self, $c) = @_;
4797 $self->get_solgs_dirs($c);
4802 # sub end : Private {
4803 # my ( $self, $c ) = @_;
4805 # return if @{$c->error};
4807 # # don't try to render a default view if this was handled by a CGI
4808 # $c->forward('render') unless $c->req->path =~ /\.pl$/;
4810 # # enforce a default texest/html content type regardless of whether
4811 # # we tried to render a default view
4812 # $c->res->content_type('text/html') unless $c->res->content_type;
4814 # # insert our javascript packages into the rendered view
4815 # if( $c->res->content_type eq 'text/html' ) {
4816 # $c->forward('/js/insert_js_pack_html');
4817 # $c->res->headers->push_header('Vary', 'Cookie');
4818 # } else {
4819 # $c->log->debug("skipping JS pack insertion for page with content type ".$c->res->content_type)
4820 # if $c->debug;
4825 =head2 auto
4827 Run for every request to the site.
4829 =cut
4831 # sub auto : Private {
4832 # my ($self, $c) = @_;
4833 # CatalystX::GlobalContext->set_context( $c );
4834 # $c->stash->{c} = $c;
4835 # weaken $c->stash->{c};
4837 # $self->get_solgs_dirs($c);
4838 # # gluecode for logins
4840 # # # unless( $c->config->{'disable_login'} ) {
4841 # # my $dbh = $c->dbc->dbh;
4842 # # if ( my $sp_person_id = CXGN::Login->new( $dbh )->has_session ) {
4844 # # my $sp_person = CXGN::People::Person->new( $dbh, $sp_person_id);
4846 # # $c->authenticate({
4847 # # username => $sp_person->get_username(),
4848 # # password => $sp_person->get_password(),
4849 # # });
4850 # # }
4851 # # }
4853 # return 1;
4859 =head1 AUTHOR
4861 Isaak Y Tecle <iyt2@cornell.edu>
4863 =head1 LICENSE
4865 This library is free software. You can redistribute it and/or modify
4866 it under the same terms as Perl itself.
4868 =cut
4870 __PACKAGE__->meta->make_immutable;