3 Isaak Y Tecle <iyt2@cornell.edu>
7 This library is free software. You can redistribute it and/or modify
8 it under the same terms as Perl itself.
12 SGN::Controller::solGS::List - Controller for list based training and selection populations
17 package SGN
::Controller
::solGS
::List
;
20 use namespace
::autoclean
;
22 use List
::MoreUtils qw
/uniq/;
25 use File
::Temp qw
/ tempfile tempdir /;
26 use File
::Spec
::Functions qw
/ catfile catdir/;
27 use File
::Slurp qw
/write_file read_file/;
30 use POSIX
qw(strftime);
32 BEGIN { extends
'Catalyst::Controller' }
36 sub generate_check_value
:Path
('/solgs/generate/checkvalue') Args
(0) {
39 my $file_name = $c->req->param('string');
40 my $check_value = crc
($file_name);
42 my $ret->{status
} = 'failed';
46 $ret->{status
} = 'success';
47 $ret->{check_value
} = $check_value;
52 $c->res->content_type('application/json');
58 sub check_predicted_list_selection
:Path
('/solgs/check/predicted/list/selection') Args
(0) {
61 my $args = $c->req->param('arguments');
63 my $json = JSON
->new();
64 $args = $json->decode($args);
66 my $training_pop_id = $args->{training_pop_id
};
67 my $selection_pop_id = $args->{selection_pop_id
};
69 $c->stash->{uploaded_prediction
} = 1;
71 $c->controller("solGS::solGS")->download_prediction_urls($c, $training_pop_id, $selection_pop_id);
73 my $ret->{output
} = $c->stash->{download_prediction
};
77 $c->res->content_type('application/json');
83 sub load_genotypes_list_selection
:Path
('/solgs/load/genotypes/list/selection') Args
(0) {
86 my $args = $c->req->param('arguments');
88 my $json = JSON
->new();
89 $args = $json->decode($args);
91 my $training_pop_id = $args->{training_pop_id
}[0];
92 my $selection_pop_id = $args->{selection_pop_id
}[0];
93 my $trait_id = $args->{trait_id
}[0];
94 $c->stash->{list
} = $args->{list
};
95 $c->stash->{list_name
} = $args->{list_name
};
96 $c->stash->{list_id
} = $args->{list_id
};
97 $c->stash->{data_set_type
} = $args->{data_set_type
};
98 $c->stash->{training_pop_id
} = $training_pop_id;
99 $c->stash->{model_id
} = $training_pop_id;
100 $c->stash->{pop_id
} = $training_pop_id;
101 $c->stash->{selection_pop_id
} = $selection_pop_id;
102 $c->stash->{uploaded_prediction
} = $args->{population_type
};
103 $c->stash->{trait_id
} = $trait_id;
105 if ($args->{data_set_type
} =~ /combined populations/)
107 $c->stash->{combo_pops_id
} = $training_pop_id;
110 $self->get_selection_genotypes_list($c);
111 my $genotypes_list = $c->stash->{genotypes_list
};
112 my $genotypes_ids = $c->stash->{genotypes_ids
};
114 my $data = $c->model('solGS::solGS')->genotypes_list_genotype_data($genotypes_list);
115 $c->stash->{genotypes_list_genotype_data
} = $data;
117 $self->genotypes_list_genotype_data_file($c, $selection_pop_id);
118 my $genotype_file = $c->stash->{genotypes_list_genotype_data_file
};
120 $self->create_list_population_metadata_file($c, $selection_pop_id);
122 my $ret->{status
} = 'failed';
124 if (-s
$genotype_file)
126 $self->predict_list_selection_gebvs($c);
128 $ret->{status
} = $c->stash->{status
};
129 $ret->{output
} = $c->stash->{download_prediction
};
132 $ret = to_json
($ret);
134 $c->res->content_type('application/json');
140 sub solgs_list_login_message
:Path
('/solgs/list/login/message') Args
(0) {
143 my $page = $c->req->param('page');
145 my $message = "This is a private data. If you are the owner, "
146 . "please <a href=\"/solpeople/login.pl?goto_url=$page\">login</a> to view it.";
148 $c->stash->{message
} = $message;
150 $c->stash->{template
} = "/generic_message.mas";
155 sub get_trial_id
:Path
('/solgs/get/trial/id') Args
(0) {
158 my @trials_names = $c->req->param('trials_names[]');
160 my $tr_rs = $c->model('solGS::solGS')->project_details_by_exact_name(\
@trials_names);
164 while (my $rw = $tr_rs->next)
166 push @trials_ids, $rw->project_id;
169 my $ret->{trials_ids
} = \
@trials_ids;
171 $ret = to_json
($ret);
173 $c->res->content_type('application/json');
179 sub get_selection_genotypes_list_from_file
{
180 my ($self, $file) = @_;
183 open my $fh, $file or die "Can't open file $file: $!";
196 sub get_selection_genotypes_list
{
199 my $list = $c->stash->{list
};
201 my @stocks_names = ();
204 foreach my $stock (@
$list)
206 push @stocks_ids, $stock->[0];;
207 push @stocks_names, $stock->[1];
210 @stocks_ids = uniq
(@stocks_ids);
211 @stocks_names = uniq
(@stocks_names);
213 $c->stash->{genotypes_list
} = \
@stocks_names;
214 $c->stash->{genotypes_ids
} = \
@stocks_ids;
219 sub genotypes_list_genotype_data_file
{
220 my ($self, $c, $list_pop_id) = @_;
222 my $geno_data = $c->stash->{genotypes_list_genotype_data
};
223 my $dir = $c->stash->{solgs_prediction_upload_dir
};
225 my $files = $self->create_list_pop_tempfiles($dir, $list_pop_id);
226 my $geno_file = $files->{geno_file
};
227 write_file
($geno_file, $geno_data);
229 $c->stash->{genotypes_list_genotype_data_file
} = $geno_file;
234 sub create_list_pop_tempfiles
{
235 my ($self, $dir, $list_pop_id) = @_;
237 my $pheno_name = "phenotype_data_${list_pop_id}.txt";
238 my $geno_name = "genotype_data_${list_pop_id}.txt";
239 my $pheno_file = catfile
($dir, $pheno_name);
240 my $geno_file = catfile
($dir, $geno_name);
242 my $files = { pheno_file
=> $pheno_file, geno_file
=> $geno_file};
249 sub create_list_population_metadata
{
251 my $metadata = 'key' . "\t" . 'value';
252 $metadata .= "\n" . 'user_id' . "\t" . $c->user->id;
253 $metadata .= "\n" . 'list_name' . "\t" . $c->{stash
}->{list_name
};
254 $metadata .= "\n" . 'description' . "\t" . 'Uploaded on: ' . strftime
"%a %b %e %H:%M %Y", localtime;
256 $c->stash->{user_list_population_metadata
} = $metadata;
261 sub create_list_population_metadata_file
{
262 my ($self, $c, $list_pop_id) = @_;
264 my $user_id = $c->user->id;
265 my $tmp_dir = $c->stash->{solgs_prediction_upload_dir
};
267 my $file = catfile
($tmp_dir, "metadata_${user_id}_${list_pop_id}");
269 $self->create_list_population_metadata($c);
270 my $metadata = $c->stash->{user_list_population_metadata
};
272 write_file
($file, $metadata);
274 $c->stash->{user_list_population_metadata_file
} = $file;
279 sub predict_list_selection_pop_single_pop_model
{
282 my $trait_id = $c->stash->{trait_id
};
283 my $training_pop_id = $c->stash->{training_pop_id
};
284 my $selection_pop_id = $c->stash->{selection_pop_id
};
286 $c->stash->{uploaded_prediction
} = 1;
288 my $identifier = $training_pop_id . '_' . $selection_pop_id;
289 $c->controller('solGS::solGS')->prediction_pop_gebvs_file($c, $identifier, $trait_id);
290 my $prediction_pop_gebvs_file = $c->stash->{prediction_pop_gebvs_file
};
292 if (!-s
$prediction_pop_gebvs_file)
294 $c->controller('solGS::solGS')->phenotype_file_name($c, $training_pop_id);
295 $c->stash->{phenotype_file
} =$c->stash->{phenotype_file_name
};
297 $c->controller('solGS::solGS')->genotype_file_name($c, $training_pop_id);
298 $c->stash->{genotype_file
} =$c->stash->{genotype_file_name
};
300 $self->user_prediction_population_file($c, $selection_pop_id);
302 $c->stash->{pop_id
} = $c->stash->{training_pop_id
};
303 $c->controller("solGS::solGS")->get_trait_details($c, $trait_id);
304 $c->controller("solGS::solGS")->get_rrblup_output($c);
305 $c->stash->{status
} = 'success';
309 $c->stash->{status
} = 'success';
315 sub predict_list_selection_pop_multi_traits
{
318 my $data_set_type = $c->stash->{data_set_type
};
319 my $training_pop_id = $c->stash->{training_pop_id
};
320 my $selection_pop_id = $c->stash->{selection_pop_id
};
322 $c->stash->{pop_id
} = $training_pop_id;
323 $c->controller('solGS::solGS')->traits_with_valid_models($c);
324 my @traits_with_valid_models = @
{$c->stash->{traits_with_valid_models
}};
326 foreach my $trait_abbr (@traits_with_valid_models)
328 $c->stash->{trait_abbr
} = $trait_abbr;
329 $c->controller('solGS::solGS')->get_trait_details_of_trait_abbr($c);
330 $self->predict_list_selection_pop_single_pop_model($c);
333 $c->controller("solGS::solGS")->download_prediction_urls($c, $training_pop_id, $selection_pop_id );
334 my $download_prediction = $c->stash->{download_prediction
};
339 sub predict_list_selection_pop_combined_pops_model
{
342 my $data_set_type = $c->stash->{data_set_type
};
343 my $combo_pops_id = $c->stash->{combo_pops_id
};
344 my $training_pop_id = $c->stash->{training_pop_id
};
345 my $selection_pop_id = $c->stash->{selection_pop_id
};
346 my $trait_id = $c->stash->{trait_id
};
348 $c->stash->{prediction_pop_id
} = $c->stash->{selection_pop_id
};
349 $c->stash->{pop_id
} = $training_pop_id;
350 $c->stash->{uploaded_prediction
} = 1;
352 my $identifier = $training_pop_id . '_' . $selection_pop_id;
353 $c->controller("solGS::solGS")->prediction_pop_gebvs_file($c, $identifier, $trait_id);
354 my $prediction_pop_gebvs_file = $c->stash->{prediction_pop_gebvs_file
};
356 if (!-s
$prediction_pop_gebvs_file)
358 $c->controller("solGS::solGS")->get_trait_details($c, $trait_id);
360 $c->controller("solGS::solGS")->cache_combined_pops_data($c);
362 my $pheno_file = $c->stash->{trait_combined_pheno_file
};
363 my $geno_file = $c->stash->{trait_combined_geno_file
};
365 $self->user_prediction_population_file($c, $selection_pop_id);
367 $c->controller("solGS::solGS")->get_rrblup_output($c);
368 $c->stash->{status
} = 'success';
372 $c->stash->{status
} = 'success';
375 $c->controller("solGS::solGS")->download_prediction_urls($c, $training_pop_id, $selection_pop_id );
380 sub predict_list_selection_gebvs
{
383 my $referer = $c->req->referer;
385 if ($referer =~ /solgs\/trait\
//)
387 $self->predict_list_selection_pop_single_pop_model($c);
389 elsif ($referer =~ /solgs\/traits\
/all\//)
391 $self->predict_list_selection_pop_multi_traits($c);
393 elsif ($referer =~ /solgs\/models\
/combined\/trials\
//)
395 $c->stash->{pop_id
} = $c->stash->{training_pop_id
};
396 $c->controller("solGS::solGS")->traits_with_valid_models($c);
397 my @traits_with_valid_models = @
{$c->stash->{traits_with_valid_models
}};
399 foreach my $trait_abbr (@traits_with_valid_models)
401 $c->stash->{trait_abbr
} = $trait_abbr;
402 $c->controller("solGS::solGS")->get_trait_details_of_trait_abbr($c);
404 $self->predict_list_selection_pop_combined_pops_model($c);
407 elsif ($referer =~ /solgs\/model\
/combined\/populations\
//)
409 $self->predict_list_selection_pop_combined_pops_model($c);
413 $c->stash->{status
} = "calling predict_list_selection_gebvs..no matching type analysis.";
418 sub user_prediction_population_file
{
419 my ($self, $c, $pred_pop_id) = @_;
421 my $upload_dir = $c->stash->{solgs_prediction_upload_dir
};
423 my ($fh, $tempfile) = tempfile
("prediction_population_${pred_pop_id}-XXXXX",
428 $c->controller("solGS::solGS")->genotype_file_name($c, $pred_pop_id);
429 my $pred_pop_file = $c->stash->{genotype_file_name
};
431 $c->stash->{genotypes_list_genotype_data_file
} = $pred_pop_file;
433 $fh->print($pred_pop_file);
436 $c->stash->{prediction_population_file
} = $tempfile;
441 sub get_list_elements_names
{
444 my $list = $c->stash->{list
};
448 foreach my $id_names (@
$list)
450 push @names, $id_names->[1];
453 $c->stash->{list_elements_names
} = \
@names;
458 sub get_list_elements_ids
{
461 my $list = $c->stash->{list
};
465 foreach my $id_names (@
$list)
467 push @ids, $id_names->[0];
470 $c->stash->{list_elements_ids
} = \
@ids;
475 sub map_genotypes_plots
{
478 my $plots = $c->stash->{plots_names
};
482 die "No plots list provided $!\n";
486 my $genotypes_rs = $c->model('solGS::solGS')->get_genotypes_from_plots($plots);
489 while (my $genotype = $genotypes_rs->next)
491 my $name = $genotype->uniquename;
492 push @genotypes, $name;
495 @genotypes = uniq
(@genotypes);
497 $c->stash->{genotypes_list
} = \
@genotypes;
503 sub load_plots_list_training
:Path
('/solgs/load/plots/list/training') Args
(0) {
506 my $args = $c->req->param('arguments');
508 my $json = JSON
->new();
509 $args = $json->decode($args);
511 $c->stash->{list_name
} = $args->{list_name
};
512 $c->stash->{list
} = $args->{list
};
513 $c->stash->{model_id
} = $args->{training_pop_id
};
514 $c->stash->{population_type
} = $args->{population_type
};
516 my $model_id = $c->stash->{model_id
};
517 $self->plots_list_phenotype_file($c);
519 $self->genotypes_list_genotype_file($c, $model_id);
521 my $tmp_dir = $c->stash->{solgs_prediction_upload_dir
};
523 my $files = $self->create_list_pop_tempfiles($tmp_dir, $model_id);
524 my $pheno_file = $files->{pheno_file
};
525 my $geno_file = $files->{geno_file
};
527 $self->create_list_population_metadata_file($c, $model_id);
529 my $ret->{status
} = 'failed';
531 if (-s
$geno_file && -s
$pheno_file)
533 $ret->{status
} = 'success';
536 $ret = to_json
($ret);
538 $c->res->content_type('application/json');
544 sub genotypes_list_genotype_data
{
545 my ($self, $args) = @_;
547 my $list_pop_id = $args->{model_id
} || $args->{list_pop_id
} || $args->{selection_pop_id
};
548 my $genotypes = $args->{genotypes_list
};
549 my $genotypes_ids = $args->{genotypes_ids
};
550 my $tmp_dir = $args->{list_data_dir
};
552 my $model = SGN
::Model
::solGS
::solGS
->new({context
=> 'SGN::Context',
553 schema
=> SGN
::Context
->dbic_schema("Bio::Chado::Schema")
556 my $geno_data = $model->genotypes_list_genotype_data($genotypes);
557 my $files = $self->create_list_pop_tempfiles($tmp_dir, $list_pop_id);
559 my $geno_file = $files->{geno_file
};
560 write_file
($geno_file, $geno_data);
565 sub genotypes_list_genotype_file
{
566 my ($self, $c, $list_pop_id) = @_;
568 my $list = $c->stash->{list
};
570 if (!$c->stash->{selection_pop_id
})
572 $self->get_list_elements_names($c);
573 $c->stash->{plots_names
} = $c->stash->{list_elements_names
};
575 $self->get_list_elements_ids($c);
576 $c->stash->{plots_ids
} = $c->stash->{list_elements_ids
};
578 $self->map_genotypes_plots($c);
582 $self->get_selection_genotypes_list($c);
585 my $genotypes = $c->stash->{genotypes_list
};
586 my $genotypes_ids = $c->stash->{genotypes_ids
};
588 my $data_dir = $c->stash->{solgs_prediction_upload_dir
};
591 'list_pop_id' => $list_pop_id,
592 'genotypes_list' => $genotypes,
593 'genotypes_ids' => $genotypes_ids,
594 'list_data_dir' => $data_dir,
597 $c->stash->{r_temp_file
} = 'genotypes-list-genotype-data-query';
598 $c->controller('solGS::solGS')->create_cluster_acccesible_tmp_files($c);
599 my $out_temp_file = $c->stash->{out_file_temp
};
600 my $err_temp_file = $c->stash->{err_file_temp
};
602 my $temp_dir = $c->stash->{solgs_tempfiles_dir
};
603 my $background_job = $c->stash->{background_job
};
605 my $report_file = $c->controller('solGS::solGS')->create_tempfile($temp_dir, 'geno-data-query-report-args');
606 $c->stash->{report_file
} = $report_file;
611 my $geno_job = CXGN
::Tools
::Run
->run_cluster_perl({
613 method
=> ["SGN::Controller::solGS::List" => "genotypes_list_genotype_data"],
615 load_packages
=> ['SGN::Controller::solGS::List', 'SGN::Context', 'SGN::Model::solGS::solGS'],
617 out_file
=> $out_temp_file,
618 err_file
=> $err_temp_file,
619 working_dir
=> $temp_dir,
620 max_cluster_jobs
=> 1_000_000_000
,
625 $c->stash->{r_job_tempdir
} = $geno_job->tempdir();
626 $c->stash->{geno_data_query_job_id
} = $geno_job->job_id();
627 $c->stash->{cluster_job
} = $geno_job;
629 unless ($background_job)
638 $status =~ s/\n at .+//s;
644 sub plots_list_phenotype_data
{
645 my ($self, $args) = @_;
647 my $model_id = $args->{model_id
};
648 my $plots_names = $args->{plots_names
};
649 my $plots_ids = $args->{plots_ids
};
650 my $traits_file = $args->{traits_file
};
651 my $tmp_dir = $args->{list_data_dir
};
653 my $model = SGN
::Model
::solGS
::solGS
->new({schema
=> SGN
::Context
->dbic_schema("Bio::Chado::Schema")});
654 my $pheno_data = $model->plots_list_phenotype_data($plots_names);
656 $pheno_data = SGN
::Controller
::solGS
::solGS
->format_phenotype_dataset($pheno_data, $traits_file);
658 my $files = $self->create_list_pop_tempfiles($tmp_dir, $model_id);
660 my $pheno_file = $files->{pheno_file
};
662 write_file
($pheno_file, $pheno_data);
667 sub plots_list_phenotype_file
{
670 my $model_id = $c->stash->{model_id
};
671 my $list = $c->stash->{list
};
673 $self->get_list_elements_names($c);
674 my $plots_names = $c->stash->{list_elements_names
};
676 $self->get_list_elements_ids($c);
677 my $plots_ids = $c->stash->{list_elements_ids
};
679 $c->stash->{pop_id
} = $model_id;
680 $c->controller("solGS::solGS")->traits_list_file($c);
681 my $traits_file = $c->stash->{traits_list_file
};
683 my $data_dir = $c->stash->{solgs_prediction_upload_dir
};
686 'model_id' => $model_id,
687 'plots_names' => $plots_names,
688 'plots_ids' => $plots_ids,
689 'traits_file' => $traits_file,
690 'list_data_dir' => $data_dir,
693 $c->stash->{r_temp_file
} = 'plots-phenotype-data-query';
694 $c->controller('solGS::solGS')->create_cluster_acccesible_tmp_files($c);
695 my $out_temp_file = $c->stash->{out_file_temp
};
696 my $err_temp_file = $c->stash->{err_file_temp
};
698 my $temp_dir = $c->stash->{solgs_tempfiles_dir
};
699 my $background_job = $c->stash->{background_job
};
705 my $pheno_job = CXGN
::Tools
::Run
->run_cluster_perl({
707 method
=> ["SGN::Controller::solGS::List" => "plots_list_phenotype_data"],
709 load_packages
=> ['SGN::Controller::solGS::List', 'SGN::Controller::solGS::solGS', 'SGN::Context', 'SGN::Model::solGS::solGS'],
711 out_file
=> $out_temp_file,
712 err_file
=> $err_temp_file,
713 working_dir
=> $temp_dir,
714 max_cluster_jobs
=> 1_000_000_000
,
719 $c->stash->{r_job_tempdir
} = $pheno_job->tempdir();
720 $c->stash->{pheno_data_query_job_id
} = $pheno_job->job_id();
721 $c->stash->{cluster_job
} = $pheno_job;
723 unless ($background_job)
732 $status =~ s/\n at .+//s;
738 sub begin
: Private
{
741 $c->controller("solGS::solGS")->get_solgs_dirs($c);