Merge pull request #5191 from solgenomics/topic/quality_control
[sgn.git] / lib / SGN / Controller / solGS / Dataset.pm
blobbc1857c0a1c420bc51078d6f93f6e10e96b61d8a
1 package SGN::Controller::solGS::Dataset;
3 use Moose;
4 use namespace::autoclean;
6 use Carp qw/ carp confess croak /;
7 use File::Slurp qw /write_file read_file :edit prepend_file/;
8 use JSON;
9 use POSIX qw(strftime);
10 use Scalar::Util qw /weaken reftype/;
11 use Storable qw/ nstore retrieve /;
12 #BEGIN { extends 'Catalyst::Controller' }
13 use Data::Dumper;
15 BEGIN { extends 'Catalyst::Controller::REST' }
# REST controller configuration: responses default to JSON, the payload is
# taken from the 'rest' stash key, and both JSON and HTML requests are
# mapped to the 'JSON' entry.
__PACKAGE__->config(
    default   => 'application/json',
    stash_key => 'rest',
    map       => {
        'application/json' => 'JSON',
        'text/html'        => 'JSON',
    },
);
# Action for /solgs/get/dataset/trials.
#
# Reads the 'dataset_id' request parameter (croaks when it is missing),
# stashes it, resolves the dataset's trial details and copies the trial ids,
# combined populations id, trial names and genotyping protocol id from the
# stash into the REST response.
sub get_dataset_trials :Path('/solgs/get/dataset/trials') Args(0) {
    my ($self, $c) = @_;

    my $dataset_id = $c->req->param('dataset_id');
    croak "Dataset id missing." unless $dataset_id;

    $c->stash->{dataset_id} = $dataset_id;
    $self->get_dataset_trials_details($c);

    my $stash = $c->stash;
    $stash->{rest}{'trials_ids'}             = $stash->{trials_ids};
    $stash->{rest}{'combo_pops_id'}          = $stash->{combo_pops_id};
    $stash->{rest}{'trials_names'}           = $stash->{trials_names};
    $stash->{rest}{'genotyping_protocol_id'} = $stash->{genotyping_protocol_id};
}
# Action for /solgs/check/predicted/dataset/selection.
#
# Decodes the JSON 'arguments' request parameter, stashes the training trait
# ids and genotyping protocol id it carries, asks solGS::Download for the
# selection prediction download urls and returns them as a JSON body of the
# form { "output": ... }.
#
# Croaks when the 'arguments' parameter is missing, instead of letting the
# JSON decoder fail on an undefined value.
sub check_predicted_dataset_selection :Path('/solgs/check/predicted/dataset/selection') Args(0) {
    my ($self, $c) = @_;

    my $args = $c->req->param('arguments');
    croak "Arguments missing." if !$args;

    $args = JSON->new()->decode($args);

    my $training_pop_id  = $args->{training_pop_id};
    my $selection_pop_id = $args->{selection_pop_id};
    $c->stash->{training_traits_ids}    = $args->{training_traits_ids};
    $c->stash->{genotyping_protocol_id} = $args->{genotyping_protocol_id};

    $c->controller('solGS::Download')->selection_prediction_download_urls($c, $training_pop_id, $selection_pop_id);

    # Build the response hash explicitly rather than autovivifying it from
    # a freshly declared scalar.
    my $ret = { output => $c->stash->{selection_prediction_download} };

    $c->res->content_type('application/json');
    $c->res->body(to_json($ret));
}
# Looks up the trials of the stashed dataset, catalogues them as a combined
# population, and stashes the dataset's genotyping protocol id together with
# the trial ids (under both 'dataset_trials_ids' and 'trials_ids').
sub get_dataset_trials_ids {
    my ($self, $c) = @_;

    my $dataset_id = $c->stash->{dataset_id};
    my $dataset    = $self->get_model($c)->get_dataset_data($dataset_id);
    my $trials_ids = $dataset->{categories}->{trials};

    $c->controller('solGS::combinedTrials')->catalogue_combined_pops($c, $trials_ids);

    my $protocol_id = $self->get_dataset_genotyping_protocol($c);

    $c->stash->{genotyping_protocol_id} = $protocol_id;
    $c->stash->{$_} = $trials_ids for 'dataset_trials_ids';
    $c->stash->{trials_ids} = $trials_ids;
}
# Stashes the dataset's trial ids, then lets solGS::combinedTrials process
# the details of that trials list.
sub get_dataset_trials_details {
    my ($self, $c) = @_;

    $self->get_dataset_trials_ids($c);

    my $combined_trials = $c->controller('solGS::combinedTrials');
    $combined_trials->process_trials_list_details($c);
}
# Stashes the dataset's genotypes list, then hands over to solGS::List to
# produce the genotype file for that list.
sub get_dataset_genotypes_genotype_data {
    my ($self, $c) = @_;

    $self->get_dataset_genotypes_list($c);

    my $list_controller = $c->controller('solGS::List');
    $list_controller->genotypes_list_genotype_file($c);
}
# Fetches the genotype ids of a dataset and stashes them ('genotypes_ids')
# along with their transformed form from solGS::List ('genotypes_list').
#
# $dataset_id is optional; the stashed dataset id is used when it is not
# given.
sub get_dataset_genotypes_list {
    my ($self, $c, $dataset_id) = @_;

    $dataset_id ||= $c->stash->{dataset_id};

    my $ids = $self->get_model($c)->get_genotypes_from_dataset($dataset_id);

    $c->stash->{genotypes_list} = $c->controller('solGS::List')->transform_uniqueids_genotypes($c, $ids);
    $c->stash->{genotypes_ids}  = $ids;
}
# Builds the training data query jobs file for the stashed dataset and
# submits it through solGS::AsyncJob.
#
# A dataset with plots is handled as a plots list; otherwise, a dataset with
# trials is handled as a set of training populations. When the dataset has
# neither, 'dependent_jobs' is stashed as undef before run_async is called.
#
# Missing (undefined) 'plots' or 'trials' categories are treated as empty
# lists, so the emptiness checks do not die on an undefined array reference.
sub submit_dataset_training_data_query {
    my ($self, $c) = @_;

    my $dataset_id = $c->stash->{dataset_id};
    my $data       = $self->get_model($c)->get_dataset_data($dataset_id);

    my $geno_protocol = $self->get_dataset_genotyping_protocol($c);

    my $categories = $data->{categories} || {};
    my $plots      = $categories->{plots}  || [];
    my $trials     = $categories->{trials} || [];

    my $query_jobs_file;

    if (@{$plots}) {
        # TODO: write a dataset training data query job function instead.
        $c->stash->{plots_names} = $plots;
        $self->get_dataset_genotypes_list($c);

        $c->controller('solGS::List')->get_list_training_data_query_jobs_file($c);
        $query_jobs_file = $c->stash->{list_training_data_query_jobs_file};
    }
    elsif (@{$trials}) {
        $c->controller('solGS::AsyncJob')->get_training_pop_data_query_job_args_file($c, $trials, $geno_protocol);
        $query_jobs_file = $c->stash->{training_pop_data_query_job_args_file};
    }

    $c->stash->{dependent_jobs} = $query_jobs_file;
    $c->controller('solGS::AsyncJob')->run_async($c);
}
# Retrieves phenotype data for the stashed dataset.
#
# With plots in the dataset, the plots are stashed as 'plots_ids', the plots
# list phenotype file is produced by solGS::List and stashed as
# 'phenotype_file'. Otherwise, with trials in the dataset, the trials are
# stashed as 'pops_ids_list' and solGS::List fetches their phenotype data.
sub get_dataset_phenotype_data {
    my ($self, $c) = @_;

    my $dataset_id = $c->stash->{dataset_id};
    my $data       = $self->get_model($c)->get_dataset_data($dataset_id);
    my $categories = $data->{categories};

    if ($categories->{plots}->[0]) {
        $c->stash->{plots_ids} = $categories->{plots};

        $c->controller('solGS::List')->plots_list_phenotype_file($c);
        $c->stash->{phenotype_file} = $c->stash->{plots_list_phenotype_file};
    }
    elsif ($categories->{trials}->[0]) {
        my $trials = $categories->{trials};
        $c->stash->{pops_ids_list} = $trials;
        $c->controller('solGS::List')->get_trials_list_pheno_data($c);
    }
}
# Prepares the phenotype data query job(s) for the stashed dataset and
# stashes them as 'dataset_pheno_data_query_jobs'.
#
# A dataset with plots uses the plots list phenotype query job from
# solGS::List. Otherwise, a dataset with trials stashes the per-trial
# phenotype files as 'phenotype_files_list' and uses the trials phenotype
# query job arguments from solGS::AsyncJob.
sub create_dataset_pheno_data_query_jobs {
    my ($self, $c) = @_;

    my $dataset_id = $c->stash->{dataset_id};
    my $data       = $self->get_model($c)->get_dataset_data($dataset_id);
    my $categories = $data->{categories};

    if ($categories->{plots}->[0]) {
        $c->stash->{plots_ids} = $categories->{plots};

        $c->controller('solGS::List')->plots_list_phenotype_query_job($c);
        $c->stash->{dataset_pheno_data_query_jobs} = $c->stash->{plots_list_phenotype_query_job};
    }
    elsif ($categories->{trials}->[0]) {
        my $trials_ids = $categories->{trials};

        $c->controller('solGS::combinedTrials')->multi_pops_pheno_files($c, $trials_ids);
        $c->stash->{phenotype_files_list} = $c->stash->{multi_pops_pheno_files};

        $c->controller('solGS::AsyncJob')->get_trials_phenotype_query_jobs_args($c, $trials_ids);
        $c->stash->{dataset_pheno_data_query_jobs} = $c->stash->{trials_phenotype_query_jobs_args};
    }
}
# Prepares the genotype data query job(s) for the stashed dataset.
#
# A dataset with accessions delegates to dataset_genotype_query_jobs.
# Otherwise, a dataset with trials stashes the per-trial genotype files as
# 'genotype_files_list' and the trials genotype query job arguments as
# 'dataset_geno_data_query_jobs'.
sub create_dataset_geno_data_query_jobs {
    my ($self, $c) = @_;

    my $dataset_id = $c->stash->{dataset_id};
    my $data       = $self->get_model($c)->get_dataset_data($dataset_id);

    # Resolved before branching, as in the accessions case the protocol is
    # not passed on explicitly.
    my $geno_protocol = $self->get_dataset_genotyping_protocol($c);

    my $categories = $data->{categories};

    if ($categories->{accessions}->[0]) {
        $self->dataset_genotype_query_jobs($c);
    }
    elsif ($categories->{trials}->[0]) {
        my $trials_ids = $categories->{trials};

        $c->controller('solGS::combinedTrials')->multi_pops_geno_files($c, $trials_ids);
        $c->stash->{genotype_files_list} = $c->stash->{multi_pops_geno_files};

        $c->controller('solGS::AsyncJob')->get_trials_genotype_query_jobs_args($c, $trials_ids, $geno_protocol);
        $c->stash->{dataset_geno_data_query_jobs} = $c->stash->{trials_genotype_query_jobs_args};
    }
}
# Assembles the genotype data query job for the stashed dataset and stashes
# it as 'dataset_geno_data_query_jobs'.
#
# The query details are serialized with nstore to a temp args file, and the
# job is an 'mx-run solGS::queryJobs' command line that points at that file.
# Also stashes 'r_temp_file' and 'report_file' along the way. Croaks when the
# args file cannot be written.
sub dataset_genotype_query_jobs {
    my ($self, $c) = @_;

    my $dataset_id  = $c->stash->{dataset_id};
    my $protocol_id = $c->stash->{genotyping_protocol_id};

    my $pop_id   = 'dataset_' . $dataset_id;
    my $data_dir = $c->stash->{solgs_datasets_dir};
    my $pop_type = 'dataset';

    my $files = $c->controller('solGS::Files');
    $files->genotype_file_name($c, $pop_id);
    my $geno_file = $c->stash->{genotype_file_name};

    my $query_args = {
        'dataset_id'             => $dataset_id,
        'data_dir'               => $data_dir,
        'genotype_file'          => $geno_file,
        'genotyping_protocol_id' => $protocol_id,
        'r_temp_file'            => "genotypes-list-genotype-data-query-${pop_id}",
    };

    $c->stash->{r_temp_file} = $query_args->{r_temp_file};
    $c->controller('solGS::AsyncJob')->create_cluster_accessible_tmp_files($c);
    my $out_temp_file = $c->stash->{out_file_temp};
    my $err_temp_file = $c->stash->{err_file_temp};

    my $temp_dir       = $c->stash->{solgs_tempfiles_dir};
    my $background_job = $c->stash->{background_job};

    my $report_file = $c->controller('solGS::Files')->create_tempfile($temp_dir, "geno-data-query-report-args-${pop_id}");
    $c->stash->{report_file} = $report_file;

    my $config = $c->controller('solGS::AsyncJob')->create_cluster_config(
        $c,
        {
            'temp_dir'     => $temp_dir,
            'out_file'     => $out_temp_file,
            'err_file'     => $err_temp_file,
            'cluster_host' => 'localhost'
        }
    );

    my $args_file = $c->controller('solGS::Files')->create_tempfile($temp_dir, "geno-data-query-job-args-file-${pop_id}");

    nstore($query_args, $args_file)
        or croak "data query script: $! serializing genotype lists genotype query details to $args_file ";

    my $dbhost = $c->config->{dbhost};
    my $dbname = $c->config->{dbname};
    my $dbpass = $c->config->{dbpass};
    my $dbuser = $c->config->{dbuser};

    # The pieces are joined without a separator; each literal carries its
    # own surrounding spaces.
    my $cmd = join q{},
        'mx-run solGS::queryJobs ',
        ' --dbhost ', $dbhost,
        ' --dbname ', $dbname,
        ' --dbuser ', $dbuser,
        ' --dbpass ', $dbpass,
        ' --data_type genotype ',
        ' --population_type ', $pop_type,
        ' --args_file ', $args_file;

    $c->stash->{dataset_geno_data_query_jobs} = {
        'cmd'            => $cmd,
        'config'         => $config,
        'background_job' => $background_job,
        'temp_dir'       => $temp_dir,
    };
}
# Returns the genotyping protocol id to use for a dataset.
#
# $dataset_id is optional; the stashed dataset id is used when it is not
# given. The dataset's 'genotyping_protocols' category may be an array
# reference (its first element is used) or a plain value. The candidate id
# is passed to solGS::genotypingProtocol::stash_protocol_id and the value
# found afterwards under the 'genotyping_protocol_id' stash key is returned.
sub get_dataset_genotyping_protocol {
    my ($self, $c, $dataset_id) = @_;

    $dataset_id = $c->stash->{dataset_id} if !$dataset_id;

    my $data = $self->get_model($c)->get_dataset_data($dataset_id);

    my $protocol_id = $data->{categories}->{genotyping_protocols};

    # reftype() returns undef for undefined or non-reference values; default
    # it to '' so the comparison does not raise an "uninitialized" warning.
    if ((reftype($protocol_id) // '') eq 'ARRAY') {
        $protocol_id = $protocol_id->[0];
    }

    $c->controller('solGS::genotypingProtocol')->stash_protocol_id($c, $protocol_id);
    $protocol_id = $c->stash->{genotyping_protocol_id};

    return $protocol_id;
}
# Stashes the plots of the stashed dataset as 'plots_names' and asks
# solGS::List to resolve the ids of those list elements.
sub get_dataset_plots_list {
    my ($self, $c) = @_;

    my $dataset_id = $c->stash->{dataset_id};

    $c->stash->{plots_names} = $self->get_model($c)->get_dataset_plots_list($dataset_id);

    my $list_controller = $c->controller('solGS::List');
    $list_controller->get_plots_list_elements_ids($c);
}
# Returns the model object provided by the solGS::Search controller.
sub get_model {
    my ($self, $c) = @_;

    my $search = $c->controller('solGS::Search');
    return $search->model($c);
}
# Stashes the summary of the dataset population (project id, name,
# description, owner and genotyping protocol url).
#
# Anonymous users are redirected to the login message page. For logged-in
# users with a stashed dataset id, the dataset name and description are read
# from the population metadata file, which holds tab-separated key/value
# lines.
#
# Metadata lines are looked up by their exact key (first tab-separated
# field) and are stripped of their line ending, so that a value containing
# the words 'description' or 'dataset_name' cannot be mistaken for that
# entry and so that values carry no trailing newline.
sub dataset_population_summary {
    my ($self, $c) = @_;

    my $dataset_id = $c->stash->{dataset_id};

    my $file_id = $self->dataset_file_id($c);
    my $tmp_dir = $c->stash->{solgs_datasets_dir};

    if (!$c->user) {
        my $page = "/" . $c->req->path;
        $c->res->redirect("/solgs/login/message?page=$page");
        $c->detach;
    }
    else {
        my $user_name = $c->user->id;
        my $protocol  = $c->controller('solGS::genotypingProtocol')->create_protocol_url($c);

        if ($dataset_id) {
            $c->controller('solGS::Files')->population_metadata_file($c, $tmp_dir, $file_id);
            my $metadata_file = $c->stash->{population_metadata_file};

            my @metadata = read_file($metadata_file, {binmode => ':utf8'});

            # Index the metadata by key; the first occurrence of a key wins.
            my %value_of;
            for my $line (@metadata) {
                chomp $line;
                my ($key, $value) = split /\t/, $line, 2;
                next if !defined $key || exists $value_of{$key};
                $value_of{$key} = $value;
            }

            my $dataset_name = $value_of{dataset_name};
            my $desc         = $value_of{description};

            $c->stash(
                project_id         => $file_id,
                project_name       => $dataset_name,
                selection_pop_name => $dataset_name,
                project_desc       => $desc,
                owner              => $user_name,
                protocol           => $protocol,
            );
        }
    }
}
# Builds the dataset population metadata text and stashes it as
# 'dataset_metadata'.
#
# The text is a header line plus the user id, dataset name and an
# 'Uploaded on' description, as tab-separated key/value lines joined by
# newlines (no trailing newline).
sub create_dataset_population_metadata {
    my ($self, $c) = @_;

    my $dataset_name = $self->get_dataset_name($c);
    my $user_id      = $c->user->id;
    my $uploaded_on  = 'Uploaded on: ' . strftime("%a %b %e %H:%M %Y", localtime);

    my @rows = (
        [ 'key',          'value' ],
        [ 'user_id',      $user_id ],
        [ 'dataset_name', $dataset_name ],
        [ 'description',  $uploaded_on ],
    );

    $c->stash->{dataset_metadata} = join "\n", map { join "\t", @{$_} } @rows;
}
# Returns the name of a dataset, as reported by the solGS::Search model.
#
# $dataset_id is optional; the stashed dataset id is used when it is not
# given. Any leading word-and-underscore prefix (e.g. 'dataset_') is removed
# from the id before the lookup.
sub get_dataset_name {
    my ($self, $c, $dataset_id) = @_;

    $dataset_id ||= $c->stash->{dataset_id};
    $dataset_id =~ s/\w+_//g;

    my $model = $c->controller('solGS::Search')->model($c);
    return $model->get_dataset_name($dataset_id);
}
# Writes the dataset population metadata to its metadata file (UTF-8) and
# stashes the file path as 'dataset_metadata_file'.
sub create_dataset_population_metadata_file {
    my ($self, $c) = @_;

    my $file_id = $self->dataset_file_id($c);
    my $tmp_dir = $c->stash->{solgs_datasets_dir};

    $c->controller('solGS::Files')->population_metadata_file($c, $tmp_dir, $file_id);
    my $metadata_file = $c->stash->{population_metadata_file};

    $self->create_dataset_population_metadata($c);

    write_file($metadata_file, {binmode => ':utf8'}, $c->stash->{dataset_metadata});

    $c->stash->{dataset_metadata_file} = $metadata_file;
}
# Resolves the phenotype and genotype file names of the dataset population.
#
# Returns a hash reference with the keys 'pheno_file' and 'geno_file'.
sub create_dataset_pop_data_files {
    my ($self, $c) = @_;

    my $file_id = $self->dataset_file_id($c);

    $c->controller('solGS::Files')->phenotype_file_name($c, $file_id);
    my $pheno_file = $c->stash->{phenotype_file_name};

    my $protocol_id = $self->get_dataset_genotyping_protocol($c);
    $c->controller('solGS::Files')->genotype_file_name($c, $file_id, $protocol_id);
    my $geno_file = $c->stash->{genotype_file_name};

    return {
        pheno_file => $pheno_file,
        geno_file  => $geno_file,
    };
}
# Submits a phenotype data query for the plots of the stashed dataset
# through solGS::List, then stashes 'phenotype_file' from the
# 'dataset_plots_list_phenotype_file' stash key.
#
# NOTE(review): nothing in this sub sets 'dataset_plots_list_phenotype_file';
# presumably submit_list_phenotype_data_query does -- confirm.
sub dataset_plots_list_phenotype_file {
    my ($self, $c) = @_;

    my $dataset_id = $c->stash->{dataset_id};
    my $plots_ids  = $c->controller('solGS::Search')->model($c)->get_dataset_plots_list($dataset_id);
    my $file_id    = $self->dataset_file_id($c);

    $c->stash->{pop_id} = $file_id;

    my $files = $c->controller('solGS::Files');

    $files->traits_list_file($c);
    my $traits_file = $c->stash->{traits_list_file};

    $files->phenotype_file_name($c, $file_id);
    my $pheno_file = $c->stash->{phenotype_file_name};

    $files->phenotype_metadata_file($c);
    my $metadata_file = $c->stash->{phenotype_metadata_file};

    # The datasets directory is deliberately not part of the query details.
    my $query_args = {
        'dataset_id'      => $dataset_id,
        'plots_ids'       => $plots_ids,
        'traits_file'     => $traits_file,
        'phenotype_file'  => $pheno_file,
        'metadata_file'   => $metadata_file,
        'r_temp_file'     => 'dataset-phenotype-data-query',
        'population_type' => 'plots_list'
    };

    $c->controller('solGS::List')->submit_list_phenotype_data_query($c, $query_args);
    $c->stash->{phenotype_file} = $c->stash->{dataset_plots_list_phenotype_file};
}
# Returns the file id of the stashed dataset: the dataset id itself when it
# already contains 'dataset', otherwise the id prefixed with 'dataset_'.
sub dataset_file_id {
    my ($self, $c) = @_;

    my $dataset_id = $c->stash->{dataset_id};

    return $dataset_id if $dataset_id =~ /dataset/;
    return 'dataset_' . $dataset_id;
}
# Private 'begin' action: asks solGS::Files to set up the solGS directories
# for this request context.
sub begin : Private {
    my ($self, $c) = @_;

    my $files = $c->controller('solGS::Files');
    $files->get_solgs_dirs($c);
}
# Finalize the Moose class definition.
__PACKAGE__->meta->make_immutable();
515 ####
517 ####