1 package SGN
::Controller
::solGS
::Dataset
;
4 use namespace
::autoclean
;
6 use Carp qw
/ carp confess croak /;
7 use File
::Slurp qw
/write_file read_file :edit prepend_file/;
9 use POSIX
qw(strftime);
10 use Scalar
::Util qw
/weaken reftype/;
11 use Storable qw
/ nstore retrieve /;
12 #BEGIN { extends 'Catalyst::Controller' }
# This controller inherits from Catalyst::Controller::REST (the plain
# Catalyst::Controller base is left commented out).
15 BEGIN { extends
'Catalyst::Controller::REST' }
# Content-type settings: 'application/json' is the default, and both
# 'application/json' and 'text/html' are mapped to 'JSON'.
# NOTE(review): presumably these are arguments to a __PACKAGE__->config(...)
# call whose opening and closing lines are not visible here -- confirm.
20 default => 'application/json',
22 map => { 'application/json' => 'JSON',
23 'text/html' => 'JSON' },
# REST action for /solgs/get/dataset/trials.
#
# Reads the 'dataset_id' request parameter (croaks when it is missing or
# false), stashes it, resolves the dataset's trials through
# get_dataset_trials_details(), and copies the resulting stash entries
# (trials_ids, combo_pops_id, trials_names, genotyping_protocol_id) into the
# REST response stash under the same key names.
sub get_dataset_trials :Path('/solgs/get/dataset/trials') Args(0) {
    my ($self, $c) = @_;

    my $dataset_id = $c->req->param('dataset_id');
    croak "Dataset id missing." unless $dataset_id;

    $c->stash->{dataset_id} = $dataset_id;
    $self->get_dataset_trials_details($c);

    # Expose the looked-up values to the client, key for key.
    my $stash = $c->stash;
    for my $key (qw(trials_ids combo_pops_id trials_names genotyping_protocol_id)) {
        $stash->{rest}{$key} = $stash->{$key};
    }
}
# REST action for /solgs/check/predicted/dataset/selection.
#
# Decodes the JSON-encoded 'arguments' request parameter, stashes the
# training trait ids and genotyping protocol id found in it, asks the
# solGS::Download controller to build the selection-prediction download URLs
# for the training/selection population pair, and sets the response content
# type to JSON.
# NOTE(review): the statements that serialise $ret and write the response
# body are not visible in this view -- confirm against the full file.
45 sub check_predicted_dataset_selection
:Path
('/solgs/check/predicted/dataset/selection') Args
(0) {
48 my $args = $c->req->param('arguments');
# Replace the raw JSON string with the decoded data structure.
50 my $json = JSON
->new();
51 $args = $json->decode($args);
53 my $training_pop_id = $args->{training_pop_id
};
54 my $selection_pop_id = $args->{selection_pop_id
};
55 $c->stash->{training_traits_ids
} = $args->{training_traits_ids
};
56 $c->stash->{genotyping_protocol_id
} = $args->{genotyping_protocol_id
};
58 $c->controller('solGS::Download')->selection_prediction_download_urls($c, $training_pop_id, $selection_pop_id);
# Declares $ret and autovivifies it as a hash ref holding the stashed result.
60 my $ret->{output
} = $c->stash->{selection_prediction_download
};
64 $c->res->content_type('application/json');
# Looks up the trials and genotyping protocol of the dataset whose id is in
# $c->stash->{dataset_id}.
#
# The trial ids are passed to solGS::combinedTrials->catalogue_combined_pops()
# and then stashed as both 'dataset_trials_ids' and 'trials_ids'; the protocol
# id is stashed as 'genotyping_protocol_id'.
sub get_dataset_trials_ids {
    my ($self, $c) = @_;

    my $dataset_id = $c->stash->{dataset_id};
    my $dataset    = $self->get_model($c)->get_dataset_data($dataset_id);
    my $trials_ids = $dataset->{categories}{trials};

    $c->controller('solGS::combinedTrials')->catalogue_combined_pops($c, $trials_ids);

    $c->stash->{genotyping_protocol_id} = $self->get_dataset_genotyping_protocol($c);

    for my $key (qw(dataset_trials_ids trials_ids)) {
        $c->stash->{$key} = $trials_ids;
    }
}
# Resolves the dataset's trial ids (see get_dataset_trials_ids) and then lets
# the solGS::combinedTrials controller process the details of that trials list.
sub get_dataset_trials_details {
    my ($self, $c) = @_;

    $self->get_dataset_trials_ids($c);

    my $combined_trials = $c->controller('solGS::combinedTrials');
    $combined_trials->process_trials_list_details($c);
}
# Stashes the dataset's genotypes list (see get_dataset_genotypes_list) and
# then asks the solGS::List controller for the genotype file of that list.
sub get_dataset_genotypes_genotype_data {
    my ($self, $c) = @_;

    $self->get_dataset_genotypes_list($c);

    my $list_controller = $c->controller('solGS::List');
    $list_controller->genotypes_list_genotype_file($c);
}
# Fetches the genotype ids of a dataset and stashes them.
#
# $dataset_id is optional; when false, $c->stash->{dataset_id} is used.
# Stashes 'genotypes_ids' (the ids returned by the model) and 'genotypes_list'
# (the result of solGS::List->transform_uniqueids_genotypes() on those ids).
sub get_dataset_genotypes_list {
    my ($self, $c, $dataset_id) = @_;

    $dataset_id ||= $c->stash->{dataset_id};

    my $ids       = $self->get_model($c)->get_genotypes_from_dataset($dataset_id);
    my $genotypes = $c->controller('solGS::List')->transform_uniqueids_genotypes($c, $ids);

    $c->stash->{genotypes_list} = $genotypes;
    $c->stash->{genotypes_ids}  = $ids;
}
# Builds the training-data query job(s) for the dataset in
# $c->stash->{dataset_id} and submits them through solGS::AsyncJob->run_async().
#
# A dataset that lists plots is handled as a plots list; otherwise, a dataset
# that lists trials is handled as a training population made of those trials.
# The resulting jobs file is stashed as 'dependent_jobs' before submission.
sub submit_dataset_training_data_query {
    my ($self, $c) = @_;

    my $dataset_id    = $c->stash->{dataset_id};
    my $data          = $self->get_model($c)->get_dataset_data($dataset_id);
    my $geno_protocol = $self->get_dataset_genotyping_protocol($c);

    # A category may be absent (undef). Dereferencing undef with @{...} in
    # rvalue context is fatal under strict refs, so fall back to an empty list
    # and let a trials-only dataset reach the elsif branch.
    my $plots  = $data->{categories}->{plots}  || [];
    my $trials = $data->{categories}->{trials} || [];

    my $query_jobs_file;

    if (@{$plots}) {
        ###### write dataset training data query job function instead...
        $c->stash->{plots_names} = $plots;
        $self->get_dataset_genotypes_list($c);

        $c->controller('solGS::List')->get_list_training_data_query_jobs_file($c);
        $query_jobs_file = $c->stash->{list_training_data_query_jobs_file};
    }
    elsif (@{$trials}) {
        $c->controller('solGS::AsyncJob')->get_training_pop_data_query_job_args_file($c, $trials, $geno_protocol);
        $query_jobs_file = $c->stash->{training_pop_data_query_job_args_file};
    }

    $c->stash->{dependent_jobs} = $query_jobs_file;
    $c->controller('solGS::AsyncJob')->run_async($c);
}
# Produces phenotype data for the dataset in $c->stash->{dataset_id}.
#
# When the dataset lists plots, the plot ids are stashed as 'plots_ids', the
# solGS::List controller writes the plots-list phenotype file, and that file
# is stashed as 'phenotype_file'. Otherwise, when the dataset lists trials,
# the trial ids are stashed as 'pops_ids_list' and the solGS::List controller
# fetches the phenotype data of those trials.
sub get_dataset_phenotype_data {
    my ($self, $c) = @_;

    my $dataset_id = $c->stash->{dataset_id};
    my $data       = $self->get_model($c)->get_dataset_data($dataset_id);
    my $categories = $data->{categories};

    if ($categories->{plots}[0]) {
        $c->stash->{plots_ids} = $categories->{plots};

        $c->controller('solGS::List')->plots_list_phenotype_file($c);
        $c->stash->{phenotype_file} = $c->stash->{plots_list_phenotype_file};
    }
    elsif ($categories->{trials}[0]) {
        my $trials = $categories->{trials};
        $c->stash->{pops_ids_list} = $trials;
        $c->controller('solGS::List')->get_trials_list_pheno_data($c);
    }
}
# Prepares the phenotype data query job(s) for the dataset in
# $c->stash->{dataset_id} and stashes them as 'dataset_pheno_data_query_jobs'.
#
# Plots take precedence: the plot ids are stashed as 'plots_ids' and the job
# comes from solGS::List->plots_list_phenotype_query_job(). For a trials
# dataset, the per-trial phenotype files are stashed as 'phenotype_files_list'
# and the jobs come from solGS::AsyncJob->get_trials_phenotype_query_jobs_args().
sub create_dataset_pheno_data_query_jobs {
    my ($self, $c) = @_;

    my $dataset_id = $c->stash->{dataset_id};
    my $data       = $self->get_model($c)->get_dataset_data($dataset_id);
    my $categories = $data->{categories};

    if ($categories->{plots}[0]) {
        $c->stash->{plots_ids} = $categories->{plots};

        $c->controller('solGS::List')->plots_list_phenotype_query_job($c);
        $c->stash->{dataset_pheno_data_query_jobs} = $c->stash->{plots_list_phenotype_query_job};
    }
    elsif ($categories->{trials}[0]) {
        my $trials_ids = $categories->{trials};

        $c->controller('solGS::combinedTrials')->multi_pops_pheno_files($c, $trials_ids);
        $c->stash->{phenotype_files_list} = $c->stash->{multi_pops_pheno_files};

        $c->controller('solGS::AsyncJob')->get_trials_phenotype_query_jobs_args($c, $trials_ids);
        $c->stash->{dataset_pheno_data_query_jobs} = $c->stash->{trials_phenotype_query_jobs_args};
    }
}
# Prepares the genotype data query job(s) for the dataset in
# $c->stash->{dataset_id}; the result ends up in
# $c->stash->{dataset_geno_data_query_jobs}.
#
# A dataset that lists accessions is delegated to dataset_genotype_query_jobs().
# Otherwise a trials dataset has its per-trial genotype files stashed as
# 'genotype_files_list' and its jobs built by
# solGS::AsyncJob->get_trials_genotype_query_jobs_args().
sub create_dataset_geno_data_query_jobs {
    my ($self, $c) = @_;

    my $dataset_id = $c->stash->{dataset_id};
    my $data       = $self->get_model($c)->get_dataset_data($dataset_id);

    # Resolved before branching, exactly as in the original statement order.
    my $geno_protocol = $self->get_dataset_genotyping_protocol($c);
    my $categories    = $data->{categories};

    if ($categories->{accessions}[0]) {
        $self->dataset_genotype_query_jobs($c);
    }
    elsif ($categories->{trials}[0]) {
        my $trials_ids = $categories->{trials};

        $c->controller('solGS::combinedTrials')->multi_pops_geno_files($c, $trials_ids);
        $c->stash->{genotype_files_list} = $c->stash->{multi_pops_geno_files};

        $c->controller('solGS::AsyncJob')->get_trials_genotype_query_jobs_args($c, $trials_ids, $geno_protocol);
        $c->stash->{dataset_geno_data_query_jobs} = $c->stash->{trials_genotype_query_jobs_args};
    }
}
# Assembles the genotype data query job for a dataset and stashes its
# description as 'dataset_geno_data_query_jobs'.
#
# Reads dataset_id, genotyping_protocol_id, solgs_datasets_dir,
# solgs_tempfiles_dir and background_job from the stash; serialises the query
# arguments to a temp file with Storable's nstore (croaks on failure); and
# builds an 'mx-run solGS::queryJobs' command line pointing at that file.
# NOTE(review): the lines opening/closing the $args, $config_args and
# $job_args hashes, and some $job_args keys, are not visible in this view.
228 sub dataset_genotype_query_jobs
{
231 my $dataset_id = $c->stash->{dataset_id
};
232 my $protocol_id = $c->stash->{genotyping_protocol_id
};
# Population id / file id used for all file names below: 'dataset_<id>'.
234 my $pop_id = 'dataset_' . $dataset_id;
235 my $data_dir = $c->stash->{solgs_datasets_dir
};
236 my $pop_type = 'dataset';
238 $c->controller('solGS::Files')->genotype_file_name($c, $pop_id);
239 my $geno_file = $c->stash->{genotype_file_name
};
# Query arguments that are later serialised to $args_file.
242 'dataset_id'=>$dataset_id,
243 'data_dir' => $data_dir,
244 'genotype_file' => $geno_file,
245 'genotyping_protocol_id'=> $protocol_id,
246 'r_temp_file' => "genotypes-list-genotype-data-query-${pop_id}",
249 $c->stash->{r_temp_file
} = $args->{r_temp_file
};
250 $c->controller('solGS::AsyncJob')->create_cluster_accessible_tmp_files($c);
251 my $out_temp_file = $c->stash->{out_file_temp
};
252 my $err_temp_file = $c->stash->{err_file_temp
};
254 my $temp_dir = $c->stash->{solgs_tempfiles_dir
};
255 my $background_job = $c->stash->{background_job
};
257 my $report_file = $c->controller('solGS::Files')->create_tempfile($temp_dir, "geno-data-query-report-args-${pop_id}");
258 $c->stash->{report_file
} = $report_file;
# Settings handed to create_cluster_config(); the host is fixed to localhost.
261 'temp_dir' => $temp_dir,
262 'out_file' => $out_temp_file,
263 'err_file' => $err_temp_file,
264 'cluster_host' => 'localhost'
267 my $config = $c->controller('solGS::AsyncJob')->create_cluster_config($c, $config_args);
269 my $args_file = $c->controller('solGS::Files')->create_tempfile($temp_dir, "geno-data-query-job-args-file-${pop_id}");
271 nstore
$args, $args_file
272 or croak
"data query script: $! serializing genotype lists genotype query details to $args_file ";
274 my $dbhost = $c->config->{dbhost
};
275 my $dbname = $c->config->{dbname
};
276 my $dbpass = $c->config->{dbpass
};
277 my $dbuser = $c->config->{dbuser
};
# NOTE(review): the database password is concatenated, unquoted, into the
# command string -- consider whether it can be passed another way.
279 my $cmd = 'mx-run solGS::queryJobs '
280 . ' --dbhost ' . $dbhost
281 .' --dbname ' . $dbname
282 .' --dbuser ' . $dbuser
283 .' --dbpass ' . $dbpass
284 . ' --data_type genotype '
285 . ' --population_type ' . $pop_type
286 . ' --args_file ' . $args_file;
291 'background_job'=> $background_job,
292 'temp_dir' => $temp_dir,
295 $c->stash->{dataset_geno_data_query_jobs
} = $job_args;
# Determines the genotyping protocol id of a dataset.
#
# Parameters:
#   $c          - the Catalyst context.
#   $dataset_id - optional; when false, $c->stash->{dataset_id} is used.
#
# The dataset's 'genotyping_protocols' category may be an array ref, in which
# case its first element is taken. The candidate id is passed to
# solGS::genotypingProtocol->stash_protocol_id(), and the value that call
# leaves in $c->stash->{genotyping_protocol_id} is what gets returned.
#
# Returns: the stashed genotyping protocol id.
sub get_dataset_genotyping_protocol {
    my ($self, $c, $dataset_id) = @_;

    $dataset_id = $c->stash->{dataset_id} if !$dataset_id;

    my $data        = $self->get_model($c)->get_dataset_data($dataset_id);
    my $protocol_id = $data->{categories}->{genotyping_protocols};

    # reftype() returns undef for anything that is not a reference, so test
    # ref() first to avoid comparing undef with 'ARRAY' when the dataset has
    # no protocol or stores it as a plain scalar.
    if (ref $protocol_id && reftype($protocol_id) eq 'ARRAY') {
        $protocol_id = $protocol_id->[0];
    }

    $c->controller('solGS::genotypingProtocol')->stash_protocol_id($c, $protocol_id);
    $protocol_id = $c->stash->{genotyping_protocol_id};

    return $protocol_id;
}
# Stashes the plots of the dataset in $c->stash->{dataset_id} as 'plots_names'
# and then has the solGS::List controller resolve the ids of those plots.
sub get_dataset_plots_list {
    my ($self, $c) = @_;

    my $dataset_id  = $c->stash->{dataset_id};
    my $plots_names = $self->get_model($c)->get_dataset_plots_list($dataset_id);

    $c->stash->{plots_names} = $plots_names;

    my $list_controller = $c->controller('solGS::List');
    $list_controller->get_plots_list_elements_ids($c);
}
# Returns the model obtained from the solGS::Search controller for this context.
# NOTE(review): presumably the body of get_model(), which the other subs call
# as $self->get_model($c); its `sub` line is not visible in this view -- confirm.
337 return $c->controller('solGS::Search')->model($c);
# Stashes summary information about a dataset population: project_id,
# project_name, selection_pop_name, project_desc and protocol.
#
# The name and description are read back from the population metadata file
# (tab-separated key/value lines) kept under solgs_datasets_dir.
# NOTE(review): several lines of this sub are not visible here (the condition
# guarding the login redirect, and part of the final stash call) -- confirm
# against the full file before relying on this description.
342 sub dataset_population_summary
{
345 my $dataset_id = $c->stash->{dataset_id
};
347 my $file_id = $self->dataset_file_id($c);
348 my $tmp_dir = $c->stash->{solgs_datasets_dir
};
# Redirect to the login message page, passing the current request path.
# NOTE(review): presumably only when no user is logged in -- guard not visible.
352 my $page = "/" . $c->req->path;
353 $c->res->redirect("/solgs/login/message?page=$page");
# NOTE(review): $user_name holds the user *id* and is not used in the visible lines.
358 my $user_name = $c->user->id;
359 my $protocol = $c->controller('solGS::genotypingProtocol')->create_protocol_url($c);
363 $c->controller('solGS::Files')->population_metadata_file($c, $tmp_dir, $file_id);
364 my $metadata_file = $c->stash->{population_metadata_file
};
# List context: one element per line of the metadata file.
366 my @metadata = read_file
($metadata_file, {binmode => ':utf8'});
368 my ($key, $dataset_name, $desc);
# Take the first line mentioning each key and keep the text after the tab.
# NOTE(review): lines are not chomped, so a value may keep a trailing newline.
370 ($desc) = grep {/description/} @metadata;
371 ($key, $desc) = split(/\t/, $desc);
373 ($dataset_name) = grep {/dataset_name/} @metadata;
374 ($key, $dataset_name) = split(/\t/, $dataset_name);
376 $c->stash(project_id
=> $file_id,
377 project_name
=> $dataset_name,
378 selection_pop_name
=> $dataset_name,
379 project_desc
=> $desc,
381 protocol
=> $protocol,
# Builds the metadata text for a dataset population and stashes it as
# 'dataset_metadata'.
#
# The text is four tab-separated key/value lines joined by newlines (no
# trailing newline): a 'key'/'value' header, the current user's id, the
# dataset name, and a description carrying the local timestamp.
sub create_dataset_population_metadata {
    my ($self, $c) = @_;

    my $dataset_name = $self->get_dataset_name($c);

    my @rows = (
        [ 'key',          'value' ],
        [ 'user_id',      scalar $c->user->id ],
        [ 'dataset_name', $dataset_name ],
        [ 'description',  'Uploaded on: ' . strftime("%a %b %e %H:%M %Y", localtime) ],
    );

    $c->stash->{dataset_metadata} = join "\n", map { join "\t", @{$_} } @rows;
}
# Returns the name of a dataset as reported by the solGS::Search model.
#
# $dataset_id is optional; when false, $c->stash->{dataset_id} is used. Every
# "<word characters>_" prefix is stripped from the id before the lookup.
sub get_dataset_name {
    my ($self, $c, $dataset_id) = @_;

    $dataset_id ||= $c->stash->{dataset_id};
    $dataset_id =~ s/\w+_//g;

    my $model        = $c->controller('solGS::Search')->model($c);
    my $dataset_name = $model->get_dataset_name($dataset_id);

    return $dataset_name;
}
# Writes the dataset population metadata to its file under solgs_datasets_dir
# (UTF-8) and stashes the file path as 'dataset_metadata_file'.
#
# The path comes from solGS::Files->population_metadata_file(); the content
# comes from create_dataset_population_metadata().
sub create_dataset_population_metadata_file {
    my ($self, $c) = @_;

    my $file_id      = $self->dataset_file_id($c);
    my $datasets_dir = $c->stash->{solgs_datasets_dir};

    $c->controller('solGS::Files')->population_metadata_file($c, $datasets_dir, $file_id);
    my $metadata_file = $c->stash->{population_metadata_file};

    $self->create_dataset_population_metadata($c);
    my $content = $c->stash->{dataset_metadata};

    write_file($metadata_file, { binmode => ':utf8' }, $content);

    $c->stash->{dataset_metadata_file} = $metadata_file;
}
# Resolves the phenotype and genotype file names for the dataset population.
#
# Returns a hash ref with the keys 'pheno_file' and 'geno_file'. The genotype
# file name is requested for the dataset's genotyping protocol.
sub create_dataset_pop_data_files {
    my ($self, $c) = @_;

    my $file_id = $self->dataset_file_id($c);

    $c->controller('solGS::Files')->phenotype_file_name($c, $file_id);
    my $pheno_file = $c->stash->{phenotype_file_name};

    my $protocol_id = $self->get_dataset_genotyping_protocol($c);
    $c->controller('solGS::Files')->genotype_file_name($c, $file_id, $protocol_id);
    my $geno_file = $c->stash->{genotype_file_name};

    my $files = {
        pheno_file => $pheno_file,
        geno_file  => $geno_file,
    };

    return $files;
}
# Submits a phenotype data query for the plots of the dataset in
# $c->stash->{dataset_id}.
#
# Stashes the dataset file id as 'pop_id', resolves the traits, phenotype and
# phenotype-metadata file names through solGS::Files, and passes everything
# to solGS::List->submit_list_phenotype_data_query() with population type
# 'plots_list'.
sub dataset_plots_list_phenotype_file {
    my ($self, $c) = @_;

    my $dataset_id = $c->stash->{dataset_id};
    my $plots_ids  = $c->controller('solGS::Search')->model($c)->get_dataset_plots_list($dataset_id);
    my $file_id    = $self->dataset_file_id($c);

    $c->stash->{pop_id} = $file_id;

    # Each file-name helper leaves its result in the stash; read it back
    # right after the call, in the original order.
    $c->controller('solGS::Files')->traits_list_file($c);
    my $traits_file = $c->stash->{traits_list_file};

    $c->controller('solGS::Files')->phenotype_file_name($c, $file_id);
    my $pheno_file = $c->stash->{phenotype_file_name};

    $c->controller('solGS::Files')->phenotype_metadata_file($c);
    my $metadata_file = $c->stash->{phenotype_metadata_file};

    my $args = {
        'dataset_id'      => $dataset_id,
        'plots_ids'       => $plots_ids,
        'traits_file'     => $traits_file,
        'phenotype_file'  => $pheno_file,
        'metadata_file'   => $metadata_file,
        'r_temp_file'     => 'dataset-phenotype-data-query',
        'population_type' => 'plots_list',
    };

    $c->controller('solGS::List')->submit_list_phenotype_data_query($c, $args);

    # NOTE(review): 'dataset_plots_list_phenotype_file' is not set anywhere in
    # this sub (the assignment that did so is commented out in the original),
    # so unless the List controller sets it this stashes undef -- confirm
    # whether $pheno_file was intended.
    $c->stash->{phenotype_file} = $c->stash->{dataset_plots_list_phenotype_file};
}
# Returns the file id used for the dataset in $c->stash->{dataset_id}: the id
# itself when it already contains 'dataset', otherwise the id prefixed with
# 'dataset_'.
sub dataset_file_id {
    my ($self, $c) = @_;

    my $dataset_id = $c->stash->{dataset_id};

    return $dataset_id =~ /dataset/
        ? $dataset_id
        : 'dataset_' . $dataset_id;
}
# Private 'begin' action: has the solGS::Files controller set up the solGS
# directories for this context before the controller's actions run.
sub begin : Private {
    my ($self, $c) = @_;

    my $files_controller = $c->controller('solGS::Files');
    $files_controller->get_solgs_dirs($c);
}
513 __PACKAGE__
->meta->make_immutable;