start fixing test for multi cat phenotype upload.
[sgn.git] / lib / CXGN / Genotype / GWAS.pm
blob7658bd43fbf5d95f075e0cde7cb2b1f7264f096f
1 package CXGN::Genotype::GWAS;
3 =head1 NAME
5 CXGN::Genotype::GWAS - an object to handle GWAS
7 =head1 USAGE
9 my $geno = CXGN::Genotype::GWAS->new({
10 bcs_schema=>$schema,
11 grm_temp_file=>$file_temp_path,
12 gwas_temp_file=>$file_temp_path_gwas,
13 pheno_temp_file=>$file_temp_path_pheno,
14 people_schema=>$people_schema,
15 download_format=>$download_format, #either 'results_tsv' or 'manhattan_qq_plots'
16 accession_id_list=>\@accession_list,
17 trait_id_list=>\@trait_id_list,
18 traits_are_repeated_measurements=>$traits_are_repeated_measurements,
19 protocol_id=>$protocol_id,
20 get_grm_for_parental_accessions=>1,
21 cache_root=>$cache_root,
22 minor_allele_frequency=>0.01,
23 marker_filter=>0.6,
24 individuals_filter=>0.8
25 });
26 $geno->download_gwas();
28 =head1 DESCRIPTION
31 =head1 AUTHORS
33 Nicolas Morales <nm529@cornell.edu>
35 =cut
37 use strict;
38 use warnings;
39 use Moose;
40 use Try::Tiny;
41 use Data::Dumper;
42 use SGN::Model::Cvterm;
43 use CXGN::Trial;
44 use JSON;
45 use CXGN::Stock::Accession;
46 use CXGN::Genotype::Protocol;
47 use CXGN::Genotype::Search;
48 use CXGN::Genotype::ComputeHybridGenotype;
49 use CXGN::Phenotypes::SearchFactory;
50 use CXGN::Page;
51 use R::YapRI::Base;
52 use R::YapRI::Data::Matrix;
53 use CXGN::Dataset::Cache;
54 use Cache::File;
55 use Digest::MD5 qw | md5_hex |;
56 use File::Slurp qw | write_file |;
57 use File::Temp 'tempfile';
58 use File::Copy;
59 use POSIX;
61 has 'bcs_schema' => (
62 isa => 'Bio::Chado::Schema',
63 is => 'rw',
64 required => 1
67 has 'people_schema' => (
68 isa => 'CXGN::People::Schema',
69 is => 'rw',
70 required => 1
73 # Uses a cached file system for getting genotype results and getting GRM
74 has 'cache_root' => (
75 isa => 'Str',
76 is => 'rw',
77 required => 1
80 has 'cache' => (
81 isa => 'Cache::File',
82 is => 'rw',
85 has 'cache_expiry' => (
86 isa => 'Int',
87 is => 'rw',
88 default => 0, # never expires?
91 has '_cache_key' => (
92 isa => 'Str',
93 is => 'rw',
96 has 'grm_temp_file' => (
97 isa => 'Str',
98 is => 'rw',
99 required => 1
102 has 'gwas_temp_file' => (
103 isa => 'Str',
104 is => 'rw',
105 required => 1
108 has 'pheno_temp_file' => (
109 isa => 'Str',
110 is => 'rw',
111 required => 1
114 has 'protocol_id' => (
115 isa => 'Int',
116 is => 'rw',
117 required => 1
120 has 'download_format' => (
121 isa => 'Str',
122 is => 'rw',
123 required => 1
126 has 'minor_allele_frequency' => (
127 isa => 'Num',
128 is => 'rw',
129 default => sub{0.05}
132 has 'marker_filter' => (
133 isa => 'Num',
134 is => 'rw',
135 default => sub{0.60}
138 has 'individuals_filter' => (
139 isa => 'Num',
140 is => 'rw',
141 default => sub{0.80}
144 has 'accession_id_list' => (
145 isa => 'ArrayRef[Int]|Undef',
146 is => 'rw'
149 has 'trait_id_list' => (
150 isa => 'ArrayRef[Int]|Undef',
151 is => 'rw'
154 has 'traits_are_repeated_measurements' => (
155 isa => 'Bool',
156 is => 'ro',
157 default => 0
160 # If the accessions in the plots you are interested have not been genotyped (as in hybrids), can get this boolean to 1 and give a list of plot_id_list and you will get back a GRM built from the parent accessions for those plots (for the plots whose parents were genotyped)
161 has 'get_grm_for_parental_accessions' => (
162 isa => 'Bool',
163 is => 'ro',
164 default => 0
167 has 'genotypeprop_hash_select' => (
168 isa => 'ArrayRef[Str]',
169 is => 'ro',
170 default => sub {['DS']} #THESE ARE THE GENERIC AND EXPECTED VCF ATRRIBUTES. For dosage matrix we only need DS
173 has 'protocolprop_top_key_select' => (
174 isa => 'ArrayRef[Str]',
175 is => 'ro',
176 default => sub {['markers']} #THESE ARE ALL POSSIBLE TOP LEVEL KEYS IN PROTOCOLPROP BASED ON VCF LOADING. For dosage matrix we only need markers
179 has 'protocolprop_marker_hash_select' => (
180 isa => 'ArrayRef[Str]',
181 is => 'ro',
182 default => sub {['name']} #THESE ARE ALL POSSIBLE PROTOCOLPROP MARKER HASH KEYS BASED ON VCF LOADING. For dosage matrix we only need name
185 has 'return_only_first_genotypeprop_for_stock' => (
186 isa => 'Bool',
187 is => 'ro',
188 default => 1
191 sub get_gwas {
192 my $self = shift;
193 my $shared_cluster_dir_config = shift;
194 my $backend_config = shift;
195 my $cluster_host_config = shift;
196 my $web_cluster_queue_config = shift;
197 my $basepath_config = shift;
198 my $schema = $self->bcs_schema();
199 my $people_schema = $self->people_schema();
200 my $cache_root_dir = $self->cache_root();
201 my $accession_list = $self->accession_id_list();
202 my $trait_list = $self->trait_id_list();
203 my $protocol_id = $self->protocol_id();
204 my $get_grm_for_parental_accessions = $self->get_grm_for_parental_accessions();
205 my $grm_tempfile = $self->grm_temp_file();
206 my $gwas_tempfile = $self->gwas_temp_file();
207 my $pheno_tempfile = $self->pheno_temp_file();
208 my $download_format = $self->download_format();
209 my $traits_are_repeated_measurements = $self->traits_are_repeated_measurements();
211 my $accession_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'accession', 'stock_type')->cvterm_id();
212 my $plot_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'plot', 'stock_type')->cvterm_id();
213 my $plot_of_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'plot_of', 'stock_relationship')->cvterm_id();
214 my $female_parent_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'female_parent', 'stock_relationship')->cvterm_id();
215 my $male_parent_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'male_parent', 'stock_relationship')->cvterm_id();
217 my $number_system_cores = `getconf _NPROCESSORS_ONLN` or die "Could not get number of system cores!\n";
218 chomp($number_system_cores);
219 print STDERR "NUMCORES $number_system_cores\n";
221 my $phenotypes_search = CXGN::Phenotypes::SearchFactory->instantiate(
222 'MaterializedViewTable',
224 bcs_schema=>$schema,
225 data_level=>'plot',
226 trait_list=>$trait_list,
227 accession_list=>$accession_list,
228 exclude_phenotype_outlier=>0,
229 include_timestamp=>0
232 my ($data, $unique_traits) = $phenotypes_search->search();
233 my %unique_trait_ids;
234 my %unique_observation_units;
235 my %phenotype_data_hash;
236 my %filtered_accession_ids;
237 foreach my $d (@$data) {
238 $unique_observation_units{$d->{observationunit_stock_id}} = $d;
239 foreach my $o (@{$d->{observations}}) {
240 my $trait_id_use;
241 if ($traits_are_repeated_measurements) {
242 $trait_id_use = '0001';
244 else {
245 $trait_id_use = $o->{trait_id};
247 $unique_trait_ids{$trait_id_use}++;
248 if ($o->{value} || $o->{value} == 0) {
249 $phenotype_data_hash{$d->{observationunit_stock_id}}->{$trait_id_use} = $o->{value};
250 $filtered_accession_ids{$d->{germplasm_stock_id}}++;
254 my @unique_stock_ids_sorted = sort keys %unique_observation_units;
255 my @unique_trait_ids_sorted = sort keys %unique_trait_ids;
256 my @unique_accession_ids_sorted = sort keys %filtered_accession_ids;
257 $accession_list = \@unique_accession_ids_sorted;
259 my $trait_string_sql = join ',', @unique_trait_ids_sorted;
260 open(my $F_pheno, ">", $pheno_tempfile) || die "Can't open file ".$pheno_tempfile;
261 print $F_pheno 'gid,field_trial_id,replicate,'.$trait_string_sql."\n";
262 foreach my $stock_id (@unique_stock_ids_sorted) {
263 my $d = $unique_observation_units{$stock_id};
264 print $F_pheno $d->{germplasm_stock_id}.','.$d->{trial_id}.','.$d->{obsunit_rep};
265 foreach my $t (@unique_trait_ids_sorted) {
266 my $pheno_val = $phenotype_data_hash{$stock_id}->{$t} || '';
267 print $F_pheno ','.$pheno_val;
269 print $F_pheno "\n";
271 close($F_pheno);
273 my $protocol = CXGN::Genotype::Protocol->new({
274 bcs_schema => $schema,
275 nd_protocol_id => $protocol_id
277 my $markers = $protocol->markers;
278 my @all_marker_objects = values %$markers;
279 if (scalar(@all_marker_objects) > 10000) {
280 my $page = CXGN::Page->new();
281 $page->message_page('GWAS Error', 'Please choose less than 10,000 markers');
282 print STDERR "Error: for GWAS please choose less than 10,000 markers";
285 no warnings 'uninitialized';
286 @all_marker_objects = sort { $a->{chrom} cmp $b->{chrom} || $a->{pos} <=> $b->{pos} || $a->{name} cmp $b->{name} } @all_marker_objects;
288 my @individuals_stock_ids;
289 my @all_individual_accessions_stock_ids;
290 my $counter = 0;
292 # In this case a list of accessions is given, so get a GRM between these accessions
293 if ($accession_list && scalar(@$accession_list)>0 && !$get_grm_for_parental_accessions){
294 @all_individual_accessions_stock_ids = @$accession_list;
295 if (scalar(@all_individual_accessions_stock_ids) > 100) {
296 my $page = CXGN::Page->new();
297 $page->message_page('GWAS Error', 'Please choose less than 100 accessions');
298 print STDERR "Error: for GWAS please choose less than 100 accessions";
300 foreach (@$accession_list) {
301 my $dataset = CXGN::Dataset::Cache->new({
302 people_schema=>$people_schema,
303 schema=>$schema,
304 cache_root=>$cache_root_dir,
305 accessions=>[$_]
307 my $genotypes = $dataset->retrieve_genotypes($protocol_id, ['DS'], ['markers'], ['name','chrom','pos'], 1, [], undef, undef, []);
309 if (scalar(@$genotypes)>0) {
311 # For old genotyping protocols without nd_protocolprop info...
312 if (scalar(@all_marker_objects) == 0) {
313 my $position_placeholder = 1;
314 foreach my $o (sort genosort keys %{$genotypes->[0]->{selected_genotype_hash}}) {
315 push @all_marker_objects, {name => $o, chrom => '1', pos => $position_placeholder};
316 $position_placeholder++;
320 foreach my $p (0..scalar(@$genotypes)-1) {
321 my $geno = $genotypes->[$p];
323 my $genotype_string = "";
324 if ($counter == 0) {
325 $genotype_string .= "ID\t";
326 foreach my $m (@all_marker_objects) {
327 $genotype_string .= $m->{name} . "\t";
329 $genotype_string .= "\n";
330 $genotype_string .= "CHROM\t";
331 foreach my $m (@all_marker_objects) {
332 $genotype_string .= $geno->{selected_protocol_hash}->{markers}->{$m->{name}}->{chrom} . " \t";
334 $genotype_string .= "\n";
335 $genotype_string .= "POS\t";
336 foreach my $m (@all_marker_objects) {
337 $genotype_string .= $geno->{selected_protocol_hash}->{markers}->{$m->{name}}->{pos} . " \t";
339 $genotype_string .= "\n";
341 my $genotype_id = $geno->{stock_id};
342 my $genotype_data_string = "";
343 foreach my $m (@all_marker_objects) {
344 $genotype_data_string .= $geno->{selected_genotype_hash}->{$m->{name}}->{'DS'}."\t";
346 $genotype_string .= $genotype_id."\t".$genotype_data_string."\n";
348 push @individuals_stock_ids, $genotype_id;
349 write_file($grm_tempfile, {append => 1}, $genotype_string);
350 undef $genotypes->[$p];
351 $counter++;
353 undef $genotypes;
357 # IN this case of a hybrid evaluation where the parents of the accessions planted in a plot are genotyped
358 elsif ($get_grm_for_parental_accessions && $accession_list && scalar(@$accession_list)>0) {
359 print STDERR "COMPUTING GENOTYPE FROM PARENTS FOR ACCESSIONS\n";
360 my $accession_list_string = join ',', @$accession_list;
361 my $q = "SELECT accession.stock_id, female_parent.stock_id, male_parent.stock_id
362 FROM stock AS accession
363 JOIN stock_relationship AS female_parent_rel ON(accession.stock_id=female_parent_rel.object_id AND female_parent_rel.type_id=$female_parent_cvterm_id)
364 JOIN stock AS female_parent ON(female_parent_rel.subject_id = female_parent.stock_id AND female_parent.type_id=$accession_cvterm_id)
365 JOIN stock_relationship AS male_parent_rel ON(accession.stock_id=male_parent_rel.object_id AND male_parent_rel.type_id=$male_parent_cvterm_id)
366 JOIN stock AS male_parent ON(male_parent_rel.subject_id = male_parent.stock_id AND male_parent.type_id=$accession_cvterm_id)
367 WHERE accession.type_id=$accession_cvterm_id AND accession.stock_id IN ($accession_list_string);";
368 my $h = $schema->storage->dbh()->prepare($q);
369 $h->execute();
370 my @accession_stock_ids_found = ();
371 my @female_stock_ids_found = ();
372 my @male_stock_ids_found = ();
373 while (my ($accession_stock_id, $female_parent_stock_id, $male_parent_stock_id) = $h->fetchrow_array()) {
374 push @accession_stock_ids_found, $accession_stock_id;
375 push @female_stock_ids_found, $female_parent_stock_id;
376 push @male_stock_ids_found, $male_parent_stock_id;
379 # print STDERR Dumper \@accession_stock_ids_found;
380 # print STDERR Dumper \@female_stock_ids_found;
381 # print STDERR Dumper \@male_stock_ids_found;
383 @all_individual_accessions_stock_ids = @accession_stock_ids_found;
385 for my $i (0..scalar(@accession_stock_ids_found)-1) {
386 my $female_stock_id = $female_stock_ids_found[$i];
387 my $male_stock_id = $male_stock_ids_found[$i];
388 my $accession_stock_id = $accession_stock_ids_found[$i];
390 my $dataset = CXGN::Dataset::Cache->new({
391 people_schema=>$people_schema,
392 schema=>$schema,
393 cache_root=>$cache_root_dir,
394 accessions=>[$female_stock_id, $male_stock_id]
396 my $genotypes = $dataset->retrieve_genotypes($protocol_id, ['DS'], ['markers'], ['name', 'chrom', 'pos'], 1, [], undef, undef, []);
398 if (scalar(@$genotypes) > 0) {
399 # For old genotyping protocols without nd_protocolprop info...
400 if (scalar(@all_marker_objects) == 0) {
401 foreach my $o (sort genosort keys %{$genotypes->[0]->{selected_genotype_hash}}) {
402 push @all_marker_objects, {name => $o};
406 my $geno = $genotypes->[0];
408 my $genotype_string = "";
409 if ($counter == 0) {
410 $genotype_string .= "ID\t";
411 foreach my $m (@all_marker_objects) {
412 $genotype_string .= $m->{name} . "\t";
414 $genotype_string .= "\n";
415 $genotype_string .= "CHROM\t";
416 foreach my $m (@all_marker_objects) {
417 $genotype_string .= $geno->{selected_protocol_hash}->{markers}->{$m->{name}}->{chrom} . " \t";
419 $genotype_string .= "\n";
420 $genotype_string .= "POS\t";
421 foreach my $m (@all_marker_objects) {
422 $genotype_string .= $geno->{selected_protocol_hash}->{markers}->{$m->{name}}->{pos} . " \t";
424 $genotype_string .= "\n";
426 my $geno_hybrid = CXGN::Genotype::ComputeHybridGenotype->new({
427 parental_genotypes=>$genotypes,
428 marker_objects=>\@all_marker_objects
430 my $progeny_genotype = $geno_hybrid->get_hybrid_genotype();
432 push @individuals_stock_ids, $accession_stock_id;
433 my $genotype_string_scores = join "\t", @$progeny_genotype;
434 $genotype_string .= $accession_stock_id."\t".$genotype_string_scores."\n";
435 write_file($grm_tempfile, {append => 1}, $genotype_string);
436 undef $progeny_genotype;
437 $counter++;
442 my $transpose_tempfile = $grm_tempfile . "_transpose";
444 my $tmp_output_dir = $shared_cluster_dir_config."/tmp_genotype_download_gwas";
445 mkdir $tmp_output_dir if ! -d $tmp_output_dir;
447 my $transpose_cmd = CXGN::Tools::Run->new(
449 backend => $backend_config,
450 submit_host => $cluster_host_config,
451 temp_base => $tmp_output_dir,
452 queue => $web_cluster_queue_config,
453 do_cleanup => 0,
454 out_file => $transpose_tempfile,
455 # out_file => $transpose_tempfile,
456 # don't block and wait if the cluster looks full
457 max_cluster_jobs => 1_000_000_000,
461 # Do the transposition job on the cluster
462 $transpose_cmd->run_cluster(
463 "perl ",
464 $basepath_config."/bin/transpose_matrix.pl",
465 $grm_tempfile,
467 $transpose_cmd->is_cluster(1);
468 $transpose_cmd->wait;
470 # print STDERR Dumper \@all_marker_names;
471 # print STDERR Dumper \@individuals_stock_ids;
472 # print STDERR Dumper \@dosage_matrix;
474 my $maf = $self->minor_allele_frequency();
475 my $marker_filter = $self->marker_filter();
476 my $individuals_filter = $self->individuals_filter();
478 my $cmd = 'R -e "library(genoDataFilter); library(rrBLUP); library(data.table); library(scales);
479 pheno <- fread(\''.$pheno_tempfile.'\', header=TRUE, sep=\',\');
480 pheno\$field_trial_id <- as.factor(pheno\$field_trial_id);
481 pheno\$replicate <- as.factor(pheno\$replicate);
482 geno_mat_marker_first <- fread(\''.$transpose_tempfile.'\', header=TRUE, sep=\'\t\') #has sample names as column names, first 3 columns are marker info;
483 geno_mat_sample_first <- data.frame(fread(\''.$grm_tempfile.'\', header=FALSE, sep=\'\t\', skip=3)) #has sample names in first column, no defined column names;
484 sample_names <- geno_mat_sample_first\$V1; #no defined column names but they are markers
485 geno_mat_sample_first <- geno_mat_sample_first[,-1]; #remove first column so that row names are sample names and all other columns are markers
486 geno_mat_sample_first <- as.data.frame(rescale(as.matrix(geno_mat_sample_first), to = c(-1,1) ) ); #rrBLUP expected -1 to 1
487 colnames(geno_mat_sample_first) <- geno_mat_marker_first\$ID; #has sample names as row names, column names are marker names
488 row.names(geno_mat_sample_first) <- sample_names;
489 mat_clean_sample_first <- filterGenoData(gData=geno_mat_sample_first, maf='.$maf.', markerFilter='.$marker_filter.', indFilter='.$individuals_filter.');
490 if (\'rn\' %in% colnames(mat_clean_sample_first)) { row.names(mat_clean_sample_first) <- mat_clean_sample_first\$rn; mat_clean_sample_first <- mat_clean_sample_first[,-1]; }
491 remaining_samples <- row.names(mat_clean_sample_first);
492 remaining_markers <- colnames(mat_clean_sample_first);
493 imputation <- A.mat(mat_clean_sample_first, impute.method=\'EM\', n.core='.$number_system_cores.', return.imputed=TRUE);
494 K.mat <- imputation\$A;
495 geno_imputed <- imputation\$imputed;
496 geno_gwas <- cbind(geno_mat_marker_first[geno_mat_marker_first\$ID %in% remaining_markers, c(1:3)], t(geno_imputed));
497 gwas_results <- GWAS(pheno[pheno\$gid %in% remaining_samples, ], geno_gwas, fixed=c(\'field_trial_id\',\'replicate\'), K=K.mat, plot=F, min.MAF='.$maf.'); #columns are ID,CHROM,POS,TraitIDs and values in TraitIDs column are -log10 p values'."\n";
498 if ($download_format eq 'manhattan_qq_plots') {
499 $cmd .= 'pdf( \''.$gwas_tempfile.'\', width = 11, height = 8.5 );
500 for (i in 4:length(gwas_results)) { alpha_bonferroni=-log10(0.05/length(gwas_results[,i])); chromosome_ids <- as.factor(gwas_results\$CHROM); marker_indicator <- match(unique(gwas_results\$CHROM), gwas_results\$CHROM); N <- length(gwas_results[,1]); plot(seq(1:N), gwas_results[,i], col=chromosome_ids, ylab=\'-log10(pvalue)\', main=paste(\'Manhattan Plot \',colnames(gwas_results)[i]), xaxt=\'n\', xlab=\'Position\', ylim=c(0,14)); axis(1,at=marker_indicator,labels=gwas_results\$CHROM[marker_indicator], cex.axis=0.8, las=2); abline(h=alpha_bonferroni,col=\'red\',lwd=2); expected.logvalues <- sort( -log10( c(1:N) * (1/N) ) ); observed.logvalues <- sort(gwas_results[,i]); plot(expected.logvalues, observed.logvalues, main=paste(\'QQ Plot \',colnames(gwas_results)[i]), xlab=\'Expected -log p-values \', ylab=\'Observed -log p-values\', col.main=\'black\', col=\'coral1\', pch=20); abline(0,1,lwd=3,col=\'black\'); }
501 dev.off();
504 elsif ($download_format eq 'results_tsv') {
505 $cmd .= 'write.table(gwas_results, file=\''.$gwas_tempfile.'\', row.names=FALSE, col.names=TRUE, sep=\'\t\');
508 print STDERR Dumper $cmd;
510 # Do the GWAS on the cluster
511 my $gwas_cmd = CXGN::Tools::Run->new(
513 backend => $backend_config,
514 submit_host => $cluster_host_config,
515 temp_base => $tmp_output_dir,
516 queue => $web_cluster_queue_config,
517 do_cleanup => 0,
518 out_file => $gwas_tempfile,
519 # don't block and wait if the cluster looks full
520 max_cluster_jobs => 1_000_000_000,
524 # Do the transposition job on the cluster
525 $gwas_cmd->run_cluster($cmd);
526 $gwas_cmd->is_cluster(1);
527 $gwas_cmd->wait;
528 my $status;
530 return ($gwas_tempfile, $status);
533 sub grm_cache_key {
534 my $self = shift;
535 my $datatype = shift;
537 #print STDERR Dumper($self->_get_dataref());
538 my $json = JSON->new();
539 #preserve order of hash keys to get same text
540 $json = $json->canonical();
541 my $accessions = $json->encode( $self->accession_id_list() || [] );
542 my $traits = $json->encode( $self->trait_id_list() || [] );
543 my $protocol = $self->protocol_id() || '';
544 my $genotypeprophash = $json->encode( $self->genotypeprop_hash_select() || [] );
545 my $protocolprophash = $json->encode( $self->protocolprop_top_key_select() || [] );
546 my $protocolpropmarkerhash = $json->encode( $self->protocolprop_marker_hash_select() || [] );
547 my $maf = $self->minor_allele_frequency();
548 my $marker_filter = $self->marker_filter();
549 my $download_format = $self->download_format();
550 my $individuals_filter = $self->individuals_filter();
551 my $key = md5_hex($accessions.$traits.$protocol.$genotypeprophash.$protocolprophash.$protocolpropmarkerhash.$self->get_grm_for_parental_accessions().$self->return_only_first_genotypeprop_for_stock()."_MAF$maf"."_mfilter$marker_filter"."_ifilter$individuals_filter"."repeated".$self->traits_are_repeated_measurements()."format$download_format"."_$datatype");
552 return $key;
555 sub download_gwas {
556 my $self = shift;
557 my $shared_cluster_dir_config = shift;
558 my $backend_config = shift;
559 my $cluster_host_config = shift;
560 my $web_cluster_queue_config = shift;
561 my $basepath_config = shift;
563 my $key = $self->grm_cache_key("download_gwas_v01");
564 $self->_cache_key($key);
565 $self->cache( Cache::File->new( cache_root => $self->cache_root() ));
567 my $return;
568 if ($self->cache()->exists($key)) {
569 $return = $self->cache()->handle($key);
571 else {
572 my ($gwas_tempfile, $status) = $self->get_gwas($shared_cluster_dir_config, $backend_config, $cluster_host_config, $web_cluster_queue_config, $basepath_config);
574 open my $out_copy, '<', $gwas_tempfile or die "Can't open output file: $!";
576 $self->cache()->set($key, '');
577 my $file_handle = $self->cache()->handle($key);
578 copy($out_copy, $file_handle);
580 close $out_copy;
581 $return = $self->cache()->handle($key);
583 return $return;
586 sub genosort {
587 my ($a_chr, $a_pos, $b_chr, $b_pos);
588 if ($a =~ m/S(\d+)\_(.*)/) {
589 $a_chr = $1;
590 $a_pos = $2;
592 if ($b =~ m/S(\d+)\_(.*)/) {
593 $b_chr = $1;
594 $b_pos = $2;
597 if ($a_chr && $b_chr) {
598 if ($a_chr == $b_chr) {
599 return $a_pos <=> $b_pos;
601 return $a_chr <=> $b_chr;
602 } else {
603 return -1;