From d84c69aebcc796457ff5dbc737ea32ffea5fd47e Mon Sep 17 00:00:00 2001 From: afpowell Date: Fri, 13 Mar 2020 18:08:05 +0000 Subject: [PATCH] set up cache for gwas --- lib/CXGN/Dataset/File.pm | 91 ++++++++++++++++++++++---------------- lib/SGN/Controller/AJAX/Solgwas.pm | 53 +++++++++++++--------- 2 files changed, 87 insertions(+), 57 deletions(-) diff --git a/lib/CXGN/Dataset/File.pm b/lib/CXGN/Dataset/File.pm index 49e3162ec..3577982ec 100644 --- a/lib/CXGN/Dataset/File.pm +++ b/lib/CXGN/Dataset/File.pm @@ -16,57 +16,74 @@ has 'file_name' => ( isa => 'Str', ); override('retrieve_genotypes', - sub { - my $self = shift; - my $protocol_id = shift; - my $file = shift || $self->file_name()."_genotype.txt"; - - my $accessions_list_ref = $self->accessions(); - my $genotypeprop_hash_select = shift || ['DS']; - my $protocolprop_top_key_select = shift || []; - my $protocolprop_marker_hash_select = shift || []; - my $return_only_first_genotypeprop_for_stock = shift || 1; + sub { + my $self = shift; + my $c = shift; + my $protocol_id = shift; + my $file = shift || $self->file_name()."_genotype.txt"; + + my $accessions_list_ref = $self->accessions(); + my $genotypeprop_hash_select = shift || ['DS']; + my $protocolprop_top_key_select = shift || []; + my $protocolprop_marker_hash_select = shift || []; + my $return_only_first_genotypeprop_for_stock = shift || 1; + my $cache_root_dir = $c->config->{cache_file_path}; + my $cluster_shared_tempdir_config = $c->config->{cluster_shared_tempdir}; + my $backend_config = $c->config->{backend}; + my $cluster_host_config = $c->config->{cluster_host}; + my $web_cluster_queue_config = $c->config->{'web_cluster_queue'}; + my $basepath_config = $c->config->{basepath}; + my $forbid_cache = defined($c->req->param('forbid_cache')) ? $c->req->param('forbid_cache') : 0; # my $accessions_list_ref = ['38884','38889','38890','38891','38893']; - my @accessions_list = @$accessions_list_ref; - my $genotypes_search = CXGN::Genotype::Search->new( + my @accessions_list = @$accessions_list_ref; + my $genotypes_search = CXGN::Genotype::Search->new( bcs_schema => $self->schema(), people_schema => $self->people_schema(), + cache_root=>$cache_root_dir, accession_list => $accessions_list_ref, trial_list => $self->trials(), protocol_id_list => [$protocol_id], genotypeprop_hash_select=>$genotypeprop_hash_select, #THESE ARE THE KEYS IN THE GENOTYPEPROP OBJECT protocolprop_top_key_select=>$protocolprop_top_key_select, #THESE ARE THE KEYS AT THE TOP LEVEL OF THE PROTOCOLPROP OBJECT protocolprop_marker_hash_select=>$protocolprop_marker_hash_select, #THESE ARE THE KEYS IN THE MARKERS OBJECT IN THE PROTOCOLPROP OBJECT - return_only_first_genotypeprop_for_stock=>$return_only_first_genotypeprop_for_stock #FOR MEMORY REASONS TO LIMIT DATA + return_only_first_genotypeprop_for_stock=>$return_only_first_genotypeprop_for_stock, #FOR MEMORY REASONS TO LIMIT DATA + forbid_cache=>$forbid_cache + ); + my @required_config = ( + $cluster_shared_tempdir_config, + $backend_config, + $cluster_host_config, + $web_cluster_queue_config, + $basepath_config ); - $genotypes_search->init_genotype_iterator(); - my $counter = 0; - while(my $geno = $genotypes_search->get_next_genotype_info) { - my $genotype_string = ""; - my $genotype_example = $geno; - if($counter == 0) { - foreach my $key (sort keys %{$genotype_example->{selected_genotype_hash}}) { - $genotype_string .= $key."\t"; - } - $genotype_string .= "\n"; - } +# $genotypes_search->init_genotype_iterator(); +# my $counter = 0; +# while(my $geno = $genotypes_search->get_next_genotype_info) { + # my $genotype_string = ""; + # my $genotype_example = $geno; + # if($counter == 0) { + # foreach my $key (sort keys %{$genotype_example->{selected_genotype_hash}}) { + # $genotype_string .= $key."\t"; + # } + # $genotype_string .= "\n"; + # } # foreach my $element (@$genotypes) { # my $element = $genotype_example; - my $genotype_id = $geno->{germplasmDbId}; - my $genotype_data_string = ""; - foreach my $key (sort keys %{$geno->{selected_genotype_hash}}) { - my $value = $geno->{selected_genotype_hash}->{$key}->{DS}; - my $current_genotype = $value; - $genotype_data_string .= $current_genotype."\t"; - } - my $s = join "\t", $genotype_id; - $genotype_string .= $s."\t".$genotype_data_string."\n"; + # my $genotype_id = $geno->{germplasmDbId}; +# my $genotype_data_string = ""; +# foreach my $key (sort keys %{$geno->{selected_genotype_hash}}) { +# my $value = $geno->{selected_genotype_hash}->{$key}->{DS}; +# my $current_genotype = $value; +# $genotype_data_string .= $current_genotype."\t"; +# } +# my $s = join "\t", $genotype_id; +# $genotype_string .= $s."\t".$genotype_data_string."\n"; # } - write_file($file, {append => 1}, $genotype_string); - $counter++; +# write_file($file, {append => 1}, $genotype_string); +# $counter++; - } + #} # my $genotypes = $self->SUPER::retrieve_genotypes($protocol_id, @accessions_list); # my $genotype_string = ""; @@ -93,7 +110,7 @@ override('retrieve_genotypes', # my $genotype_json = JSON::Any->encode($genotypes); # write_file($file, $genotype_json); - return; + return $genotypes_search->get_cached_file_dosage_matrix(@required_config); }); override('retrieve_phenotypes', diff --git a/lib/SGN/Controller/AJAX/Solgwas.pm b/lib/SGN/Controller/AJAX/Solgwas.pm index a5df55a08..d622c53c3 100644 --- a/lib/SGN/Controller/AJAX/Solgwas.pm +++ b/lib/SGN/Controller/AJAX/Solgwas.pm @@ -311,7 +311,10 @@ sub generate_results: Path('/ajax/solgwas/generate_results') : { $protocol_id = $row->nd_protocol_id(); } - $ds -> retrieve_genotypes($protocol_id,$geno_filepath); + my $filehandle = $ds->retrieve_genotypes($c,$protocol_id,$geno_filepath); +# my $base_filename = $$filehandle; + print STDERR $filehandle . "\n"; +# print STDERR $base_filename . "\n"; # $ds-> @$trials_ref = retrieve_genotypes(); my $newtrait = $trait_id; $newtrait =~ s/\s/\_/g; @@ -331,27 +334,30 @@ sub generate_results: Path('/ajax/solgwas/generate_results') : { $trait_id =~ tr/\//./; # my $clean_cmd = "rm /home/vagrant/cxgn/sgn/documents/tempfiles/solgwas_files/SolGWAS_Figure*.png"; # system($clean_cmd); -# my $geno_filepath2 = "." . $tempfile . "_genotype_edit.txt"; - my $geno_filepath2 = $tempfile . "_genotype_edit.txt"; - my $edit_cmd = "sed -e '1 s/\^/row.names\t/' " . $geno_filepath . " > " . $geno_filepath2; - system($edit_cmd); + my $geno_filepath2 = $tempfile . "_genotype.txt"; +# my $geno_filepath2 = $base_filename . "_genotype_edit.txt"; +# my $edit_cmd = "sed -e '1 s/\^/row.names\t/' " . $base_filename . " > " . $geno_filepath2; +# system($edit_cmd); # my $geno_filepath3 = "." . $tempfile . "_genotype_edit_subset.txt"; my $geno_filepath3 = $tempfile . "_genotype_edit_subset.txt"; # my $trim_cmd = "cut -f 1-50 " . $geno_filepath2 . " > " . $geno_filepath3; # system($trim_cmd); - open my $filehandle_in, "<", "$geno_filepath2" or die "Could not open $geno_filepath2: $!\n"; - open my $filehandle_in2, "<", "$geno_filepath2" or die "Could not open $geno_filepath2: $!\n"; - open my $filehandle_out, ">", "$geno_filepath3" or die "Could not create $geno_filepath3: $!\n"; +# open my $filehandle_in2, "<", "$geno_filepath2" or die "Could not open $geno_filepath2: $!\n"; + open my $filehandle_out, ">", "$geno_filepath2" or die "Could not create $geno_filepath2: $!\n"; my $marker_total; - while ( my $line = <$filehandle_in2> ) { + while ( my $line = <$filehandle> ) { my @sample_line = (split /\s+/, $line); $marker_total = scalar(@sample_line); + print $filehandle_out $line; } - close $filehandle_in2; + close $filehandle; + close $filehandle_out; + + # Hardcoded number of markers to be selected - make this selectable by user? my $markers_selected = 500; # my @column_selection = (0,2); @@ -362,21 +368,26 @@ sub generate_results: Path('/ajax/solgwas/generate_results') : { my $random_current = int(rand($marker_total)); redo if $columns_seen{$random_current}++; push @column_selection, $random_current; + print STDERR $random_current . "\n"; } + open my $filehandle_in, "<", "$geno_filepath2" or die "Could not open $geno_filepath2: $!\n"; + open my $filehandle_out2, ">", "$geno_filepath3" or die "Could not create $geno_filepath3: $!\n"; + # foreach my $item (@column_selection) { - while ( my $line = <$filehandle_in> ) { - my $curr_line; - my @first_item = (split /\s+/, $line); - foreach my $item (@column_selection) { - $curr_line .= $first_item[$item] . "\t"; - } -# $curr_line .= "\n"; - print $filehandle_out "$curr_line\n"; + while ( my $line = <$filehandle_in> ) { + my $curr_line; + my @first_item = (split /\s+/, $line); + foreach my $item (@column_selection) { + $curr_line .= $first_item[$item] . "\t"; } -# } +# $curr_line .= "\n"; + print STDERR $curr_line . "\n"; + print $filehandle_out2 "$curr_line\n"; + } + close $filehandle_in; - close $filehandle_out; + close $filehandle_out2; # my $cmd = "Rscript " . $c->config->{basepath} . "/R/solgwas/solgwas_script.R " . $pheno_filepath . " " . $geno_filepath3 . " " . $trait_id . " " . $figure3file . " " . $figure4file . " " . $pc_check . " " . $kinship_check; # system($cmd); @@ -402,6 +413,8 @@ sub generate_results: Path('/ajax/solgwas/generate_results') : { $pc_check, $kinship_check, ); + + $cmd->is_cluster(1); $cmd->wait; my $figure_path = $c->{basepath} . "./documents/tempfiles/solgwas_files/"; -- 2.11.4.GIT