2 package CXGN
::Dataset
::File
;
5 use File
::Slurp qw
| write_file
|;
8 use CXGN
::Genotype
::Search
;
10 extends
'CXGN::Dataset';
12 has
'file_name' => ( isa
=> 'Str',
14 default => '/tmp/dataset_file',
17 has
'quotes' => ( isa
=> 'Bool',
22 sub retrieve_genotypes_vcf
{
24 my $protocol_id = shift;
25 my $file = shift; # || $self->file_name()."_genotype.txt";
26 my $cache_root_dir = shift;
27 my $cluster_shared_tempdir_config = shift;
28 my $backend_config = shift;
29 my $cluster_host_config = shift;
30 my $web_cluster_queue_config = shift;
31 my $basepath_config = shift;
32 my $forbid_cache = shift;
33 my $genotypeprop_hash_select = shift || ['DS'];
34 my $protocolprop_top_key_select = shift || [];
35 my $protocolprop_marker_hash_select = shift || [];
36 my $return_only_first_genotypeprop_for_stock = shift || 1;
38 my $accessions_list_ref = $self->retrieve_accessions();
41 foreach my $ai (@
$accessions_list_ref) {
42 push @accession_ids, $ai->[0];
48 push @protocol_ids, $protocol_id;
51 my $genotyping_protocol_ref = $self->retrieve_genotyping_protocols();
53 foreach my $p (@
$genotyping_protocol_ref) {
54 push @protocol_ids, $p->[0];
58 if (! @protocol_ids) {
59 die "No protocol provided or no protocol associated with dataset\n";
62 print STDERR
"PROTOCOL IDS: ".Dumper
(\
@protocol_ids);
64 my @accessions_list = @
$accessions_list_ref;
65 my $genotypes_search = CXGN
::Genotype
::Search
->new(
66 bcs_schema
=> $self->schema(),
67 people_schema
=> $self->people_schema(),
68 cache_root
=>$cache_root_dir,
69 accession_list
=> \
@accession_ids,
70 trial_list
=> $self->trials(),
71 protocol_id_list
=> \
@protocol_ids,
72 genotypeprop_hash_select
=>$genotypeprop_hash_select, #THESE ARE THE KEYS IN THE GENOTYPEPROP OBJECT
73 protocolprop_top_key_select
=>$protocolprop_top_key_select, #THESE ARE THE KEYS AT THE TOP LEVEL OF THE PROTOCOLPROP OBJECT
74 protocolprop_marker_hash_select
=>$protocolprop_marker_hash_select, #THESE ARE THE KEYS IN THE MARKERS OBJECT IN THE PROTOCOLPROP OBJECT
75 return_only_first_genotypeprop_for_stock
=>$return_only_first_genotypeprop_for_stock, #FOR MEMORY REASONS TO LIMIT DATA
76 forbid_cache
=>$forbid_cache
78 my @required_config = (
79 $cluster_shared_tempdir_config,
82 $web_cluster_queue_config,
86 my $filehandle = $genotypes_search->get_cached_file_VCF(@required_config);
87 print STDERR
"Checking if a file was requested...\n";
90 print STDERR
"Generating the file $file ...\n";
91 open(my $F, ">", $file) || die "Can't open file $file";
92 while(<$filehandle>) {
95 seek $filehandle, 0,0; # rewind filehandle to the beginning of the file
96 print STDERR
"Done.\n";
104 override
('retrieve_genotypes',
107 my $protocol_id = shift;
108 my $file = shift || $self->file_name()."_genotype.txt";
109 my $cache_root_dir = shift;
110 my $cluster_shared_tempdir_config = shift;
111 my $backend_config = shift;
112 my $cluster_host_config = shift;
113 my $web_cluster_queue_config = shift;
114 my $basepath_config = shift;
115 my $forbid_cache = shift;
118 my $genotypeprop_hash_select = shift || ['DS'];
119 my $protocolprop_top_key_select = shift || [];
120 my $protocolprop_marker_hash_select = shift || [];
121 my $return_only_first_genotypeprop_for_stock = shift || 1;
123 my $accessions_list_ref = $self->retrieve_accessions();
125 foreach (@
$accessions_list_ref) {
126 push @accession_ids, $_->[0];
129 my $genotyping_protocol_ref = $self->retrieve_genotyping_protocols();
131 foreach my $p (@
$genotyping_protocol_ref) {
132 push @protocols, $p->[0];
135 print STDERR
"PROTOCOLS IN RETRIEVE_GENOTYPES: ".join(", ",@protocols)."\n";
136 my @accessions_list = @
$accessions_list_ref;
137 my $genotypes_search = CXGN
::Genotype
::Search
->new(
138 bcs_schema
=> $self->schema(),
139 people_schema
=> $self->people_schema(),
140 cache_root
=>$cache_root_dir,
141 accession_list
=> \
@accession_ids,
142 trial_list
=> $self->trials(),
143 protocol_id_list
=> \
@protocols,
144 genotypeprop_hash_select
=>$genotypeprop_hash_select, #THESE ARE THE KEYS IN THE GENOTYPEPROP OBJECT
145 protocolprop_top_key_select
=>$protocolprop_top_key_select, #THESE ARE THE KEYS AT THE TOP LEVEL OF THE PROTOCOLPROP OBJECT
146 protocolprop_marker_hash_select
=>$protocolprop_marker_hash_select, #THESE ARE THE KEYS IN THE MARKERS OBJECT IN THE PROTOCOLPROP OBJECT
147 return_only_first_genotypeprop_for_stock
=>$return_only_first_genotypeprop_for_stock, #FOR MEMORY REASONS TO LIMIT DATA
148 forbid_cache
=>$forbid_cache
151 print STDERR
"DONE WITH GENO SEARCH!\n";
153 my @required_config = (
154 $cluster_shared_tempdir_config,
156 $cluster_host_config,
157 $web_cluster_queue_config,
161 my $fh = $genotypes_search->get_cached_file_dosage_matrix(@required_config);
163 print STDERR
"DONE GETTING DOSAGE MATRIX\n";
167 print STDERR
"Generating the file $file ...\n";
168 open(my $F, ">", $file) || die "Can't open file $file";
172 print STDERR
"Done.\n";
174 seek $fh, 0, 0; # reset the filehandle
183 override
('retrieve_phenotypes',
186 my $file = shift || $self->file_name()."_phenotype.txt";
187 my $phenotypes = $self->SUPER::retrieve_phenotypes
();
188 my $phenotype_string = "";
190 foreach my $line (@
$phenotypes) {
191 if ($self->quotes()) {
192 no warnings
; # turn off warnings, otherwise there are a lot of undefined warnings.
193 $s = join("\t", map { "\"$_\"" } @
$line);
197 $s = join("\t", @
$line);
200 # my $num_col = scalar(@{$line});
201 # for (my $j = 0; $j < $num_col; $j++) {
204 # $s .= "\"@$line[$j]\"";
206 # $s .= "\t\"@$line[$j]\"";
215 $phenotype_string .= $s."\n";
217 write_file
($file, $phenotype_string);
221 override
('retrieve_accessions',
224 my $file = shift || $self->file_name()."_accessions.txt";
225 my $accessions = $self->SUPER::retrieve_accessions
();
226 my $accession_json = JSON
::Any
->encode($accessions);
227 write_file
($file, $accession_json);
231 override
('retrieve_plots',
234 my $file = shift || $self->file_name()."_plots.txt";
235 my $plots = $self->SUPER::retrieve_plots
();
236 my $plot_json = JSON
::Any
->encode($plots);
237 write_file
($file, $plot_json);
241 override
('retrieve_trials',
244 my $file = shift || $self->file_name()."_trials.txt";
245 my $trials = $self->SUPER::retrieve_trials
();
246 my $trial_json = JSON
::Any
->encode($trials);
247 write_file
($file, $trial_json);
251 override
('retrieve_traits',
254 my $file = shift || $self->file_name()."_traits.txt";
255 my $traits = $self->SUPER::retrieve_traits
();
256 my $trait_json = JSON
::Any
->encode($traits);
257 write_file
($file, $trait_json);
261 override
('retrieve_years',
264 my $file = shift || $self->file_name()."_years.txt";
265 my $years = $self->SUPER::retrieve_years
();
266 my $year_json = JSON
::Any
->encode($years);
267 write_file
($file, $year_json);