1 package CXGN
::BrAPI
::v2
::VariantSets
;
5 use SGN
::Model
::Cvterm
;
6 use CXGN
::Genotype
::Search
;
8 use CXGN
::BrAPI
::FileResponse
;
9 use CXGN
::BrAPI
::Pagination
;
10 use CXGN
::BrAPI
::JSONResponse
;
11 use List
::Util
qw(sum);
13 extends
'CXGN::BrAPI::v2::Common';
# NOTE(review): the enclosing `sub` header, its closing brace, and several
# intervening lines (including the declarations of @trial_ids, @protocol_ids,
# %variant_sets, @analysis, @data, @data_files and $counter) are not visible in
# this view. The comments below describe only the statements that are shown.
#
# Visible flow: read paging/status from $self, collect trial and protocol ids
# from $inputs, run a CXGN::Genotype::Search, group the results into
# %variant_sets keyed by "<genotypingDataProjectDbId>p<analysisMethodDbId>", and
# return the records whose position falls inside the requested page window.
19 my $c = $self->context;
20 my $page_size = $self->page_size;
21 my $page = $self->page;
22 my $status = $self->status;
# Each filter takes the singular key from $inputs or, failing that, the plural
# key; `|| ()` in this scalar assignment leaves the variable undef when neither
# is supplied.
23 my $variantset_ids = $inputs->{variantSetDbId
} || ($inputs->{variantSetDbIds
} || ());
24 my $study_ids = $inputs->{studyDbId
} || ($inputs->{studyDbIds
} || ());
25 my $study_names = $inputs->{studyName
} || ($inputs->{studyNames
} || ());
# NOTE(review): $variant_id is not referenced by any statement visible in this
# fragment - confirm whether it is used in the lines not shown.
26 my $variant_id = $inputs->{variantDbId
} || ($inputs->{variantDbIds
} || ());
27 my $callset_id = $inputs->{callSetDbId
} || ($inputs->{callSetDbIds
} || ());
# A variant set id is split on "p": the first piece is pushed as a trial id and
# the second as a protocol id. A false piece (empty, undef or 0) pushes nothing.
31 if ( $variantset_ids){
32 foreach ( @
{$variantset_ids} ){
33 my @ids = split /p/, $_;
35 push @trial_ids, $ids[0] ?
$ids[0] : ();
36 push @protocol_ids, $ids[1] ?
$ids[1] : ();
# Study ids are appended to the same trial id list.
43 push @trial_ids, @
{$study_ids};
# With no trial ids collected, fall back to a CXGN::Trial::Search limited to
# trial_design_list ['genotype_data_project'] and use the trial_id of its rows.
# NOTE(review): no `use CXGN::Trial::Search` appears among the `use` lines
# visible in this view - confirm the module is loaded.
46 if (scalar @trial_ids == 0){
47 my $trial_search = CXGN
::Trial
::Search
->new({
48 bcs_schema
=>$self->bcs_schema,
49 trial_design_list
=>['genotype_data_project']
51 my ($data, $total_count) = $trial_search->search();
54 push @trial_ids, $_->{trial_id
};
# Genotype search over the collected trials/protocols. genotypeprop_hash_select
# is ['DS'] and both protocolprop selections are empty lists; $callset_id is
# passed through unchanged as accession_list.
58 my $genotype_search = CXGN
::Genotype
::Search
->new({
59 bcs_schema
=>$self->bcs_schema,
60 people_schema
=> $self->people_schema(),
61 cache_root
=>$c->config->{cache_file_path
},
62 trial_list
=>\
@trial_ids,
63 genotypeprop_hash_select
=>['DS'],
64 protocolprop_top_key_select
=>[],
65 protocolprop_marker_hash_select
=>[],
66 accession_list
=>$callset_id,
67 protocol_id_list
=>\
@protocol_ids,
72 $genotype_search->init_genotype_iterator();
74 while (my ($count, $gt) = $genotype_search->get_next_genotype_info) {
# Keep the record when no study-name filter was given, or when its
# genotypingDataProjectName equals one of the requested names.
76 if( ! $study_names || grep { $_ eq $gt->{genotypingDataProjectName
}} @
{$study_names} ){
# Variant set id = project id, the letter "p", then protocol id (the same
# format that is split apart on "p" above).
78 my $set_id = $gt->{genotypingDataProjectDbId
} . "p" . $gt->{analysisMethodDbId
};
# Only the first record seen for a given protocol within a set enters this
# branch; the 'analysisIds' entry pushed below marks the protocol as seen.
80 if( ! $variant_sets{$set_id}{'analysisIds'} {$gt->{analysisMethodDbId
}}) {
83 analysisDbId
=> qq|$gt->{analysisMethodDbId
}|, #protocolid
84 analysisName
=> $gt->{analysisMethod
},
92 push( @
{ $variant_sets { $set_id }{'analysisIds'} {$gt->{analysisMethodDbId
}} }, 1 );
93 push( @
{ $variant_sets { $set_id }{'markerCount'}}, $gt->{resultCount
} );
94 push( @
{ $variant_sets { $set_id }{'analysis'} }, @analysis);
# Per-record bookkeeping: the genotype id is collected, and the set name and
# project id are overwritten with this record's values.
97 push( @
{ $variant_sets { $set_id } {'genotypes'} }, $gt->{genotypeDbId
});
98 $variant_sets { $set_id } {'name'} = $gt->{genotypingDataProjectName
} . " - " . $gt->{analysisMethod
};
99 $variant_sets { $set_id } {'dataProject'} = $gt->{genotypingDataProjectDbId
};
# Zero-based inclusive window of positions belonging to the requested page.
104 my $start_index = $page*$page_size;
105 my $end_index = $page*$page_size + $page_size - 1;
# NOTE(review): `keys %variant_sets` is unsorted, so which set lands on which
# page can differ between runs; consider sorting the keys.
108 foreach my $id (keys %variant_sets){
110 if ($counter >= $start_index && $counter <= $end_index) {
111 my @availableFormats;
113 push @availableFormats,{
114 dataFormat
=> "json",
115 fileFormat
=> "json",
120 analysis
=>$variant_sets{$id} {'analysis'},
121 availableFormats
=> \
@availableFormats,
122 callSetCount
=> scalar @
{$variant_sets{$id}{'genotypes'}},
# NOTE(review): `keys` is evaluated in list context inside this hash
# constructor. With exactly one analysis id it yields one value; with more than
# one, the extra ids are injected as additional key/value elements.
123 referenceSetDbId
=> keys %{ $variant_sets{$id} {'analysisIds'} },
124 studyDbId
=> qq|$variant_sets{$id}{'dataProject'}|,
# _sum is a helper whose definition is not fully visible in this view; it is
# handed the array reference of collected resultCount values.
125 variantCount
=> _sum
($variant_sets{$id}{'markerCount'}),
126 variantSetDbId
=> qq|$id|,
127 variantSetName
=> $variant_sets{$id} {'name'},
133 my %result = (data
=> \
@data);
# $counter is passed as the first argument of pagination_response.
135 my $pagination = CXGN
::BrAPI
::Pagination
->pagination_response($counter,$page_size,$page);
136 return CXGN
::BrAPI
::JSONResponse
->return_success(\
%result, $pagination, \
@data_files, $status, 'VariantSets result constructed');
# NOTE(review): the enclosing `sub` header, its closing brace, and several
# intervening lines (including the declarations of @trial_ids, @protocol_ids,
# %variant_sets, @analysis, @data and @data_files) are not visible in this view.
# The comments below describe only the statements that are shown.
#
# Visible flow: take a single variantSetDbId from $inputs, split it into a trial
# id and a protocol id, run a CXGN::Genotype::Search, group the results into
# %variant_sets, and return the resulting record(s) without a page window.
143 my $c = $self->context;
144 my $page_size = $self->page_size;
145 my $page = $self->page;
146 my $status = $self->status;
147 my $variantset_id = $inputs->{variantSetDbId
};
# The id is split on "p": first piece -> trial id, second piece -> protocol id.
# A false piece (empty, undef or 0) pushes nothing.
151 if ( $variantset_id){
152 my @ids = split /p/, $variantset_id;
153 push @trial_ids, $ids[0] ?
$ids[0] : ();
154 push @protocol_ids, $ids[1] ?
$ids[1] : ();
# genotypeprop_hash_select is ['DS'] and both protocolprop selections are empty
# lists.
157 my $genotype_search = CXGN
::Genotype
::Search
->new({
158 bcs_schema
=>$self->bcs_schema,
159 people_schema
=> $self->people_schema(),
160 cache_root
=>$c->config->{cache_file_path
},
161 trial_list
=>\
@trial_ids,
162 protocol_id_list
=>\
@protocol_ids,
163 genotypeprop_hash_select
=>['DS'],
164 protocolprop_top_key_select
=>[],
165 protocolprop_marker_hash_select
=>[],
# NOTE(review): $file_handle is not read by any statement visible in this
# fragment; the loop below uses the genotype iterator instead. Confirm whether
# this call is still needed.
167 my $file_handle = $genotype_search->get_cached_file_search_json($c->config->{cluster_shared_tempdir
}, 1); #Metadata only returned
172 $genotype_search->init_genotype_iterator();
174 while (my ($count, $gt) = $genotype_search->get_next_genotype_info) {
# Variant set id = project id, the letter "p", then protocol id.
176 my $set_id = $gt->{genotypingDataProjectDbId
} . "p" . $gt->{analysisMethodDbId
};
# Only the first record seen for a given protocol within a set enters this
# branch; the 'analysisIds' entry pushed below marks the protocol as seen.
178 if( ! $variant_sets{$set_id}{'analysisIds'} {$gt->{analysisMethodDbId
}}) {
181 analysisDbId
=> qq|$gt->{analysisMethodDbId
}|, #protocolid
182 analysisName
=> $gt->{analysisMethod
},
190 push( @
{ $variant_sets { $set_id }{'analysisIds'} {$gt->{analysisMethodDbId
}} }, 1 );
191 push( @
{ $variant_sets { $set_id }{'markerCount'}}, $gt->{resultCount
} );
192 push( @
{ $variant_sets { $set_id }{'analysis'} }, @analysis);
# Per-record bookkeeping: the genotype id is collected, and the set name and
# project id are overwritten with this record's values.
195 push( @
{ $variant_sets { $set_id } {'genotypes'} }, $gt->{genotypeDbId
});
196 $variant_sets { $set_id } {'name'} = $gt->{genotypingDataProjectName
} . " - " . $gt->{analysisMethod
};
197 $variant_sets { $set_id } {'dataProject'} = $gt->{genotypingDataProjectDbId
};
200 foreach my $id (keys %variant_sets){
202 my @availableFormats;
204 push @availableFormats,{
205 dataFormat
=> "json",
206 fileFormat
=> "json",
211 analysis
=>$variant_sets{$id} {'analysis'},
212 availableFormats
=> \
@availableFormats,
213 callSetCount
=> scalar @
{$variant_sets{$id}{'genotypes'}},
# NOTE(review): `keys` is evaluated in list context inside this hash
# constructor; more than one analysis id would inject extra elements.
214 referenceSetDbId
=> keys %{ $variant_sets{$id} {'analysisIds'} },
215 studyDbId
=> qq|$variant_sets{$id}{'dataProject'}|,
216 variantCount
=> _sum
($variant_sets{$id}{'markerCount'}),
217 variantSetDbId
=> qq|$id|,
218 variantSetName
=> $variant_sets{$id} {'name'},
# The first argument of pagination_response is the literal 1 here.
223 my $pagination = CXGN
::BrAPI
::Pagination
->pagination_response(1,$page_size,$page);
# NOTE(review): @data is flattened into the argument list, whereas the other
# return_success calls in this file pass a reference (\%result) first. The
# arguments line up only when @data holds exactly one element; with zero or
# several elements $pagination and the following arguments shift position.
224 return CXGN
::BrAPI
::JSONResponse
->return_success(@data, $pagination, \
@data_files, $status, 'VariantSets result constructed');
# NOTE(review): the enclosing `sub` header, its closing brace, and several
# intervening lines (including the declarations of @trial_ids, @protocol_ids,
# @data, @data_files and $counter, and the loop that reads the file) are not
# visible in this view. The comments below describe only the statements shown.
#
# Visible flow: split one variantSetDbId into trial/protocol ids, run a
# CXGN::Genotype::Search restricted to the requested call sets, read the cached
# JSON output, and emit one call-set record per entry inside the page window.
230 my $c = $self->context;
231 my $page_size = $self->page_size;
232 my $page = $self->page;
233 my $status = $self->status;
234 my $variantset_id = $inputs->{variantSetDbId
};
# Both inputs are dereferenced as array references when present, otherwise the
# arrays stay empty.
235 my @callset_id = $inputs->{callSetDbId
} ? @
{$inputs->{callSetDbId
}} : ();
# NOTE(review): @callset_name is not referenced by any statement visible in
# this fragment - confirm whether the callSetName filter is applied elsewhere.
236 my @callset_name = $inputs->{callSetName
} ? @
{$inputs->{callSetName
}} : ();
# The id is split on "p": first piece -> trial id, second piece -> protocol id.
# A false piece (empty, undef or 0) pushes nothing.
240 if ( $variantset_id){
241 my @ids = split /p/, $variantset_id;
242 push @trial_ids, $ids[0] ?
$ids[0] : ();
243 push @protocol_ids, $ids[1] ?
$ids[1] : ();
# genotypeprop_hash_select is ['DS'], both protocolprop selections are empty
# lists, and the requested call set ids are passed as accession_list.
246 my $genotypes_search = CXGN
::Genotype
::Search
->new({
247 bcs_schema
=>$self->bcs_schema,
248 people_schema
=> $self->people_schema(),
249 cache_root
=>$c->config->{cache_file_path
},
250 trial_list
=>\
@trial_ids,
251 protocol_id_list
=>\
@protocol_ids,
252 genotypeprop_hash_select
=>['DS'],
253 protocolprop_top_key_select
=>[],
254 protocolprop_marker_hash_select
=>[],
255 accession_list
=>\
@callset_id,
256 # offset=>$page_size*$page,
259 my $file_handle = $genotypes_search->get_cached_file_search_json($c->config->{cluster_shared_tempdir
}, 1); #Metadata only returned
# Zero-based inclusive window of positions belonging to the requested page.
262 my $start_index = $page*$page_size;
263 my $end_index = $page*$page_size + $page_size - 1;
# "<&" opens $fh as a duplicate of the handle returned by the search.
266 open my $fh, "<&", $file_handle or die "Can't open output file: $!";
# The first line is read separately from the records; $header_line is not used
# by any statement visible in this fragment.
267 my $header_line = <$fh>;
270 if ($counter >= $start_index && $counter <= $end_index) {
# NOTE(review): no import providing decode_json appears among the `use` lines
# visible in this view - confirm it is in scope.
271 my $gt = decode_json
$_;
273 my @availableFormats;
# callSetDbId and sampleDbId are both filled from stock_id.
277 callSetDbId
=> qq|$gt->{stock_id
}|,
278 callSetName
=> qq|$gt->{stock_name
}|,
280 sampleDbId
=>qq|$gt->{stock_id
}|,
281 studyDbId
=>qq|$gt->{genotypingDataProjectDbId
}|,
# Single-element list holding project id, the letter "p", then protocol id.
283 variantSetDbIds
=> [ $gt->{genotypingDataProjectDbId
}. "p". $gt->{analysisMethodDbId
} ],
289 my %result = (data
=> \
@data);
# $counter is passed as the first argument of pagination_response.
291 my $pagination = CXGN
::BrAPI
::Pagination
->pagination_response($counter,$page_size,$page);
292 return CXGN
::BrAPI
::JSONResponse
->return_success(\
%result, $pagination, \
@data_files, $status, 'VariantSets result constructed');
# NOTE(review): the enclosing `sub` header, its closing brace, and several
# intervening lines (including the declarations of @trial_ids, @protocol_ids,
# @data, @data_files, %result, $geno and $counter) are not visible in this view.
# The comments below describe only the statements that are shown.
#
# Visible flow: split one variantSetDbId into trial/protocol ids, run a
# CXGN::Genotype::Search selecting the DS, GT and NT keys, read the cached JSON
# output line by line, and emit one call record per marker of each genotype
# line whose position falls inside the page window.
298 my $c = $self->context;
299 my $page_size = $self->page_size;
300 my $page = $self->page;
301 my $status = $self->status;
302 my $variantset_id = $inputs->{variantSetDbId
};
303 my $sep_phased = $inputs->{sep_phased
};
304 my $sep_unphased = $inputs->{sep_unphased
};
305 my $unknown_string = $inputs->{unknown_string
};
306 my $expand_homozygotes = $inputs->{expand_homozygotes
};
# NOTE(review): $file_path and $uri are not referenced by any statement visible
# in this fragment - confirm whether they are used in the lines not shown.
307 my $file_path = $inputs->{file_path
};
308 my $uri = $inputs->{file_uri
};
# If any of the four formatting parameters is set, an error entry is appended
# to @$status.
310 if ($sep_phased || $sep_unphased || $expand_homozygotes || $unknown_string){
311 push @
$status, { 'error' => 'The following parameters are not implemented: expandHomozygotes, unknownString, sepPhased, sepUnphased' };
# The id is split on "p": first piece -> trial id, second piece -> protocol id.
# A false piece (empty, undef or 0) pushes nothing.
316 if ( $variantset_id){
317 my @ids = split /p/, $variantset_id;
318 push @trial_ids, $ids[0] ?
$ids[0] : ();
319 push @protocol_ids, $ids[1] ?
$ids[1] : ();
# genotypeprop_hash_select is ['DS', 'GT', 'NT'] and both protocolprop
# selections are empty lists.
325 my $genotypes_search = CXGN
::Genotype
::Search
->new({
326 bcs_schema
=>$self->bcs_schema,
327 people_schema
=> $self->people_schema(),
328 cache_root
=>$c->config->{cache_file_path
},
329 trial_list
=>\
@trial_ids,
330 protocol_id_list
=>\
@protocol_ids,
331 genotypeprop_hash_select
=>['DS', 'GT', 'NT'],
332 protocolprop_top_key_select
=>[],
333 protocolprop_marker_hash_select
=>[],
# The second argument is 0 here; other calls in this file pass 1 with the
# comment "Metadata only returned".
335 my $file_handle = $genotypes_search->get_cached_file_search_json($c->config->{cluster_shared_tempdir
}, 0);
# Zero-based inclusive window of positions belonging to the requested page.
337 my $start_index = $page*$page_size;
338 my $end_index = $page*$page_size + $page_size - 1;
# "<&" opens $fh as a duplicate of the handle returned by the search.
341 open my $fh, "<&", $file_handle or die "Can't open output file: $!";
# The first line of the file is decoded separately from the genotype lines.
# NOTE(review): $marker_objects is not referenced by any statement visible in
# this fragment.
342 my $header_line = <$fh>;
343 my $marker_objects = decode_json
$header_line;
# Each remaining line is one JSON document; its selected_genotype_hash is
# walked in string-sorted key order.
347 while (my $gt_line = <$fh>) {
348 my $gt = decode_json
$gt_line;
349 my $genotype = $gt->{selected_genotype_hash
};
350 my @ordered_refmarkers = sort keys(%$genotype);
# NOTE(review): $genotypeprop_id is not referenced by any statement visible in
# this fragment.
351 my $genotypeprop_id = $gt->{markerProfileDbId
};
353 foreach my $m (@ordered_refmarkers) {
354 if ($counter >= $start_index && $counter <= $end_index) {
# The reported value is the first defined of NT, then GT, then DS.
356 if (exists($genotype->{$m}->{'NT'}) && defined($genotype->{$m}->{'NT'})){
357 $geno = $genotype->{$m}->{'NT'};
359 elsif (exists($genotype->{$m}->{'GT'}) && defined($genotype->{$m}->{'GT'})){
360 $geno = $genotype->{$m}->{'GT'};
362 elsif (exists($genotype->{$m}->{'DS'}) && defined($genotype->{$m}->{'DS'})){
363 $geno = $genotype->{$m}->{'DS'};
369 callSetDbId
=>qq|$gt->{stock_id
}|,
370 callSetName
=>qq|$gt->{stock_name
}|,
371 genotype
=>{values=>$geno},
372 genotype_likelihood
=>undef,
# The formatting options are reported as undef.
380 %result = ( data
=>\
@data,
381 expandHomozygotes
=>undef,
384 unknownString
=>undef);
# $counter is passed as the first argument of pagination_response.
388 my $pagination = CXGN
::BrAPI
::Pagination
->pagination_response($counter,$page_size,$page);
389 return CXGN
::BrAPI
::JSONResponse
->return_success(\
%result, $pagination, \
@data_files, $status, 'VariantSets result constructed');
# NOTE(review): the enclosing `sub` header, its closing brace, and several
# intervening lines (including the declarations of @trial_ids, @protocol_ids,
# %data_obj, @data_out, @data_files and $counter, and the loop over the search
# results) are not visible in this view. The comments below describe only the
# statements that are shown.
#
# Visible flow: split one variantSetDbId into trial/protocol ids, run a
# CXGN::Marker::SearchBrAPI query, and build one variant record per result row
# inside the page window.
395 my $page_size = $self->page_size;
396 my $page = $self->page;
397 my $status = $self->status;
399 my $variantset_id = $inputs->{variantSetDbId
};
# Marker filter: singular key, else plural key, else an empty array reference.
400 my $marker_ids = $inputs->{variantDbId
} || ($inputs->{variantDbIds
} || []);
# NOTE(review): $pageToken is not referenced by any statement visible in this
# fragment.
401 my $pageToken = $inputs->{pageToken
}->[0] || undef;
402 my $schema = $self->bcs_schema;
# The id is split on "p": first piece -> trial id, second piece -> protocol id.
# A false piece (empty, undef or 0) pushes nothing.
407 if ( $variantset_id){
408 my @ids = split /p/, $variantset_id;
409 push @trial_ids, $ids[0] ?
$ids[0] : ();
410 push @protocol_ids, $ids[1] ?
$ids[1] : ();
# NOTE(review): no `use CXGN::Marker::SearchBrAPI` appears among the `use`
# lines visible in this view - confirm the module is loaded.
413 my $marker_search = CXGN
::Marker
::SearchBrAPI
->new({
414 bcs_schema
=> $schema,
415 protocol_id_list
=> \
@protocol_ids,
416 project_id_list
=> \
@trial_ids,
417 marker_name_list
=> $marker_ids,
# NOTE(review): an offset of $page_size*$page is handed to the search here,
# and the rows are then filtered again against $start_index/$end_index below.
# If search() already applies the offset, pages after the first would be
# skipped twice - confirm against CXGN::Marker::SearchBrAPI.
418 offset
=>$page_size*$page,
# Zero-based inclusive window of positions belonging to the requested page.
422 my $start_index = $page*$page_size;
423 my $end_index = $page*$page_size + $page_size - 1;
426 my ($data, $total_count) = $marker_search->search();
429 if ($counter >= $start_index && $counter <= $end_index) {
430 my $info = $_->{info
};
# NOTE(review): perlsyn documents `my $x = ... if COND;` as having undefined
# behaviour; declare $svtype first and assign inside an ordinary `if`. The
# pattern also requires a ";" after the value, so an SVTYPE entry at the very
# end of the info string does not match.
431 my $svtype = $1 if ($_->{info
} =~ /SVTYPE=(\w+);/) ;
# NOTE(review): @cipos, @ciend and @svlen are not referenced by any statement
# visible in this fragment; their use is presumably in the lines not shown.
432 my @cipos = _get_info
($info,'CIPOS');
433 my @ciend = _get_info
($info,'CIEND');
434 my @svlen = _get_info
($info,'SVLEN');
437 additionalInfo
=> {},
438 alternate_bases
=> $_->{alt
},
# end = start position plus the length of the reference allele string.
442 end
=> $_->{pos} + length($_->{ref}),
# filtersApplied is JSON false when the filter field is ".", JSON true otherwise.
443 filtersApplied
=> $_->{filter
} eq "." ? JSON
::false
: JSON
::true
,
# filtersFailed is undef when the filter field is "PASS" or ".", otherwise the
# raw filter string.
444 filtersFailed
=> ( $_->{filter
} eq "PASS" || $_->{filter
} eq "." ) ?
undef : $_->{filter
},
# filtersPassed is JSON true only when the filter field is exactly "PASS".
445 filtersPassed
=> $_->{filter
} eq "PASS" ? JSON
::true
: JSON
::false
,
446 referenceBases
=> $_->{ref},
447 referenceName
=> $_->{chrom
} ?
$_->{chrom
} : undef,
# The marker name doubles as the variant id; variantNames receives the same
# scalar (not a list).
451 variantDbId
=> qq|$_->{marker_name
}|,
452 variantNames
=> $_->{marker_name
},
# _quote is a helper whose definition is not fully visible in this view.
453 variantSetDbId
=> _quote
($_->{project_id
}, $_->{nd_protocol_id
} ),
454 variantType
=> $svtype,
456 push @data_out, \
%data_obj;
461 my %result = (data
=>\
@data_out);
# NOTE(review): page size 1 and page 0 are hard-coded here; the other
# pagination_response calls in this file pass $page_size and $page.
463 my $pagination = CXGN
::BrAPI
::Pagination
->pagination_response($total_count,1,0);
464 return CXGN
::BrAPI
::JSONResponse
->return_success(\
%result, $pagination, \
@data_files, $status, 'Variants result constructed');
# NOTE(review): the enclosing `sub` header, its closing brace, and several
# intervening lines (including the declarations of @trial_ids, @protocol_ids,
# %variant_sets, @analysis, @data, @data_files and $counter) are not visible in
# this view. The comments below describe only the statements that are shown.
#
# NOTE(review): the statements visible here match, line for line, the
# variant-set listing code that appears earlier in this file (the block whose
# statements start with `19 my $c = $self->context;`). If the hidden lines also
# match, the two are candidates for a shared helper.
#
# Visible flow: read paging/status from $self, collect trial and protocol ids
# from $inputs, run a CXGN::Genotype::Search, group the results into
# %variant_sets keyed by "<genotypingDataProjectDbId>p<analysisMethodDbId>", and
# return the records whose position falls inside the requested page window.
470 my $c = $self->context;
471 my $page_size = $self->page_size;
472 my $page = $self->page;
473 my $status = $self->status;
# Each filter takes the singular key from $inputs or, failing that, the plural
# key; `|| ()` in this scalar assignment leaves the variable undef when neither
# is supplied.
474 my $variantset_ids = $inputs->{variantSetDbId
} || ($inputs->{variantSetDbIds
} || ());
475 my $study_ids = $inputs->{studyDbId
} || ($inputs->{studyDbIds
} || ());
476 my $study_names = $inputs->{studyName
} || ($inputs->{studyNames
} || ());
# NOTE(review): $variant_id is not referenced by any statement visible in this
# fragment - confirm whether it is used in the lines not shown.
477 my $variant_id = $inputs->{variantDbId
} || ($inputs->{variantDbIds
} || ());
478 my $callset_id = $inputs->{callSetDbId
} || ($inputs->{callSetDbIds
} || ());
# A variant set id is split on "p": the first piece is pushed as a trial id and
# the second as a protocol id. A false piece (empty, undef or 0) pushes nothing.
482 if ( $variantset_ids){
483 foreach ( @
{$variantset_ids} ){
484 my @ids = split /p/, $_;
486 push @trial_ids, $ids[0] ?
$ids[0] : ();
487 push @protocol_ids, $ids[1] ?
$ids[1] : ();
# Study ids are appended to the same trial id list.
495 push @trial_ids, @
{$study_ids};
# With no trial ids collected, fall back to a CXGN::Trial::Search limited to
# trial_design_list ['genotype_data_project'] and use the trial_id of its rows.
# NOTE(review): no `use CXGN::Trial::Search` appears among the `use` lines
# visible in this view - confirm the module is loaded.
498 if (scalar @trial_ids == 0){
499 my $trial_search = CXGN
::Trial
::Search
->new({
500 bcs_schema
=>$self->bcs_schema,
501 trial_design_list
=>['genotype_data_project']
503 my ($data, $total_count) = $trial_search->search();
506 push @trial_ids, $_->{trial_id
};
# Genotype search over the collected trials/protocols. genotypeprop_hash_select
# is ['DS'] and both protocolprop selections are empty lists; $callset_id is
# passed through unchanged as accession_list.
510 my $genotype_search = CXGN
::Genotype
::Search
->new({
511 bcs_schema
=>$self->bcs_schema,
512 people_schema
=> $self->people_schema(),
513 cache_root
=>$c->config->{cache_file_path
},
514 trial_list
=>\
@trial_ids,
515 genotypeprop_hash_select
=>['DS'],
516 protocolprop_top_key_select
=>[],
517 protocolprop_marker_hash_select
=>[],
518 accession_list
=>$callset_id,
519 protocol_id_list
=>\
@protocol_ids,
524 $genotype_search->init_genotype_iterator();
526 while (my ($count, $gt) = $genotype_search->get_next_genotype_info) {
# Keep the record when no study-name filter was given, or when its
# genotypingDataProjectName equals one of the requested names.
528 if( ! $study_names || grep { $_ eq $gt->{genotypingDataProjectName
}} @
{$study_names} ){
# Variant set id = project id, the letter "p", then protocol id (the same
# format that is split apart on "p" above).
530 my $set_id = $gt->{genotypingDataProjectDbId
} . "p" . $gt->{analysisMethodDbId
};
# Only the first record seen for a given protocol within a set enters this
# branch; the 'analysisIds' entry pushed below marks the protocol as seen.
532 if( ! $variant_sets{$set_id}{'analysisIds'} {$gt->{analysisMethodDbId
}}) {
535 analysisDbId
=> qq|$gt->{analysisMethodDbId
}|, #protocolid
536 analysisName
=> $gt->{analysisMethod
},
544 push( @
{ $variant_sets { $set_id }{'analysisIds'} {$gt->{analysisMethodDbId
}} }, 1 );
545 push( @
{ $variant_sets { $set_id }{'markerCount'}}, $gt->{resultCount
} );
546 push( @
{ $variant_sets { $set_id }{'analysis'} }, @analysis);
# Per-record bookkeeping: the genotype id is collected, and the set name and
# project id are overwritten with this record's values.
549 push( @
{ $variant_sets { $set_id } {'genotypes'} }, $gt->{genotypeDbId
});
550 $variant_sets { $set_id } {'name'} = $gt->{genotypingDataProjectName
} . " - " . $gt->{analysisMethod
};
551 $variant_sets { $set_id } {'dataProject'} = $gt->{genotypingDataProjectDbId
};
# Zero-based inclusive window of positions belonging to the requested page.
556 my $start_index = $page*$page_size;
557 my $end_index = $page*$page_size + $page_size - 1;
# NOTE(review): `keys %variant_sets` is unsorted, so which set lands on which
# page can differ between runs; consider sorting the keys.
560 foreach my $id (keys %variant_sets){
562 if ($counter >= $start_index && $counter <= $end_index) {
563 my @availableFormats;
565 push @availableFormats,{
566 dataFormat
=> "json",
567 fileFormat
=> "json",
572 analysis
=>$variant_sets{$id} {'analysis'},
573 availableFormats
=> \
@availableFormats,
574 callSetCount
=> scalar @
{$variant_sets{$id}{'genotypes'}},
# NOTE(review): `keys` is evaluated in list context inside this hash
# constructor. With exactly one analysis id it yields one value; with more than
# one, the extra ids are injected as additional key/value elements.
575 referenceSetDbId
=> keys %{ $variant_sets{$id} {'analysisIds'} },
576 studyDbId
=> qq|$variant_sets{$id}{'dataProject'}|,
# _sum is a helper whose definition is not fully visible in this view; it is
# handed the array reference of collected resultCount values.
577 variantCount
=> _sum
($variant_sets{$id}{'markerCount'}),
578 variantSetDbId
=> qq|$id|,
579 variantSetName
=> $variant_sets{$id} {'name'},
585 my %result = (data
=> \
@data);
# $counter is passed as the first argument of pagination_response.
587 my $pagination = CXGN
::BrAPI
::Pagination
->pagination_response($counter,$page_size,$page);
588 return CXGN
::BrAPI
::JSONResponse
->return_success(\
%result, $pagination, \
@data_files, $status, 'VariantSets result constructed');
# NOTE(review): only the loop header of this helper is visible; the `sub`
# header, the loop body and the return are not shown. Presumably this is the
# body of _sum, which callers in this file invoke with an array reference of
# counts - confirm against the full file.
# Iterates over the elements of the array referenced by $array.
595 foreach my $num (@
$array){
# NOTE(review): the `sub` header, the assignment of $match and the return are
# not visible in this view. Presumably this is the body of _get_info, which
# callers in this file invoke as _get_info($info, 'CIPOS') - confirm against
# the full file.
# Visible logic: when $info contains "<item>=" followed by comma-separated
# (optionally negative) integers, the "<item>=" prefix is removed from $match,
# the remainder is split on commas, and each piece is converted to a number.
# NOTE(review): $item is interpolated into both patterns without \Q...\E, so
# regex metacharacters in it would be interpreted; the visible callers pass
# only fixed literals.
606 #match with CIPOS=-22,18;CIEND=-12,32"
607 if ( $info =~ /$item=(-?(\d+),?)+/) {
609 $match =~ s/$item=//g;
610 my @splited = split(/,/, $match);
611 @array = map { $_ + 0 } @splited;
# NOTE(review): the `sub` header, the handling of the first argument, any
# enclosing loop and the return are not visible in this view. Presumably this
# is the body of _quote, which a caller in this file invokes with a project id
# and a protocol id - confirm against the full file.
# Takes the next argument as the protocol id.
619 my $protocol = shift;
# Rewrites $_ in place as its string value, the letter "p", then the protocol
# id - the same "<project>p<protocol>" format the rest of the file splits on.
622 $_ = "$_" . "p". $protocol;