Merge pull request #5191 from solgenomics/topic/quality_control
[sgn.git] / lib / CXGN / BrAPI / v2 / VariantSets.pm
blob62c2588695a2ad63c1ae18e6badfa43c8a0e010f
1 package CXGN::BrAPI::v2::VariantSets;
3 use Moose;
4 use Data::Dumper;
5 use SGN::Model::Cvterm;
6 use CXGN::Genotype::Search;
7 use JSON;
8 use CXGN::BrAPI::FileResponse;
9 use CXGN::BrAPI::Pagination;
10 use CXGN::BrAPI::JSONResponse;
11 use List::Util qw(sum);
13 extends 'CXGN::BrAPI::v2::Common';
# Search genotyping variant sets.
#
# A variant set is one genotyping project / protocol pair; its id has the
# form "<projectDbId>p<protocolDbId>".
#
# Parameters (keys of the $inputs hash ref, values are array refs):
#   variantSetDbId / variantSetDbIds - variant set ids to restrict to
#   studyDbId / studyDbIds           - genotyping project ids to restrict to
#   studyName / studyNames           - genotyping project names to keep
#   callSetDbId / callSetDbIds       - passed on as the accession list
# variantDbId(s) may be supplied but is not used for filtering here.
#
# Returns the value of CXGN::BrAPI::JSONResponse->return_success, with
# result {data => [...]} holding the variant sets of the requested page.
sub search {
    my $self = shift;
    my $inputs = shift;
    my $c = $self->context;
    my $page_size = $self->page_size;
    my $page = $self->page;
    my $status = $self->status;
    my $variantset_ids = $inputs->{variantSetDbId} || $inputs->{variantSetDbIds} || undef;
    my $study_ids = $inputs->{studyDbId} || $inputs->{studyDbIds} || undef;
    my $study_names = $inputs->{studyName} || $inputs->{studyNames} || undef;
    my $callset_id = $inputs->{callSetDbId} || $inputs->{callSetDbIds} || undef;

    my @trial_ids;
    my @protocol_ids;
    if ($variantset_ids) {
        foreach my $variantset_id (@{$variantset_ids}) {
            my @ids = split /p/, $variantset_id;
            if (scalar @ids > 1) {
                push @trial_ids, $ids[0] ? $ids[0] : ();
                push @protocol_ids, $ids[1] ? $ids[1] : ();
            } else {
                # An id without the "p" separator is pushed as trial id 0.
                # NOTE(review): presumably so that it matches nothing
                # instead of falling through to "all projects" below.
                push @trial_ids, 0;
            }
        }
    }

    if ($study_ids) {
        push @trial_ids, @{$study_ids};
    }

    # No project restriction given: consider every genotyping data project.
    if (scalar @trial_ids == 0) {
        my $trial_search = CXGN::Trial::Search->new({
            bcs_schema=>$self->bcs_schema,
            trial_design_list=>['genotype_data_project']
        });
        my ($data, $total_count) = $trial_search->search();

        foreach my $trial (@$data) {
            push @trial_ids, $trial->{trial_id};
        }
    }

    my $genotype_search = CXGN::Genotype::Search->new({
        bcs_schema=>$self->bcs_schema,
        people_schema => $self->people_schema(),
        cache_root=>$c->config->{cache_file_path},
        trial_list=>\@trial_ids,
        genotypeprop_hash_select=>['DS'],
        protocolprop_top_key_select=>[],
        protocolprop_marker_hash_select=>[],
        accession_list=>$callset_id,
        protocol_id_list=>\@protocol_ids,
    });

    my %variant_sets;

    $genotype_search->init_genotype_iterator();

    # Group the genotype records by project/protocol pair.
    while (my ($count, $gt) = $genotype_search->get_next_genotype_info) {

        if ( !$study_names || grep { $_ eq $gt->{genotypingDataProjectName} } @{$study_names} ) {

            my $set_id = $gt->{genotypingDataProjectDbId} . "p" . $gt->{analysisMethodDbId};

            # The analysis entry and marker count are recorded only once,
            # from the first genotype seen for the set.
            if ( !$variant_sets{$set_id}{'analysisIds'}{$gt->{analysisMethodDbId}} ) {
                my @analysis;
                push @analysis, {
                    analysisDbId=> qq|$gt->{analysisMethodDbId}|, #protocolid
                    analysisName=> $gt->{analysisMethod},
                    created=>undef,
                    description=>undef,
                    software=>undef,
                    type=>undef,
                    updated=>undef,
                };

                push( @{ $variant_sets{$set_id}{'analysisIds'}{$gt->{analysisMethodDbId}} }, 1 );
                push( @{ $variant_sets{$set_id}{'markerCount'} }, $gt->{resultCount} );
                push( @{ $variant_sets{$set_id}{'analysis'} }, @analysis );
            }

            push( @{ $variant_sets{$set_id}{'genotypes'} }, $gt->{genotypeDbId} );
            $variant_sets{$set_id}{'name'} = $gt->{genotypingDataProjectName} . " - " . $gt->{analysisMethod};
            $variant_sets{$set_id}{'dataProject'} = $gt->{genotypingDataProjectDbId};
        }
    }

    my @data;
    my $start_index = $page*$page_size;
    my $end_index = $page*$page_size + $page_size - 1;
    my $counter = 0;

    # Hash key order is not stable between runs; sort the ids so that a
    # given page always contains the same variant sets.
    foreach my $id (sort keys %variant_sets) {

        if ($counter >= $start_index && $counter <= $end_index) {
            my $set = $variant_sets{$id};

            my @availableFormats;
            push @availableFormats, {
                dataFormat => "json",
                fileFormat => "json",
                fileURL => undef,
            };

            # Take a single key so the hash constructor below always gets
            # exactly one value for referenceSetDbId.
            my ($reference_set_id) = sort keys %{ $set->{'analysisIds'} };

            push @data, {
                additionalInfo=>{},
                analysis => $set->{'analysis'},
                availableFormats => \@availableFormats,
                callSetCount => scalar @{ $set->{'genotypes'} },
                referenceSetDbId => $reference_set_id,
                studyDbId => qq|$set->{'dataProject'}|,
                variantCount => _sum($set->{'markerCount'}),
                variantSetDbId => qq|$id|,
                variantSetName => $set->{'name'},
            };
        }
        $counter++;
    }

    my %result = (data => \@data);
    my @data_files;
    my $pagination = CXGN::BrAPI::Pagination->pagination_response($counter,$page_size,$page);
    return CXGN::BrAPI::JSONResponse->return_success(\%result, $pagination, \@data_files, $status, 'VariantSets result constructed');
}
# Return the details of a single variant set.
#
# Parameters (keys of the $inputs hash ref):
#   variantSetDbId - id of the form "<projectDbId>p<protocolDbId>"
#
# Returns the value of CXGN::BrAPI::JSONResponse->return_success. The result
# is one variant set hash, or an empty hash when nothing matched. If the id
# matches several project/protocol pairs, the one with the lowest id in
# string order is returned.
sub detail {
    my $self = shift;
    my $inputs = shift;
    my $c = $self->context;
    my $page_size = $self->page_size;
    my $page = $self->page;
    my $status = $self->status;
    my $variantset_id = $inputs->{variantSetDbId};

    my @trial_ids;
    my @protocol_ids;
    if ($variantset_id) {
        my @ids = split /p/, $variantset_id;
        push @trial_ids, $ids[0] ? $ids[0] : ();
        push @protocol_ids, $ids[1] ? $ids[1] : ();
    }

    my $genotype_search = CXGN::Genotype::Search->new({
        bcs_schema=>$self->bcs_schema,
        people_schema => $self->people_schema(),
        cache_root=>$c->config->{cache_file_path},
        trial_list=>\@trial_ids,
        protocol_id_list=>\@protocol_ids,
        genotypeprop_hash_select=>['DS'],
        protocolprop_top_key_select=>[],
        protocolprop_marker_hash_select=>[],
    });
    # NOTE(review): the returned handle is never read in this method; the
    # call is kept in case its side effects are needed -- TODO confirm.
    my $file_handle = $genotype_search->get_cached_file_search_json($c->config->{cluster_shared_tempdir}, 1); #Metadata only returned

    my %variant_sets;

    $genotype_search->init_genotype_iterator();

    # Group the genotype records by project/protocol pair.
    while (my ($count, $gt) = $genotype_search->get_next_genotype_info) {

        my $set_id = $gt->{genotypingDataProjectDbId} . "p" . $gt->{analysisMethodDbId};

        # The analysis entry and marker count are recorded only once, from
        # the first genotype seen for the set.
        if ( !$variant_sets{$set_id}{'analysisIds'}{$gt->{analysisMethodDbId}} ) {
            my @analysis;
            push @analysis, {
                analysisDbId=> qq|$gt->{analysisMethodDbId}|, #protocolid
                analysisName=> $gt->{analysisMethod},
                created=>undef,
                description=>undef,
                software=>undef,
                type=>undef,
                updated=>undef,
            };

            push( @{ $variant_sets{$set_id}{'analysisIds'}{$gt->{analysisMethodDbId}} }, 1 );
            push( @{ $variant_sets{$set_id}{'markerCount'} }, $gt->{resultCount} );
            push( @{ $variant_sets{$set_id}{'analysis'} }, @analysis );
        }

        push( @{ $variant_sets{$set_id}{'genotypes'} }, $gt->{genotypeDbId} );
        $variant_sets{$set_id}{'name'} = $gt->{genotypingDataProjectName} . " - " . $gt->{analysisMethod};
        $variant_sets{$set_id}{'dataProject'} = $gt->{genotypingDataProjectDbId};
    }

    # Build exactly one result hash. Passing a flattened array to
    # return_success would shift its positional arguments whenever zero or
    # several sets matched.
    my %result;
    my ($id) = sort keys %variant_sets;
    if (defined $id) {
        my $set = $variant_sets{$id};

        my @availableFormats;
        push @availableFormats, {
            dataFormat => "json",
            fileFormat => "json",
            fileURL => undef,
        };

        my ($reference_set_id) = sort keys %{ $set->{'analysisIds'} };

        %result = (
            additionalInfo=>{},
            analysis => $set->{'analysis'},
            availableFormats => \@availableFormats,
            callSetCount => scalar @{ $set->{'genotypes'} },
            referenceSetDbId => $reference_set_id,
            studyDbId => qq|$set->{'dataProject'}|,
            variantCount => _sum($set->{'markerCount'}),
            variantSetDbId => qq|$id|,
            variantSetName => $set->{'name'},
        );
    }

    my @data_files;
    my $pagination = CXGN::BrAPI::Pagination->pagination_response(1,$page_size,$page);
    return CXGN::BrAPI::JSONResponse->return_success(\%result, $pagination, \@data_files, $status, 'VariantSets result constructed');
}
# List the call sets (genotyped stocks) of a variant set.
#
# Parameters (keys of the $inputs hash ref):
#   variantSetDbId - id of the form "<projectDbId>p<protocolDbId>"
#   callSetDbId    - optional array ref, passed on as the accession list
# callSetName may be supplied but is not used for filtering here.
#
# Returns the value of CXGN::BrAPI::JSONResponse->return_success, with
# result {data => [...]} holding the call sets of the requested page.
# Dies if the cached search file handle cannot be duplicated.
sub callsets {
    my $self = shift;
    my $inputs = shift;
    my $c = $self->context;
    my $page_size = $self->page_size;
    my $page = $self->page;
    my $status = $self->status;
    my $variantset_id = $inputs->{variantSetDbId};
    my @callset_id = $inputs->{callSetDbId} ? @{$inputs->{callSetDbId}} : ();

    my @trial_ids;
    my @protocol_ids;
    if ($variantset_id) {
        my @ids = split /p/, $variantset_id;
        push @trial_ids, $ids[0] ? $ids[0] : ();
        push @protocol_ids, $ids[1] ? $ids[1] : ();
    }

    # No offset/limit is passed to the search; paging is applied below
    # while reading the cached file.
    my $genotypes_search = CXGN::Genotype::Search->new({
        bcs_schema=>$self->bcs_schema,
        people_schema => $self->people_schema(),
        cache_root=>$c->config->{cache_file_path},
        trial_list=>\@trial_ids,
        protocol_id_list=>\@protocol_ids,
        genotypeprop_hash_select=>['DS'],
        protocolprop_top_key_select=>[],
        protocolprop_marker_hash_select=>[],
        accession_list=>\@callset_id,
    });
    my $file_handle = $genotypes_search->get_cached_file_search_json($c->config->{cluster_shared_tempdir}, 1); #Metadata only returned
    my @data;

    my $start_index = $page*$page_size;
    my $end_index = $page*$page_size + $page_size - 1;
    my $counter = 0;

    open my $fh, "<&", $file_handle or die "Can't open output file: $!";
    my $header_line = <$fh>; # first line is a header, not a genotype record

    # One JSON record per line; only lines on the requested page are
    # decoded, the rest are just counted for the pagination total.
    while (my $line = <$fh>) {
        if ($counter >= $start_index && $counter <= $end_index) {
            my $gt = decode_json $line;

            push @data, {
                additionalInfo=>{},
                callSetDbId=> qq|$gt->{stock_id}|,
                callSetName=> qq|$gt->{stock_name}|,
                created=>undef,
                sampleDbId=>qq|$gt->{stock_id}|,
                studyDbId=>qq|$gt->{genotypingDataProjectDbId}|,
                updated=>undef,
                variantSetDbIds => [ $gt->{genotypingDataProjectDbId}. "p". $gt->{analysisMethodDbId} ],
            };
        }
        $counter++;
    }
    close $fh;

    my %result = (data => \@data);
    my @data_files;
    my $pagination = CXGN::BrAPI::Pagination->pagination_response($counter,$page_size,$page);
    return CXGN::BrAPI::JSONResponse->return_success(\%result, $pagination, \@data_files, $status, 'VariantSets result constructed');
}
# List the genotype calls (one per stock and marker) of a variant set.
#
# Parameters (keys of the $inputs hash ref):
#   variantSetDbId - id of the form "<projectDbId>p<protocolDbId>"
#   sep_phased, sep_unphased, unknown_string, expand_homozygotes -
#       not implemented; supplying any of them adds an error entry to the
#       status list, and the search still runs.
#
# Returns the value of CXGN::BrAPI::JSONResponse->return_success, with
# result {data => [...], ...} holding the calls of the requested page.
# Dies if the cached search file handle cannot be duplicated.
sub calls {
    my $self = shift;
    my $inputs = shift;
    my $c = $self->context;
    my $page_size = $self->page_size;
    my $page = $self->page;
    my $status = $self->status;
    my $variantset_id = $inputs->{variantSetDbId};
    my $sep_phased = $inputs->{sep_phased};
    my $sep_unphased = $inputs->{sep_unphased};
    my $unknown_string = $inputs->{unknown_string};
    my $expand_homozygotes = $inputs->{expand_homozygotes};

    if ($sep_phased || $sep_unphased || $expand_homozygotes || $unknown_string) {
        push @$status, { 'error' => 'The following parameters are not implemented: expandHomozygotes, unknownString, sepPhased, sepUnphased' };
    }

    my @trial_ids;
    my @protocol_ids;
    if ($variantset_id) {
        my @ids = split /p/, $variantset_id;
        push @trial_ids, $ids[0] ? $ids[0] : ();
        push @protocol_ids, $ids[1] ? $ids[1] : ();
    }

    my @data_files;
    my %result;

    my $genotypes_search = CXGN::Genotype::Search->new({
        bcs_schema=>$self->bcs_schema,
        people_schema => $self->people_schema(),
        cache_root=>$c->config->{cache_file_path},
        trial_list=>\@trial_ids,
        protocol_id_list=>\@protocol_ids,
        genotypeprop_hash_select=>['DS', 'GT', 'NT'],
        protocolprop_top_key_select=>[],
        protocolprop_marker_hash_select=>[],
    });
    my $file_handle = $genotypes_search->get_cached_file_search_json($c->config->{cluster_shared_tempdir}, 0);

    my $start_index = $page*$page_size;
    my $end_index = $page*$page_size + $page_size - 1;
    my $counter = 0;

    open my $fh, "<&", $file_handle or die "Can't open output file: $!";
    # The first line is a header. Its content is not needed here, so it is
    # skipped without decoding it.
    my $header_line = <$fh>;

    my @data;

    # One JSON genotype record per line. Every record is decoded because
    # the pagination total counts markers across all records.
    while (my $gt_line = <$fh>) {
        my $gt = decode_json $gt_line;
        my $genotype = $gt->{selected_genotype_hash};
        my @ordered_refmarkers = sort keys(%$genotype);

        foreach my $m (@ordered_refmarkers) {
            if ($counter >= $start_index && $counter <= $end_index) {
                # Prefer the NT value, then GT, then DS.
                my $geno = '';
                if (defined($genotype->{$m}->{'NT'})) {
                    $geno = $genotype->{$m}->{'NT'};
                }
                elsif (defined($genotype->{$m}->{'GT'})) {
                    $geno = $genotype->{$m}->{'GT'};
                }
                elsif (defined($genotype->{$m}->{'DS'})) {
                    $geno = $genotype->{$m}->{'DS'};
                }
                push @data, {
                    additionalInfo=>{},
                    variantName=>qq|$m|,
                    variantDbId=>qq|$m|,
                    callSetDbId=>qq|$gt->{stock_id}|,
                    callSetName=>qq|$gt->{stock_name}|,
                    genotype=>{values=>$geno},
                    genotype_likelihood=>undef,
                    phaseSet=>undef,
                };
            }
            $counter++;
        }
    }
    close $fh;

    %result = ( data=>\@data,
        expandHomozygotes=>undef,
        sepPhased=>undef,
        sepUnphased=>undef,
        unknownString=>undef);

    my $pagination = CXGN::BrAPI::Pagination->pagination_response($counter,$page_size,$page);
    return CXGN::BrAPI::JSONResponse->return_success(\%result, $pagination, \@data_files, $status, 'VariantSets result constructed');
}
# List the variants (markers) of a variant set.
#
# Parameters (keys of the $inputs hash ref):
#   variantSetDbId             - id of the form "<projectDbId>p<protocolDbId>"
#   variantDbId / variantDbIds - optional array ref, passed on as the
#                                marker name list
# pageToken may be supplied but is not used.
#
# Returns the value of CXGN::BrAPI::JSONResponse->return_success, with
# result {data => [...]} holding one hash per variant.
sub variants {
    my $self = shift;
    my $inputs = shift;
    my $page_size = $self->page_size;
    my $page = $self->page;
    my $status = $self->status;

    my $variantset_id = $inputs->{variantSetDbId};
    my $marker_ids = $inputs->{variantDbId} || ($inputs->{variantDbIds} || []);
    my $schema = $self->bcs_schema;
    my @data_out;

    my @trial_ids;
    my @protocol_ids;
    if ($variantset_id) {
        my @ids = split /p/, $variantset_id;
        push @trial_ids, $ids[0] ? $ids[0] : ();
        push @protocol_ids, $ids[1] ? $ids[1] : ();
    }

    my $marker_search = CXGN::Marker::SearchBrAPI->new({
        bcs_schema => $schema,
        protocol_id_list => \@protocol_ids,
        project_id_list => \@trial_ids,
        marker_name_list => $marker_ids,
        offset=>$page_size*$page,
        limit=>$page_size
    });

    my $start_index = $page*$page_size;
    my $end_index = $page*$page_size + $page_size - 1;
    my $counter = 0;

    my ($data, $total_count) = $marker_search->search();

    # NOTE(review): offset/limit are already passed to the search above. If
    # the search applies them, the page window below filters the page a
    # second time and every page after the first comes back empty -- verify
    # against CXGN::Marker::SearchBrAPI before changing.
    foreach my $marker (@$data) {
        if ($counter >= $start_index && $counter <= $end_index) {
            my $info = $marker->{info};
            # A list-context match leaves $svtype undef when SVTYPE is
            # absent. ("my $x = ... if COND" has undefined behaviour.)
            my ($svtype) = $info =~ /SVTYPE=(\w+);/;
            my @cipos = _get_info($info,'CIPOS');
            my @ciend = _get_info($info,'CIEND');
            my @svlen = _get_info($info,'SVLEN');

            my %data_obj = (
                additionalInfo => {},
                alternate_bases => $marker->{alt},
                ciend => [@ciend],
                cipos => [@cipos],
                created => undef,
                end => $marker->{pos} + length($marker->{ref}),
                filtersApplied => $marker->{filter} eq "." ? JSON::false : JSON::true,
                filtersFailed => ( $marker->{filter} eq "PASS" || $marker->{filter} eq "." ) ? undef : $marker->{filter},
                filtersPassed => $marker->{filter} eq "PASS" ? JSON::true : JSON::false,
                referenceBases => $marker->{ref},
                referenceName => $marker->{chrom} ? $marker->{chrom} : undef,
                start => $marker->{pos},
                # Exactly one value: expanding the whole list here would
                # shift the keys that follow when SVLEN has several values.
                svlen => $svlen[0],
                updated => undef,
                variantDbId => qq|$marker->{marker_name}|,
                variantNames => $marker->{marker_name},
                variantSetDbId => _quote($marker->{project_id}, $marker->{nd_protocol_id} ),
                variantType => $svtype,
            );
            push @data_out, \%data_obj;
        }
        $counter++;
    }

    my %result = (data=>\@data_out);
    my @data_files;
    # NOTE(review): page size 1 and page 0 are hard-coded here instead of
    # $page_size/$page -- confirm this is intended.
    my $pagination = CXGN::BrAPI::Pagination->pagination_response($total_count,1,0);
    return CXGN::BrAPI::JSONResponse->return_success(\%result, $pagination, \@data_files, $status, 'Variants result constructed');
}
# Return the variant sets matching the given filters.
#
# This method takes the same inputs and produces the same response as
# search(), so it delegates to it instead of keeping a second copy of the
# code. The previous copy differed in one place only: a variant set id
# without the "p" separator reset the whole trial id list (@trial_ids = 0),
# discarding ids collected from earlier entries, where search() appends 0.
#
# Parameters: see search().
# Returns: the value returned by search().
sub extract {
    my $self = shift;
    my $inputs = shift;

    return $self->search($inputs);
}
# Sum the numbers in an array ref.
#
# Parameters:
#   $array - array ref of numbers (may be empty or undef)
# Returns the total, or 0 when there is nothing to add.
sub _sum {
    my $array = shift;

    # Seed with 0: List::Util::sum returns undef for an empty list.
    return sum(0, @{ $array || [] });
}
# Extract the comma-separated integer values of one key from an INFO
# string such as "CIPOS=-22,18;CIEND=-12,32".
#
# Parameters:
#   $info - the INFO string
#   $item - the key to look up (e.g. 'CIPOS'); matched literally
# Returns the list of values as numbers. When the key is absent the list
# holds a single empty array ref; that has always been the no-match value
# and is kept because callers place the result where one element is needed.
sub _get_info {
    my $info = shift;
    my $item = shift;
    my @array = ([]);

    # Capture the value list directly rather than going through $&, and
    # quote the key so it is not interpreted as a pattern.
    if ( defined $info && $info =~ /\Q$item\E=((?:-?\d+,?)+)/ ) {
        @array = map { $_ + 0 } split(/,/, $1);
    }

    return @array;
}
# Build variant set ids of the form "<id>p<protocol>" from a list of ids.
#
# Parameters:
#   $array    - array ref of ids (left unmodified)
#   $protocol - protocol id appended to every entry
# Returns a new array ref with one "<id>p<protocol>" string per input id.
sub _quote {
    my $array = shift;
    my $protocol = shift;

    # Return a fresh list: looping over @$array and assigning to $_ would
    # rewrite the caller's data in place.
    return [ map { "$_" . "p" . $protocol } @{ $array || [] } ];
}