start fixing test for multi cat phenotype upload.
[sgn.git] / lib / CXGN / Genotype / SearchStocks.pm
blobe3bd669916419ea61d844589c42153b88c4f28f8
1 package CXGN::Genotype::SearchStocks;
3 =head1 NAME
5 CXGN::Genotype::SearchStocks - an object to handle searching stocks containing specific genotypes
7 =head1 USAGE
9 =head1 DESCRIPTION
12 =head1 AUTHORS
14 Titima Tantikanjana <tt15@cornell.edu>
16 =cut
18 use strict;
19 use warnings;
20 use Moose;
21 use Try::Tiny;
22 use Data::Dumper;
23 use SGN::Model::Cvterm;
24 use JSON;
25 use Sort::Key::Natural qw(natsort);
27 has 'bcs_schema' => ( isa => 'Bio::Chado::Schema',
28 is => 'rw',
29 required => 1,
32 has 'filtering_parameters' => (
33 isa => 'ArrayRef[Str]',
34 is => 'ro',
37 has 'stock_list' => (
38 isa => 'ArrayRef[Str]',
39 is => 'ro',
42 sub get_selected_stocks {
43 my $self = shift;
44 my $schema = $self->bcs_schema;
45 my $stock_list = $self->stock_list;
46 my $filtering_parameters = $self->filtering_parameters;
47 my @stocks = @{$stock_list};
48 my @parameters = @{$filtering_parameters};
49 my $genotyping_experiment_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'genotyping_experiment', 'experiment_type')->cvterm_id();
50 my $vcf_map_details_markers_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'vcf_map_details_markers', 'protocol_property')->cvterm_id();
51 my $vcf_map_details_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'vcf_map_details', 'protocol_property')->cvterm_id();
53 my $plot_of_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'plot_of', 'stock_relationship')->cvterm_id();
54 my $plant_of_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'plant_of', 'stock_relationship')->cvterm_id();
55 my $tissue_sample_of_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'tissue_sample_of', 'stock_relationship')->cvterm_id();
56 my $accession_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'accession', 'stock_type')->cvterm_id();
57 my $protocol_info = $parameters[0];
58 my $info_ref = decode_json$protocol_info;
59 my %info = %{$info_ref};
60 my $protocol_id = $info{genotyping_protocol_id};
61 my $protocol_name = $info{genotyping_protocol_name};
62 my $data_type = $info{genotyping_data_type};
64 my $vcf_snp_genotyping_cvterm_id = SGN::Model::Cvterm->get_vcf_genotyping_cvterm_id($self->bcs_schema, {protocol_id => $protocol_id});
66 my $type_q= "SELECT value->>'sample_observation_unit_type_name'
67 FROM nd_protocolprop WHERE nd_protocol_id = ? AND type_id =? ";
69 my $type_h = $schema->storage->dbh()->prepare($type_q);
70 $type_h->execute($protocol_id, $vcf_map_details_id);
72 my ($sample_type) = $type_h->fetchrow_array();
74 my %chrom_hash;
75 my @incorrect_marker_names;
76 for (my $i=1; $i<@parameters; $i++) {
77 my $param_ref = decode_json$parameters[$i];
78 my %params = %{$param_ref};
79 my $marker_name = $params{marker_name};
81 if ($data_type eq 'Dosage') {
82 my $allele_dosage = $params{allele_dosage};
83 my $chrom_key;
84 my $q = "SELECT value->?->>'chrom' AS chromosome_no FROM nd_protocolprop WHERE nd_protocol_id = ? AND type_id =? AND value->?->>'chrom' IS NOT NULL";
86 my $h = $schema->storage->dbh()->prepare($q);
87 $h->execute($marker_name, $protocol_id, $vcf_map_details_markers_cvterm_id, $marker_name);
88 my ($chrom) = $h->fetchrow_array();
90 if ($chrom){
91 $chrom_hash{$chrom}{$marker_name}{'DS'} = $allele_dosage;
92 } else {
93 push @incorrect_marker_names, "Marker name: $marker_name is not in genotyping protocol: $protocol_name. \n";
95 } elsif ($data_type eq 'SNP') {
96 my $allele_1 = $params{allele1};
97 my $allele_2 = $params{allele2};
98 my @allele_param = ($allele_1, $allele_2);
100 my @ref_alt_chrom = ();
101 my $q = "SELECT value->?->>'ref', value->?->>'alt', value->?->>'chrom'
102 FROM nd_protocolprop WHERE nd_protocol_id = ? AND type_id =? AND value->?->>'ref' IS NOT NULL";
104 my $h = $schema->storage->dbh()->prepare($q);
105 $h->execute($marker_name, $marker_name, $marker_name, $protocol_id, $vcf_map_details_markers_cvterm_id, $marker_name);
107 while (my ($ref, $alt, $chrom) = $h->fetchrow_array()){
108 push @ref_alt_chrom, ($ref, $alt, $chrom);
109 if (!@ref_alt_chrom) {
110 push @incorrect_marker_names, "Marker name: $marker_name is not in genotyping protocol: $protocol_name. \n";
114 my @nt = ();
115 if (@ref_alt_chrom) {
116 my $q_gt = "SELECT genotypeprop.value->?->>'GT' FROM nd_experiment_protocol
117 JOIN nd_experiment_genotype ON (nd_experiment_genotype.nd_experiment_id = nd_experiment_protocol.nd_experiment_id)
118 JOIN genotypeprop ON (nd_experiment_genotype.genotype_id = genotypeprop.genotype_id) AND genotypeprop.type_id = ?
119 where nd_experiment_protocol.nd_protocol_id = ? AND genotypeprop.value->?->>'GT' IS NOT NULL LIMIT 1";
121 my $h_gt = $schema->storage->dbh()->prepare($q_gt);
122 $h_gt->execute($marker_name, $vcf_snp_genotyping_cvterm_id, $protocol_id, $marker_name);
123 my ($gt_value) = $h_gt->fetchrow_array();
124 my $separator = ',';
125 my @gt_alleles = split (/\//, $gt_value);
126 if (scalar(@gt_alleles) <= 1){
127 @gt_alleles = split (/\|/, $gt_value);
128 if (scalar(@gt_alleles) > 1) {
129 $separator = '|';
133 if ($allele_1 ne $allele_2){
134 foreach my $allele(@allele_param){
135 if (grep{/$allele/}(@ref_alt_chrom)){
136 if ($allele eq $ref_alt_chrom[0]){
137 $nt[0] = $allele;
138 } elsif ($allele eq $ref_alt_chrom[1]){
139 $nt[1] = $allele;
141 my $nt_string = join $separator, @nt;
143 $chrom_hash{$ref_alt_chrom[2]}{$marker_name} = {'NT' => $nt_string};
144 } else {
145 last;
148 } elsif ($allele_1 eq $allele_2){
149 if (grep{/$allele_1/}(@ref_alt_chrom)){
150 @nt = ($allele_1, $allele_2);
151 my $nt_string = join $separator, @nt;
152 $chrom_hash{$ref_alt_chrom[2]}{$marker_name} = {'NT' => $nt_string};
153 } else {
154 last;
161 if (scalar(@incorrect_marker_names) > 0) {
162 return {incorrect_marker_names=> \@incorrect_marker_names};
164 my @formatted_parameters;
165 my @rank_and_params;
166 if (%chrom_hash) {
167 foreach my $chromosome (keys %chrom_hash) {
168 my $rank_q= "SELECT value->'chromosomes'->?->>'rank'
169 FROM nd_protocolprop WHERE nd_protocol_id = ? AND type_id =? ";
171 my $rank_h = $schema->storage->dbh()->prepare($rank_q);
172 $rank_h->execute($chromosome, $protocol_id, $vcf_map_details_id);
174 my ($rank) = $rank_h->fetchrow_array();
175 my $marker_params = $chrom_hash{$chromosome};
176 my $each_chrom_markers_string = encode_json $marker_params;
177 push @formatted_parameters, $each_chrom_markers_string;
178 push @rank_and_params, [$rank, $each_chrom_markers_string]
182 my @sorted_markers = natsort @formatted_parameters;
183 my $genotype_string = join("<br>", @sorted_markers);
185 my $join_type;
186 if ($sample_type eq 'accession') {
187 $join_type = "JOIN nd_experiment_stock ON (stock1.stock_id = nd_experiment_stock.stock_id)";
188 } elsif ($sample_type eq 'tissue_sample') {
189 $join_type = "JOIN stock_relationship ON (stock_relationship.object_id = stock1.stock_id) AND stock_relationship.type_id = $tissue_sample_of_cvterm_id
190 JOIN nd_experiment_stock ON stock_relationship.subject_id = nd_experiment_stock.stock_id"
193 my @selected_stocks_details;
194 foreach my $param (@rank_and_params) {
195 my $stock_table = "DROP TABLE IF EXISTS stock_table;
196 CREATE TEMP TABLE stock_table(stock_name Varchar(100))";
197 my $s_t = $schema->storage->dbh()->prepare($stock_table);
198 $s_t->execute();
200 foreach my $st(@stocks){
201 my $added_table = "INSERT INTO stock_table (stock_name) VALUES (?)";
202 my $table_h = $schema->storage->dbh()->prepare($added_table);
203 $table_h->execute($st);
206 @stocks = ();
207 @selected_stocks_details = ();
209 if (($sample_type eq 'accession') || ($sample_type eq 'tissue_sample')) {
210 my $q2 = "SELECT DISTINCT stock1.stock_id, stock1.uniquename, stock2.stock_id, stock2.uniquename, cvterm.name FROM stock_table
211 JOIN stock AS stock1 ON (stock_table.stock_name = stock1.uniquename) AND stock1.type_id = ?
212 $join_type
213 JOIN stock AS stock2 ON (nd_experiment_stock.stock_id = stock2.stock_id)
214 JOIN cvterm ON (stock2.type_id = cvterm.cvterm_id)
215 JOIN nd_experiment_protocol ON (nd_experiment_stock.nd_experiment_id = nd_experiment_protocol.nd_experiment_id) AND nd_experiment_stock.type_id = ? AND nd_experiment_protocol.nd_protocol_id =?
216 JOIN nd_experiment_genotype on (nd_experiment_genotype.nd_experiment_id = nd_experiment_stock.nd_experiment_id)
217 JOIN genotypeprop on (nd_experiment_genotype.genotype_id = genotypeprop.genotype_id) AND genotypeprop.type_id = ? AND genotypeprop.rank = ?
218 WHERE genotypeprop.value @> ? ";
220 my $h2 = $schema->storage->dbh()->prepare($q2);
221 $h2->execute($accession_cvterm_id, $genotyping_experiment_cvterm_id, $protocol_id, $vcf_snp_genotyping_cvterm_id, $param->[0], $param->[1]);
223 while (my ($selected_accession_id, $selected_accession_name, $selected_sample_id, $selected_sample_name, $sample_type) = $h2->fetchrow_array()){
224 push @selected_stocks_details, [$selected_accession_id, $selected_accession_name, $selected_sample_id, $selected_sample_name, $sample_type, $genotype_string ];
225 push @stocks, $selected_accession_name
227 } elsif ($sample_type eq 'stocks') {
228 my $q2 = "SELECT DISTINCT stock1.stock_id, stock1.uniquename, stock2.stock_id, stock2.uniquename, cvterm.name FROM stock_table
229 JOIN stock AS stock1 ON (stock_table.stock_name = stock1.uniquename) AND stock1.type_id = ?
230 JOIN nd_experiment_stock ON (stock1.stock_id = nd_experiment_stock.stock_id)
231 JOIN stock AS stock2 ON (nd_experiment_stock.stock_id = stock2.stock_id)
232 JOIN cvterm ON (stock2.type_id = cvterm.cvterm_id)
233 JOIN nd_experiment_protocol ON (nd_experiment_stock.nd_experiment_id = nd_experiment_protocol.nd_experiment_id) AND nd_experiment_stock.type_id = ? AND nd_experiment_protocol.nd_protocol_id =?
234 JOIN nd_experiment_genotype on (nd_experiment_genotype.nd_experiment_id = nd_experiment_stock.nd_experiment_id)
235 JOIN genotypeprop on (nd_experiment_genotype.genotype_id = genotypeprop.genotype_id) AND genotypeprop.type_id = ? AND genotypeprop.rank = ?
236 WHERE genotypeprop.value @> ?
238 UNION ALL
240 SELECT DISTINCT stock1.stock_id, stock1.uniquename, stock2.stock_id, stock2.uniquename, cvterm.name FROM stock_table
241 JOIN stock AS stock1 ON (stock_table.stock_name = stock1.uniquename) AND stock1.type_id = ?
242 JOIN stock_relationship ON (stock_relationship.object_id = stock1.stock_id) AND stock_relationship.type_id IN (?,?,?)
243 JOIN nd_experiment_stock ON stock_relationship.subject_id = nd_experiment_stock.stock_id
244 JOIN stock AS stock2 ON (nd_experiment_stock.stock_id = stock2.stock_id)
245 JOIN cvterm ON (stock2.type_id = cvterm.cvterm_id)
246 JOIN nd_experiment_protocol ON (nd_experiment_stock.nd_experiment_id = nd_experiment_protocol.nd_experiment_id) AND nd_experiment_stock.type_id = ? AND nd_experiment_protocol.nd_protocol_id =?
247 JOIN nd_experiment_genotype on (nd_experiment_genotype.nd_experiment_id = nd_experiment_stock.nd_experiment_id)
248 JOIN genotypeprop on (nd_experiment_genotype.genotype_id = genotypeprop.genotype_id) AND genotypeprop.type_id = ? AND genotypeprop.rank = ?
249 WHERE genotypeprop.value @> ?";
251 my $h2= $schema->storage->dbh()->prepare($q2);
252 $h2->execute($accession_cvterm_id, $genotyping_experiment_cvterm_id, $protocol_id, $vcf_snp_genotyping_cvterm_id, $param->[0], $param->[1], $accession_cvterm_id, $plot_of_cvterm_id, $plant_of_cvterm_id, $tissue_sample_of_cvterm_id, $genotyping_experiment_cvterm_id, $protocol_id, $vcf_snp_genotyping_cvterm_id, $param->[0], $param->[1]);
254 while (my ($selected_accession_id, $selected_accession_name, $selected_sample_id, $selected_sample_name, $sample_type) = $h2->fetchrow_array()){
255 push @selected_stocks_details, [$selected_accession_id, $selected_accession_name, $selected_sample_id, $selected_sample_name, $sample_type, $genotype_string ];
256 push @stocks, $selected_accession_name
261 return {selected_stocks=> \@selected_stocks_details};