add is_variable accessor.
[sgn.git] / lib / CXGN / Genotype / QcGenotype.pm
blob3db1b62ac1fca95fca4e39d6045dbbfbae4e6892
1 package CXGN::Genotype::QcGenotype;
3 =head1 NAME
5 CXGN::Genotype::QcGenotype - an object to handle searching genotypes for stocks
7 =head1 USAGE
9 my $genotypes_search = CXGN::Genotype::QcGenotype->new({
10 bcs_schema=>$schema,
11 accession_list=>$accession_list,
12 marker_scores_hash=>\%marker_scores_hash,
13 });
14 my ($total_count, $genotypes) = $genotypes_search->get_genotype_qc_info();
16 =head1 DESCRIPTION
19 =head1 AUTHORS
22 =cut
24 use strict;
25 use warnings;
26 use Moose;
27 use Try::Tiny;
28 use Data::Dumper;
29 use SGN::Model::Cvterm;
30 use CXGN::Trial;
31 use JSON;
32 use CXGN::Stock::Accession;
34 has 'bcs_schema' => ( isa => 'Bio::Chado::Schema',
35 is => 'rw',
36 required => 1,
39 has 'protocol_id_list' => (
40 isa => 'ArrayRef[Int]|Undef',
41 is => 'rw',
44 has 'markerprofile_id_list' => (
45 isa => 'ArrayRef[Int]|Undef',
46 is => 'ro',
49 has 'accession_list' => (
50 isa => 'ArrayRef[Int]|Undef',
51 is => 'ro',
54 =head2 get_genotype_info
56 returns: an array with genotype information
58 =cut
60 sub get_genotype_qc_info {
61 my $self = shift;
62 my $schema = $self->bcs_schema;
63 my $trial_list = $self->trial_list;
64 my $protocol_id_list = $self->protocol_id_list;
65 my $markerprofile_id_list = $self->markerprofile_id_list;
66 my $accession_list = $self->accession_list;
67 my @data;
68 my %search_params;
69 my @where_clause;
71 my $q = "SELECT $stock_select, genotype_values.genotypeprop_id, igd_number_genotypeprop.value, nd_protocol.nd_protocol_id, nd_protocol.name, stock.uniquename, stock.type_id, stock_cvterm.name, genotype.genotype_id, genotype.uniquename, genotype.description, project.project_id, project.name, project.description, accession_of_tissue_sample.stock_id, accession_of_tissue_sample.uniquename, count(genotype_values.genotypeprop_id) OVER() AS full_count
72 FROM stock
73 JOIN cvterm AS stock_cvterm ON(stock.type_id = stock_cvterm.cvterm_id)
74 LEFT JOIN stock_relationship ON(stock_relationship.subject_id=stock.stock_id AND stock_relationship.type_id = $tissue_sample_of_cvterm_id)
75 LEFT JOIN stock AS accession_of_tissue_sample ON(stock_relationship.object_id=accession_of_tissue_sample.stock_id)
76 JOIN nd_experiment_stock ON(stock.stock_id=nd_experiment_stock.stock_id)
77 JOIN nd_experiment USING(nd_experiment_id)
78 JOIN nd_experiment_protocol USING(nd_experiment_id)
79 JOIN nd_experiment_project USING(nd_experiment_id)
80 JOIN nd_experiment_genotype USING(nd_experiment_id)
81 JOIN nd_protocol USING(nd_protocol_id)
82 LEFT JOIN nd_protocolprop ON(nd_protocolprop.nd_protocol_id = nd_protocol.nd_protocol_id AND nd_protocolprop.type_id = $vcf_map_details_cvterm_id)
83 JOIN genotype USING(genotype_id)
84 LEFT JOIN genotypeprop AS igd_number_genotypeprop ON(igd_number_genotypeprop.genotype_id = genotype.genotype_id AND igd_number_genotypeprop.type_id = $igd_genotypeprop_cvterm_id)
85 JOIN genotypeprop AS genotype_values ON(genotype_values.genotype_id = genotype.genotype_id AND genotype_values.type_id = $vcf_snp_genotyping_cvterm_id)
86 JOIN project USING(project_id)
87 $where_clause
88 ORDER BY stock.stock_id, genotype_values.genotypeprop_id ASC
89 $limit_clause
90 $offset_clause;";
92 print STDERR Dumper $q;
93 my $h = $schema->storage->dbh()->prepare($q);
94 $h->execute();
96 my $total_count = 0;
97 my @genotypeprop_array;
98 my %genotypeprop_hash;
99 my %protocolprop_hash;
100 while (my ($stock_id, $genotypeprop_id, $igd_number_json, $protocol_id, $protocol_name, $stock_name, $stock_type_id, $stock_type_name, $genotype_id, $genotype_uniquename, $genotype_description, $project_id, $project_name, $project_description, $accession_id, $accession_uniquename, $full_count) = $h->fetchrow_array()) {
101 my $igd_number_hash = $igd_number_json ? decode_json $igd_number_json : undef;
102 my $igd_number = $igd_number_hash ? $igd_number_hash->{'igd number'} : undef;
103 $igd_number = !$igd_number && $igd_number_hash ? $igd_number_hash->{'igd_number'} : undef;
105 my $germplasmName = '';
106 my $germplasmDbId = '';
107 if ($stock_type_name eq 'accession'){
108 $germplasmName = $stock_name;
109 $germplasmDbId = $stock_id;
111 if ($stock_type_name eq 'tissue_sample'){
112 $germplasmName = $accession_uniquename;
113 $germplasmDbId = $accession_id;
116 my $stock_object = CXGN::Stock::Accession->new({schema=>$self->bcs_schema, stock_id=>$germplasmDbId});
118 push @genotypeprop_array, $genotypeprop_id;
119 $genotypeprop_hash{$genotypeprop_id} = {
120 markerProfileDbId => $genotypeprop_id,
121 germplasmDbId => $germplasmDbId,
122 germplasmName => $germplasmName,
123 synonyms => $stock_object->synonyms,
124 stock_id => $stock_id,
125 stock_name => $stock_name,
126 stock_type_id => $stock_type_id,
127 stock_type_name => $stock_type_name,
128 genotypeDbId => $genotype_id,
129 genotypeUniquename => $genotype_uniquename,
130 genotypeDescription => $genotype_description,
131 analysisMethodDbId => $protocol_id,
132 analysisMethod => $protocol_name,
133 genotypingDataProjectDbId => $project_id,
134 genotypingDataProjectName => $project_name,
135 genotypingDataProjectDescription => $project_description,
136 igd_number => $igd_number,
138 $protocolprop_hash{$protocol_id}++;
139 $total_count = $full_count;
141 print STDERR "CXGN::Genotype::Search has genotypeprop_ids $total_count\n";
143 my @found_genotypeprop_ids = keys %genotypeprop_hash;
144 my @genotypeprop_hash_select_arr;
145 foreach (@$genotypeprop_hash_select){
146 push @genotypeprop_hash_select_arr, "s.value->>'$_'";
148 if (scalar(@found_genotypeprop_ids)>0) {
149 my $genotypeprop_id_sql = join ("," , @found_genotypeprop_ids);
150 my $genotypeprop_hash_select_sql = scalar(@genotypeprop_hash_select_arr) > 0 ? ', '.join ',', @genotypeprop_hash_select_arr : '';
151 my $genotypeprop_q = "SELECT s.key $genotypeprop_hash_select_sql from genotypeprop, jsonb_each(genotypeprop.value) as s WHERE genotypeprop_id = ? and type_id = $vcf_snp_genotyping_cvterm_id;";
152 my $genotypeprop_h = $schema->storage->dbh()->prepare($genotypeprop_q);
153 foreach my $genotypeprop_id (@found_genotypeprop_ids){
154 $genotypeprop_h->execute($genotypeprop_id);
155 while (my ($marker_name, @genotypeprop_info_return) = $genotypeprop_h->fetchrow_array()) {
156 for my $s (0 .. scalar(@genotypeprop_hash_select_arr)-1){
157 $genotypeprop_hash{$genotypeprop_id}->{selected_genotype_hash}->{$marker_name}->{$genotypeprop_hash_select->[$s]} = $genotypeprop_info_return[$s];
162 print STDERR "CXGN::Genotype::Search has genotypeprops\n";
164 my @found_protocolprop_ids = keys %protocolprop_hash;
165 my @protocolprop_marker_hash_select_arr;
166 foreach (@$protocolprop_marker_hash_select){
167 push @protocolprop_marker_hash_select_arr, "s.value->>'$_'";
169 my @protocolprop_top_key_select_arr;
170 foreach (@$protocolprop_top_key_select){
171 push @protocolprop_top_key_select_arr, "value->>'$_'";
173 my %selected_protocol_marker_info;
174 my %selected_protocol_top_key_info;
175 if (scalar(@found_protocolprop_ids)>0){
176 my $protocolprop_id_sql = join ("," , @found_protocolprop_ids);
177 my $protocolprop_where_sql = "nd_protocol_id in ($protocolprop_id_sql) and type_id = $vcf_map_details_cvterm_id";
178 my $protocolprop_hash_select_sql = scalar(@protocolprop_marker_hash_select_arr) > 0 ? ', '.join ',', @protocolprop_marker_hash_select_arr : '';
179 my $protocolprop_q = "SELECT nd_protocol_id, s.key $protocolprop_hash_select_sql from nd_protocolprop, jsonb_each(nd_protocolprop.value->'markers') as s WHERE $protocolprop_where_sql;";
180 my $protocolprop_h = $schema->storage->dbh()->prepare($protocolprop_q);
181 $protocolprop_h->execute();
182 while (my ($protocol_id, $marker_name, @protocolprop_info_return) = $protocolprop_h->fetchrow_array()) {
183 for my $s (0 .. scalar(@protocolprop_marker_hash_select_arr)-1){
184 $selected_protocol_marker_info{$protocol_id}->{$marker_name}->{$protocolprop_marker_hash_select->[$s]} = $protocolprop_info_return[$s];
187 my $protocolprop_top_key_select_sql = scalar(@protocolprop_top_key_select_arr) > 0 ? ', '.join ',', @protocolprop_top_key_select_arr : '';
188 my $protocolprop_top_key_q = "SELECT nd_protocol_id $protocolprop_top_key_select_sql from nd_protocolprop WHERE $protocolprop_where_sql;";
190 my $protocolprop_top_key_h = $schema->storage->dbh()->prepare($protocolprop_top_key_q);
191 $protocolprop_top_key_h->execute();
192 while (my ($protocol_id, @protocolprop_top_key_return) = $protocolprop_top_key_h->fetchrow_array()) {
193 for my $s (0 .. scalar(@protocolprop_top_key_select_arr)-1){
194 my $protocolprop_i = $protocolprop_top_key_select->[$s];
195 my $val;
196 if ($protocolprop_i eq 'header_information_lines' || $protocolprop_i eq 'markers_array' || $protocolprop_i eq 'markers' || $protocolprop_i eq 'marker_names') {
197 $val = decode_json $protocolprop_top_key_return[$s];
198 } else {
199 $val = $protocolprop_top_key_return[$s];
201 $selected_protocol_top_key_info{$protocol_id}->{$protocolprop_i} = $val;
205 print STDERR "CXGN::Genotype::Search has protocolprops\n";
207 foreach (@genotypeprop_array) {
208 my $info = $genotypeprop_hash{$_};
209 my $selected_marker_info = $selected_protocol_marker_info{$info->{analysisMethodDbId}} ? $selected_protocol_marker_info{$info->{analysisMethodDbId}} : {};
210 my $selected_protocol_info = $selected_protocol_top_key_info{$info->{analysisMethodDbId}} ? $selected_protocol_top_key_info{$info->{analysisMethodDbId}} : {};
211 my @all_protocol_marker_names = keys %$selected_marker_info;
212 $selected_protocol_info->{markers} = $selected_marker_info;
213 $info->{resultCount} = scalar(keys %{$info->{selected_genotype_hash}});
214 $info->{all_protocol_marker_names} = \@all_protocol_marker_names;
215 $info->{selected_protocol_hash} = $selected_protocol_info;
216 push @data, $info;
219 #print STDERR Dumper \@data;
220 return ($total_count, \@data);