1 package CXGN
::Genotype
::QcGenotype
;
5 CXGN::Genotype::QcGenotype - an object to handle searching genotypes for stocks
9 my $genotypes_search = CXGN::Genotype::QcGenotype->new({
11 accession_list=>$accession_list,
12 marker_scores_hash=>\%marker_scores_hash,
14 my ($total_count, $genotypes) = $genotypes_search->get_genotype_qc_info();
29 use SGN
::Model
::Cvterm
;
32 use CXGN
::Stock
::Accession
;
34 has
'bcs_schema' => ( isa
=> 'Bio::Chado::Schema',
39 has
'protocol_id_list' => (
40 isa
=> 'ArrayRef[Int]|Undef',
44 has
'markerprofile_id_list' => (
45 isa
=> 'ArrayRef[Int]|Undef',
49 has
'accession_list' => (
50 isa
=> 'ArrayRef[Int]|Undef',
54 =head2 get_genotype_info
56 returns: an array with genotype information
60 sub get_genotype_qc_info
{
62 my $schema = $self->bcs_schema;
63 my $trial_list = $self->trial_list;
64 my $protocol_id_list = $self->protocol_id_list;
65 my $markerprofile_id_list = $self->markerprofile_id_list;
66 my $accession_list = $self->accession_list;
71 my $q = "SELECT $stock_select, genotype_values.genotypeprop_id, igd_number_genotypeprop.value, nd_protocol.nd_protocol_id, nd_protocol.name, stock.uniquename, stock.type_id, stock_cvterm.name, genotype.genotype_id, genotype.uniquename, genotype.description, project.project_id, project.name, project.description, accession_of_tissue_sample.stock_id, accession_of_tissue_sample.uniquename, count(genotype_values.genotypeprop_id) OVER() AS full_count
73 JOIN cvterm AS stock_cvterm ON(stock.type_id = stock_cvterm.cvterm_id)
74 LEFT JOIN stock_relationship ON(stock_relationship.subject_id=stock.stock_id AND stock_relationship.type_id = $tissue_sample_of_cvterm_id)
75 LEFT JOIN stock AS accession_of_tissue_sample ON(stock_relationship.object_id=accession_of_tissue_sample.stock_id)
76 JOIN nd_experiment_stock ON(stock.stock_id=nd_experiment_stock.stock_id)
77 JOIN nd_experiment USING(nd_experiment_id)
78 JOIN nd_experiment_protocol USING(nd_experiment_id)
79 JOIN nd_experiment_project USING(nd_experiment_id)
80 JOIN nd_experiment_genotype USING(nd_experiment_id)
81 JOIN nd_protocol USING(nd_protocol_id)
82 LEFT JOIN nd_protocolprop ON(nd_protocolprop.nd_protocol_id = nd_protocol.nd_protocol_id AND nd_protocolprop.type_id = $vcf_map_details_cvterm_id)
83 JOIN genotype USING(genotype_id)
84 LEFT JOIN genotypeprop AS igd_number_genotypeprop ON(igd_number_genotypeprop.genotype_id = genotype.genotype_id AND igd_number_genotypeprop.type_id = $igd_genotypeprop_cvterm_id)
85 JOIN genotypeprop AS genotype_values ON(genotype_values.genotype_id = genotype.genotype_id AND genotype_values.type_id = $vcf_snp_genotyping_cvterm_id)
86 JOIN project USING(project_id)
88 ORDER BY stock.stock_id, genotype_values.genotypeprop_id ASC
92 print STDERR Dumper
$q;
93 my $h = $schema->storage->dbh()->prepare($q);
97 my @genotypeprop_array;
98 my %genotypeprop_hash;
99 my %protocolprop_hash;
100 while (my ($stock_id, $genotypeprop_id, $igd_number_json, $protocol_id, $protocol_name, $stock_name, $stock_type_id, $stock_type_name, $genotype_id, $genotype_uniquename, $genotype_description, $project_id, $project_name, $project_description, $accession_id, $accession_uniquename, $full_count) = $h->fetchrow_array()) {
101 my $igd_number_hash = $igd_number_json ? decode_json
$igd_number_json : undef;
102 my $igd_number = $igd_number_hash ?
$igd_number_hash->{'igd number'} : undef;
103 $igd_number = !$igd_number && $igd_number_hash ?
$igd_number_hash->{'igd_number'} : undef;
105 my $germplasmName = '';
106 my $germplasmDbId = '';
107 if ($stock_type_name eq 'accession'){
108 $germplasmName = $stock_name;
109 $germplasmDbId = $stock_id;
111 if ($stock_type_name eq 'tissue_sample'){
112 $germplasmName = $accession_uniquename;
113 $germplasmDbId = $accession_id;
116 my $stock_object = CXGN
::Stock
::Accession
->new({schema
=>$self->bcs_schema, stock_id
=>$germplasmDbId});
118 push @genotypeprop_array, $genotypeprop_id;
119 $genotypeprop_hash{$genotypeprop_id} = {
120 markerProfileDbId
=> $genotypeprop_id,
121 germplasmDbId
=> $germplasmDbId,
122 germplasmName
=> $germplasmName,
123 synonyms
=> $stock_object->synonyms,
124 stock_id
=> $stock_id,
125 stock_name
=> $stock_name,
126 stock_type_id
=> $stock_type_id,
127 stock_type_name
=> $stock_type_name,
128 genotypeDbId
=> $genotype_id,
129 genotypeUniquename
=> $genotype_uniquename,
130 genotypeDescription
=> $genotype_description,
131 analysisMethodDbId
=> $protocol_id,
132 analysisMethod
=> $protocol_name,
133 genotypingDataProjectDbId
=> $project_id,
134 genotypingDataProjectName
=> $project_name,
135 genotypingDataProjectDescription
=> $project_description,
136 igd_number
=> $igd_number,
138 $protocolprop_hash{$protocol_id}++;
139 $total_count = $full_count;
141 print STDERR
"CXGN::Genotype::Search has genotypeprop_ids $total_count\n";
143 my @found_genotypeprop_ids = keys %genotypeprop_hash;
144 my @genotypeprop_hash_select_arr;
145 foreach (@
$genotypeprop_hash_select){
146 push @genotypeprop_hash_select_arr, "s.value->>'$_'";
148 if (scalar(@found_genotypeprop_ids)>0) {
149 my $genotypeprop_id_sql = join ("," , @found_genotypeprop_ids);
150 my $genotypeprop_hash_select_sql = scalar(@genotypeprop_hash_select_arr) > 0 ?
', '.join ',', @genotypeprop_hash_select_arr : '';
151 my $genotypeprop_q = "SELECT s.key $genotypeprop_hash_select_sql from genotypeprop, jsonb_each(genotypeprop.value) as s WHERE genotypeprop_id = ? and type_id = $vcf_snp_genotyping_cvterm_id;";
152 my $genotypeprop_h = $schema->storage->dbh()->prepare($genotypeprop_q);
153 foreach my $genotypeprop_id (@found_genotypeprop_ids){
154 $genotypeprop_h->execute($genotypeprop_id);
155 while (my ($marker_name, @genotypeprop_info_return) = $genotypeprop_h->fetchrow_array()) {
156 for my $s (0 .. scalar(@genotypeprop_hash_select_arr)-1){
157 $genotypeprop_hash{$genotypeprop_id}->{selected_genotype_hash
}->{$marker_name}->{$genotypeprop_hash_select->[$s]} = $genotypeprop_info_return[$s];
162 print STDERR
"CXGN::Genotype::Search has genotypeprops\n";
164 my @found_protocolprop_ids = keys %protocolprop_hash;
165 my @protocolprop_marker_hash_select_arr;
166 foreach (@
$protocolprop_marker_hash_select){
167 push @protocolprop_marker_hash_select_arr, "s.value->>'$_'";
169 my @protocolprop_top_key_select_arr;
170 foreach (@
$protocolprop_top_key_select){
171 push @protocolprop_top_key_select_arr, "value->>'$_'";
173 my %selected_protocol_marker_info;
174 my %selected_protocol_top_key_info;
175 if (scalar(@found_protocolprop_ids)>0){
176 my $protocolprop_id_sql = join ("," , @found_protocolprop_ids);
177 my $protocolprop_where_sql = "nd_protocol_id in ($protocolprop_id_sql) and type_id = $vcf_map_details_cvterm_id";
178 my $protocolprop_hash_select_sql = scalar(@protocolprop_marker_hash_select_arr) > 0 ?
', '.join ',', @protocolprop_marker_hash_select_arr : '';
179 my $protocolprop_q = "SELECT nd_protocol_id, s.key $protocolprop_hash_select_sql from nd_protocolprop, jsonb_each(nd_protocolprop.value->'markers') as s WHERE $protocolprop_where_sql;";
180 my $protocolprop_h = $schema->storage->dbh()->prepare($protocolprop_q);
181 $protocolprop_h->execute();
182 while (my ($protocol_id, $marker_name, @protocolprop_info_return) = $protocolprop_h->fetchrow_array()) {
183 for my $s (0 .. scalar(@protocolprop_marker_hash_select_arr)-1){
184 $selected_protocol_marker_info{$protocol_id}->{$marker_name}->{$protocolprop_marker_hash_select->[$s]} = $protocolprop_info_return[$s];
187 my $protocolprop_top_key_select_sql = scalar(@protocolprop_top_key_select_arr) > 0 ?
', '.join ',', @protocolprop_top_key_select_arr : '';
188 my $protocolprop_top_key_q = "SELECT nd_protocol_id $protocolprop_top_key_select_sql from nd_protocolprop WHERE $protocolprop_where_sql;";
190 my $protocolprop_top_key_h = $schema->storage->dbh()->prepare($protocolprop_top_key_q);
191 $protocolprop_top_key_h->execute();
192 while (my ($protocol_id, @protocolprop_top_key_return) = $protocolprop_top_key_h->fetchrow_array()) {
193 for my $s (0 .. scalar(@protocolprop_top_key_select_arr)-1){
194 my $protocolprop_i = $protocolprop_top_key_select->[$s];
196 if ($protocolprop_i eq 'header_information_lines' || $protocolprop_i eq 'markers_array' || $protocolprop_i eq 'markers' || $protocolprop_i eq 'marker_names') {
197 $val = decode_json
$protocolprop_top_key_return[$s];
199 $val = $protocolprop_top_key_return[$s];
201 $selected_protocol_top_key_info{$protocol_id}->{$protocolprop_i} = $val;
205 print STDERR
"CXGN::Genotype::Search has protocolprops\n";
207 foreach (@genotypeprop_array) {
208 my $info = $genotypeprop_hash{$_};
209 my $selected_marker_info = $selected_protocol_marker_info{$info->{analysisMethodDbId
}} ?
$selected_protocol_marker_info{$info->{analysisMethodDbId
}} : {};
210 my $selected_protocol_info = $selected_protocol_top_key_info{$info->{analysisMethodDbId
}} ?
$selected_protocol_top_key_info{$info->{analysisMethodDbId
}} : {};
211 my @all_protocol_marker_names = keys %$selected_marker_info;
212 $selected_protocol_info->{markers
} = $selected_marker_info;
213 $info->{resultCount
} = scalar(keys %{$info->{selected_genotype_hash
}});
214 $info->{all_protocol_marker_names
} = \
@all_protocol_marker_names;
215 $info->{selected_protocol_hash
} = $selected_protocol_info;
219 #print STDERR Dumper \@data;
220 return ($total_count, \
@data);