update current serial number
[sgn.git] / db / 00158 / GenotypeDosageChange.pm
blob841c66b2852aa386d8b66541ee24a2dba894f791
1 #!/usr/bin/env perl
4 =head1 NAME
6 GenotypeDosageChange
8 =head1 SYNOPSIS
10 mx-run GenotypeDosageChange [options] -H hostname -D dbname -u username [-F]
12 this is a subclass of L<CXGN::Metadata::Dbpatch>
13 see the perldoc of parent class for more details.
15 =head1 DESCRIPTION
16 This patch changes calculated dosage from REF to ALT
17 This subclass uses L<Moose>. The parent class uses L<MooseX::Runnable>
19 =head1 AUTHOR
21 Nick Morales<nm529@cornell.edu>
23 =head1 COPYRIGHT & LICENSE
25 Copyright 2010 Boyce Thompson Institute for Plant Research
27 This program is free software; you can redistribute it and/or modify
28 it under the same terms as Perl itself.
30 =cut
33 package GenotypeDosageChange;
35 use Moose;
36 use Bio::Chado::Schema;
37 use Try::Tiny;
38 use SGN::Model::Cvterm;
39 use JSON;
40 use Data::Dumper;
41 use Scalar::Util qw(looks_like_number);
43 extends 'CXGN::Metadata::Dbpatch';
46 has '+description' => ( default => <<'' );
47 This patch changes calculated dosage from REF to ALT
49 has '+prereq' => (
50 default => sub {
51 [],
56 sub patch {
57 my $self=shift;
59 print STDOUT "Executing the patch:\n " . $self->name . ".\n\nDescription:\n ". $self->description . ".\n\nExecuted by:\n " . $self->username . " .";
61 print STDOUT "\nChecking if this db_patch was executed before or if previous db_patches have been executed.\n";
63 print STDOUT "\nExecuting the SQL commands.\n";
64 my $schema = Bio::Chado::Schema->connect( sub { $self->dbh->clone } );
67 my $vcf_map_details_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'vcf_map_details', 'protocol_property')->cvterm_id();
68 my $vcf_map_details_markers_array_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'vcf_map_details_markers_array', 'protocol_property')->cvterm_id();
69 my $geno_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'genotyping_experiment', 'experiment_type')->cvterm_id();
70 my $snp_vcf_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'vcf_snp_genotyping', 'genotype_property')->cvterm_id();
72 my $q = "SELECT nd_protocol.nd_protocol_id, markers_array.value
73 FROM nd_protocol
74 JOIN nd_protocolprop AS markers_array ON(markers_array.nd_protocol_id=nd_protocol.nd_protocol_id AND markers_array.type_id=$vcf_map_details_markers_array_cvterm_id)
75 ;";
76 # print STDERR Dumper $q;
77 my $h = $schema->storage->dbh()->prepare($q);
78 $h->execute();
80 my $q1_1 = "SELECT nd_experiment_stock.stock_id, genotypeprop.genotypeprop_id, genotype.genotype_id
81 FROM nd_protocol
82 JOIN nd_experiment_protocol ON(nd_protocol.nd_protocol_id=nd_experiment_protocol.nd_protocol_id)
83 JOIN nd_experiment ON(nd_experiment.nd_experiment_id=nd_experiment_protocol.nd_experiment_id AND nd_experiment.type_id=$geno_cvterm_id)
84 JOIN nd_experiment_stock ON(nd_experiment.nd_experiment_id=nd_experiment_stock.nd_experiment_id)
85 JOIN nd_experiment_genotype ON(nd_experiment.nd_experiment_id=nd_experiment_genotype.nd_experiment_id)
86 JOIN genotype ON(nd_experiment_genotype.genotype_id=genotype.genotype_id)
87 JOIN genotypeprop ON(genotype.genotype_id=genotypeprop.genotype_id)
88 WHERE nd_protocol.nd_protocol_id=?
89 ;";
90 my $h1_1 = $schema->storage->dbh()->prepare($q1_1);
92 my %protocol_geno_check;
93 while (my ($nd_protocol_id, $markers_array_json) = $h->fetchrow_array()) {
94 my $markers_array = $markers_array_json ? decode_json $markers_array_json : [];
95 if (scalar(@$markers_array)>0) {
96 foreach (@$markers_array) {
97 $protocol_geno_check{$nd_protocol_id}->{$_->{name}}++;
101 # print STDERR Dumper \%protocol_geno_check;
103 my %protocols_hash;
104 my %protocols_all_stock_ids;
105 while (my($nd_protocol_id, $marker_check) = each %protocol_geno_check) {
106 $h1_1->execute($nd_protocol_id);
108 while (my ($stock_id, $genotypeprop_id, $genotype_id) = $h1_1->fetchrow_array()) {
109 if ($stock_id) {
110 $protocols_hash{$nd_protocol_id} = {
111 marker_check => $marker_check,
112 stock_id => $stock_id
114 $protocols_all_stock_ids{$nd_protocol_id}->{$stock_id}++;
118 # print STDERR Dumper \%protocols_hash;
119 # print STDERR Dumper \%protocols_all_stock_ids;
121 my $q2 = "SELECT genotypeprop.genotypeprop_id, genotype.genotype_id, genotypeprop.value
122 FROM genotypeprop
123 JOIN genotype ON(genotypeprop.genotype_id=genotype.genotype_id)
124 JOIN nd_experiment_genotype ON(genotype.genotype_id=nd_experiment_genotype.genotype_id)
125 JOIN nd_experiment ON(nd_experiment.nd_experiment_id=nd_experiment_genotype.nd_experiment_id AND nd_experiment.type_id=$geno_cvterm_id)
126 JOIN nd_experiment_stock ON(nd_experiment.nd_experiment_id=nd_experiment_stock.nd_experiment_id)
127 JOIN nd_experiment_protocol ON(nd_experiment.nd_experiment_id=nd_experiment_protocol.nd_experiment_id)
128 WHERE stock_id=? AND genotypeprop.type_id=$snp_vcf_cvterm_id AND nd_protocol_id=?;";
129 my $h2 = $schema->storage->dbh()->prepare($q2);
131 # Distinct GT in localhost, cassavabase, cassava-test, musabase, musabase-test, sweetpotatobase
132 # ""
133 # "0|1"
134 # "0|0"
135 # "1/1"
136 # "NA"
137 # "1|0"
138 # "0/0"
139 # "1/0"
140 # "1|1"
141 # "0/1"
142 # "4/1"
143 # "0/5"
144 # "2/2"
145 # "0/4"
146 # "1/5"
147 # "5/5"
148 # "0/2"
149 # "4/4"
150 # "3/2"
151 # "2/5"
152 # "4/3"
153 # "2/4"
154 # "1/3"
155 # "4/5"
156 # "3/3"
157 # "1/0"
158 # "2/0"
159 # "3/1"
160 # "./."
161 # "0/3"
162 # "1/2"
163 # "3/0"
164 # "4/2"
165 # "3/4"
166 # "2/3"
167 # "4/0"
168 # "3/5"
169 # "1/4"
170 # "2/1"
171 # "1/1/1/1"
172 # "0/0/0"
173 # "0/1/1"
174 # "0/1/1/1"
175 # "-/-/-"
176 # "0/0/0/0"
177 # "0/0/1/1"
178 # "0/0/1"
179 # "1/1/1"
180 # "-/-"
181 # "-/-/-/-"
182 # "0/0/0/1"
183 # "1/2/2"
184 # "3/3/3"
185 # "0/0/2"
186 # "2/2/2"
187 # "././."
188 # "1/1/2"
189 # "0/1/2"
190 # "0/0/0/1"
191 # "0/2/2"
192 # "././.\n"
194 # Distinct DS in localhost, cassavabase, cassava-test, musabase
195 # ""
196 # "2"
197 # "4"
198 # "1"
199 # "NA"
200 # "3"
201 # "0"
202 # "0.019"
203 # "0.026"
204 # "0.038"
205 # "1.865"
206 # "0.825"
207 # "0.889"
208 # "1.926"
209 # "0.999"
210 # "0.928"
211 # "0.017"
212 # "0.938"
213 # "0.525"
214 # "0.915"
215 # "0.002"
216 # "1.859"
217 # "1.856"
218 # "0.913"
220 # "0.943"
221 # "0.891"
222 # "0.003"
223 # "0.027"
224 # "1.779"
226 # "0.032"
227 # "0.048"
228 # "0.945"
229 # "0.484"
230 # "0.305"
231 # "0.466"
232 # "1.87"
233 # "0.858"
234 # "0.932"
235 # "1.876"
236 # "0.985"
237 # "1.721"
238 # "0.481"
239 # "0.848"
240 # "0.692"
241 # "0.001"
242 # "0.031"
243 # "1.104"
244 # "0.939"
245 # "0.042"
246 # "0.034"
247 # "0.897"
249 # "0.974"
250 # "0.033"
251 # "0.87"
252 # "0.475"
253 # "0.286"
254 # "0.005"
255 # "0.958"
256 # "1.932"
257 # "0.935"
258 # "0.029"
259 # "0.004"
260 # "0.901"
261 # "0.559"
262 # "0.892"
263 # "0.028"
264 # "0.679"
265 # "0.364"
266 # "0.03"
267 # null
268 # 'A/C' # Lots of allele combos... like "CTAATTATAAAACTAT/CTAATTATAAAACTAT"
270 my %protocol_to_change;
271 foreach my $nd_protocol_id (sort keys %protocols_hash) {
272 my $o = $protocols_hash{$nd_protocol_id};
273 my $check_marker_obj = $o->{marker_check};
275 CHECK: foreach my $stock_id (sort keys %{$protocols_all_stock_ids{$nd_protocol_id}}) {
276 print STDERR "$nd_protocol_id.$stock_id ";
278 $h2->execute($stock_id, $nd_protocol_id);
279 my %check_geno_all_chrom;
280 while (my ($genotypeprop_id, $genotype_id, $check_geno_all_json) = $h2->fetchrow_array()) {
281 my $check_geno_all = decode_json $check_geno_all_json;
282 # print STDERR Dumper $check_geno_all;
283 while (my($k,$v) = each %$check_geno_all) {
284 $check_geno_all_chrom{$k} = $v;
288 while (my($check_marker_name, $p) = each %$check_marker_obj) {
289 # print STDERR Dumper $check_marker_name;
290 my $check_geno = $check_geno_all_chrom{$check_marker_name};
291 # print STDERR Dumper $check_geno;
293 if ($check_geno) {
294 my $check_ds = $check_geno->{DS};
295 my $check_gt = $check_geno->{GT};
297 # Check genotype call with DS set and not equal to 1 (e.g. 0 or 2) and has GT defined and not equal to NA
298 if (defined($check_gt) && defined($check_ds) && looks_like_number($check_ds) && $check_ds ne '1' && $check_ds ne '' && $check_gt ne '' && $check_gt ne 'NA') {
299 chomp($check_gt);
300 chomp($check_ds);
302 my @gts = split '/', $check_gt;
303 if (scalar(@gts) <= 1) {
304 @gts = split '|', $check_gt;
306 # print STDERR Dumper \@gts;
308 if (scalar(@gts) > 0) {
309 my $old_ds = 0;
310 my $new_ds = 0;
311 my $has_calls = 0;
312 foreach my $gt (@gts) {
313 # Check that call is defined and is not 'NA', '.', or '-'
314 if (looks_like_number($gt)) {
315 if ($gt eq '0') {
316 $old_ds++;
318 else {
319 $new_ds++;
321 $has_calls = 1;
325 if ($has_calls) {
326 print STDERR Dumper [$old_ds, $new_ds, $check_ds];
327 if ($check_ds == $old_ds) {
328 print STDERR Dumper $check_geno;
329 $protocol_to_change{$nd_protocol_id}++;
332 last CHECK;
339 print STDERR "End Protocol Check $nd_protocol_id.....\n";
342 print STDERR "GENOTYPING PROTOCOLS TO CHANGE DS IN:\n";
343 print STDERR Dumper \%protocol_to_change;
345 my $nd_protocol_id;
346 my $coderef = sub {
347 my $start_time = time();
349 my $q3 = "SELECT genotypeprop.value, genotypeprop.genotypeprop_id, genotypeprop.rank
350 FROM genotypeprop
351 JOIN genotype ON(genotype.genotype_id=genotypeprop.genotype_id)
352 JOIN nd_experiment_genotype ON(genotype.genotype_id=nd_experiment_genotype.genotype_id)
353 JOIN nd_experiment ON(nd_experiment_genotype.nd_experiment_id=nd_experiment.nd_experiment_id AND nd_experiment.type_id=$geno_cvterm_id)
354 JOIN nd_experiment_protocol ON(nd_experiment.nd_experiment_id=nd_experiment_protocol.nd_experiment_id)
355 JOIN nd_protocol ON(nd_protocol.nd_protocol_id=nd_experiment_protocol.nd_protocol_id)
356 JOIN nd_protocolprop ON(nd_protocol.nd_protocol_id=nd_protocolprop.nd_protocol_id)
357 JOIN nd_experiment_stock ON(nd_experiment.nd_experiment_id=nd_experiment_stock.nd_experiment_id)
358 WHERE nd_protocol.nd_protocol_id=? AND nd_protocolprop.type_id=$vcf_map_details_id AND genotypeprop.type_id=$snp_vcf_cvterm_id AND nd_experiment_stock.stock_id=?
359 ORDER BY genotypeprop.genotypeprop_id ASC;";
360 my $h3 = $schema->storage->dbh()->prepare($q3);
362 my $genotypeprop_rs = $schema->resultset('Genetic::Genotypeprop');
364 my @stock_ids = sort keys %{$protocols_all_stock_ids{$nd_protocol_id}};
365 my $total_stocks = scalar(@stock_ids);
366 print STDERR "STOCKS IN $nd_protocol_id: ".$total_stocks."\n";
368 my $stock_count = 1;
369 foreach my $stock_id (@stock_ids) {
371 $h3->execute($nd_protocol_id, $stock_id);
372 while (my($prop_json, $genotypeprop_id, $rank) = $h3->fetchrow_array()) {
373 if ($prop_json) {
374 my $prop = decode_json $prop_json;
376 while (my($marker_name,$v) = each %$prop) {
377 if ($marker_name ne 'CHROM') {
379 my $gt = $v->{GT};
380 # Check that GT is defined and is not NA
381 if (defined($gt) && $gt ne '' && $gt ne 'NA') {
383 my @gts = split '/', $gt;
384 if (scalar(@gts) <= 1) {
385 @gts = split '|', $gt;
387 # print STDERR Dumper \@gts;
389 my $alt_ds = 0;
390 my $has_calls;
391 foreach my $gt (@gts) {
392 # Check that call is defined and is not 'NA', '.', or '-'
393 if (looks_like_number($gt)) {
394 if ($gt ne '0') {
395 $alt_ds++;
397 $has_calls = 1;
400 if ($has_calls) {
401 $v->{DS} = $alt_ds;
403 else {
404 $v->{DS} = "NA";
407 chomp($gt);
408 $v->{GT} = $gt;
410 else {
411 # GT is "NA"
412 $v->{DS} = "NA";
418 my $prop_save = encode_json $prop;
420 my $row = $genotypeprop_rs->find({genotypeprop_id=>$genotypeprop_id});
421 $row->value($prop_save);
422 $row->update();
425 my $end_time = time();
426 my $seconds_passed = $end_time - $start_time;
427 print STDERR "$nd_protocol_id:$stock_id ($stock_count"."/"."$total_stocks) $seconds_passed"."s. ";
428 $stock_count++;
431 my $end_time = time();
432 my $seconds_passed = $end_time - $start_time;
433 print STDERR "Protocol $nd_protocol_id completed $seconds_passed seconds\n";
436 foreach my $nd_protocol_id_ref (sort keys %protocol_to_change) {
437 $nd_protocol_id = $nd_protocol_id_ref;
438 try {
439 $schema->txn_do($coderef);
440 print "You're done protocol $nd_protocol_id!\n";
441 } catch {
442 my $transaction_error = $_;
443 print STDERR Dumper $transaction_error;
447 print "You're done!\n";
451 ####
452 1; #
453 ####