10 mx-run GenotypeDosageChange [options] -H hostname -D dbname -u username [-F]
12 this is a subclass of L<CXGN::Metadata::Dbpatch>
13 see the perldoc of parent class for more details.
16 This patch changes calculated dosage from REF to ALT
17 This subclass uses L<Moose>. The parent class uses L<MooseX::Runnable>
21 Nick Morales<nm529@cornell.edu>
23 =head1 COPYRIGHT & LICENSE
25 Copyright 2010 Boyce Thompson Institute for Plant Research
27 This program is free software; you can redistribute it and/or modify
28 it under the same terms as Perl itself.
33 package GenotypeDosageChange
;
36 use Bio
::Chado
::Schema
;
38 use SGN
::Model
::Cvterm
;
41 use Scalar
::Util
qw(looks_like_number);
43 extends
'CXGN::Metadata::Dbpatch';
46 has
'+description' => ( default => <<'' );
47 This patch changes calculated dosage from REF to ALT
59 print STDOUT
"Executing the patch:\n " . $self->name . ".\n\nDescription:\n ". $self->description . ".\n\nExecuted by:\n " . $self->username . " .";
61 print STDOUT
"\nChecking if this db_patch was executed before or if previous db_patches have been executed.\n";
63 print STDOUT
"\nExecuting the SQL commands.\n";
64 my $schema = Bio
::Chado
::Schema
->connect( sub { $self->dbh->clone } );
67 my $vcf_map_details_id = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'vcf_map_details', 'protocol_property')->cvterm_id();
68 my $vcf_map_details_markers_array_cvterm_id = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'vcf_map_details_markers_array', 'protocol_property')->cvterm_id();
69 my $geno_cvterm_id = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'genotyping_experiment', 'experiment_type')->cvterm_id();
70 my $snp_vcf_cvterm_id = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'vcf_snp_genotyping', 'genotype_property')->cvterm_id();
72 my $q = "SELECT nd_protocol.nd_protocol_id, markers_array.value
74 JOIN nd_protocolprop AS markers_array ON(markers_array.nd_protocol_id=nd_protocol.nd_protocol_id AND markers_array.type_id=$vcf_map_details_markers_array_cvterm_id)
76 # print STDERR Dumper $q;
77 my $h = $schema->storage->dbh()->prepare($q);
80 my $q1_1 = "SELECT nd_experiment_stock.stock_id, genotypeprop.genotypeprop_id, genotype.genotype_id
82 JOIN nd_experiment_protocol ON(nd_protocol.nd_protocol_id=nd_experiment_protocol.nd_protocol_id)
83 JOIN nd_experiment ON(nd_experiment.nd_experiment_id=nd_experiment_protocol.nd_experiment_id AND nd_experiment.type_id=$geno_cvterm_id)
84 JOIN nd_experiment_stock ON(nd_experiment.nd_experiment_id=nd_experiment_stock.nd_experiment_id)
85 JOIN nd_experiment_genotype ON(nd_experiment.nd_experiment_id=nd_experiment_genotype.nd_experiment_id)
86 JOIN genotype ON(nd_experiment_genotype.genotype_id=genotype.genotype_id)
87 JOIN genotypeprop ON(genotype.genotype_id=genotypeprop.genotype_id)
88 WHERE nd_protocol.nd_protocol_id=?
90 my $h1_1 = $schema->storage->dbh()->prepare($q1_1);
92 my %protocol_geno_check;
93 while (my ($nd_protocol_id, $markers_array_json) = $h->fetchrow_array()) {
94 my $markers_array = $markers_array_json ? decode_json
$markers_array_json : [];
95 if (scalar(@
$markers_array)>0) {
96 foreach (@
$markers_array) {
97 $protocol_geno_check{$nd_protocol_id}->{$_->{name
}}++;
101 # print STDERR Dumper \%protocol_geno_check;
104 my %protocols_all_stock_ids;
105 while (my($nd_protocol_id, $marker_check) = each %protocol_geno_check) {
106 $h1_1->execute($nd_protocol_id);
108 while (my ($stock_id, $genotypeprop_id, $genotype_id) = $h1_1->fetchrow_array()) {
110 $protocols_hash{$nd_protocol_id} = {
111 marker_check
=> $marker_check,
112 stock_id
=> $stock_id
114 $protocols_all_stock_ids{$nd_protocol_id}->{$stock_id}++;
118 # print STDERR Dumper \%protocols_hash;
119 # print STDERR Dumper \%protocols_all_stock_ids;
121 my $q2 = "SELECT genotypeprop.genotypeprop_id, genotype.genotype_id, genotypeprop.value
123 JOIN genotype ON(genotypeprop.genotype_id=genotype.genotype_id)
124 JOIN nd_experiment_genotype ON(genotype.genotype_id=nd_experiment_genotype.genotype_id)
125 JOIN nd_experiment ON(nd_experiment.nd_experiment_id=nd_experiment_genotype.nd_experiment_id AND nd_experiment.type_id=$geno_cvterm_id)
126 JOIN nd_experiment_stock ON(nd_experiment.nd_experiment_id=nd_experiment_stock.nd_experiment_id)
127 JOIN nd_experiment_protocol ON(nd_experiment.nd_experiment_id=nd_experiment_protocol.nd_experiment_id)
128 WHERE stock_id=? AND genotypeprop.type_id=$snp_vcf_cvterm_id AND nd_protocol_id=?;";
129 my $h2 = $schema->storage->dbh()->prepare($q2);
131 # Distinct GT in localhost, cassavabase, cassava-test, musabase, musabase-test, sweetpotatobase
194 # Distinct DS in localhost, cassavabase, cassava-test, musabase
268 # 'A/C' # Lots of allele combos... like "CTAATTATAAAACTAT/CTAATTATAAAACTAT"
270 my %protocol_to_change;
271 foreach my $nd_protocol_id (sort keys %protocols_hash) {
272 my $o = $protocols_hash{$nd_protocol_id};
273 my $check_marker_obj = $o->{marker_check
};
275 CHECK: foreach my $stock_id (sort keys %{$protocols_all_stock_ids{$nd_protocol_id}}) {
276 print STDERR
"$nd_protocol_id.$stock_id ";
278 $h2->execute($stock_id, $nd_protocol_id);
279 my %check_geno_all_chrom;
280 while (my ($genotypeprop_id, $genotype_id, $check_geno_all_json) = $h2->fetchrow_array()) {
281 my $check_geno_all = decode_json
$check_geno_all_json;
282 # print STDERR Dumper $check_geno_all;
283 while (my($k,$v) = each %$check_geno_all) {
284 $check_geno_all_chrom{$k} = $v;
288 while (my($check_marker_name, $p) = each %$check_marker_obj) {
289 # print STDERR Dumper $check_marker_name;
290 my $check_geno = $check_geno_all_chrom{$check_marker_name};
291 # print STDERR Dumper $check_geno;
294 my $check_ds = $check_geno->{DS
};
295 my $check_gt = $check_geno->{GT
};
297 # Check genotype call with DS set and not equal to 1 (e.g. 0 or 2) and has GT defined and not equal to NA
298 if (defined($check_gt) && defined($check_ds) && looks_like_number
($check_ds) && $check_ds ne '1' && $check_ds ne '' && $check_gt ne '' && $check_gt ne 'NA') {
302 my @gts = split '/', $check_gt;
303 if (scalar(@gts) <= 1) {
304 @gts = split '|', $check_gt;
306 # print STDERR Dumper \@gts;
308 if (scalar(@gts) > 0) {
312 foreach my $gt (@gts) {
313 # Check that call is defined and is not 'NA', '.', or '-'
314 if (looks_like_number
($gt)) {
326 print STDERR Dumper
[$old_ds, $new_ds, $check_ds];
327 if ($check_ds == $old_ds) {
328 print STDERR Dumper
$check_geno;
329 $protocol_to_change{$nd_protocol_id}++;
339 print STDERR
"End Protocol Check $nd_protocol_id.....\n";
342 print STDERR
"GENOTYPING PROTOCOLS TO CHANGE DS IN:\n";
343 print STDERR Dumper \
%protocol_to_change;
347 my $start_time = time();
349 my $q3 = "SELECT genotypeprop.value, genotypeprop.genotypeprop_id, genotypeprop.rank
351 JOIN genotype ON(genotype.genotype_id=genotypeprop.genotype_id)
352 JOIN nd_experiment_genotype ON(genotype.genotype_id=nd_experiment_genotype.genotype_id)
353 JOIN nd_experiment ON(nd_experiment_genotype.nd_experiment_id=nd_experiment.nd_experiment_id AND nd_experiment.type_id=$geno_cvterm_id)
354 JOIN nd_experiment_protocol ON(nd_experiment.nd_experiment_id=nd_experiment_protocol.nd_experiment_id)
355 JOIN nd_protocol ON(nd_protocol.nd_protocol_id=nd_experiment_protocol.nd_protocol_id)
356 JOIN nd_protocolprop ON(nd_protocol.nd_protocol_id=nd_protocolprop.nd_protocol_id)
357 JOIN nd_experiment_stock ON(nd_experiment.nd_experiment_id=nd_experiment_stock.nd_experiment_id)
358 WHERE nd_protocol.nd_protocol_id=? AND nd_protocolprop.type_id=$vcf_map_details_id AND genotypeprop.type_id=$snp_vcf_cvterm_id AND nd_experiment_stock.stock_id=?
359 ORDER BY genotypeprop.genotypeprop_id ASC;";
360 my $h3 = $schema->storage->dbh()->prepare($q3);
362 my $genotypeprop_rs = $schema->resultset('Genetic::Genotypeprop');
364 my @stock_ids = sort keys %{$protocols_all_stock_ids{$nd_protocol_id}};
365 my $total_stocks = scalar(@stock_ids);
366 print STDERR
"STOCKS IN $nd_protocol_id: ".$total_stocks."\n";
369 foreach my $stock_id (@stock_ids) {
371 $h3->execute($nd_protocol_id, $stock_id);
372 while (my($prop_json, $genotypeprop_id, $rank) = $h3->fetchrow_array()) {
374 my $prop = decode_json
$prop_json;
376 while (my($marker_name,$v) = each %$prop) {
377 if ($marker_name ne 'CHROM') {
380 # Check that GT is defined and is not NA
381 if (defined($gt) && $gt ne '' && $gt ne 'NA') {
383 my @gts = split '/', $gt;
384 if (scalar(@gts) <= 1) {
385 @gts = split '|', $gt;
387 # print STDERR Dumper \@gts;
391 foreach my $gt (@gts) {
392 # Check that call is defined and is not 'NA', '.', or '-'
393 if (looks_like_number
($gt)) {
418 my $prop_save = encode_json
$prop;
420 my $row = $genotypeprop_rs->find({genotypeprop_id
=>$genotypeprop_id});
421 $row->value($prop_save);
425 my $end_time = time();
426 my $seconds_passed = $end_time - $start_time;
427 print STDERR
"$nd_protocol_id:$stock_id ($stock_count"."/"."$total_stocks) $seconds_passed"."s. ";
431 my $end_time = time();
432 my $seconds_passed = $end_time - $start_time;
433 print STDERR
"Protocol $nd_protocol_id completed $seconds_passed seconds\n";
436 foreach my $nd_protocol_id_ref (sort keys %protocol_to_change) {
437 $nd_protocol_id = $nd_protocol_id_ref;
439 $schema->txn_do($coderef);
440 print "You're done protocol $nd_protocol_id!\n";
442 my $transaction_error = $_;
443 print STDERR Dumper
$transaction_error;
447 print "You're done!\n";