update current serial number
[sgn.git] / db / 00124 / FixGenotypeStorageDosageToMajorAlleleDosage.pm
blob17b6d04f607e03c62aade69a743bc9911823515d
1 #!/usr/bin/env perl
4 =head1 NAME
6 FixGenotypeStorageDosageToMajorAlleleDosage
8 =head1 SYNOPSIS
10 mx-run FixGenotypeStorageDosageToMajorAlleleDosage [options] -H hostname -D dbname -u username [-F]
12 this is a subclass of L<CXGN::Metadata::Dbpatch>
13 see the perldoc of parent class for more details.
15 =head1 DESCRIPTION
16 This patch fixes the genotype storage error of loading dosage as minor allele dosage, when it should be major allele dosage
17 This subclass uses L<Moose>. The parent class uses L<MooseX::Runnable>
19 =head1 AUTHOR
22 =head1 COPYRIGHT & LICENSE
24 Copyright 2010 Boyce Thompson Institute for Plant Research
26 This program is free software; you can redistribute it and/or modify
27 it under the same terms as Perl itself.
29 =cut
32 package FixGenotypeStorageDosageToMajorAlleleDosage;
34 use Moose;
35 use Bio::Chado::Schema;
36 use CXGN::People::Schema;
37 use Try::Tiny;
38 use CXGN::Genotype::Search;
39 use JSON;
40 use Scalar::Util qw(looks_like_number);
41 extends 'CXGN::Metadata::Dbpatch';
44 has '+description' => ( default => <<'' );
45 This patch fixes the genotype storage error of loading dosage as minor allele dosage, when it should be major allele dosage
47 has '+prereq' => (
48 default => sub {
49 [],
54 sub patch {
55 my $self=shift;
57 print STDOUT "Executing the patch:\n " . $self->name . ".\n\nDescription:\n ". $self->description . ".\n\nExecuted by:\n " . $self->username . " .";
59 print STDOUT "\nChecking if this db_patch was executed before or if previous db_patches have been executed.\n";
61 print STDOUT "\nExecuting the SQL commands.\n";
62 my $schema = Bio::Chado::Schema->connect( sub { $self->dbh->clone } );
63 my $people_schema = CXGN::People::Schema->connect( sub { $self->dbh->clone } );
65 my $update_q = "UPDATE genotypeprop SET value = ? WHERE genotypeprop_id = ?;";
66 my $h_update = $schema->storage->dbh()->prepare($update_q);
68 # Restricting search to "NEW" genotyping protocols which have nd_protocolprop information. Old genotyping protocols only had the DS value and shuold be assumed to be stored correctly.
69 my $geno_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'genotyping_experiment', 'experiment_type')->cvterm_id();
70 my $vcf_map_details_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'vcf_map_details', 'protocol_property')->cvterm_id();
71 my $q = "SELECT nd_protocol_id FROM nd_protocol JOIN nd_protocolprop USING(nd_protocol_id) WHERE nd_protocol.type_id=$geno_cvterm_id AND nd_protocolprop.type_id=$vcf_map_details_id ORDER BY nd_protocol_id ASC;";
72 my $h = $schema->storage->dbh()->prepare($q);
73 $h->execute();
74 while (my ($protocol_id) = $h->fetchrow_array()) {
75 my $genotypes_search = CXGN::Genotype::Search->new({
76 bcs_schema=>$schema,
77 people_schema=>$people_schema,
78 protocol_id_list=>[$protocol_id],
79 protocolprop_top_key_select=>[],
80 protocolprop_marker_hash_select=>[]
81 });
82 $genotypes_search->init_genotype_iterator();
83 while (my ($count, $genotype_data) = $genotypes_search->get_next_genotype_info) {
84 #my $m_hash = $genotype_data->{selected_protocol_hash}->{markers};
85 my $g_hash = $genotype_data->{selected_genotype_hash};
87 while (my ($k, $v) = each %$g_hash) {
88 my $gt_dosage_val = 'NA';
89 my $gt_dosage = 0;
90 if (exists($v->{GT})) {
91 my $gt = $v->{GT};
92 my $separator = '/';
93 my @alleles = split (/\//, $gt);
94 if (scalar(@alleles) <= 1){
95 @alleles = split (/\|/, $gt);
96 if (scalar(@alleles) > 1) {
97 $separator = '|';
101 foreach (@alleles) {
102 if (looks_like_number($_)) {
103 if ($_ eq '0' || $_ == 0) {
104 $gt_dosage++;
106 $gt_dosage_val = $gt_dosage;
110 if (exists($v->{GT}) ) {
111 $v->{DS} = $gt_dosage_val;
115 my $genotypeprop_id = $genotype_data->{markerProfileDbId};
116 print STDERR "CHECKING genotypeprop_id $genotypeprop_id\n";
117 $h_update->execute(encode_json($g_hash), $genotypeprop_id);
121 print "You're done!\n";
125 ####
126 1; #
127 ####