Merge pull request #5280 from solgenomics/5714_phenotype_genotype_data_check
[sgn.git] / db / 00120 / FixGenotypeStorageDosage.pm
blob66a91a42026e646a3540b51fa0b339aed8085847
1 #!/usr/bin/env perl
4 =head1 NAME
6 FixGenotypeStorageDosage
8 =head1 SYNOPSIS
10 mx-run FixGenotypeStorageDosage [options] -H hostname -D dbname -u username [-F]
12 this is a subclass of L<CXGN::Metadata::Dbpatch>
13 see the perldoc of parent class for more details.
15 =head1 DESCRIPTION
16 This patch fixes the genotype storage error of loading DS=0 when the GT is infact unknown (e.g. GT='./.'). DS is now stored as 'NA' in these cases.
17 This subclass uses L<Moose>. The parent class uses L<MooseX::Runnable>
19 =head1 AUTHOR
22 =head1 COPYRIGHT & LICENSE
24 Copyright 2010 Boyce Thompson Institute for Plant Research
26 This program is free software; you can redistribute it and/or modify
27 it under the same terms as Perl itself.
29 =cut
32 package FixGenotypeStorageDosage;
34 use Moose;
35 use Bio::Chado::Schema;
36 use CXGN::People::Schema;
37 use Try::Tiny;
38 use CXGN::Genotype::Search;
39 use JSON;
40 extends 'CXGN::Metadata::Dbpatch';
43 has '+description' => ( default => <<'' );
44 This patch fixes the genotype storage error of loading DS=0 when the GT is infact unknown (e.g. GT='./.'). DS is now stored as 'NA' in these cases.
46 has '+prereq' => (
47 default => sub {
48 [],
53 sub patch {
54 my $self=shift;
56 print STDOUT "Executing the patch:\n " . $self->name . ".\n\nDescription:\n ". $self->description . ".\n\nExecuted by:\n " . $self->username . " .";
58 print STDOUT "\nChecking if this db_patch was executed before or if previous db_patches have been executed.\n";
60 print STDOUT "\nExecuting the SQL commands.\n";
61 my $schema = Bio::Chado::Schema->connect( sub { $self->dbh->clone } );
62 my $people_schema = CXGN::People::Schema->connect( sub { $self->dbh->clone } );
64 my $update_q = "UPDATE genotypeprop SET value = ? WHERE genotypeprop_id = ?;";
65 my $h_update = $schema->storage->dbh()->prepare($update_q);
67 # Restricting search to "NEW" genotyping protocols which have nd_protocolprop information. Old genotyping protocols only had the DS value and shuold be assumed to be stored correctly.
68 my $geno_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'genotyping_experiment', 'experiment_type')->cvterm_id();
69 my $vcf_map_details_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'vcf_map_details', 'protocol_property')->cvterm_id();
70 my $q = "SELECT nd_protocol_id FROM nd_protocol JOIN nd_protocolprop USING(nd_protocol_id) WHERE nd_protocol.type_id=$geno_cvterm_id AND nd_protocolprop.type_id=$vcf_map_details_id ORDER BY nd_protocol_id ASC;";
71 my $h = $schema->storage->dbh()->prepare($q);
72 $h->execute();
73 while (my ($protocol_id) = $h->fetchrow_array()) {
74 my $genotypes_search = CXGN::Genotype::Search->new({
75 bcs_schema=>$schema,
76 people_schema=>$people_schema,
77 protocol_id_list=>[$protocol_id],
78 protocolprop_top_key_select=>[],
79 protocolprop_marker_hash_select=>[]
80 });
81 $genotypes_search->init_genotype_iterator();
82 while (my ($count, $genotype_data) = $genotypes_search->get_next_genotype_info) {
83 #my $m_hash = $genotype_data->{selected_protocol_hash}->{markers};
84 my $g_hash = $genotype_data->{selected_genotype_hash};
86 while (my ($k, $v) = each %$g_hash) {
87 if (!$v->{GT} || $v->{GT} eq './.' || $v->{GT} eq '.|.' || $v->{GT} eq '././.' || $v->{GT} eq '.|.|.' || $v->{GT} eq './././.' || $v->{GT} eq '.|.|.|.') {
88 $v->{DS} = 'NA';
89 $v->{NT} = '';
93 my $genotypeprop_id = $genotype_data->{markerProfileDbId};
94 print STDERR "CHECKING genotypeprop_id $genotypeprop_id\n";
95 $h_update->execute(encode_json($g_hash), $genotypeprop_id);
99 print "You're done!\n";
103 ####
104 1; #
105 ####