Pass along stock type info to trials.mas to hide analysis usage table in non-accessio...
[sgn.git] / db / 00107 / ChangeGenotypepropGTKeyToDS.pm
blob836384f4ef2d78b823a9279f2c8f26274ec6f346
1 #!/usr/bin/env perl
4 =head1 NAME
6 ChangeGenotypepropGTKeyToDS.pm
8 =head1 SYNOPSIS
10 mx-run ThisPackageName [options] -H hostname -D dbname -u username [-F]
12 this is a subclass of L<CXGN::Metadata::Dbpatch>
13 see the perldoc of parent class for more details.
15 =head1 DESCRIPTION
16 This patch converts the genotypeprop GT key from the nucleotide representation of 'C/T' to the VCF genotype representation of '0/1' and adds a key NT that contains the nucleotide representation of 'C/T' and adds a DS key if the DS key is not there by summing the GT, and rounds the DS key if it exists.
18 This subclass uses L<Moose>. The parent class uses L<MooseX::Runnable>
20 =head1 AUTHOR
22 Nicolas Morales
24 =head1 COPYRIGHT & LICENSE
26 Copyright 2010 Boyce Thompson Institute for Plant Research
28 This program is free software; you can redistribute it and/or modify
29 it under the same terms as Perl itself.
31 =cut
34 package ChangeGenotypepropGTKeyToDS;
36 use Moose;
37 use Bio::Chado::Schema;
38 use SGN::Model::Cvterm;
39 use Try::Tiny;
40 use JSON;
41 use Scalar::Util qw(looks_like_number);
42 use Math::Round qw(round);
44 extends 'CXGN::Metadata::Dbpatch';
47 has '+description' => ( default => <<'' );
48 Allows addition of a link to the raw data file for genotyping plates
50 has '+prereq' => (
51 default => sub {
52 [],
57 sub patch {
58 my $self=shift;
60 print STDOUT "Executing the patch:\n " . $self->name . ".\n\nDescription:\n ". $self->description . ".\n\nExecuted by:\n " . $self->username . " .";
62 print STDOUT "\nChecking if this db_patch was executed before or if previous db_patches have been executed.\n";
64 print STDOUT "\nExecuting the SQL commands.\n";
65 my $schema = Bio::Chado::Schema->connect( sub { $self->dbh->clone } );
68 print STDERR "INSERTING CV TERMS...\n";
70 my $protocol_type_id = SGN::Model::Cvterm->get_cvterm_row($schema, "vcf_map_details", "protocol_property")->cvterm_id();
71 my $genotype_type_id = SGN::Model::Cvterm->get_cvterm_row($schema, "vcf_snp_genotyping", "genotype_property")->cvterm_id();
73 my $q = "SELECT nd_protocolprop.nd_protocol_id, nd_protocolprop.value, genotypeprop.genotypeprop_id FROM nd_protocolprop JOIN nd_protocol using(nd_protocol_id) JOIN nd_experiment_protocol USING(nd_protocol_id) JOIN nd_experiment USING(nd_experiment_id) JOIN nd_experiment_genotype USING(nd_experiment_id) JOIN genotype USING(genotype_id) JOIN genotypeprop USING (genotype_id) WHERE nd_protocolprop.type_id = $protocol_type_id AND genotypeprop.type_id=$genotype_type_id;";
75 my $h = $schema->storage->dbh()->prepare($q);
76 $h->execute();
77 while (my ($protocol_id, $protocolprop_value, $genotypeprop_id) = $h->fetchrow_array()) {
79 my $protocolprop_hash = decode_json $protocolprop_value;
80 my $markers_hash = $protocolprop_hash->{'markers'};
82 my $q2 = "SELECT genotypeprop_id, value FROM genotypeprop WHERE genotypeprop_id = $genotypeprop_id;";
83 my $h2 = $schema->storage->dbh()->prepare($q2);
84 $h2->execute();
85 while (my ($genotypeprop_id, $genotypeprop_value) = $h2->fetchrow_array()) {
86 print STDERR "Updating genotypeprop_id $genotypeprop_id\n";
87 my $genotypeprop_hash = decode_json $genotypeprop_value;
88 my %new_genotypeprop_hash;
89 while (my ($marker_name, $geno) = each %$genotypeprop_hash) {
90 my $gt = $geno->{'GT'};
91 my $gt_dosage;
92 my $gt_not_empty;
94 if ($gt) {
95 my $marker_info = $markers_hash->{$marker_name};
96 my $ref = $marker_info->{'ref'};
97 my $alt = $marker_info->{'alt'};
98 my @separated_alts = split ',', $alt;
99 my @complete_alleles= ($ref, @separated_alts);
100 my %allele_lookup;
101 my $index = 0;
102 foreach (@complete_alleles) {
103 $allele_lookup{$_} = $index;
104 $index++;
106 $allele_lookup{'.'} = '.';
108 my @nucleotide_genotype;
109 my @gt_genotype;
111 my $separator = '/';
112 my @alleles = split (/\//, $gt);
113 if (scalar(@alleles) <= 1){
114 @alleles = split (/\|/, $gt);
115 if (scalar(@alleles) > 1) {
116 $separator = '|';
119 foreach (@alleles) { ### ['A', 'A'] OR [1, 1] OR ['.', '.']
120 if (looks_like_number($_)) {
121 push @gt_genotype, $_;
122 $gt_dosage = $gt_dosage + $_;
123 } else {
124 push @nucleotide_genotype, $_;
125 push @gt_genotype, $allele_lookup{$_}; #convert 'A' => 0
128 if ($_ ne '.') {
129 $gt_not_empty = 1;
132 $geno->{'GT'} = join $separator, @gt_genotype;
133 $geno->{'NT'} = join $separator, @nucleotide_genotype;
135 if ($gt_not_empty && !looks_like_number($geno->{'DS'})) {
136 $geno->{'DS'} = $gt_dosage;
138 if (looks_like_number($geno->{'DS'})) {
139 $geno->{'DS'} = round($geno->{'DS'});
142 $new_genotypeprop_hash{$marker_name} = $geno;
144 my $new_genotypeprop_string = encode_json \%new_genotypeprop_hash;
145 my $q3 = "UPDATE genotypeprop SET value = '$new_genotypeprop_string' WHERE genotypeprop_id = $genotypeprop_id;";
146 my $h3 = $schema->storage->dbh()->prepare($q3);
147 $h3->execute();
151 print "You're done!\n";
155 ####
156 1; #
157 ####