Merge pull request #2902 from solgenomics/topic/fix_empty_fixture
[sgn.git] / db / 00106 / ChangeGenotypepropGTKeyToNucleotides.pm
blobe2b15a4b5b523db1a7bb7175205b88ffb87b5e61
1 #!/usr/bin/env perl
4 =head1 NAME
6 ChangeGenotypepropGTKeyToNucleotides.pm
8 =head1 SYNOPSIS
10 mx-run ThisPackageName [options] -H hostname -D dbname -u username [-F]
12 this is a subclass of L<CXGN::Metadata::Dbpatch>
13 see the perldoc of parent class for more details.
15 =head1 DESCRIPTION
16 This patch converts the genotypeprop GT key from the VCF representation of '0/1' to the nucleotide representation of 'C/T' using the nd_protocolprop 'ref' and 'alt' information in the database.
17 This subclass uses L<Moose>. The parent class uses L<MooseX::Runnable>
19 =head1 AUTHOR
21 Nicolas Morales
23 =head1 COPYRIGHT & LICENSE
25 Copyright 2010 Boyce Thompson Institute for Plant Research
27 This program is free software; you can redistribute it and/or modify
28 it under the same terms as Perl itself.
30 =cut
33 package ChangeGenotypepropGTKeyToNucleotides;
35 use Moose;
36 use Bio::Chado::Schema;
37 use SGN::Model::Cvterm;
38 use Try::Tiny;
39 use JSON;
40 use Scalar::Util qw(looks_like_number);
41 extends 'CXGN::Metadata::Dbpatch';
44 has '+description' => ( default => <<'' );
45 Allows addition of a link to the raw data file for genotyping plates
47 has '+prereq' => (
48 default => sub {
49 [],
54 sub patch {
55 my $self=shift;
57 print STDOUT "Executing the patch:\n " . $self->name . ".\n\nDescription:\n ". $self->description . ".\n\nExecuted by:\n " . $self->username . " .";
59 print STDOUT "\nChecking if this db_patch was executed before or if previous db_patches have been executed.\n";
61 print STDOUT "\nExecuting the SQL commands.\n";
62 my $schema = Bio::Chado::Schema->connect( sub { $self->dbh->clone } );
65 print STDERR "INSERTING CV TERMS...\n";
67 my $protocol_type_id = SGN::Model::Cvterm->get_cvterm_row($schema, "vcf_map_details", "protocol_property")->cvterm_id();
68 my $genotype_type_id = SGN::Model::Cvterm->get_cvterm_row($schema, "vcf_snp_genotyping", "genotype_property")->cvterm_id();
70 my $q = "SELECT nd_protocolprop.nd_protocol_id, nd_protocolprop.value, genotypeprop.genotypeprop_id FROM nd_protocolprop JOIN nd_protocol using(nd_protocol_id) JOIN nd_experiment_protocol USING(nd_protocol_id) JOIN nd_experiment USING(nd_experiment_id) JOIN nd_experiment_genotype USING(nd_experiment_id) JOIN genotype USING(genotype_id) JOIN genotypeprop USING (genotype_id) WHERE nd_protocolprop.type_id = $protocol_type_id AND genotypeprop.type_id=$genotype_type_id;";
72 my $h = $schema->storage->dbh()->prepare($q);
73 $h->execute();
74 while (my ($protocol_id, $protocolprop_value, $genotypeprop_id) = $h->fetchrow_array()) {
76 my $protocolprop_hash = decode_json $protocolprop_value;
77 my $markers_hash = $protocolprop_hash->{'markers'};
79 my $q2 = "SELECT genotypeprop_id, value FROM genotypeprop WHERE genotypeprop_id = $genotypeprop_id;";
80 my $h2 = $schema->storage->dbh()->prepare($q2);
81 $h2->execute();
82 while (my ($genotypeprop_id, $genotypeprop_value) = $h2->fetchrow_array()) {
83 print STDERR "Updating genotypeprop_id $genotypeprop_id\n";
84 my $genotypeprop_hash = decode_json $genotypeprop_value;
85 my %new_genotypeprop_hash;
86 while (my ($marker_name, $geno) = each %$genotypeprop_hash) {
87 my $gt = $geno->{'GT'};
88 if ($gt) {
89 my $marker_info = $markers_hash->{$marker_name};
90 my $ref = $marker_info->{'ref'};
91 my $alt = $marker_info->{'alt'};
92 my @separated_alts = split ',', $alt;
94 my @nucleotide_genotype;
95 my $separator = '/';
96 my @alleles = split (/\//, $gt);
97 if (scalar(@alleles) < 1){
98 @alleles = split (/\|/, $gt);
99 if (scalar(@alleles) > 0) {
100 $separator = '|';
103 foreach (@alleles) {
104 if (looks_like_number($_)) {
105 my $index = $_ + 0;
106 if ($index == 0) {
107 push @nucleotide_genotype, $ref; #Using Reference Allele
108 } else {
109 push @nucleotide_genotype, $separated_alts[$index-1]; #Using Alternate Allele
111 } else {
112 push @nucleotide_genotype, $_;
115 $geno->{'GT'} = join $separator, @nucleotide_genotype;
117 $new_genotypeprop_hash{$marker_name} = $geno;
119 my $new_genotypeprop_string = encode_json \%new_genotypeprop_hash;
120 my $q3 = "UPDATE genotypeprop SET value = '$new_genotypeprop_string' WHERE genotypeprop_id = $genotypeprop_id;";
121 my $h3 = $schema->storage->dbh()->prepare($q3);
122 $h3->execute();
126 print "You're done!\n";
130 ####
131 1; #
132 ####