6 ChangeGenotypepropGTKeyToNucleotides.pm
10 mx-run ThisPackageName [options] -H hostname -D dbname -u username [-F]
12 this is a subclass of L<CXGN::Metadata::Dbpatch>
13 see the perldoc of parent class for more details.
16 This patch converts the genotypeprop GT key from the VCF representation of '0/1' to the nucleotide representation of 'C/T' using the nd_protocolprop 'ref' and 'alt' information in the database.
17 This subclass uses L<Moose>. The parent class uses L<MooseX::Runnable>
23 =head1 COPYRIGHT & LICENSE
25 Copyright 2010 Boyce Thompson Institute for Plant Research
27 This program is free software; you can redistribute it and/or modify
28 it under the same terms as Perl itself.
33 package ChangeGenotypepropGTKeyToNucleotides
;
36 use Bio
::Chado
::Schema
;
37 use SGN
::Model
::Cvterm
;
40 use Scalar
::Util
qw(looks_like_number);
41 extends
'CXGN::Metadata::Dbpatch';
44 has
'+description' => ( default => <<'' );
45 Allows addition of a
link to the raw data file
for genotyping plates
57 print STDOUT
"Executing the patch:\n " . $self->name . ".\n\nDescription:\n ". $self->description . ".\n\nExecuted by:\n " . $self->username . " .";
59 print STDOUT
"\nChecking if this db_patch was executed before or if previous db_patches have been executed.\n";
61 print STDOUT
"\nExecuting the SQL commands.\n";
62 my $schema = Bio
::Chado
::Schema
->connect( sub { $self->dbh->clone } );
65 print STDERR
"INSERTING CV TERMS...\n";
67 my $protocol_type_id = SGN
::Model
::Cvterm
->get_cvterm_row($schema, "vcf_map_details", "protocol_property")->cvterm_id();
68 my $genotype_type_id = SGN
::Model
::Cvterm
->get_cvterm_row($schema, "vcf_snp_genotyping", "genotype_property")->cvterm_id();
70 my $q = "SELECT nd_protocolprop.nd_protocol_id, nd_protocolprop.value, genotypeprop.genotypeprop_id FROM nd_protocolprop JOIN nd_protocol using(nd_protocol_id) JOIN nd_experiment_protocol USING(nd_protocol_id) JOIN nd_experiment USING(nd_experiment_id) JOIN nd_experiment_genotype USING(nd_experiment_id) JOIN genotype USING(genotype_id) JOIN genotypeprop USING (genotype_id) WHERE nd_protocolprop.type_id = $protocol_type_id AND genotypeprop.type_id=$genotype_type_id;";
72 my $h = $schema->storage->dbh()->prepare($q);
74 while (my ($protocol_id, $protocolprop_value, $genotypeprop_id) = $h->fetchrow_array()) {
76 my $protocolprop_hash = decode_json
$protocolprop_value;
77 my $markers_hash = $protocolprop_hash->{'markers'};
79 my $q2 = "SELECT genotypeprop_id, value FROM genotypeprop WHERE genotypeprop_id = $genotypeprop_id;";
80 my $h2 = $schema->storage->dbh()->prepare($q2);
82 while (my ($genotypeprop_id, $genotypeprop_value) = $h2->fetchrow_array()) {
83 print STDERR
"Updating genotypeprop_id $genotypeprop_id\n";
84 my $genotypeprop_hash = decode_json
$genotypeprop_value;
85 my %new_genotypeprop_hash;
86 while (my ($marker_name, $geno) = each %$genotypeprop_hash) {
87 my $gt = $geno->{'GT'};
89 my $marker_info = $markers_hash->{$marker_name};
90 my $ref = $marker_info->{'ref'};
91 my $alt = $marker_info->{'alt'};
92 my @separated_alts = split ',', $alt;
94 my @nucleotide_genotype;
96 my @alleles = split (/\//, $gt);
97 if (scalar(@alleles) < 1){
98 @alleles = split (/\|/, $gt);
99 if (scalar(@alleles) > 0) {
104 if (looks_like_number
($_)) {
107 push @nucleotide_genotype, $ref; #Using Reference Allele
109 push @nucleotide_genotype, $separated_alts[$index-1]; #Using Alternate Allele
112 push @nucleotide_genotype, $_;
115 $geno->{'GT'} = join $separator, @nucleotide_genotype;
117 $new_genotypeprop_hash{$marker_name} = $geno;
119 my $new_genotypeprop_string = encode_json \
%new_genotypeprop_hash;
120 my $q3 = "UPDATE genotypeprop SET value = '$new_genotypeprop_string' WHERE genotypeprop_id = $genotypeprop_id;";
121 my $h3 = $schema->storage->dbh()->prepare($q3);
126 print "You're done!\n";