Merge pull request #4363 from solgenomics/topic/ordering_system_N
[sgn.git] / bin / check_genotyped_accessions.pl
blob5586c094402ec704fee370d4e7a8a8a4d9c880ff
1 #!/usr/bin/perl
3 =head1
5 check_genotyped_accessions.pl - checks if accessions are genotyped or not using a genotyping protocol.
7 =head1 SYNOPSIS
9 perl bin/check_genotyped_accessions.pl -h [dbhost] -d [dbname] -i [infile] -o [outfile] -p [genotyping_protocol]
11 =head1 REQUIRED ARGUMENTS
12 -h host name e.g. "localhost"
13 -d database name e.g. "cxgn_cassava"
14 -p genotyping protocol name
15 -i path to infile
16 -o path to output file
18 =head1 DESCRIPTION
20 This script checks if accessions are genotyped using a given genotyping protocol. The accessions are in a single column, one accessions per line. File should not have a header.
22 =head1 AUTHOR
24 Isaak Y Tecle
26 =cut
29 use strict;
30 use warnings;
32 use Bio::Chado::Schema;
33 use Getopt::Std;
34 use SGN::Model::Cvterm;
35 use CXGN::DB::InsertDBH;
36 use File::Slurp qw /read_file write_file/;
37 use Data::Dumper;
39 our ($opt_h, $opt_d, $opt_p, $opt_i, $opt_o);
41 getopts("h:d:p:i:o:");
42 my $dbhost = $opt_h;
43 my $dbname = $opt_d;
44 my $in_file = $opt_i;
45 my $out_file = $opt_o;
46 my $protocol_name = $opt_p;
48 my $dbh = CXGN::DB::InsertDBH->new( { dbhost=>$dbhost,
49 dbname=>$dbname,
50 dbargs => {AutoCommit => 1,
51 RaiseError => 1,
54 } );
57 my $schema= Bio::Chado::Schema->connect( sub { $dbh->get_actual_dbh() });
60 my $q = "SELECT nd_protocol_id, name
61 FROM nd_protocol
62 WHERE name = ?";
64 my $h = $dbh->prepare($q);
65 $h->execute($protocol_name);
67 my $protocol_exists;
69 while (my ($pr_id, $pr_name) = $h->fetchrow_array()) {
70 print STDERR "\nFound genotyping protocol: $pr_name -- id: $pr_id\n";
71 $protocol_exists = 1;
74 if (!$protocol_exists) {
75 die "\n\nGENOTYPING PROTOCOL $protocol_name does not exist in the database\n\n";
78 print STDERR "Getting genotype names... ";
80 $q = "SELECT genotype.uniquename, stock.uniquename
81 FROM genotype
82 JOIN nd_experiment_genotype USING(genotype_id)
83 JOIN nd_experiment_stock USING(nd_experiment_id)
84 JOIN stock USING(stock_id)
85 JOIN nd_experiment USING (nd_experiment_id)
86 JOIN nd_experiment_protocol USING(nd_experiment_id)
87 JOIN nd_protocol USING (nd_protocol_id)
88 WHERE nd_protocol.name = ?";
91 $h = $dbh->prepare($q);
92 $h->execute($protocol_name);
94 my %g2s;
95 my $cnt = 0;
97 while (my ($gt, $stock) = $h->fetchrow_array()) {
98 if ($stock =~ m/(.*?)\.(.*)/) {
99 $stock = $1;
102 if (!$g2s{$stock}) {
103 $cnt++;
104 $g2s{$stock} = uc($gt);
105 print STDERR "\n$stock-- $gt\n" if $cnt < 10;
109 my $synonym_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'stock_synonym', 'stock_property')->cvterm_id();
111 my @clones = read_file($in_file);
113 my $output = "Clone\tUniquename\tMatch_type\tGenotyped\n";
115 my $genotyped;
116 my $genotyped_cnt = 0;
117 my $not_genotyped_cnt = 0;
118 my $total_clones = @clones;
120 foreach my $clone (@clones) {
122 $clone =~ s/\s+//;
124 my $uniquename = "";
125 my $match_type = "";
126 my $stock_type = "";
128 if ($g2s{$clone}) {
129 $uniquename = $clone;
130 $match_type = "uniquename";
131 $genotyped = 'Yes';
133 $genotyped_cnt++;
135 else {
136 print STDERR "\n checking if $clone is a synonym and find its uniquename\n";
138 my $syn_rs = $schema->resultset("Stock::Stockprop")->search( { value => { ilike => $clone}, 'me.type_id' => $synonym_id }, { join => 'stock' } );
140 if ($syn_rs->count()) {
141 my $row = $syn_rs->first()->stock();
142 my $uniquename = $row->uniquename();
144 if ($g2s{$uniquename}) {
145 $match_type = "synonym";
146 $genotyped = 'Yes';
147 $genotyped_cnt++;
148 } else {
149 $uniquename = $clone;
150 $match_type = "--";
151 $genotyped = 'No';
152 $not_genotyped_cnt++;
155 else {
156 $uniquename = $clone;
157 $match_type = "--";
158 $genotyped = 'No';
159 $not_genotyped_cnt++;
163 my $out = join("\t", $clone, $uniquename, $match_type, $genotyped);
164 $output .= $out . "\n";
168 print STDERR "\nChecked for $total_clones clones: $genotyped_cnt are genotyped, $not_genotyped_cnt are not genotyped with $protocol_name.\n";
170 print STDERR "\nwriting output to file: $out_file\n";
171 write_file($out_file, $output);
173 print STDERR "Done.\n";