5 check_genotyped_accessions.pl - checks if accessions are genotyped or not using a genotyping protocol.
9 perl bin/check_genotyped_accessions.pl -h [dbhost] -d [dbname] -i [infile] -o [outfile] -p [genotyping_protocol]
11 =head1 REQUIRED ARGUMENTS
12 -h host name e.g. "localhost"
13 -d database name e.g. "cxgn_cassava"
14 -p genotyping protocol name
16 -o path to output file
20 This script checks if accessions are genotyped using a given genotyping protocol. The accessions are in a single column, one accessions per line. File should not have a header.
32 use Bio
::Chado
::Schema
;
34 use SGN
::Model
::Cvterm
;
35 use CXGN
::DB
::InsertDBH
;
36 use File
::Slurp qw
/read_file write_file/;
39 our ($opt_h, $opt_d, $opt_p, $opt_i, $opt_o);
41 getopts
("h:d:p:i:o:");
45 my $out_file = $opt_o;
46 my $protocol_name = $opt_p;
48 my $dbh = CXGN
::DB
::InsertDBH
->new( { dbhost
=>$dbhost,
50 dbargs
=> {AutoCommit
=> 1,
57 my $schema= Bio
::Chado
::Schema
->connect( sub { $dbh->get_actual_dbh() });
60 my $q = "SELECT nd_protocol_id, name
64 my $h = $dbh->prepare($q);
65 $h->execute($protocol_name);
69 while (my ($pr_id, $pr_name) = $h->fetchrow_array()) {
70 print STDERR
"\nFound genotyping protocol: $pr_name -- id: $pr_id\n";
74 if (!$protocol_exists) {
75 die "\n\nGENOTYPING PROTOCOL $protocol_name does not exist in the database\n\n";
78 print STDERR
"Getting genotype names... ";
80 $q = "SELECT genotype.uniquename, stock.uniquename
82 JOIN nd_experiment_genotype USING(genotype_id)
83 JOIN nd_experiment_stock USING(nd_experiment_id)
84 JOIN stock USING(stock_id)
85 JOIN nd_experiment USING (nd_experiment_id)
86 JOIN nd_experiment_protocol USING(nd_experiment_id)
87 JOIN nd_protocol USING (nd_protocol_id)
88 WHERE nd_protocol.name = ?";
91 $h = $dbh->prepare($q);
92 $h->execute($protocol_name);
97 while (my ($gt, $stock) = $h->fetchrow_array()) {
98 if ($stock =~ m/(.*?)\.(.*)/) {
104 $g2s{$stock} = uc($gt);
105 print STDERR
"\n$stock-- $gt\n" if $cnt < 10;
109 my $synonym_id = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'stock_synonym', 'stock_property')->cvterm_id();
111 my @clones = read_file
($in_file);
113 my $output = "Clone\tUniquename\tMatch_type\tGenotyped\n";
116 my $genotyped_cnt = 0;
117 my $not_genotyped_cnt = 0;
118 my $total_clones = @clones;
120 foreach my $clone (@clones) {
129 $uniquename = $clone;
130 $match_type = "uniquename";
136 print STDERR
"\n checking if $clone is a synonym and find its uniquename\n";
138 my $syn_rs = $schema->resultset("Stock::Stockprop")->search( { value
=> { ilike
=> $clone}, 'me.type_id' => $synonym_id }, { join => 'stock' } );
140 if ($syn_rs->count()) {
141 my $row = $syn_rs->first()->stock();
142 my $uniquename = $row->uniquename();
144 if ($g2s{$uniquename}) {
145 $match_type = "synonym";
149 $uniquename = $clone;
152 $not_genotyped_cnt++;
156 $uniquename = $clone;
159 $not_genotyped_cnt++;
163 my $out = join("\t", $clone, $uniquename, $match_type, $genotyped);
164 $output .= $out . "\n";
168 print STDERR
"\nChecked for $total_clones clones: $genotyped_cnt are genotyped, $not_genotyped_cnt are not genotyped with $protocol_name.\n";
170 print STDERR
"\nwriting output to file: $out_file\n";
171 write_file
($out_file, $output);
173 print STDERR
"Done.\n";