fix the observationUnitPUI url.
[sgn.git] / bin / find_stock_uniquename.pl
blob0e94ecc5c572576544ffc69d035a484d4e8c8b39
1 #!/usr/bin/perl
3 use strict;
4 use warnings;
5 use Bio::Chado::Schema;
6 use Getopt::Std;
7 use SGN::Model::Cvterm;
8 use CXGN::DB::InsertDBH;
11 our ($opt_H, $opt_D);
12 getopts("H:D:");
13 my $dbhost = $opt_H;
14 my $dbname = $opt_D;
15 my $file = shift;
19 my $dbh = CXGN::DB::InsertDBH->new( { dbhost=>$dbhost,
20 dbname=>$dbname,
21 dbargs => {AutoCommit => 1,
22 RaiseError => 1,
25 } );
28 my $schema= Bio::Chado::Schema->connect( sub { $dbh->get_actual_dbh() });
30 print STDERR "Getting genotype names... ";
31 my %g2s;
33 my $q = "SELECT genotype.uniquename, stock.uniquename FROM genotype join nd_experiment_genotype using(genotype_id) JOIN nd_experiment_stock using(nd_experiment_id) join stock using(stock_id)";
35 my $h = $dbh->prepare($q);
36 $h->execute();
38 while (my ($gt, $stock) = $h->fetchrow_array()) {
39 $g2s{uc($gt)} = $stock;#uc($stock);
40 if ($gt =~ m/(.*?)\|(.*)/) {
41 $g2s{uc($1)} = $stock; #uc($stock);
45 print STDERR "Done.\n";
47 my %stock_types;
48 print STDERR "Getting stock type ids... ";
49 my $rs = $schema->resultset("Cv::Cvterm")->search( { 'cv.name' => 'stock_type'}, { join => 'cv' } );
51 while (my $r = $rs->next()) {
52 $stock_types{$r->cvterm_id()} = $r->name();
55 print STDERR "Done.\n";
57 open (my $file_fh, "<", $file ) || die ("\nERROR: the file $file could not be found\n" );
59 my $synonym_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'stock_synonym', 'stock_property')->cvterm_id();
60 my $accession_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'stock_synonym', 'stock_property')->cvterm_id();
62 my %stats;
63 my $line_count = 0;
65 while (my $line = <$file_fh>) {
66 chomp($line);
68 $line_count++;
70 $line = uc($line);
72 my $id = $line;
73 if ($line =~ m/(.*?)\:(.*)$/) {
74 $id = $1;
77 # if ($line =~ /^(TMS.*)\_A\d{5}$/) {
78 # $id = $1;
79 # }
81 my $uniquename = "";
82 my $match_type = "";
83 my $stock_type = "";
85 if ($g2s{$line}) {
86 $uniquename = $g2s{$line};
87 $match_type = "hash_direct";
88 $stats{hash_direct}++;
90 my $stock_row = $schema->resultset("Stock::Stock")->find( { uniquename => { ilike => $uniquename } });
92 $stock_type = $stock_types{$stock_row->type_id};# "accession";
94 elsif( $g2s{$id}) {
95 $uniquename = $g2s{$id};
96 $match_type = "hash_modified";
97 $stats{hash_modified}++;
99 my $stock_row = $schema->resultset("Stock::Stock")->find( { uniquename => { ilike => $uniquename } });
100 $stock_type = $stock_types{$stock_row->type_id}; #"accession";
102 else {
104 my $stock_row = $schema->resultset("Stock::Stock")->find( { uniquename => { ilike => $id } });
106 if ($stock_row) {
107 $uniquename = $stock_row->uniquename();
108 $match_type = "direct";
109 $stats{direct}++;
110 $stock_type = $stock_types{$stock_row->type_id} || $stock_row->type_id;
112 else {
113 my $syn_rs = $schema->resultset("Stock::Stockprop")->search( { value => { ilike => $id }, 'me.type_id' => $synonym_id }, { join => 'stock' } );
116 if ($syn_rs->count() == 1) {
117 my $row = $syn_rs->first()->stock();
118 $uniquename = $row->uniquename();
119 $match_type = "synonym";
120 $stats{synonym}++;
121 $stock_type = $stock_types{$row->type_id()} || $row->type_id();
123 elsif ($syn_rs->count() > 1) {
124 while (my $r = $syn_rs->next()) {
125 $uniquename .= $r->stock->uniquename().",";
126 $match_type = "multiple synonyms";
127 $stats{multiple_synonyms}++;
128 $stock_type .= $stock_types{$r->stock->type_id()}.",";
131 else {
132 $uniquename = $line;
133 $match_type = "absent";
134 $stats{absent}++;
139 my $out = join("\t", $line, $id, $uniquename, $match_type, $stock_type);
141 print "$out\n";
144 if ($line_count % 100 == 0) {
145 print STDERR "Processing: ".join (", ", (map { "$_ : $stats{$_}" } sort keys(%stats)))."\r";
152 print STDERR "Done.\n";
153 print STDERR "Complete.\n";