Merge pull request #4106 from solgenomics/topic/wishlist
[sgn.git] / bin / change_trait_ids.pl
blob908cb5970455f5db2d58088bd67b56ea3573d7e8
1 #!/usr/bin/perl
3 =head1
5 change_trait_ids.pl
7 =head1 SYNOPSIS
9 change_trait_ids.pl -H [dbhost] -D [dbname] -i [tsv file with old and new ids] [-t]
11 If an error caused by a duplicate key value is thrown, for example:
13 DBD::Pg::st execute failed: ERROR: duplicate key value violates unique constraint "dbxref_c1"
14 DETAIL: Key (db_id, accession, version)=(274, 1000004, ) already exists. at ./sgn/bin/change_trait_ids.pl line 69, <FILE> line 87.
16 You can manually alter the dbxref accession value in the db to something non-conflicting, for example:
17 "update dbxref set accession = '1444444' where accession = '1000004' and db_id = 274;"
18 then re-run this script. This process may have to be repeated for multiple ids
20 =head1 COMMAND-LINE OPTIONS
22 -H host name
23 -D database name
24 -i file with old and new ids
25 -t Test run. Rolls back at the end.
27 =head1 DESCRIPTION
29 =head1 AUTHOR
31 Bryan Ellerbrock (bje24@cornell.edu) - Sept 2016
33 =cut
35 use strict;
36 use warnings;
38 use Getopt::Std;
39 use Try::Tiny;
40 use CXGN::DB::InsertDBH;
42 our ($opt_H, $opt_D, $opt_i, $opt_t);
44 getopts('H:i:D:t');
46 my $dbhost = $opt_H;
47 my $dbname = $opt_D;
48 my $file = $opt_i;
49 my $dbh;
50 my %seq;
52 # store database handle and schema
54 $dbh = CXGN::DB::InsertDBH->new( { dbhost=>$dbhost,
55 dbname=>$dbname,
56 dbargs => {AutoCommit => 0,
57 RaiseError => 1}
61 # prepare sql delete statements
63 my $update_query = "Update dbxref set accession = ? where accession = ? and db_id = ((SELECT dbxref_1.db_id FROM stock JOIN nd_experiment_stock USING (stock_id) JOIN nd_experiment_phenotype USING (nd_experiment_id) JOIN phenotype USING (phenotype_id) JOIN cvterm cvterm_1 ON phenotype.cvalue_id = cvterm_1.cvterm_id JOIN dbxref dbxref_1 ON cvterm_1.dbxref_id = dbxref_1.dbxref_id LIMIT 1 ));";
64 my $update_handle = $dbh->prepare($update_query);
66 try {
67 # identify ids to be changed
69 open (FILE, "<", $opt_i) || die "Can't open infile $opt_i \n";
71 while (<FILE>) {
72 my ($new_id, $old_id) = split('\t', $_);
73 chomp $old_id;
74 $new_id =~ s/.*://;
75 $old_id =~ s/.*://;
76 print STDERR "Updating old id $old_id to new id $new_id\n";
77 #chomp (my $ids = $_);
78 $update_handle->execute($new_id, $old_id);
79 my $pattern = '%';
80 $pattern .= $old_id;
81 my $list_fix = "update sgn_people.list_item set content = regexp_replace(content, ':.*', ':$new_id') where content like '$pattern';";
82 print STDERR "updating list elements with query: $list_fix\n";
83 my $list_fix_handle = $dbh->prepare($list_fix);
84 $list_fix_handle->execute();
86 } catch {
87 # Rollback if transaction failed
89 $dbh->rollback();
90 die "An error occured! Transaction rolled back!" . $_ . "\n";
93 if (!$opt_t) {
94 # commit if this is not a test run
96 $dbh->commit();
97 print "Id update succeeded! Commiting new ids! \n\n";
98 } else {
99 # Rolling back because test run
101 print "No errors occurred. Rolling back test run. \n\n";
102 $dbh->rollback();