Merge pull request #2802 from solgenomics/topic/cross_progenies
[sgn.git] / bin / update_cvterm_annotations.pl
blob901b4f9c1dff705217a30b0fb6638ce72fe14da8
1 #!/usr/bin/perl
3 =head1
5 update_cvterm_annotations.pl
7 =head1 SYNOPSIS
9 update_cvterm_annotationss.pl -H [dbhost] -D [dbname] -i [infile]
11 =head1 COMMAND-LINE OPTIONS
12 ARGUMENTS
13 -H host name (required) e.g. "localhost"
14 -D database name (required) e.g. "sandbox_musabase"
15 -i path to infile (required)
17 =head1 DESCRIPTION
19 This script updates phenotypes associated with depracated cvterms to the current ones. The infile provided has two columns, in the first column is the cvterm accession as it is in the database, and in the second column is the new cvterm accession (format is db.name:dbxref.accession e.g. PREFIX:NNNNNNN) . There is no header on the infile and the infile is .xls
22 =head1 AUTHOR
24 Naama Menda (nm249@cornell.edu)
26 =cut
28 use strict;
30 use Getopt::Std;
31 use Data::Dumper;
32 use Carp qw /croak/ ;
33 use Pod::Usage;
34 use Spreadsheet::ParseExcel;
35 use Bio::Chado::Schema;
36 use CXGN::DB::InsertDBH;
37 use Try::Tiny;
39 our ($opt_H, $opt_D, $opt_i, $opt_t);
41 getopts('H:D:ti:');
43 if (!$opt_H || !$opt_D || !$opt_i ) {
44 pod2usage(-verbose => 2, -message => "Must provide options -H (hostname), -D (database name), -i (input file) \n");
47 my $dbhost = $opt_H;
48 my $dbname = $opt_D;
49 my $parser = Spreadsheet::ParseExcel->new();
50 my $excel_obj = $parser->parse($opt_i);
52 my $dbh = CXGN::DB::InsertDBH->new({
53 dbhost=>$dbhost,
54 dbname=>$dbname,
55 dbargs => {AutoCommit => 1, RaiseError => 1}
56 });
58 my $schema= Bio::Chado::Schema->connect( sub { $dbh->get_actual_dbh() } );
59 $dbh->do('SET search_path TO public,sgn');
62 my $worksheet = ( $excel_obj->worksheets() )[0]; #support only one worksheet
63 my ( $row_min, $row_max ) = $worksheet->row_range();
64 my ( $col_min, $col_max ) = $worksheet->col_range();
66 my $coderef = sub {
67 for my $row ( 0 .. $row_max ) {
69 my $db_cvterm = $worksheet->get_cell($row,0)->value();
70 my $file_cvterm = $worksheet->get_cell($row,1)->value();
72 my ($old_db_name, $old_accession ) = split ":", $db_cvterm ;
73 my ($new_db_name, $new_accession ) = split ":" , $file_cvterm;
77 my $old_cvterm = $schema->resultset('Cv::Cvterm')->find(
79 'db.name' => $old_db_name,
80 'dbxref.accession' => $old_accession,
82 { join => { 'dbxref' => 'db'} , }
83 ) ;
84 if ( !defined $old_cvterm ) {
85 print STDERR "Cannot find cvterm $db_cvterm in the database! skipping\n";
86 next();
89 my $new_cvterm = $schema->resultset('Cv::Cvterm')->find(
91 'db.name' => $new_db_name,
92 'dbxref.accession' => $new_accession,
94 { join => { 'dbxref' => 'db'} , }
97 my $phenotypes = $schema->resultset('Phenotype::Phenotype')->search(
99 observable_id => $old_cvterm->cvterm_id,
100 cvalue_id => $old_cvterm->cvterm_id,
101 } ) ;
103 print STDERR "Updating cvterm $db_cvterm to $file_cvterm\n";
105 $phenotypes->update( { observable_id => $new_cvterm->cvterm_id } );
106 $phenotypes->update( { cvalue_id => $new_cvterm->cvterm_id } );
110 my $transaction_error;
111 try {
112 $schema->txn_do($coderef);
113 } catch {
114 $transaction_error = $_;
117 if ($transaction_error || $opt_t) {
118 $dbh->rollback;
119 print STDERR "Transaction error storing terms: $transaction_error\n";
120 } else {
121 print STDERR "Script Complete.\n";