bin/update_cvterm_annotations.pl

   1 #!/usr/bin/perl
   2
   3 =head1 NAME
   4
   5 update_cvterm_annotations.pl
   6
   7 =head1 SYNOPSIS
   8
   9     update_cvterm_annotations.pl -H [dbhost] -D [dbname] -i [infile]
  10
  11 =head1 COMMAND-LINE OPTIONS
  12   ARGUMENTS
  13  -H host name (required) e.g. "localhost"
  14  -D database name (required) e.g. "sandbox_musabase"
  15  -i path to infile (required)
  16
  17 =head1 DESCRIPTION
  18
  19 This script updates phenotypes associated with deprecated cvterms to the current ones. The infile provided has two columns: the first column holds the cvterm accession as it is in the database, and the second column holds the new cvterm accession (format is db.name:dbxref.accession, e.g. PREFIX:NNNNNNN). The infile has no header row and must be an .xls or .xlsx file.
  20
  21
  22 =head1 AUTHOR
  23
  24  Naama Menda (nm249@cornell.edu)
  25
  26 =cut
  27
  28 use strict;
  29
  30 use Getopt::Std;
  31 use Data::Dumper;
  32 use Carp qw /croak/ ;
  33 use Pod::Usage;
  34 use Spreadsheet::ParseExcel;
  35 use Spreadsheet::ParseXLSX;
  36 use Bio::Chado::Schema;
  37 use CXGN::DB::InsertDBH;
  38 use Try::Tiny;
  39
  40 our ($opt_H, $opt_D, $opt_i, $opt_t);
  41
  42 getopts('H:D:ti:');
  43
  44 if (!$opt_H || !$opt_D || !$opt_i ) {
  45     pod2usage(-verbose => 2, -message => "Must provide options -H (hostname), -D (database name), -i (input file) \n");
  46 }
  47
  48 my $dbhost = $opt_H;
  49 my $dbname = $opt_D;
  50
  51 # Match a dot, extension .xls / .xlsx
  52 my ($extension) = $opt_i =~ /(\.[^.]+)$/;
  53 my $parser;
  54
  55 if ($extension eq '.xlsx') {
  56         $parser = Spreadsheet::ParseXLSX->new();
  57 }
  58 else {
  59         $parser = Spreadsheet::ParseExcel->new();
  60 }
  61
  62 my $excel_obj = $parser->parse($opt_i);
  63
  64 my $dbh = CXGN::DB::InsertDBH->new({
  65         dbhost=>$dbhost,
  66         dbname=>$dbname,
  67         dbargs => {AutoCommit => 1, RaiseError => 1}
  68 });
  69
  70 my $schema= Bio::Chado::Schema->connect(  sub { $dbh->get_actual_dbh() } );
  71 $dbh->do('SET search_path TO public,sgn');
  72
  73
  74 my $worksheet = ( $excel_obj->worksheets() )[0]; #support only one worksheet
  75 my ( $row_min, $row_max ) = $worksheet->row_range();
  76 my ( $col_min, $col_max ) = $worksheet->col_range();
  77
  78 my $coderef = sub {
  79     for my $row ( 0 .. $row_max ) {
  80
  81         my $db_cvterm = $worksheet->get_cell($row,0)->value();
  82         my $file_cvterm = $worksheet->get_cell($row,1)->value();
  83
  84         my ($old_db_name, $old_accession ) = split ":", $db_cvterm ;
  85         my ($new_db_name, $new_accession ) = split ":" , $file_cvterm;
  86
  87
  88
  89         my $old_cvterm = $schema->resultset('Cv::Cvterm')->find(
  90             {
  91                 'db.name'          => $old_db_name,
  92                 'dbxref.accession' => $old_accession,
  93             },
  94             { join => { 'dbxref' => 'db'} , }
  95             ) ;
  96         if ( !defined $old_cvterm ) {
  97             print STDERR "Cannot find cvterm $db_cvterm in the database! skipping\n";
  98             next();
  99         }
 100
 101         my $new_cvterm = $schema->resultset('Cv::Cvterm')->find(
 102             {
 103                 'db.name'          => $new_db_name,
 104                 'dbxref.accession' => $new_accession,
 105             },
 106             { join => { 'dbxref' => 'db'} , }
 107             );
 108
 109         my $phenotypes = $schema->resultset('Phenotype::Phenotype')->search(
 110             {
 111                 observable_id => $old_cvterm->cvterm_id,
 112                 cvalue_id     => $old_cvterm->cvterm_id,
 113             } ) ;
 114
 115         print STDERR "Updating cvterm $db_cvterm to $file_cvterm\n";
 116
 117         $phenotypes->update(  { observable_id => $new_cvterm->cvterm_id }  );
 118         $phenotypes->update( { cvalue_id => $new_cvterm->cvterm_id } );
 119     }
 120 };
 121
 122 my $transaction_error;
 123 try {
 124     $schema->txn_do($coderef);
 125 } catch {
 126     $transaction_error =  $_;
 127 };
 128
 129 if ($transaction_error || $opt_t) {
 130     $dbh->rollback;
 131     print STDERR "Transaction error storing terms: $transaction_error\n";
 132 } else {
 133     print STDERR "Script Complete.\n";
 134 }