5 update_cvterm_annotations.pl
9 update_cvterm_annotationss.pl -H [dbhost] -D [dbname] -i [infile]
11 =head1 COMMAND-LINE OPTIONS
13 -H host name (required) e.g. "localhost"
14 -D database name (required) e.g. "sandbox_musabase"
15 -i path to infile (required)
19 This script updates phenotypes associated with depracated cvterms to the current ones. The infile provided has two columns, in the first column is the cvterm accession as it is in the database, and in the second column is the new cvterm accession (format is db.name:dbxref.accession e.g. PREFIX:NNNNNNN) . There is no header on the infile and the infile is .xls and .xlsx.
24 Naama Menda (nm249@cornell.edu)
34 use Spreadsheet
::ParseExcel
;
35 use Spreadsheet
::ParseXLSX
;
36 use Bio
::Chado
::Schema
;
37 use CXGN
::DB
::InsertDBH
;
40 our ($opt_H, $opt_D, $opt_i, $opt_t);
44 if (!$opt_H || !$opt_D || !$opt_i ) {
45 pod2usage
(-verbose
=> 2, -message
=> "Must provide options -H (hostname), -D (database name), -i (input file) \n");
51 # Match a dot, extension .xls / .xlsx
52 my ($extension) = $opt_i =~ /(\.[^.]+)$/;
55 if ($extension eq '.xlsx') {
56 $parser = Spreadsheet
::ParseXLSX
->new();
59 $parser = Spreadsheet
::ParseExcel
->new();
62 my $excel_obj = $parser->parse($opt_i);
64 my $dbh = CXGN
::DB
::InsertDBH
->new({
67 dbargs
=> {AutoCommit
=> 1, RaiseError
=> 1}
70 my $schema= Bio
::Chado
::Schema
->connect( sub { $dbh->get_actual_dbh() } );
71 $dbh->do('SET search_path TO public,sgn');
74 my $worksheet = ( $excel_obj->worksheets() )[0]; #support only one worksheet
75 my ( $row_min, $row_max ) = $worksheet->row_range();
76 my ( $col_min, $col_max ) = $worksheet->col_range();
79 for my $row ( 0 .. $row_max ) {
81 my $db_cvterm = $worksheet->get_cell($row,0)->value();
82 my $file_cvterm = $worksheet->get_cell($row,1)->value();
84 my ($old_db_name, $old_accession ) = split ":", $db_cvterm ;
85 my ($new_db_name, $new_accession ) = split ":" , $file_cvterm;
89 my $old_cvterm = $schema->resultset('Cv::Cvterm')->find(
91 'db.name' => $old_db_name,
92 'dbxref.accession' => $old_accession,
94 { join => { 'dbxref' => 'db'} , }
96 if ( !defined $old_cvterm ) {
97 print STDERR
"Cannot find cvterm $db_cvterm in the database! skipping\n";
101 my $new_cvterm = $schema->resultset('Cv::Cvterm')->find(
103 'db.name' => $new_db_name,
104 'dbxref.accession' => $new_accession,
106 { join => { 'dbxref' => 'db'} , }
109 my $phenotypes = $schema->resultset('Phenotype::Phenotype')->search(
111 observable_id
=> $old_cvterm->cvterm_id,
112 cvalue_id
=> $old_cvterm->cvterm_id,
115 print STDERR
"Updating cvterm $db_cvterm to $file_cvterm\n";
117 $phenotypes->update( { observable_id
=> $new_cvterm->cvterm_id } );
118 $phenotypes->update( { cvalue_id
=> $new_cvterm->cvterm_id } );
122 my $transaction_error;
124 $schema->txn_do($coderef);
126 $transaction_error = $_;
129 if ($transaction_error || $opt_t) {
131 print STDERR
"Transaction error storing terms: $transaction_error\n";
133 print STDERR
"Script Complete.\n";