Merge pull request #5046 from solgenomics/fix-dependabot3
[sgn.git] / bin / rename_cvterms.pl
blob0df05284d3370cdc6858fc11dc62d396595abad6
1 #!/usr/bin/perl
3 =head1
5 rename_cvterms.pl - for renaming cvterms in bulk
7 =head1 SYNOPSIS
9 rename_cvterms.pl -H [dbhost] -D [dbname] -i [infile]
11 =head1 COMMAND-LINE OPTIONS
12 ARGUMENTS
13 -H host name (required) e.g. "localhost"
14 -D database name (required) e.g. "cxgn_cassava"
15 -i path to infile (required)
17 =head1 DESCRIPTION
19 This script rename cvterms in bulk. The infile provided has two columns, in the first column is the cvterm name as it is in the database, and in the second column is the new cvterm name. There is no header on hte infile and the infile is .xls and .xlsx.
22 =head1 AUTHOR
24 Nicolas Morales (nm529@cornell.edu)
26 =cut
28 use strict;
30 use Getopt::Std;
31 use Data::Dumper;
32 use Carp qw /croak/ ;
33 use Pod::Usage;
34 use Spreadsheet::ParseExcel;
35 use Spreadsheet::ParseXLSX;
36 use Bio::Chado::Schema;
37 use CXGN::DB::InsertDBH;
38 use Try::Tiny;
40 our ($opt_H, $opt_D, $opt_i, $opt_c);
42 getopts('H:D:i:c:');
44 if (!$opt_H || !$opt_D || !$opt_i || !$opt_c) {
45 pod2usage(-verbose => 2, -message => "Must provide options -H (hostname), -D (database name), -i (input file), -c CVNAME \n");
48 my $dbhost = $opt_H;
49 my $dbname = $opt_D;
51 # Match a dot, extension .xls / .xlsx
52 my ($extension) = $opt_i =~ /(\.[^.]+)$/;
53 my $parser;
55 if ($extension eq '.xlsx') {
56 $parser = Spreadsheet::ParseXLSX->new();
58 else {
59 $parser = Spreadsheet::ParseExcel->new();
62 my $excel_obj = $parser->parse($opt_i);
64 my $dbh = CXGN::DB::InsertDBH->new({
65 dbhost=>$dbhost,
66 dbname=>$dbname,
67 dbargs => {AutoCommit => 1, RaiseError => 1}
68 });
70 my $schema= Bio::Chado::Schema->connect( sub { $dbh->get_actual_dbh() } );
71 $dbh->do('SET search_path TO public,sgn');
74 my $worksheet = ( $excel_obj->worksheets() )[0]; #support only one worksheet
75 my ( $row_min, $row_max ) = $worksheet->row_range();
76 my ( $col_min, $col_max ) = $worksheet->col_range();
78 my $coderef = sub {
79 my $cv = $schema->resultset('Cv::Cv')->find({ name => $opt_c });
80 for my $row ( 0 .. $row_max ) {
82 my $db_cvterm_name = $worksheet->get_cell($row,0)->value();
83 my $new_cvterm_name = $worksheet->get_cell($row,1)->value();
84 print STDERR $db_cvterm_name."\n";
86 my $old_cvterm = $schema->resultset('Cv::Cvterm')->find({ name => $db_cvterm_name, cv_id => $cv->cv_id() });
87 my $new_cvterm = $old_cvterm->update({ name => $new_cvterm_name});
92 my $transaction_error;
93 try {
94 $schema->txn_do($coderef);
95 } catch {
96 $transaction_error = $_;
99 if ($transaction_error) {
100 print STDERR "Transaction error storing terms: $transaction_error\n";
101 } else {
102 print STDERR "Script Complete.\n";