Merge pull request #5186 from solgenomics/topic/bulk_search_lists
[sgn.git] / bin / rename_locations.pl
blob8e8eddb23e0c5494c453a90cb2a5d8c7a2e460cc
1 #!/usr/bin/perl
3 =head1
5 rename_locations.pl - renaming locations in a cxgn database
7 =head1 SYNOPSIS
9 rename_locations.pl -H [dbhost] -D [dbname] -i [infile]
11 =head1 COMMAND-LINE OPTIONS
12 ARGUMENTS
13 -H host name (required) e.g. "localhost"
14 -D database name (required) e.g. "cxgn_cassava"
15 -i path to infile (required)
17 =head1 DESCRIPTION
19 This script loads locations data into Chado, by adding data to nd_geolocation table. Infile is Excel .xls and .xlsx format.
20 Header is in this order: 'old_location_name', 'new_location_name'
22 =head1 AUTHOR
24 Lukas Mueller (lam87@cornell.edu)
26 Based on a script by Nicolas Morales (nm529@cornell.edu)
28 =cut
30 use strict;
32 use Try::Tiny;
33 use Getopt::Std;
34 use Data::Dumper;
35 use Carp qw /croak/ ;
36 use Pod::Usage;
37 use Spreadsheet::ParseExcel;
38 use Spreadsheet::ParseXLSX;
39 use Bio::Chado::Schema;
40 use CXGN::DB::InsertDBH;
42 our ($opt_H, $opt_D, $opt_i);
44 getopts('H:D:i:');
46 if (!$opt_H || !$opt_D || !$opt_i) {
47 pod2usage(-verbose => 2, -message => "Must provide options -H (hostname), -D (database name), -i (input file) \n");
50 my $dbhost = $opt_H;
51 my $dbname = $opt_D;
53 # Match a dot, extension .xls / .xlsx
54 my ($extension) = $opt_i =~ /(\.[^.]+)$/;
55 my $parser;
57 if ($extension eq '.xlsx') {
58 $parser = Spreadsheet::ParseXLSX->new();
60 else {
61 $parser = Spreadsheet::ParseExcel->new();
64 my $excel_obj = $parser->parse($opt_i);
66 my $dbh = CXGN::DB::InsertDBH->new({
67 dbhost=>$dbhost,
68 dbname=>$dbname,
69 dbargs => {AutoCommit => 1, RaiseError => 1}
70 });
72 my $schema= Bio::Chado::Schema->connect( sub { $dbh->get_actual_dbh() } );
73 $dbh->do('SET search_path TO public,sgn');
76 my $worksheet = ( $excel_obj->worksheets() )[0]; #support only one worksheet
77 my ( $row_min, $row_max ) = $worksheet->row_range();
78 my ( $col_min, $col_max ) = $worksheet->col_range();
80 if ($col_max ne '1' || $worksheet->get_cell(0,0)->value() ne 'old_location_name' || $worksheet->get_cell(0,1)->value() ne 'new_location_name') {
81 pod2usage(-verbose => 2, -message => "Headers must be only in this order: old_location_name, new_location_name.\n");
85 try {
86 $schema->txn_do(
87 sub {
88 for my $row ( 1 .. $row_max ) {
89 my $old_name = $worksheet->get_cell($row,0)->value();
90 my $new_name = $worksheet->get_cell($row,1)->value();
91 my $row = $schema->resultset('NaturalDiversity::NdGeolocation')->find({ description => $old_name });
92 if ($row) {
93 $row->description($new_name);
94 $row->update();
95 print STDERR "Updated $old_name to $new_name\n";
97 else {
98 print STDERR "Location $old_name was not found. Skipping.\n";
104 catch {
105 my $error = shift;
106 print STDERR "An error occurred ($error). Rolling back.\n";
109 print STDERR "Script Complete.\n";