make test pass for multicat parsing with two xlsx files for testing.
[sgn.git] / bin / replace_plots.pl
blob4ee3b95c5da81acc9238fb1617c69b4e431e63f0
1 #!/usr/bin/perl
3 =head1 NAME
5 replace_plots.pl - a bulk script to replace accessions in field layouts
7 =head1 SYNOPSIS
9 rename_stocks.pl -H [dbhost] -D [dbname] -i [infile] <-t>
11 =head1 COMMAND-LINE OPTIONS
13 -H host name (required) e.g. "localhost"
14 -D database name (required) e.g. "cxgn_cassava"
15 -i path to infile (required)
16 -t (optional) test - do not modify the database.
18 =head1 DESCRIPTION
20 This script replaces accessions in field layouts in bulk.
22 The infile provided has three columns:
24 1) the plot name that needs to be changed.
25 2) the stock uniquename as it is in the database that is currently associated
26 3) the new stock uniquename (needs to be in the database).
27 4) optional: a new plot name
29 There is no header on the infile and the infile is .xls and .xlsx.
31 The script will dissociate the old accession in column 1 and associate the new accession. The plot name will be unchanged.
33 =head1 AUTHOR
35 Lukas Mueller (lam87@cornell.edu)
37 Adapted from a cvterm renaming script by:
38 Guillaume Bauchet (gjb99@cornell.edu)
39 Nicolas Morales (nm529@cornell.edu)
41 =cut
43 use strict;
45 use Getopt::Std;
46 use Data::Dumper;
47 use Carp qw /croak/ ;
48 use Pod::Usage;
49 use Spreadsheet::ParseExcel;
50 use Spreadsheet::ParseXLSX;
51 use Bio::Chado::Schema;
52 use CXGN::DB::InsertDBH;
53 use Try::Tiny;
54 use SGN::Model::Cvterm;
56 our ($opt_H, $opt_D, $opt_i, $opt_t);
58 getopts('H:D:i:t');
60 if (!$opt_H || !$opt_D || !$opt_i) {
61 pod2usage(-verbose => 2, -message => "Must provide options -H (hostname), -D (database name), -i (input file)\n");
64 my $dbhost = $opt_H;
65 my $dbname = $opt_D;
67 # Match a dot, extension .xls / .xlsx
68 my ($extension) = $opt_i =~ /(\.[^.]+)$/;
69 my $parser;
71 if ($extension eq '.xlsx') {
72 $parser = Spreadsheet::ParseXLSX->new();
74 else {
75 $parser = Spreadsheet::ParseExcel->new();
78 my $excel_obj = $parser->parse($opt_i);
80 my $dbh = CXGN::DB::InsertDBH->new({
81 dbhost=>$dbhost,
82 dbname=>$dbname,
83 dbargs => {AutoCommit => 1, RaiseError => 1}
84 });
86 my $schema= Bio::Chado::Schema->connect( sub { $dbh->get_actual_dbh() } );
87 $dbh->do('SET search_path TO public,sgn');
90 my $worksheet = ( $excel_obj->worksheets() )[0]; #support only one worksheet
91 my ( $row_min, $row_max ) = $worksheet->row_range();
92 my ( $col_min, $col_max ) = $worksheet->col_range();
94 my $coderef = sub {
95 for my $row ( 0 .. $row_max ) {
97 my $plot_name = $worksheet->get_cell($row, 0)->value();
98 my $plot_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'plot', 'stock_type')->cvterm_id();
99 my $plot_id = $schema->resultset('Stock::Stock')->find({uniquename=>$plot_name, type_id=>$plot_cvterm_id})->stock_id();
101 # my $plot_id = $worksheet->get_cell($row, 1)->value();
102 my $db_uniquename = $worksheet->get_cell($row,1)->value();
103 my $new_uniquename = $worksheet->get_cell($row,2)->value();
105 my $new_plotname = "";
106 eval {
107 $new_plotname = $worksheet->get_cell($row, 3)->value();
110 if ($@) {
111 print STDER "no alternate plot name provided. Keeping old name.\n";
114 print STDERR "$db_uniquename -> $new_uniquename\n";
116 my $old_stock = $schema->resultset('Stock::Stock')->find({ uniquename => $db_uniquename });
117 my $new_stock = $schema->resultset('Stock::Stock')->find({ uniquename => $new_uniquename });
119 if (!$old_stock) {
120 print STDERR "Warning! Stock with uniquename $db_uniquename was not found in the database.\n";
121 next();
123 if (!$new_stock) {
124 print STDERR "Warning! Stock with uniquename $new_uniquename was not found in the database.\n";
125 next();
128 my $q = "select plot.uniquename, accession.uniquename, sr.stock_relationship_id, cvterm.name from stock as plot join stock_relationship as sr on (plot.stock_id=sr.subject_id) join stock as accession on (accession.stock_id=sr.object_id) join cvterm on (sr.type_id=cvterm.cvterm_id) where plot.stock_id=? and accession.uniquename=? and cvterm.name='plot_of'";
130 my $h = $dbh->prepare($q);
131 $h->execute($plot_id, $db_uniquename);
133 my ($plot, $acc, $stock_relationship_id) = $h->fetchrow_array();
135 #update stock_relationship row with new object_id...
136 my $uq = "UPDATE stock_relationship set object_id=? where stock_relationship_id=?";
137 my $uh = $dbh->prepare($uq);
138 print STDERR "Changing object_id to ".$new_stock->stock_id()." for relationship $stock_relationship_id\n";
139 if ($opt_t) {
140 print STDERR "Not updating because of -t option...\n";
142 else {
143 $uh->execute($new_stock->stock_id, $stock_relationship_id);
146 if ($new_plotname) {
147 print STDERR "Renaming plot to $new_plotname...\n";
148 my $pq = "UPDATE stock SET uniquename=?, name=? WHERE stock_id=?";
149 my $ph = $dbh->prepare($pq);
150 $ph->execute($new_plotname, $new_plotname, $plot_id);
156 my $transaction_error;
157 try {
158 $schema->txn_do($coderef);
159 } catch {
160 $transaction_error = $_;
163 if ($transaction_error) {
164 print STDERR "Transaction error storing terms: $transaction_error\n";
165 } else {
166 print STDERR "Script Complete.\n";