Merge pull request #2754 from solgenomics/topic/fix_homepage_add_accessions_dialog
[sgn.git] / bin / delete_cvterms.pl
blobbd671c116c3ccc1fe79880341ee7b43abb7482bd
1 #!/usr/bin/perl
3 =head1
5 delete_cvterms.pl - for deleting cvterms in bulk
7 =head1 SYNOPSIS
9 delete_cvterms.pl -H [dbhost] -D [dbname] -i [infile]
11 =head1 COMMAND-LINE OPTIONS
13 ARGUMENTS
14 -H host name (required) e.g. "localhost"
15 -D database name (required) e.g. "cxgn_cassava"
16 -c cvname
17 -i path to infile (required)
18 -t test (lists the number of observations associated with each term)
20 =head1 DESCRIPTION
22 This script deletes cvterms in bulk. The infile provided has one column containing the cvterm name as it is in the database which should be deleted.
24 There is no header in the infile and the format is .xls
26 =head1 AUTHOR
28 Lukas Mueller, based on a script by Nick Morales
30 =cut
32 use strict;
34 use Getopt::Std;
35 use Data::Dumper;
36 use Carp qw /croak/ ;
37 use Pod::Usage;
38 use Spreadsheet::ParseExcel;
39 use Bio::Chado::Schema;
40 use CXGN::DB::InsertDBH;
41 use Try::Tiny;
43 our ($opt_H, $opt_D, $opt_i, $opt_c, $opt_t);
45 getopts('H:D:i:c:t');
47 if (!$opt_H || !$opt_D || !$opt_i || !$opt_c) {
48 pod2usage(-verbose => 2, -message => "Must provide options -H (hostname), -D (database name), -i (input file), -c CVNAME \n");
51 my $dbhost = $opt_H;
52 my $dbname = $opt_D;
53 my $parser = Spreadsheet::ParseExcel->new();
54 my $excel_obj = $parser->parse($opt_i);
56 my $dbh = CXGN::DB::InsertDBH->new({
57 dbhost=>$dbhost,
58 dbname=>$dbname,
59 dbargs => {AutoCommit => 1, RaiseError => 1}
60 });
62 my $schema= Bio::Chado::Schema->connect( sub { $dbh->get_actual_dbh() } );
63 $dbh->do('SET search_path TO public,sgn');
66 my $worksheet = ( $excel_obj->worksheets() )[0]; #support only one worksheet
67 my ( $row_min, $row_max ) = $worksheet->row_range();
68 my ( $col_min, $col_max ) = $worksheet->col_range();
70 my $coderef = sub {
71 my $cv = $schema->resultset('Cv::Cv')->find({ name => $opt_c });
72 for my $row ( 0 .. $row_max ) {
74 my $db_cvterm_name = $worksheet->get_cell($row,0)->value();
76 my $cvterm = $schema->resultset('Cv::Cvterm')->find({ name => $db_cvterm_name, cv_id => $cv->cv_id() });
78 if (!$cvterm) { print STDERR "Cvterm $db_cvterm_name does not exit. SKIPPING!\n";
79 next;
81 my $phenotypes = $schema->resultset('Phenotype::Phenotype')->search( { cvalue_id => $cvterm->cvterm_id() });
82 if ($opt_t) {
84 if ($phenotypes->count() > 0) {
85 print STDERR $cvterm->name()."\t".$phenotypes->count()."\n";
88 else {
89 if ($phenotypes->count() > 0) {
90 print STDERR "Not deleting term ".$cvterm->name()." with ".$phenotypes->count()." associated phenotypes.\n";
92 else {
93 my $dbxref = $schema->resultset('General::Dbxref')->find({ dbxref_id => $cvterm->dbxref_id() });
95 # check if the dbxref is referenced by other cvterms, only delete
96 # if it's only referenced by this one term
98 my $dbxref_count_rs = $schema->resultset('Cv::Cvterm')->search( { dbxref_id=> $cvterm->dbxref_id() });
100 if ($dbxref_count_rs->count() == 1) {
101 $dbxref->delete();
103 my $name = $cvterm->name();
104 $cvterm->delete();
105 print STDERR "Deleted term $name.\n";
112 my $transaction_error;
113 try {
114 $schema->txn_do($coderef);
115 } catch {
116 $transaction_error = $_;
119 if ($transaction_error) {
120 print STDERR "Transaction error storing terms: $transaction_error\n";
121 } else {
122 print STDERR "Script Complete.\n";