Merge pull request #4363 from solgenomics/topic/ordering_system_N
[sgn.git] / bin / delete_cvterms.pl
blob8053584e7b017f5a547c6764c442645d812c56ec
1 #!/usr/bin/perl
3 =head1
5 delete_cvterms.pl - for deleting cvterms in bulk
7 =head1 SYNOPSIS
9 delete_cvterms.pl -H [dbhost] -D [dbname] -i [infile]
11 =head1 COMMAND-LINE OPTIONS
13 ARGUMENTS
14 -H host name (required) e.g. "localhost"
15 -D database name (required) e.g. "cxgn_cassava"
16 -c cvname
17 -i path to infile (required)
18 -t test (lists the number of observations associated with each term)
20 =head1 DESCRIPTION
22 This script deletes cvterms in bulk. The infile provided has one column containing the cvterm name as it is in the database which should be deleted.
24 There is no header in the infile and the format is .xls and .xlsx
26 =head1 AUTHOR
28 Lukas Mueller, based on a script by Nick Morales
30 =cut
32 use strict;
34 use Getopt::Std;
35 use Data::Dumper;
36 use Carp qw /croak/ ;
37 use Pod::Usage;
38 use Spreadsheet::ParseExcel;
39 use Spreadsheet::ParseXLSX;
40 use Bio::Chado::Schema;
41 use CXGN::DB::InsertDBH;
42 use Try::Tiny;
44 our ($opt_H, $opt_D, $opt_i, $opt_c, $opt_t, $opt_a);
46 getopts('H:D:i:c:ta');
49 if (!$opt_H || !$opt_D || !$opt_i || !$opt_c) {
50 pod2usage(-verbose => 2, -message => "Must provide options -H (hostname), -D (database name), -i (input file), -c CVNAME \n");
53 if ($opt_a) { print STDERR "Using accession instead of name\n"; }
54 my $dbhost = $opt_H;
55 my $dbname = $opt_D;
57 # Match a dot, extension .xls / .xlsx
58 my ($extension) = $opt_i =~ /(\.[^.]+)$/;
59 my $parser;
61 if ($extension eq '.xlsx') {
62 $parser = Spreadsheet::ParseXLSX->new();
64 else {
65 $parser = Spreadsheet::ParseExcel->new();
68 my $excel_obj = $parser->parse($opt_i);
70 my $dbh = CXGN::DB::InsertDBH->new({
71 dbhost=>$dbhost,
72 dbname=>$dbname,
73 dbargs => {AutoCommit => 1, RaiseError => 1}
74 });
76 my $schema= Bio::Chado::Schema->connect( sub { $dbh->get_actual_dbh() } );
77 $dbh->do('SET search_path TO public,sgn');
80 my $worksheet = ( $excel_obj->worksheets() )[0]; #support only one worksheet
81 my ( $row_min, $row_max ) = $worksheet->row_range();
82 my ( $col_min, $col_max ) = $worksheet->col_range();
84 my $coderef = sub {
85 my $cv = $schema->resultset('Cv::Cv')->find({ name => $opt_c });
86 for my $row ( 0 .. $row_max ) {
88 my $db_cvterm_name = $worksheet->get_cell($row,0)->value();
90 my $cvterm;
92 if ($opt_a) {
93 my ($db, $acc) = split /\:/, $db_cvterm_name;
94 my $db_row = $schema->resultset("General::Db")->find({ name => $db });
95 my $dbxref = $schema->resultset("General::Dbxref")->find( { accession => $acc, db_id => $db_row->db_id() });
96 if (!$dbxref) {
97 print STDERR "Term $db_cvterm_name NOT found... Skipping...\n";
98 next();
100 $cvterm = $schema->resultset("Cv::Cvterm")->find( { dbxref_id => $dbxref->dbxref_id() });
102 else {
103 $cvterm = $schema->resultset('Cv::Cvterm')->find({ name => $db_cvterm_name, cv_id => $cv->cv_id() });
106 if (!$cvterm) { print STDERR "Cvterm $db_cvterm_name does not exit. SKIPPING!\n";
107 next;
109 print STDERR "FOUND CVTERM : ".$cvterm->name()."\n";
110 #print STDERR "Deleting $db_cvterm_name... ";
112 my $phenotypes = $schema->resultset('Phenotype::Phenotype')->search( { cvalue_id => $cvterm->cvterm_id() });
113 if ($opt_t) {
115 if ($phenotypes->count() > 0) {
116 print STDERR $cvterm->name()."\t".$phenotypes->count()."\n";
119 else {
120 if ($phenotypes->count() > 0) {
121 print STDERR "Not deleting term ".$cvterm->name()." with ".$phenotypes->count()." associated phenotypes.\n";
123 else {
124 my $dbxref_row = $schema->resultset('General::Dbxref')->find({ dbxref_id => $cvterm->dbxref_id() });
126 my $name = $cvterm->name();
127 $cvterm->delete();
129 print STDERR "DBXREF ROW : ".ref($dbxref_row)."\n";
130 # check if the dbxref is referenced by other cvterms, only delete
131 # if it's only referenced by this one term
133 $dbxref_row->delete();
135 print STDERR "Deleted term $name.\n";
142 my $transaction_error;
143 try {
144 $schema->txn_do($coderef);
145 } catch {
146 $transaction_error = $_;
149 if ($transaction_error) {
150 print STDERR "Transaction error storing terms: $transaction_error\n";
151 } else {
152 print STDERR "Script Complete.\n";