5 delete_cvterms.pl - for deleting cvterms in bulk
9 delete_cvterms.pl -H [dbhost] -D [dbname] -i [infile]
11 =head1 COMMAND-LINE OPTIONS
14 -H host name (required) e.g. "localhost"
15 -D database name (required) e.g. "cxgn_cassava"
17 -i path to infile (required)
18 -t test (lists the number of observations associated with each term)
22 This script deletes cvterms in bulk. The infile provided has one column containing the cvterm name as it is in the database which should be deleted.
24 There is no header in the infile and the format is .xls and .xlsx
28 Lukas Mueller, based on a script by Nick Morales
38 use Spreadsheet
::ParseExcel
;
39 use Spreadsheet
::ParseXLSX
;
40 use Bio
::Chado
::Schema
;
41 use CXGN
::DB
::InsertDBH
;
44 our ($opt_H, $opt_D, $opt_i, $opt_c, $opt_t, $opt_a);
46 getopts
('H:D:i:c:ta');
49 if (!$opt_H || !$opt_D || !$opt_i || !$opt_c) {
50 pod2usage
(-verbose
=> 2, -message
=> "Must provide options -H (hostname), -D (database name), -i (input file), -c CVNAME \n");
53 if ($opt_a) { print STDERR
"Using accession instead of name\n"; }
57 # Match a dot, extension .xls / .xlsx
58 my ($extension) = $opt_i =~ /(\.[^.]+)$/;
61 if ($extension eq '.xlsx') {
62 $parser = Spreadsheet
::ParseXLSX
->new();
65 $parser = Spreadsheet
::ParseExcel
->new();
68 my $excel_obj = $parser->parse($opt_i);
70 my $dbh = CXGN
::DB
::InsertDBH
->new({
73 dbargs
=> {AutoCommit
=> 1, RaiseError
=> 1}
76 my $schema= Bio
::Chado
::Schema
->connect( sub { $dbh->get_actual_dbh() } );
77 $dbh->do('SET search_path TO public,sgn');
80 my $worksheet = ( $excel_obj->worksheets() )[0]; #support only one worksheet
81 my ( $row_min, $row_max ) = $worksheet->row_range();
82 my ( $col_min, $col_max ) = $worksheet->col_range();
85 my $cv = $schema->resultset('Cv::Cv')->find({ name
=> $opt_c });
86 for my $row ( 0 .. $row_max ) {
88 my $db_cvterm_name = $worksheet->get_cell($row,0)->value();
93 my ($db, $acc) = split /\:/, $db_cvterm_name;
94 my $db_row = $schema->resultset("General::Db")->find({ name
=> $db });
95 my $dbxref = $schema->resultset("General::Dbxref")->find( { accession
=> $acc, db_id
=> $db_row->db_id() });
97 print STDERR
"Term $db_cvterm_name NOT found... Skipping...\n";
100 $cvterm = $schema->resultset("Cv::Cvterm")->find( { dbxref_id
=> $dbxref->dbxref_id() });
103 $cvterm = $schema->resultset('Cv::Cvterm')->find({ name
=> $db_cvterm_name, cv_id
=> $cv->cv_id() });
106 if (!$cvterm) { print STDERR
"Cvterm $db_cvterm_name does not exit. SKIPPING!\n";
109 print STDERR
"FOUND CVTERM : ".$cvterm->name()."\n";
110 #print STDERR "Deleting $db_cvterm_name... ";
112 my $phenotypes = $schema->resultset('Phenotype::Phenotype')->search( { cvalue_id
=> $cvterm->cvterm_id() });
115 if ($phenotypes->count() > 0) {
116 print STDERR
$cvterm->name()."\t".$phenotypes->count()."\n";
120 if ($phenotypes->count() > 0) {
121 print STDERR
"Not deleting term ".$cvterm->name()." with ".$phenotypes->count()." associated phenotypes.\n";
124 my $dbxref_row = $schema->resultset('General::Dbxref')->find({ dbxref_id
=> $cvterm->dbxref_id() });
126 my $name = $cvterm->name();
129 print STDERR
"DBXREF ROW : ".ref($dbxref_row)."\n";
130 # check if the dbxref is referenced by other cvterms, only delete
131 # if it's only referenced by this one term
133 $dbxref_row->delete();
135 print STDERR
"Deleted term $name.\n";
142 my $transaction_error;
144 $schema->txn_do($coderef);
146 $transaction_error = $_;
149 if ($transaction_error) {
150 print STDERR
"Transaction error storing terms: $transaction_error\n";
152 print STDERR
"Script Complete.\n";