5 delete_cvterms.pl - for deleting cvterms in bulk
9 delete_cvterms.pl -H [dbhost] -D [dbname] -i [infile]
11 =head1 COMMAND-LINE OPTIONS
14 -H host name (required) e.g. "localhost"
15 -D database name (required) e.g. "cxgn_cassava"
17 -i path to infile (required)
18 -t test (lists the number of observations associated with each term)
22 This script deletes cvterms in bulk. The infile provided has one column containing the cvterm name as it is in the database which should be deleted.
24 There is no header in the infile and the format is .xls
28 Lukas Mueller, based on a script by Nick Morales
38 use Spreadsheet
::ParseExcel
;
39 use Bio
::Chado
::Schema
;
40 use CXGN
::DB
::InsertDBH
;
43 our ($opt_H, $opt_D, $opt_i, $opt_c, $opt_t, $opt_a);
45 getopts
('H:D:i:c:ta');
48 if (!$opt_H || !$opt_D || !$opt_i || !$opt_c) {
49 pod2usage
(-verbose
=> 2, -message
=> "Must provide options -H (hostname), -D (database name), -i (input file), -c CVNAME \n");
52 if ($opt_a) { print STDERR
"Using accession instead of name\n"; }
55 my $parser = Spreadsheet
::ParseExcel
->new();
56 my $excel_obj = $parser->parse($opt_i);
58 my $dbh = CXGN
::DB
::InsertDBH
->new({
61 dbargs
=> {AutoCommit
=> 1, RaiseError
=> 1}
64 my $schema= Bio
::Chado
::Schema
->connect( sub { $dbh->get_actual_dbh() } );
65 $dbh->do('SET search_path TO public,sgn');
68 my $worksheet = ( $excel_obj->worksheets() )[0]; #support only one worksheet
69 my ( $row_min, $row_max ) = $worksheet->row_range();
70 my ( $col_min, $col_max ) = $worksheet->col_range();
73 my $cv = $schema->resultset('Cv::Cv')->find({ name
=> $opt_c });
74 for my $row ( 0 .. $row_max ) {
76 my $db_cvterm_name = $worksheet->get_cell($row,0)->value();
81 my ($db, $acc) = split /\:/, $db_cvterm_name;
82 my $db_row = $schema->resultset("General::Db")->find({ name
=> $db });
83 my $dbxref = $schema->resultset("General::Dbxref")->find( { accession
=> $acc, db_id
=> $db_row->db_id() });
85 print STDERR
"Term $db_cvterm_name NOT found... Skipping...\n";
88 $cvterm = $schema->resultset("Cv::Cvterm")->find( { dbxref_id
=> $dbxref->dbxref_id() });
91 $cvterm = $schema->resultset('Cv::Cvterm')->find({ name
=> $db_cvterm_name, cv_id
=> $cv->cv_id() });
94 if (!$cvterm) { print STDERR
"Cvterm $db_cvterm_name does not exit. SKIPPING!\n";
97 print STDERR
"FOUND CVTERM : ".$cvterm->name()."\n";
98 #print STDERR "Deleting $db_cvterm_name... ";
100 my $phenotypes = $schema->resultset('Phenotype::Phenotype')->search( { cvalue_id
=> $cvterm->cvterm_id() });
103 if ($phenotypes->count() > 0) {
104 print STDERR
$cvterm->name()."\t".$phenotypes->count()."\n";
108 if ($phenotypes->count() > 0) {
109 print STDERR
"Not deleting term ".$cvterm->name()." with ".$phenotypes->count()." associated phenotypes.\n";
112 my $dbxref_row = $schema->resultset('General::Dbxref')->find({ dbxref_id
=> $cvterm->dbxref_id() });
114 my $name = $cvterm->name();
117 print STDERR
"DBXREF ROW : ".ref($dbxref_row)."\n";
118 # check if the dbxref is referenced by other cvterms, only delete
119 # if it's only referenced by this one term
121 $dbxref_row->delete();
123 print STDERR
"Deleted term $name.\n";
130 my $transaction_error;
132 $schema->txn_do($coderef);
134 $transaction_error = $_;
137 if ($transaction_error) {
138 print STDERR
"Transaction error storing terms: $transaction_error\n";
140 print STDERR
"Script Complete.\n";