display programs alphabetically
[sgn.git] / bin / delete_cvterms.pl
blobd8d5d127336650ff5a451aca89d2e3833d80bcc9
1 #!/usr/bin/perl
3 =head1
5 delete_cvterms.pl - for deleting cvterms in bulk
7 =head1 SYNOPSIS
9 delete_cvterms.pl -H [dbhost] -D [dbname] -i [infile]
11 =head1 COMMAND-LINE OPTIONS
13 ARGUMENTS
14 -H host name (required) e.g. "localhost"
15 -D database name (required) e.g. "cxgn_cassava"
16 -c cvname
17 -i path to infile (required)
18 -t test (lists the number of observations associated with each term)
20 =head1 DESCRIPTION
22 This script deletes cvterms in bulk. The infile provided has one column containing the cvterm name as it is in the database which should be deleted.
24 There is no header in the infile and the format is .xls
26 =head1 AUTHOR
28 Lukas Mueller, based on a script by Nick Morales
30 =cut
32 use strict;
34 use Getopt::Std;
35 use Data::Dumper;
36 use Carp qw /croak/ ;
37 use Pod::Usage;
38 use Spreadsheet::ParseExcel;
39 use Bio::Chado::Schema;
40 use CXGN::DB::InsertDBH;
41 use Try::Tiny;
43 our ($opt_H, $opt_D, $opt_i, $opt_c, $opt_t, $opt_a);
45 getopts('H:D:i:c:ta');
48 if (!$opt_H || !$opt_D || !$opt_i || !$opt_c) {
49 pod2usage(-verbose => 2, -message => "Must provide options -H (hostname), -D (database name), -i (input file), -c CVNAME \n");
52 if ($opt_a) { print STDERR "Using accession instead of name\n"; }
53 my $dbhost = $opt_H;
54 my $dbname = $opt_D;
55 my $parser = Spreadsheet::ParseExcel->new();
56 my $excel_obj = $parser->parse($opt_i);
58 my $dbh = CXGN::DB::InsertDBH->new({
59 dbhost=>$dbhost,
60 dbname=>$dbname,
61 dbargs => {AutoCommit => 1, RaiseError => 1}
62 });
64 my $schema= Bio::Chado::Schema->connect( sub { $dbh->get_actual_dbh() } );
65 $dbh->do('SET search_path TO public,sgn');
68 my $worksheet = ( $excel_obj->worksheets() )[0]; #support only one worksheet
69 my ( $row_min, $row_max ) = $worksheet->row_range();
70 my ( $col_min, $col_max ) = $worksheet->col_range();
72 my $coderef = sub {
73 my $cv = $schema->resultset('Cv::Cv')->find({ name => $opt_c });
74 for my $row ( 0 .. $row_max ) {
76 my $db_cvterm_name = $worksheet->get_cell($row,0)->value();
78 my $cvterm;
80 if ($opt_a) {
81 my ($db, $acc) = split /\:/, $db_cvterm_name;
82 my $db_row = $schema->resultset("General::Db")->find({ name => $db });
83 my $dbxref = $schema->resultset("General::Dbxref")->find( { accession => $acc, db_id => $db_row->db_id() });
84 if (!$dbxref) {
85 print STDERR "Term $db_cvterm_name NOT found... Skipping...\n";
86 next();
88 $cvterm = $schema->resultset("Cv::Cvterm")->find( { dbxref_id => $dbxref->dbxref_id() });
90 else {
91 $cvterm = $schema->resultset('Cv::Cvterm')->find({ name => $db_cvterm_name, cv_id => $cv->cv_id() });
94 if (!$cvterm) { print STDERR "Cvterm $db_cvterm_name does not exit. SKIPPING!\n";
95 next;
97 print STDERR "FOUND CVTERM : ".$cvterm->name()."\n";
98 #print STDERR "Deleting $db_cvterm_name... ";
100 my $phenotypes = $schema->resultset('Phenotype::Phenotype')->search( { cvalue_id => $cvterm->cvterm_id() });
101 if ($opt_t) {
103 if ($phenotypes->count() > 0) {
104 print STDERR $cvterm->name()."\t".$phenotypes->count()."\n";
107 else {
108 if ($phenotypes->count() > 0) {
109 print STDERR "Not deleting term ".$cvterm->name()." with ".$phenotypes->count()." associated phenotypes.\n";
111 else {
112 my $dbxref_row = $schema->resultset('General::Dbxref')->find({ dbxref_id => $cvterm->dbxref_id() });
114 my $name = $cvterm->name();
115 $cvterm->delete();
117 print STDERR "DBXREF ROW : ".ref($dbxref_row)."\n";
118 # check if the dbxref is referenced by other cvterms, only delete
119 # if it's only referenced by this one term
121 $dbxref_row->delete();
123 print STDERR "Deleted term $name.\n";
130 my $transaction_error;
131 try {
132 $schema->txn_do($coderef);
133 } catch {
134 $transaction_error = $_;
137 if ($transaction_error) {
138 print STDERR "Transaction error storing terms: $transaction_error\n";
139 } else {
140 print STDERR "Script Complete.\n";