5 remove_accessions.pl - removes accessions from cxgn databases that were loaded with the load_genotypes script, based on accession names or obvious error (colon) in accession names.
9 remove_accessions.pl -H [dbhost] -D [dbname] -i [file with accession names] [-t]
11 =head1 COMMAND-LINE OPTIONS
15 -i file with accession names
16 -t Test run. Rolls back at the end.
20 This script removes data loaded in error. It identifies rows to be deleted in the genotype and stock tables of the Chado schema, linked by the nd_experiment table. Then, working backwards, it deletes from all three tables using the relevant ids.
24 Bryan Ellerbrock (bje24@cornell.edu) - July 2015, with large parts lifted from Naama's load_genotypes.pl script and a lot of help.
33 use CXGN
::DB
::InsertDBH
;
35 our ($opt_H, $opt_D, $opt_i, $opt_t);
45 # store database handle and schema
47 $dbh = CXGN
::DB
::InsertDBH
->new( { dbhost
=>$dbhost,
49 dbargs
=> {AutoCommit
=> 0,
54 # prepare sql delete statements
56 my $s_query = "Delete from stock where stock_id = ?;";
57 my $n_query = "Delete from nd_experiment where nd_experiment_id = ?;";
58 my $g_query = "Delete from genotype where genotype_id = ?;";
59 my $g=$dbh->prepare($g_query);
60 my $n=$dbh->prepare($n_query);
61 my $s=$dbh->prepare($s_query);
67 # identify accessions to be removed from names supplied in a file
69 open (FILE
, "<", $opt_i) || die "Can't open infile $opt_i \n";
71 my $l_query = "select stock.stock_id, nd_experiment.nd_experiment_id, genotype.genotype_id from stock left join nd_experiment_stock on (stock.stock_id = nd_experiment_stock.stock_id) left join nd_experiment on (nd_experiment_stock.nd_experiment_id = nd_experiment.nd_experiment_id) left join nd_experiment_genotype on (nd_experiment.nd_experiment_id = nd_experiment_genotype.nd_experiment_id) left join genotype on (nd_experiment_genotype.genotype_id = genotype.genotype_id) left join genotypeprop on (genotype.genotype_id = genotypeprop.genotype_id) where stock.name = ?;";
73 my $l=$dbh->prepare($l_query);
76 chomp (my $accession = $_ );
77 $l->execute($accession);
86 # if no infile, identify accessions to be removed by confirming that stock_id is for an accession and the accession has a colon in the stock name
88 my $q = "select stock.stock_id, nd_experiment.nd_experiment_id, genotype.genotype_id from stock left join nd_experiment_stock on (stock.stock_id = nd_experiment_stock.stock_id) left join nd_experiment on (nd_experiment_stock.nd_experiment_id = nd_experiment.nd_experiment_id) left join nd_experiment_genotype on (nd_experiment.nd_experiment_id = nd_experiment_genotype.nd_experiment_id) left join genotype on (nd_experiment_genotype.genotype_id = genotype.genotype_id) left join genotypeprop on (genotype.genotype_id = genotypeprop.genotype_id) where stock.name like '%:%' and stock.type_id = 76392;";
90 my $h=$dbh->prepare($q);
94 # then loop through rows of table containing info of accessions to be deleted, and delete them by id from the genotype, nd_experiment, and stock tables
102 # Rollback if transaction failed
105 die "An error occured! Transaction rolled back!" . $_ . "\n";
109 # commit if this is not a test run
112 print "Deletion succeeded! Commiting deletion of accessions! \n\n";
114 # Rolling back because test run
116 print "No errors occurred. Rolling back test run. \n\n";
122 while (my($stock_id,$nd_exp_id,$genotype_id) = $sth->fetchrow_array) {
123 print STDERR
"Deleting from genotype table where genotype id = $genotype_id \n";
124 print STDERR
"Deleting from nd_experiment table where nd_experiment id = $nd_exp_id \n";
125 print STDERR
"Deleting from stock table where stock id = $stock_id \n";
126 $g->execute($genotype_id);
127 $n->execute($nd_exp_id);
128 $s->execute($stock_id);