Merge pull request #5205 from solgenomics/topic/generic_trial_upload
[sgn.git] / bin / remove_genotypes.pl
bloba230e414935d182e859b50f9082fae5692b8427c
1 #!/usr/bin/perl
3 =head1
5 remove_accessions.pl - removes accessions from cxgn databases that were loaded with the load_genotypes script, based on accession names or obvious error (colon) in accession names.
7 =head1 SYNOPSIS
9 remove_accessions.pl -H [dbhost] -D [dbname] -i [file with accession names] [-t]
11 =head1 COMMAND-LINE OPTIONS
13 -H host name
14 -D database name
15 -i file with accession names
16 -t Test run. Rolls back at the end.
18 =head1 DESCRIPTION
20 This script removes data loaded in error. It identifies rows to be deleted in the genotype and stock tables of the Chado schema, linked by the nd_experiment table. Then, working backwards, it deletes from all three tables using the relevant ids.
22 =head1 AUTHOR
24 Bryan Ellerbrock (bje24@cornell.edu) - July 2015, with large parts lifted from Naama's load_genotypes.pl script and a lot of help.
26 =cut
28 use strict;
29 use warnings;
31 use Getopt::Std;
32 use Try::Tiny;
33 use CXGN::DB::InsertDBH;
35 our ($opt_H, $opt_D, $opt_i, $opt_t);
37 getopts('H:i:D:t');
39 my $dbhost = $opt_H;
40 my $dbname = $opt_D;
41 my $file = $opt_i;
42 my $dbh;
43 my %seq;
45 # store database handle and schema
47 $dbh = CXGN::DB::InsertDBH->new( { dbhost=>$dbhost,
48 dbname=>$dbname,
49 dbargs => {AutoCommit => 0,
50 RaiseError => 1}
54 # prepare sql delete statements
56 my $s_query = "Delete from stock where stock_id = ?;";
57 my $n_query = "Delete from nd_experiment where nd_experiment_id = ?;";
58 my $g_query = "Delete from genotype where genotype_id = ?;";
59 my $g=$dbh->prepare($g_query);
60 my $n=$dbh->prepare($n_query);
61 my $s=$dbh->prepare($s_query);
63 try {
65 if ($opt_i) {
67 # identify accessions to be removed from names supplied in a file
69 open (FILE, "<", $opt_i) || die "Can't open infile $opt_i \n";
71 my $l_query = "select stock.stock_id, nd_experiment.nd_experiment_id, genotype.genotype_id from stock left join nd_experiment_stock on (stock.stock_id = nd_experiment_stock.stock_id) left join nd_experiment on (nd_experiment_stock.nd_experiment_id = nd_experiment.nd_experiment_id) left join nd_experiment_genotype on (nd_experiment.nd_experiment_id = nd_experiment_genotype.nd_experiment_id) left join genotype on (nd_experiment_genotype.genotype_id = genotype.genotype_id) left join genotypeprop on (genotype.genotype_id = genotypeprop.genotype_id) where stock.name = ?;";
73 my $l=$dbh->prepare($l_query);
75 while (<FILE>) {
76 chomp (my $accession = $_ );
77 $l->execute($accession);
79 &loopdelete($l);
84 } else {
86 # if no infile, identify accessions to be removed by confirming that stock_id is for an accession and the accession has a colon in the stock name
88 my $q = "select stock.stock_id, nd_experiment.nd_experiment_id, genotype.genotype_id from stock left join nd_experiment_stock on (stock.stock_id = nd_experiment_stock.stock_id) left join nd_experiment on (nd_experiment_stock.nd_experiment_id = nd_experiment.nd_experiment_id) left join nd_experiment_genotype on (nd_experiment.nd_experiment_id = nd_experiment_genotype.nd_experiment_id) left join genotype on (nd_experiment_genotype.genotype_id = genotype.genotype_id) left join genotypeprop on (genotype.genotype_id = genotypeprop.genotype_id) where stock.name like '%:%' and stock.type_id = 76392;";
90 my $h=$dbh->prepare($q);
92 $h->execute();
94 # then loop through rows of table containing info of accessions to be deleted, and delete them by id from the genotype, nd_experiment, and stock tables
96 &loopdelete($h);
101 } catch {
102 # Rollback if transaction failed
104 $dbh->rollback();
105 die "An error occured! Transaction rolled back!" . $_ . "\n";
108 if (!$opt_t) {
109 # commit if this is not a test run
111 $dbh->commit();
112 print "Deletion succeeded! Commiting deletion of accessions! \n\n";
113 } else {
114 # Rolling back because test run
116 print "No errors occurred. Rolling back test run. \n\n";
117 $dbh->rollback();
120 sub loopdelete {
121 my $sth = $_[0];
122 while (my($stock_id,$nd_exp_id,$genotype_id) = $sth->fetchrow_array) {
123 print STDERR "Deleting from genotype table where genotype id = $genotype_id \n";
124 print STDERR "Deleting from nd_experiment table where nd_experiment id = $nd_exp_id \n";
125 print STDERR "Deleting from stock table where stock id = $stock_id \n";
126 $g->execute($genotype_id);
127 $n->execute($nd_exp_id);
128 $s->execute($stock_id);