make test pass for multicat parsing with two xlsx files for testing.
[sgn.git] / bin / remove_whitespace_from_names.pl
blob439deb06f9d22fea522339ed81af47dcf8cc1a13
1 #!/usr/bin/perl
3 =head1
5 remove_whitespace_from_names.pl - Find any stock, synonym, project, or location that is saved with leading or trailing whitespace. If opt s, update the names with the whitespace removed.
7 =head1 SYNOPSIS
9 remove_whitespace_from_names.pl -H localhost -D cxgn -s
11 =head1 COMMAND-LINE OPTIONS
12 ARGUMENTS
13 -H localhost
14 -D database
15 -s save changes
17 =head1 DESCRIPTION
20 =head1 AUTHOR
22 Bryan Ellerbrock bje24@cornell.edu
24 =cut
26 use strict;
28 use Getopt::Std;
29 use Bio::Chado::Schema;
30 use CXGN::DB::InsertDBH;
31 use CXGN::Stock;
32 use SGN::Schema;
34 our ($opt_H, $opt_D, $opt_s);
36 getopts('H:D:s');
38 if (!$opt_H || !$opt_D) {
39 pod2usage(-verbose => 2, -message => "Must provide options -H and -D\n");
42 my $dbhost = $opt_H;
43 my $dbname = $opt_D;
45 my $dbh = CXGN::DB::InsertDBH->new({
46 dbhost=>$dbhost,
47 dbname=>$dbname,
48 dbargs => {AutoCommit => 1, RaiseError => 1}
49 });
51 my $schema= Bio::Chado::Schema->connect( sub { $dbh->get_actual_dbh() } );
52 $dbh->do('SET search_path TO public,sgn');
54 print STDERR "Found the following entries with leading or trailing whitespace:\n";
55 print STDERR "Name:\tType:\n";
57 #stocks
58 my $stock_query = 'SELECT format( \'"%s"\', uniquename ) AS name, cvterm.name AS type FROM stock JOIN cvterm ON(type_id = cvterm_id) WHERE trim(uniquename) != uniquename';
59 my $stocks = $dbh->prepare($stock_query);
60 $stocks->execute();
62 while (my ($name, $type) = $stocks->fetchrow_array()) {
63 print STDERR $name . "\t" . $type ."\n";
66 #synonyms
67 my $synonym_query = 'SELECT format( \'"%s"\', value ) AS name, cvterm.name AS type FROM stockprop JOIN cvterm ON(type_id = cvterm_id) WHERE cvterm.name = \'stock_synonym\' AND trim(value) != value';
68 my $synonyms = $dbh->prepare($synonym_query);
69 $synonyms->execute();
71 while (my ($name, $type) = $synonyms->fetchrow_array()) {
72 print STDERR $name . "\t" . $type ."\n";
75 #projects
76 my $project_query = 'SELECT format( \'"%s"\', name ) AS name FROM project WHERE trim(name) != name';
77 my $projects = $dbh->prepare($project_query);
78 $projects->execute();
80 while (my $name = $projects->fetchrow_array()) {
81 print STDERR $name . "\tproject\n";
84 #locations
85 my $location_query = 'SELECT format( \'"%s"\', description) AS name FROM nd_geolocation WHERE trim(description) != description';
86 my $locations = $dbh->prepare($location_query);
87 $locations->execute();
89 while (my $name = $locations->fetchrow_array()) {
90 print STDERR $name . "\tlocation\n";
93 if ($opt_s){
95 # handle merge_stock
96 my $merge_query = 'SELECT stock_id, trim(uniquename) FROM stock JOIN cvterm ON(type_id = cvterm_id) WHERE trim(uniquename) != uniquename AND trim(uniquename) IN (SELECT uniquename FROM stock)';
97 my $merges = $dbh->prepare($merge_query);
98 $merges->execute();
99 while (my ($merge_id, $trimmed_name) = $merges->fetchrow_array()) {
100 my $correct_stock = $schema->resultset("Stock::Stock")->find({ uniquename=> $trimmed_name });
101 my $s = CXGN::Stock->new( schema => $schema, stock_id => $correct_stock->stock_id());
102 $s->merge($merge_id, 1);
105 my $stock_query = 'UPDATE stock SET uniquename = trim(uniquename) WHERE trim(uniquename) != uniquename';
106 my $stocks = $dbh->prepare($stock_query);
107 $stocks->execute();
109 my $synonym_query = 'UPDATE stockprop SET value = trim(value) WHERE type_id = (SELECT cvterm_id from cvterm where cvterm.name = \'stock_synonym\') AND trim(value) != value';
110 my $synonyms = $dbh->prepare($synonym_query);
111 $synonyms->execute();
113 my $project_query = 'UPDATE project SET name = trim(name) WHERE trim(name) != name';
114 my $projects = $dbh->prepare($project_query);
115 $projects->execute();
117 my $location_query = 'UPDATE nd_geolocation SET description = trim(description) WHERE trim(description) != description';
118 my $locations = $dbh->prepare($location_query);
119 $locations->execute();