Merge pull request #3948 from solgenomics/lukasmueller-patch-1
[sgn.git] / bin / detect_duplicate_field_design_stock_properties.pl
blobe3b7c3ca7bfedf95a3c39692173eface6a22b782
1 #!/usr/bin/perl
3 =head1
5 detect_duplicate_field_design_stock_properties.pl - backend script for detecting and solving issue of plots with multiple plot_numbers/block/replicate/etc
7 =head1 SYNOPSIS
9 detect_duplicate_field_design_stock_properties.pl -H [dbhost] -D [dbname] -U [dbuser] -P [dbpass] -t [test]
10 =head1 COMMAND-LINE OPTIONS
11 ARGUMENTS
12 -H host name (required) e.g. "localhost"
13 -D database name (required) e.g. "cxgn_cassava"
14 -U database username (required)
15 -P database userpass (required)
16 -t provide 1 to only show duplicates and not delete
18 =head1 DESCRIPTION
20 perl bin/detect_duplicate_field_design_stock_properties.pl -D cass -H localhost -U postgres -P postgres
22 This script will detect and correct (if t is passed) the issue of plots having multiple plot_numbers associated.
24 =head1 AUTHOR
26 Nicolas Morales (nm529@cornell.edu)
28 =cut
30 use strict;
32 use Getopt::Std;
33 use Data::Dumper;
34 use Carp qw /croak/ ;
35 use Pod::Usage;
36 use DateTime;
37 use Bio::Chado::Schema;
38 use SGN::Model::Cvterm;
40 our ($opt_H, $opt_D, $opt_U, $opt_P, $opt_t);
42 getopts('H:D:U:P:t:');
44 if (!$opt_H || !$opt_D || !$opt_U ||!$opt_P ) {
45 die "Must provide options -H (hostname), -D (database name), -U (database user), -P (database password)\n";
48 my $schema = Bio::Chado::Schema->connect(
49 "dbi:Pg:database=$opt_D;host=$opt_H", # DSN Line
50 $opt_U, # Username
51 $opt_P # Password
54 my $plot_number_type_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'plot number', 'stock_property')->cvterm_id();
55 my $block_type_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'block', 'stock_property')->cvterm_id();
56 my $row_number_type_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'row_number', 'stock_property')->cvterm_id();
57 my $col_number_type_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'col_number', 'stock_property')->cvterm_id();
58 my $is_a_control_type_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'is a control', 'stock_property')->cvterm_id();
59 my $plant_number_type_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'plant_index_number', 'stock_property')->cvterm_id();
60 my $subplot_number_type_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'subplot_index_number', 'stock_property')->cvterm_id();
61 my $tissue_sample_number_type_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'tissue_sample_index_number', 'stock_property')->cvterm_id();
62 my $replicate_type_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'replicate', 'stock_property')->cvterm_id();
63 my $range_type_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'range', 'stock_property')->cvterm_id();
64 my @type_ids_of_interest = ($plot_number_type_id, $block_type_id, $row_number_type_id, $col_number_type_id, $is_a_control_type_id, $plant_number_type_id, $subplot_number_type_id, $tissue_sample_number_type_id, $replicate_type_id, $range_type_id);
66 my $stockprops = $schema->resultset("Stock::Stockprop")->search(
67 {'me.type_id' => {-in => \@type_ids_of_interest}},
69 'join' => ['type', {'stock' => {'nd_experiment_stocks'=>{'nd_experiment'=>{'nd_experiment_projects'=>'project'}}}}],
70 '+select' => ['type.name', 'stock.uniquename', 'project.name'],
71 '+as' => ['type', 'stock', 'project'],
72 distinct => 1,
73 order_by => 'me.stockprop_id'
76 my %results;
77 while (my $r = $stockprops->next){
78 push @{$results{$r->get_column('project')}->{$r->get_column('stock')}->{$r->get_column('type')}}, [$r->value, $r->stockprop_id];
81 my @all_but_last_values;
82 while (my ($p, $x) = each %results){
83 while (my ($k, $v) = each %$x){
84 while (my ($t, $z) = each %$v){
85 if (scalar(@$z) > 1){
86 my @z = @$z;
87 my @values = map $_->[0], @z;
88 my $values = join ',', @values;
89 print STDERR "More than one for project $p for stock $k for type $t with $values\n";
90 my @old_values = @z[0..$#z-1];
91 my @old_Vals = map $_->[1], @old_values;
92 push @all_but_last_values, @old_Vals;
98 #print STDERR Dumper \@all_but_last_values;
99 if (!$opt_t){
100 my $stockprops = $schema->resultset("Stock::Stockprop")->search({'me.stockprop_id' => {-in => \@all_but_last_values}});
101 while (my $r = $stockprops->next){
102 $r->delete();
106 print STDERR "Script Complete.\n";