5 detect_duplicate_field_design_stock_properties.pl - backend script for detecting and solving issue of plots with multiple plot_numbers/block/replicate/etc
9 detect_duplicate_field_design_stock_properties.pl -H [dbhost] -D [dbname] -U [dbuser] -P [dbpass] -t [test]
10 =head1 COMMAND-LINE OPTIONS
12 -H host name (required) e.g. "localhost"
13 -D database name (required) e.g. "cxgn_cassava"
14 -U database username (required)
15 -P database userpass (required)
16 -t provide 1 to only show duplicates and not delete
20 perl bin/detect_duplicate_field_design_stock_properties.pl -D cass -H localhost -U postgres -P postgres
22 This script will detect and correct (if t is passed) the issue of plots having multiple plot_numbers associated.
26 Nicolas Morales (nm529@cornell.edu)
37 use Bio
::Chado
::Schema
;
38 use SGN
::Model
::Cvterm
;
40 our ($opt_H, $opt_D, $opt_U, $opt_P, $opt_t);
42 getopts
('H:D:U:P:t:');
44 if (!$opt_H || !$opt_D || !$opt_U ||!$opt_P ) {
45 die "Must provide options -H (hostname), -D (database name), -U (database user), -P (database password)\n";
48 my $schema = Bio
::Chado
::Schema
->connect(
49 "dbi:Pg:database=$opt_D;host=$opt_H", # DSN Line
54 my $plot_number_type_id = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'plot number', 'stock_property')->cvterm_id();
55 my $block_type_id = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'block', 'stock_property')->cvterm_id();
56 my $row_number_type_id = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'row_number', 'stock_property')->cvterm_id();
57 my $col_number_type_id = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'col_number', 'stock_property')->cvterm_id();
58 my $is_a_control_type_id = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'is a control', 'stock_property')->cvterm_id();
59 my $plant_number_type_id = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'plant_index_number', 'stock_property')->cvterm_id();
60 my $subplot_number_type_id = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'subplot_index_number', 'stock_property')->cvterm_id();
61 my $tissue_sample_number_type_id = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'tissue_sample_index_number', 'stock_property')->cvterm_id();
62 my $replicate_type_id = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'replicate', 'stock_property')->cvterm_id();
63 my $range_type_id = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'range', 'stock_property')->cvterm_id();
64 my @type_ids_of_interest = ($plot_number_type_id, $block_type_id, $row_number_type_id, $col_number_type_id, $is_a_control_type_id, $plant_number_type_id, $subplot_number_type_id, $tissue_sample_number_type_id, $replicate_type_id, $range_type_id);
66 my $stockprops = $schema->resultset("Stock::Stockprop")->search(
67 {'me.type_id' => {-in => \
@type_ids_of_interest}},
69 'join' => ['type', {'stock' => {'nd_experiment_stocks'=>{'nd_experiment'=>{'nd_experiment_projects'=>'project'}}}}],
70 '+select' => ['type.name', 'stock.uniquename', 'project.name'],
71 '+as' => ['type', 'stock', 'project'],
73 order_by
=> 'me.stockprop_id'
77 while (my $r = $stockprops->next){
78 push @
{$results{$r->get_column('project')}->{$r->get_column('stock')}->{$r->get_column('type')}}, [$r->value, $r->stockprop_id];
81 my @all_but_last_values;
82 while (my ($p, $x) = each %results){
83 while (my ($k, $v) = each %$x){
84 while (my ($t, $z) = each %$v){
87 my @values = map $_->[0], @z;
88 my $values = join ',', @values;
89 print STDERR
"More than one for project $p for stock $k for type $t with $values\n";
90 my @old_values = @z[0..$#z-1];
91 my @old_Vals = map $_->[1], @old_values;
92 push @all_but_last_values, @old_Vals;
98 #print STDERR Dumper \@all_but_last_values;
100 my $stockprops = $schema->resultset("Stock::Stockprop")->search({'me.stockprop_id' => {-in => \
@all_but_last_values}});
101 while (my $r = $stockprops->next){
106 print STDERR
"Script Complete.\n";