Merge pull request #4728 from solgenomics/topic/simsearch_ui
[sgn.git] / bin / load_genotyping_plates.pl
blob0fedd8af5bc657f43f5df76ff9b9540bbfb10805
2 =head1
4 load_genotyping_plates.pl
6 =head1 SYNOPSIS
8 load_genotyping_plates.pl -H [dbhost] -D [dbname] -i inFile -b [breeding program name] -u [username] -l location [-t]
10 =head1 COMMAND-LINE OPTIONS
12 =over 4
14 =item -H
16 host name
18 =item -D
20 database name
22 =item -i
24 infile
26 =item -u
28 username (must be in the database)
30 =item -b
32 breeding program name (must be in the database)
34 =item -t
36 Test run . Rolling back at the end.
38 =item -l
40 location
42 =item -y
44 year
46 =back
48 =head2 DESCRIPTION
50 Load genotyping plate layouts for many plates
52 Minimal metadata requirements are
54 =over 3
56 =item
58 trial_name
60 =item
62 trial_description (can also be built from the trial name, type, year, location)
64 =item
66 trial_type (read from an input file)
68 =item
70 trial_location geo_description ( must be in the database - nd_geolocation.description - can be read from metadata file)
72 =item
74 year (can be read from the metadata file )
76 =item
78 breeding_program (provide with option -b )
80 =back
82 The infile is an Excel file (.xls format) with the following columns:
84 =over 3
86 =item
88 Item
90 =item
92 Plate ID
94 =item
96 Intertek plate/well ID
98 =item
100 accession name
102 =item
104 Breeder ID
106 =back
108 =head2 AUTHORS
110 Based on a script for loading trial data by Naama Menda <nm249@cornell.edu>, November 2016
112 Modifications for genotype plate loading, Lukas Mueller <lam87@cornell.edu>, August 2021
114 =cut
117 #!/usr/bin/perl
118 use strict;
119 use Getopt::Long;
120 use CXGN::Tools::File::Spreadsheet;
122 use Bio::Chado::Schema;
123 use CXGN::DB::InsertDBH;
124 use Carp qw /croak/ ;
125 use Try::Tiny;
126 use DateTime;
127 use Pod::Usage;
129 use CXGN::Metadata::Schema;
130 use CXGN::Phenome::Schema;
131 use CXGN::People::Person;
132 use Data::Dumper;
133 use CXGN::Phenotypes::StorePhenotypes;
135 use CXGN::Trial; # add project metadata
136 #use CXGN::BreedersToolbox::Projects; # associating a breeding program
138 use CXGN::Trial::TrialCreate;
140 my ( $help, $dbhost, $dbname, $infile, $sites, $types, $test, $username, $breeding_program_name, $metadata_file, $location, $year, $format );
141 GetOptions(
142 'i=s' => \$infile,
143 'b=s' => \$breeding_program_name,
144 'l=s' => \$location,
145 'y=s' => \$year,
146 't' => \$test,
147 'f=s' => \$format,
148 'user|u=s' => \$username,
149 'dbname|D=s' => \$dbname,
150 'dbhost|H=s' => \$dbhost,
151 'help' => \$help,
156 pod2usage(1) if $help;
157 if (!$infile || !$breeding_program_name || !$username || !$dbname || !$dbhost ) {
158 pod2usage( { -msg => 'Error. Missing options!' , -verbose => 1, -exitval => 1 } ) ;
162 my $dbh = CXGN::DB::InsertDBH->new( { dbhost=>$dbhost,
163 dbname=>$dbname,
164 dbargs => {AutoCommit => 1,
165 RaiseError => 1}
168 my $schema= Bio::Chado::Schema->connect( sub { $dbh->get_actual_dbh() } , { on_connect_do => ['SET search_path TO public;'] } );
171 my $metadata_schema = CXGN::Metadata::Schema->connect( sub { $dbh->get_actual_dbh() } , {on_connect_do => ['SET search_path TO metadata;'] } );
173 my $phenome_schema = CXGN::Phenome::Schema->connect( sub { $dbh->get_actual_dbh() } , {on_connect_do => ['SET search_path TO phenome;'] } );
176 # Breeding program for associating the trial/s ##
179 my $breeding_program = $schema->resultset("Project::Project")->find(
181 'me.name' => $breeding_program_name,
182 'type.name' => 'breeding_program',
185 join => { projectprops => 'type' } ,
186 } ) ;
188 if (!$breeding_program) { die "Breeding program $breeding_program_name does not exist in the database. Check your input \n"; }
189 print "Found breeding program $breeding_program_name " . $breeding_program->project_id . "\n";
191 if (!$format) {
192 die "Please specify format (-f) as CIP. No other format is supported right now\n";
195 my $sp_person_id= CXGN::People::Person->get_person_by_username($dbh, $username);
197 print STDERR "SP_PERSON_ID = $sp_person_id\n";
198 ##Column headers for trial design/s
199 #plot_name accession_name plot_number block_number trial_name trial_description trial_location year trial_type is_a_control rep_number range_number row_number col_number
201 ###################
202 #trial metadata can be loaded from a separate data sheet
203 ###################
207 ##Parse the trials + designs first, then upload the phenotypes
209 #new spreadsheet for design + phenotyping data ###
210 my $spreadsheet=CXGN::Tools::File::Spreadsheet->new($infile);
211 my @trial_rows = $spreadsheet->row_labels();
212 my @trial_columns = $spreadsheet->column_labels();
213 print "Trial design columns = " . Dumper(\@trial_columns);
215 my %multi_trial_data;
219 print "Reading phenotyping file:\n";
220 my %phen_params = map { if ($_ =~ m/^\w+\|(\w+:\d{7})$/ ) { $_ => $1 } } @trial_columns ;
221 delete $phen_params{''};
223 my @traits = (keys %phen_params) ;
224 print "Found traits " . Dumper(\%phen_params) . "\n" ;
225 #foreach my $trait_string ( keys %phen_params ) {
226 # my ($trait_name, $trait_accession) = split "|", $col_header ;
227 # my ($db_name, $dbxref_accession) = split ":" , $trait_accession ;
231 my %trial_design_hash; #multi-level hash of hashes of hashrefs
232 my %phen_data_by_trial; #
234 #plot_name accession_name plot_number block_number trial_name trial_description trial_location year trial_type is_a_control rep_number range_number row_number col_number
237 # CIP format:
238 ## Item Plate ID Intertek plate/well ID CIP Number Breeder ID
240 my $operator;
242 foreach my $plot_name (@trial_rows) {
244 my $accession;
245 my $plot_number;
246 my $block_number;
247 my $trial_name;
248 my $is_a_control;
249 my $rep_number;
250 my $range_number;
251 my $row_number;
252 my $col_number;
254 if ($format eq 'CIP') {
255 $accession = $spreadsheet->value_at($plot_name, "CIP Number");
256 $plot_number = $spreadsheet->value_at($plot_name, "Intertek plate/well ID");
257 $trial_name = $spreadsheet->value_at($plot_name, "Plate ID");
258 $operator = $spreadsheet->value_at($plot_name, "Breeder ID");
260 if (! $accession) {
261 print STDERR "Ignoring entry for plot_number $plot_number as accession is empty - presumably a check?\n";
262 next;
263 } # some plates have empty wells - ignore
265 if ($plot_number =~ m/^([A-Ha-h])(\d+)$/) {
266 $row_number = $1;
267 $col_number = $2;
270 $is_a_control = 0;
271 if ($accession eq "") {
272 $is_a_control = 1;
275 if (! $row_number ) { die "Weird well number: $plot_number\n"; }
277 $trial_design_hash{$trial_name}{$plot_number}->{plot_number} = $plot_number;
278 $trial_design_hash{$trial_name}{$plot_number}->{stock_name} = $accession;
279 $trial_design_hash{$trial_name}{$plot_number}->{plot_name} = $plot_name;
280 $trial_design_hash{$trial_name}{$plot_number}->{row_number} = $row_number;
281 $trial_design_hash{$trial_name}{$plot_number}->{col_number} = $col_number;
283 else {
284 $accession = $spreadsheet->value_at($plot_name, "accession_name");
285 $plot_number = $spreadsheet->value_at($plot_name, "plot_number");
286 $block_number = $spreadsheet->value_at($plot_name, "block_number");
287 $trial_name = $spreadsheet->value_at($plot_name, "trial_name");
288 $is_a_control = $spreadsheet->value_at($plot_name, "is_a_control");
289 $rep_number = $spreadsheet->value_at($plot_name, "rep_number");
290 $range_number = $spreadsheet->value_at($plot_name, "range_number");
291 $row_number = $spreadsheet->value_at($plot_name, "row_number");
292 $col_number = $spreadsheet->value_at($plot_name, "col_number");
294 if (!$plot_number) {
295 $plot_number = 1;
296 use List::Util qw(max);
297 my @keys = (keys %{ $trial_design_hash{$trial_name} } );
298 my $max = max( @keys );
299 if ( $max ) {
300 $max++;
301 $plot_number = $max ;
305 $trial_design_hash{$trial_name}{$plot_number}->{plot_number} = $plot_number;
306 $trial_design_hash{$trial_name}{$plot_number}->{stock_name} = $accession;
307 $trial_design_hash{$trial_name}{$plot_number}->{plot_name} = $plot_name;
308 $trial_design_hash{$trial_name}{$plot_number}->{block_number} = $block_number;
309 $trial_design_hash{$trial_name}{$plot_number}->{rep_number} = $rep_number;
310 $trial_design_hash{$trial_name}{$plot_number}->{is_a_control} = $is_a_control;
311 $trial_design_hash{$trial_name}{$plot_number}->{range_number} = $range_number;
312 $trial_design_hash{$trial_name}{$plot_number}->{row_number} = $row_number;
313 $trial_design_hash{$trial_name}{$plot_number}->{col_number} = $col_number;
316 # Add the plot name into the multi trial data hashref of hashes
318 push( @{ $multi_trial_data{$trial_name}->{plots} } , $plot_name );
321 #####create the design hash#####
322 #print Dumper(\%trial_design_hash);
323 #foreach my $trial_name (keys %trial_design_hash) {
324 # $multi_trial_data{$trial_name}->{design} = $trial_design_hash{$trial_name} ;
327 my $date = localtime();
329 ####required phenotypeprops###
330 my %phenotype_metadata ;
331 $phenotype_metadata{'archived_file'} = $infile;
332 $phenotype_metadata{'archived_file_type'} = "genotyping file";
333 $phenotype_metadata{'operator'} = $username;
334 $phenotype_metadata{'date'} = $date;
337 #######
339 my $coderef= sub {
340 foreach my $trial_name (keys %multi_trial_data ) {
342 my $trial_create = CXGN::Trial::TrialCreate->new({
343 chado_schema => $schema,
344 dbh => $dbh,
345 design_type => 'genotyping_plate',
346 design => $trial_design_hash{$trial_name},
347 program => $breeding_program->name(),
348 trial_year => $year,
349 trial_description => $trial_name,
350 trial_location => $location,
351 trial_name => $trial_name,
352 operator => $operator,
353 owner_id => $sp_person_id,
354 is_genotyping => 1,
355 genotyping_user_id => $sp_person_id,
356 genotyping_plate_format => 96,
357 genotyping_plate_sample_type => 'accession',
361 try {
362 $trial_create->save_trial();
363 } catch {
364 print STDERR "ERROR SAVING TRIAL! $_\n";
369 try {
370 $schema->txn_do($coderef);
371 if (!$test) { print "Transaction succeeded! Commiting project and its metadata \n\n"; }
372 } catch {
373 # Transaction failed
374 # foreach my $value ( sort keys %seq ) {
375 # my $maxval= $seq{$value} || 0;
376 # if ($maxval) { $dbh->do("SELECT setval ('$value', $maxval, true)") ; }
377 # else { $dbh->do("SELECT setval ('$value', 1, false)"); }
379 die "An error occured! Rolling back and reseting database sequences!" . $_ . "\n";