4 load_genotyping_plates.pl
8 load_genotyping_plates.pl -H [dbhost] -D [dbname] -i inFile -b [breeding program name] -u [username] -l location [-t]
10 =head1 COMMAND-LINE OPTIONS
28 username (must be in the database)
32 breeding program name (must be in the database)
36 Test run . Rolling back at the end.
50 Load genotyping plate layouts for many plates
52 Minimal metadata requirements are
62 trial_description (can also be built from the trial name, type, year, location)
66 trial_type (read from an input file)
70 trial_location geo_description ( must be in the database - nd_geolocation.description - can be read from metadata file)
74 year (can be read from the metadata file )
78 breeding_program (provide with option -b )
82 The infile is an Excel file (.xls format) with the following columns:
96 Intertek plate/well ID
110 Based on a script for loading trial data by Naama Menda <nm249@cornell.edu>, November 2016
112 Modifications for genotype plate loading, Lukas Mueller <lam87@cornell.edu>, August 2021
120 use CXGN
::Tools
::File
::Spreadsheet
;
122 use Bio
::Chado
::Schema
;
123 use CXGN
::DB
::InsertDBH
;
124 use Carp qw
/croak/ ;
129 use CXGN
::Metadata
::Schema
;
130 use CXGN
::Phenome
::Schema
;
131 use CXGN
::People
::Person
;
133 use CXGN
::Phenotypes
::StorePhenotypes
;
135 use CXGN
::Trial
; # add project metadata
136 #use CXGN::BreedersToolbox::Projects; # associating a breeding program
138 use CXGN
::Trial
::TrialCreate
;
140 my ( $help, $dbhost, $dbname, $infile, $sites, $types, $test, $username, $breeding_program_name, $metadata_file, $location, $year, $format );
143 'b=s' => \
$breeding_program_name,
148 'user|u=s' => \
$username,
149 'dbname|D=s' => \
$dbname,
150 'dbhost|H=s' => \
$dbhost,
156 pod2usage
(1) if $help;
157 if (!$infile || !$breeding_program_name || !$username || !$dbname || !$dbhost ) {
158 pod2usage
( { -msg
=> 'Error. Missing options!' , -verbose
=> 1, -exitval
=> 1 } ) ;
162 my $dbh = CXGN
::DB
::InsertDBH
->new( { dbhost
=>$dbhost,
164 dbargs
=> {AutoCommit
=> 1,
168 my $schema= Bio
::Chado
::Schema
->connect( sub { $dbh->get_actual_dbh() } , { on_connect_do
=> ['SET search_path TO public;'] } );
171 my $metadata_schema = CXGN
::Metadata
::Schema
->connect( sub { $dbh->get_actual_dbh() } , {on_connect_do
=> ['SET search_path TO metadata;'] } );
173 my $phenome_schema = CXGN
::Phenome
::Schema
->connect( sub { $dbh->get_actual_dbh() } , {on_connect_do
=> ['SET search_path TO phenome;'] } );
176 # Breeding program for associating the trial/s ##
179 my $breeding_program = $schema->resultset("Project::Project")->find(
181 'me.name' => $breeding_program_name,
182 'type.name' => 'breeding_program',
185 join => { projectprops
=> 'type' } ,
188 if (!$breeding_program) { die "Breeding program $breeding_program_name does not exist in the database. Check your input \n"; }
189 print "Found breeding program $breeding_program_name " . $breeding_program->project_id . "\n";
192 die "Please specify format (-f) as CIP. No other format is supported right now\n";
195 my $sp_person_id= CXGN
::People
::Person
->get_person_by_username($dbh, $username);
197 print STDERR
"SP_PERSON_ID = $sp_person_id\n";
198 ##Column headers for trial design/s
199 #plot_name accession_name plot_number block_number trial_name trial_description trial_location year trial_type is_a_control rep_number range_number row_number col_number
202 #trial metadata can be loaded from a separate data sheet
207 ##Parse the trials + designs first, then upload the phenotypes
209 #new spreadsheet for design + phenotyping data ###
210 my $spreadsheet=CXGN
::Tools
::File
::Spreadsheet
->new($infile);
211 my @trial_rows = $spreadsheet->row_labels();
212 my @trial_columns = $spreadsheet->column_labels();
213 print "Trial design columns = " . Dumper
(\
@trial_columns);
215 my %multi_trial_data;
219 print "Reading phenotyping file:\n";
220 my %phen_params = map { if ($_ =~ m/^\w+\|(\w+:\d{7})$/ ) { $_ => $1 } } @trial_columns ;
221 delete $phen_params{''};
223 my @traits = (keys %phen_params) ;
224 print "Found traits " . Dumper
(\
%phen_params) . "\n" ;
225 #foreach my $trait_string ( keys %phen_params ) {
226 # my ($trait_name, $trait_accession) = split "|", $col_header ;
227 # my ($db_name, $dbxref_accession) = split ":" , $trait_accession ;
231 my %trial_design_hash; #multi-level hash of hashes of hashrefs
232 my %phen_data_by_trial; #
234 #plot_name accession_name plot_number block_number trial_name trial_description trial_location year trial_type is_a_control rep_number range_number row_number col_number
238 ## Item Plate ID Intertek plate/well ID CIP Number Breeder ID
242 foreach my $plot_name (@trial_rows) {
254 if ($format eq 'CIP') {
255 $accession = $spreadsheet->value_at($plot_name, "CIP Number");
256 $plot_number = $spreadsheet->value_at($plot_name, "Intertek plate/well ID");
257 $trial_name = $spreadsheet->value_at($plot_name, "Plate ID");
258 $operator = $spreadsheet->value_at($plot_name, "Breeder ID");
261 print STDERR
"Ignoring entry for plot_number $plot_number as accession is empty - presumably a check?\n";
263 } # some plates have empty wells - ignore
265 if ($plot_number =~ m/^([A-Ha-h])(\d+)$/) {
271 if ($accession eq "") {
275 if (! $row_number ) { die "Weird well number: $plot_number\n"; }
277 $trial_design_hash{$trial_name}{$plot_number}->{plot_number
} = $plot_number;
278 $trial_design_hash{$trial_name}{$plot_number}->{stock_name
} = $accession;
279 $trial_design_hash{$trial_name}{$plot_number}->{plot_name
} = $plot_name;
280 $trial_design_hash{$trial_name}{$plot_number}->{row_number
} = $row_number;
281 $trial_design_hash{$trial_name}{$plot_number}->{col_number
} = $col_number;
284 $accession = $spreadsheet->value_at($plot_name, "accession_name");
285 $plot_number = $spreadsheet->value_at($plot_name, "plot_number");
286 $block_number = $spreadsheet->value_at($plot_name, "block_number");
287 $trial_name = $spreadsheet->value_at($plot_name, "trial_name");
288 $is_a_control = $spreadsheet->value_at($plot_name, "is_a_control");
289 $rep_number = $spreadsheet->value_at($plot_name, "rep_number");
290 $range_number = $spreadsheet->value_at($plot_name, "range_number");
291 $row_number = $spreadsheet->value_at($plot_name, "row_number");
292 $col_number = $spreadsheet->value_at($plot_name, "col_number");
296 use List
::Util
qw(max);
297 my @keys = (keys %{ $trial_design_hash{$trial_name} } );
298 my $max = max
( @keys );
301 $plot_number = $max ;
305 $trial_design_hash{$trial_name}{$plot_number}->{plot_number
} = $plot_number;
306 $trial_design_hash{$trial_name}{$plot_number}->{stock_name
} = $accession;
307 $trial_design_hash{$trial_name}{$plot_number}->{plot_name
} = $plot_name;
308 $trial_design_hash{$trial_name}{$plot_number}->{block_number
} = $block_number;
309 $trial_design_hash{$trial_name}{$plot_number}->{rep_number
} = $rep_number;
310 $trial_design_hash{$trial_name}{$plot_number}->{is_a_control
} = $is_a_control;
311 $trial_design_hash{$trial_name}{$plot_number}->{range_number
} = $range_number;
312 $trial_design_hash{$trial_name}{$plot_number}->{row_number
} = $row_number;
313 $trial_design_hash{$trial_name}{$plot_number}->{col_number
} = $col_number;
316 # Add the plot name into the multi trial data hashref of hashes
318 push( @
{ $multi_trial_data{$trial_name}->{plots
} } , $plot_name );
321 #####create the design hash#####
322 #print Dumper(\%trial_design_hash);
323 #foreach my $trial_name (keys %trial_design_hash) {
324 # $multi_trial_data{$trial_name}->{design} = $trial_design_hash{$trial_name} ;
327 my $date = localtime();
329 ####required phenotypeprops###
330 my %phenotype_metadata ;
331 $phenotype_metadata{'archived_file'} = $infile;
332 $phenotype_metadata{'archived_file_type'} = "genotyping file";
333 $phenotype_metadata{'operator'} = $username;
334 $phenotype_metadata{'date'} = $date;
340 foreach my $trial_name (keys %multi_trial_data ) {
342 my $trial_create = CXGN
::Trial
::TrialCreate
->new({
343 chado_schema
=> $schema,
345 design_type
=> 'genotyping_plate',
346 design
=> $trial_design_hash{$trial_name},
347 program
=> $breeding_program->name(),
349 trial_description
=> $trial_name,
350 trial_location
=> $location,
351 trial_name
=> $trial_name,
352 operator
=> $operator,
353 owner_id
=> $sp_person_id,
355 genotyping_user_id
=> $sp_person_id,
356 genotyping_plate_format
=> 96,
357 genotyping_plate_sample_type
=> 'accession',
362 $trial_create->save_trial();
364 print STDERR
"ERROR SAVING TRIAL! $_\n";
370 $schema->txn_do($coderef);
371 if (!$test) { print "Transaction succeeded! Commiting project and its metadata \n\n"; }
374 # foreach my $value ( sort keys %seq ) {
375 # my $maxval= $seq{$value} || 0;
376 # if ($maxval) { $dbh->do("SELECT setval ('$value', $maxval, true)") ; }
377 # else { $dbh->do("SELECT setval ('$value', 1, false)"); }
379 die "An error occured! Rolling back and reseting database sequences!" . $_ . "\n";