4 load_genotyping_plates.pl
8 NOTE: You need to create the genotyping project in the database first. With the -g option, provide the name of genotyping project the plates should be associated with. Metadata such as year, location and breeding program will be loaded from the genotyping object directly.
10 load_genotyping_plates.pl -H [dbhost] -D [dbname] -i inFile -u [username] -g genotyping_project [-t] -f format
12 =head1 COMMAND-LINE OPTIONS
30 username (must be in the database)
34 Test run . Rolling back at the end.
38 genotyping project name (the genotyping project to which this plate is associated)
44 Load genotyping plate layouts for many plates
46 The infile is a tab delimited file with the following columns:
60 Intertek plate/well ID
74 Based on a script for loading trial data by Naama Menda <nm249@cornell.edu>, November 2016
76 Modifications for genotype plate loading, Lukas Mueller <lam87@cornell.edu>, August 2021
84 use CXGN
::Tools
::File
::Spreadsheet
;
86 use Bio
::Chado
::Schema
;
87 use CXGN
::DB
::InsertDBH
;
93 use CXGN
::Metadata
::Schema
;
94 use CXGN
::Phenome
::Schema
;
95 use CXGN
::People
::Person
;
97 use CXGN
::Phenotypes
::StorePhenotypes
;
99 use CXGN
::Trial
; # add project metadata
100 #use CXGN::BreedersToolbox::Projects; # associating a breeding program
102 use CXGN
::Trial
::TrialCreate
;
104 my ( $help, $dbhost, $dbname, $infile, $sites, $types, $test, $username, $genotyping_project, $format );
107 'g=s' => \
$genotyping_project,
110 'user|u=s' => \
$username,
111 'dbname|D=s' => \
$dbname,
112 'dbhost|H=s' => \
$dbhost,
118 pod2usage
(1) if $help;
119 if (!$infile || !$username || !$dbname || !$dbhost ) {
120 pod2usage
( { -msg
=> 'Error. Missing options!' , -verbose
=> 1, -exitval
=> 1 } ) ;
124 my $dbh = CXGN
::DB
::InsertDBH
->new( { dbhost
=>$dbhost,
126 dbargs
=> {AutoCommit
=> 1,
130 my $schema= Bio
::Chado
::Schema
->connect( sub { $dbh->get_actual_dbh() } , { on_connect_do
=> ['SET search_path TO public;'] } );
133 my $metadata_schema = CXGN
::Metadata
::Schema
->connect( sub { $dbh->get_actual_dbh() } , {on_connect_do
=> ['SET search_path TO metadata;'] } );
135 my $phenome_schema = CXGN
::Phenome
::Schema
->connect( sub { $dbh->get_actual_dbh() } , {on_connect_do
=> ['SET search_path TO phenome;'] } );
138 # check if genotyping project exists
140 my $genotyping_project_row = $schema->resultset("Project::Project")->find(
142 'name' => $genotyping_project,
143 # 'type.name' => 'genotyping_project_name',
146 if (! $genotyping_project_row) { die "Please enter a valid genotyping project. You may have to create it before running this script."; }
148 my $genotyping_project_id = $genotyping_project_row->project_id();
151 $format = "standard";
154 my $sp_person_id= CXGN
::People
::Person
->get_person_by_username($dbh, $username);
156 print STDERR
"SP_PERSON_ID = $sp_person_id\n";
157 ##Column headers for trial design/s
158 #plot_name accession_name plot_number block_number trial_name trial_description trial_location year trial_type is_a_control rep_number range_number row_number col_number
161 #trial metadata can be loaded from a separate data sheet
166 ##Parse the trials + designs first, then upload the phenotypes
168 #new spreadsheet for design + phenotyping data ###
169 my $spreadsheet=CXGN
::Tools
::File
::Spreadsheet
->new($infile);
170 my @trial_rows = $spreadsheet->row_labels();
171 my @trial_columns = $spreadsheet->column_labels();
172 print "Trial design columns = " . Dumper
(\
@trial_columns);
174 my %multi_trial_data;
178 print "Reading phenotyping file:\n";
179 my %phen_params = map { if ($_ =~ m/^\w+\|(\w+:\d{7})$/ ) { $_ => $1 } } @trial_columns ;
180 delete $phen_params{''};
182 my %trial_design_hash; #multi-level hash of hashes of hashrefs
183 my %phen_data_by_trial; #
186 # Item Plate ID Intertek plate/well ID CIP Number Breeder ID
189 # Item Plate ID Intertek plate/well ID accession name Breeder ID
193 foreach my $plot_name (@trial_rows) {
205 if ($format eq 'CIP') {
206 $accession = $spreadsheet->value_at($plot_name, "CIP Number");
207 $plot_number = $spreadsheet->value_at($plot_name, "Intertek plate/well ID");
208 $trial_name = $spreadsheet->value_at($plot_name, "Plate ID");
209 $operator = $spreadsheet->value_at($plot_name, "Breeder ID");
212 print STDERR
"Ignoring entry for plot_number $plot_number as accession is empty - presumably a check?\n";
214 } # some plates have empty wells - ignore
216 if ($plot_number =~ m/^([A-Ha-h])(\d+)$/) {
222 if ($accession eq "") {
226 if (! $row_number ) { die "Weird well number: $plot_number\n"; }
228 $trial_design_hash{$trial_name}{$plot_number}->{plot_number
} = $plot_number;
229 $trial_design_hash{$trial_name}{$plot_number}->{stock_name
} = $accession;
230 $trial_design_hash{$trial_name}{$plot_number}->{plot_name
} = $plot_name;
231 $trial_design_hash{$trial_name}{$plot_number}->{row_number
} = $row_number;
232 $trial_design_hash{$trial_name}{$plot_number}->{col_number
} = $col_number;
236 $accession = $spreadsheet->value_at($plot_name, "accession name");
237 $plot_number = $spreadsheet->value_at($plot_name, "Intertek plate/well ID");
238 $trial_name = $spreadsheet->value_at($plot_name, "Plate ID");
239 $operator = $spreadsheet->value_at($plot_name, "Breeder ID");
242 print STDERR
"Ignoring entry for plot_number $plot_number as accession is empty - presumably a check?\n";
244 } # some plates have empty wells - ignore
246 if ($plot_number =~ m/^([A-Ha-h])(\d+)$/) {
252 if ($accession eq "") {
256 if (! $row_number ) { die "Weird well number: $plot_number\n"; }
258 $trial_design_hash{$trial_name}{$plot_number}->{plot_number
} = $plot_number;
259 $trial_design_hash{$trial_name}{$plot_number}->{stock_name
} = $accession;
260 $trial_design_hash{$trial_name}{$plot_number}->{plot_name
} = $plot_name;
261 $trial_design_hash{$trial_name}{$plot_number}->{row_number
} = $row_number;
262 $trial_design_hash{$trial_name}{$plot_number}->{col_number
} = $col_number;
266 # Add the plot name into the multi trial data hashref of hashes
268 push( @
{ $multi_trial_data{$trial_name}->{plots
} } , $plot_name );
271 #####create the design hash#####
272 #print Dumper(\%trial_design_hash);
273 #foreach my $trial_name (keys %trial_design_hash) {
274 # $multi_trial_data{$trial_name}->{design} = $trial_design_hash{$trial_name} ;
277 my $date = localtime();
279 ####required phenotypeprops###
280 my %phenotype_metadata ;
281 $phenotype_metadata{'archived_file'} = $infile;
282 $phenotype_metadata{'archived_file_type'} = "genotyping file";
283 $phenotype_metadata{'operator'} = $username;
284 $phenotype_metadata{'date'} = $date;
291 my $trial = CXGN
::Trial
->new( { bcs_schema
=> $schema, trial_id
=> $genotyping_project_id });
292 my $location_data = $trial->get_location();
293 my $location_name = $location_data->[1];
294 my $description = $trial->get_description();
295 my $genotyping_facility = $trial->get_genotyping_facility();
296 my $plate_year = $trial->get_year();
298 my $program_object = CXGN
::BreedersToolbox
::Projects
->new( { schema
=> $schema });
299 my $breeding_program_data = $program_object->get_breeding_programs_by_trial($genotyping_project_id);
300 my $breeding_program_name = $breeding_program_data->[0]->[1];
302 print STDERR
"Working with genotyping project name $genotyping_project\n";
303 foreach my $trial_name (keys %multi_trial_data ) {
305 my $trial_create = CXGN
::Trial
::TrialCreate
->new(
307 chado_schema
=> $schema,
309 design_type
=> 'genotyping_plate',
310 design
=> $trial_design_hash{$trial_name},
311 program
=> $breeding_program_name,
312 trial_year
=> $plate_year,
313 trial_description
=> $description,
314 trial_location
=> $location_name,
315 trial_name
=> $trial_name,
316 operator
=> $operator,
317 owner_id
=> $sp_person_id,
319 genotyping_user_id
=> $sp_person_id,
320 genotyping_plate_format
=> $format,
321 genotyping_plate_sample_type
=> 'accession',
322 genotyping_project_id
=> $genotyping_project_id,
323 genotyping_facility
=> $genotyping_facility,
327 $trial_create->save_trial();
329 print STDERR
"ERROR SAVING TRIAL! $_\n";
335 $schema->txn_do($coderef);
336 if (!$test) { print "Transaction succeeded! Commiting project and its metadata \n\n"; }
338 die "An error occured! Rolling back and reseting database sequences!" . $_ . "\n";