fix issues raised by Titima.
[sgn.git] / bin / load_genotyping_plates.pl
blob0b39df8ce5a1db9a265148e077b73bca70490db0
2 =head1
4 load_genotyping_plates.pl
6 =head1 SYNOPSIS
8 NOTE: You need to create the genotyping project in the database first. With the -g option, provide the name of genotyping project the plates should be associated with. Metadata such as year, location and breeding program will be loaded from the genotyping object directly.
10 load_genotyping_plates.pl -H [dbhost] -D [dbname] -i inFile -u [username] -g genotyping_project [-t] -f format
12 =head1 COMMAND-LINE OPTIONS
14 =over 4
16 =item -H
18 host name
20 =item -D
22 database name
24 =item -i
26 infile
28 =item -u
30 username (must be in the database)
32 =item -t
34 Test run . Rolling back at the end.
36 =item -g
38 genotyping project name (the genotyping project to which this plate is associated)
40 =back
42 =head2 DESCRIPTION
44 Load genotyping plate layouts for many plates
46 The infile is a tab delimited file with the following columns:
48 =over 3
50 =item
52 Item
54 =item
56 Plate ID
58 =item
60 Intertek plate/well ID
62 =item
64 accession name
66 =item
68 Breeder ID
70 =back
72 =head2 AUTHORS
74 Based on a script for loading trial data by Naama Menda <nm249@cornell.edu>, November 2016
76 Modifications for genotype plate loading, Lukas Mueller <lam87@cornell.edu>, August 2021
78 =cut
81 #!/usr/bin/perl
82 use strict;
83 use Getopt::Long;
84 use CXGN::Tools::File::Spreadsheet;
86 use Bio::Chado::Schema;
87 use CXGN::DB::InsertDBH;
88 use Carp qw /croak/ ;
89 use Try::Tiny;
90 use DateTime;
91 use Pod::Usage;
93 use CXGN::Metadata::Schema;
94 use CXGN::Phenome::Schema;
95 use CXGN::People::Person;
96 use Data::Dumper;
97 use CXGN::Phenotypes::StorePhenotypes;
99 use CXGN::Trial; # add project metadata
100 #use CXGN::BreedersToolbox::Projects; # associating a breeding program
102 use CXGN::Trial::TrialCreate;
104 my ( $help, $dbhost, $dbname, $infile, $sites, $types, $test, $username, $genotyping_project, $format );
105 GetOptions(
106 'i=s' => \$infile,
107 'g=s' => \$genotyping_project,
108 't' => \$test,
109 'f=s' => \$format,
110 'user|u=s' => \$username,
111 'dbname|D=s' => \$dbname,
112 'dbhost|H=s' => \$dbhost,
113 'help' => \$help,
118 pod2usage(1) if $help;
119 if (!$infile || !$username || !$dbname || !$dbhost ) {
120 pod2usage( { -msg => 'Error. Missing options!' , -verbose => 1, -exitval => 1 } ) ;
124 my $dbh = CXGN::DB::InsertDBH->new( { dbhost=>$dbhost,
125 dbname=>$dbname,
126 dbargs => {AutoCommit => 1,
127 RaiseError => 1}
130 my $schema= Bio::Chado::Schema->connect( sub { $dbh->get_actual_dbh() } , { on_connect_do => ['SET search_path TO public;'] } );
133 my $metadata_schema = CXGN::Metadata::Schema->connect( sub { $dbh->get_actual_dbh() } , {on_connect_do => ['SET search_path TO metadata;'] } );
135 my $phenome_schema = CXGN::Phenome::Schema->connect( sub { $dbh->get_actual_dbh() } , {on_connect_do => ['SET search_path TO phenome;'] } );
138 # check if genotyping project exists
140 my $genotyping_project_row = $schema->resultset("Project::Project")->find(
142 'name' => $genotyping_project,
143 # 'type.name' => 'genotyping_project_name',
144 } );
146 if (! $genotyping_project_row) { die "Please enter a valid genotyping project. You may have to create it before running this script."; }
148 my $genotyping_project_id = $genotyping_project_row->project_id();
150 if (!$format) {
151 $format = "standard";
154 my $sp_person_id= CXGN::People::Person->get_person_by_username($dbh, $username);
156 print STDERR "SP_PERSON_ID = $sp_person_id\n";
157 ##Column headers for trial design/s
158 #plot_name accession_name plot_number block_number trial_name trial_description trial_location year trial_type is_a_control rep_number range_number row_number col_number
160 ###################
161 #trial metadata can be loaded from a separate data sheet
162 ###################
166 ##Parse the trials + designs first, then upload the phenotypes
168 #new spreadsheet for design + phenotyping data ###
169 my $spreadsheet=CXGN::Tools::File::Spreadsheet->new($infile);
170 my @trial_rows = $spreadsheet->row_labels();
171 my @trial_columns = $spreadsheet->column_labels();
172 print "Trial design columns = " . Dumper(\@trial_columns);
174 my %multi_trial_data;
178 print "Reading phenotyping file:\n";
179 my %phen_params = map { if ($_ =~ m/^\w+\|(\w+:\d{7})$/ ) { $_ => $1 } } @trial_columns ;
180 delete $phen_params{''};
182 my %trial_design_hash; #multi-level hash of hashes of hashrefs
183 my %phen_data_by_trial; #
185 # CIP format:
186 # Item Plate ID Intertek plate/well ID CIP Number Breeder ID
188 # standard format:
189 # Item Plate ID Intertek plate/well ID accession name Breeder ID
191 my $operator;
193 foreach my $plot_name (@trial_rows) {
195 my $accession;
196 my $plot_number;
197 my $block_number;
198 my $trial_name;
199 my $is_a_control;
200 my $rep_number;
201 my $range_number;
202 my $row_number;
203 my $col_number;
205 if ($format eq 'CIP') {
206 $accession = $spreadsheet->value_at($plot_name, "CIP Number");
207 $plot_number = $spreadsheet->value_at($plot_name, "Intertek plate/well ID");
208 $trial_name = $spreadsheet->value_at($plot_name, "Plate ID");
209 $operator = $spreadsheet->value_at($plot_name, "Breeder ID");
211 if (! $accession) {
212 print STDERR "Ignoring entry for plot_number $plot_number as accession is empty - presumably a check?\n";
213 next;
214 } # some plates have empty wells - ignore
216 if ($plot_number =~ m/^([A-Ha-h])(\d+)$/) {
217 $row_number = $1;
218 $col_number = $2;
221 $is_a_control = 0;
222 if ($accession eq "") {
223 $is_a_control = 1;
226 if (! $row_number ) { die "Weird well number: $plot_number\n"; }
228 $trial_design_hash{$trial_name}{$plot_number}->{plot_number} = $plot_number;
229 $trial_design_hash{$trial_name}{$plot_number}->{stock_name} = $accession;
230 $trial_design_hash{$trial_name}{$plot_number}->{plot_name} = $plot_name;
231 $trial_design_hash{$trial_name}{$plot_number}->{row_number} = $row_number;
232 $trial_design_hash{$trial_name}{$plot_number}->{col_number} = $col_number;
234 else {
236 $accession = $spreadsheet->value_at($plot_name, "accession name");
237 $plot_number = $spreadsheet->value_at($plot_name, "Intertek plate/well ID");
238 $trial_name = $spreadsheet->value_at($plot_name, "Plate ID");
239 $operator = $spreadsheet->value_at($plot_name, "Breeder ID");
241 if (! $accession) {
242 print STDERR "Ignoring entry for plot_number $plot_number as accession is empty - presumably a check?\n";
243 next;
244 } # some plates have empty wells - ignore
246 if ($plot_number =~ m/^([A-Ha-h])(\d+)$/) {
247 $row_number = $1;
248 $col_number = $2;
251 $is_a_control = 0;
252 if ($accession eq "") {
253 $is_a_control = 1;
256 if (! $row_number ) { die "Weird well number: $plot_number\n"; }
258 $trial_design_hash{$trial_name}{$plot_number}->{plot_number} = $plot_number;
259 $trial_design_hash{$trial_name}{$plot_number}->{stock_name} = $accession;
260 $trial_design_hash{$trial_name}{$plot_number}->{plot_name} = $plot_name;
261 $trial_design_hash{$trial_name}{$plot_number}->{row_number} = $row_number;
262 $trial_design_hash{$trial_name}{$plot_number}->{col_number} = $col_number;
266 # Add the plot name into the multi trial data hashref of hashes
268 push( @{ $multi_trial_data{$trial_name}->{plots} } , $plot_name );
271 #####create the design hash#####
272 #print Dumper(\%trial_design_hash);
273 #foreach my $trial_name (keys %trial_design_hash) {
274 # $multi_trial_data{$trial_name}->{design} = $trial_design_hash{$trial_name} ;
277 my $date = localtime();
279 ####required phenotypeprops###
280 my %phenotype_metadata ;
281 $phenotype_metadata{'archived_file'} = $infile;
282 $phenotype_metadata{'archived_file_type'} = "genotyping file";
283 $phenotype_metadata{'operator'} = $username;
284 $phenotype_metadata{'date'} = $date;
287 #######
289 my $coderef= sub {
291 my $trial = CXGN::Trial->new( { bcs_schema => $schema, trial_id => $genotyping_project_id });
292 my $location_data = $trial->get_location();
293 my $location_name = $location_data->[1];
294 my $description = $trial->get_description();
295 my $genotyping_facility = $trial->get_genotyping_facility();
296 my $plate_year = $trial->get_year();
298 my $program_object = CXGN::BreedersToolbox::Projects->new( { schema => $schema });
299 my $breeding_program_data = $program_object->get_breeding_programs_by_trial($genotyping_project_id);
300 my $breeding_program_name = $breeding_program_data->[0]->[1];
302 print STDERR "Working with genotyping project name $genotyping_project\n";
303 foreach my $trial_name (keys %multi_trial_data ) {
305 my $trial_create = CXGN::Trial::TrialCreate->new(
307 chado_schema => $schema,
308 dbh => $dbh,
309 design_type => 'genotyping_plate',
310 design => $trial_design_hash{$trial_name},
311 program => $breeding_program_name,
312 trial_year => $plate_year,
313 trial_description => $description,
314 trial_location => $location_name,
315 trial_name => $trial_name,
316 operator => $operator,
317 owner_id => $sp_person_id,
318 is_genotyping => 1,
319 genotyping_user_id => $sp_person_id,
320 genotyping_plate_format => $format,
321 genotyping_plate_sample_type => 'accession',
322 genotyping_project_id => $genotyping_project_id,
323 genotyping_facility => $genotyping_facility,
326 try {
327 $trial_create->save_trial();
328 } catch {
329 print STDERR "ERROR SAVING TRIAL! $_\n";
334 try {
335 $schema->txn_do($coderef);
336 if (!$test) { print "Transaction succeeded! Commiting project and its metadata \n\n"; }
337 } catch {
338 die "An error occured! Rolling back and reseting database sequences!" . $_ . "\n";