5 upload_multiple_trial_design.pl
9 upload_multiple_trial_design.pl -H [dbhost] -D [dbname] -P [dbpass] -w [basepath] -U [dbuser] -b [breeding program name] -i infile -un [username] -e [email address] -eo [email_option_enabled] -r [temp_file_nd_experiment_id]
11 =head1 COMMAND-LINE OPTIONS
13 -H host name (required) Ex: "breedbase_db"
14 -D database name (required) Ex: "breedbase"
15 -P database userpass (required) Ex: "postgres"
16 -w basepath (required) Ex: /home/production/cxgn/sgn
17 -i path to infile (required)
18 -U username (required) Ex: "postgres"
19 -b breeding program name (required) Ex: test
20 -t test run . Rolling back at the end
21 -e email address of the user
23 -r temp_file_nd_experiment_id (required) Ex: /temp/delete_nd_experiment_ids.txt
24 if loading trial data from metadata file, phenotypes + layout from infile
28 perl bin/upload_multiple_trial_design.pl -h breedbase_db -d breedbase -p postgres -w /home/cxgn/sgn/ -u postgres -i ~/Desktop/test_multi.xlsx -b test -n janedoe -e 'sk2783@cornell.edu' -l 'sri' -r /tmp/delete_nd_experiment_ids.txt
30 This script will parse and validate the input file. If there are any warnings or errors during validation it will send a error message to the provided email.
31 If there are no warnings(or errors) during validation it will then store the data.
32 The input file should be either .xlsx or .xls format.
34 CHECK cvterms for trial metadata!!
36 ################################################
37 Minimal metadata requirements are
39 trial_description (can also be built from the trial name, type, year, location)
40 trial_type (read from an input file)
41 trial_location geo_description ( must be in the database - nd_geolocation.description - can be read from metadata file)
42 year (can be read from the metadata file )
43 design (defaults to 'RCBD' )
44 breeding_program (provide with option -b )
47 Other OPTIONAL trial metadata (projectprops)
50 project fertilizer date
53 project harvested plants
57 Srikanth (sk2783@cornell.edu)
64 use CXGN
::File
::Parse
;
65 use Bio
::Chado
::Schema
;
66 use CXGN
::DB
::InsertDBH
;
71 use List
::Util
qw(max);
72 use List
::Util
qw(first);
73 use List
::Util
qw(uniq);
74 use CXGN
::Metadata
::Schema
;
75 use CXGN
::Phenome
::Schema
;
76 use CXGN
::People
::Person
;
78 use CXGN
::Phenotypes
::StorePhenotypes
;
79 use CXGN
::Trial
; # add project metadata
80 #use CXGN::BreedersToolbox::Projects; # associating a breeding program
81 use CXGN
::Trial
::TrialCreate
;
84 use CXGN
::Trial
::ParseUpload
;
85 use CXGN
::TrialStatus
;
88 use File
::Path
qw(make_path);
90 use JSON
::MaybeXS
qw(encode_json);
92 # sub print_json_response {
93 # my ($status, $message) = @_;
96 # message => $message,
98 # print encode_json($response);
103 my ( $help, $dbhost, $dbname, $basepath, $dbuser, $dbpass, $infile, $sites, $types, $username, $breeding_program_name, $email_address, $logged_in_name, $email_option_enabled, $temp_file_nd_experiment_id);
105 'dbhost|H=s' => \
$dbhost,
106 'dbname|D=s' => \
$dbname,
107 'dbpass|P=s' => \
$dbpass,
108 'basepath|w=s' => \
$basepath,
109 'dbuser|U=s' => \
$dbuser,
111 'b=s' => \
$breeding_program_name,
112 'user|un=s' => \
$username,
114 'email|e=s' => \
$email_address,
115 # 'logged_in_user|l=s' => \$logged_in_name,
116 'temp_file|r=s' => \
$temp_file_nd_experiment_id,
117 'email_option_enabled|eo=s' => \
$email_option_enabled,
120 #Ensure the parent directory exists before creating the temporary file
121 my $parent_dir = File
::Spec
->catdir($basepath, 'static', 'documents', 'tempfiles', 'delete_nd_experiment_ids');
122 unless (-d
$parent_dir) {
123 # make_path($parent_dir) or die "Failed to create directory $parent_dir: $!";
124 make_path
($parent_dir) or push @errors, "Failed to create directory $parent_dir: $!";
127 # Create the temporary file in the parent directory
128 my $temp_file_nd_experiment_id = File
::Spec
->catfile($parent_dir, 'fileXXXX');
130 pod2usage
(1) if $help;
131 if (!$infile || !$breeding_program_name || !$username || !$dbname || !$dbhost ) {
132 pod2usage
( { -msg
=> 'Error. Missing options!' , -verbose
=> 1, -exitval
=> 1 } ) ;
138 print STDERR
"Logging in with password\n";
139 $dbh = DBI
->connect("dbi:Pg:database=$dbname;host=$dbhost",
145 $dbh = CXGN
::DB
::InsertDBH
->new( {
148 dbargs
=> {AutoCommit
=> 1,
153 print STDERR
"Database connection ok!\n";
155 my $schema= Bio
::Chado
::Schema
->connect( sub { $dbh } , { on_connect_do
=> ['SET search_path TO public, sgn, metadata, phenome;'] } );
158 # getting the last database ids for resetting at the end in case of rolling back
161 # my $last_nd_experiment_id = $schema->resultset('NaturalDiversity::NdExperiment')->get_column('nd_experiment_id')->max;
162 # my $last_cvterm_id = $schema->resultset('Cv::Cvterm')->get_column('cvterm_id')->max;
164 # my $last_nd_experiment_project_id = $schema->resultset('NaturalDiversity::NdExperimentProject')->get_column('nd_experiment_project_id')->max;
165 # my $last_nd_experiment_stock_id = $schema->resultset('NaturalDiversity::NdExperimentStock')->get_column('nd_experiment_stock_id')->max;
166 # my $last_nd_experiment_phenotype_id = $schema->resultset('NaturalDiversity::NdExperimentPhenotype')->get_column('nd_experiment_phenotype_id')->max;
167 # my $last_phenotype_id = $schema->resultset('Phenotype::Phenotype')->get_column('phenotype_id')->max;
168 # my $last_stock_id = $schema->resultset('Stock::Stock')->get_column('stock_id')->max;
169 # my $last_stock_relationship_id = $schema->resultset('Stock::StockRelationship')->get_column('stock_relationship_id')->max;
170 # my $last_project_id = $schema->resultset('Project::Project')->get_column('project_id')->max;
171 # my $last_nd_geolocation_id = $schema->resultset('NaturalDiversity::NdGeolocation')->get_column('nd_geolocation_id')->max;
172 # my $last_geoprop_id = $schema->resultset('NaturalDiversity::NdGeolocationprop')->get_column('nd_geolocationprop_id')->max;
173 # my $last_projectprop_id = $schema->resultset('Project::Projectprop')->get_column('projectprop_id')->max;
176 # 'nd_experiment_nd_experiment_id_seq' => $last_nd_experiment_id,
177 # 'cvterm_cvterm_id_seq' => $last_cvterm_id,
178 # 'nd_experiment_project_nd_experiment_project_id_seq' => $last_nd_experiment_project_id,
179 # 'nd_experiment_stock_nd_experiment_stock_id_seq' => $last_nd_experiment_stock_id,
180 # 'nd_experiment_phenotype_nd_experiment_phenotype_id_seq' => $last_nd_experiment_phenotype_id,
181 # 'phenotype_phenotype_id_seq' => $last_phenotype_id,
182 # 'stock_stock_id_seq' => $last_stock_id,
183 # 'stock_relationship_stock_relationship_id_seq' => $last_stock_relationship_id,
184 # 'project_project_id_seq' => $last_project_id,
185 # 'nd_geolocation_nd_geolocation_id_seq' => $last_nd_geolocation_id,
186 # 'nd_geolocationprop_nd_geolocationprop_id_seq' => $last_geoprop_id,
187 # 'projectprop_projectprop_id_seq' => $last_projectprop_id,
192 # Breeding program for associating the trial/s ##
195 my $breeding_program = $schema->resultset("Project::Project")->find(
196 {'me.name' => $breeding_program_name, 'type.name' => 'breeding_program'},
197 { join => { projectprops
=> 'type' }}
200 if (!$breeding_program) {
201 push @errors, "Breeding program $breeding_program_name does not exist in the database. Check your input";
204 my $sp_person_id= CXGN
::People
::Person
->get_person_by_username($dbh, $username);
205 if (!$sp_person_id) {
206 push @errors, "Need to have a user pre-loaded in the database!";
209 #Column headers for trial design/s
210 #plot_name accession_name plot_number block_number trial_name trial_description trial_location year trial_type is_a_control rep_number range_number row_number col_number entry_numbers
212 #parse file using the generic file parser
213 my $parser = CXGN
::File
::Parse
->new(
215 required_columns
=> ['trial_name', 'accession_name', 'plot_number', 'block_number', 'location', 'year'],
218 # warn "Starting to parse the file...\n";
219 my $parsed = $parser->parse();
220 # warn "Parsed data = " . Dumper($parsed);
221 if (scalar(@
{$parsed->{errors
}}) > 0) {
222 push @errors, "Error parsing file: " . join(',', @
{$parsed->{errors
}}) if scalar(@
{$parsed->{errors
}}) > 0;
225 if (exists $parsed->{warnings
}) {
226 print "Warnings: " . join("\n", @
{$parsed->{warnings
}}) . "\n";
229 # my $parsed = $parser->parse();
231 # if (scalar(@{$parsed->{errors}}) > 0) {
232 # die "Error parsing file: ".join(',', @{$parsed->{errors}});
236 my %multi_trial_data;
237 my %metadata_fields = map { $_ => 1 } qw(trial_name accession_name plot_number block_number location year design_type trial_description);
239 foreach my $row (@
{$parsed->{data
}}) {
240 my $trial_name = $row->{trial_name
};
241 next unless $trial_name; # Skip rows with empty trial names
243 # Check if the location exists in the database
244 my $trial_location = $row->{location
};
245 my $location_rs = $schema->resultset("NaturalDiversity::NdGeolocation")->search({
246 description
=> { ilike
=> '%' . $trial_location . '%' },
248 if (scalar($location_rs) == 0) {
249 push @errors, "ERROR: location must be pre-loaded in the database. Location name = " . $trial_location . "\n";
251 my $location_id = $location_rs->first->nd_geolocation_id;
252 ######################################################
254 # # Store all data for the current trial
255 $multi_trial_data{$trial_name} = {
256 trial_location
=> $row->{location
},
257 trial_year
=> $row->{year
},
258 design_type
=> $row->{design_type
},
259 trial_description
=> $row->{description
},
260 program
=> $breeding_program->name,
261 plot_name
=> $row->{plot_name
},
262 accession_name
=> $row->{accession_name
},
266 foreach my $col (@
{$parsed->{columns
}}) {
267 next if exists $metadata_fields{$col};
271 print STDERR
"unique trial names:\n";
272 foreach my $name(keys %multi_trial_data) {
276 print STDERR
"Reading phenotyping file:\n";
277 my %phen_params = map { if ($_ =~ m/^\w+\|(\w+:\d{7})$/ ) { $_ => $1 } } @traits;
278 delete $phen_params{''};
280 # my @traits = keys %phen_params;
281 print STDERR
"Found traits: " . Dumper
(\
%phen_params) . "\n";
284 foreach my $trial_name (keys %multi_trial_data) {
285 $multi_trial_data{$trial_name}->{design
} = $multi_trial_data{$trial_name};
289 my %trial_design_hash;
290 my %phen_data_by_trial;
292 foreach my $row (@
{$parsed->{data
}}) {
293 my $trial_name = $row->{trial_name
};
294 next unless $trial_name;
296 my $plot_number = $row->{plot_number
};
297 my $plot_name = $row->{plot_name
};
298 $trial_design_hash{$trial_name}{$plot_number} = {
299 trial_name
=> $trial_name,
300 trial_type
=> $row->{trial_type
},
301 planting_date
=> $row->{planting_date
},
302 harvest_date
=> $row->{harvest_date
},
303 entry_numbers
=> $row->{entry_numbers
},
304 is_a_control
=> $row->{is_a_control
},
305 rep_number
=> $row->{rep_number
},
306 range_number
=> $row->{range_number
},
307 row_number
=> $row->{row_number
},
308 col_number
=> $row->{col_number
},
309 seedlot_name
=> $row->{seedlot_name
},
310 num_seed_per_plot
=> $row->{num_seed_per_plot
},
311 weight_gram_seed_per_plot
=> $row->{weight_gram_seed_per_plot
},
314 foreach my $trait_string (keys %phen_params) {
315 my $phen_value = $row->{$trait_string};
316 $phen_data_by_trial{$trial_name}{$plot_name}{$trait_string} = [$phen_value, DateTime
->now->datetime];
320 print STDERR
"multi trial hash:" . Dumper
(\
%multi_trial_data);
321 print STDERR
"trial design " . Dumper
(\
%trial_design_hash);
322 print STDERR
"Processed trials: " . scalar(keys %trial_design_hash) . "\n";
323 print STDERR
"Phen data by trial: " . Dumper
(\
%phen_data_by_trial) . "\n";
325 #####create the design hash#####
326 print Dumper
(keys %trial_design_hash);
327 foreach my $trial_name (keys %trial_design_hash) {
328 $multi_trial_data{$trial_name}->{design
} = $trial_design_hash{$trial_name} ;
331 my $date = localtime();
338 my $time = DateTime
->now();
339 my $timestamp = $time->ymd()."_".$time->hms();
341 my %phenotype_metadata = {
342 'archived_file' => $infile,
343 'archived_file_type' => 'spreadsheet phenotype file',
344 'operator' => $username,
348 #parse uploaded file with appropriate plugin
349 $parser = CXGN
::Trial
::ParseUpload
->new(chado_schema
=> $schema, filename
=> $infile);
350 $parser->load_plugin('MultipleTrialDesignExcelFormat');
351 $parsed_data = $parser->parse();
354 # if (!$parsed_data) {
355 # my $return_error = '';
357 # if (!$parser->has_parse_errors() ){
358 # print STDERR "Parsing errors: " . join("\n", @{$parse_errors->{'error_messages'}}) . "\n";
359 # # push @errors, "could not get parsing errors\n";
361 # $parse_errors = $parser->get_parse_errors();
362 # print STDERR "Unknown parsing error occurred.\n";
365 # # push @errors, $return_error;
368 if ($parser->has_parse_warnings()) {
369 unless ($ignore_warnings) {
370 my $warnings = $parser->get_parse_warnings();
371 # print "Warnings: " . join("\n", @{$warnings->{'warning_messages'}}) . "\n";
376 my %all_desings = %{$parsed_data};
378 $save{'errors'} = [];
380 foreach my $trial_name (keys %multi_trial_data) {
381 my $trial_location = $multi_trial_data{$trial_name}->{trial_location
};
382 my $trial_design_info = $all_desings{$trial_name};
384 my %trial_info_hash = (
385 chado_schema
=> $schema,
387 trial_year
=> $trial_design_info->{'year'},
388 trial_description
=> $trial_design_info->{'description'},
389 trial_location
=> $trial_design_info->{'location'},
390 trial_name
=> $trial_name,
391 design_type
=> $trial_design_info->{'design_type'},
392 design
=> $trial_design_info->{'design_details'},
393 program
=> $trial_design_info->{'breeding_program'},
394 operator
=> $username,
395 owner_id
=> $sp_person_id,
398 # my $entry_numbers = $trial_design->{'entry_numbers'};
400 if ($trial_design_info->{'entry_numbers'}){
401 $trial_info_hash{trial_type
} = $trial_design_info->{'entry_numbers'};
403 if ($trial_design_info->{'trial_type'}){
404 $trial_info_hash{trial_type
} = $trial_design_info->{'trial_type'};
406 if ($trial_design_info->{'plot_width'}){
407 $trial_info_hash{plot_width
} = $trial_design_info->{'plot_width'};
409 if ($trial_design_info->{'plot_length'}){
410 $trial_info_hash{plot_length
} = $trial_design_info->{'plot_length'};
412 if ($trial_design_info->{'field_size'}){
413 $trial_info_hash{field_size
} = $trial_design_info->{'field_size'};
415 if ($trial_design_info->{'planting_date'}){
416 $trial_info_hash{planting_date
} = $trial_design_info->{'planting_date'};
418 if ($trial_design_info->{'harvest_date'}){
419 $trial_info_hash{harvest_date
} = $trial_design_info->{'harvest_date'};
421 my $trial_create = CXGN
::Trial
::TrialCreate
->new(%trial_info_hash);
422 my $current_save = $trial_create->save_trial();
424 if ($current_save->{error
}){
425 # $schema->txn_rollback();
426 push @
{$save{'errors'}}, $current_save->{'error'};
427 } elsif ($current_save->{'trial_id'}) {
428 my $trial_id = $current_save->{'trial_id'};
429 my $timestamp = $time->ymd();
430 my $calendar_funcs = CXGN
::Calendar
->new({});
431 my $formatted_date = $calendar_funcs->check_value_format($timestamp);
432 my $upload_date = $calendar_funcs->display_start_date($formatted_date);
435 $trial_activity{'Trial Uploaded'}{'user_id'} = $sp_person_id;
436 $trial_activity{'Trial Uploaded'}{'activity_date'} = $upload_date;
438 my $trial_activity_obj = CXGN
::TrialStatus
->new({ bcs_schema
=> $schema });
439 $trial_activity_obj->trial_activities(\
%trial_activity);
440 $trial_activity_obj->parent_id($trial_id);
441 my $activity_prop_id = $trial_activity_obj->store();
444 # save entry numbers, if provided
446 if ($entry_numbers = $trial_design_info->{'entry_numbers'}) {
447 if (scalar(keys %$entry_numbers) > 0 && $current_save->{'trial_id'} ) {
448 my %entry_numbers_prop;
449 my @stock_names = keys %$entry_numbers;
451 # Convert stock names from parsed trial template to stock ids for data storage
452 my $stocks = $schema->resultset('Stock::Stock')->search({ uniquename
=>{-in=>\
@stock_names} });
453 while (my $s = $stocks->next()) {
454 $entry_numbers_prop{$s->stock_id} = $entry_numbers->{$s->uniquename};
457 # Lookup synonyms of accession names
458 my $synonym_cvterm_id = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'stock_synonym', 'stock_property')->cvterm_id();
459 my $acc_synonym_rs = $schema->resultset("Stock::Stock")->search({
460 'me.is_obsolete' => { '!=' => 't' },
461 'stockprops.value' => { -in => \
@stock_names},
462 'stockprops.type_id' => $synonym_cvterm_id
463 },{join => 'stockprops', '+select'=>['stockprops.value'], '+as'=>['synonym']});
464 while (my $r=$acc_synonym_rs->next) {
465 if ( exists($entry_numbers->{$r->get_column('synonym')}) ) {
466 $entry_numbers_prop{$r->stock_id} = $entry_numbers->{$r->get_column('synonym')};
470 # store entry numbers
471 my $trial = CXGN
::Trial
->new({ bcs_schema
=> $schema, trial_id
=> $current_save->{'trial_id'} });
472 $trial->set_entry_numbers(\
%entry_numbers_prop);
476 print STDERR
"TrialCreate object created for trial: $trial_name\n";
478 my @plots = @
{$multi_trial_data{$trial_name}->{plots
} // []};
479 print STDERR
"Trial Name = $trial_name\n";
481 my %parsed_data = $phen_data_by_trial{$trial_name};
482 if (scalar(@traits) > 0) {
483 foreach my $pname (keys %parsed_data) {
484 print STDERR
"PLOT = $pname\n";
485 my %trait_string_hash = $parsed_data{$pname};
487 foreach my $trait_string (keys %trait_string_hash ) {
488 print STDERR
"trait = $trait_string\n";
489 print STDERR
"value = " . $trait_string_hash{$trait_string}[0] . "\n";
493 # # after storing the trial desgin store the phenotypes
494 my $metadata_schema = CXGN
::Metadata
::Schema
->connect( sub { $dbh->get_actual_dbh() }, {on_connect_do
=> ['SET search_path TO metadata;'] } );
495 my $phenome_schema = CXGN
::Phenome
::Schema
->connect( sub { $dbh->get_actual_dbh() } , {on_connect_do
=> ['SET search_path TO phenome, sgn, public;'] } );
496 my $store_phenotypes = CXGN
::Phenotypes
::StorePhenotypes
->new(
497 basepath
=> $basepath,
502 temp_file_nd_experiment_id
=> $temp_file_nd_experiment_id,
503 bcs_schema
=> $schema,
504 metadata_schema
=> $metadata_schema,
505 phenome_schema
=> $phenome_schema,
506 user_id
=> $sp_person_id,
507 stock_list
=> \
@plots,
508 trait_list
=> \
@traits,
509 values_hash
=> \
%parsed_data,
511 overwrite_values
=> 0,
512 metadata_hash
=> \
%phenotype_metadata,
515 #store the phenotypes
516 my ($verified_warning, $verified_error) = $store_phenotypes->verify();
517 # print STDERR "Verified phenotypes. warning = $verified_warning, error = $verified_error\n";
518 my $stored_phenotype_error = $store_phenotypes->store();
519 # print STDERR "Stored phenotypes Error:" . Dumper($stored_phenotype_error). "\n";
521 print STDERR
"No traits defined for these $trial_name\n";
526 my ($email_subject, $email_body);
529 $schema->txn_do($coderef);
531 if ($email_option_enabled == 1 && $email_address) {
532 print "Transaction succeeded! Committing project and its metadata \n\n";
534 $email_subject = "Multiple Trial Designs upload status";
535 $email_body = "Dear $username,\n\nCongratulations, all the multiple trial designs have been successfully uploaded into the database\n\nThank you\nHave a nice day\n\n";
537 CXGN
::Contact
::send_email
($email_subject, $email_body, $email_address);
541 my $error_message = "An error occurred! Rolling back! $_\n";
542 push @errors, $error_message;
543 # push @{$save{'errors'}}, $error_message;
544 # print STDERR $error_message;
546 if ($email_option_enabled == 1 && $email_address) {
547 $email_subject = 'Error in Trial Upload';
548 $email_body = "Dear $username,\n\n$error_message\n\nThank You\nHave a nice day\n";
550 # print STDERR $error_message;
552 CXGN
::Contact
::send_email
($email_subject, $email_body, $email_address);
557 my $error_message = join("\n", @errors);
558 print STDERR
$error_message;
562 print STDERR
"All trials loaded successfully\n";