5 upload_multiple_trial_design.pl
9 upload_multiple_trial_design.pl -H [dbhost] -D [dbname] -P [dbpass] -w [basepath] -U [dbuser] -b [breeding program name] -i infile -un [username] -e [email address] -eo [email_option_enabled] -r [temp_file_nd_experiment_id]
11 =head1 COMMAND-LINE OPTIONS
13 -H host name (required) Ex: "breedbase_db"
14 -D database name (required) Ex: "breedbase"
15 -P database userpass (required) Ex: "postgres"
16 -w basepath (required) Ex: /home/production/cxgn/sgn
17 -i path to infile (required)
18 -U username (required) Ex: "postgres"
19 -b breeding program name (required) Ex: test
20 -t test run . Rolling back at the end
21 -e email address of the user
23 -r temp_file_nd_experiment_id (required) Ex: /temp/delete_nd_experiment_ids.txt
24 if loading trial data from metadata file, phenotypes + layout from infile
28 perl bin/upload_multiple_trial_design.pl -h breedbase_db -d breedbase -p postgres -w /home/cxgn/sgn/ -u postgres -i ~/Desktop/test_multi.xlsx -b test -n janedoe -e 'sk2783@cornell.edu' -l 'sri' -r /tmp/delete_nd_experiment_ids.txt
30 This script will parse and validate the input file. If there are any warnings or errors during validation it will send a error message to the provided email.
31 If there are no warnings(or errors) during validation it will then store the data.
32 The input file should be either .xlsx or .xls format.
34 CHECK cvterms for trial metadata!!
36 ################################################
37 Minimal metadata requirements are
39 trial_description (can also be built from the trial name, type, year, location)
40 trial_type (read from an input file)
41 trial_location geo_description ( must be in the database - nd_geolocation.description - can be read from metadata file)
42 year (can be read from the metadata file )
43 design (defaults to 'RCBD' )
44 breeding_program (provide with option -b )
47 Other OPTIONAL trial metadata (projectprops)
50 project fertilizer date
53 project harvested plants
57 Srikanth (sk2783@cornell.edu)
64 use CXGN
::File
::Parse
;
65 use Bio
::Chado
::Schema
;
66 use CXGN
::DB
::InsertDBH
;
71 use List
::Util
qw(max);
72 use List
::Util
qw(first);
73 use List
::Util
qw(uniq);
74 use CXGN
::Metadata
::Schema
;
75 use CXGN
::Phenome
::Schema
;
76 use CXGN
::People
::Person
;
78 use CXGN
::Phenotypes
::StorePhenotypes
;
79 use CXGN
::Trial
; # add project metadata
80 #use CXGN::BreedersToolbox::Projects; # associating a breeding program
81 use CXGN
::Trial
::TrialCreate
;
84 use CXGN
::Trial
::ParseUpload
;
85 use CXGN
::TrialStatus
;
88 use File
::Path
qw(make_path);
91 my ( $help, $dbhost, $dbname, $basepath, $dbuser, $dbpass, $infile, $sites, $types, $username, $breeding_program_name, $email_address, $logged_in_name, $email_option_enabled, $temp_file_nd_experiment_id);
93 'dbhost|H=s' => \
$dbhost,
94 'dbname|D=s' => \
$dbname,
95 'dbpass|P=s' => \
$dbpass,
96 'basepath|w=s' => \
$basepath,
97 'dbuser|U=s' => \
$dbuser,
99 'b=s' => \
$breeding_program_name,
100 'user|un=s' => \
$username,
102 'email|e=s' => \
$email_address,
103 # 'logged_in_user|l=s' => \$logged_in_name,
104 'temp_file|r=s' => \
$temp_file_nd_experiment_id,
105 'email_option_enabled|eo=s' => \
$email_option_enabled,
108 #Ensure the parent directory exists before creating the temporary file
109 my $parent_dir = File
::Spec
->catdir($basepath, 'static', 'documents', 'tempfiles', 'delete_nd_experiment_ids');
110 unless (-d
$parent_dir) {
111 make_path
($parent_dir) or die "Failed to create directory $parent_dir: $!";
114 # Create the temporary file in the parent directory
115 my $temp_file_nd_experiment_id = File
::Spec
->catfile($parent_dir, 'fileXXXX');
117 pod2usage
(1) if $help;
118 if (!$infile || !$breeding_program_name || !$username || !$dbname || !$dbhost ) {
119 pod2usage
( { -msg
=> 'Error. Missing options!' , -verbose
=> 1, -exitval
=> 1 } ) ;
125 print STDERR
"Logging in with password\n";
126 $dbh = DBI
->connect("dbi:Pg:database=$dbname;host=$dbhost",
132 $dbh = CXGN
::DB
::InsertDBH
->new( {
135 dbargs
=> {AutoCommit
=> 1,
140 print STDERR
"Database connection ok!\n";
142 my $schema= Bio
::Chado
::Schema
->connect( sub { $dbh } , { on_connect_do
=> ['SET search_path TO public, sgn, metadata, phenome;'] } );
145 # getting the last database ids for resetting at the end in case of rolling back
148 # my $last_nd_experiment_id = $schema->resultset('NaturalDiversity::NdExperiment')->get_column('nd_experiment_id')->max;
149 # my $last_cvterm_id = $schema->resultset('Cv::Cvterm')->get_column('cvterm_id')->max;
151 # my $last_nd_experiment_project_id = $schema->resultset('NaturalDiversity::NdExperimentProject')->get_column('nd_experiment_project_id')->max;
152 # my $last_nd_experiment_stock_id = $schema->resultset('NaturalDiversity::NdExperimentStock')->get_column('nd_experiment_stock_id')->max;
153 # my $last_nd_experiment_phenotype_id = $schema->resultset('NaturalDiversity::NdExperimentPhenotype')->get_column('nd_experiment_phenotype_id')->max;
154 # my $last_phenotype_id = $schema->resultset('Phenotype::Phenotype')->get_column('phenotype_id')->max;
155 # my $last_stock_id = $schema->resultset('Stock::Stock')->get_column('stock_id')->max;
156 # my $last_stock_relationship_id = $schema->resultset('Stock::StockRelationship')->get_column('stock_relationship_id')->max;
157 # my $last_project_id = $schema->resultset('Project::Project')->get_column('project_id')->max;
158 # my $last_nd_geolocation_id = $schema->resultset('NaturalDiversity::NdGeolocation')->get_column('nd_geolocation_id')->max;
159 # my $last_geoprop_id = $schema->resultset('NaturalDiversity::NdGeolocationprop')->get_column('nd_geolocationprop_id')->max;
160 # my $last_projectprop_id = $schema->resultset('Project::Projectprop')->get_column('projectprop_id')->max;
163 # 'nd_experiment_nd_experiment_id_seq' => $last_nd_experiment_id,
164 # 'cvterm_cvterm_id_seq' => $last_cvterm_id,
165 # 'nd_experiment_project_nd_experiment_project_id_seq' => $last_nd_experiment_project_id,
166 # 'nd_experiment_stock_nd_experiment_stock_id_seq' => $last_nd_experiment_stock_id,
167 # 'nd_experiment_phenotype_nd_experiment_phenotype_id_seq' => $last_nd_experiment_phenotype_id,
168 # 'phenotype_phenotype_id_seq' => $last_phenotype_id,
169 # 'stock_stock_id_seq' => $last_stock_id,
170 # 'stock_relationship_stock_relationship_id_seq' => $last_stock_relationship_id,
171 # 'project_project_id_seq' => $last_project_id,
172 # 'nd_geolocation_nd_geolocation_id_seq' => $last_nd_geolocation_id,
173 # 'nd_geolocationprop_nd_geolocationprop_id_seq' => $last_geoprop_id,
174 # 'projectprop_projectprop_id_seq' => $last_projectprop_id,
179 # Breeding program for associating the trial/s ##
182 my $breeding_program = $schema->resultset("Project::Project")->find(
183 {'me.name' => $breeding_program_name, 'type.name' => 'breeding_program'},
184 { join => { projectprops
=> 'type' }}
187 if (!$breeding_program) { die "Breeding program $breeding_program_name does not exist in the database. Check your input \n"; }
188 # print STDERR "Found breeding program $breeding_program_name " . $breeding_program->project_id . "\n";
190 my $sp_person_id= CXGN
::People
::Person
->get_person_by_username($dbh, $username);
191 die "Need to have a user pre-loaded in the database! " if !$sp_person_id;
193 #Column headers for trial design/s
194 #plot_name accession_name plot_number block_number trial_name trial_description trial_location year trial_type is_a_control rep_number range_number row_number col_number entry_numbers
196 #parse file using the generic file parser
197 my $parser = CXGN
::File
::Parse
->new(
199 required_columns
=> ['trial_name', 'accession_name', 'plot_number', 'block_number', 'location', 'year'],
202 # warn "Starting to parse the file...\n";
203 my $parsed = $parser->parse();
204 # warn "Parsed data = " . Dumper($parsed);
206 die "Error parsing file: " . join(',', @
{$parsed->{errors
}}) if scalar(@
{$parsed->{errors
}}) > 0;
208 if (exists $parsed->{warnings
}) {
209 print "Warnings: " . join("\n", @
{$parsed->{warnings
}}) . "\n";
212 # my $parsed = $parser->parse();
214 # if (scalar(@{$parsed->{errors}}) > 0) {
215 # die "Error parsing file: ".join(',', @{$parsed->{errors}});
219 my %multi_trial_data;
220 my %metadata_fields = map { $_ => 1 } qw(trial_name accession_name plot_number block_number location year design_type trial_description);
222 foreach my $row (@
{$parsed->{data
}}) {
223 my $trial_name = $row->{trial_name
};
224 next unless $trial_name; # Skip rows with empty trial names
226 # Check if the location exists in the database
227 my $trial_location = $row->{location
};
228 my $location_rs = $schema->resultset("NaturalDiversity::NdGeolocation")->search({
229 description
=> { ilike
=> '%' . $trial_location . '%' },
231 if (scalar($location_rs) == 0) {
232 die "ERROR: location must be pre-loaded in the database. Location name = '" . $trial_location . "'\n";
234 my $location_id = $location_rs->first->nd_geolocation_id;
235 ######################################################
237 # # Store all data for the current trial
238 $multi_trial_data{$trial_name} = {
239 trial_location
=> $row->{location
},
240 trial_year
=> $row->{year
},
241 design_type
=> $row->{design_type
},
242 trial_description
=> $row->{description
},
243 program
=> $breeding_program->name,
244 plot_name
=> $row->{plot_name
},
245 accession_name
=> $row->{accession_name
},
249 foreach my $col (@
{$parsed->{columns
}}) {
250 next if exists $metadata_fields{$col};
254 print STDERR
"unique trial names:\n";
255 foreach my $name(keys %multi_trial_data) {
259 print STDERR
"Reading phenotyping file:\n";
260 my %phen_params = map { if ($_ =~ m/^\w+\|(\w+:\d{7})$/ ) { $_ => $1 } } @traits;
261 delete $phen_params{''};
263 # my @traits = keys %phen_params;
264 print STDERR
"Found traits: " . Dumper
(\
%phen_params) . "\n";
267 foreach my $trial_name (keys %multi_trial_data) {
268 $multi_trial_data{$trial_name}->{design
} = $multi_trial_data{$trial_name};
272 my %trial_design_hash;
273 my %phen_data_by_trial;
275 foreach my $row (@
{$parsed->{data
}}) {
276 my $trial_name = $row->{trial_name
};
277 next unless $trial_name;
279 my $plot_number = $row->{plot_number
};
280 my $plot_name = $row->{plot_name
};
281 $trial_design_hash{$trial_name}{$plot_number} = {
282 trial_name
=> $trial_name,
283 trial_type
=> $row->{trial_type
},
284 planting_date
=> $row->{planting_date
},
285 harvest_date
=> $row->{harvest_date
},
286 entry_numbers
=> $row->{entry_numbers
},
287 is_a_control
=> $row->{is_a_control
},
288 rep_number
=> $row->{rep_number
},
289 range_number
=> $row->{range_number
},
290 row_number
=> $row->{row_number
},
291 col_number
=> $row->{col_number
},
292 seedlot_name
=> $row->{seedlot_name
},
293 num_seed_per_plot
=> $row->{num_seed_per_plot
},
294 weight_gram_seed_per_plot
=> $row->{weight_gram_seed_per_plot
},
297 foreach my $trait_string (keys %phen_params) {
298 my $phen_value = $row->{$trait_string};
299 $phen_data_by_trial{$trial_name}{$plot_name}{$trait_string} = [$phen_value, DateTime
->now->datetime];
303 print STDERR
"multi trial hash:" . Dumper
(\
%multi_trial_data);
304 print STDERR
"trial design " . Dumper
(\
%trial_design_hash);
305 print STDERR
"Processed trials: " . scalar(keys %trial_design_hash) . "\n";
306 print STDERR
"Phen data by trial: " . Dumper
(\
%phen_data_by_trial) . "\n";
308 #####create the design hash#####
309 print Dumper
(keys %trial_design_hash);
310 foreach my $trial_name (keys %trial_design_hash) {
311 $multi_trial_data{$trial_name}->{design
} = $trial_design_hash{$trial_name} ;
314 my $date = localtime();
321 my $time = DateTime
->now();
322 my $timestamp = $time->ymd()."_".$time->hms();
324 my %phenotype_metadata = {
325 'archived_file' => $infile,
326 'archived_file_type' => 'spreadsheet phenotype file',
327 'operator' => $username,
331 #parse uploaded file with appropriate plugin
332 $parser = CXGN
::Trial
::ParseUpload
->new(chado_schema
=> $schema, filename
=> $infile);
333 $parser->load_plugin('MultipleTrialDesignExcelFormat');
334 $parsed_data = $parser->parse();
338 my $return_error = '';
340 if (! $parser->has_parse_errors() ){
341 die "could not get parsing errors\n";
343 $parse_errors = $parser->get_parse_errors();
344 die $parse_errors->{'error_messages'};
350 if ($parser->has_parse_warnings()) {
351 unless ($ignore_warnings) {
352 my $warnings = $parser->get_parse_warnings();
353 print "Warnings: " . join("\n", @
{$warnings->{'warning_messages'}}) . "\n";
358 my %all_desings = %{$parsed_data};
360 $save{'errors'} = [];
362 foreach my $trial_name (keys %multi_trial_data) {
363 my $trial_location = $multi_trial_data{$trial_name}->{trial_location
};
364 my $trial_design_info = $all_desings{$trial_name};
366 my %trial_info_hash = (
367 chado_schema
=> $schema,
369 trial_year
=> $trial_design_info->{'year'},
370 trial_description
=> $trial_design_info->{'description'},
371 trial_location
=> $trial_design_info->{'location'},
372 trial_name
=> $trial_name,
373 design_type
=> $trial_design_info->{'design_type'},
374 design
=> $trial_design_info->{'design_details'},
375 program
=> $trial_design_info->{'breeding_program'},
376 operator
=> $username,
377 owner_id
=> $sp_person_id,
380 # my $entry_numbers = $trial_design->{'entry_numbers'};
382 if ($trial_design_info->{'entry_numbers'}){
383 $trial_info_hash{trial_type
} = $trial_design_info->{'entry_numbers'};
385 if ($trial_design_info->{'trial_type'}){
386 $trial_info_hash{trial_type
} = $trial_design_info->{'trial_type'};
388 if ($trial_design_info->{'plot_width'}){
389 $trial_info_hash{plot_width
} = $trial_design_info->{'plot_width'};
391 if ($trial_design_info->{'plot_length'}){
392 $trial_info_hash{plot_length
} = $trial_design_info->{'plot_length'};
394 if ($trial_design_info->{'field_size'}){
395 $trial_info_hash{field_size
} = $trial_design_info->{'field_size'};
397 if ($trial_design_info->{'planting_date'}){
398 $trial_info_hash{planting_date
} = $trial_design_info->{'planting_date'};
400 if ($trial_design_info->{'harvest_date'}){
401 $trial_info_hash{harvest_date
} = $trial_design_info->{'harvest_date'};
403 my $trial_create = CXGN
::Trial
::TrialCreate
->new(%trial_info_hash);
404 my $current_save = $trial_create->save_trial();
406 if ($current_save->{error
}){
407 # $schema->txn_rollback();
408 push @
{$save{'errors'}}, $current_save->{'error'};
409 } elsif ($current_save->{'trial_id'}) {
410 my $trial_id = $current_save->{'trial_id'};
411 my $timestamp = $time->ymd();
412 my $calendar_funcs = CXGN
::Calendar
->new({});
413 my $formatted_date = $calendar_funcs->check_value_format($timestamp);
414 my $upload_date = $calendar_funcs->display_start_date($formatted_date);
417 $trial_activity{'Trial Uploaded'}{'user_id'} = $sp_person_id;
418 $trial_activity{'Trial Uploaded'}{'activity_date'} = $upload_date;
420 my $trial_activity_obj = CXGN
::TrialStatus
->new({ bcs_schema
=> $schema });
421 $trial_activity_obj->trial_activities(\
%trial_activity);
422 $trial_activity_obj->parent_id($trial_id);
423 my $activity_prop_id = $trial_activity_obj->store();
426 # save entry numbers, if provided
428 if ($entry_numbers = $trial_design_info->{'entry_numbers'}) {
429 if (scalar(keys %$entry_numbers) > 0 && $current_save->{'trial_id'} ) {
430 my %entry_numbers_prop;
431 my @stock_names = keys %$entry_numbers;
433 # Convert stock names from parsed trial template to stock ids for data storage
434 my $stocks = $schema->resultset('Stock::Stock')->search({ uniquename
=>{-in=>\
@stock_names} });
435 while (my $s = $stocks->next()) {
436 $entry_numbers_prop{$s->stock_id} = $entry_numbers->{$s->uniquename};
439 # Lookup synonyms of accession names
440 my $synonym_cvterm_id = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'stock_synonym', 'stock_property')->cvterm_id();
441 my $acc_synonym_rs = $schema->resultset("Stock::Stock")->search({
442 'me.is_obsolete' => { '!=' => 't' },
443 'stockprops.value' => { -in => \
@stock_names},
444 'stockprops.type_id' => $synonym_cvterm_id
445 },{join => 'stockprops', '+select'=>['stockprops.value'], '+as'=>['synonym']});
446 while (my $r=$acc_synonym_rs->next) {
447 if ( exists($entry_numbers->{$r->get_column('synonym')}) ) {
448 $entry_numbers_prop{$r->stock_id} = $entry_numbers->{$r->get_column('synonym')};
452 # store entry numbers
453 my $trial = CXGN
::Trial
->new({ bcs_schema
=> $schema, trial_id
=> $current_save->{'trial_id'} });
454 $trial->set_entry_numbers(\
%entry_numbers_prop);
458 print STDERR
"TrialCreate object created for trial: $trial_name\n";
460 my @plots = @
{$multi_trial_data{$trial_name}->{plots
} // []};
461 print STDERR
"Trial Name = $trial_name\n";
463 my %parsed_data = $phen_data_by_trial{$trial_name};
464 if (scalar(@traits) > 0) {
465 foreach my $pname (keys %parsed_data) {
466 print STDERR
"PLOT = $pname\n";
467 my %trait_string_hash = $parsed_data{$pname};
469 foreach my $trait_string (keys %trait_string_hash ) {
470 print STDERR
"trait = $trait_string\n";
471 print STDERR
"value = " . $trait_string_hash{$trait_string}[0] . "\n";
475 # # after storing the trial desgin store the phenotypes
476 my $metadata_schema = CXGN
::Metadata
::Schema
->connect( sub { $dbh->get_actual_dbh() }, {on_connect_do
=> ['SET search_path TO metadata;'] } );
477 my $phenome_schema = CXGN
::Phenome
::Schema
->connect( sub { $dbh->get_actual_dbh() } , {on_connect_do
=> ['SET search_path TO phenome, sgn, public;'] } );
478 my $store_phenotypes = CXGN
::Phenotypes
::StorePhenotypes
->new(
479 basepath
=> $basepath,
484 temp_file_nd_experiment_id
=> $temp_file_nd_experiment_id,
485 bcs_schema
=> $schema,
486 metadata_schema
=> $metadata_schema,
487 phenome_schema
=> $phenome_schema,
488 user_id
=> $sp_person_id,
489 stock_list
=> \
@plots,
490 trait_list
=> \
@traits,
491 values_hash
=> \
%parsed_data,
493 overwrite_values
=> 0,
494 metadata_hash
=> \
%phenotype_metadata,
497 #store the phenotypes
498 my ($verified_warning, $verified_error) = $store_phenotypes->verify();
499 # print STDERR "Verified phenotypes. warning = $verified_warning, error = $verified_error\n";
500 my $stored_phenotype_error = $store_phenotypes->store();
501 # print STDERR "Stored phenotypes Error:" . Dumper($stored_phenotype_error). "\n";
503 print STDERR
"No traits defined for these $trial_name\n";
508 my ($email_subject, $email_body);
511 $schema->txn_do($coderef);
513 if ($email_option_enabled == 1 && $email_address) {
514 print "Transaction succeeded! Committing project and its metadata \n\n";
516 $email_subject = "Multiple Trial Designs upload status";
517 $email_body = "Dear $username,\n\nCongratulations, all the multiple trial designs have been successfully uploaded into the database\n\nThank you\nHave a nice day\n\n";
519 CXGN
::Contact
::send_email
($email_subject, $email_body, $email_address);
523 my $error_message = "An error occurred! Rolling back! $_\n";
524 # push @errors, $error_message;
525 # push @{$save{'errors'}}, $error_message;
526 # print STDERR $error_message;
528 if ($email_option_enabled == 1 && $email_address) {
529 $email_subject = 'Error in Trial Upload';
530 $email_body = "Dear $username,\n\n$error_message\n\nThank You\nHave a nice day\n";
532 # print STDERR $error_message;
534 CXGN
::Contact
::send_email
($email_subject, $email_body, $email_address);