do not attempt get the person object when no one is logged in...
[sgn.git] / lib / CXGN / Phenotypes / StorePhenotypes.pm
blob85e55192acee1e61d82e96c5c52a1f375cd7d7b5
1 package CXGN::Phenotypes::StorePhenotypes;
3 =head1 NAME
5 CXGN::Phenotypes::StorePhenotypes - an object to handle storing phenotypes for SGN stocks
7 =head1 USAGE
9 my $store_phenotypes = CXGN::Phenotypes::StorePhenotypes->new(
10 basepath=>basepath,
11 dbhost=>dbhost,
12 dbname=>dbname,
13 dbuser=>dbuser,
14 dbpass=>dbpass,
15 temp_file_nd_experiment_id=>$temp_file_nd_experiment_id, #tempfile full name for deleting nd_experiment_ids asynchronously
16 bcs_schema=>$schema,
17 metadata_schema=>$metadata_schema,
18 phenome_schema=>$phenome_schema,
19 user_id=>$user_id,
20 stock_list=>$plots,
21 trait_list=>$traits,
22 values_hash=>$parsed_data,
23 has_timestamps=>$timestamp_included,
24 overwrite_values=>$overwrite,
25 ignore_new_values=>$ignore_new_values,
26 metadata_hash=>$phenotype_metadata,
27 image_zipfile_path=>$image_zip
29 my ($verified_warning, $verified_error) = $store_phenotypes->verify();
30 my ($stored_phenotype_error, $stored_Phenotype_success) = $store_phenotypes->store();
32 =head1 DESCRIPTION
35 =head1 AUTHORS
37 Jeremy D. Edwards (jde22@cornell.edu)
38 Naama Menda (nm249@cornell.edu)
39 Nicolas Morales (nm529@cornell.edu)
40 Bryan Ellerbrock (bje24@cornell.edu)
42 =cut
44 use strict;
45 use warnings;
46 use Moose;
47 use Try::Tiny;
48 use File::Basename qw | basename dirname|;
49 use Digest::MD5;
50 use CXGN::List::Validate;
51 use Data::Dumper;
52 use Scalar::Util qw(looks_like_number);
53 use JSON;
54 use SGN::Image;
55 use CXGN::ZipFile;
56 use CXGN::UploadFile;
57 use CXGN::List::Transform;
58 use CXGN::Stock;
59 use CXGN::Tools::Run;
61 has 'bcs_schema' => (
62 isa => 'Bio::Chado::Schema',
63 is => 'rw',
64 required => 1,
67 has 'metadata_schema' => (
68 isa => 'CXGN::Metadata::Schema',
69 is => 'rw',
70 required => 1,
73 has 'phenome_schema' => (
74 isa => 'CXGN::Phenome::Schema',
75 is => 'rw',
76 required => 1,
79 has 'basepath' => (
80 isa => "Str",
81 is => 'rw',
82 required => 1
85 has 'dbhost' => (
86 isa => "Str",
87 is => 'rw',
88 required => 1
91 has 'dbname' => (
92 isa => "Str",
93 is => 'rw',
94 required => 1
97 has 'dbuser' => (
98 isa => "Str",
99 is => 'rw',
100 required => 1
103 has 'dbpass' => (
104 isa => "Str",
105 is => 'rw',
106 required => 1
109 has 'temp_file_nd_experiment_id' => (
110 isa => "Str",
111 is => 'rw',
112 required => 1
115 has 'user_id' => (
116 isa => "Int",
117 is => 'rw',
118 required => 1
121 has 'stock_list' => (
122 isa => "ArrayRef",
123 is => 'rw',
124 required => 1
127 has 'stock_id_list' => (
128 isa => "ArrayRef[Int]|Undef",
129 is => 'rw',
130 required => 0,
133 has 'trait_list' => (
134 isa => "ArrayRef",
135 is => 'rw',
136 required => 1
139 has 'values_hash' => (
140 isa => "HashRef",
141 is => 'rw',
142 required => 1
145 has 'has_timestamps' => (
146 isa => "Bool",
147 is => 'rw',
148 default => 0
151 has 'overwrite_values' => (
152 isa => "Bool",
153 is => 'rw',
154 default => 0
157 has 'remove_values' => (
158 isa => "Bool",
159 is => 'rw',
160 default => 0
163 has 'ignore_new_values' => (
164 isa => "Bool",
165 is => 'rw',
166 default => 0
169 has 'metadata_hash' => (
170 isa => "HashRef",
171 is => 'rw',
172 required => 1
175 has 'image_zipfile_path' => (
176 isa => "Str | Undef",
177 is => 'rw',
178 required => 0
181 has 'trait_objs' => (
182 isa => "HashRef",
183 is => 'rw',
186 has 'unique_value_trait_stock' => (
187 isa => "HashRef",
188 is => 'rw',
191 has 'unique_trait_stock' => (
192 isa => "HashRef",
193 is => 'rw',
196 has 'unique_trait_stock_timestamp' => (
197 isa => "HashRef",
198 is => 'rw',
201 has 'composable_validation_check_name' => (
202 isa => "Bool",
203 is => 'rw',
204 default => 0
207 has 'allow_repeat_measures' => (
208 isa => "Bool",
209 is => 'rw',
210 default => 0
213 #build is used for creating hash lookups in this case
214 sub create_hash_lookups {
215 my $self = shift;
216 my $schema = $self->bcs_schema;
218 #Find trait cvterm objects and put them in a hash
219 my %trait_objs;
220 my @trait_list = @{$self->trait_list};
221 @trait_list = map { $_ eq 'notes' ? () : ($_) } @trait_list; # omit notes from trait validation
222 print STDERR "trait list after filtering @trait_list\n";
223 my @stock_list = @{$self->stock_list};
224 my @cvterm_ids;
226 my $t = CXGN::List::Transform->new();
227 my $stock_id_list = $t->transform($schema, 'stocks_2_stock_ids', \@stock_list);
228 $self->stock_id_list($stock_id_list->{'transform'});
230 foreach my $trait_name (@trait_list) {
231 print STDERR "trait: $trait_name\n";
232 my $trait_cvterm = SGN::Model::Cvterm->get_cvterm_row_from_trait_name($schema, $trait_name);
233 $trait_objs{$trait_name} = $trait_cvterm;
234 push @cvterm_ids, $trait_cvterm->cvterm_id();
236 $self->trait_objs(\%trait_objs);
238 #for checking if values in the file are already stored in the database or in the same file
239 my %check_unique_trait_stock;
240 my %check_unique_trait_stock_timestamp;
241 my %check_unique_value_trait_stock;
243 my $stock_ids_sql = join ("," , @{$self->stock_id_list});
244 #print STDERR "Cvterm ids are @cvterm_ids";
245 if (scalar @cvterm_ids > 0) {
246 my $cvterm_ids_sql = join ("," , @cvterm_ids);
247 my $previous_phenotype_q = "SELECT phenotype.value, phenotype.cvalue_id, phenotype.collect_date, stock.stock_id FROM phenotype LEFT JOIN nd_experiment_phenotype USING(phenotype_id) LEFT JOIN nd_experiment USING(nd_experiment_id) LEFT JOIN nd_experiment_stock USING(nd_experiment_id) LEFT JOIN stock USING(stock_id) WHERE stock.stock_id IN ($stock_ids_sql) AND phenotype.cvalue_id IN ($cvterm_ids_sql);";
248 my $h = $schema->storage->dbh()->prepare($previous_phenotype_q);
249 $h->execute();
251 #my $previous_phenotype_rs = $schema->resultset('Phenotype::Phenotype')->search({'me.cvalue_id'=>{-in=>\@cvterm_ids}, 'stock.stock_id'=>{-in=>$self->stock_id_list}}, {'join'=>{'nd_experiment_phenotypes'=>{'nd_experiment'=>{'nd_experiment_stocks'=>'stock'}}}, 'select' => ['me.value', 'me.cvalue_id', 'stock.stock_id'], 'as' => ['value', 'cvterm_id', 'stock_id']});
252 while (my ($previous_value, $cvterm_id, $collect_timestamp, $stock_id) = $h->fetchrow_array()) {
253 #while (my $previous_phenotype_cvterm = $previous_phenotype_rs->next() ) {
254 #my $cvterm_id = $previous_phenotype_cvterm->get_column('cvterm_id');
255 #my $stock_id = $previous_phenotype_cvterm->get_column('stock_id');
256 if ($stock_id){
257 #my $previous_value = $previous_phenotype_cvterm->get_column('value') || ' ';
258 $collect_timestamp = $collect_timestamp || 'NA';
259 $check_unique_trait_stock{$cvterm_id, $stock_id} = $previous_value;
260 $check_unique_trait_stock_timestamp{$cvterm_id, $stock_id, $collect_timestamp} = $previous_value;
261 $check_unique_value_trait_stock{$previous_value, $cvterm_id, $stock_id} = 1;
266 $self->unique_value_trait_stock(\%check_unique_value_trait_stock);
267 $self->unique_trait_stock(\%check_unique_trait_stock);
268 $self->unique_trait_stock_timestamp(\%check_unique_trait_stock_timestamp);
272 sub verify {
273 my $self = shift;
274 print STDERR "CXGN::Phenotypes::StorePhenotypes verify\n";
276 my @plot_list = @{$self->stock_list};
277 my @trait_list = @{$self->trait_list};
278 @trait_list = map { $_ eq 'notes' ? () : ($_) } @trait_list; # omit notes from trait validation
279 print STDERR Dumper \@trait_list;
280 my %plot_trait_value = %{$self->values_hash};
281 my %phenotype_metadata = %{$self->metadata_hash};
282 my $timestamp_included = $self->has_timestamps;
283 my $archived_image_zipfile_with_path = $self->image_zipfile_path;
284 my $schema = $self->bcs_schema;
285 my $transaction_error;
286 # print STDERR Dumper \@plot_list;
287 # print STDERR Dumper \%plot_trait_value;
288 my $plot_validator = CXGN::List::Validate->new();
289 my $trait_validator = CXGN::List::Validate->new(
290 composable_validation_check_name => $self->{composable_validation_check_name}
292 my @plots_missing = @{$plot_validator->validate($schema,'plots_or_subplots_or_plants_or_tissue_samples_or_analysis_instances',\@plot_list)->{'missing'}};
293 my $traits_validation = $trait_validator->validate($schema,'traits',\@trait_list);
294 my @traits_missing = @{$traits_validation->{'missing'}};
295 my @traits_wrong_ids = @{$traits_validation->{'wrong_ids'}};
296 my $error_message = '';
297 my $warning_message = '';
299 if (scalar(@plots_missing) > 0 || scalar(@traits_missing) > 0) {
300 print STDERR "Plots or traits not valid\n";
301 print STDERR "Invalid plots: ".join(", ", map { "'$_'" } @plots_missing)."\n" if (@plots_missing);
302 print STDERR "Invalid traits: ".join(", ", map { "'$_'" } @traits_missing)."\n" if (@traits_missing);
303 $error_message = "Invalid plots: <br/>".join(", <br/>", map { "'$_'" } @plots_missing) if (@plots_missing);
304 $error_message = "Invalid traits: <br/>".join(", <br/>", map { "'$_'" } @traits_missing) if (@traits_missing);
306 # Display matches of traits with the wrong id
307 if ( scalar(@traits_wrong_ids) > 0 ) {
308 $error_message .= "<br /><br /><strong>Possible Trait Matches:</strong>";
309 foreach my $m (@traits_wrong_ids) {
310 $error_message .= "<br /><br />" . $m->{'original_term'} . "<br />should be<br />" . $m->{'matching_term'};
314 return ($warning_message, $error_message);
317 $self->create_hash_lookups();
318 my %trait_objs = %{$self->trait_objs};
319 my %check_unique_value_trait_stock = %{$self->unique_value_trait_stock};
320 my %check_unique_trait_stock = %{$self->unique_trait_stock};
321 my %check_unique_trait_stock_timestamp = %{$self->unique_trait_stock_timestamp};
323 my %check_trait_category;
324 my $sql = "SELECT b.value, c.cvterm_id from cvtermprop as b join cvterm as a on (b.type_id = a.cvterm_id) join cvterm as c on (b.cvterm_id=c.cvterm_id) where a.name = 'trait_categories';";
325 my $sth = $schema->storage->dbh->prepare($sql);
326 $sth->execute();
327 while (my ($category_value, $cvterm_id) = $sth->fetchrow_array) {
328 $check_trait_category{$cvterm_id} = $category_value;
331 my %check_trait_format;
332 $sql = "SELECT b.value, c.cvterm_id from cvtermprop as b join cvterm as a on (b.type_id = a.cvterm_id) join cvterm as c on (b.cvterm_id=c.cvterm_id) where a.name = 'trait_format';";
333 $sth = $schema->storage->dbh->prepare($sql);
334 $sth->execute();
335 while (my ($format_value, $cvterm_id) = $sth->fetchrow_array) {
336 $check_trait_format{$cvterm_id} = $format_value;
339 my %image_plot_full_names;
340 #This is for saving Fieldbook images, which are only associated to a stock. To save images that are associated to a stock and a trait and a value, use the ExcelAssociatedImages parser
341 if ($archived_image_zipfile_with_path) {
343 my $archived_zip = CXGN::ZipFile->new(archived_zipfile_path=>$archived_image_zipfile_with_path);
344 my @archived_zipfile_return = $archived_zip->file_names();
345 if (!@archived_zipfile_return){
346 $error_message = $error_message."<small>Image zipfile could not be read. Is it .zip format?</small><hr>";
347 } else {
348 my $file_names_stripped = $archived_zipfile_return[0];
349 my $file_names_full = $archived_zipfile_return[1];
350 foreach (@$file_names_full) {
351 $image_plot_full_names{$_} = 1;
353 my %plot_name_check;
354 foreach (@plot_list) {
355 $plot_name_check{$_} = 1;
357 foreach my $img_name (@$file_names_stripped) {
358 $img_name = substr($img_name, 0, -20);
359 if ($img_name && !exists($plot_name_check{$img_name})) {
360 $warning_message = $error_message."<small>Image ".$img_name." in images zip file does not reference a plot or plant_name (e.g. the image filename does not have a plot or plant name in it)!</small><hr>";
366 my %check_file_stock_trait_duplicates;
368 my $same_value_count = 0;
369 foreach my $plot_name (@plot_list) {
370 foreach my $trait_name (@trait_list) {
371 my $value_array = $plot_trait_value{$plot_name}->{$trait_name};
372 #print STDERR Dumper $value_array;
373 my $trait_value = $value_array->[0];
374 my $timestamp = $value_array->[1];
375 #print STDERR "$plot_name, $trait_name, $trait_value\n";
376 if ( defined($trait_value) ) {
377 my $trait_cvterm = $trait_objs{$trait_name};
378 my $trait_cvterm_id = $trait_cvterm->cvterm_id();
379 my $stock_id = $schema->resultset('Stock::Stock')->find({'uniquename' => $plot_name})->stock_id();
381 #Trait values can be non alphanumeric
382 #if ($trait_value eq '.' || ($trait_value =~ m/[^a-zA-Z0-9,.\-\/\_]/ && $trait_value ne '.')){
383 # $error_message = $error_message."<small>Trait values must be alphanumeric with no spaces: <br/>Plot Name: ".$plot_name."<br/>Trait Name: ".$trait_name."<br/>Value: ".$trait_value."</small><hr>";
386 #check that trait value is valid for trait name
387 if (exists($check_trait_format{$trait_cvterm_id})) {
388 if ($check_trait_format{$trait_cvterm_id} eq 'numeric') {
389 my $trait_format_checked = looks_like_number($trait_value);
390 if (!$trait_format_checked && $trait_value ne '') {
391 $error_message = $error_message."<small>This trait value should be numeric: <br/>Plot Name: ".$plot_name."<br/>Trait Name: ".$trait_name."<br/>Value: ".$trait_value."</small><hr>";
394 if ($check_trait_format{$trait_cvterm_id} eq 'image') {
395 $trait_value =~ s/^.*photos\///;
396 if (!exists($image_plot_full_names{$trait_value})) {
397 $error_message = $error_message."<small>For Plot Name: $plot_name there should be a corresponding image named in the zipfile called $trait_value. </small><hr>";
402 if (exists($check_trait_category{$trait_cvterm_id})) {
403 my @check_values;
405 my @trait_categories = split /\//, $check_trait_category{$trait_cvterm_id};
406 my %trait_categories_hash;
408 if ($check_trait_format{$trait_cvterm_id} eq "Multicat") {
409 @check_values = split /\:/, $trait_value;
411 else {
412 @check_values = ( $trait_value );
415 if ($check_trait_format{$trait_cvterm_id} eq 'Ordinal' || $check_trait_format{$trait_cvterm_id} eq 'Nominal' || $check_trait_format{$trait_cvterm_id} eq 'Multicat') {
416 # Ordinal looks like <value>=<category>
418 foreach my $ordinal_category (@trait_categories) {
419 my @split_value = split('=', $ordinal_category);
420 if (scalar(@split_value) >= 1) {
421 $trait_categories_hash{$split_value[0]} = 1;
424 } else {
425 # Catch everything else
426 %trait_categories_hash = map { $_ => 1 } @trait_categories;
429 foreach my $tw (@check_values) {
430 if ($tw ne '' && !exists($trait_categories_hash{$tw})) {
431 my $valid_values = join("/", sort keys %trait_categories_hash); # Sort values for consistent order
432 $error_message = "<small>This trait value should be one of $valid_values: <br/>Plot Name: $plot_name<br/>Trait Name: $trait_name<br/>Value: $trait_value</small><hr>";
433 print STDERR $error_message;
434 } else {
435 print STDERR "Trait value is valid $tw.\n";
440 #print STDERR "$trait_value, $trait_cvterm_id, $stock_id\n";
441 #check if the plot_name, trait_name combination already exists in database.
442 if (exists($check_unique_value_trait_stock{$trait_value, $trait_cvterm_id, $stock_id})) {
443 my $prev = $check_unique_value_trait_stock{$trait_value, $trait_cvterm_id, $stock_id};
444 if ( defined($prev) && length($prev) && defined($trait_value) && length($trait_value) ) {
445 $same_value_count++;
447 } elsif (exists($check_unique_trait_stock_timestamp{$trait_cvterm_id, $stock_id, $timestamp})) {
448 my $prev = $check_unique_trait_stock_timestamp{$trait_cvterm_id, $stock_id, $timestamp};
449 if ( defined($prev) ) {
450 $warning_message = $warning_message."<small>$plot_name already has a <strong>different value</strong> ($prev) than in your file (" . ($trait_value ? $trait_value : "<em>blank</em>") . ") stored in the database for the trait $trait_name for the timestamp $timestamp.</small><hr>";
452 } elsif (exists($check_unique_trait_stock{$trait_cvterm_id, $stock_id})) {
453 my $prev = $check_unique_trait_stock{$trait_cvterm_id, $stock_id};
454 if ( defined($prev) ) {
455 $warning_message = $warning_message."<small>$plot_name already has a <strong>different value</strong> ($prev) than in your file (" . ($trait_value ? $trait_value : "<em>blank</em>") . ") stored in the database for the trait $trait_name.</small><hr>";
459 #check if the plot_name, trait_name combination already exists in same file.
460 if (exists($check_file_stock_trait_duplicates{$trait_cvterm_id, $stock_id})) {
461 $warning_message = $warning_message."<small>$plot_name already has a value for the trait $trait_name in your file. Possible duplicate in your file?</small><hr>";
463 $check_file_stock_trait_duplicates{$trait_cvterm_id, $stock_id} = 1;
466 if ($timestamp_included) {
467 if ( (!$timestamp && !$trait_value) || ($timestamp && !$trait_value) || ($timestamp && $trait_value) ) {
468 if ($timestamp) {
469 if( !$timestamp =~ m/(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})(\S)(\d{4})/) {
470 $error_message = $error_message."<small>Bad timestamp for value for Plot Name: ".$plot_name."<br/>Trait Name: ".$trait_name."<br/>Should be YYYY-MM-DD HH:MM:SS-0000 or YYYY-MM-DD HH:MM:SS+0000</small><hr>";
479 # combine all warnings about the same values into a summary count
480 if ( $same_value_count > 0 ) {
481 $warning_message = $warning_message."<small>There are $same_value_count values in your file that are the same as values already stored in the database.</small>";
484 ## Verify metadata
485 if ($phenotype_metadata{'archived_file'} && (!$phenotype_metadata{'archived_file_type'} || $phenotype_metadata{'archived_file_type'} eq "")) {
486 $error_message = "No file type provided for archived file.";
487 return ($warning_message, $error_message);
489 if (!$phenotype_metadata{'operator'} || $phenotype_metadata{'operator'} eq "") {
490 $error_message = "No operaror provided in file upload metadata.";
491 return ($warning_message, $error_message);
493 if (!$phenotype_metadata{'date'} || $phenotype_metadata{'date'} eq "") {
494 $error_message = "No date provided in file upload metadata.";
495 return ($warning_message, $error_message);
498 return ($warning_message, $error_message);
501 sub store {
502 my $self = shift;
503 print STDERR "CXGN::Phenotypes::StorePhenotypes store\n";
505 $self->create_hash_lookups();
506 my %linked_data = %{$self->get_linked_data()};
507 my @plot_list = @{$self->stock_list};
508 my @trait_list = @{$self->trait_list};
509 @trait_list = map { $_ eq 'notes' ? () : ($_) } @trait_list; # omit notes so they can be handled separately
510 my %trait_objs = %{$self->trait_objs};
511 my %plot_trait_value = %{$self->values_hash};
512 my %phenotype_metadata = %{$self->metadata_hash};
513 my $timestamp_included = $self->has_timestamps;
514 my $archived_image_zipfile_with_path = $self->image_zipfile_path;
515 my $phenotype_metadata = $self->metadata_hash;
516 my $schema = $self->bcs_schema;
517 my $metadata_schema = $self->metadata_schema;
518 my $phenome_schema = $self->phenome_schema;
519 my $overwrite_values = $self->overwrite_values;
520 my $remove_values = $self->remove_values;
521 my $ignore_new_values = $self->ignore_new_values;
522 my $allow_repeat_measures = $self->allow_repeat_measures;
523 my $error_message;
524 my $transaction_error;
525 my $user_id = $self->user_id;
526 my $archived_file = $phenotype_metadata->{'archived_file'};
527 my $archived_file_type = $phenotype_metadata->{'archived_file_type'};
528 my $operator = $phenotype_metadata->{'operator'};
529 my $upload_date = $phenotype_metadata->{'date'};
530 my $success_message;
532 my $phenotyping_experiment_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'phenotyping_experiment', 'experiment_type')->cvterm_id();
533 my $local_date_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'date', 'local')->cvterm_id();
534 my $local_operator_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'operator', 'local')->cvterm_id();
535 my $plot_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'plot', 'stock_type')->cvterm_id();
536 my $plant_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'plant', 'stock_type')->cvterm_id();
537 my $subplot_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'subplot', 'stock_type')->cvterm_id();
538 my $tissue_sample_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'tissue_sample', 'stock_type')->cvterm_id();
539 my $analysis_instance_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'analysis_instance', 'stock_type')->cvterm_id();
540 my $phenotype_addtional_info_type_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'phenotype_additional_info', 'phenotype_property')->cvterm_id();
541 my $external_references_type_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'phenotype_external_references', 'phenotype_property')->cvterm_id();
542 my %experiment_ids;
543 my @stored_details;
544 my %nd_experiment_md_images;
546 my %check_unique_trait_stock = %{$self->unique_trait_stock};
548 my $rs;
549 my %data;
550 $rs = $schema->resultset('Stock::Stock')->search(
551 {'type.name' => ['field_layout', 'analysis_experiment', 'sampling_layout'], 'me.type_id' => [$plot_cvterm_id, $plant_cvterm_id, $subplot_cvterm_id, $tissue_sample_cvterm_id, $analysis_instance_cvterm_id], 'me.stock_id' => {-in=>$self->stock_id_list } },
552 {join=> {'nd_experiment_stocks' => {'nd_experiment' => ['type', 'nd_experiment_projects' ] } } ,
553 '+select'=> ['me.stock_id', 'me.uniquename', 'nd_experiment.nd_geolocation_id', 'nd_experiment_projects.project_id'],
554 '+as'=> ['stock_id', 'uniquename', 'nd_geolocation_id', 'project_id']
557 while (my $s = $rs->next()) {
558 $data{$s->get_column('uniquename')} = [$s->get_column('stock_id'), $s->get_column('nd_geolocation_id'), $s->get_column('project_id') ];
561 # print STDERR "DATA: ".Dumper(\%data);
562 ## Use txn_do with the following coderef so that if any part fails, the entire transaction fails.
563 my $coderef = sub {
564 my %trait_and_stock_to_overwrite;
565 my @overwritten_values;
566 my $new_count = 0;
567 my $skip_count = 0;
568 my $overwrite_count = 0;
569 my $remove_count = 0;
570 foreach my $plot_name (@plot_list) {
572 my $stock_id = $data{$plot_name}[0];
573 my $location_id = $data{$plot_name}[1];
574 my $project_id = $data{$plot_name}[2];
576 # create plot-wide nd_experiment entry
578 my $experiment = $schema->resultset('NaturalDiversity::NdExperiment')->create({
579 nd_geolocation_id => $location_id,
580 type_id => $phenotyping_experiment_cvterm_id,
581 nd_experimentprops => [{type_id => $local_date_cvterm_id, value => $upload_date}, {type_id => $local_operator_cvterm_id, value => $operator}],
582 nd_experiment_projects => [{project_id => $project_id}],
583 nd_experiment_stocks => [{stock_id => $stock_id, type_id => $phenotyping_experiment_cvterm_id}]
586 $experiment_ids{$experiment->nd_experiment_id()}=1;
588 # Check if there is a note for this plot, If so add it using dedicated function
589 my $note_array = $plot_trait_value{$plot_name}->{'notes'};
590 if (defined $note_array) {
591 $self->store_stock_note($stock_id, $note_array, $operator);
594 # Check if there is nirs data for this plot
595 my $nirs_hashref = $plot_trait_value{$plot_name}->{'nirs'};
596 if (defined $nirs_hashref) {
597 $self->store_high_dimensional_data($nirs_hashref, $experiment->nd_experiment_id(), 'nirs_spectra');
598 $new_count++;
601 # Check if there is transcriptomics data for this plot
602 my $transcriptomics_hashref = $plot_trait_value{$plot_name}->{'transcriptomics'};
603 if (defined $transcriptomics_hashref) {
604 $self->store_high_dimensional_data($transcriptomics_hashref, $experiment->nd_experiment_id(), 'transcriptomics');
605 $new_count++;
608 # Check if there is metabolomics data for this plot
609 my $metabolomics_hashref = $plot_trait_value{$plot_name}->{'metabolomics'};
610 if (defined $metabolomics_hashref) {
611 $self->store_high_dimensional_data($metabolomics_hashref, $experiment->nd_experiment_id(), 'metabolomics');
612 $new_count++;
615 foreach my $trait_name (@trait_list) {
617 #print STDERR "trait: $trait_name\n";
618 my $trait_cvterm = $trait_objs{$trait_name};
620 my $value_array = $plot_trait_value{$plot_name}->{$trait_name};
622 my @values;
624 # convert to array or array format for single array values to accept old format inputs without refactoring
625 if (ref($value_array->[0]) ne 'ARRAY') {
626 push @values, $value_array;
627 } else {
628 @values = @{$value_array};
631 foreach my $value (@values) {
633 # perl doesn't have a problem attempting to access possibly non existing indices
634 my $trait_value = $value->[0];
635 my $timestamp = $value->[1];
636 $operator = $value->[2] ? $value->[2] : $operator;
637 my $observation = $value->[3];
638 my $image_id = $value->[4];
639 my $additional_info = $value->[5] || undef;
640 my $external_references = $value->[6] || undef;
641 my $unique_time = $timestamp && defined($timestamp) ? $timestamp : 'NA' . $upload_date;
642 my $existing_trait_value = $check_unique_trait_stock{$trait_cvterm->cvterm_id(), $stock_id};
644 if (defined($trait_value) && (length($trait_value) || $remove_values)) {
646 if ($ignore_new_values) {
647 if (exists($check_unique_trait_stock{$trait_cvterm->cvterm_id(), $stock_id})) {
648 $skip_count++;
649 next;
653 my $plot_trait_uniquename = "stock: " .
654 $stock_id . ", trait: " .
655 $trait_cvterm->name .
656 ", date: $unique_time" .
657 ", operator: $operator";
659 # Remove previous phenotype values for a given stock and trait if $overwrite values is checked, otherwise skip to next
660 if ($overwrite_values) {
661 if (exists($check_unique_trait_stock{$trait_cvterm->cvterm_id(), $stock_id})) {
663 #skip when observation is provided since overwriting doesn't create records it updates observations.
664 if (!$observation) {
665 push @{$trait_and_stock_to_overwrite{traits}}, $trait_cvterm->cvterm_id();
666 push @{$trait_and_stock_to_overwrite{stocks}}, $stock_id;
668 $plot_trait_uniquename .= ", overwritten: $upload_date";
669 if ( defined($trait_value) && length($trait_value) ) {
670 $overwrite_count++;
672 elsif ( $existing_trait_value ne "" ) {
673 $remove_count++;
675 } elsif ( length($trait_value) ) {
676 $new_count++;
678 $check_unique_trait_stock{$trait_cvterm->cvterm_id(), $stock_id} = 1;
679 } else {
680 if (!$allow_repeat_measures && exists($check_unique_trait_stock{$trait_cvterm->cvterm_id(), $stock_id})) {
681 $skip_count++;
682 next;
683 } else {
684 $new_count++;
688 my $phenotype;
689 if ($observation) {
690 $phenotype = $trait_cvterm->find_related("phenotype_cvalues", {
691 observable_id => $trait_cvterm->cvterm_id,
692 phenotype_id => $observation,
695 ## should check that unit and variable (also checked here) are conserved in parse step, if not reject before store
696 ## should also update operator in nd_experimentprops
698 $phenotype->update({
699 value => $trait_value,
700 uniquename => $plot_trait_uniquename,
703 $self->handle_timestamp($timestamp, $observation);
704 $self->handle_operator($operator, $observation);
706 my $q = "SELECT phenotype_id, nd_experiment_id, file_id
707 FROM phenotype
708 JOIN nd_experiment_phenotype using(phenotype_id)
709 JOIN nd_experiment_stock using(nd_experiment_id)
710 LEFT JOIN phenome.nd_experiment_md_files using(nd_experiment_id)
711 JOIN stock using(stock_id)
712 WHERE stock.stock_id=?
713 AND phenotype.cvalue_id=?";
715 my $h = $self->bcs_schema->storage->dbh()->prepare($q);
716 $h->execute($stock_id, $trait_cvterm->cvterm_id);
717 while (my ($phenotype_id, $nd_experiment_id, $file_id) = $h->fetchrow_array()) {
718 push @overwritten_values, [ $file_id, $phenotype_id, $nd_experiment_id ];
719 $experiment_ids{$nd_experiment_id} = 1;
720 if ($image_id) {
721 $nd_experiment_md_images{$nd_experiment_id} = $image_id;
726 else {
728 $phenotype = $trait_cvterm->create_related("phenotype_cvalues", {
729 observable_id => $trait_cvterm->cvterm_id,
730 value => $trait_value,
731 uniquename => $plot_trait_uniquename,
734 $self->handle_timestamp($timestamp, $phenotype->phenotype_id);
735 $self->handle_operator($operator, $phenotype->phenotype_id);
737 $experiment->create_related('nd_experiment_phenotypes', {
738 phenotype_id => $phenotype->phenotype_id
741 # $experiment->find_or_create_related({
742 # nd_experiment_phenotypes => [{phenotype_id => $phenotype->phenotype_id}]
743 # });
745 $experiment_ids{$experiment->nd_experiment_id()} = 1;
746 if ($image_id) {
747 $nd_experiment_md_images{$experiment->nd_experiment_id()} = $image_id;
750 my $additional_info_stored;
751 if($additional_info){
752 my $pheno_additional_info = $schema->resultset("Phenotype::Phenotypeprop")->find_or_create({
753 phenotype_id => $phenotype->phenotype_id,
754 type_id => $phenotype_addtional_info_type_id,
756 $pheno_additional_info = $pheno_additional_info->update({
757 value => encode_json $additional_info,
759 $additional_info_stored = $pheno_additional_info->value ? decode_json $pheno_additional_info->value : undef;
761 my $external_references_stored;
762 if($external_references){
763 my $phenotype_external_references = $schema->resultset("Phenotype::Phenotypeprop")->find_or_create({
764 phenotype_id => $phenotype->phenotype_id,
765 type_id => $external_references_type_id,
767 $phenotype_external_references = $phenotype_external_references->update({
768 value => encode_json $external_references,
770 $external_references_stored = $phenotype_external_references->value ? decode_json $phenotype_external_references->value : undef;
773 my $observationVariableDbId = $trait_cvterm->cvterm_id;
774 my $observation_id = $phenotype->phenotype_id;
775 my %details = (
776 "germplasmDbId"=> qq|$linked_data{$plot_name}->{germplasmDbId}|,
777 "germplasmName"=> $linked_data{$plot_name}->{germplasmName},
778 "observationDbId"=> qq|$observation_id|,
779 "observationLevel"=> $linked_data{$plot_name}->{observationLevel},
780 "observationUnitDbId"=> qq|$linked_data{$plot_name}->{observationUnitDbId}|,
781 "observationUnitName"=> $linked_data{$plot_name}->{observationUnitName},
782 "observationVariableDbId"=> qq|$observationVariableDbId|,
783 "observationVariableName"=> $trait_cvterm->name,
784 "studyDbId"=> qq|$project_id|,
785 "uploadedBy"=> $operator ? $operator : "",
786 "additionalInfo" => $additional_info_stored,
787 "externalReferences" => $external_references_stored,
788 "value" => $trait_value
791 if ($timestamp) { $details{'observationTimeStamp'} = $timestamp};
792 if ($operator) { $details{'collector'} = $operator};
794 push @stored_details, \%details;
796 elsif ( !length($trait_value) && !$remove_values && $existing_trait_value ne "" ) {
797 $skip_count++;
803 if (scalar(keys %trait_and_stock_to_overwrite) > 0) {
804 my @saved_nd_experiment_ids = keys %experiment_ids;
805 push @overwritten_values, $self->delete_previous_phenotypes(\%trait_and_stock_to_overwrite, \@saved_nd_experiment_ids);
808 $success_message = 'All values in your file have been successfully processed!<br><br>';
809 $success_message .= "$new_count new values stored<br>";
810 $success_message .= "$skip_count previously stored values skipped<br>";
811 $success_message .= "$overwrite_count previously stored values overwritten<br>";
812 $success_message .= "$remove_count previously stored values removed<br><br>";
813 my %files_with_overwritten_values = map {$_->[0] => 1} @overwritten_values;
814 my $obsoleted_files = $self->check_overwritten_files_status(keys %files_with_overwritten_values);
815 if (scalar (@$obsoleted_files) > 0){
816 $success_message .= ' The following previously uploaded files are now obsolete because all values from them were overwritten by your upload: ';
817 foreach (@$obsoleted_files){
818 $success_message .= " ".$_->[1];
823 try {
824 $schema->txn_do($coderef);
825 } catch {
826 $transaction_error = $_;
829 if ($transaction_error) {
830 $error_message = $transaction_error;
831 print STDERR "Transaction error storing phenotypes: $transaction_error\n";
832 return ($error_message, $success_message);
835 if ($archived_file) {
836 $self->save_archived_file_metadata($archived_file, $archived_file_type, \%experiment_ids);
838 if (scalar(keys %nd_experiment_md_images) > 0) {
839 $self->save_archived_images_metadata(\%nd_experiment_md_images);
842 return ($error_message, $success_message, \@stored_details);
845 sub store_stock_note {
846 my $self = shift;
847 my $stock_id = shift;
848 my $note_array = shift;
849 my $operator = shift;
850 my $note = $note_array->[0];
851 my $timestamp = $note_array->[1];
852 $operator = $note_array->[2] ? $note_array->[2] : $operator;
854 print STDERR "Stock_id is $stock_id and note in sub is $note, timestamp is $timestamp, operator is $operator\n";
856 $note = $note ." (Operator: $operator, Time: $timestamp)";
857 my $stock = $self->bcs_schema()->resultset("Stock::Stock")->find( { stock_id => $stock_id } );
858 $stock->create_stockprops( { 'notes' => $note } );
863 sub store_high_dimensional_data {
864 my $self = shift;
865 my $nirs_hashref = shift;
866 my $nd_experiment_id = shift;
867 my $md_json_type = shift;
868 my %nirs_hash = %{$nirs_hashref};
870 my $protocol_id = $nirs_hash{protocol_id};
871 delete $nirs_hash{protocol_id};
873 my $nirs_json = encode_json \%nirs_hash;
875 my $insert_query = "INSERT INTO metadata.md_json (json_type, json) VALUES (?,?) RETURNING json_id;";
876 my $dbh = $self->bcs_schema->storage->dbh()->prepare($insert_query);
877 $dbh->execute($md_json_type, $nirs_json);
878 my ($json_id) = $dbh->fetchrow_array();
880 my $linking_query = "INSERT INTO phenome.nd_experiment_md_json ( nd_experiment_id, json_id) VALUES (?,?);";
881 $dbh = $self->bcs_schema->storage->dbh()->prepare($linking_query);
882 $dbh->execute($nd_experiment_id,$json_id);
884 my $protocol_query = "INSERT INTO nd_experiment_protocol ( nd_experiment_id, nd_protocol_id) VALUES (?,?);";
885 $dbh = $self->bcs_schema->storage->dbh()->prepare($protocol_query);
886 $dbh->execute($nd_experiment_id,$protocol_id);
888 print STDERR "[StorePhenotypes] Linked $md_json_type json with id $json_id to nd_experiment $nd_experiment_id to protocol $protocol_id\n";
891 sub delete_previous_phenotypes {
892 my $self = shift;
893 my $trait_and_stock_to_overwrite = shift;
894 my $saved_nd_experiment_ids = shift;
895 my $stocks_sql = join ("," , @{$trait_and_stock_to_overwrite->{stocks}});
896 my $traits_sql = join ("," , @{$trait_and_stock_to_overwrite->{traits}});
897 my $saved_nd_experiment_ids_sql = join (",", @$saved_nd_experiment_ids);
898 my $nd_experiment_type_id = SGN::Model::Cvterm->get_cvterm_row($self->bcs_schema, 'phenotyping_experiment', 'experiment_type')->cvterm_id();
900 my $q_search = "
901 SELECT phenotype_id, nd_experiment_id, file_id
902 FROM phenotype
903 JOIN nd_experiment_phenotype using(phenotype_id)
904 JOIN nd_experiment_stock using(nd_experiment_id)
905 JOIN nd_experiment using(nd_experiment_id)
906 LEFT JOIN phenome.nd_experiment_md_files using(nd_experiment_id)
907 JOIN stock using(stock_id)
908 WHERE stock.stock_id IN ($stocks_sql)
909 AND phenotype.cvalue_id IN ($traits_sql)
910 AND nd_experiment_id NOT IN ($saved_nd_experiment_ids_sql)
911 AND nd_experiment.type_id = $nd_experiment_type_id;
914 my $h = $self->bcs_schema->storage->dbh()->prepare($q_search);
915 $h->execute();
917 my %phenotype_ids_and_nd_experiment_ids_to_delete;
918 my @deleted_phenotypes;
919 while (my ($phenotype_id, $nd_experiment_id, $file_id) = $h->fetchrow_array()) {
920 push @{$phenotype_ids_and_nd_experiment_ids_to_delete{phenotype_ids}}, $phenotype_id;
921 push @{$phenotype_ids_and_nd_experiment_ids_to_delete{nd_experiment_ids}}, $nd_experiment_id;
922 push @deleted_phenotypes, [$file_id, $phenotype_id, $nd_experiment_id];
925 if (scalar(@deleted_phenotypes) > 0) {
926 my $delete_phenotype_values_error = CXGN::Project::delete_phenotype_values_and_nd_experiment_md_values($self->dbhost, $self->dbname, $self->dbuser, $self->dbpass, $self->temp_file_nd_experiment_id, $self->basepath, $self->bcs_schema, \%phenotype_ids_and_nd_experiment_ids_to_delete);
927 if ($delete_phenotype_values_error) {
928 die "Error deleting phenotype values ".$delete_phenotype_values_error."\n";
931 return @deleted_phenotypes;
934 sub check_overwritten_files_status {
935 my $self = shift;
936 my @file_ids = shift;
937 #print STDERR Dumper \@file_ids;
939 my $q = "SELECT count(nd_experiment_md_files_id) FROM metadata.md_files JOIN phenome.nd_experiment_md_files using(file_id) WHERE file_id=?;";
940 my $q2 = "UPDATE metadata.md_metadata SET obsolete=1 where metadata_id IN (SELECT metadata_id FROM metadata.md_files where file_id=?);";
941 my $q3 = "SELECT basename FROM metadata.md_files where file_id=?;";
942 my $h = $self->bcs_schema->storage->dbh()->prepare($q);
943 my $h2 = $self->bcs_schema->storage->dbh()->prepare($q2);
944 my $h3 = $self->bcs_schema->storage->dbh()->prepare($q3);
945 my @obsoleted_files;
946 foreach (@file_ids){
947 if ($_){
948 $h->execute($_);
949 my $count = $h->fetchrow;
950 print STDERR "COUNT $count \n";
951 if ($count == 0){
952 $h2->execute($_);
953 $h3->execute($_);
954 my $basename = $h3->fetchrow;
955 push @obsoleted_files, [$_, $basename];
956 print STDERR "MADE file_id $_ OBSOLETE\n";
960 #print STDERR Dumper \@obsoleted_files;
961 return \@obsoleted_files;
964 sub save_archived_file_metadata {
965 my $self = shift;
966 my $archived_file = shift;
967 my $archived_file_type = shift;
968 my $experiment_ids = shift;
969 my $md5checksum;
971 if ($archived_file ne 'none'){
972 my $upload_file = CXGN::UploadFile->new();
973 my $md5 = $upload_file->get_md5($archived_file);
974 $md5checksum = $md5->hexdigest();
977 my $md_row = $self->metadata_schema->resultset("MdMetadata")->create({create_person_id => $self->user_id,});
978 $md_row->insert();
979 my $file_row = $self->metadata_schema->resultset("MdFiles")
980 ->create({
981 basename => basename($archived_file),
982 dirname => dirname($archived_file),
983 filetype => $archived_file_type,
984 md5checksum => $md5checksum,
985 metadata_id => $md_row->metadata_id(),
987 $file_row->insert();
989 foreach my $nd_experiment_id (keys %$experiment_ids) {
990 ## Link the file to the experiment
991 my $experiment_files = $self->phenome_schema->resultset("NdExperimentMdFiles")
992 ->create({
993 nd_experiment_id => $nd_experiment_id,
994 file_id => $file_row->file_id(),
996 $experiment_files->insert();
997 #print STDERR "[StorePhenotypes] Linking file: $archived_file \n\t to experiment id " . $nd_experiment_id . "\n";
1001 sub save_archived_images_metadata {
1002 my $self = shift;
1003 my $nd_experiment_md_images = shift;
1005 my $q = "INSERT into phenome.nd_experiment_md_images (nd_experiment_id, image_id) VALUES (?, ?);";
1006 my $h = $self->bcs_schema->storage->dbh()->prepare($q);
1007 # Check for single image id vs array, then handle accordingly
1008 while (my ($nd_experiment_id, $image_id) = each %$nd_experiment_md_images) {
1009 $h->execute($nd_experiment_id, $image_id);
1013 sub get_linked_data {
1014 my $self = shift;
1015 my %data;
1016 my $unit_list = $self->stock_list;
1017 my $schema = $self->bcs_schema;
1019 my $accession_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'accession', 'stock_type')->cvterm_id;
1021 my $subquery = "
1022 SELECT cvterm_id
1023 FROM cvterm
1024 JOIN cv USING (cv_id)
1025 WHERE cvterm.name IN ('plot_of', 'plant_of', 'subplot_of') AND cv.name = 'stock_relationship'
1028 my $query = "
1029 SELECT unit.stock_id, unit.uniquename, level.name, accession.stock_id, accession.uniquename, nd_experiment.nd_geolocation_id, nd_experiment_project.project_id
1030 FROM stock AS unit
1031 JOIN cvterm AS level ON (unit.type_id = level.cvterm_id)
1032 JOIN stock_relationship AS rel ON (unit.stock_id = rel.subject_id AND rel.type_id IN ($subquery))
1033 JOIN stock AS accession ON (rel.object_id = accession.stock_id AND accession.type_id = $accession_cvterm_id)
1034 JOIN nd_experiment_stock ON (unit.stock_id = nd_experiment_stock.stock_id)
1035 JOIN nd_experiment ON (nd_experiment_stock.nd_experiment_id = nd_experiment.nd_experiment_id)
1036 JOIN nd_experiment_project ON (nd_experiment.nd_experiment_id = nd_experiment_project.nd_experiment_id)
1037 WHERE unit.uniquename = ANY (?)
1040 my $h = $schema->storage->dbh()->prepare($query);
1041 $h->execute($unit_list);
1042 while (my ($unit_id, $unit_name, $level, $accession_id, $accession_name, $location_id, $project_id) = $h->fetchrow_array()) {
1043 $data{$unit_name}{observationUnitName} = $unit_name;
1044 $data{$unit_name}{observationUnitDbId} = $unit_id;
1045 $data{$unit_name}{observationLevel} = $level;
1046 $data{$unit_name}{germplasmDbId} = $accession_id;
1047 $data{$unit_name}{germplasmName} = $accession_name;
1048 $data{$unit_name}{locationDbId} = $location_id;
1049 $data{$unit_name}{studyDbId} = $project_id;
1052 return \%data;
1055 sub handle_timestamp {
1056 my $self = shift;
1057 my $timestamp = shift || undef;
1058 my $phenotype_id = shift;
1060 my $q = "
1061 UPDATE phenotype
1062 SET collect_date = ?,
1063 create_date = DEFAULT
1064 WHERE phenotype_id = ?
1067 my $h = $self->bcs_schema->storage->dbh()->prepare($q);
1068 $h->execute($timestamp, $phenotype_id);
1071 sub handle_operator {
1072 my $self = shift;
1073 my $operator = shift || undef;
1074 my $phenotype_id = shift;
1076 my $q = "
1077 UPDATE phenotype
1078 SET operator = ?
1079 WHERE phenotype_id = ?
1082 my $h = $self->bcs_schema->storage->dbh()->prepare($q);
1083 $h->execute($operator, $phenotype_id);