Merge pull request #3196 from solgenomics/topic/nirs_upload_to_generic_highdim_pheno_...
[sgn.git] / lib / CXGN / Phenotypes / StorePhenotypes.pm
blob76abca2d3092105692e21c4558f5d74e4a2736ad
1 package CXGN::Phenotypes::StorePhenotypes;
3 =head1 NAME
5 CXGN::Phenotypes::StorePhenotypes - an object to handle storing phenotypes for SGN stocks
7 =head1 USAGE
9 my $store_phenotypes = CXGN::Phenotypes::StorePhenotypes->new(
10 basepath=>basepath,
11 dbhost=>dbhost,
12 dbname=>dbname,
13 dbuser=>dbuser,
14 dbpass=>dbpass,
15 temp_file_nd_experiment_id=>$temp_file_nd_experiment_id, #tempfile full name for deleting nd_experiment_ids asynchronously
16 bcs_schema=>$schema,
17 metadata_schema=>$metadata_schema,
18 phenome_schema=>$phenome_schema,
19 user_id=>$user_id,
20 stock_list=>$plots,
21 trait_list=>$traits,
22 values_hash=>$parsed_data,
23 has_timestamps=>$timestamp_included,
24 overwrite_values=>$overwrite,
25 ignore_new_values=>$ignore_new_values,
26 metadata_hash=>$phenotype_metadata,
27 image_zipfile_path=>$image_zip
29 my ($verified_warning, $verified_error) = $store_phenotypes->verify();
30 my ($stored_phenotype_error, $stored_Phenotype_success) = $store_phenotypes->store();
32 =head1 DESCRIPTION
35 =head1 AUTHORS
37 Jeremy D. Edwards (jde22@cornell.edu)
38 Naama Menda (nm249@cornell.edu)
39 Nicolas Morales (nm529@cornell.edu)
40 Bryan Ellerbrock (bje24@cornell.edu)
42 =cut
44 use strict;
45 use warnings;
46 use Moose;
47 use Try::Tiny;
48 use File::Basename qw | basename dirname|;
49 use Digest::MD5;
50 use CXGN::List::Validate;
51 use Data::Dumper;
52 use Scalar::Util qw(looks_like_number);
53 use JSON;
54 use SGN::Image;
55 use CXGN::ZipFile;
56 use CXGN::UploadFile;
57 use CXGN::List::Transform;
58 use CXGN::Stock;
59 use CXGN::Tools::Run;
61 has 'bcs_schema' => (
62 isa => 'Bio::Chado::Schema',
63 is => 'rw',
64 required => 1,
67 has 'metadata_schema' => (
68 isa => 'CXGN::Metadata::Schema',
69 is => 'rw',
70 required => 1,
73 has 'phenome_schema' => (
74 isa => 'CXGN::Phenome::Schema',
75 is => 'rw',
76 required => 1,
79 has 'basepath' => (
80 isa => "Str",
81 is => 'rw',
82 required => 1
85 has 'dbhost' => (
86 isa => "Str",
87 is => 'rw',
88 required => 1
91 has 'dbname' => (
92 isa => "Str",
93 is => 'rw',
94 required => 1
97 has 'dbuser' => (
98 isa => "Str",
99 is => 'rw',
100 required => 1
103 has 'dbpass' => (
104 isa => "Str",
105 is => 'rw',
106 required => 1
109 has 'temp_file_nd_experiment_id' => (
110 isa => "Str",
111 is => 'rw',
112 required => 1
115 has 'user_id' => (
116 isa => "Int",
117 is => 'rw',
118 required => 1
121 has 'stock_list' => (
122 isa => "ArrayRef",
123 is => 'rw',
124 required => 1
127 has 'stock_id_list' => (
128 isa => "ArrayRef[Int]|Undef",
129 is => 'rw',
130 required => 0,
133 has 'trait_list' => (
134 isa => "ArrayRef",
135 is => 'rw',
136 required => 1
139 has 'values_hash' => (
140 isa => "HashRef",
141 is => 'rw',
142 required => 1
145 has 'has_timestamps' => (
146 isa => "Bool",
147 is => 'rw',
148 default => 0
151 has 'overwrite_values' => (
152 isa => "Bool",
153 is => 'rw',
154 default => 0
157 has 'ignore_new_values' => (
158 isa => "Bool",
159 is => 'rw',
160 default => 0
163 has 'metadata_hash' => (
164 isa => "HashRef",
165 is => 'rw',
166 required => 1
169 has 'image_zipfile_path' => (
170 isa => "Str | Undef",
171 is => 'rw',
172 required => 0
175 has 'trait_objs' => (
176 isa => "HashRef",
177 is => 'rw',
180 has 'unique_value_trait_stock' => (
181 isa => "HashRef",
182 is => 'rw',
185 has 'unique_trait_stock' => (
186 isa => "HashRef",
187 is => 'rw',
190 has 'unique_trait_stock_timestamp' => (
191 isa => "HashRef",
192 is => 'rw',
195 #build is used for creating hash lookups in this case
196 sub create_hash_lookups {
197 my $self = shift;
198 my $schema = $self->bcs_schema;
200 #Find trait cvterm objects and put them in a hash
201 my %trait_objs;
202 my @trait_list = @{$self->trait_list};
203 @trait_list = map { $_ eq 'notes' ? () : ($_) } @trait_list; # omit notes from trait validation
204 print STDERR "trait list after filtering @trait_list\n";
205 my @stock_list = @{$self->stock_list};
206 my @cvterm_ids;
208 my $t = CXGN::List::Transform->new();
209 my $stock_id_list = $t->transform($schema, 'stocks_2_stock_ids', \@stock_list);
210 $self->stock_id_list($stock_id_list->{'transform'});
212 foreach my $trait_name (@trait_list) {
213 print STDERR "trait: $trait_name\n";
214 my $trait_cvterm = SGN::Model::Cvterm->get_cvterm_row_from_trait_name($schema, $trait_name);
215 $trait_objs{$trait_name} = $trait_cvterm;
216 push @cvterm_ids, $trait_cvterm->cvterm_id();
218 $self->trait_objs(\%trait_objs);
220 #for checking if values in the file are already stored in the database or in the same file
221 my %check_unique_trait_stock;
222 my %check_unique_trait_stock_timestamp;
223 my %check_unique_value_trait_stock;
225 my $stock_ids_sql = join ("," , @{$self->stock_id_list});
226 #print STDERR "Cvterm ids are @cvterm_ids";
227 if (scalar @cvterm_ids > 0) {
228 my $cvterm_ids_sql = join ("," , @cvterm_ids);
229 my $previous_phenotype_q = "SELECT phenotype.value, phenotype.cvalue_id, phenotype.collect_date, stock.stock_id FROM phenotype LEFT JOIN nd_experiment_phenotype USING(phenotype_id) LEFT JOIN nd_experiment USING(nd_experiment_id) LEFT JOIN nd_experiment_stock USING(nd_experiment_id) LEFT JOIN stock USING(stock_id) WHERE stock.stock_id IN ($stock_ids_sql) AND phenotype.cvalue_id IN ($cvterm_ids_sql);";
230 my $h = $schema->storage->dbh()->prepare($previous_phenotype_q);
231 $h->execute();
233 #my $previous_phenotype_rs = $schema->resultset('Phenotype::Phenotype')->search({'me.cvalue_id'=>{-in=>\@cvterm_ids}, 'stock.stock_id'=>{-in=>$self->stock_id_list}}, {'join'=>{'nd_experiment_phenotypes'=>{'nd_experiment'=>{'nd_experiment_stocks'=>'stock'}}}, 'select' => ['me.value', 'me.cvalue_id', 'stock.stock_id'], 'as' => ['value', 'cvterm_id', 'stock_id']});
234 while (my ($previous_value, $cvterm_id, $collect_timestamp, $stock_id) = $h->fetchrow_array()) {
235 #while (my $previous_phenotype_cvterm = $previous_phenotype_rs->next() ) {
236 #my $cvterm_id = $previous_phenotype_cvterm->get_column('cvterm_id');
237 #my $stock_id = $previous_phenotype_cvterm->get_column('stock_id');
238 if ($stock_id){
239 #my $previous_value = $previous_phenotype_cvterm->get_column('value') || ' ';
240 $collect_timestamp = $collect_timestamp || 'NA';
241 $check_unique_trait_stock{$cvterm_id, $stock_id} = $previous_value;
242 $check_unique_trait_stock_timestamp{$cvterm_id, $stock_id, $collect_timestamp} = $previous_value;
243 $check_unique_value_trait_stock{$previous_value, $cvterm_id, $stock_id} = 1;
248 $self->unique_value_trait_stock(\%check_unique_value_trait_stock);
249 $self->unique_trait_stock(\%check_unique_trait_stock);
250 $self->unique_trait_stock_timestamp(\%check_unique_trait_stock_timestamp);
254 sub verify {
255 my $self = shift;
256 print STDERR "CXGN::Phenotypes::StorePhenotypes verify\n";
258 my @plot_list = @{$self->stock_list};
259 my @trait_list = @{$self->trait_list};
260 @trait_list = map { $_ eq 'notes' ? () : ($_) } @trait_list; # omit notes from trait validation
261 print STDERR Dumper \@trait_list;
262 my %plot_trait_value = %{$self->values_hash};
263 my %phenotype_metadata = %{$self->metadata_hash};
264 my $timestamp_included = $self->has_timestamps;
265 my $archived_image_zipfile_with_path = $self->image_zipfile_path;
266 my $schema = $self->bcs_schema;
267 my $transaction_error;
268 # print STDERR Dumper \@plot_list;
269 # print STDERR Dumper \%plot_trait_value;
270 my $plot_validator = CXGN::List::Validate->new();
271 my $trait_validator = CXGN::List::Validate->new();
272 my @plots_missing = @{$plot_validator->validate($schema,'plots_or_subplots_or_plants_or_tissue_samples_or_analysis_instances',\@plot_list)->{'missing'}};
273 my @traits_missing = @{$trait_validator->validate($schema,'traits',\@trait_list)->{'missing'}};
274 my $error_message = '';
275 my $warning_message = '';
277 if (scalar(@plots_missing) > 0 || scalar(@traits_missing) > 0) {
278 print STDERR "Plots or traits not valid\n";
279 print STDERR "Invalid plots: ".join(", ", map { "'$_'" } @plots_missing)."\n" if (@plots_missing);
280 print STDERR "Invalid traits: ".join(", ", map { "'$_'" } @traits_missing)."\n" if (@traits_missing);
281 $error_message = "Invalid plots: <br/>".join(", <br/>", map { "'$_'" } @plots_missing) if (@plots_missing);
282 $error_message = "Invalid traits: <br/>".join(", <br/>", map { "'$_'" } @traits_missing) if (@traits_missing);
283 return ($warning_message, $error_message);
286 $self->create_hash_lookups();
287 my %trait_objs = %{$self->trait_objs};
288 my %check_unique_value_trait_stock = %{$self->unique_value_trait_stock};
289 my %check_unique_trait_stock = %{$self->unique_trait_stock};
290 my %check_unique_trait_stock_timestamp = %{$self->unique_trait_stock_timestamp};
292 my %check_trait_category;
293 my $sql = "SELECT b.value, c.cvterm_id from cvtermprop as b join cvterm as a on (b.type_id = a.cvterm_id) join cvterm as c on (b.cvterm_id=c.cvterm_id) where a.name = 'trait_categories';";
294 my $sth = $schema->storage->dbh->prepare($sql);
295 $sth->execute();
296 while (my ($category_value, $cvterm_id) = $sth->fetchrow_array) {
297 $check_trait_category{$cvterm_id} = $category_value;
300 my %check_trait_format;
301 $sql = "SELECT b.value, c.cvterm_id from cvtermprop as b join cvterm as a on (b.type_id = a.cvterm_id) join cvterm as c on (b.cvterm_id=c.cvterm_id) where a.name = 'trait_format';";
302 $sth = $schema->storage->dbh->prepare($sql);
303 $sth->execute();
304 while (my ($format_value, $cvterm_id) = $sth->fetchrow_array) {
305 $check_trait_format{$cvterm_id} = $format_value;
308 my %image_plot_full_names;
309 #This is for saving Fieldbook images, which are only associated to a stock. To save images that are associated to a stock and a trait and a value, use the ExcelAssociatedImages parser
310 if ($archived_image_zipfile_with_path) {
312 my $archived_zip = CXGN::ZipFile->new(archived_zipfile_path=>$archived_image_zipfile_with_path);
313 my @archived_zipfile_return = $archived_zip->file_names();
314 if (!@archived_zipfile_return){
315 $error_message = $error_message."<small>Image zipfile could not be read. Is it .zip format?</small><hr>";
316 } else {
317 my $file_names_stripped = $archived_zipfile_return[0];
318 my $file_names_full = $archived_zipfile_return[1];
319 foreach (@$file_names_full) {
320 $image_plot_full_names{$_} = 1;
322 my %plot_name_check;
323 foreach (@plot_list) {
324 $plot_name_check{$_} = 1;
326 foreach my $img_name (@$file_names_stripped) {
327 $img_name = substr($img_name, 0, -20);
328 if ($img_name && !exists($plot_name_check{$img_name})) {
329 $warning_message = $error_message."<small>Image ".$img_name." in images zip file does not reference a plot or plant_name (e.g. the image filename does not have a plot or plant name in it)!</small><hr>";
335 my %check_file_stock_trait_duplicates;
337 foreach my $plot_name (@plot_list) {
338 foreach my $trait_name (@trait_list) {
339 my $value_array = $plot_trait_value{$plot_name}->{$trait_name};
340 #print STDERR Dumper $value_array;
341 my $trait_value = $value_array->[0];
342 my $timestamp = $value_array->[1];
343 #print STDERR "$plot_name, $trait_name, $trait_value\n";
344 if ($trait_value || (defined($trait_value) && $trait_value eq '0')) {
345 my $trait_cvterm = $trait_objs{$trait_name};
346 my $trait_cvterm_id = $trait_cvterm->cvterm_id();
347 my $stock_id = $schema->resultset('Stock::Stock')->find({'uniquename' => $plot_name})->stock_id();
349 if ($trait_value eq '.' || ($trait_value =~ m/[^a-zA-Z0-9,.\-\/\_]/ && $trait_value ne '.')){
350 $error_message = $error_message."<small>Trait values must be alphanumeric with no spaces: <br/>Plot Name: ".$plot_name."<br/>Trait Name: ".$trait_name."<br/>Value: ".$trait_value."</small><hr>";
353 #check that trait value is valid for trait name
354 if (exists($check_trait_format{$trait_cvterm_id})) {
355 if ($check_trait_format{$trait_cvterm_id} eq 'numeric') {
356 my $trait_format_checked = looks_like_number($trait_value);
357 if (!$trait_format_checked) {
358 $error_message = $error_message."<small>This trait value should be numeric: <br/>Plot Name: ".$plot_name."<br/>Trait Name: ".$trait_name."<br/>Value: ".$trait_value."</small><hr>";
361 if ($check_trait_format{$trait_cvterm_id} eq 'image') {
362 $trait_value =~ s/^.*photos\///;
363 if (!exists($image_plot_full_names{$trait_value})) {
364 $error_message = $error_message."<small>For Plot Name: $plot_name there should be a corresponding image named in the zipfile called $trait_value. </small><hr>";
368 if (exists($check_trait_category{$trait_cvterm_id})) {
369 my @trait_categories = split /\//, $check_trait_category{$trait_cvterm_id};
370 my %trait_categories_hash = map { $_ => 1 } @trait_categories;
371 if (!exists($trait_categories_hash{$trait_value})) {
372 $error_message = $error_message."<small>This trait value should be one of ".$check_trait_category{$trait_cvterm_id}.": <br/>Plot Name: ".$plot_name."<br/>Trait Name: ".$trait_name."<br/>Value: ".$trait_value."</small><hr>";
376 #print STDERR "$trait_value, $trait_cvterm_id, $stock_id\n";
377 #check if the plot_name, trait_name combination already exists in database.
378 if (exists($check_unique_value_trait_stock{$trait_value, $trait_cvterm_id, $stock_id})) {
379 $warning_message = $warning_message."<small>$plot_name already has the same value as in your file ($trait_value) stored for the trait $trait_name.</small><hr>";
380 } elsif (exists($check_unique_trait_stock_timestamp{$trait_cvterm_id, $stock_id, $timestamp})) {
381 $warning_message = $warning_message."<small>$plot_name already has a different value ($check_unique_trait_stock_timestamp{$trait_cvterm_id, $stock_id, $timestamp}) than in your file ($trait_value) stored in the database for the trait $trait_name for the timestamp $timestamp.</small><hr>";
382 } elsif (exists($check_unique_trait_stock{$trait_cvterm_id, $stock_id})) {
383 $warning_message = $warning_message."<small>$plot_name already has a different value ($check_unique_trait_stock{$trait_cvterm_id, $stock_id}) than in your file ($trait_value) stored in the database for the trait $trait_name.</small><hr>";
386 #check if the plot_name, trait_name combination already exists in same file.
387 if (exists($check_file_stock_trait_duplicates{$trait_cvterm_id, $stock_id})) {
388 $warning_message = $warning_message."<small>$plot_name already has a value for the trait $trait_name in your file. Possible duplicate in your file?</small><hr>";
390 $check_file_stock_trait_duplicates{$trait_cvterm_id, $stock_id} = 1;
393 if ($timestamp_included) {
394 if ( (!$timestamp && !$trait_value) || ($timestamp && !$trait_value) || ($timestamp && $trait_value) ) {
395 if ($timestamp) {
396 if( !$timestamp =~ m/(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})(\S)(\d{4})/) {
397 $error_message = $error_message."<small>Bad timestamp for value for Plot Name: ".$plot_name."<br/>Trait Name: ".$trait_name."<br/>Should be YYYY-MM-DD HH:MM:SS-0000 or YYYY-MM-DD HH:MM:SS+0000</small><hr>";
406 ## Verify metadata
407 if ($phenotype_metadata{'archived_file'} && (!$phenotype_metadata{'archived_file_type'} || $phenotype_metadata{'archived_file_type'} eq "")) {
408 $error_message = "No file type provided for archived file.";
409 return ($warning_message, $error_message);
411 if (!$phenotype_metadata{'operator'} || $phenotype_metadata{'operator'} eq "") {
412 $error_message = "No operaror provided in file upload metadata.";
413 return ($warning_message, $error_message);
415 if (!$phenotype_metadata{'date'} || $phenotype_metadata{'date'} eq "") {
416 $error_message = "No date provided in file upload metadata.";
417 return ($warning_message, $error_message);
420 return ($warning_message, $error_message);
423 sub store {
424 my $self = shift;
425 print STDERR "CXGN::Phenotypes::StorePhenotypes store\n";
427 $self->create_hash_lookups();
428 my %linked_data = %{$self->get_linked_data()};
429 my @plot_list = @{$self->stock_list};
430 print STDERR Dumper \@plot_list;
431 my @trait_list = @{$self->trait_list};
432 print STDERR Dumper \@trait_list;
433 @trait_list = map { $_ eq 'notes' ? () : ($_) } @trait_list; # omit notes so they can be handled separately
434 my %trait_objs = %{$self->trait_objs};
435 my %plot_trait_value = %{$self->values_hash};
436 print STDERR Dumper %plot_trait_value;
437 my %phenotype_metadata = %{$self->metadata_hash};
438 my $timestamp_included = $self->has_timestamps;
439 my $archived_image_zipfile_with_path = $self->image_zipfile_path;
440 my $phenotype_metadata = $self->metadata_hash;
441 my $schema = $self->bcs_schema;
442 my $metadata_schema = $self->metadata_schema;
443 my $phenome_schema = $self->phenome_schema;
444 my $overwrite_values = $self->overwrite_values;
445 my $ignore_new_values = $self->ignore_new_values;
446 my $error_message;
447 my $transaction_error;
448 my $user_id = $self->user_id;
449 my $archived_file = $phenotype_metadata->{'archived_file'};
450 my $archived_file_type = $phenotype_metadata->{'archived_file_type'};
451 my $operator = $phenotype_metadata->{'operator'};
452 my $upload_date = $phenotype_metadata->{'date'};
453 my $success_message;
455 my $phenotyping_experiment_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'phenotyping_experiment', 'experiment_type')->cvterm_id();
456 my $local_date_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'date', 'local')->cvterm_id();
457 my $local_operator_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'operator', 'local')->cvterm_id();
458 my $plot_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'plot', 'stock_type')->cvterm_id();
459 my $plant_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'plant', 'stock_type')->cvterm_id();
460 my $subplot_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'subplot', 'stock_type')->cvterm_id();
461 my $tissue_sample_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'tissue_sample', 'stock_type')->cvterm_id();
462 my $analysis_instance_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'analysis_instance', 'stock_type')->cvterm_id();
463 my %experiment_ids;
464 my @stored_details;
465 my %nd_experiment_md_images;
467 my %check_unique_trait_stock = %{$self->unique_trait_stock};
469 my $rs;
470 my %data;
471 $rs = $schema->resultset('Stock::Stock')->search(
472 {'type.name' => ['field_layout', 'analysis_experiment', 'sampling_layout'], 'me.type_id' => [$plot_cvterm_id, $plant_cvterm_id, $subplot_cvterm_id, $tissue_sample_cvterm_id, $analysis_instance_cvterm_id], 'me.stock_id' => {-in=>$self->stock_id_list } },
473 {join=> {'nd_experiment_stocks' => {'nd_experiment' => ['type', 'nd_experiment_projects' ] } } ,
474 '+select'=> ['me.stock_id', 'me.uniquename', 'nd_experiment.nd_geolocation_id', 'nd_experiment_projects.project_id'],
475 '+as'=> ['stock_id', 'uniquename', 'nd_geolocation_id', 'project_id']
478 while (my $s = $rs->next()) {
479 $data{$s->get_column('uniquename')} = [$s->get_column('stock_id'), $s->get_column('nd_geolocation_id'), $s->get_column('project_id') ];
482 # print STDERR "DATA: ".Dumper(\%data);
483 ## Use txn_do with the following coderef so that if any part fails, the entire transaction fails.
484 my $coderef = sub {
485 my %trait_and_stock_to_overwrite;
486 my @overwritten_values;
488 foreach my $plot_name (@plot_list) {
490 my $stock_id = $data{$plot_name}[0];
491 my $location_id = $data{$plot_name}[1];
492 my $project_id = $data{$plot_name}[2];
494 # create plot-wide nd_experiment entry
496 my $experiment = $schema->resultset('NaturalDiversity::NdExperiment')->create({
497 nd_geolocation_id => $location_id,
498 type_id => $phenotyping_experiment_cvterm_id,
499 nd_experimentprops => [{type_id => $local_date_cvterm_id, value => $upload_date}, {type_id => $local_operator_cvterm_id, value => $operator}],
500 nd_experiment_projects => [{project_id => $project_id}],
501 nd_experiment_stocks => [{stock_id => $stock_id, type_id => $phenotyping_experiment_cvterm_id}]
504 $experiment_ids{$experiment->nd_experiment_id()}=1;
506 # Check if there is a note for this plot, If so add it using dedicated function
507 my $note_array = $plot_trait_value{$plot_name}->{'notes'};
508 if (defined $note_array) {
509 $self->store_stock_note($stock_id, $note_array, $operator);
512 # Check if there is nirs data for this plot, If so add it using dedicated function
513 my $nirs_hashref = $plot_trait_value{$plot_name}->{'nirs'};
514 if (defined $nirs_hashref) {
515 $self->store_nirs_data($nirs_hashref, $experiment->nd_experiment_id());
518 foreach my $trait_name (@trait_list) {
520 #print STDERR "trait: $trait_name\n";
521 my $trait_cvterm = $trait_objs{$trait_name};
523 my $value_array = $plot_trait_value{$plot_name}->{$trait_name};
524 #print STDERR Dumper $value_array;
525 my $trait_value = $value_array->[0];
526 my $timestamp = $value_array->[1];
527 $operator = $value_array->[2] ? $value_array->[2] : $operator;
528 my $observation = $value_array->[3];
529 my $image_id = $value_array->[4];
530 my $unique_time = $timestamp && defined($timestamp) ? $timestamp : 'NA'.$upload_date;
532 if (defined($trait_value) && length($trait_value)) {
534 #Remove previous phenotype values for a given stock and trait, if $overwrite values is checked
535 if ($overwrite_values) {
536 if (exists($check_unique_trait_stock{$trait_cvterm->cvterm_id(), $stock_id})) {
537 push @{$trait_and_stock_to_overwrite{traits}}, $trait_cvterm->cvterm_id();
538 push @{$trait_and_stock_to_overwrite{stocks}}, $stock_id;
540 $check_unique_trait_stock{$trait_cvterm->cvterm_id(), $stock_id} = 1;
542 if ($ignore_new_values) {
543 if (exists($check_unique_trait_stock{$trait_cvterm->cvterm_id(), $stock_id})) {
544 next;
548 my $plot_trait_uniquename = "Stock: " .
549 $stock_id . ", trait: " .
550 $trait_cvterm->name .
551 " date: $unique_time" .
552 " operator = $operator" ;
554 my $phenotype;
555 if ($observation) {
556 $phenotype = $trait_cvterm->find_related("phenotype_cvalues", {
557 observable_id => $trait_cvterm->cvterm_id,
558 phenotype_id => $observation,
561 ## should check that unit and variable (also checked here) are conserved in parse step, if not reject before store
562 ## should also update operator in nd_experimentprops
564 $phenotype->update({
565 value => $trait_value,
566 uniquename => $plot_trait_uniquename,
569 $self->handle_timestamp($timestamp, $observation);
570 $self->handle_operator($operator, $observation);
572 my $q = "SELECT phenotype_id, nd_experiment_id, file_id
573 FROM phenotype
574 JOIN nd_experiment_phenotype using(phenotype_id)
575 JOIN nd_experiment_stock using(nd_experiment_id)
576 LEFT JOIN phenome.nd_experiment_md_files using(nd_experiment_id)
577 JOIN stock using(stock_id)
578 WHERE stock.stock_id=?
579 AND phenotype.cvalue_id=?";
581 my $h = $self->bcs_schema->storage->dbh()->prepare($q);
582 $h->execute($stock_id, $trait_cvterm->cvterm_id);
583 while (my ($phenotype_id, $nd_experiment_id, $file_id) = $h->fetchrow_array()) {
584 push @overwritten_values, [$file_id, $phenotype_id, $nd_experiment_id];
585 $experiment_ids{$nd_experiment_id}=1;
586 if ($image_id) {
587 $nd_experiment_md_images{$nd_experiment_id} = $image_id;
591 } else {
593 $phenotype = $trait_cvterm->create_related("phenotype_cvalues", {
594 observable_id => $trait_cvterm->cvterm_id,
595 value => $trait_value ,
596 uniquename => $plot_trait_uniquename,
599 $self->handle_timestamp($timestamp, $phenotype->phenotype_id);
600 $self->handle_operator($operator, $phenotype->phenotype_id);
602 $experiment->create_related('nd_experiment_phenotypes', {
603 phenotype_id => $phenotype->phenotype_id
606 # $experiment->find_or_create_related({
607 # nd_experiment_phenotypes => [{phenotype_id => $phenotype->phenotype_id}]
608 # });
610 $experiment_ids{$experiment->nd_experiment_id()}=1;
611 if ($image_id) {
612 $nd_experiment_md_images{$experiment->nd_experiment_id()} = $image_id;
615 my $observationVariableDbId = $trait_cvterm->cvterm_id;
616 my %details = (
617 "germplasmDbId"=> $linked_data{$plot_name}->{germplasmDbId},
618 "germplasmName"=> $linked_data{$plot_name}->{germplasmName},
619 "observationDbId"=> $phenotype->phenotype_id,
620 "observationLevel"=> $linked_data{$plot_name}->{observationLevel},
621 "observationUnitDbId"=> $linked_data{$plot_name}->{observationUnitDbId},
622 "observationUnitName"=> $linked_data{$plot_name}->{observationUnitName},
623 "observationVariableDbId"=> qq|$observationVariableDbId|,
624 "observationVariableName"=> $trait_cvterm->name,
625 "studyDbId"=> $project_id,
626 "uploadedBy"=> $user_id,
627 "value" => $trait_value
630 if ($timestamp) { $details{'observationTimeStamp'} = $timestamp};
631 if ($operator) { $details{'collector'} = $operator};
633 push @stored_details, \%details;
638 if (scalar(keys %trait_and_stock_to_overwrite) > 0) {
639 my @saved_nd_experiment_ids = keys %experiment_ids;
640 push @overwritten_values, $self->delete_previous_phenotypes(\%trait_and_stock_to_overwrite, \@saved_nd_experiment_ids);
643 $success_message = 'All values in your file are now saved in the database!';
644 #print STDERR Dumper \@overwritten_values;
645 my %files_with_overwritten_values = map {$_->[0] => 1} @overwritten_values;
646 my $obsoleted_files = $self->check_overwritten_files_status(keys %files_with_overwritten_values);
647 if (scalar (@$obsoleted_files) > 0){
648 $success_message .= ' The following previously uploaded files are now obsolete because all values from them were overwritten by your upload: ';
649 foreach (@$obsoleted_files){
650 $success_message .= " ".$_->[1];
655 try {
656 $schema->txn_do($coderef);
657 } catch {
658 $transaction_error = $_;
661 if ($transaction_error) {
662 $error_message = $transaction_error;
663 print STDERR "Transaction error storing phenotypes: $transaction_error\n";
664 return ($error_message, $success_message);
667 if ($archived_file) {
668 $self->save_archived_file_metadata($archived_file, $archived_file_type, \%experiment_ids);
670 if (scalar(keys %nd_experiment_md_images) > 0) {
671 $self->save_archived_images_metadata(\%nd_experiment_md_images);
674 return ($error_message, $success_message, \@stored_details);
677 sub store_stock_note {
678 my $self = shift;
679 my $stock_id = shift;
680 my $note_array = shift;
681 my $operator = shift;
682 my $note = $note_array->[0];
683 my $timestamp = $note_array->[1];
684 $operator = $note_array->[2] ? $note_array->[2] : $operator;
686 print STDERR "Stock_id is $stock_id and note in sub is $note, timestamp is $timestamp, operator is $operator\n";
688 $note = $note ." (Operator: $operator, Time: $timestamp)";
689 my $stock = $self->bcs_schema()->resultset("Stock::Stock")->find( { stock_id => $stock_id } );
690 $stock->create_stockprops( { 'notes' => $note } );
693 sub store_nirs_data {
694 my $self = shift;
695 my $nirs_hashref = shift;
696 my $nd_experiment_id = shift;
697 my %nirs_hash = %{$nirs_hashref};
699 my $protocol_id = $nirs_hash{protocol_id};
700 delete $nirs_hash{protocol_id};
702 my $nirs_json = encode_json \%nirs_hash;
704 my $insert_query = "INSERT INTO metadata.md_json (json_type, json) VALUES ('nirs_spectra',?) RETURNING json_id;";
705 my $dbh = $self->bcs_schema->storage->dbh()->prepare($insert_query);
706 $dbh->execute($nirs_json);
707 my ($json_id) = $dbh->fetchrow_array();
709 my $linking_query = "INSERT INTO phenome.nd_experiment_md_json ( nd_experiment_id, json_id) VALUES (?,?);";
710 $dbh = $self->bcs_schema->storage->dbh()->prepare($linking_query);
711 $dbh->execute($nd_experiment_id,$json_id);
713 my $protocol_query = "INSERT INTO nd_experiment_protocol ( nd_experiment_id, nd_protocol_id) VALUES (?,?);";
714 $dbh = $self->bcs_schema->storage->dbh()->prepare($protocol_query);
715 $dbh->execute($nd_experiment_id,$protocol_id);
717 print STDERR "[StorePhenotypes] Linked json with id $json_id to nd_experiment $nd_experiment_id to protocol $protocol_id\n";
720 sub delete_previous_phenotypes {
721 my $self = shift;
722 my $trait_and_stock_to_overwrite = shift;
723 my $saved_nd_experiment_ids = shift;
724 my $stocks_sql = join ("," , @{$trait_and_stock_to_overwrite->{stocks}});
725 my $traits_sql = join ("," , @{$trait_and_stock_to_overwrite->{traits}});
726 my $saved_nd_experiment_ids_sql = join (",", @$saved_nd_experiment_ids);
727 my $nd_experiment_type_id = SGN::Model::Cvterm->get_cvterm_row($self->bcs_schema, 'phenotyping_experiment', 'experiment_type')->cvterm_id();
729 my $q_search = "
730 SELECT phenotype_id, nd_experiment_id, file_id
731 FROM phenotype
732 JOIN nd_experiment_phenotype using(phenotype_id)
733 JOIN nd_experiment_stock using(nd_experiment_id)
734 JOIN nd_experiment using(nd_experiment_id)
735 LEFT JOIN phenome.nd_experiment_md_files using(nd_experiment_id)
736 JOIN stock using(stock_id)
737 WHERE stock.stock_id IN ($stocks_sql)
738 AND phenotype.cvalue_id IN ($traits_sql)
739 AND nd_experiment_id NOT IN ($saved_nd_experiment_ids_sql)
740 AND nd_experiment.type_id = $nd_experiment_type_id;
743 my $h = $self->bcs_schema->storage->dbh()->prepare($q_search);
744 $h->execute();
746 my %phenotype_ids_and_nd_experiment_ids_to_delete;
747 my @deleted_phenotypes;
748 while (my ($phenotype_id, $nd_experiment_id, $file_id) = $h->fetchrow_array()) {
749 push @{$phenotype_ids_and_nd_experiment_ids_to_delete{phenotype_ids}}, $phenotype_id;
750 push @{$phenotype_ids_and_nd_experiment_ids_to_delete{nd_experiment_ids}}, $nd_experiment_id;
751 push @deleted_phenotypes, [$file_id, $phenotype_id, $nd_experiment_id];
753 my $delete_phenotype_values_error = CXGN::Project::delete_phenotype_values_and_nd_experiment_md_values($self->dbhost, $self->dbname, $self->dbuser, $self->dbpass, $self->temp_file_nd_experiment_id, $self->basepath, $self->bcs_schema, \%phenotype_ids_and_nd_experiment_ids_to_delete);
754 if ($delete_phenotype_values_error) {
755 die "Error deleting phenotype values ".$delete_phenotype_values_error."\n";
758 return @deleted_phenotypes;
761 sub check_overwritten_files_status {
762 my $self = shift;
763 my @file_ids = shift;
764 #print STDERR Dumper \@file_ids;
766 my $q = "SELECT count(nd_experiment_md_files_id) FROM metadata.md_files JOIN phenome.nd_experiment_md_files using(file_id) WHERE file_id=?;";
767 my $q2 = "UPDATE metadata.md_metadata SET obsolete=1 where metadata_id IN (SELECT metadata_id FROM metadata.md_files where file_id=?);";
768 my $q3 = "SELECT basename FROM metadata.md_files where file_id=?;";
769 my $h = $self->bcs_schema->storage->dbh()->prepare($q);
770 my $h2 = $self->bcs_schema->storage->dbh()->prepare($q2);
771 my $h3 = $self->bcs_schema->storage->dbh()->prepare($q3);
772 my @obsoleted_files;
773 foreach (@file_ids){
774 if ($_){
775 $h->execute($_);
776 my $count = $h->fetchrow;
777 print STDERR "COUNT $count \n";
778 if ($count == 0){
779 $h2->execute($_);
780 $h3->execute($_);
781 my $basename = $h3->fetchrow;
782 push @obsoleted_files, [$_, $basename];
783 print STDERR "MADE file_id $_ OBSOLETE\n";
787 #print STDERR Dumper \@obsoleted_files;
788 return \@obsoleted_files;
791 sub save_archived_file_metadata {
792 my $self = shift;
793 my $archived_file = shift;
794 my $archived_file_type = shift;
795 my $experiment_ids = shift;
796 my $md5checksum;
798 if ($archived_file ne 'none'){
799 my $upload_file = CXGN::UploadFile->new();
800 my $md5 = $upload_file->get_md5($archived_file);
801 $md5checksum = $md5->hexdigest();
804 my $md_row = $self->metadata_schema->resultset("MdMetadata")->create({create_person_id => $self->user_id,});
805 $md_row->insert();
806 my $file_row = $self->metadata_schema->resultset("MdFiles")
807 ->create({
808 basename => basename($archived_file),
809 dirname => dirname($archived_file),
810 filetype => $archived_file_type,
811 md5checksum => $md5checksum,
812 metadata_id => $md_row->metadata_id(),
814 $file_row->insert();
816 foreach my $nd_experiment_id (keys %$experiment_ids) {
817 ## Link the file to the experiment
818 my $experiment_files = $self->phenome_schema->resultset("NdExperimentMdFiles")
819 ->create({
820 nd_experiment_id => $nd_experiment_id,
821 file_id => $file_row->file_id(),
823 $experiment_files->insert();
824 #print STDERR "[StorePhenotypes] Linking file: $archived_file \n\t to experiment id " . $nd_experiment_id . "\n";
828 sub save_archived_images_metadata {
829 my $self = shift;
830 my $nd_experiment_md_images = shift;
832 my $q = "INSERT into phenome.nd_experiment_md_images (nd_experiment_id, image_id) VALUES (?, ?);";
833 my $h = $self->bcs_schema->storage->dbh()->prepare($q);
834 # Check for single image id vs array, then handle accordingly
835 while (my ($nd_experiment_id, $image_id) = each %$nd_experiment_md_images) {
836 $h->execute($nd_experiment_id, $image_id);
840 sub get_linked_data {
841 my $self = shift;
842 my %data;
843 my $unit_list = $self->stock_list;
844 my $schema = $self->bcs_schema;
846 my $accession_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'accession', 'stock_type')->cvterm_id;
848 my $subquery = "
849 SELECT cvterm_id
850 FROM cvterm
851 JOIN cv USING (cv_id)
852 WHERE cvterm.name IN ('plot_of', 'plant_of', 'subplot_of') AND cv.name = 'stock_relationship'
855 my $query = "
856 SELECT unit.stock_id, unit.uniquename, level.name, accession.stock_id, accession.uniquename, nd_experiment.nd_geolocation_id, nd_experiment_project.project_id
857 FROM stock AS unit
858 JOIN cvterm AS level ON (unit.type_id = level.cvterm_id)
859 JOIN stock_relationship AS rel ON (unit.stock_id = rel.subject_id AND rel.type_id IN ($subquery))
860 JOIN stock AS accession ON (rel.object_id = accession.stock_id AND accession.type_id = $accession_cvterm_id)
861 JOIN nd_experiment_stock ON (unit.stock_id = nd_experiment_stock.stock_id)
862 JOIN nd_experiment ON (nd_experiment_stock.nd_experiment_id = nd_experiment.nd_experiment_id)
863 JOIN nd_experiment_project ON (nd_experiment.nd_experiment_id = nd_experiment_project.nd_experiment_id)
864 WHERE unit.uniquename = ANY (?)
867 my $h = $schema->storage->dbh()->prepare($query);
868 $h->execute($unit_list);
869 while (my ($unit_id, $unit_name, $level, $accession_id, $accession_name, $location_id, $project_id) = $h->fetchrow_array()) {
870 $data{$unit_name}{observationUnitName} = $unit_name;
871 $data{$unit_name}{observationUnitDbId} = $unit_id;
872 $data{$unit_name}{observationLevel} = $level;
873 $data{$unit_name}{germplasmDbId} = $accession_id;
874 $data{$unit_name}{germplasmName} = $accession_name;
875 $data{$unit_name}{locationDbId} = $location_id;
876 $data{$unit_name}{studyDbId} = $project_id;
879 return \%data;
882 sub handle_timestamp {
883 my $self = shift;
884 my $timestamp = shift || undef;
885 my $phenotype_id = shift;
887 my $q = "
888 UPDATE phenotype
889 SET collect_date = ?,
890 create_date = DEFAULT
891 WHERE phenotype_id = ?
894 my $h = $self->bcs_schema->storage->dbh()->prepare($q);
895 $h->execute($timestamp, $phenotype_id);
898 sub handle_operator {
899 my $self = shift;
900 my $operator = shift || undef;
901 my $phenotype_id = shift;
903 my $q = "
904 UPDATE phenotype
905 SET operator = ?
906 WHERE phenotype_id = ?
909 my $h = $self->bcs_schema->storage->dbh()->prepare($q);
910 $h->execute($operator, $phenotype_id);