fixed get stocks functions
[sgn.git] / lib / CXGN / Analysis.pm
blobc5076e1508f8a1268f56a9c9c29f31c47a9376ed
2 =encoding utf-8
4 =head1 NAME
6 CXGN::Analysis - manage analyses on Breedbase
8 =head1 DESCRIPTION
10 Analyses are stored much like trials, starting out in the project table, and linking through to nd_experiment and stock through linking tables, as well as to phenotype to store the analysis results. Additional metadata is stored in a projectprop with the type_id 'analysis_metadata_json'. The type of the project is 'analysis_project' (stored in a projectprop as well). Each analysis is assigned to a user, using an sp_person_id stored in a projectprop.
12 =head2 TYPES
14 The data structure is built using type ids that are different from a regular field trial.
16 =over 4
18 =item nd_experiment.type_id
20 The nd_experiment.type_id links to 'analysis_experiment' (nd_experiment_property) (equivalent to 'field_experiment' in a trial)
22 =item stock.type_id
24 The stock.type_id links to 'analysis_instance' (stock_property) (equivalent to 'plot' in a trial)
26 =item stock_relationship.type_id
28 The stock_relationship.type_id links to 'analysis_of' (equivalent to 'plot_of' in field trials)
30 This is summarized in the following table:
32 ┌──────────────────┬───────────────────────┬───────────────────┬────────────────┐
33 │ project type │ nd_experiment.type_id │ stock.type_id │ stock_relation │
34 │ │ │ │ ship.type_id │
35 ├──────────────────┼───────────────────────┼───────────────────┼────────────────┤
36 │ trial │ field_experiment │ plot │ plot_of │
37 ├──────────────────┼───────────────────────┼───────────────────┼────────────────┤
38 │ genotyping_plate │ genotyping_experiment │ tissue_sample │ sample_of │
39 ├──────────────────┼───────────────────────┼───────────────────┼────────────────┤
40 │ analysis │ analysis_experiment │ analysis_instance │ analysis_of │
41 └──────────────────┴───────────────────────┴───────────────────┴────────────────┘
44 =back
46 The data in analysis_metadata_json is managed by the CXGN::Analysis::AnalysisMetadata class and contains the dataset_id used to generate the analysis, the actual analysis protocol that was run, and the traits relevant to the analysis.
48 =head1 AUTHOR
50 Lukas Mueller <lam87@cornell.edu>
52 =head1 METHODS
54 =cut
56 package CXGN::Analysis;
58 use Moose;
60 extends 'CXGN::Project';
62 use Try::Tiny;
63 use DateTime;
64 use Data::Dumper;
65 use CXGN::Trial::TrialCreate;
66 use CXGN::Trial::TrialDesign;
67 use CXGN::Trial::TrialDesignStore;
68 use CXGN::Trial::TrialLayout;
69 use CXGN::Phenotypes::StorePhenotypes;
70 use CXGN::Analysis::AnalysisMetadata;
71 use CXGN::List::Transform;
72 use CXGN::Dataset;
73 use CXGN::AnalysisModel::SaveModel;
74 use CXGN::People::Person;
75 use CXGN::AnalysisModel::GetModel;
=head2 bcs_schema()

Required. A Bio::Chado::Schema object.

=cut

has bcs_schema => (
    isa      => 'Bio::Chado::Schema',
    is       => 'rw',
    required => 1,
);

=head2 people_schema()

Required. A CXGN::People::Schema object.

=cut

has people_schema => (
    isa      => 'CXGN::People::Schema',
    is       => 'rw',
    required => 1,
);

=head2 metadata_schema()

Required. A CXGN::Metadata::Schema object.

=cut

has metadata_schema => (
    isa      => 'CXGN::Metadata::Schema',
    is       => 'rw',
    required => 1,
);

=head2 phenome_schema()

Required. A CXGN::Phenome::Schema object.

=cut

has phenome_schema => (
    isa      => 'CXGN::Phenome::Schema',
    is       => 'rw',
    required => 1,
);

=head2 project_id()

Not declared in this class (the declaration below is commented out).

=cut

#has 'project_id' => (is => 'rw', isa => 'Int');

=head2 name()

Not declared in this class (the declaration below is commented out).

=cut

#has 'name' => (is => 'rw', isa => 'Str');

=head2 description()

Not declared in this class (the declaration below is commented out).

=cut

##has 'description' => (is => 'rw', isa => 'Str', default => "No description");

=head2 breeding_program_id()

Integer project_id of the breeding program the analysis belongs to.

=cut

has breeding_program_id => (
    isa => 'Int',
    is  => 'rw',
);

=head2 accession_names()

Array reference (or undef) of accession names. Built lazily by
_load_accession_names().

=cut

has accession_names => (
    isa     => 'Maybe[ArrayRef]',
    is      => 'rw',
    lazy    => 1,
    builder => '_load_accession_names',
);

=head2 design()

The analysis design. Built lazily by _get_layout().

=cut

has design => (
    isa     => 'Ref',
    is      => 'rw',
    lazy    => 1,
    builder => '_get_layout',
);

=head2 traits()

Array reference of traits. Built lazily by _load_traits().

=cut

has traits => (
    isa     => 'ArrayRef',
    is      => 'rw',
    lazy    => 1,
    builder => '_load_traits',
);

=head2 nd_geolocation_id()

Integer nd_geolocation_id of the analysis location, or undef.

=cut

has nd_geolocation_id => (
    isa => 'Maybe[Int]',
    is  => 'rw',
);

=head2 user_id()

Integer sp_person_id of the user the analysis is assigned to.

=cut

has user_id => (
    isa => 'Int',
    is  => 'rw',
);

=head2 user_role()

Role of the user, as a string.

=cut

has user_role => (
    isa => 'Str',
    is  => 'rw',
);

=head2 analysis_model_protocol_id()

nd_protocol_id of save model information

=cut

has analysis_model_protocol_id => (
    isa => 'Int|Undef',
    is  => 'rw',
);

=head2 metadata()

CXGN::Analysis::AnalysisMetadata object.

=cut

has metadata => (
    isa => 'Maybe[CXGN::Analysis::AnalysisMetadata]',
    is  => 'rw',
);

#sub BUILDARGS {
#    my $self = shift;
#    my $args = shift;
#    $args->{trial_id} = $args->{project_id};

#has 'project' => (isa => 'CXGN::Project', is => 'rw');

=head2 year()

year the analysis was done.

=cut

#has 'year' => (isa => 'Str', is => 'rw');

=head2 saved_model()

information about the saved model.

=cut

has saved_model => (
    isa => 'HashRef',
    is  => 'rw',
);
# Moose post-construction hook.
#
# Dies with "need a project id..." unless get_trial_id() returns a true
# value. For an existing project it:
#   * copies the first element of get_location() into nd_geolocation_id,
#   * loads the 'analysis_metadata_json' projectprop into a
#     CXGN::Analysis::AnalysisMetadata object (storing a fresh one if the
#     project has none yet) and sets it as metadata(),
#   * looks up the nd_protocol linked to the project's analysis experiment
#     and, if found, sets saved_model() from CXGN::AnalysisModel::GetModel.
#
# Args: $args - the constructor argument hashref; bcs_schema is read from it.
sub BUILD {
    my $self = shift;
    my $args = shift;

    print STDERR "BUILD CXGN::Analysis...\n";

    if (!$self->get_trial_id()) {
        die "need a project id...";
    }

    my $schema   = $args->{bcs_schema};
    my $trial_id = $self->get_trial_id();

    # Fetch the location once and reuse it for the log line and the attribute.
    my $location_id = $self->get_location()->[0];
    print STDERR "Location id retrieved : = ".($location_id // '')."\n";
    $self->nd_geolocation_id($location_id);

    my $metadata_json_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'analysis_metadata_json', 'project_property')->cvterm_id();
    my $rs = $self->bcs_schema()->resultset("Project::Projectprop")->search( { project_id => $trial_id, type_id => $metadata_json_id });

    my $prop_id;
    if ($rs->count() > 0) {
        $prop_id = $rs->first()->projectprop_id();
    }

    print STDERR "Create AnalysisMetadata object...\n";
    my $metadata = CXGN::Analysis::AnalysisMetadata->new( { bcs_schema => $schema, prop_id => $prop_id });
    $self->metadata($metadata);

    $prop_id = $metadata->prop_id();

    my $time = DateTime->now();

    # prop_id is legitimately undef for a project without stored metadata,
    # so it must not be interpolated directly (uninitialized-value warning).
    print STDERR "prop_id is ".($prop_id // 'undefined')."...\n";
    if (! defined($prop_id)) {
        print STDERR "project_id = ".$trial_id." with stockprop_id = undefined...storing metadata...\n";
        $metadata->parent_id($trial_id);
        $metadata->create_timestamp($time->ymd()." ".$time->hms());
        $metadata->store();
    }

    my $analysis_nd_experiment_type_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'analysis_experiment', 'experiment_type')->cvterm_id();

    # Both values are bound as placeholders; selectrow_array fetches the
    # first row and finishes the statement handle.
    my $nd_protocol_q = "SELECT nd_protocol_id FROM nd_experiment_protocol JOIN nd_experiment ON (nd_experiment_protocol.nd_experiment_id = nd_experiment.nd_experiment_id) JOIN nd_experiment_project ON (nd_experiment_project.nd_experiment_id = nd_experiment.nd_experiment_id) WHERE nd_experiment.type_id=? AND project_id=?;";
    my ($nd_protocol_id) = $schema->storage->dbh()->selectrow_array($nd_protocol_q, undef, $analysis_nd_experiment_type_id, $trial_id);

    if ($nd_protocol_id) {
        my $m = CXGN::AnalysisModel::GetModel->new({
            bcs_schema=>$schema,
            metadata_schema=>$self->metadata_schema(),
            phenome_schema=>$self->phenome_schema(),
            nd_protocol_id=>$nd_protocol_id
        });
        my $saved_model_object = $m->get_model();
        $self->saved_model($saved_model_object);
    }

    return;
}
261 =head2 retrieve_analyses_by_user
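263 Usage: my @analyses = CXGN::Analysis->retrieve_analyses_by_user($bcs_schema, $people_schema, $metadata_schema, $phenome_schema, $user_id);
264 Desc: Class function to retrieve all analyses by user_id
265 Ret: a list of CXGN::Analysis objects
266 Args: $bcs_schema - a BCS schema object, $people_schema, $metadata_schema, $phenome_schema - the schema objects passed on to the CXGN::Analysis constructor, $user_id - the numeric id of a user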
267 Side Effects:
268 Example:
270 =cut
# Class method: returns a list of CXGN::Analysis objects, one for every
# project that carries both a 'project_sp_person_id' projectprop whose value
# equals $user_id and an 'analysis_metadata_json' projectprop.
#
# Args: $bcs_schema, $people_schema, $metadata_schema, $phenome_schema -
#       schema objects handed to each CXGN::Analysis constructor;
#       $user_id - the id matched against the projectprop value.
sub retrieve_analyses_by_user {
    my ($class, $bcs_schema, $people_schema, $metadata_schema, $phenome_schema, $user_id) = @_;

    my $owner_type_id = SGN::Model::Cvterm->get_cvterm_row($bcs_schema, 'project_sp_person_id', 'project_property')->cvterm_id();
    my $metadata_type_id = SGN::Model::Cvterm->get_cvterm_row($bcs_schema, 'analysis_metadata_json', 'project_property')->cvterm_id();

    my $query = "SELECT userinfo.project_id FROM projectprop AS userinfo JOIN projectprop AS analysisinfo on (userinfo.project_id=analysisinfo.project_id) WHERE userinfo.type_id=? AND analysisinfo.type_id=? AND userinfo.value=?";

    my $sth = $bcs_schema->storage()->dbh()->prepare($query);
    $sth->execute($owner_type_id, $metadata_type_id, $user_id);

    my @found;
    while (my ($analysis_project_id) = $sth->fetchrow_array()) {
        print STDERR "Instantiating analysis project for project ID $analysis_project_id...\n";
        my $analysis = CXGN::Analysis->new({
            bcs_schema      => $bcs_schema,
            people_schema   => $people_schema,
            metadata_schema => $metadata_schema,
            phenome_schema  => $phenome_schema,
            trial_id        => $analysis_project_id,
        });
        push @found, $analysis;
    }

    return @found;
}
# Stores the analysis: owner, metadata and design.
#
# Args: $precomputed_design_to_save - optional design hashref. When it is
#       false, a design of type "Analysis" is calculated from
#       accession_names() with CXGN::Trial::TrialDesign.
# Ret:  the value of get_trial_id().
# Dies: when user_id, description, name or breeding_program_id is missing;
#       when the "[Computation]" location or the breeding program project
#       row cannot be found; when the design cannot be calculated; or when
#       saving the trial fails.
sub create_and_store_analysis_design {
    my $self = shift;
    my $precomputed_design_to_save = shift; #DESIGN HASHREF

    my $schema = $self->bcs_schema();
    my $dbh = $schema->storage->dbh();

    print STDERR "CREATE AND STORE ANALYSIS DESIGN...\n";

    if (!$self->user_id()) {
        die "Need an sp_person_id to store an analysis.";
    }
    if (!$self->get_description()) {
        die "Need a description to store an analysis.";
    }
    if (!$self->get_name()) {
        die "Need a name to store an analysis.";
    }
    if (!$self->breeding_program_id()) {
        die "Need a breeding program to store an analysis.";
    }

    my $p = CXGN::People::Person->new($dbh, $self->user_id);
    my $user_name = $p->get_username;

    if (!$self->year()) {
        my $dt = DateTime->now();
        my $year = $dt->year();
        print STDERR "Year: $year\n";
        print STDERR "No year provided. Using current year ($year).\n";
        $self->year($year);
    }

    # Analyses are attached to the "[Computation]" location. Fail with a
    # clear message if it is missing instead of calling a method on undef.
    my $computation_location_name = "[Computation]";
    my $computation_location = $schema->resultset("NaturalDiversity::NdGeolocation")->search({ description => $computation_location_name })->first();
    if (!$computation_location) {
        die "The location '$computation_location_name' does not exist in the database. Cannot store the analysis.";
    }
    my $calculation_location_id = $computation_location->nd_geolocation_id();
    $self->nd_geolocation_id($calculation_location_id);
    $self->set_location($calculation_location_id);

    my $breeding_program = $schema->resultset("Project::Project")->find({project_id=>$self->breeding_program_id()});
    if (!$breeding_program) {
        die "The breeding program with project_id ".$self->breeding_program_id()." does not exist. Cannot store the analysis.";
    }
    my $breeding_program_name = $breeding_program->name();
    $self->set_breeding_program($self->breeding_program_id());

    # store user info

    print STDERR "Storing user info...\n";
    my $project_sp_person_term_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'project_sp_person_id', 'project_property')->cvterm_id();
    $schema->resultset("Project::Projectprop")->create({
        project_id => $self->get_trial_id(),
        type_id=>$project_sp_person_term_cvterm_id,
        value=>$self->user_id(),
    });

    # Store metadata

    my $time = DateTime->now();
    if (!$self->metadata()) {
        print STDERR "Storing metadata...\n";
        my $metadata = CXGN::Analysis::AnalysisMetadata->new({ bcs_schema => $schema });
        print STDERR "Analysis ID = ".$self->get_trial_id()."\n";
        $metadata->parent_id($self->get_trial_id());
        $self->metadata( $metadata );
        $self->metadata()->create_timestamp($time->ymd()." ".$time->hms());
    }

    # store dataset info, if available. Copy the actual dataset json,
    # so that dataset info is frozen and does not reflect future
    # changes.

    if ($self->metadata()->dataset_id()) {
        print STDERR "Retrieving data for dataset_id ".$self->metadata->dataset_id()."\n";
        my $ds = CXGN::Dataset->new( { schema => $schema, people_schema => $self->people_schema(), sp_dataset_id => $self->metadata()->dataset_id() });
        my $data = $ds->to_hashref();
        #print STDERR "DATA: $data\n";
        $self->metadata()->dataset_data(JSON::Any->encode($data));
    }
    else {
        print STDERR "No dataset_id provided...\n";
    }

    $self->metadata()->parent_id($self->get_trial_id());
    $self->metadata()->modified_timestamp($time->ymd()." ".$time->hms());
    $self->metadata()->store();

    my $design;
    if (!$precomputed_design_to_save) {
        print STDERR "Create a new analysis design...\n";
        my $td = CXGN::Trial::TrialDesign->new();

        $td->set_trial_name($self->name());
        $td->set_stock_list($self->accession_names());
        $td->set_design_type("Analysis");

        if ($td->calculate_design()) {
            print STDERR "Design calculated :-) ...\n";
            $design = $td->get_design();
            $self->design($design);
        }
        else {
            die "An error occurred creating the analysis design.";
        }
    } else {
        $design = $precomputed_design_to_save;
    }
    # print STDERR Dumper $design;

    print STDERR "Store design...\n";

    my $saved_model_protocol_id;
    if ($self->analysis_model_protocol_id) {
        $saved_model_protocol_id = $self->analysis_model_protocol_id();
    }

    my $analysis_experiment_type_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'analysis_experiment', 'experiment_type')->cvterm_id();
    my $trial_create = CXGN::Trial::TrialCreate->new({
        trial_id => $self->get_trial_id(),
        chado_schema => $schema,
        dbh => $dbh,
        operator => $user_name,
        design => $design,
        design_type => $analysis_experiment_type_id,
        program => $breeding_program_name,
        trial_year => $self->year(),
        trial_description => $self->description(),
        trial_location => $computation_location_name,
        trial_name => $self->name(),
        trial_type => $analysis_experiment_type_id,
        is_analysis => 1,
        analysis_model_protocol_id => $saved_model_protocol_id,
    });

    # my $validate_error = $trial_create->validate_design();
    # my $store_error;
    # if ($validate_error) {
    #     print STDERR "VALIDATE ERROR! "; #.Dumper($validate_error)."\n";
    # }
    # else {
    ##    print STDERR "Valiation successful. Storing...\n";
    #     try { $store_error = $design_store->store() }
    #     catch { $store_error = $_ };
    # }
    # if ($store_error) {
    #     die "ERROR SAVING TRIAL!: $store_error\n";
    # }

    try {
        $trial_create->save_trial();
    }
    catch {
        die "Error saving trial: $_";
    };

    print STDERR "Done with design create & store.\n";
    return $self->get_trial_id();
}
452 # store analysis values is a separate call and has to be called after
453 # storing the design
# Verifies and stores the analysis result values as phenotypes through
# CXGN::Phenotypes::StorePhenotypes.
#
# Args (positional): $metadata_schema, $phenome_schema, $values (values
#       hash), $plots (stock list), $traits (trait list), $operator,
#       $basepath, $dbhost, $dbname, $dbuser, $dbpass, $tempfile_path.
# Warns when verification yields a warning; dies when verification or
# storing yields an error.
sub store_analysis_values {
    my (
        $self,     $metadata_schema, $phenome_schema, $values,
        $plots,    $traits,          $operator,       $basepath,
        $dbhost,   $dbname,          $dbuser,         $dbpass,
        $tempfile_path,
    ) = @_;

    print STDERR "Storing analysis values...\n";

    my $now = DateTime->now();
    my %phenotype_metadata = (
        'archived_file'      => 'none',
        'archived_file_type' => 'analysis_values',
        'operator'           => $operator,
        'date'               => $now->ymd()."_".$now->hms(),
    );

    my $phenotype_store = CXGN::Phenotypes::StorePhenotypes->new({
        bcs_schema                 => $self->bcs_schema(),
        basepath                   => $basepath,
        dbhost                     => $dbhost,
        dbname                     => $dbname,
        dbuser                     => $dbuser,
        dbpass                     => $dbpass,
        temp_file_nd_experiment_id => $tempfile_path,
        metadata_schema            => $metadata_schema,
        phenome_schema             => $phenome_schema,
        user_id                    => $self->user_id(),
        stock_list                 => $plots,
        trait_list                 => $traits,
        values_hash                => $values,
        has_timestamps             => 0,
        overwrite_values           => 0,
        metadata_hash              => \%phenotype_metadata,
    });

    my ($verify_warning, $verify_error) = $phenotype_store->verify();

    if ($verify_warning) {
        warn $verify_warning;
    }
    if ($verify_error) {
        die $verify_error;
    }

    my ($store_error, $store_success) = $phenotype_store->store();

    if ($store_error) {
        die "An error occurred storing the phenotypes: $store_error\n";
    }
}
# Builder for the 'design' attribute: returns a CXGN::Trial::TrialLayout
# object for this project with experiment type 'analysis_experiment'.
sub _get_layout {
    my ($self) = @_;

    my %layout_args = (
        schema          => $self->bcs_schema(),
        trial_id        => $self->get_trial_id(),
        experiment_type => 'analysis_experiment',
    );

    return CXGN::Trial::TrialLayout->new(\%layout_args);
}
# Returns an array reference holding the phenotype matrix of this analysis,
# as produced by CXGN::Phenotypes::PhenotypeMatrix for data level
# 'analysis_instance' and experiment type 'analysis_experiment'.
sub get_phenotype_matrix {
    my ($self) = @_;

    my $matrix_search = CXGN::Phenotypes::PhenotypeMatrix->new(
        bcs_schema      => $self->bcs_schema(),
        search_type     => "MaterializedViewTable",
        data_level      => "analysis_instance",
        experiment_type => "analysis_experiment",
        trial_list      => [ $self->get_trial_id() ],
    );

    my @matrix = $matrix_search->get_phenotype_matrix();
    return \@matrix;
}
# Builder for the 'accession_names' attribute. Logs the accession names of
# the design to STDERR and returns what the design's get_accession_names()
# returns.
sub _load_accession_names {
    my ($self) = @_;

    my $layout = $self->design();
    #print STDERR "Design = ".Dumper($layout);

    my @logged_names = $layout->get_accession_names();
    print STDERR "ACCESSIONS: ". Dumper(\@logged_names);

    # get the accessions from the design (not the dataset!)
    return $self->design()->get_accession_names();
}
# Builder for the 'traits' attribute.
#
# Takes the first row of get_phenotype_matrix() as the header and returns an
# array reference with every header column from index 39 onwards.
# NOTE(review): presumably the first 39 columns are fixed, non-trait columns
# of the phenotype matrix - confirm against CXGN::Phenotypes::PhenotypeMatrix.
#
# Returns an empty array reference when the matrix has no header row or the
# header has no columns past index 38.
sub _load_traits {
    my $self = shift;

    my $first_trait_column = 39;

    my $phenotypes = $self->get_phenotype_matrix();
    my $header = $phenotypes->[0];

    # An empty matrix yields no header; dereferencing undef would die.
    my $traits = [];
    if (ref($header) eq 'ARRAY') {
        $traits = [ @$header[$first_trait_column .. $#{$header}] ];
    }

    print STDERR "_load_traits: TRAITS: ".Dumper($traits);
    #$self->traits($traits);
    return $traits;
}
572 #__PACKAGE__->meta->make_immutable;