add is_variable accessor.
[sgn.git] / lib / CXGN / Analysis.pm
blobf6d844a5aa839f59f9f55046042540d0044be4a9
2 =encoding utf-8
4 =head1 NAME
6 CXGN::Analysis - manage analyses on Breedbase
8 =head1 DESCRIPTION
10 Analyses are stored much like trials, starting out in the project table, and linking through to nd_experiment and stock through linking tables, as well as phenotype to store the analysis results. Additional metadata is stored in a projectprop with the type_id 'analysis_metadata_json'. The type of the project is 'analysis_project' (stored in a projectprop as well). Each analysis is assigned to a user, using an sp_person_id assigned in a projectprop.
12 =head2 TYPES
14 The data structure is built using type ids that are different from a regular field trial.
16 =over 4
18 =item nd_experiment.type_id
20 The nd_experiment.type_id links to 'analysis_experiment' (nd_experiment_property) (equivalent to 'field_experiment' in a trial),
22 =item stock.type_id
24 The stock.type_id links to 'analysis_instance' (stock_property) (equivalent to 'plot' in a trial)
26 =item stock_relationship.type_id
28 The stock_relationship.type_id links to 'analysis_of' (equivalent to 'plot_of' in field trials)
30 This is summarized in the following table:
32 ┌──────────────────┬───────────────────────┬───────────────────┬────────────────┐
33 │ project type │ nd_experiment.type_id │ stock.type_id │ stock_relation │
34 │ │ │ │ ship.type_id │
35 ├──────────────────┼───────────────────────┼───────────────────┼────────────────┤
36 │ trial │ field_experiment │ plot │ plot_of │
37 ├──────────────────┼───────────────────────┼───────────────────┼────────────────┤
38 │ genotyping_plate │ genotyping_experiment │ tissue_sample │ sample_of │
39 ├──────────────────┼───────────────────────┼───────────────────┼────────────────┤
40 │ analysis │ analysis_experiment │ analysis_instance │ analysis_of │
41 └──────────────────┴───────────────────────┴───────────────────┴────────────────┘
44 =back
46 The data in analysis_metadata_json is managed by the CXGN::Analysis::AnalysisMetadata class and contains the dataset_id used to generate the analysis, the actual analysis protocol that was run, and the traits relevant to the analysis.
48 =head1 AUTHOR
50 Lukas Mueller <lam87@cornell.edu>
52 =head1 METHODS
54 =cut
56 package CXGN::Analysis;
58 use Moose;
60 extends 'CXGN::Project';
62 use Try::Tiny;
63 use DateTime;
64 use Data::Dumper;
65 use CXGN::Trial::TrialCreate;
66 use CXGN::Trial::TrialDesign;
67 use CXGN::Trial::TrialDesignStore;
68 use CXGN::Trial::TrialLayout;
69 use CXGN::Phenotypes::StorePhenotypes;
70 use CXGN::Analysis::AnalysisMetadata;
71 use CXGN::List::Transform;
72 use CXGN::Dataset;
73 use CXGN::AnalysisModel::SaveModel;
74 use CXGN::People::Person;
75 use CXGN::AnalysisModel::GetModel;
76 use JSON::XS;
=head2 bcs_schema()

Bio::Chado::Schema object; required at construction time.

=cut

has 'bcs_schema' => (
    isa      => 'Bio::Chado::Schema',
    is       => 'rw',
    required => 1,
);

=head2 people_schema()

CXGN::People::Schema object; required at construction time.

=cut

has 'people_schema' => (
    isa      => 'CXGN::People::Schema',
    is       => 'rw',
    required => 1,
);

=head2 metadata_schema()

CXGN::Metadata::Schema object; required at construction time.

=cut

has 'metadata_schema' => (
    isa      => 'CXGN::Metadata::Schema',
    is       => 'rw',
    required => 1,
);

=head2 phenome_schema()

CXGN::Phenome::Schema object; required at construction time.

=cut

has 'phenome_schema' => (
    isa      => 'CXGN::Phenome::Schema',
    is       => 'rw',
    required => 1,
);

=head2 project_id()

=cut

#has 'project_id' => (is => 'rw', isa => 'Int');

=head2 name()

=cut

#has 'name' => (is => 'rw', isa => 'Str');

=head2 description()

=cut

##has 'description' => (is => 'rw', isa => 'Str', default => "No description");

=head2 breeding_program_id()

Integer id of the breeding program the analysis belongs to.

=cut

has 'breeding_program_id' => (
    isa => 'Int',
    is  => 'rw',
);

=head2 accession_names()

Arrayref (or undef); built lazily by _load_accession_names().

=cut

has 'accession_names' => (
    isa     => 'Maybe[ArrayRef]',
    is      => 'rw',
    lazy    => 1,
    builder => '_load_accession_names',
);

=head2 design()

Reference built lazily by _get_layout().

=cut

has 'design' => (
    isa     => 'Ref',
    is      => 'rw',
    lazy    => 1,
    builder => '_get_layout',
);

=head2 traits()

Arrayref built lazily by _load_traits().

=cut

has 'traits' => (
    isa     => 'ArrayRef',
    is      => 'rw',
    lazy    => 1,
    builder => '_load_traits',
);

=head2 nd_geolocation_id()

Integer nd_geolocation_id, or undef.

=cut

has 'nd_geolocation_id' => (
    isa => 'Maybe[Int]',
    is  => 'rw',
);

=head2 user_id()

Integer id of the user the analysis is assigned to.

=cut

has 'user_id' => (
    isa => 'Int',
    is  => 'rw',
);

=head2 user_role()

=cut

has 'user_role' => (
    isa => 'Str',
    is  => 'rw',
);

=head2 analysis_model_protocol_id()

nd_protocol_id of save model information

=cut

has 'analysis_model_protocol_id' => (
    is  => 'rw',
    isa => 'Int|Undef',
);

=head2 metadata()

CXGN::Analysis::AnalysisMetadata object.

=cut

has 'metadata' => (
    is  => 'rw',
    isa => 'Maybe[CXGN::Analysis::AnalysisMetadata]',
);

#sub BUILDARGS {
#    my $self = shift;
#    my $args = shift;
#    $args->{trial_id} = $args->{project_id};
#}

#has 'project' => (isa => 'CXGN::Project', is => 'rw');

=head2 year()

year the analysis was done.

=cut

#has 'year' => (isa => 'Str', is => 'rw');

=head2 saved_model()

information about the saved model.

=cut

has 'saved_model' => (
    is  => 'rw',
    isa => 'HashRef',
);
# BUILD - Moose hook that runs after object construction.
#
# Requires that the object already has a trial (project) id; dies with
# "need a project id..." otherwise. For an existing project it:
#   * copies the first element of get_location() into nd_geolocation_id,
#   * loads the analysis_metadata_json projectprop into a
#     CXGN::Analysis::AnalysisMetadata object (creating and storing a fresh
#     one when the metadata object reports no prop_id),
#   * looks up the nd_protocol linked to the analysis experiment and, when
#     one is found, stores the result of GetModel->get_model() in
#     saved_model.
#
# Args: $args - the constructor argument hashref (bcs_schema is read from it).
sub BUILD {
    my $self = shift;
    my $args = shift;

    print STDERR "BUILD CXGN::Analysis...\n";

    my $trial_id = $self->get_trial_id();
    if (!$trial_id) {
        die "need a project id...";
    }

    my $schema = $args->{bcs_schema};

    # Fetch the location once; guard the log line so that a missing
    # location does not trigger an "uninitialized value" warning.
    my $location_id = $self->get_location()->[0];
    print STDERR "Location id retrieved : = ".(defined($location_id) ? $location_id : '')."\n";
    $self->nd_geolocation_id($location_id);

    my $metadata_json_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'analysis_metadata_json', 'project_property')->cvterm_id();

    # first() returns undef when no row matches, so a separate count()
    # round trip is unnecessary.
    my $metadata_row = $self->bcs_schema()->resultset("Project::Projectprop")->search( { project_id => $trial_id, type_id => $metadata_json_id })->first();
    my $stockprop_id = $metadata_row ? $metadata_row->projectprop_id() : undef;

    print STDERR "Create AnalysisMetadata object...\n";
    my $metadata = CXGN::Analysis::AnalysisMetadata->new( { bcs_schema => $schema, prop_id => $stockprop_id });
    $self->metadata($metadata);

    $stockprop_id = $metadata->prop_id();

    print STDERR "prop_id is ".(defined($stockprop_id) ? $stockprop_id : 'undefined')."...\n";
    if (! defined($stockprop_id)) {
        # No metadata stored yet for this project: create it now.
        my $time = DateTime->now();
        print STDERR "project_id = ".$trial_id." with stockprop_id = undefined...storing metadata...\n";
        $metadata->parent_id($trial_id);
        $metadata->create_timestamp($time->ymd()." ".$time->hms());
        $metadata->store();
    }

    my $analysis_nd_experiment_type_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'analysis_experiment', 'experiment_type')->cvterm_id();

    # Both values are bound as placeholders rather than interpolated.
    my $nd_protocol_q = "SELECT nd_protocol_id
        FROM nd_experiment_protocol
        JOIN nd_experiment ON (nd_experiment_protocol.nd_experiment_id = nd_experiment.nd_experiment_id)
        JOIN nd_experiment_project ON (nd_experiment_project.nd_experiment_id = nd_experiment.nd_experiment_id)
        WHERE nd_experiment.type_id=? AND project_id=?;";
    my $nd_protocol_h = $schema->storage->dbh()->prepare($nd_protocol_q);
    $nd_protocol_h->execute($analysis_nd_experiment_type_id, $trial_id);
    my ($nd_protocol_id) = $nd_protocol_h->fetchrow_array();
    # Only the first row is needed; release the handle explicitly.
    $nd_protocol_h->finish();

    if ($nd_protocol_id) {
        my $m = CXGN::AnalysisModel::GetModel->new({
            bcs_schema      => $schema,
            metadata_schema => $self->metadata_schema(),
            phenome_schema  => $self->phenome_schema(),
            nd_protocol_id  => $nd_protocol_id
        });
        my $saved_model_object = $m->get_model();
        $self->saved_model($saved_model_object);
    }
}
=head2 retrieve_analyses_by_user

 Usage:        my @analyses = CXGN::Analysis->retrieve_analyses_by_user($bcs_schema, $people_schema, $metadata_schema, $phenome_schema, $user_id, $analyses_type);
 Desc:         Class function to retrieve all analyses by user_id
 Ret:          a list of CXGN::Analysis objects
 Args:         $bcs_schema      - a BCS schema object
               $people_schema   - a CXGN::People::Schema object
               $metadata_schema - a CXGN::Metadata::Schema object
               $phenome_schema  - a CXGN::Phenome::Schema object
               $user_id         - the numeric id of a user
               $analyses_type   - (optional) only return analyses whose
                                  metadata analysis_model_type equals this
 Side Effects: prints progress messages to STDERR
 Example:

=cut

sub retrieve_analyses_by_user {
    my ($class, $bcs_schema, $people_schema, $metadata_schema, $phenome_schema, $user_id, $analyses_type) = @_;

    my $user_info_type_id = SGN::Model::Cvterm->get_cvterm_row($bcs_schema, 'project_sp_person_id', 'project_property')->cvterm_id();
    my $analysis_info_type_id = SGN::Model::Cvterm->get_cvterm_row($bcs_schema, 'analysis_metadata_json', 'project_property')->cvterm_id();

    my $q = "SELECT userinfo.project_id, analysisinfo.value FROM projectprop AS userinfo
             JOIN projectprop AS analysisinfo on (userinfo.project_id=analysisinfo.project_id)
             WHERE userinfo.type_id=? AND analysisinfo.type_id=? AND userinfo.value=?";

    my $h = $bcs_schema->storage()->dbh()->prepare($q);
    $h->execute($user_info_type_id, $analysis_info_type_id, $user_id);

    my @analyses = ();
    while (my ($project_id, $analysis_info) = $h->fetchrow_array()) {
        print STDERR "Instantiating analysis project for project ID $project_id...\n";

        if ($analyses_type) {
            # The metadata JSON is only needed when filtering by type.
            my $info = decode_json $analysis_info;
            my $model_type = $info->{analysis_model_type};

            # Skip analyses of another type; a missing type never matches
            # (and must not trigger an "uninitialized" warning in eq).
            next unless defined($model_type) && $model_type eq $analyses_type;
        }

        push @analyses, CXGN::Analysis->new( { bcs_schema => $bcs_schema, people_schema => $people_schema, metadata_schema => $metadata_schema, phenome_schema => $phenome_schema, trial_id=> $project_id });
    }

    return @analyses;
}
# create_and_store_analysis_design - store the design of this analysis.
#
# Args:    $precomputed_design_to_save - optional design hashref. When it
#          is not given, a design of type "Analysis" is calculated with
#          CXGN::Trial::TrialDesign from name() and accession_names().
# Returns: the trial (project) id of the analysis.
# Dies:    when user_id, description, name or breeding_program_id is
#          missing, when the "[Computation]" location or the breeding
#          program cannot be found, when the design cannot be calculated,
#          or when saving the trial fails.
#
# Side effects visible here: sets the year to the current year when none
# is set, sets location and breeding program on the project, creates a
# project_sp_person_id projectprop, stores the analysis metadata, and
# saves the design through CXGN::Trial::TrialCreate.
sub create_and_store_analysis_design {
    my $self = shift;
    my $precomputed_design_to_save = shift; #DESIGN HASHREF

    my $schema = $self->bcs_schema();
    my $dbh = $schema->storage->dbh();

    print STDERR "CREATE AND STORE ANALYSIS DESIGN...\n";

    if (!$self->user_id()) {
        die "Need an sp_person_id to store an analysis.";
    }
    if (!$self->get_description()) {
        die "Need a description to store an analysis.";
    }
    if (!$self->get_name()) {
        die "Need a name to store an analysis.";
    }
    if (!$self->breeding_program_id()) {
        die "Need a breeding program to store an analysis.";
    }

    my $p = CXGN::People::Person->new($dbh, $self->user_id);
    my $user_name = $p->get_username;

    if (!$self->year()) {
        my $dt = DateTime->now();
        my $year = $dt->year();
        print STDERR "Year: $year\n";
        print STDERR "No year provided. Using current year ($year).\n";
        $self->year($year);
    }

    # Analyses are attached to a dedicated location; fail with a clear
    # message instead of a method call on undef when it does not exist.
    my $computation_location_name = "[Computation]";
    my $location_row = $schema->resultset("NaturalDiversity::NdGeolocation")->search({ description => $computation_location_name })->first();
    if (!$location_row) {
        die "The location '$computation_location_name' was not found in the database; it is needed to store an analysis.";
    }
    my $calculation_location_id = $location_row->nd_geolocation_id();
    $self->nd_geolocation_id($calculation_location_id);
    $self->set_location($calculation_location_id);

    my $program_row = $schema->resultset("Project::Project")->find({project_id=>$self->breeding_program_id()});
    if (!$program_row) {
        die "The breeding program with id ".$self->breeding_program_id()." was not found in the database.";
    }
    my $breeding_program_name = $program_row->name();
    $self->set_breeding_program($self->breeding_program_id());

    # store user info

    print STDERR "Storing user info...\n";
    my $project_sp_person_term_cvterm_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'project_sp_person_id', 'project_property')->cvterm_id();
    $schema->resultset("Project::Projectprop")->create({
        project_id => $self->get_trial_id(),
        type_id    => $project_sp_person_term_cvterm_id,
        value      => $self->user_id(),
    });

    # Store metadata

    my $time = DateTime->now();
    if (!$self->metadata()) {
        print STDERR "Storing metadata...\n";
        my $metadata = CXGN::Analysis::AnalysisMetadata->new({ bcs_schema => $schema });
        print STDERR "Analysis ID = ".$self->get_trial_id()."\n";
        $metadata->parent_id($self->get_trial_id());
        $self->metadata( $metadata );
        $self->metadata()->create_timestamp($time->ymd()." ".$time->hms());
    }

    # store dataset info, if available. Copy the actual dataset json,
    # so that dataset info is frozen and does not reflect future
    # changes.

    if ($self->metadata()->dataset_id()) {
        print STDERR "Retrieving data for dataset_id ".$self->metadata->dataset_id()."\n";
        my $ds = CXGN::Dataset->new( { schema => $schema, people_schema => $self->people_schema(), sp_dataset_id => $self->metadata()->dataset_id() });
        my $data = $ds->to_hashref();
        #print STDERR "DATA: $data\n";
        # NOTE(review): JSON::Any is not in this file's visible use-list
        # (only JSON::XS is) -- confirm it is loaded elsewhere.
        $self->metadata()->dataset_data(JSON::Any->encode($data));
    }
    else {
        print STDERR "No dataset_id provided...\n";
    }

    $self->metadata()->parent_id($self->get_trial_id());
    $self->metadata()->modified_timestamp($time->ymd()." ".$time->hms());
    $self->metadata()->store();

    my $design;
    if (!$precomputed_design_to_save) {
        print STDERR "Create a new analysis design...\n";
        my $td = CXGN::Trial::TrialDesign->new();

        $td->set_trial_name($self->name());
        $td->set_stock_list($self->accession_names());
        $td->set_design_type("Analysis");

        if ($td->calculate_design()) {
            print STDERR "Design calculated :-) ...\n";
            $design = $td->get_design();
            $self->design($design);
        }
        else {
            die "An error occurred creating the analysis design.";
        }
    } else {
        $design = $precomputed_design_to_save;
    }

    # print STDERR Dumper $design;

    print STDERR "Store design...\n";

    my $saved_model_protocol_id;
    if ($self->analysis_model_protocol_id) {
        $saved_model_protocol_id = $self->analysis_model_protocol_id();
    }

    my $analysis_experiment_type_id = SGN::Model::Cvterm->get_cvterm_row($schema, 'analysis_experiment', 'experiment_type')->cvterm_id();

    # owner_id is passed exactly once (it used to be listed twice).
    my $trial_create = CXGN::Trial::TrialCreate->new({
        trial_id                   => $self->get_trial_id(),
        owner_id                   => $self->user_id(),
        chado_schema               => $schema,
        dbh                        => $dbh,
        operator                   => $user_name,
        design                     => $design,
        design_type                => $analysis_experiment_type_id,
        program                    => $breeding_program_name,
        trial_year                 => $self->year(),
        trial_description          => $self->description(),
        trial_location             => $computation_location_name,
        trial_name                 => $self->name(),
        trial_type                 => $analysis_experiment_type_id,
        is_analysis                => 1,
        analysis_model_protocol_id => $saved_model_protocol_id,
    });

    try {
        $trial_create->save_trial();
    }
    catch {
        die "Error saving trial: $_";
    };

    # The return value is discarded; presumably this call is made for its
    # side effects on the stored layout -- TODO confirm.
    $self->_get_layout()->get_design();

    print STDERR "Done with design create & store.\n";
    return $self->get_trial_id();
}
# store_analysis_values - store the computed values of the analysis as
# phenotypes. This is a separate call and has to be made after the
# design has been stored.
#
# Positional args: $metadata_schema, $phenome_schema, $values (values
# hash), $plots (stock list), $traits (trait list), $operator, $basepath,
# $dbhost, $dbname, $dbuser, $dbpass, $tempfile_path.
#
# Warns with the verification warning, if any; dies on a verification
# error or when storing the phenotypes reports an error.
sub store_analysis_values {
    my (
        $self,     $metadata_schema, $phenome_schema, $values,
        $plots,    $traits,          $operator,       $basepath,
        $dbhost,   $dbname,          $dbuser,         $dbpass,
        $tempfile_path,
    ) = @_;

    print STDERR "Storing analysis values...\n";

    my $now = DateTime->now();
    my %phenotype_metadata = (
        'archived_file'      => 'none',
        'archived_file_type' => 'analysis_values',
        'operator'           => $operator,
        'date'               => $now->ymd()."_".$now->hms(),
    );

    my %store_args = (
        bcs_schema                 => $self->bcs_schema(),
        basepath                   => $basepath,
        dbhost                     => $dbhost,
        dbname                     => $dbname,
        dbuser                     => $dbuser,
        dbpass                     => $dbpass,
        temp_file_nd_experiment_id => $tempfile_path,
        metadata_schema            => $metadata_schema,
        phenome_schema             => $phenome_schema,
        user_id                    => $self->user_id(),
        stock_list                 => $plots,
        trait_list                 => $traits,
        values_hash                => $values,
        has_timestamps             => 0,
        overwrite_values           => 1,
        metadata_hash              => \%phenotype_metadata,
    );
    my $store_phenotypes = CXGN::Phenotypes::StorePhenotypes->new(\%store_args);

    # Verification first: a warning is only reported, an error is fatal.
    my ($verified_warning, $verified_error) = $store_phenotypes->verify();
    warn $verified_warning if $verified_warning;
    die $verified_error if $verified_error;

    my ($stored_phenotype_error, $stored_phenotype_success) = $store_phenotypes->store();
    die "An error occurred storing the phenotypes: $stored_phenotype_error\n" if $stored_phenotype_error;
}
# _get_layout - builder for the 'design' attribute.
#
# Returns a new CXGN::Trial::TrialLayout object for this analysis'
# trial id, using the experiment type 'analysis_experiment'.
sub _get_layout {
    my ($self) = @_;

    my %layout_args = (
        schema          => $self->bcs_schema(),
        trial_id        => $self->get_trial_id(),
        experiment_type => 'analysis_experiment',
    );

    # print STDERR "_get_layout: design = ".Dumper($design->get_design);
    return CXGN::Trial::TrialLayout->new(\%layout_args);
}
# get_phenotype_matrix - fetch the phenotype matrix of this analysis.
#
# Queries CXGN::Phenotypes::PhenotypeMatrix for this trial id at the
# 'analysis_instance' data level / 'analysis_experiment' experiment type.
#
# Returns: an arrayref holding the list returned by
#          PhenotypeMatrix->get_phenotype_matrix().
sub get_phenotype_matrix {
    my $self = shift;

    # This class is not in the file's use-list; load it on demand so the
    # method does not depend on some other module having loaded it first.
    require CXGN::Phenotypes::PhenotypeMatrix;

    my $phenotypes_search = CXGN::Phenotypes::PhenotypeMatrix->new(
        bcs_schema      => $self->bcs_schema(),
        search_type     => "MaterializedViewTable",
        data_level      => "analysis_instance",
        experiment_type => "analysis_experiment",
        trial_list      => [ $self->get_trial_id() ],
    );

    my @data = $phenotypes_search->get_phenotype_matrix();
    return \@data;
}
# _load_accession_names - builder for the 'accession_names' attribute.
#
# Dumps the accession names of the design to STDERR and returns whatever
# the design's get_accession_names() returns.
sub _load_accession_names {
    my ($self) = @_;

    #print STDERR "Design = ".Dumper($design);

    # Fetched in list context purely for the debug dump.
    my @names_for_log = $self->design()->get_accession_names();
    print STDERR "ACCESSIONS: ". Dumper(\@names_for_log);

    # get the accessions from the design (not the dataset!)
    return $self->design()->get_accession_names();
}
# _load_traits - builder for the 'traits' attribute.
#
# Takes the first row of get_phenotype_matrix() as the header and returns
# an arrayref of every header entry from index 39 onwards.
# Returns an empty arrayref when the matrix has no header row or the
# header has no entries at or beyond that index.
sub _load_traits {
    my $self = shift;

    # Index of the first header entry that is treated as a trait.
    # NOTE(review): presumably the first 39 header columns are non-trait
    # metadata columns -- confirm against the phenotype matrix layout.
    my $first_trait_index = 39;

    my $phenotypes = $self->get_phenotype_matrix();
    my $header = $phenotypes->[0];

    my $traits = [];
    # Guard against an empty matrix: dereferencing an undefined header
    # would die under strict refs.
    if (ref($header) eq 'ARRAY' && scalar(@$header) > $first_trait_index) {
        $traits = [ @{$header}[ $first_trait_index .. $#{$header} ] ];
    }

    print STDERR "_load_traits: TRAITS: ".Dumper($traits);
    #$self->traits($traits);
    return $traits;
}
588 #__PACKAGE__->meta->make_immutable;