6 CXGN::Analysis - manage analyses on Breedbase
10 Analyses are stored much like trials, starting out in the project table, and linking through to nd_experiment and stock through linking tables, as well as phenotype to store the analysis results. Additional metadata is stored in a projectprop with the type_id 'analysis_metadata_json'. The type of the project is 'analysis_project' (stored in a projectprop as well). Each analysis is assigned to a user, using an sp_person_id assigned in a projectprop.
14 The data structure is built using type ids that are different from a regular field trial.
18 =item nd_experiment.type_id
20 The nd_experiment.type_id links to 'analysis_experiment' (nd_experiment_property) (equivalent to 'field_experiment' in a trial).
24 The stock.type_id links to 'analysis_instance' (stock_property) (equivalent to 'plot' in a trial)
26 =item stock_relationship.type_id
28 The stock_relationship.type_id links to 'analysis_of' (equivalent to 'plot_of' in field trials)
30 This is summarized in the following table:
32 ┌──────────────────┬───────────────────────┬───────────────────┬────────────────┐
33 │ project type │ nd_experiment.type_id │ stock.type_id │ stock_relation │
34 │ │ │ │ ship.type_id │
35 ├──────────────────┼───────────────────────┼───────────────────┼────────────────┤
36 │ trial │ field_experiment │ plot │ plot_of │
37 ├──────────────────┼───────────────────────┼───────────────────┼────────────────┤
38 │ genotyping_plate │ genotyping_experiment │ tissue_sample │ sample_of │
39 ├──────────────────┼───────────────────────┼───────────────────┼────────────────┤
40 │ analysis │ analysis_experiment │ analysis_instance │ analysis_of │
41 └──────────────────┴───────────────────────┴───────────────────┴────────────────┘
46 The data in analysis_metadata_json is managed by the CXGN::Analysis::AnalysisMetadata class and contains the dataset_id used to generate the analysis, the actual analysis protocol that was run, and the traits relevant to the analysis.
50 Lukas Mueller <lam87@cornell.edu>
56 package CXGN
::Analysis
;
# Subclass of CXGN::Project. Methods such as get_trial_id(), get_location(),
# get_name() and set_location() are called on $self further down without being
# defined in the visible code -- presumably inherited from this parent; TODO confirm.
60 extends
'CXGN::Project';
65 use CXGN
::Trial
::TrialCreate
;
66 use CXGN
::Trial
::TrialDesign
;
67 use CXGN
::Trial
::TrialDesignStore
;
68 use CXGN
::Trial
::TrialLayout
;
69 use CXGN
::Phenotypes
::StorePhenotypes
;
70 use CXGN
::Analysis
::AnalysisMetadata
;
71 use CXGN
::List
::Transform
;
73 use CXGN
::AnalysisModel
::SaveModel
;
74 use CXGN
::People
::Person
;
75 use CXGN
::AnalysisModel
::GetModel
;
# Required Bio::Chado::Schema handle; the methods below read it back through
# $self->bcs_schema() for cvterm lookups, resultset searches and raw DBI access.
81 has
'bcs_schema' => (is
=> 'rw', isa
=> 'Bio::Chado::Schema', required
=> 1 );
83 =head2 people_schema()
87 has
'people_schema' => (is
=> 'rw', isa
=> 'CXGN::People::Schema', required
=>1);
89 =head2 metadata_schema()
93 has
'metadata_schema' => (is
=> 'rw', isa
=> 'CXGN::Metadata::Schema', required
=>1);
95 =head2 phenome_schema()
99 has
'phenome_schema' => (is
=> 'rw', isa
=> 'CXGN::Phenome::Schema', required
=>1);
105 #has 'project_id' => (is => 'rw', isa => 'Int');
111 #has 'name' => (is => 'rw', isa => 'Str');
117 ##has 'description' => (is => 'rw', isa => 'Str', default => "No description");
119 =head2 breeding_program_id()
123 has
'breeding_program_id' => (is
=> 'rw', isa
=> 'Int');
125 =head2 accession_names()
129 has
'accession_names' => (is
=> 'rw', isa
=> 'Maybe[ArrayRef]', lazy
=> 1, builder
=> '_load_accession_names');
# Any reference is accepted ('Ref'). Built lazily by _get_layout on first read
# unless a value was set explicitly; create_and_store_analysis_design() sets it
# after calculating a new design.
135 has
'design' => (is
=> 'rw', isa
=> 'Ref', lazy
=> 1, builder
=> '_get_layout');
# ArrayRef built lazily by _load_traits on first read.
141 has
'traits' => (is
=> 'rw', isa
=> 'ArrayRef', builder
=> '_load_traits', lazy
=> 1);
143 =head2 nd_geolocation_id()
147 has
'nd_geolocation_id' => (is
=> 'rw', isa
=> 'Maybe[Int]');
# Integer id of the owning user. create_and_store_analysis_design() dies when it
# is not set and writes it into a 'project_sp_person_id' projectprop.
153 has
'user_id' => (is
=> 'rw', isa
=> 'Int');
# Plain string; not read anywhere in the visible part of this file.
159 has
'user_role' => (is
=> 'rw', isa
=> 'Str');
161 =head2 analysis_model_protocol_id()
163 nd_protocol_id of save model information
167 has
'analysis_model_protocol_id' => (isa
=> 'Int|Undef', is
=> 'rw');
171 CXGN::Analysis::AnalysisMetadata object.
175 has
'metadata' => (isa
=> 'Maybe[CXGN::Analysis::AnalysisMetadata]', is
=> 'rw');
180 # $args->{trial_id} = $args->{project_id};
183 #has 'project' => (isa => 'CXGN::Project', is => 'rw');
187 year the analysis was done.
191 #has 'year' => (isa => 'Str', is => 'rw');
195 information about the saved model.
199 has
'saved_model' => (isa
=> 'HashRef', is
=> 'rw');
# Object-construction hook body. The enclosing 'sub' header and the
# declarations of $self, $args, $stockprop_id and $metadata are not visible in
# this excerpt; the log message below suggests this is BUILD -- TODO confirm.
# For an existing project id it records the location, loads (or prepares) the
# AnalysisMetadata object and attaches any saved model linked to the project.
206 print STDERR
"BUILD CXGN::Analysis...\n";
209 if ($self->get_trial_id()) {
# NOTE(review): $schema is taken from the raw constructor arguments here, while
# the projectprop search below goes through $self->bcs_schema().
210 my $schema = $args->{bcs_schema
};
211 print STDERR
"Location id retrieved : = ".$self->get_location()->[0]."\n";
# The first element returned by get_location() is kept as the geolocation id.
212 $self->nd_geolocation_id($self->get_location()->[0]);
# Look for an existing 'analysis_metadata_json' projectprop on this project.
214 my $metadata_json_id = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'analysis_metadata_json', 'project_property')->cvterm_id();
215 my $rs = $self->bcs_schema()->resultset("Project::Projectprop")->search( { project_id
=> $self->get_trial_id(), type_id
=> $metadata_json_id });
218 if ($rs->count() > 0) {
# Despite its name, $stockprop_id holds a projectprop_id.
219 $stockprop_id = $rs->first()->projectprop_id();
222 print STDERR
"Create AnalysisMetadata object...\n";
# prop_id is whatever was found above (possibly undefined when no row matched).
223 $metadata = CXGN
::Analysis
::AnalysisMetadata
->new( { bcs_schema
=> $schema, prop_id
=> $stockprop_id });
224 $self->metadata($metadata);
# Re-read the id from the metadata object rather than trusting the local copy.
226 $stockprop_id = $metadata->prop_id();
228 my $time = DateTime
->now();
# NOTE(review): $stockprop_id is interpolated before the defined() check below,
# so an undefined id is printed as an empty string here.
229 print STDERR
"prop_id is $stockprop_id...\n";
# No stored metadata yet: attach the metadata object to this project and stamp
# its creation time as "YYYY-MM-DD HH:MM:SS".
230 if (! defined($stockprop_id)) {
231 print STDERR
"project_id = ".$self->get_trial_id()." with stockprop_id = undefined...storing metadata...\n";
232 $metadata->parent_id($self->get_trial_id());
233 $metadata->create_timestamp($time->ymd()." ".$time->hms());
# Find a protocol attached to an 'analysis_experiment' nd_experiment of this
# project. The type id is interpolated into the SQL (it comes from the cvterm
# lookup, not from user input); the project id is passed as a bind value.
237 my $analysis_nd_experiment_type_id = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'analysis_experiment', 'experiment_type')->cvterm_id();
238 my $nd_protocol_q = "SELECT nd_protocol_id FROM nd_experiment_protocol JOIN nd_experiment ON (nd_experiment_protocol.nd_experiment_id = nd_experiment.nd_experiment_id) JOIN nd_experiment_project ON (nd_experiment_project.nd_experiment_id = nd_experiment.nd_experiment_id) WHERE nd_experiment.type_id=$analysis_nd_experiment_type_id AND project_id=?;";
239 my $nd_protocol_h = $schema->storage->dbh()->prepare($nd_protocol_q);
240 $nd_protocol_h->execute($self->get_trial_id());
# Only the first returned row is consulted.
241 my ($nd_protocol_id) = $nd_protocol_h->fetchrow_array();
# When a protocol exists, fetch the saved model and keep it on the object.
242 if ($nd_protocol_id) {
243 my $m = CXGN
::AnalysisModel
::GetModel
->new({
245 metadata_schema
=>$self->metadata_schema(),
246 phenome_schema
=>$self->phenome_schema(),
247 nd_protocol_id
=>$nd_protocol_id
249 my $saved_model_object = $m->get_model();
250 $self->saved_model($saved_model_object);
254 # otherwise create an empty project object with an empty metadata object...
# NOTE(review): the comment above promises an empty object, but the visible
# statement below dies instead.
256 die "need a project id...";
258 $self->metadata($metadata);
261 =head2 retrieve_analyses_by_user
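263 Usage: my @analyses = CXGN::Analysis->retrieve_analyses_by_user($bcs_schema, $people_schema, $metadata_schema, $phenome_schema, $user_id);
264 Desc: Class function to retrieve all analyses by user_id
265 Ret: a list of CXGN::Analysis objects, one per analysis project owned by the user
266 Args: $bcs_schema, $people_schema, $metadata_schema, $phenome_schema - the schema objects handed on to each CXGN::Analysis object; $user_id - the numeric id of a user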
272 sub retrieve_analyses_by_user
{
# Collects one CXGN::Analysis object for every analysis project owned by a user.
# Visible positional arguments: bcs, people, metadata and phenome schema handles,
# in that order. $user_id and @analyses are used below but their declarations,
# and the final return, are not visible in this excerpt -- TODO confirm.
274 my $bcs_schema = shift;
275 my $people_schema = shift;
276 my $metadata_schema = shift;
277 my $phenome_schema = shift;
# cvterm ids of the two projectprop types that identify "an analysis owned by
# this user".
280 my $user_info_type_id = SGN
::Model
::Cvterm
->get_cvterm_row($bcs_schema, 'project_sp_person_id', 'project_property')->cvterm_id();
281 my $analysis_info_type_id = SGN
::Model
::Cvterm
->get_cvterm_row($bcs_schema, 'analysis_metadata_json', 'project_property')->cvterm_id();
# Self-join on projectprop: a project matches when it has a user-id prop whose
# value equals $user_id AND an analysis-metadata prop. All three values are bound.
283 my $q = "SELECT userinfo.project_id FROM projectprop AS userinfo JOIN projectprop AS analysisinfo on (userinfo.project_id=analysisinfo.project_id) WHERE userinfo.type_id=? AND analysisinfo.type_id=? AND userinfo.value=?";
285 my $h = $bcs_schema->storage()->dbh()->prepare($q);
286 $h->execute($user_info_type_id, $analysis_info_type_id, $user_id);
# Each matching project id is passed to the constructor as trial_id, together
# with the four schema handles.
289 while (my ($project_id) = $h->fetchrow_array()) {
290 print STDERR
"Instantiating analysis project for project ID $project_id...\n";
291 push @analyses, CXGN
::Analysis
->new( { bcs_schema
=> $bcs_schema, people_schema
=> $people_schema, metadata_schema
=> $metadata_schema, phenome_schema
=> $phenome_schema, trial_id
=> $project_id });
297 sub create_and_store_analysis_design
{
# Stores the project-side records of an analysis: the owner projectprop, the
# analysis metadata, and the design (a freshly calculated one, or the
# precomputed hashref passed by the caller), then saves it through TrialCreate.
# Args:    optional precomputed design hashref.
# Returns: $self->get_trial_id().
# Dies:    when user id, description, name or breeding program is missing,
#          when design calculation fails, or when saving fails.
# The declarations of $self and $design, several closing braces and the
# try/catch around save_trial() are not visible in this excerpt -- TODO confirm.
299 my $precomputed_design_to_save = shift; #DESIGN HASHREF
301 my $schema = $self->bcs_schema();
302 my $dbh = $schema->storage->dbh();
304 print STDERR
"CREATE AND STORE ANALYSIS DESIGN...\n";
# Preconditions: all four values must be true, so an id of 0 or an empty
# string is rejected as well as undef.
306 if (!$self->user_id()) {
307 die "Need an sp_person_id to store an analysis.";
309 if (!$self->get_description()) {
310 die "Need a description to store an analysis.";
312 if (!$self->get_name()) {
313 die "Need a name to store an analysis.";
315 if (!$self->breeding_program_id()) {
316 die "Need a breeding program to store an analysis.";
# The username is later handed to TrialCreate as the operator.
319 my $p = CXGN
::People
::Person
->new($dbh, $self->user_id);
320 my $user_name = $p->get_username;
# Fall back to the current year when none was provided. The statement that
# stores $year on the object is not visible here -- TODO confirm.
322 if (!$self->year()) {
323 my $dt = DateTime
->now();
324 my $year = $dt->year();
325 print STDERR
"Year: $year\n";
326 print STDERR
"No year provided. Using current year ($year).\n";
# Analyses are attached to the geolocation whose description is "[Computation]".
# NOTE(review): there is no check that the search returned a row before
# nd_geolocation_id() is called on the result.
330 my $computation_location_name = "[Computation]";
331 my $calculation_location_id = $schema->resultset("NaturalDiversity::NdGeolocation")->search({ description
=> $computation_location_name })->first->nd_geolocation_id();
332 $self->nd_geolocation_id($calculation_location_id);
333 $self->set_location($calculation_location_id);
# NOTE(review): likewise, name() is called directly on the find() result.
335 my $breeding_program_name = $schema->resultset("Project::Project")->find({project_id
=>$self->breeding_program_id()})->name();
336 $self->set_breeding_program($self->breeding_program_id());
# Record the owner as a 'project_sp_person_id' projectprop on this project.
340 print STDERR
"Storing user info...\n";
341 my $project_sp_person_term_cvterm_id = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'project_sp_person_id', 'project_property')->cvterm_id();
342 my $row = $schema->resultset("Project::Projectprop")->create({
343 project_id
=> $self->get_trial_id(),
344 type_id
=>$project_sp_person_term_cvterm_id,
345 value
=>$self->user_id(),
# Create a metadata object on demand and stamp its creation time.
350 my $time = DateTime
->now();
351 if (!$self->metadata()) {
352 print STDERR
"Storing metadata...\n";
353 my $metadata = CXGN
::Analysis
::AnalysisMetadata
->new({ bcs_schema
=> $schema });
354 print STDERR
"Analysis ID = ".$self->get_trial_id()."\n";
355 $metadata->parent_id($self->get_trial_id());
356 $self->metadata( $metadata );
357 $self->metadata()->create_timestamp($time->ymd()." ".$time->hms());
360 # store dataset info, if available. Copy the actual dataset json,
361 # so that dataset info is frozen and does not reflect future
# NOTE(review): CXGN::Dataset and JSON::Any are used below but are not among the
# 'use' lines visible in this excerpt -- confirm they are loaded.
364 if ($self->metadata()->dataset_id()) {
365 print STDERR
"Retrieving data for dataset_id ".$self->metadata->dataset_id()."\n";
366 my $ds = CXGN
::Dataset
->new( { schema
=> $schema, people_schema
=> $self->people_schema(), sp_dataset_id
=> $self->metadata()->dataset_id() });
367 my $data = $ds->to_hashref();
368 #print STDERR "DATA: $data\n";
369 $self->metadata()->dataset_data(JSON
::Any
->encode($data));
372 print STDERR
"No dataset_id provided...\n";
# Persist the metadata with an updated modification time.
375 $self->metadata()->parent_id($self->get_trial_id());
376 $self->metadata()->modified_timestamp($time->ymd()." ".$time->hms());
377 $self->metadata()->store();
# Without a precomputed design, calculate one of type "Analysis" from the
# accession names; otherwise use the caller's hashref as-is.
380 if (!$precomputed_design_to_save) {
381 print STDERR
"Create a new analysis design...\n";
382 my $td = CXGN
::Trial
::TrialDesign
->new();
# NOTE(review): name() is used here and below, whereas the precondition check
# above uses get_name().
384 $td->set_trial_name($self->name());
385 $td->set_stock_list($self->accession_names());
386 $td->set_design_type("Analysis");
388 if ($td->calculate_design()) {
389 print STDERR
"Design calculated :-) ...\n";
390 $design = $td->get_design();
391 $self->design($design);
394 die "An error occurred creating the analysis design.";
397 $design = $precomputed_design_to_save;
399 # print STDERR Dumper $design;
401 print STDERR
"Store design...\n";
# Stays undef unless a model protocol id was set on the object.
403 my $saved_model_protocol_id;
404 if ($self->analysis_model_protocol_id) {
405 $saved_model_protocol_id = $self->analysis_model_protocol_id();
# The 'analysis_experiment' cvterm id is passed as both design_type and trial_type.
408 my $analysis_experiment_type_id = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'analysis_experiment', 'experiment_type')->cvterm_id();
409 my $trial_create = CXGN
::Trial
::TrialCreate
->new({
410 trial_id
=> $self->get_trial_id(),
411 chado_schema
=> $schema,
413 operator
=> $user_name,
415 design_type
=> $analysis_experiment_type_id,
416 program
=> $breeding_program_name,
417 trial_year
=> $self->year(),
418 trial_description
=> $self->description(),
419 trial_location
=> $computation_location_name,
420 trial_name
=> $self->name(),
421 trial_type
=> $analysis_experiment_type_id,
423 analysis_model_protocol_id
=> $saved_model_protocol_id,
426 # my $validate_error = $trial_create->validate_design();
428 # if ($validate_error) {
429 # print STDERR "VALIDATE ERROR! "; #.Dumper($validate_error)."\n";
432 ## print STDERR "Valiation successful. Storing...\n";
433 # try { $store_error = $design_store->store() }
434 # catch { $store_error = $_ };
436 # if ($store_error) {
437 # die "ERROR SAVING TRIAL!: $store_error\n";
# The die below reports $_, so it presumably sits in a catch block around
# save_trial() that is not visible here -- TODO confirm.
441 $trial_create->save_trial();
444 die "Error saving trial: $_";
447 print STDERR
"Done with design create & store.\n";
448 return $self->get_trial_id();
452 # store analysis values is a separate call and has to be called after
455 sub store_analysis_values
{
# Stores computed analysis values as phenotypes via
# CXGN::Phenotypes::StorePhenotypes: verify first, then store.
# Visible positional arguments: $metadata_schema, $phenome_schema, $operator,
# $basepath, $tempfile_path. $self, $values, $plots and $traits are used below
# but their declarations are not visible in this excerpt, and the gaps suggest
# further positional arguments in between -- TODO confirm the full signature.
# Dies when the store step reports an error.
457 my $metadata_schema = shift;
458 my $phenome_schema = shift;
462 my $operator = shift;
463 my $basepath = shift;
468 my $tempfile_path = shift;
470 print STDERR
"Storing analysis values...\n";
# Timestamp format is "YYYY-MM-DD_HH:MM:SS" (underscore separator).
472 my $time = DateTime
->now();
473 my $timestamp = $time->ymd()."_".$time->hms();
# No archived file is recorded; the literal 'none' is stored instead.
474 my %phenotype_metadata;
475 $phenotype_metadata{'archived_file'} = 'none';
476 $phenotype_metadata{'archived_file_type'} = 'analysis_values';
477 $phenotype_metadata{'operator'} = $operator;
478 $phenotype_metadata{'date'} = $timestamp;
480 my $store_phenotypes = CXGN
::Phenotypes
::StorePhenotypes
->new({
481 bcs_schema
=> $self->bcs_schema(),
482 basepath
=> $basepath,
487 temp_file_nd_experiment_id
=> $tempfile_path,
488 metadata_schema
=> $metadata_schema,
489 phenome_schema
=> $phenome_schema,
490 user_id
=> $self->user_id(),
491 stock_list
=> $plots,
492 trait_list
=> $traits,
493 values_hash
=> $values,
# overwrite_values is passed as 0.
495 overwrite_values
=> 0,
496 metadata_hash
=> \
%phenotype_metadata,
# Verification warnings are only reported with warn and do not stop the store.
499 my ($verified_warning, $verified_error) = $store_phenotypes->verify();
501 if ($verified_warning) {
502 warn $verified_warning;
# The body of this branch is not visible in this excerpt.
504 if ($verified_error) {
# store() returns (error, success); only the error is inspected here.
508 my ($stored_phenotype_error, $stored_phenotype_success) = $store_phenotypes->store();
510 if ($stored_phenotype_error) {
511 die "An error occurred storing the phenotypes: $stored_phenotype_error\n";
# Body fragment of the layout builder (presumably _get_layout, the builder named
# by the 'design' attribute; its 'sub' header and return statement are not
# visible in this excerpt -- TODO confirm).
# Builds a TrialLayout for this project, read as an 'analysis_experiment'.
521 my $design = CXGN
::Trial
::TrialLayout
->new({ schema
=> $self->bcs_schema(), trial_id
=> $self->get_trial_id(), experiment_type
=> 'analysis_experiment'});
523 # print STDERR "_get_layout: design = ".Dumper($design->get_design);
525 #print STDERR "ERROR IN LAYOUT: ".Dumper($error)."\n";
526 #print STDERR "READ DESIGN: ".Dumper($design->get_design());
530 sub get_phenotype_matrix
{
# Queries the phenotype matrix for this analysis project only: data level
# 'analysis_instance', experiment type 'analysis_experiment', using the
# "MaterializedViewTable" search type.
# The declaration of $self and the return statement are not visible in this
# excerpt; _load_traits treats the result as an array reference whose first
# element is the header row -- TODO confirm what is returned.
# NOTE(review): CXGN::Phenotypes::PhenotypeMatrix is not among the 'use' lines
# visible in this excerpt -- confirm it is loaded.
532 my $phenotypes_search = CXGN
::Phenotypes
::PhenotypeMatrix
->new(
533 bcs_schema
=>$self->bcs_schema(),
534 search_type
=> "MaterializedViewTable",
535 data_level
=> "analysis_instance",
536 experiment_type
=> "analysis_experiment",
537 trial_list
=> [ $self->get_trial_id() ],
# The search result is collected in list context.
539 my @data = $phenotypes_search->get_phenotype_matrix();
543 sub _load_accession_names
{
# Builder for the 'accession_names' attribute: returns whatever the design
# object's get_accession_names() yields.
# The declaration of $self is not visible in this excerpt.
546 my $design = $self->design();
547 #print STDERR "Design = ".Dumper($design);
# This first call only feeds the debug dump below.
# NOTE(review): Dumper is used here but Data::Dumper is not among the 'use'
# lines visible in this excerpt -- confirm it is loaded.
549 my @accessions = $design->get_accession_names();
550 print STDERR
"ACCESSIONS: ". Dumper
(\
@accessions);
551 # get the accessions from the design (not the dataset!)
# NOTE(review): get_accession_names() is invoked a second time for the return
# value instead of reusing @accessions.
553 return $self->design()->get_accession_names();
# Body fragment of the traits builder (presumably _load_traits, per the log
# message below; its 'sub' header and return statement are not visible in this
# excerpt -- TODO confirm).
559 my $phenotypes = $self->get_phenotype_matrix();
# The first row of the matrix is treated as the header row.
561 my $header = $phenotypes->[0];
# Keep the header entries from index 39 through the last one; presumably the
# first 39 columns are fixed non-trait columns -- TODO confirm the offset.
563 my $traits = [ @
$header[39..scalar(@
$header)-1] ];
565 print STDERR
"_load_traits: TRAITS: ".Dumper
($traits);
566 #$self->traits($traits);
572 #__PACKAGE__->meta->make_immutable;