6 CXGN::Analysis - manage analyses on Breedbase
10 Analyses are stored much like trials, starting out in the project table, and linking through to nd_experiment and stock through linking tables, as well as phenotype to store the analysis results. Additional metadata is stored in a projectprop with the type_id 'analysis_metadata_json'. The type of the project is 'analysis_project' (stored in a projectprop as well). Each analysis is assigned to a user, using an sp_person_id stored in a projectprop.
14 The data structure is built using type ids that are different from a regular field trial.
18 =item nd_experiment.type_id
20 The nd_experiment.type_id links to 'analysis_experiment' (nd_experiment_property) (equivalent to 'field_experiment' in a trial).
24 The stock.type_id links to 'analysis_instance' (stock_property) (equivalent to 'plot' in a trial)
26 =item stock_relationship.type_id
28 The stock_relationship.type_id links to 'analysis_of' (equivalent to 'plot_of' in field trials)
30 This is summarized in the following table:
32 ┌──────────────────┬───────────────────────┬───────────────────┬────────────────┐
33 │ project type │ nd_experiment.type_id │ stock.type_id │ stock_relation │
34 │ │ │ │ ship.type_id │
35 ├──────────────────┼───────────────────────┼───────────────────┼────────────────┤
36 │ trial │ field_experiment │ plot │ plot_of │
37 ├──────────────────┼───────────────────────┼───────────────────┼────────────────┤
38 │ genotyping_plate │ genotyping_experiment │ tissue_sample │ sample_of │
39 ├──────────────────┼───────────────────────┼───────────────────┼────────────────┤
40 │ analysis │ analysis_experiment │ analysis_instance │ analysis_of │
41 └──────────────────┴───────────────────────┴───────────────────┴────────────────┘
46 The data in analysis_metadata_json is managed by the CXGN::Analysis::AnalysisMetadata class and contains the dataset_id used to generate the analysis, the actual analysis protocol that was run, and the traits relevant to the analysis.
50 Lukas Mueller <lam87@cornell.edu>
56 package CXGN
::Analysis
;
60 extends
'CXGN::Project';
65 use CXGN
::Trial
::TrialCreate
;
66 use CXGN
::Trial
::TrialDesign
;
67 use CXGN
::Trial
::TrialDesignStore
;
68 use CXGN
::Trial
::TrialLayout
;
69 use CXGN
::Phenotypes
::StorePhenotypes
;
70 use CXGN
::Analysis
::AnalysisMetadata
;
71 use CXGN
::List
::Transform
;
73 use CXGN
::AnalysisModel
::SaveModel
;
74 use CXGN
::People
::Person
;
75 use CXGN
::AnalysisModel
::GetModel
;
# Required Bio::Chado::Schema handle (read-write accessor).
82 has
'bcs_schema' => (is
=> 'rw', isa
=> 'Bio::Chado::Schema', required
=> 1 );
84 =head2 people_schema()
# Required CXGN::People::Schema handle (read-write accessor).
88 has
'people_schema' => (is
=> 'rw', isa
=> 'CXGN::People::Schema', required
=>1);
90 =head2 metadata_schema()
# Required CXGN::Metadata::Schema handle (read-write accessor).
94 has
'metadata_schema' => (is
=> 'rw', isa
=> 'CXGN::Metadata::Schema', required
=>1);
96 =head2 phenome_schema()
# Required CXGN::Phenome::Schema handle (read-write accessor).
100 has
'phenome_schema' => (is
=> 'rw', isa
=> 'CXGN::Phenome::Schema', required
=>1);
106 #has 'project_id' => (is => 'rw', isa => 'Int');
112 #has 'name' => (is => 'rw', isa => 'Str');
118 ##has 'description' => (is => 'rw', isa => 'Str', default => "No description");
120 =head2 breeding_program_id()
# Integer id, not required at construction; create_and_store_analysis_design()
# dies when it is unset.
124 has
'breeding_program_id' => (is
=> 'rw', isa
=> 'Int');
126 =head2 accession_names()
# Lazily populated by _load_accession_names(); the value may be undef
# (Maybe[ArrayRef]).
130 has
'accession_names' => (is
=> 'rw', isa
=> 'Maybe[ArrayRef]', lazy
=> 1, builder
=> '_load_accession_names');
# Lazily populated by _get_layout(); any reference is accepted (isa 'Ref').
136 has
'design' => (is
=> 'rw', isa
=> 'Ref', lazy
=> 1, builder
=> '_get_layout');
# Lazily populated by _load_traits(); must be an array reference.
142 has
'traits' => (is
=> 'rw', isa
=> 'ArrayRef', builder
=> '_load_traits', lazy
=> 1);
144 =head2 nd_geolocation_id()
# Integer id or undef. It is assigned from get_location() during object
# construction and from the "[Computation]" location when a design is stored.
148 has
'nd_geolocation_id' => (is
=> 'rw', isa
=> 'Maybe[Int]');
# Integer id of the owning user; create_and_store_analysis_design() writes it
# to the 'project_sp_person_id' projectprop and dies when it is unset.
154 has
'user_id' => (is
=> 'rw', isa
=> 'Int');
# String attribute; no reader of it is visible in this section.
160 has
'user_role' => (is
=> 'rw', isa
=> 'Str');
162 =head2 analysis_model_protocol_id()
164 nd_protocol_id of save model information
# Integer or undef; when set it is passed on to CXGN::Trial::TrialCreate as
# analysis_model_protocol_id.
168 has
'analysis_model_protocol_id' => (isa
=> 'Int|Undef', is
=> 'rw');
172 CXGN::Analysis::AnalysisMetadata object.
# Metadata object or undef; create_and_store_analysis_design() creates one
# when none is set.
176 has
'metadata' => (isa
=> 'Maybe[CXGN::Analysis::AnalysisMetadata]', is
=> 'rw');
181 # $args->{trial_id} = $args->{project_id};
184 #has 'project' => (isa => 'CXGN::Project', is => 'rw');
188 year the analysis was done.
192 #has 'year' => (isa => 'Str', is => 'rw');
196 information about the saved model.
# Hash reference; set during construction from
# CXGN::AnalysisModel::GetModel->get_model() when a protocol is linked.
200 has
'saved_model' => (isa
=> 'HashRef', is
=> 'rw');
# Construction-time setup. The enclosing sub header is not visible in this
# section; presumably this is the Moose BUILD hook -- confirm.
# When the object already has a trial id, this loads the location id, the
# analysis metadata projectprop and, if an nd_protocol is linked to the
# analysis experiment, the saved model information.
# $self, $args, $stockprop_id and $metadata are used below without a visible
# declaration; presumably they are declared on lines not shown here.
207 print STDERR
"BUILD CXGN::Analysis...\n";
210 if ($self->get_trial_id()) {
# The schema is taken from the constructor argument hash, not the accessor.
211 my $schema = $args->{bcs_schema
};
212 print STDERR
"Location id retrieved : = ".$self->get_location()->[0]."\n";
213 $self->nd_geolocation_id($self->get_location()->[0]);
# Look up the projectprop row that carries this analysis' metadata JSON.
215 my $metadata_json_id = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'analysis_metadata_json', 'project_property')->cvterm_id();
216 my $rs = $self->bcs_schema()->resultset("Project::Projectprop")->search( { project_id
=> $self->get_trial_id(), type_id
=> $metadata_json_id });
# Despite its name, $stockprop_id holds a projectprop_id here.
219 if ($rs->count() > 0) {
220 $stockprop_id = $rs->first()->projectprop_id();
223 print STDERR
"Create AnalysisMetadata object...\n";
224 $metadata = CXGN
::Analysis
::AnalysisMetadata
->new( { bcs_schema
=> $schema, prop_id
=> $stockprop_id });
225 $self->metadata($metadata);
227 $stockprop_id = $metadata->prop_id();
229 my $time = DateTime
->now();
230 print STDERR
"prop_id is $stockprop_id...\n";
# No stored prop yet: attach the metadata object to this project and stamp
# its creation time ("YYYY-MM-DD HH:MM:SS").
231 if (! defined($stockprop_id)) {
232 print STDERR
"project_id = ".$self->get_trial_id()." with stockprop_id = undefined...storing metadata...\n";
233 $metadata->parent_id($self->get_trial_id());
234 $metadata->create_timestamp($time->ymd()." ".$time->hms());
# Find an nd_protocol linked to this project's analysis experiment. The
# experiment type id is interpolated into the SQL; the project id is bound.
238 my $analysis_nd_experiment_type_id = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'analysis_experiment', 'experiment_type')->cvterm_id();
239 my $nd_protocol_q = "SELECT nd_protocol_id
240 FROM nd_experiment_protocol
241 JOIN nd_experiment ON (nd_experiment_protocol.nd_experiment_id = nd_experiment.nd_experiment_id)
242 JOIN nd_experiment_project ON (nd_experiment_project.nd_experiment_id = nd_experiment.nd_experiment_id)
243 WHERE nd_experiment.type_id=$analysis_nd_experiment_type_id AND project_id=?;";
244 my $nd_protocol_h = $schema->storage->dbh()->prepare($nd_protocol_q);
245 $nd_protocol_h->execute($self->get_trial_id());
# Only the first returned row is read.
246 my ($nd_protocol_id) = $nd_protocol_h->fetchrow_array();
# When a protocol exists, load the saved model and keep it on the object.
247 if ($nd_protocol_id) {
248 my $m = CXGN
::AnalysisModel
::GetModel
->new({
250 metadata_schema
=>$self->metadata_schema(),
251 phenome_schema
=>$self->phenome_schema(),
252 nd_protocol_id
=>$nd_protocol_id
254 my $saved_model_object = $m->get_model();
255 $self->saved_model($saved_model_object);
259 # otherwise create an empty project object with an empty metadata object...
# NOTE(review): the comment above describes creating an empty object, but the
# statement that follows dies -- confirm which behaviour is intended.
261 die "need a project id...";
263 $self->metadata($metadata);
266 =head2 retrieve_analyses_by_user
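268 Usage: my @analyses = CXGN::Analysis->retrieve_analyses_by_user($bcs_schema, $people_schema, $metadata_schema, $phenome_schema, $user_id, $analyses_type);
269 Desc: Class function to retrieve all analyses by user_id, optionally restricted to one analysis model type
270 Ret: a list of CXGN::Analysis objects
271 Args: $bcs_schema, $people_schema, $metadata_schema, $phenome_schema - the schema objects handed to each CXGN::Analysis, $user_id - the numeric id of a user, $analyses_type - (optional) only return analyses whose metadata analysis_model_type equals this value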
# Class function: instantiate a CXGN::Analysis for every project that has both
# a 'project_sp_person_id' projectprop equal to the given user id and an
# 'analysis_metadata_json' projectprop.
# When $analyses_type is true, a project is only instantiated if the decoded
# metadata JSON has analysis_model_type equal to it.
# $user_id and @analyses are used below but are declared on lines not visible
# in this section; the return statement is not visible either.
277 sub retrieve_analyses_by_user
{
279 my $bcs_schema = shift;
280 my $people_schema = shift;
281 my $metadata_schema = shift;
282 my $phenome_schema = shift;
284 my $analyses_type = shift;
# Resolve the cvterm ids of the two projectprop types joined in the query.
286 my $user_info_type_id = SGN
::Model
::Cvterm
->get_cvterm_row($bcs_schema, 'project_sp_person_id', 'project_property')->cvterm_id();
287 my $analysis_info_type_id = SGN
::Model
::Cvterm
->get_cvterm_row($bcs_schema, 'analysis_metadata_json', 'project_property')->cvterm_id();
# Self-join on projectprop: one alias for the owner, one for the metadata
# JSON. All three values are bound as placeholders.
289 my $q = "SELECT userinfo.project_id, analysisinfo.value FROM projectprop AS userinfo
290 JOIN projectprop AS analysisinfo on (userinfo.project_id=analysisinfo.project_id)
291 WHERE userinfo.type_id=? AND analysisinfo.type_id=? AND userinfo.value=?";
293 my $h = $bcs_schema->storage()->dbh()->prepare($q);
294 $h->execute($user_info_type_id, $analysis_info_type_id, $user_id);
# One object per returned row; the metadata JSON is decoded only to apply
# the optional type filter.
297 while (my ($project_id, $analysis_info) = $h->fetchrow_array()) {
298 print STDERR
"Instantiating analysis project for project ID $project_id...\n";
299 my $info = decode_json
$analysis_info;
300 if ($analyses_type) {
301 if ($info->{analysis_model_type
} eq $analyses_type) {
302 push @analyses, CXGN
::Analysis
->new( { bcs_schema
=> $bcs_schema, people_schema
=> $people_schema, metadata_schema
=> $metadata_schema, phenome_schema
=> $phenome_schema, trial_id
=> $project_id });
# Presumably the no-filter branch (the else line is not visible): same
# constructor call without the type check.
306 push @analyses, CXGN
::Analysis
->new( { bcs_schema
=> $bcs_schema, people_schema
=> $people_schema, metadata_schema
=> $metadata_schema, phenome_schema
=> $phenome_schema, trial_id
=> $project_id });
# Create and persist the design for this analysis; returns get_trial_id().
#
# Takes an optional precomputed design hashref. When it is false, a design of
# type "Analysis" is calculated from accession_names() with
# CXGN::Trial::TrialDesign. Dies when user_id, description, name or breeding
# program is missing, and with "An error occurred creating the analysis
# design." / "Error saving trial: ..." on the failure paths shown below.
# Along the way it stores the owning user as a 'project_sp_person_id'
# projectprop, stores the analysis metadata (with a copy of the dataset when
# a dataset_id is set) and saves the trial through CXGN::Trial::TrialCreate.
# $self and $design are used below but are declared on lines not visible in
# this section.
313 sub create_and_store_analysis_design
{
315 my $precomputed_design_to_save = shift; #DESIGN HASHREF
317 my $schema = $self->bcs_schema();
318 my $dbh = $schema->storage->dbh();
320 print STDERR
"CREATE AND STORE ANALYSIS DESIGN...\n";
# Preconditions: refuse to store an incomplete analysis.
322 if (!$self->user_id()) {
323 die "Need an sp_person_id to store an analysis.";
325 if (!$self->get_description()) {
326 die "Need a description to store an analysis.";
328 if (!$self->get_name()) {
329 die "Need a name to store an analysis.";
331 if (!$self->breeding_program_id()) {
332 die "Need a breeding program to store an analysis.";
# The username is passed to TrialCreate as the operator.
335 my $p = CXGN
::People
::Person
->new($dbh, $self->user_id);
336 my $user_name = $p->get_username;
# No year set: the current year is computed and logged. The statement that
# assigns it is not visible in this section.
338 if (!$self->year()) {
339 my $dt = DateTime
->now();
340 my $year = $dt->year();
341 print STDERR
"Year: $year\n";
342 print STDERR
"No year provided. Using current year ($year).\n";
# Analyses are attached to the location whose description is "[Computation]".
# NOTE(review): the result of ->first is not checked before the method call;
# confirm that this location row is guaranteed to exist.
346 my $computation_location_name = "[Computation]";
347 my $calculation_location_id = $schema->resultset("NaturalDiversity::NdGeolocation")->search({ description
=> $computation_location_name })->first->nd_geolocation_id();
348 $self->nd_geolocation_id($calculation_location_id);
349 $self->set_location($calculation_location_id);
# The breeding program name is needed later for TrialCreate's 'program'.
351 my $breeding_program_name = $schema->resultset("Project::Project")->find({project_id
=>$self->breeding_program_id()})->name();
352 $self->set_breeding_program($self->breeding_program_id());
# Record the owning user as a projectprop on this project.
356 print STDERR
"Storing user info...\n";
357 my $project_sp_person_term_cvterm_id = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'project_sp_person_id', 'project_property')->cvterm_id();
358 my $row = $schema->resultset("Project::Projectprop")->create({
359 project_id
=> $self->get_trial_id(),
360 type_id
=>$project_sp_person_term_cvterm_id,
361 value
=>$self->user_id(),
# Create a metadata object when the caller did not supply one.
366 my $time = DateTime
->now();
367 if (!$self->metadata()) {
368 print STDERR
"Storing metadata...\n";
369 my $metadata = CXGN
::Analysis
::AnalysisMetadata
->new({ bcs_schema
=> $schema });
370 print STDERR
"Analysis ID = ".$self->get_trial_id()."\n";
371 $metadata->parent_id($self->get_trial_id());
372 $self->metadata( $metadata );
373 $self->metadata()->create_timestamp($time->ymd()." ".$time->hms());
376 # store dataset info, if available. Copy the actual dataset json,
377 # so that dataset info is frozen and does not reflect future
380 if ($self->metadata()->dataset_id()) {
381 print STDERR
"Retrieving data for dataset_id ".$self->metadata->dataset_id()."\n";
382 my $ds = CXGN
::Dataset
->new( { schema
=> $schema, people_schema
=> $self->people_schema(), sp_dataset_id
=> $self->metadata()->dataset_id() });
383 my $data = $ds->to_hashref();
384 #print STDERR "DATA: $data\n";
385 $self->metadata()->dataset_data(JSON
::Any
->encode($data));
388 print STDERR
"No dataset_id provided...\n";
# Persist the metadata with the current time as its modification stamp.
391 $self->metadata()->parent_id($self->get_trial_id());
392 $self->metadata()->modified_timestamp($time->ymd()." ".$time->hms());
393 $self->metadata()->store();
# Without a precomputed design, calculate one from the accession names.
# NOTE(review): this branch calls $self->name() while the precondition above
# uses get_name(); confirm that both accessors exist on the parent class.
396 if (!$precomputed_design_to_save) {
397 print STDERR
"Create a new analysis design...\n";
398 my $td = CXGN
::Trial
::TrialDesign
->new();
400 $td->set_trial_name($self->name());
401 $td->set_stock_list($self->accession_names());
402 $td->set_design_type("Analysis");
404 if ($td->calculate_design()) {
405 print STDERR
"Design calculated :-) ...\n";
406 $design = $td->get_design();
407 $self->design($design);
410 die "An error occurred creating the analysis design.";
413 $design = $precomputed_design_to_save;
415 # print STDERR Dumper $design;
417 print STDERR
"Store design...\n";
# Stays undef unless a saved-model protocol id was set on the object.
419 my $saved_model_protocol_id;
420 if ($self->analysis_model_protocol_id) {
421 $saved_model_protocol_id = $self->analysis_model_protocol_id();
# The 'analysis_experiment' cvterm id is passed as both design_type and
# trial_type.
# NOTE(review): owner_id appears twice in the argument hash below, with the
# same value both times.
424 my $analysis_experiment_type_id = SGN
::Model
::Cvterm
->get_cvterm_row($schema, 'analysis_experiment', 'experiment_type')->cvterm_id();
426 my $trial_create = CXGN
::Trial
::TrialCreate
->new({
427 trial_id
=> $self->get_trial_id(),
428 owner_id
=> $self->user_id(),
429 chado_schema
=> $schema,
431 owner_id
=> $self->user_id,
432 operator
=> $user_name,
434 design_type
=> $analysis_experiment_type_id,
435 program
=> $breeding_program_name,
436 trial_year
=> $self->year(),
437 trial_description
=> $self->description(),
438 trial_location
=> $computation_location_name,
439 trial_name
=> $self->name(),
440 trial_type
=> $analysis_experiment_type_id,
442 analysis_model_protocol_id
=> $saved_model_protocol_id,
445 # my $validate_error = $trial_create->validate_design();
447 # if ($validate_error) {
448 # print STDERR "VALIDATE ERROR! "; #.Dumper($validate_error)."\n";
451 ## print STDERR "Valiation successful. Storing...\n";
452 # try { $store_error = $design_store->store() }
453 # catch { $store_error = $_ };
455 # if ($store_error) {
456 # die "ERROR SAVING TRIAL!: $store_error\n";
# The die below reports $_, so it presumably sits in a catch block whose
# try/catch lines are not visible in this section.
460 $trial_create->save_trial();
463 die "Error saving trial: $_";
# The return value of this call is discarded.
# NOTE(review): presumably kept for a side effect of loading the stored
# layout -- confirm it is still needed.
466 $self->_get_layout()->get_design();
468 print STDERR
"Done with design create & store.\n";
469 return $self->get_trial_id();
473 # store analysis values is a separate call and has to be called after
# Store the analysis result values as phenotypes through
# CXGN::Phenotypes::StorePhenotypes.
#
# Positional arguments visible here, in order: $metadata_schema,
# $phenome_schema, $operator, $basepath, $tempfile_path. $self, $plots,
# $traits and $values are also used but are unpacked on lines not visible in
# this section, so the full argument list cannot be stated from here.
# Existing values are overwritten (overwrite_values => 1). A verification
# warning is emitted with warn; a storage error is fatal.
476 sub store_analysis_values
{
478 my $metadata_schema = shift;
479 my $phenome_schema = shift;
483 my $operator = shift;
484 my $basepath = shift;
489 my $tempfile_path = shift;
491 print STDERR
"Storing analysis values...\n";
# Metadata handed to the phenotype store; the date has the form
# "YYYY-MM-DD_HH:MM:SS" and no file is archived ('none').
493 my $time = DateTime
->now();
494 my $timestamp = $time->ymd()."_".$time->hms();
495 my %phenotype_metadata;
496 $phenotype_metadata{'archived_file'} = 'none';
497 $phenotype_metadata{'archived_file_type'} = 'analysis_values';
498 $phenotype_metadata{'operator'} = $operator;
499 $phenotype_metadata{'date'} = $timestamp;
501 my $store_phenotypes = CXGN
::Phenotypes
::StorePhenotypes
->new({
502 bcs_schema
=> $self->bcs_schema(),
503 basepath
=> $basepath,
508 temp_file_nd_experiment_id
=> $tempfile_path,
509 metadata_schema
=> $metadata_schema,
510 phenome_schema
=> $phenome_schema,
511 user_id
=> $self->user_id(),
512 stock_list
=> $plots,
513 trait_list
=> $traits,
514 values_hash
=> $values,
516 overwrite_values
=> 1,
517 metadata_hash
=> \
%phenotype_metadata,
# Verify first. The handling of $verified_error is on lines not visible here.
520 my ($verified_warning, $verified_error) = $store_phenotypes->verify();
522 if ($verified_warning) {
523 warn $verified_warning;
525 if ($verified_error) {
# store() returns the error first and the success message second.
529 my ($stored_phenotype_error, $stored_phenotype_success) = $store_phenotypes->store();
531 if ($stored_phenotype_error) {
532 die "An error occurred storing the phenotypes: $stored_phenotype_error\n";
# Build a CXGN::Trial::TrialLayout for this analysis, using the
# 'analysis_experiment' experiment type. The enclosing sub header and the
# return statement are not visible in this section; presumably this is the
# body of _get_layout, the builder of the 'design' attribute -- confirm.
540 my $design = CXGN
::Trial
::TrialLayout
->new({ schema
=> $self->bcs_schema(), trial_id
=> $self->get_trial_id(), experiment_type
=> 'analysis_experiment'});
542 # print STDERR "_get_layout: design = ".Dumper($design->get_design);
# Fetch the phenotype matrix for this analysis from
# CXGN::Phenotypes::PhenotypeMatrix, restricted to this trial id, at the
# 'analysis_instance' data level with the 'analysis_experiment' experiment
# type and the "MaterializedViewTable" search type.
# $self is unpacked and the result returned on lines not visible in this
# section.
546 sub get_phenotype_matrix
{
548 my $phenotypes_search = CXGN
::Phenotypes
::PhenotypeMatrix
->new(
549 bcs_schema
=>$self->bcs_schema(),
550 search_type
=> "MaterializedViewTable",
551 data_level
=> "analysis_instance",
552 experiment_type
=> "analysis_experiment",
553 trial_list
=> [ $self->get_trial_id() ],
# The matrix is collected in list context.
555 my @data = $phenotypes_search->get_phenotype_matrix();
# Builder for the 'accession_names' attribute: returns whatever the design
# object's get_accession_names() returns.
# $self is unpacked on a line not visible in this section.
559 sub _load_accession_names
{
562 my $design = $self->design();
563 #print STDERR "Design = ".Dumper($design);
# This first call only feeds the debug dump; the value returned to the
# caller comes from a second, separate get_accession_names() call below.
565 my @accessions = $design->get_accession_names();
566 print STDERR
"ACCESSIONS: ". Dumper
(\
@accessions);
567 # get the accessions from the design (not the dataset!)
569 return $self->design()->get_accession_names();
# Derive the trait list from the first row of the phenotype matrix. The
# enclosing sub header and the return statement are not visible in this
# section; presumably this is the body of _load_traits, the builder of the
# 'traits' attribute -- confirm.
575 my $phenotypes = $self->get_phenotype_matrix();
577 my $header = $phenotypes->[0];
# Keep every header column from index 39 to the end.
# NOTE(review): 39 is hard-coded; presumably it is the number of leading
# non-trait columns in the matrix -- confirm against PhenotypeMatrix.
579 my $traits = [ @
$header[39..scalar(@
$header)-1] ];
581 print STDERR
"_load_traits: TRAITS: ".Dumper
($traits);
582 #$self->traits($traits);
588 #__PACKAGE__->meta->make_immutable;