add is_variable accessor.
[sgn.git] / lib / CXGN / Dataset.pm
blobd48f6182673fd332c0853a9c4e6b77181202972c
2 =head1 NAME
4 CXGN::Dataset - a class to easily query the database for breeding data
6 =head1 DESCRIPTION
8 CXGN::Dataset can be used to flexibly define datasets for breeding applications. For example, a dataset can be defined using a list of germplasm, a list of trials, a list of years, etc, or a combination of the above. Once defined, it allows to easily obtain related phenotypes and genotypes and other data.
10 Datasets can be stored in the database and retrieved for later use.
12 Currently, there are three incarnations of CXGN::Dataset:
14 =over 5
16 =item CXGN::Dataset
18 Unbuffered output of the queries
20 =item CXGN::Dataset::File
22 Writes results to files
24 =item CXGN::Dataset::Cache
26 Returns output like CXGN::Dataset, but uses a disk-cache for the response data
28 =back
30 =head1 SYNOPSYS
32 my $ds = CXGN::Dataset->new( { people_schema => $p, schema => $s } );
33 $ds->accessions([ 'a', 'b', 'c' ]);
34 my $trials = $ds->retrieve_trials();
35 my $sp_dataset_id = $ds->store();
36 #...
37 my $restored_ds = CXGN::Dataset( { people_schema => $p, schema => $s, sp_dataset_id => $sp_dataset_id } );
38 my $years = $restored_ds->retrieve_years();
39 #...
41 =head1 AUTHOR
43 Lukas Mueller <lam87@cornell.edu>
46 =head1 ACCESSORS
48 =cut
51 package CXGN::Dataset;
53 use Moose;
54 use Moose::Util::TypeConstraints;
55 use Data::Dumper;
56 use JSON::Any;
57 use CXGN::BreederSearch;
58 use CXGN::People::Schema;
59 use CXGN::Phenotypes::PhenotypeMatrix;
60 use CXGN::Genotype::Search;
61 use CXGN::Phenotypes::HighDimensionalPhenotypesSearch;
63 =head2 people_schema()
65 accessor for CXGN::People::Schema database object
67 =cut
69 has 'people_schema' => (isa => 'CXGN::People::Schema', is => 'rw', required => 1 );
71 =head2 schema()
73 accessor for Bio::Chado::Schema database object
75 =cut
77 has 'schema' => ( isa => "Bio::Chado::Schema", is => 'rw', required => 1 );
79 =head2 sp_dataset_id()
81 accessor for sp_dataset primary key
83 =cut
86 has 'sp_dataset_id' => ( isa => 'Int',
87 is => 'rw',
88 predicate => 'has_sp_dataset_id',
91 =head2 data()
93 accessor for the json-formatted data structure (as used for the backend storage)
95 =cut
97 has 'data' => ( isa => 'HashRef',
98 is => 'rw'
101 =head2 name()
103 accessor for the name of this dataset
105 =cut
107 has 'name' => ( isa => 'Maybe[Str]',
108 is => 'rw',
111 =head2 description()
113 accessor for the descrition of this dataset
115 =cut
117 has 'description' => ( isa => 'Maybe[Str]',
118 is => 'rw'
121 =head2 sp_person_id()
123 accessor for sp_person_id (owner of the dataset)
125 =cut
127 has 'sp_person_id' => ( isa => 'Maybe[Int]',
128 is => 'rw',
132 =head2 accessions()
134 accessor for defining the accessions that are part of this dataset (ArrayRef).
136 =cut
138 has 'accessions' => ( isa => 'Maybe[ArrayRef]',
139 is => 'rw',
140 predicate => 'has_accessions',
143 =head2 plots()
145 accessor for defining the plots that are part of this dataset (ArrayRef).
147 =cut
149 has 'plots' => ( isa => 'Maybe[ArrayRef]',
150 is => 'rw',
151 predicate => 'has_plots',
154 =head2 plants()
156 accessor for defining the plants that are part of this dataset (ArrayRef).
158 =cut
160 has 'plants' => ( isa => 'Maybe[ArrayRef]',
161 is => 'rw',
162 predicate => 'has_plants',
167 =head2 trials()
169 accessor for defining the trials that are part of this dataset (ArrayRef).
171 =cut
174 has 'trials' => ( isa => 'Maybe[ArrayRef]',
175 is => 'rw',
176 predicate => 'has_trials',
180 =head2 traits()
182 =cut
184 has 'traits' => ( isa => 'Maybe[ArrayRef]',
185 is => 'rw',
186 predicate => 'has_traits',
189 =head2 years()
191 =cut
194 has 'years' => ( isa => 'Maybe[ArrayRef[Str]]',
195 is => 'rw',
196 predicate => 'has_years',
199 =head2 breeding_programs()
201 =cut
203 has 'breeding_programs' => ( isa => 'Maybe[ArrayRef]',
204 is => 'rw',
205 predicate => 'has_breeding_programs',
206 default => sub { [] },
209 =head2 genotyping_protocols()
211 =cut
213 has 'genotyping_protocols' => ( isa => 'Maybe[ArrayRef]',
214 is => 'rw',
215 predicate => 'has_genotyping_protocols',
218 =head2 genotyping_projects()
220 =cut
222 has 'genotyping_projects' => ( isa => 'Maybe[ArrayRef]',
223 is => 'rw',
224 predicate => 'has_genotyping_projects',
227 =head2 trial_types()
229 =cut
231 has 'trial_types' => ( isa => 'Maybe[ArrayRef]',
232 is => 'rw',
233 predicate => 'has_trial_types',
236 =head2 trial_designs()
238 =cut
240 has 'trial_designs' => ( isa => 'Maybe[ArrayRef]',
241 is => 'rw',
242 predicate => 'has_trial_designs',
245 =head2 locations()
247 =cut
249 has 'locations' => ( isa => 'Maybe[ArrayRef]',
250 is => 'rw',
251 predicate => 'has_locations',
255 has 'category_order' => ( isa => 'Maybe[ArrayRef]',
256 is => 'rw',
257 predicate => 'has_category_order',
260 has 'is_live' => ( isa => 'Bool',
261 is => 'rw',
262 default => 0,
265 has 'is_public' => ( isa => 'Bool',
266 is => 'rw',
267 default => 0,
271 =head2 data_level()
273 =cut
275 has 'data_level' => ( isa => 'String',
276 is => 'rw',
277 isa => enum([qw[ plot plant subplot ]]),
278 default => 'plot',
281 =head2 exclude_phenotype_outlier()
283 =cut
285 has 'exclude_phenotype_outlier' => (
286 isa => 'Bool',
287 is => 'ro',
288 default => 0
291 =head2 outliers()
293 =cut
295 has 'outliers' => (
296 isa => 'Maybe[ArrayRef]',
297 is => 'rw',
298 predicate => 'has_outliers',
299 default => sub { [] },
302 =head2 outlier_cutoff()
304 =cut
306 has 'outlier_cutoffs' => (
307 isa => 'Maybe[ArrayRef]',
308 is => 'rw',
309 predicate => 'has_outlier_cutoffs',
310 default => sub { [] },
313 =head2 exclude_dataset_outliers()
315 =cut
317 has 'exclude_dataset_outliers' => (
318 isa => 'Bool',
319 is => 'ro',
320 default => 0
323 =head2 include_phenotype_primary_key()
325 =cut
327 has 'include_phenotype_primary_key' => (
328 isa => 'Bool|Undef',
329 is => 'ro',
330 default => 0
333 has 'breeder_search' => (isa => 'CXGN::BreederSearch', is => 'rw');
335 sub BUILD {
336 my $self = shift;
337 my $args = shift;
340 my $bs = CXGN::BreederSearch->new(dbh => $self->schema->storage->dbh());
341 $self->breeder_search($bs);
343 if ($self->has_sp_dataset_id()) {
344 #print STDERR "Processing dataset_id ".$self->sp_dataset_id()."\n";
345 my $row = $self->people_schema()->resultset("SpDataset")->find({ sp_dataset_id => $self->sp_dataset_id() });
346 if (!$row) { die "The dataset with id ".$self->sp_dataset_id()." does not exist"; }
347 my $dataset = JSON::Any->decode($row->dataset());
348 $self->data($dataset);
349 $self->name($row->name());
350 $self->description($row->description());
351 $self->sp_person_id($row->sp_person_id());
352 $self->accessions($dataset->{categories}->{accessions});
353 $self->plots($dataset->{categories}->{plots});
354 $self->plants($dataset->{categories}->{plants});
355 $self->trials($dataset->{categories}->{trials});
356 $self->traits($dataset->{categories}->{traits});
357 $self->years($dataset->{categories}->{years});
358 $self->breeding_programs($dataset->{categories}->{breeding_programs});
359 $self->genotyping_protocols($dataset->{categories}->{genotyping_protocols});
360 $self->genotyping_projects($dataset->{categories}->{genotyping_projects});
361 $self->locations($dataset->{categories}->{locations});
362 $self->trial_designs($dataset->{categories}->{trial_designs});
363 $self->trial_types($dataset->{categories}->{trial_types});
364 $self->category_order($dataset->{category_order});
365 $self->is_live($dataset->{is_live});
366 $self->is_public($dataset->{is_public});
367 if ($args->{outliers}) { $self->outliers($args->{outliers})} else { $self->outliers($dataset->{outliers}); }
368 if ($args->{outlier_cutoffs}) { $self->outlier_cutoffs } else {($dataset->{outlier_cutoffs}); };
370 else { print STDERR "Creating empty dataset object\n"; }
374 =head1 CLASS METHODS
376 =head2 datasets_by_user()
379 =cut
381 sub get_datasets_by_user {
382 my $class = shift;
383 my $people_schema = shift;
384 my $sp_person_id = shift;
385 my $found;
387 my $rs = $people_schema->resultset("SpDataset")->search( { sp_person_id => $sp_person_id });
389 my @datasets;
390 my @datasets_id;
391 while (my $row = $rs->next()) {
392 push @datasets, [ $row->sp_dataset_id(), $row->name(), $row->description() ];
393 push @datasets_id, $row->sp_dataset_id();
396 $rs = $people_schema->resultset("SpDataset")->search( { is_public => 1 });
398 while (my $row = $rs->next()) {
399 $found = 0;
400 for (@datasets_id) {
401 if ( $_ == $row->sp_dataset_id() ) {
402 $found = 1;
405 if (!$found) {
406 push @datasets, [ $row->sp_dataset_id(), 'public - ' . $row->name(), $row->description() ];
409 return \@datasets;
412 =head2 datasets_public()
414 =cut
416 sub get_datasets_public {
417 my $class = shift;
418 my $people_schema = shift;
420 my $rs = $people_schema->resultset("SpDataset")->search( { is_public => 1 });
422 my @datasets;
423 while (my $row = $rs->next()) {
424 push @datasets, [ $row->sp_dataset_id(), $row->name(), $row->description() ];
427 return \@datasets;
430 =head2 datasets_public()
432 =cut
434 sub set_dataset_public {
435 my $self = shift;
437 my $row = $self->people_schema()->resultset("SpDataset")->find( { sp_dataset_id => $self->sp_dataset_id() });
439 if (! $row) {
440 return "The specified dataset does not exist";
441 } else {
442 eval {
443 $row->is_public(1);
444 $row->sp_person_id($self->sp_person_id());
445 $row->sp_dataset_id($self->sp_dataset_id());
446 $row->update();
448 if ($@) {
449 return "An error occurred, $@";
450 } else {
451 return undef;
456 =head2 datasets_public()
458 =cut
460 sub set_dataset_private {
461 my $self = shift;
463 my $row = $self->people_schema()->resultset("SpDataset")->find( { sp_dataset_id => $self->sp_dataset_id() });
465 if (! $row) {
466 return "The specified dataset does not exist";
467 } else {
468 eval {
469 $row->is_public(0);
470 $row->sp_person_id($self->sp_person_id());
471 $row->sp_dataset_id($self->sp_dataset_id());
472 $row->update();
474 if ($@) {
475 return "An error occurred, $@";
476 } else {
477 return undef;
482 =head2 exists_dataset_name
484 Usage:
485 Desc:
486 Ret:
487 Args:
488 Side Effects:
489 Example:
491 =cut
493 sub exists_dataset_name {
494 my $class = shift;
495 my $people_schema = shift;
496 my $name = shift;
498 my $rs = $people_schema->resultset("SpDataset")->search( { name => { -ilike => $name}});
500 if ($rs->count() > 0) {
501 return 1;
503 else {
504 return 0;
509 =head1 METHODS
512 =head2 to_hashref()
515 =cut
517 sub to_hashref {
518 my $self = shift;
520 my $dataset = $self->get_dataset_data();
522 my $data = {
523 name => $self->name(),
524 description => $self->description(),
525 sp_person_id => $self->sp_person_id(),
526 dataset => $dataset,
529 return $data;
532 =head2 store()
534 =cut
536 sub store {
537 my $self = shift;
539 #print STDERR "dataset_id = ".$self->sp_dataset_id()."\n";
540 if (!$self->has_sp_dataset_id()) {
541 #print STDERR "Creating new dataset row... ".$self->sp_dataset_id()."\n";
542 my $row = $self->people_schema()->resultset("SpDataset")->create({
543 name => $self->name(),
544 description => $self->description(),
545 sp_person_id => $self->sp_person_id(),
546 dataset => JSON::Any->encode($self->get_dataset_data()),
548 $self->sp_dataset_id($row->sp_dataset_id());
549 return $row->sp_dataset_id();
551 else {
552 #print STDERR "Updating dataset row ".$self->sp_dataset_id()."\n";
553 my $row = $self->people_schema()->resultset("SpDataset")->find( { sp_dataset_id => $self->sp_dataset_id() });
554 if ($row) {
555 $row->name($self->name());
556 $row->description($self->description());
557 $row->dataset(JSON::Any->encode($self->to_hashref()->{dataset}));
558 $row->sp_person_id($self->sp_person_id());
559 $row->update();
560 return $row->sp_dataset_id();
562 else {
563 print STDERR "Weird... has ".$self->sp_dataset_id()." but no data in db\n";
568 sub get_dataset_data {
569 my $self = shift;
570 my $dataref;
571 $dataref->{categories}->{accessions} = $self->accessions() if $self->accessions && scalar(@{$self->accessions})>0;
572 $dataref->{categories}->{plots} = $self->plots() if $self->plots && scalar(@{$self->plots})>0;
573 $dataref->{categories}->{plants} = $self->plants() if $self->plants && scalar(@{$self->plants})>0;
574 $dataref->{categories}->{trials} = $self->trials() if $self->trials && scalar(@{$self->trials})>0;
575 $dataref->{categories}->{traits} = $self->traits() if $self->traits && scalar(@{$self->traits})>0;
576 @{$dataref->{categories}->{years}} = @{$self->years()} if $self->years && scalar(@{$self->years})>0;
577 $dataref->{categories}->{breeding_programs} = $self->breeding_programs() if $self->breeding_programs && scalar(@{$self->breeding_programs})>0;
578 $dataref->{categories}->{genotyping_protocols} = $self->genotyping_protocols() if $self->genotyping_protocols && scalar(@{$self->genotyping_protocols})>0;
579 $dataref->{categories}->{genotyping_projects} = $self->genotyping_projects() if $self->genotyping_projects && scalar(@{$self->genotyping_projects})>0;
580 $dataref->{categories}->{trial_designs} = $self->trial_designs() if $self->trial_designs && scalar(@{$self->trial_designs})>0;
581 $dataref->{categories}->{trial_types} = $self->trial_types() if $self->trial_types && scalar(@{$self->trial_types})>0;
582 $dataref->{categories}->{locations} = $self->locations() if $self->locations && scalar(@{$self->locations})>0;
583 $dataref->{category_order} = $self->category_order();
584 $dataref->{outliers} = $self->outliers() if $self->outliers;
585 $dataref->{outlier_cutoffs} = $self->outlier_cutoffs() if $self->outliers;
586 return $dataref;
589 sub _get_dataref {
590 my $self = shift;
591 my $dataref;
593 $dataref->{accessions} = join(",", @{$self->accessions()}) if $self->accessions && scalar(@{$self->accessions})>0;
594 $dataref->{plots} = join(",", @{$self->plots()}) if $self->plots && scalar(@{$self->plots})>0;
595 $dataref->{plants} = join(",", @{$self->plants()}) if $self->plants && scalar(@{$self->plants})>0;
596 $dataref->{trials} = join(",", @{$self->trials()}) if $self->trials && scalar(@{$self->trials})>0;
597 $dataref->{traits} = join(",", @{$self->traits()}) if $self->traits && scalar(@{$self->traits})>0;
598 $dataref->{years} = join(",", map { "'".$_."'" } @{$self->years()}) if $self->years && scalar(@{$self->years})>0;
599 $dataref->{breeding_programs} = join(",", @{$self->breeding_programs()}) if $self->breeding_programs && scalar(@{$self->breeding_programs})>0;
600 $dataref->{genotyping_protocols} = join(",", @{$self->genotyping_protocols()}) if $self->genotyping_protocols && scalar(@{$self->genotyping_protocols})>0;
601 $dataref->{genotyping_projects} = join(",", @{$self->genotyping_projects()}) if $self->genotyping_projects && scalar(@{$self->genotyping_projects})>0;
602 $dataref->{trial_designs} = join(",", @{$self->trial_designs()}) if $self->trial_designs && scalar(@{$self->trial_designs})>0;
603 $dataref->{trial_types} = join(",", @{$self->trial_types()}) if $self->trial_types && scalar(@{$self->trial_types})>0;
604 $dataref->{locations} = join(",", @{$self->locations()}) if $self->locations && scalar(@{$self->locations})>0;
605 return $dataref;
608 sub _get_source_dataref {
609 my $self = shift;
610 my $source_type = shift;
612 my $dataref;
614 $dataref->{$source_type} = $self->_get_dataref();
616 return $dataref;
619 =head2 retrieve_genotypes()
621 Retrieves genotypes as a listref of hashrefs.
623 =cut
625 sub retrieve_genotypes {
626 my $self = shift;
627 my $protocol_id = shift;
628 my $genotypeprop_hash_select = shift || ['DS'];
629 my $protocolprop_top_key_select = shift || [];
630 my $protocolprop_marker_hash_select = shift || [];
631 my $return_only_first_genotypeprop_for_stock = shift || 1;
632 my $chromosome_list = shift || [];
633 my $start_position = shift;
634 my $end_position = shift;
635 my $marker_name_list = shift || [];
636 # print STDERR "CXGN::Dataset retrieve_genotypes\n";
638 my $accessions = $self->retrieve_accessions();
640 #print STDERR "ACCESSIONS: ".Dumper($accessions);
642 my @accession_ids;
643 foreach (@$accessions) {
644 push @accession_ids, $_->[0];
647 #print STDERR "ACCESSION IDS: ".Dumper(\@accession_ids);
649 my $trials = $self->retrieve_trials();
650 my @trial_ids;
651 foreach (@$trials) {
652 push @trial_ids, $_->[0];
655 my @protocols;
656 if (!$protocol_id) {
657 my $genotyping_protocol_ref = $self->retrieve_genotyping_protocols();
658 foreach my $p (@$genotyping_protocol_ref) {
659 push @protocols, $p->[0];
661 } else {
662 @protocols = ($protocol_id);
665 my $genotypes_search = CXGN::Genotype::Search->new({
666 bcs_schema => $self->schema(),
667 people_schema=>$self->people_schema,
668 accession_list => \@accession_ids,
669 trial_list => \@trial_ids,
670 protocol_id_list => \@protocols,
671 chromosome_list => $chromosome_list,
672 start_position => $start_position,
673 end_position => $end_position,
674 marker_name_list => $marker_name_list,
675 genotypeprop_hash_select=>$genotypeprop_hash_select, #THESE ARE THE KEYS IN THE GENOTYPEPROP OBJECT
676 protocolprop_top_key_select=>$protocolprop_top_key_select, #THESE ARE THE KEYS AT THE TOP LEVEL OF THE PROTOCOLPROP OBJECT
677 protocolprop_marker_hash_select=>$protocolprop_marker_hash_select, #THESE ARE THE KEYS IN THE MARKERS OBJECT IN THE PROTOCOLPROP OBJECT
678 return_only_first_genotypeprop_for_stock=>$return_only_first_genotypeprop_for_stock #FOR MEMORY REASONS TO LIMIT DATA
680 my ($total_count, $dataref) = $genotypes_search->get_genotype_info();
681 return $dataref;
684 =head2 retrieve_phenotypes()
686 retrieves phenotypes as a listref of listrefs
688 =cut
690 sub retrieve_phenotypes {
691 my $self = shift;
693 my $accessions = $self->retrieve_accessions();
694 my @accession_ids;
695 foreach (@$accessions) {
696 push @accession_ids, $_->[0];
699 my $trials = $self->retrieve_trials();
700 my @trial_ids;
701 foreach (@$trials) {
702 push @trial_ids, $_->[0];
705 my $traits = $self->retrieve_traits();
706 my @trait_ids;
707 foreach (@$traits) {
708 push @trait_ids, $_->[0];
711 my $dataset_exluded_outliers = $self->exclude_dataset_outliers() ? $self->outliers() : undef;
713 my $phenotypes_search = CXGN::Phenotypes::PhenotypeMatrix->new(
714 search_type=>'MaterializedViewTable',
715 bcs_schema=>$self->schema(),
716 data_level=>$self->data_level(),
717 trait_list=>\@trait_ids,
718 trial_list=>\@trial_ids,
719 accession_list=>\@accession_ids,
720 exclude_phenotype_outlier=>$self->exclude_phenotype_outlier,
721 include_phenotype_primary_key=>$self->include_phenotype_primary_key,
722 dataset_exluded_outliers=>$dataset_exluded_outliers
724 my @data = $phenotypes_search->get_phenotype_matrix();
725 return \@data;
728 =head2 retrieve_phenotypes_ref()
730 retrieves phenotypes as a hashref representation
732 =cut
734 sub retrieve_phenotypes_ref {
735 my $self = shift;
737 my $accessions = $self->retrieve_accessions();
738 my @accession_ids;
739 foreach (@$accessions) {
740 push @accession_ids, $_->[0];
743 my $trials = $self->retrieve_trials();
744 my @trial_ids;
745 foreach (@$trials) {
746 push @trial_ids, $_->[0];
749 my $traits = $self->retrieve_traits();
750 my @trait_ids;
751 foreach (@$traits) {
752 push @trait_ids, $_->[0];
755 my $phenotypes_search = CXGN::Phenotypes::SearchFactory->instantiate(
756 'MaterializedViewTable',
758 bcs_schema=>$self->schema(),
759 data_level=>$self->data_level(),
760 trait_list=>\@trait_ids,
761 trial_list=>\@trial_ids,
762 accession_list=>\@accession_ids,
763 exclude_phenotype_outlier=>$self->exclude_phenotype_outlier
766 my ($data, $unique_traits) = $phenotypes_search->search();
768 return ($data, $unique_traits);
771 =head2 retrieve_high_dimensional_phenotypes()
773 retrieves high-dimensional phenotypes (NIRS, transcriptomics, and metabolomics) as a hashref representation. Will return both the data-matrix and the identifier metadata (transcripts and metabolites)
775 =cut
777 sub retrieve_high_dimensional_phenotypes {
778 my $self = shift;
779 my $nd_protocol_id = shift;
780 my $high_dimensional_phenotype_type = shift; #NIRS, Transcriptomics, or Metabolomics
781 my $query_associated_stocks = shift || 1;
782 my $high_dimensional_phenotype_identifier_list = shift || [];
784 if (!$nd_protocol_id) {
785 die "Must provide the protocol id!\n";
788 if (!$high_dimensional_phenotype_type) {
789 die "Must provide the high dimensional phenotype type!\n";
792 my $accessions = $self->retrieve_accessions();
793 my @accession_ids;
794 foreach (@$accessions) {
795 push @accession_ids, $_->[0];
798 my $plots = $self->retrieve_plots();
799 my @plot_ids;
800 foreach (@$plots) {
801 push @plot_ids, $_->[0];
804 my $plants = $self->retrieve_plants();
805 my @plant_ids;
806 foreach (@$plants) {
807 push @plant_ids, $_->[0];
810 my $phenotypes_search = CXGN::Phenotypes::HighDimensionalPhenotypesSearch->new({
811 bcs_schema=>$self->schema(),
812 nd_protocol_id=>$nd_protocol_id,
813 high_dimensional_phenotype_type=>$high_dimensional_phenotype_type,
814 query_associated_stocks=>$query_associated_stocks,
815 high_dimensional_phenotype_identifier_list=>$high_dimensional_phenotype_identifier_list,
816 accession_list=>\@accession_ids,
817 plot_list=>\@plot_ids,
818 plant_list=>\@plant_ids,
821 my ($data_matrix, $identifier_metadata, $identifier_names) = $phenotypes_search->search();
823 return ($data_matrix, $identifier_metadata, $identifier_names);
826 =head2 retrieve_high_dimensional_phenotypes_relationship_matrix()
828 retrieves high-dimensional phenotypes relationship matrix (NIRS, transcriptomics, and metabolomics) as a hashref representation. Will return both the data-matrix and the identifier metadata (transcripts and metabolites)
830 =cut
832 sub retrieve_high_dimensional_phenotypes_relationship_matrix {
833 my $self = shift;
834 my $nd_protocol_id = shift;
835 my $high_dimensional_phenotype_type = shift; #NIRS, Transcriptomics, or Metabolomics
836 my $query_associated_stocks = shift || 1;
837 my $temp_data_file = shift;
838 my $download_file_tempfile = shift;
840 if (!$nd_protocol_id) {
841 die "Must provide the protocol id!\n";
843 if (!$high_dimensional_phenotype_type) {
844 die "Must provide the high dimensional phenotype type!\n";
847 my $accessions = $self->retrieve_accessions();
848 my @accession_ids;
849 foreach (@$accessions) {
850 push @accession_ids, $_->[0];
853 my $plots = $self->retrieve_plots();
854 my @plot_ids;
855 foreach (@$plots) {
856 push @plot_ids, $_->[0];
859 my $plants = $self->retrieve_plants();
860 my @plant_ids;
861 foreach (@$plants) {
862 push @plant_ids, $_->[0];
865 my $phenotypes_search = CXGN::Phenotypes::HighDimensionalPhenotypesRelationshipMatrix->new({
866 bcs_schema=>$self->schema,
867 nd_protocol_id=>$nd_protocol_id,
868 temporary_data_file=>$temp_data_file,
869 relationship_matrix_file=>$download_file_tempfile,
870 high_dimensional_phenotype_type=>$high_dimensional_phenotype_type,
871 query_associated_stocks=>$query_associated_stocks,
872 accession_list=>\@accession_ids,
873 plot_list=>\@plot_ids,
874 plant_list=>\@plant_ids
876 my ($relationship_matrix_data, $data_matrix, $identifier_metadata, $identifier_names) = $phenotypes_search->search();
877 # print STDERR Dumper $relationship_matrix_data;
878 # print STDERR Dumper $data_matrix;
879 # print STDERR Dumper $identifier_metadata;
880 # print STDERR Dumper $identifier_names;
882 return ($relationship_matrix_data, $data_matrix, $identifier_metadata, $identifier_names);
885 =head2 retrieve_accessions()
887 retrieves accessions as a listref of listref [stock_id, uniquename]
889 =cut
891 sub retrieve_accessions {
892 my $self = shift;
893 my $accessions;
894 if ($self->accessions() && scalar(@{$self->accessions()})>0) {
895 my @stocks;
896 my $stock_rs = $self->schema->resultset("Stock::Stock")->search({'stock_id' => { -in => $self->accessions }});
897 while (my $a = $stock_rs->next()) {
898 push @stocks, [$a->stock_id, $a->uniquename];
900 return \@stocks;
902 else {
903 my $criteria = $self->get_dataset_definition();
904 push @$criteria, "accessions";
906 $accessions = $self->breeder_search()->metadata_query($criteria, $self->_get_source_dataref("accessions"));
908 return $accessions->{results};
911 =head2 retrieve_plots()
913 Retrieves plots as a listref of listrefs.
915 =cut
917 sub retrieve_plots {
918 my $self = shift;
919 my $plots;
920 if ($self->plots && scalar(@{$self->plots})>0) {
921 my @stocks;
922 my $stock_rs = $self->schema->resultset("Stock::Stock")->search({'stock_id' => {-in => $self->plots}});
923 while (my $a = $stock_rs->next()) {
924 push @stocks, [$a->stock_id, $a->uniquename];
926 return \@stocks;
928 else {
929 my $criteria = $self->get_dataset_definition();
930 push @$criteria, "plots";
931 $plots = $self->breeder_search()->metadata_query($criteria, $self->_get_source_dataref("plots"));
933 return $plots->{results};
936 =head2 retrieve_plants()
938 Retrieves plants as a listref of listrefs.
940 =cut
942 sub retrieve_plants {
943 my $self = shift;
944 my $plants;
945 if ($self->plants && scalar(@{$self->plants})>0) {
946 my @stocks;
947 my $stock_rs = $self->schema->resultset("Stock::Stock")->search({'stock_id' => {-in => $self->plants}});
948 while (my $a = $stock_rs->next()) {
949 push @stocks, [$a->stock_id, $a->uniquename];
951 return \@stocks;
953 else {
954 my $criteria = $self->get_dataset_definition();
955 push @$criteria, "plants";
956 $plants = $self->breeder_search()->metadata_query($criteria, $self->_get_source_dataref("plants"));
958 return $plants->{results};
961 =head2 retrieve_trials()
963 retrieves trials as a listref of listrefs.
965 =cut
967 sub retrieve_trials {
968 my $self = shift;
969 my $trials;
970 if ($self->trials && scalar(@{$self->trials})>0) {
971 my @projects;
972 my $rs = $self->schema->resultset("Project::Project")->search({'project_id' => {-in => $self->trials}});
973 while (my $a = $rs->next()) {
974 push @projects, [$a->project_id, $a->name];
976 return \@projects;
978 else {
979 my $criteria = $self->get_dataset_definition();
980 push @$criteria, "trials";
981 $trials = $self->breeder_search()->metadata_query($criteria, $self->_get_source_dataref("trials"));
983 #print STDERR "TRIALS: ".Dumper($trials);
984 return $trials->{results};
987 =head2 retrieve_traits()
989 retrieves traits as a listref of listrefs.
991 =cut
993 sub retrieve_traits {
994 my $self = shift;
995 my $traits;
996 if ($self->traits && scalar(@{$self->traits})>0) {
997 my @cvterms;
998 my $rs = $self->schema->resultset("Cv::Cvterm")->search({'cvterm_id' => {-in => $self->traits}});
999 while (my $a = $rs->next()) {
1000 push @cvterms, [$a->cvterm_id, $a->name];
1002 return \@cvterms;
1004 else {
1005 my $criteria = $self->get_dataset_definition();
1006 push @$criteria, "traits";
1007 $traits = $self->breeder_search()->metadata_query($criteria, $self->_get_source_dataref("traits"));
1009 return $traits->{results};
1013 =head2 retrieve_years()
1015 retrieves years as a listref of listrefs
1017 =cut
1019 sub retrieve_years {
1020 my $self = shift;
1021 my @years;
1022 if ($self->years() && scalar(@{$self->years()})>0) {
1023 foreach my $a (@{$self->years()}) {
1024 push @years, [$a, $a];
1027 else {
1028 my $criteria = $self->get_dataset_definition();
1029 push @$criteria, "years";
1030 my $year_data = $self->breeder_search()->metadata_query($criteria, $self->_get_source_dataref("years"));
1031 my $year_list = $year_data->{results};
1033 foreach my $y (@$year_list) {
1034 push @years, [$y->[0], $y->[0]];
1037 return \@years;
1040 =head2 retrieve_years()
1042 retrieves years as a listref of listrefs
1044 =cut
1046 sub retrieve_locations {
1047 my $self = shift;
1048 my $locations;
1049 if ($self->locations && scalar(@{$self->locations})>0) {
1050 my @locs;
1051 my $rs = $self->schema->resultset("NaturalDiversity::NdGeolocation")->search({'nd_geolocation_id' => {-in => $self->locations}});
1052 while (my $a = $rs->next()) {
1053 push @locs, [$a->nd_geolocation_id, $a->description];
1055 return \@locs;
1057 else {
1058 my $criteria = $self->get_dataset_definition();
1059 push @$criteria, "locations";
1060 $locations = $self->breeder_search()->metadata_query($criteria, $self->_get_source_dataref("locations"));
1062 return $locations->{results};
1065 =head2 retrieve_breeding_programs
1067 Usage:
1068 Desc:
1069 Ret:
1070 Args:
1071 Side Effects:
1072 Example:
1074 =cut
1076 sub retrieve_breeding_programs {
1077 my $self = shift;
1078 my $breeding_programs;
1079 if ($self->breeding_programs && scalar(@{$self->breeding_programs})>0) {
1080 my @projects;
1081 my $rs = $self->schema->resultset("Project::Project")->search({'project_id' => {-in => $self->breeding_programs}});
1082 while (my $a = $rs->next()) {
1083 push @projects, [$a->project_id, $a->name];
1085 return \@projects;
1087 else {
1088 my $criteria = $self->get_dataset_definition();
1089 push @$criteria, "breeding_programs";
1090 $breeding_programs = $self->breeder_search()->metadata_query($criteria, $self->_get_source_dataref("breeding_programs"));
1092 return $breeding_programs->{results};
1095 =head2 retrieve_genotyping_protocols
1097 Usage:
1098 Desc:
1099 Ret:
1100 Args:
1101 Side Effects:
1102 Example:
1104 =cut
1106 sub retrieve_genotyping_protocols {
1107 my $self = shift;
1108 my $genotyping_protocols;
1109 if ($self->genotyping_protocols && scalar(@{$self->genotyping_protocols})>0) {
1110 my @protocols;
1111 my $rs = $self->schema->resultset("NaturalDiversity::NdProtocol")->search({'nd_protocol_id' => {-in => $self->genotyping_protocols}});
1112 while (my $a = $rs->next()) {
1113 push @protocols, [$a->nd_protocol_id, $a->name];
1115 return \@protocols;
1117 else {
1118 my $criteria = $self->get_dataset_definition();
1119 push @$criteria, "genotyping_protocols";
1120 $genotyping_protocols = $self->breeder_search()->metadata_query($criteria, $self->_get_source_dataref("genotyping_protocols"));
1122 return $genotyping_protocols->{results};
1125 =head2 retrieve_trial_designs
1127 Usage:
1128 Desc:
1129 Ret:
1130 Args:
1131 Side Effects:
1132 Example:
1134 =cut
1136 sub retrieve_trial_designs {
1137 my $self = shift;
1138 my @trial_designs;
1139 if ($self->trial_designs && scalar(@{$self->trial_designs})>0) {
1140 foreach my $a (@{$self->trial_designs()}) {
1141 push @trial_designs, [$a, $a];
1144 else {
1145 my $criteria = $self->get_dataset_definition();
1146 push @$criteria, "trial_designs";
1147 my $breeding_program_data = $self->breeder_search()->metadata_query($criteria, $self->_get_source_dataref("trial_designs"));
1148 my $breeding_program_list = $breeding_program_data->{results};
1150 foreach my $y (@$breeding_program_list) {
1151 push @trial_designs, [$y->[0], $y->[0]];
1154 return \@trial_designs;
1158 =head2 retrieve_trial_types
1160 Usage:
1161 Desc:
1162 Ret:
1163 Args:
1164 Side Effects:
1165 Example:
1167 =cut
1169 sub retrieve_trial_types {
1170 my $self = shift;
1171 my @trial_types;
1172 if ($self->trial_types && scalar(@{$self->trial_types})>0) {
1173 foreach my $a (@{$self->trial_types()}) {
1174 push @trial_types, [$a, $a];
1177 else {
1178 my $criteria = $self->get_dataset_definition();
1179 push @$criteria, "trial_types";
1180 my $breeding_program_data = $self->breeder_search()->metadata_query($criteria, $self->_get_source_dataref("trial_types"));
1181 my $breeding_program_list = $breeding_program_data->{results};
1183 foreach my $y (@$breeding_program_list) {
1184 push @trial_types, [$y->[0], $y->[0]];
1187 return \@trial_types;
1190 sub get_dataset_definition {
1191 my $self = shift;
1192 my @criteria;
1194 if ($self->accessions && scalar(@{$self->accessions})>0) {
1195 push @criteria, "accessions";
1197 if ($self->plots && scalar(@{$self->plots})>0) {
1198 push @criteria, "plots";
1200 if ($self->plants && scalar(@{$self->plants})>0) {
1201 push @criteria, "plants";
1203 if ($self->trials && scalar(@{$self->trials})>0) {
1204 push @criteria, "trials";
1206 if ($self->traits && scalar(@{$self->traits})>0) {
1207 push @criteria, "traits";
1209 if ($self->years && scalar(@{$self->years})>0) {
1210 push @criteria, "years";
1212 if ($self->locations && scalar(@{$self->locations})>0) {
1213 push @criteria, "locations";
1215 if ($self->breeding_programs && scalar(@{$self->breeding_programs})>0) {
1216 push @criteria, "breeding_programs";
1218 if ($self->genotyping_protocols && scalar(@{$self->genotyping_protocols})>0) {
1219 push @criteria, "genotyping_protocols";
1221 if ($self->genotyping_projects && scalar(@{$self->genotyping_projects})>0) {
1222 push @criteria, "genotyping_projects";
1224 if ($self->trial_types && scalar(@{$self->trial_types})>0) {
1225 push @criteria, "trial_types";
1227 if ($self->trial_designs && scalar(@{$self->trial_designs})>0) {
1228 push @criteria, "trial_designs";
1231 return \@criteria;
1234 =head2 delete()
1236 Usage: $dataset->delete();
1237 Desc: Deletes the specified dataset. Returns a string with an
1238 error message is unsuccessful.
1239 Ret: string if failure, undef if success
1240 Args:
1241 Side Effects: The function does not check for ownership of the dataset,
1242 this has to be implemented in the calling function.
1243 Example:
1245 =cut
1247 sub delete {
1248 my $self = shift;
1250 my $row = $self->people_schema()->resultset("SpDataset")->find( { sp_dataset_id => $self->sp_dataset_id() });
1252 if (! $row) {
1253 return "The specified dataset does not exist";
1254 } else {
1255 eval {
1256 $row->delete();
1258 if ($@) {
1259 return "An error occurred, $@";
1260 } else {
1261 return undef;
1267 sub update_description {
1268 my $self = shift;
1269 my $description = shift;
1270 my $row = $self->people_schema()->resultset("SpDataset")->find( { sp_dataset_id => $self->sp_dataset_id() });
1271 if (! $row) {
1272 return "The specified dataset does not exist";
1273 } else {
1274 eval {
1275 $row->sp_person_id($self->sp_person_id());
1276 $row->sp_dataset_id($self->sp_dataset_id());
1277 $row->description($description);
1278 $row->update();
1280 if ($@) {
1281 return "An error occurred, $@";
1282 } else {
1283 return undef;