create and upload fieldbook phenotypes with treatments
[sgn.git] / lib / CXGN / Dataset.pm
blob6a67b3d92a93cc75d3026f93d034369a859cd8b1
2 =head1 NAME
4 CXGN::Dataset - a class to easily query the database for breeding data
6 =head1 DESCRIPTION
8 CXGN::Dataset can be used to flexibly define datasets for breeding applications. For example, a dataset can be defined using a list of germplasm, a list of trials, a list of years, etc., or a combination of the above. Once defined, it makes it easy to obtain related phenotypes, genotypes, and other data.
10 Datasets can be stored in the database and retrieved for later use.
12 Currently, there are three incarnations of CXGN::Dataset:
14 =over 5
16 =item CXGN::Dataset
18 Unbuffered output of the queries
20 =item CXGN::Dataset::File
22 Writes results to files
24 =item CXGN::Dataset::Cache
26 Returns output like CXGN::Dataset, but uses a disk-cache for the response data
28 =back
30 =head1 SYNOPSIS
32 my $ds = CXGN::Dataset->new( people_schema => $p, schema => $s);
33 $ds->accessions([ 'a', 'b', 'c' ]);
34 my $trials = $ds->retrieve_trials();
35 my $sp_dataset_id = $ds->store();
36 #...
37 my $restored_ds = CXGN::Dataset->new( people_schema => $p, schema => $s, sp_dataset_id => $sp_dataset_id );
38 my $years = $restored_ds->retrieve_years();
39 #...
41 =head1 AUTHOR
43 Lukas Mueller <lam87@cornell.edu>
46 =head1 ACCESSORS
48 =cut
51 package CXGN::Dataset;
53 use Moose;
54 use Moose::Util::TypeConstraints;
55 use Data::Dumper;
56 use JSON::Any;
57 use CXGN::BreederSearch;
58 use CXGN::People::Schema;
59 use CXGN::Phenotypes::PhenotypeMatrix;
60 use CXGN::Genotype::Search;
=head2 people_schema()

accessor for CXGN::People::Schema database object

=cut

has 'people_schema' => (isa => 'CXGN::People::Schema', is => 'rw', required => 1 );

=head2 schema()

accessor for Bio::Chado::Schema database object

=cut

has 'schema' => ( isa => "Bio::Chado::Schema", is => 'rw', required => 1 );

=head2 sp_dataset_id()

accessor for sp_dataset primary key

=cut

has 'sp_dataset_id' => ( isa => 'Int',
                         is => 'rw',
                         predicate => 'has_sp_dataset_id',
    );

=head2 data()

accessor for the json-formatted data structure (as used for the backend storage)

=cut

has 'data' => ( isa => 'HashRef',
                is => 'rw'
    );

=head2 name()

accessor for the name of this dataset

=cut

has 'name' => ( isa => 'Maybe[Str]',
                is => 'rw',
    );

=head2 description()

accessor for the description of this dataset

=cut

has 'description' => ( isa => 'Maybe[Str]',
                       is => 'rw'
    );

=head2 accessions()

accessor for defining the accessions that are part of this dataset (ArrayRef).

=cut

has 'accessions' => ( isa => 'Maybe[ArrayRef]',
                      is => 'rw',
                      predicate => 'has_accessions',
    );

=head2 plots()

accessor for defining the plots that are part of this dataset (ArrayRef).

=cut

has 'plots' => ( isa => 'Maybe[ArrayRef]',
                 is => 'rw',
                 predicate => 'has_plots',
    );

=head2 trials()

accessor for defining the trials that are part of this dataset (ArrayRef).

=cut

has 'trials' => ( isa => 'Maybe[ArrayRef]',
                  is => 'rw',
                  predicate => 'has_trials',
    );

=head2 traits()

accessor for defining the traits that are part of this dataset (ArrayRef).

=cut

has 'traits' => ( isa => 'Maybe[ArrayRef]',
                  is => 'rw',
                  predicate => 'has_traits',
    );

=head2 years()

accessor for defining the years that are part of this dataset (ArrayRef).

=cut

has 'years' => ( isa => 'Maybe[ArrayRef]',
                 is => 'rw',
                 predicate => 'has_years',
    );

=head2 breeding_programs()

accessor for defining the breeding programs that are part of this dataset (ArrayRef).

=cut

has 'breeding_programs' => ( isa => 'Maybe[ArrayRef]',
                             is => 'rw',
                             predicate => 'has_breeding_programs',
    );

=head2 is_live()

boolean flag, defaults to 0. NOTE(review): the flag is read back from the
stored dataset in BUILD but its meaning is not evident from this class -
please document.

=cut

has 'is_live' => ( isa => 'Bool',
                   is => 'rw',
                   default => 0,
    );

=head2 data_level()

the observation level handed to CXGN::Phenotypes::PhenotypeMatrix by
retrieve_phenotypes(); either 'plot' or 'plant', defaults to 'plot'.

=cut

# The attribute used to carry a second, earlier "isa => 'String'" entry.
# 'String' is not a Moose type and the entry was silently overridden by the
# enum below, so only the effective constraint is kept.
has 'data_level' => ( isa => enum([qw[ plot plant ]]),
                      is => 'rw',
                      default => 'plot',
    );

=head2 breeder_search()

accessor for the CXGN::BreederSearch object used for the metadata queries
(set up in BUILD).

=cut

has 'breeder_search' => (isa => 'CXGN::BreederSearch', is => 'rw');
# Moose construction hook.
#
# When an sp_dataset_id was supplied, the matching SpDataset row is loaded
# through people_schema and its name, description and JSON-encoded dataset
# definition are copied into this object. In every case a
# CXGN::BreederSearch object is created on the schema's database handle and
# stored in breeder_search.
#
# Dies if an sp_dataset_id was given but no such row exists.
sub BUILD {
    my $self = shift;

    if ($self->has_sp_dataset_id()) {
        # Only mention the id once we know it is defined.
        print STDERR "Processing dataset_id ".$self->sp_dataset_id()."\n";

        my $row = $self->people_schema()->resultset("SpDataset")->find({ sp_dataset_id => $self->sp_dataset_id() });
        if (!$row) {
            die "The dataset with id ".$self->sp_dataset_id()." does not exist in the database\n";
        }

        my $dataset = JSON::Any->decode($row->dataset());
        # 'data' is constrained to HashRef; fall back to an empty definition
        # if the stored JSON decodes to anything else (e.g. null).
        $dataset = {} unless ref($dataset) eq 'HASH';

        $self->data($dataset);
        $self->name($row->name());
        $self->description($row->description());

        # Assign only the categories that are actually stored. Assigning
        # undef would still flip the has_* predicate to true, and the rest
        # of the class treats a true predicate as "a list is present".
        foreach my $category (qw( accessions plots trials traits years breeding_programs )) {
            next unless defined $dataset->{$category};
            $self->$category($dataset->{$category});
        }

        $self->is_live($dataset->{is_live} ? 1 : 0);
    }
    else {
        print STDERR "Creating empty dataset object\n";
    }

    my $bs = CXGN::BreederSearch->new(dbh => $self->schema->storage->dbh());
    $self->breeder_search($bs);
}
232 =head1 CLASS METHODS
234 =head2 datasets_by_person()
237 =cut
# Class method. Looks up all SpDataset rows belonging to $sp_person_id via
# the supplied people schema.
#
# Returns an arrayref holding, for each row in turn, the sp_dataset_id
# followed by the name.
# NOTE(review): ids and names end up as alternating elements of one flat
# list rather than as [id, name] pairs - confirm this is what callers expect.
sub datasets_by_person {
    my ($class, $people_schema, $sp_person_id) = @_;

    my $dataset_rs = $people_schema->resultset("SpDataset")->search( { sp_person_id => $sp_person_id });

    my @datasets;
    while (my $dataset_row = $dataset_rs->next()) {
        push @datasets, $dataset_row->sp_dataset_id();
        push @datasets, $dataset_row->name();
    }

    return \@datasets;
}
255 =head1 METHODS
257 =head2 store()
259 =cut
# Saves this dataset to the SpDataset table via people_schema.
#
# The categories whose predicate is true (accessions, plots, trials, traits,
# years, breeding_programs) are JSON-encoded into the 'dataset' column; name
# and description are stored alongside.
#
# Without an sp_dataset_id a new row is created and its id is stored in
# this object; otherwise the existing row is updated.
#
# Returns the sp_dataset_id of the stored row, or nothing (undef / empty
# list) if sp_dataset_id is set but no such row exists.
sub store {
    my $self = shift;

    # Start from an empty hash so that a dataset without any category is
    # encoded as an empty JSON object instead of an undefined value.
    my $dataref = {};
    foreach my $category (qw( accessions plots trials traits years breeding_programs )) {
        my $predicate = "has_$category";
        $dataref->{$category} = $self->$category() if $self->$predicate();
    }

    my $json = JSON::Any->encode($dataref);

    my $data = { name => $self->name(),
                 description => $self->description(),
                 dataset => $json,
    };

    # The id is undefined for a dataset that has never been stored; do not
    # interpolate it in that case.
    print STDERR "dataset_id = ".($self->has_sp_dataset_id() ? $self->sp_dataset_id() : "(not yet stored)")."\n";

    if (!$self->has_sp_dataset_id()) {
        print STDERR "Creating new dataset row...\n";
        my $row = $self->people_schema()->resultset("SpDataset")->create($data);
        $self->sp_dataset_id($row->sp_dataset_id());
        return $row->sp_dataset_id();
    }

    print STDERR "Updating dataset row ".$self->sp_dataset_id()."\n";
    my $row = $self->people_schema()->resultset("SpDataset")->find( { sp_dataset_id => $self->sp_dataset_id() });
    if (!$row) {
        print STDERR "Weird... has ".$self->sp_dataset_id()." but no data in db\n";
        # Explicit "nothing stored" instead of leaking print's return value.
        return;
    }

    $row->name($self->name());
    $row->description($self->description());
    $row->dataset($json);
    $row->update();
    return $row->sp_dataset_id();
}
# Builds a hashref that maps each category present in this dataset
# (accessions, plots, trials, traits, years, breeding_programs) to a
# comma-joined string of its elements.
#
# A category is included only if its has_* predicate is true AND its value is
# defined: the attributes are Maybe[ArrayRef], so a predicate can be true
# while the value is undef, and dereferencing that would die.
#
# Returns undef when no category qualifies.
sub _get_dataref {
    my $self = shift;
    my $dataref;

    foreach my $category (qw( accessions plots trials traits years breeding_programs )) {
        my $predicate = "has_$category";
        next unless $self->$predicate();

        my $items = $self->$category();
        next unless defined $items;

        $dataref->{$category} = join(",", @$items);
    }
    return $dataref;
}
# Wraps the result of _get_dataref() in a one-key hashref keyed by
# $source_type, i.e. { $source_type => <dataref> }; this is the structure
# the retrieve_* methods hand to metadata_query().
sub _get_source_dataref {
    my ($self, $source_type) = @_;

    return +{ $source_type => scalar($self->_get_dataref()) };
}
328 =head2 retrieve_genotypes()
330 Retrieves genotypes as a listref of hashrefs.
332 =cut
# Runs a CXGN::Genotype::Search restricted to this dataset's accessions and
# trials for the given genotyping protocol id.
#
# get_genotype_info() yields (total_count, data); only the data part is
# returned, the count is discarded.
sub retrieve_genotypes {
    my ($self, $protocol_id) = @_;

    my %search_args = (
        bcs_schema     => $self->schema(),
        accession_list => $self->accessions(),
        trial_list     => $self->trials(),
        protocol_id    => $protocol_id,
    );
    my $search = CXGN::Genotype::Search->new(%search_args);

    my (undef, $genotypes) = $search->get_genotype_info();
    return $genotypes;
}
348 =head2 retrieve_phenotypes()
350 retrieves phenotypes as a listref of listrefs
352 =cut
# Queries CXGN::Phenotypes::PhenotypeMatrix (search type 'MaterializedView')
# using this dataset's data_level, traits, trials and accessions.
#
# Returns an arrayref wrapping the list returned by get_phenotype_matrix().
sub retrieve_phenotypes {
    my ($self) = @_;

    my %matrix_args = (
        search_type    => 'MaterializedView',
        bcs_schema     => $self->schema(),
        data_level     => $self->data_level(),
        trait_list     => $self->traits(),
        trial_list     => $self->trials(),
        accession_list => $self->accessions(),
    );
    my $matrix = CXGN::Phenotypes::PhenotypeMatrix->new(%matrix_args);

    my @rows = $matrix->get_phenotype_matrix();
    return \@rows;
}
368 =head2 retrieve_accessions()
370 retrieves accessions as a listref of listrefs [stock_id, uniquename]
372 =cut
# Returns the accessions of this dataset.
#
# If accessions were set explicitly they are returned as-is. Otherwise the
# criteria already defined on the dataset, followed by "accessions", are sent
# to the breeder search and the 'results' entry of its answer is returned.
sub retrieve_accessions {
    my ($self) = @_;

    return $self->accessions() if $self->has_accessions();

    my $criteria = $self->_get_criteria();
    push @$criteria, "accessions";

    my $source_dataref = $self->_get_source_dataref("accessions");
    my $response = $self->breeder_search()->metadata_query($criteria, $source_dataref);

    return $response->{results};
}
389 =head2 retrieve_plots()
391 Retrieves plots as a listref of listrefs.
393 =cut
# Returns the plots of this dataset.
#
# If plots were set explicitly they are returned as-is. Otherwise the
# criteria already defined on the dataset, followed by "plots", are sent to
# the breeder search and the 'results' entry of its answer is returned.
sub retrieve_plots {
    my ($self) = @_;

    return $self->plots() if $self->has_plots();

    my $criteria = $self->_get_criteria();
    push @$criteria, "plots";

    my $source_dataref = $self->_get_source_dataref("plots");
    my $response = $self->breeder_search()->metadata_query($criteria, $source_dataref);

    return $response->{results};
}
409 =head2 retrieve_trials()
411 retrieves trials as a listref of listrefs.
413 =cut
# Returns the trials of this dataset.
#
# If trials were set explicitly they are returned as-is. Otherwise the
# criteria already defined on the dataset, followed by "trials", are sent to
# the breeder search; the raw answer is dumped to STDERR and its 'results'
# entry is returned.
sub retrieve_trials {
    my ($self) = @_;

    return $self->trials() if $self->has_trials();

    my $criteria = $self->_get_criteria();
    push @$criteria, "trials";

    my $source_dataref = $self->_get_source_dataref("trials");
    my $response = $self->breeder_search()->metadata_query($criteria, $source_dataref);

    print STDERR "TRIALS: ".Dumper($response);
    return $response->{results};
}
430 =head2 retrieve_traits()
432 retrieves traits as a listref of listrefs.
434 =cut
# Returns the traits of this dataset.
#
# If traits were set explicitly they are returned as-is. Otherwise the
# criteria already defined on the dataset, followed by "traits", are sent to
# the breeder search and the 'results' entry of its answer is returned.
sub retrieve_traits {
    my ($self) = @_;

    return $self->traits() if $self->has_traits();

    my $criteria = $self->_get_criteria();
    push @$criteria, "traits";

    my $source_dataref = $self->_get_source_dataref("traits");
    my $response = $self->breeder_search()->metadata_query($criteria, $source_dataref);

    return $response->{results};
}
451 =head2 retrieve_years()
453 retrieves years as a listref of year values
455 =cut
# Returns the years of this dataset as an arrayref.
#
# If years were set explicitly they are returned as-is. Otherwise the
# criteria already defined on the dataset, followed by "years", are sent to
# the breeder search and the first element of every result row is collected.
#
# The answer of metadata_query() is read from its 'results' key, as in the
# other retrieve_* methods; the former 'result' key never matched, which
# left the returned list permanently empty.
sub retrieve_years {
    my $self = shift;

    return $self->years() if $self->has_years();

    my $criteria = $self->_get_criteria();
    push @$criteria, "years";

    my $year_data = $self->breeder_search()->metadata_query($criteria, $self->_get_source_dataref("years"));

    # No 'results' entry (e.g. the query reported nothing) means no years.
    my $year_list = $year_data->{results} || [];

    my @years = map { $_->[0] } @$year_list;
    return \@years;
}
# Returns an arrayref naming the categories defined on this dataset, i.e.
# those whose has_* predicate is true, always in the order accessions,
# plots, trials, traits, years.
# NOTE(review): breeding_programs is not considered here although
# _get_dataref() includes it - confirm whether that is intended.
sub _get_criteria {
    my ($self) = @_;

    my @criteria = grep {
        my $predicate = "has_$_";
        $self->$predicate();
    } qw( accessions plots trials traits years );

    return \@criteria;
}