4 CXGN::Dataset - a class to easily query the database for breeding data
8 CXGN::Dataset can be used to flexibly define datasets for breeding applications. For example, a dataset can be defined using a list of germplasm, a list of trials, a list of years, etc., or a combination of the above. Once defined, a dataset makes it easy to obtain the related phenotypes, genotypes, and other data.
10 Datasets can be stored in the database and retrieved for later use.
12 Currently, there are three incarnations of CXGN::Dataset:
18 Unbuffered output of the queries
20 =item CXGN::Dataset::File
22 Writes results to files
24 =item CXGN::Dataset::Cache
26 Returns output like CXGN::Dataset, but uses a disk-cache for the response data
32 my $ds = CXGN::Dataset->new( people_schema => $p, schema => $s);
33 $ds->accessions([ 'a', 'b', 'c' ]);
34 my $trials = $ds->retrieve_trials();
35 my $sp_dataset_id = $ds->store();
37 my $restored_ds = CXGN::Dataset->new( people_schema => $p, schema => $s, sp_dataset_id => $sp_dataset_id );
38 my $years = $restored_ds->retrieve_years();
43 Lukas Mueller <lam87@cornell.edu>
51 package CXGN
::Dataset
;
54 use Moose
::Util
::TypeConstraints
;
57 use CXGN
::BreederSearch
;
58 use CXGN
::People
::Schema
;
59 use CXGN
::Phenotypes
::PhenotypeMatrix
;
60 use CXGN
::Genotype
::Search
;
62 =head2 people_schema()
64 accessor for CXGN::People::Schema database object
# Handle on the CXGN::People::Schema database; must be supplied to the constructor.
has 'people_schema' => (
    is       => 'rw',
    isa      => 'CXGN::People::Schema',
    required => 1,
);
72 accessor for Bio::Chado::Schema database object
# Handle on the Bio::Chado::Schema database; must be supplied to the constructor.
has 'schema' => (
    is       => 'rw',
    isa      => 'Bio::Chado::Schema',
    required => 1,
);
78 =head2 sp_dataset_id()
80 accessor for sp_dataset primary key
85 has
'sp_dataset_id' => ( isa
=> 'Int',
87 predicate
=> 'has_sp_dataset_id',
92 accessor for the json-formatted data structure (as used for the backend storage)
96 has
'data' => ( isa
=> 'HashRef',
102 accessor for the name of this dataset
106 has
'name' => ( isa
=> 'Maybe[Str]',
112 accessor for the description of this dataset
116 has
'description' => ( isa
=> 'Maybe[Str]',
122 accessor for defining the accessions that are part of this dataset (ArrayRef).
126 has
'accessions' => ( isa
=> 'Maybe[ArrayRef]',
128 predicate
=> 'has_accessions',
133 accessor for defining the plots that are part of this dataset (ArrayRef).
137 has
'plots' => ( isa
=> 'Maybe[ArrayRef]',
139 predicate
=> 'has_plots',
145 accessor for defining the trials that are part of this dataset (ArrayRef).
150 has
'trials' => ( isa
=> 'Maybe[ArrayRef]',
152 predicate
=> 'has_trials',
160 has
'traits' => ( isa
=> 'Maybe[ArrayRef]',
162 predicate
=> 'has_traits',
170 has
'years' => ( isa
=> 'Maybe[ArrayRef]',
172 predicate
=> 'has_years',
175 =head2 breeding_programs()
179 has
'breeding_programs' => ( isa
=> 'Maybe[ArrayRef]',
181 predicate
=> 'has_breeding_programs',
184 has
'is_live' => ( isa
=> 'Bool',
194 has
'data_level' => ( isa
=> 'String',
196 isa
=> enum
([qw
[ plot plant
]]),
# Read-write slot holding a CXGN::BreederSearch object; optional at construction.
has 'breeder_search' => (
    is  => 'rw',
    isa => 'CXGN::BreederSearch',
);
206 print STDERR
"Processing dataset_id ".$self->sp_dataset_id()."\n";
207 if ($self->has_sp_dataset_id()) {
208 my $row = $self->people_schema()->resultset("SpDataset")->find({ sp_dataset_id
=> $self->sp_dataset_id() });
210 my $dataset = JSON
::Any
->decode($row->dataset());
211 $self->data($dataset);
212 $self->name($row->name());
213 $self->description($row->description());
214 $self->accessions($dataset->{accessions
});
215 $self->plots($dataset->{plots
});
216 $self->trials($dataset->{trials
});
217 $self->traits($dataset->{traits
});
218 $self->years($dataset->{years
});
219 $self->breeding_programs($dataset->{breeding_programs
});
220 $self->is_live($dataset->{is_live
});
224 else { print STDERR
"Creating empty dataset object\n"; }
226 my $bs = CXGN
::BreederSearch
->new(dbh
=> $self->schema->storage->dbh());
227 $self->breeder_search($bs);
234 =head2 datasets_by_person()
239 sub datasets_by_person
{
241 my $people_schema = shift;
242 my $sp_person_id = shift;
244 my $rs = $people_schema->resultset("SpDataset")->search( { sp_person_id
=> $sp_person_id });
247 while (my $row = $rs->next()) {
248 push @datasets, $row->sp_dataset_id(), $row->name();
265 $dataref->{accessions
} = $self->accessions() if $self->has_accessions();
266 $dataref->{plots
} = $self->plots() if $self->has_plots();
267 $dataref->{trials
} = $self->trials() if $self->has_trials();
268 $dataref->{traits
} = $self->traits() if $self->has_traits();
269 $dataref->{years
} = $self->years() if $self->has_years();
270 $dataref->{breeding_programs
} = $self->breeding_programs() if $self->has_breeding_programs();
272 my $json = JSON
::Any
->encode($dataref);
274 my $data = { name
=> $self->name(),
275 description
=> $self->description(),
281 print STDERR
"dataset_id = ".$self->sp_dataset_id()."\n";
282 if (!$self->has_sp_dataset_id()) {
283 print STDERR
"Creating new dataset row... ".$self->sp_dataset_id()."\n";
284 my $row = $self->people_schema()->resultset("SpDataset")->create($data);
285 $self->sp_dataset_id($row->sp_dataset_id());
286 return $row->sp_dataset_id();
289 print STDERR
"Updating dataset row ".$self->sp_dataset_id()."\n";
290 my $row = $self->people_schema()->resultset("SpDataset")->find( { sp_dataset_id
=> $self->sp_dataset_id() });
292 $row->name($self->name());
293 $row->description($self->description());
294 $row->dataset($json);
296 return $row->sp_dataset_id();
299 print STDERR
"Weird... has ".$self->sp_dataset_id()." but no data in db\n";
308 $dataref->{accessions
} = join(",", @
{$self->accessions()}) if $self->has_accessions();
309 $dataref->{plots
} = join(",", @
{$self->plots()}) if $self->has_plots();
310 $dataref->{trials
} = join(",", @
{$self->trials()}) if $self->has_trials();
311 $dataref->{traits
} = join(",", @
{$self->traits()}) if $self->has_traits();
312 $dataref->{years
} = join(",", @
{$self->years()}) if $self->has_years();
313 $dataref->{breeding_programs
} = join(",", @
{$self->breeding_programs()}) if $self->has_breeding_programs();
317 sub _get_source_dataref
{
319 my $source_type = shift;
323 $dataref->{$source_type} = $self->_get_dataref();
328 =head2 retrieve_genotypes()
330 Retrieves genotypes as a listref of hashrefs.
334 sub retrieve_genotypes
{
336 my $protocol_id = shift;
338 my $genotypes_search = CXGN
::Genotype
::Search
->new(
339 bcs_schema
=> $self->schema(),
340 accession_list
=> $self->accessions(),
341 trial_list
=> $self->trials(),
342 protocol_id
=> $protocol_id
344 my ($total_count, $dataref) = $genotypes_search->get_genotype_info();
348 =head2 retrieve_phenotypes()
350 retrieves phenotypes as a listref of listrefs
354 sub retrieve_phenotypes
{
356 my $phenotypes_search = CXGN
::Phenotypes
::PhenotypeMatrix
->new(
357 search_type
=>'MaterializedView',
358 bcs_schema
=>$self->schema(),
359 data_level
=>$self->data_level(),
360 trait_list
=>$self->traits(),
361 trial_list
=>$self->trials(),
362 accession_list
=>$self->accessions(),
364 my @data = $phenotypes_search->get_phenotype_matrix();
368 =head2 retrieve_accessions()
370 retrieves accessions as a listref of listrefs [stock_id, uniquename]
374 sub retrieve_accessions
{
377 if ($self->has_accessions()) {
378 return $self->accessions();
381 my $criteria = $self->_get_criteria();
382 push @
$criteria, "accessions";
384 $accessions = $self->breeder_search()->metadata_query($criteria, $self->_get_source_dataref("accessions"));
386 return $accessions->{results
};
389 =head2 retrieve_plots()
391 Retrieves plots as a listref of listrefs.
398 if ($self->has_plots()) {
399 return $self->plots();
402 my $criteria = $self->_get_criteria();
403 push @
$criteria, "plots";
404 $plots = $self->breeder_search()->metadata_query($criteria, $self->_get_source_dataref("plots"));
406 return $plots->{results
};
409 =head2 retrieve_trials()
411 retrieves trials as a listref of listrefs.
415 sub retrieve_trials
{
418 if ($self->has_trials()) {
419 return $self->trials();
422 my $criteria = $self->_get_criteria();
423 push @
$criteria, "trials";
424 $trials = $self->breeder_search()->metadata_query($criteria, $self->_get_source_dataref("trials"));
426 print STDERR
"TRIALS: ".Dumper
($trials);
427 return $trials->{results
};
430 =head2 retrieve_traits()
432 retrieves traits as a listref of listrefs.
436 sub retrieve_traits
{
439 if ($self->has_traits()) {
440 return $self->traits();
443 my $criteria = $self->_get_criteria();
444 push @
$criteria, "traits";
445 $traits = $self->breeder_search()->metadata_query($criteria, $self->_get_source_dataref("traits"));
447 return $traits->{results
};
451 =head2 retrieve_years()
453 retrieves years as a listref of listrefs
460 if ($self->has_years()) {
461 return $self->years();
464 my $criteria = $self->_get_criteria();
465 push @
$criteria, "years";
466 my $year_data = $self->breeder_search()->metadata_query($criteria, $self->_get_source_dataref("years"));
467 my $year_list = $year_data->{result
};
469 foreach my $y (@
$year_list) {
470 push @years, $y->[0];
480 if ($self->has_accessions()) {
481 push @criteria, "accessions";
483 if ($self->has_plots()) {
484 push @criteria, "plots";
486 if ($self->has_trials()) {
487 push @criteria, "trials";
489 if ($self->has_traits()) {
490 push @criteria, "traits";
492 if ($self->has_years()) {
493 push @criteria, "years";