include treatments in phenotype matrix construction
[sgn.git] / lib / CXGN / Phenotypes / PhenotypeMatrix.pm
blobdfdf574f05a6dbccfa5007bbcc6c485ab20d61b8
1 package CXGN::Phenotypes::PhenotypeMatrix;
3 =head1 NAME
5 CXGN::Phenotypes::PhenotypeMatrix - an object to handle creating the phenotype matrix. Uses SearchFactory to handle searching native database or materialized views.
7 =head1 USAGE
9 my $phenotypes_search = CXGN::Phenotypes::PhenotypeMatrix->new(
10 bcs_schema=>$schema,
11 search_type=>$search_type,
12 data_level=>$data_level,
13 trait_list=>$trait_list,
14 trial_list=>$trial_list,
15 program_list=>$self->program_list,
16 folder_list=>$self->folder_list,
17 year_list=>$year_list,
18 location_list=>$location_list,
19 accession_list=>$accession_list,
20 plot_list=>$plot_list,
21 plant_list=>$plant_list,
22 include_timestamp=>$include_timestamp,
23 include_pedigree_parents=>$include_pedigree_parents,
24 exclude_phenotype_outlier=>0,
25 trait_contains=>$trait_contains,
26 phenotype_min_value=>$phenotype_min_value,
27 phenotype_max_value=>$phenotype_max_value,
28 limit=>$limit,
29 offset=>$offset
31 my @data = $phenotypes_search->get_phenotype_matrix();
33 =head1 DESCRIPTION
36 =head1 AUTHORS
39 =cut
41 use strict;
42 use warnings;
43 use Moose;
44 use Data::Dumper;
45 use SGN::Model::Cvterm;
46 use CXGN::Stock::StockLookup;
47 use CXGN::Phenotypes::SearchFactory;
48 use CXGN::BreedersToolbox::Projects;
# Bio::Chado::Schema handle; get_phenotype_matrix hands it on to the search
# backend and to the helper objects it creates.
has 'bcs_schema' => (
    is       => 'rw',
    isa      => 'Bio::Chado::Schema',
    required => 1,
);

#PREFERRED MaterializedViewTable (MaterializedViewTable or Native)
has 'search_type' => (
    is       => 'rw',
    isa      => 'Str',
    required => 1,
);

#(plot, plant, or all)
has 'data_level' => (
    is  => 'ro',
    isa => 'Str|Undef',
);

# Optional id filters. Each one is an array ref of integers (or undef) and,
# apart from being forwarded unchanged to the search backend, is not
# interpreted in this module.
has [qw(
    trial_list
    program_list
    folder_list
    trait_list
    accession_list
    plot_list
    plant_list
    subplot_list
    location_list
    year_list
)] => (
    is  => 'rw',
    isa => 'ArrayRef[Int]|Undef',
);

# Output switches, both off by default: add pedigree parent columns, and
# append a timestamp to each phenotype value.
has [qw(
    include_pedigree_parents
    include_timestamp
)] => (
    is      => 'ro',
    isa     => 'Bool|Undef',
    default => 0,
);

# Forwarded to the search backend; off by default.
has 'exclude_phenotype_outlier' => (
    is      => 'ro',
    isa     => 'Bool',
    default => 0,
);

# Trait-name and value-range filters, forwarded to the search backend.
has 'trait_contains' => (
    is  => 'rw',
    isa => 'ArrayRef[Str]|Undef',
);

has [qw(
    phenotype_min_value
    phenotype_max_value
)] => (
    is  => 'rw',
    isa => 'Str|Undef',
);

# Paging parameters, forwarded to the search backend.
has [qw(
    limit
    offset
)] => (
    is  => 'rw',
    isa => 'Int|Undef',
);
# get_phenotype_matrix
#
# Runs the configured phenotype search and reshapes the result into a matrix
# with one row per observation unit.
#
# Returns a list of array refs. The first array ref is the header row; each
# following array ref is one observation unit. Columns are, in order:
#   - the fixed metadata columns (@metadata_headers),
#   - for the 'MaterializedViewTable' search type only: seedlot columns and,
#     when include_pedigree_parents is set, four pedigree parent columns,
#   - one column per trait found in the result, sorted by name,
#   - 'notes',
#   - one column per treatment reported for the observation units, sorted by
#     name.
#
# When include_timestamp is set, a phenotype value is emitted as
# "value,timestamp" if a timestamp is available for it.
#
# Any search_type other than 'MaterializedViewTable' is handled as a flat
# result with one record per observation.
sub get_phenotype_matrix {
    my $self = shift;
    my $include_pedigree_parents = $self->include_pedigree_parents();
    my $include_timestamp = $self->include_timestamp;

    print STDERR "GET PHENOMATRIX ".$self->search_type."\n";

    my $phenotypes_search = CXGN::Phenotypes::SearchFactory->instantiate(
        $self->search_type,
        {
            bcs_schema=>$self->bcs_schema,
            data_level=>$self->data_level,
            trait_list=>$self->trait_list,
            trial_list=>$self->trial_list,
            program_list=>$self->program_list,
            folder_list=>$self->folder_list,
            year_list=>$self->year_list,
            location_list=>$self->location_list,
            accession_list=>$self->accession_list,
            plot_list=>$self->plot_list,
            plant_list=>$self->plant_list,
            subplot_list=>$self->subplot_list,
            include_timestamp=>$include_timestamp,
            exclude_phenotype_outlier=>$self->exclude_phenotype_outlier,
            trait_contains=>$self->trait_contains,
            phenotype_min_value=>$self->phenotype_min_value,
            phenotype_max_value=>$self->phenotype_max_value,
            limit=>$self->limit,
            offset=>$self->offset
        }
    );

    my ($data, $unique_traits);
    my @info;
    my @metadata_headers = (
        'studyYear', 'programDbId', 'programName', 'programDescription',
        'studyDbId', 'studyName', 'studyDescription', 'studyDesign',
        'plotWidth', 'plotLength', 'fieldSize',
        'fieldTrialIsPlannedToBeGenotyped', 'fieldTrialIsPlannedToCross',
        'plantingDate', 'harvestDate', 'locationDbId', 'locationName',
        'germplasmDbId', 'germplasmName', 'germplasmSynonyms',
        'observationLevel', 'observationUnitDbId', 'observationUnitName',
        'replicate', 'blockNumber', 'plotNumber', 'rowNumber', 'colNumber',
        'entryType', 'plantNumber'
    );

    if ($self->search_type eq 'MaterializedViewTable'){
        ($data, $unique_traits) = $phenotypes_search->search();

        print STDERR "No of lines retrieved: ".scalar(@$data)."\n";
        print STDERR "Construct Pheno Matrix Start:".localtime."\n";

        my @line = @metadata_headers;
        push @line, (
            'plantedSeedlotStockDbId', 'plantedSeedlotStockUniquename',
            'plantedSeedlotCurrentCount', 'plantedSeedlotCurrentWeightGram',
            'plantedSeedlotBoxName', 'plantedSeedlotTransactionCount',
            'plantedSeedlotTransactionWeight',
            'plantedSeedlotTransactionDescription',
            'availableGermplasmSeedlotUniquenames'
        );

        if ($include_pedigree_parents){
            push @line, ('germplasmPedigreeFemaleParentName', 'germplasmPedigreeFemaleParentDbId', 'germplasmPedigreeMaleParentName', 'germplasmPedigreeMaleParentDbId');
        }

        my @sorted_traits = sort keys(%$unique_traits);
        push @line, @sorted_traits;
        push @line, 'notes';

        my @observationunit_ids = map { $_->{observationunit_stock_id} } @$data;

        # retrieve treatments and add to header
        my $project_object = CXGN::BreedersToolbox::Projects->new( { schema => $self->bcs_schema });
        my ($treatment_info, $unique_treatments) = $project_object->get_treatments_by_observationunit_ids(\@observationunit_ids);
        my @sorted_treatments = sort keys(%$unique_treatments);
        push @line, @sorted_treatments;

        # The header is pushed exactly once, after the treatment columns have
        # been appended; pushing it earlier as well duplicated the header row.
        push @info, \@line;

        # Pedigree lookups are cached per germplasm so that observation units
        # sharing a germplasm do not repeat the same lookup.
        my %parents_of;

        foreach my $obs_unit (@$data){
            my $entry_type = $obs_unit->{obsunit_is_a_control} ? 'check' : 'test';
            my $synonyms = $obs_unit->{germplasm_synonyms};
            my $synonym_string = $synonyms ? join ("," , @$synonyms) : '';
            my $available_germplasm_seedlots = $obs_unit->{available_germplasm_seedlots};
            my %available_germplasm_seedlots_uniquenames;
            foreach my $seedlot (@$available_germplasm_seedlots){
                $available_germplasm_seedlots_uniquenames{$seedlot->{stock_uniquename}}++;
            }
            my $available_germplasm_seedlots_uniquenames = join ' AND ', (keys %available_germplasm_seedlots_uniquenames);

            my $trial_name = $obs_unit->{trial_name};
            my $trial_desc = $obs_unit->{trial_description};

            # Strip trailing whitespace; undefined values are left undefined
            # instead of triggering an "uninitialized" warning.
            $trial_name =~ s/\s+$//g if defined $trial_name;
            $trial_desc =~ s/\s+$//g if defined $trial_desc;

            my @line = (
                $obs_unit->{year},
                $obs_unit->{breeding_program_id},
                $obs_unit->{breeding_program_name},
                $obs_unit->{breeding_program_description},
                $obs_unit->{trial_id},
                $trial_name,
                $trial_desc,
                $obs_unit->{design},
                $obs_unit->{plot_width},
                $obs_unit->{plot_length},
                $obs_unit->{field_size},
                $obs_unit->{field_trial_is_planned_to_be_genotyped},
                $obs_unit->{field_trial_is_planned_to_cross},
                $obs_unit->{planting_date},
                $obs_unit->{harvest_date},
                $obs_unit->{trial_location_id},
                $obs_unit->{trial_location_name},
                $obs_unit->{germplasm_stock_id},
                $obs_unit->{germplasm_uniquename},
                $synonym_string,
                $obs_unit->{observationunit_type_name},
                $obs_unit->{observationunit_stock_id},
                $obs_unit->{observationunit_uniquename},
                $obs_unit->{obsunit_rep},
                $obs_unit->{obsunit_block},
                $obs_unit->{obsunit_plot_number},
                $obs_unit->{obsunit_row_number},
                $obs_unit->{obsunit_col_number},
                $entry_type,
                $obs_unit->{obsunit_plant_number},
                $obs_unit->{seedlot_stock_id},
                $obs_unit->{seedlot_uniquename},
                $obs_unit->{seedlot_current_count},
                $obs_unit->{seedlot_current_weight_gram},
                $obs_unit->{seedlot_box_name},
                $obs_unit->{seedlot_transaction_amount},
                $obs_unit->{seedlot_transaction_weight_gram},
                $obs_unit->{seedlot_transaction_description},
                $available_germplasm_seedlots_uniquenames
            );

            if ($include_pedigree_parents) {
                my $germplasm_id = $obs_unit->{germplasm_stock_id};
                my $cache_key = defined $germplasm_id ? $germplasm_id : '';
                if (!exists $parents_of{$cache_key}) {
                    my $germplasm = CXGN::Stock->new({schema => $self->bcs_schema, stock_id=>$germplasm_id});
                    $parents_of{$cache_key} = $germplasm->get_parents();
                }
                my $parents = $parents_of{$cache_key};
                push @line, ($parents->{'mother'}, $parents->{'mother_id'}, $parents->{'father'}, $parents->{'father_id'});
            }

            my $observations = $obs_unit->{observations};
            # print STDERR "OBSERVATIONS =".Dumper($observations)."\n";
            my %trait_observations;
            foreach my $observation (@$observations){
                # With include_timestamp set, prefer the timestamp and fall
                # back to the collect date; otherwise emit the bare value.
                my $date_suffix = $include_timestamp
                    ? ($observation->{timestamp} || $observation->{collect_date})
                    : undef;
                if ($date_suffix) {
                    $trait_observations{$observation->{trait_name}} = "$observation->{value},$date_suffix";
                }
                else {
                    $trait_observations{$observation->{trait_name}} = $observation->{value};
                }
            }
            foreach my $trait (@sorted_traits) {
                push @line, $trait_observations{$trait};
            }
            push @line, $obs_unit->{notes};

            # add treatment info; a unit with no entry in the treatment
            # lookup gets undef in every treatment column instead of dying
            # on an undefined hash reference
            my %unit_treatments = %{ $treatment_info->{$obs_unit->{observationunit_stock_id}} || {} };
            foreach my $treatment (@sorted_treatments) {
                push @line, $unit_treatments{$treatment};
            }

            push @info, \@line;
        }
    } else {
        $data = $phenotypes_search->search();
        #print STDERR "DOWNLOAD DATA =".Dumper($data)."\n";

        my %obsunit_data;
        my %traits;

        print STDERR "No of lines retrieved: ".scalar(@$data)."\n";
        print STDERR "Construct Pheno Matrix Start:".localtime."\n";
        my @unique_obsunit_list = ();
        my %seen_obsunits;

        # Each record is a single observation; fold the records into one
        # entry per observation unit, remembering first-seen order.
        foreach my $d (@$data) {
            my $cvterm = $d->{trait_name};
            if ($cvterm){
                my $obsunit_id = $d->{obsunit_stock_id};
                if (!exists($seen_obsunits{$obsunit_id})) {
                    push @unique_obsunit_list, $obsunit_id;
                    $seen_obsunits{$obsunit_id} = 1;
                }

                my $timestamp_value = $d->{timestamp};
                my $value = $d->{phenotype_value};
                #my $cvterm = $trait."|".$cvterm_accession;
                if ($include_timestamp && $timestamp_value) {
                    $obsunit_data{$obsunit_id}->{$cvterm} = "$value,$timestamp_value";
                } else {
                    $obsunit_data{$obsunit_id}->{$cvterm} = $value;
                }
                $obsunit_data{$obsunit_id}->{'notes'} = $d->{notes};

                my $synonyms = $d->{synonyms};
                my $synonym_string = $synonyms ? join ("," , @$synonyms) : '';
                my $entry_type = $d->{is_a_control} ? 'check' : 'test';

                my $trial_name = $d->{trial_name};
                my $trial_desc = $d->{trial_description};

                # Strip trailing whitespace; undefined values stay undefined.
                $trial_name =~ s/\s+$//g if defined $trial_name;
                $trial_desc =~ s/\s+$//g if defined $trial_desc;

                $obsunit_data{$obsunit_id}->{metadata} = [
                    $d->{year},
                    $d->{breeding_program_id},
                    $d->{breeding_program_name},
                    $d->{breeding_program_description},
                    $d->{trial_id},
                    $trial_name,
                    $trial_desc,
                    $d->{design},
                    $d->{plot_width},
                    $d->{plot_length},
                    $d->{field_size},
                    $d->{field_trial_is_planned_to_be_genotyped},
                    $d->{field_trial_is_planned_to_cross},
                    $d->{planting_date},
                    $d->{harvest_date},
                    $d->{location_id},
                    $d->{location_name},
                    $d->{accession_stock_id},
                    $d->{accession_uniquename},
                    $synonym_string,
                    $d->{obsunit_type_name},
                    $d->{obsunit_stock_id},
                    $d->{obsunit_uniquename},
                    $d->{rep},
                    $d->{block},
                    $d->{plot_number},
                    $d->{row_number},
                    $d->{col_number},
                    $entry_type,
                    $d->{plant_number}
                ];
                $traits{$cvterm}++;
            }
        }
        #print STDERR Dumper \%plot_data;
        #print STDERR Dumper \%traits;

        # retrieve treatments
        my $project_object = CXGN::BreedersToolbox::Projects->new( { schema => $self->bcs_schema });
        my ($treatment_info, $unique_treatments) = $project_object->get_treatments_by_observationunit_ids(\@unique_obsunit_list);
        my @sorted_treatments = sort keys(%$unique_treatments);

        my @line = @metadata_headers;

        my @sorted_traits = sort keys(%traits);
        push @line, @sorted_traits;
        push @line, 'notes';
        push @line, @sorted_treatments;
        push @info, \@line;

        foreach my $p (@unique_obsunit_list) {
            my @line = @{$obsunit_data{$p}->{metadata}};

            foreach my $trait (@sorted_traits) {
                push @line, $obsunit_data{$p}->{$trait};
            }
            push @line, $obsunit_data{$p}->{'notes'};

            # add treatment info; a unit with no entry in the treatment
            # lookup gets undef in every treatment column instead of dying
            # on an undefined hash reference
            my %unit_treatments = %{ $treatment_info->{$p} || {} };
            foreach my $treatment (@sorted_treatments) {
                push @line, $unit_treatments{$treatment};
            }
            push @info, \@line;
        }
    }

    #print STDERR Dumper \@info;
    print STDERR "Construct Pheno Matrix End:".localtime."\n";
    return @info;
}