1 package CXGN
::Phenotypes
::PhenotypeMatrix
;
5 CXGN::Phenotypes::PhenotypeMatrix - an object that builds the phenotype matrix. It uses SearchFactory to search either the native database or the materialized views.
9 my $phenotypes_search = CXGN::Phenotypes::PhenotypeMatrix->new(
11 search_type=>$search_type,
12 data_level=>$data_level,
13 trait_list=>$trait_list,
14 trial_list=>$trial_list,
15 program_list=>$self->program_list,
16 folder_list=>$self->folder_list,
17 year_list=>$year_list,
18 location_list=>$location_list,
19 accession_list=>$accession_list,
20 plot_list=>$plot_list,
21 plant_list=>$plant_list,
22 include_timestamp=>$include_timestamp,
23 include_pedigree_parents=>$include_pedigree_parents,
24 exclude_phenotype_outlier=>0,
25 dataset_exluded_outliers=>$dataset_exluded_outliers,
26 trait_contains=>$trait_contains,
27 phenotype_min_value=>$phenotype_min_value,
28 phenotype_max_value=>$phenotype_max_value,
29 start_date => $start_date,
30 end_date => $end_date,
31 include_dateless_items => $include_dateless_items,
35 my @data = $phenotypes_search->get_phenotype_matrix();
49 use SGN
::Model
::Cvterm
;
50 use CXGN
::Stock
::StockLookup
;
51 use CXGN
::Phenotypes
::SearchFactory
;
52 use CXGN
::BreedersToolbox
::Projects
;
55 isa
=> 'Bio::Chado::Schema',
60 # Allowed values: MaterializedViewTable or Native. MaterializedViewTable is PREFERRED.
61 has
'search_type' => (
67 #(plot, plant, or all)
74 isa
=> 'ArrayRef[Int]|Undef',
78 has
'program_list' => (
79 isa
=> 'ArrayRef[Int]|Undef',
83 has
'folder_list' => (
84 isa
=> 'ArrayRef[Int]|Undef',
89 isa
=> 'ArrayRef[Int]|Undef',
93 has
'accession_list' => (
94 isa
=> 'ArrayRef[Int]|Undef',
99 isa
=> 'ArrayRef[Int]|Undef',
103 has
'plant_list' => (
104 isa
=> 'ArrayRef[Int]|Undef',
108 has
'subplot_list' => (
109 isa
=> 'ArrayRef[Int]|Undef',
113 has
'location_list' => (
114 isa
=> 'ArrayRef[Int]|Undef',
119 isa
=> 'ArrayRef[Int]|Undef',
123 has
'include_pedigree_parents' => (
129 has
'include_timestamp' => (
135 has
'include_phenotype_primary_key' => (
141 has
'exclude_phenotype_outlier' => (
147 has
'dataset_exluded_outliers' => (
148 isa
=> 'ArrayRef[Int]|Undef',
152 has
'trait_contains' => (
153 isa
=> 'ArrayRef[Str]|Undef',
157 has
'phenotype_min_value' => (
162 has
'phenotype_max_value' => (
167 has
'start_date' => (
170 default => sub { return "1900-01-01"; },
176 default => sub { return "2100-12-31"; },
179 has
'include_dateless_items' => (
182 default => sub { return 1; },
195 sub get_phenotype_matrix
{
197 my $include_pedigree_parents = $self->include_pedigree_parents();
198 my $include_timestamp = $self->include_timestamp;
199 my $include_phenotype_primary_key = $self->include_phenotype_primary_key;
201 print STDERR
"GET PHENOMATRIX ".$self->search_type."\n";
203 my $phenotypes_search = CXGN
::Phenotypes
::SearchFactory
->instantiate(
206 bcs_schema
=>$self->bcs_schema,
207 data_level
=>$self->data_level,
208 trait_list
=>$self->trait_list,
209 trial_list
=>$self->trial_list,
210 program_list
=>$self->program_list,
211 folder_list
=>$self->folder_list,
212 year_list
=>$self->year_list,
213 location_list
=>$self->location_list,
214 accession_list
=>$self->accession_list,
215 plot_list
=>$self->plot_list,
216 plant_list
=>$self->plant_list,
217 subplot_list
=>$self->subplot_list,
218 include_timestamp
=>$include_timestamp,
219 exclude_phenotype_outlier
=>$self->exclude_phenotype_outlier,
220 dataset_exluded_outliers
=>$self->dataset_exluded_outliers,
221 trait_contains
=>$self->trait_contains,
222 phenotype_min_value
=>$self->phenotype_min_value,
223 phenotype_max_value
=>$self->phenotype_max_value,
224 start_date
=> $self->start_date(),
225 end_date
=> $self->end_date(),
226 include_dateless_items
=> $self->include_dateless_items(),
228 offset
=>$self->offset
232 my ($data, $unique_traits);
234 my @metadata_headers = ( 'studyYear', 'programDbId', 'programName', 'programDescription', 'studyDbId', 'studyName', 'studyDescription', 'studyDesign', 'plotWidth', 'plotLength', 'fieldSize', 'fieldTrialIsPlannedToBeGenotyped', 'fieldTrialIsPlannedToCross', 'plantingDate', 'harvestDate', 'locationDbId', 'locationName', 'germplasmDbId', 'germplasmName', 'germplasmSynonyms', 'observationLevel', 'observationUnitDbId', 'observationUnitName', 'replicate', 'blockNumber', 'plotNumber', 'rowNumber', 'colNumber', 'entryType', 'plantNumber');
236 if ($self->search_type eq 'MaterializedViewTable'){
237 ($data, $unique_traits) = $phenotypes_search->search();
238 print STDERR
"No of lines retrieved: ".scalar(@
$data)."\n";
239 print STDERR
"Construct Pheno Matrix Start:".localtime."\n";
241 my @line = @metadata_headers;
242 push @line, ('plantedSeedlotStockDbId', 'plantedSeedlotStockUniquename', 'plantedSeedlotCurrentCount', 'plantedSeedlotCurrentWeightGram', 'plantedSeedlotBoxName', 'plantedSeedlotTransactionCount', 'plantedSeedlotTransactionWeight', 'plantedSeedlotTransactionDescription', 'availableGermplasmSeedlotUniquenames');
244 if ($include_pedigree_parents){
245 push @line, ('germplasmPedigreeFemaleParentName', 'germplasmPedigreeFemaleParentDbId', 'germplasmPedigreeMaleParentName', 'germplasmPedigreeMaleParentDbId');
248 my @sorted_traits = sort keys(%$unique_traits);
249 foreach my $trait (@sorted_traits) {
251 if ($include_phenotype_primary_key) {
252 push @line, $trait.'_phenotype_id';
257 # retrieve treatments and add treatment names to header
258 my %seen_obsunits = map { $_->{observationunit_stock_id
} => 1 } @
$data;
259 my $project_object = CXGN
::BreedersToolbox
::Projects
->new( { schema
=> $self->bcs_schema });
260 my $treatment_info = {};
261 if ($self->trial_list) {
262 $treatment_info = $project_object->get_related_treatments($self->trial_list, \
%seen_obsunits);
264 my $treatment_names = $treatment_info->{treatment_names
};
265 my $treatment_details = $treatment_info->{treatment_details
};
267 foreach my $name (@
$treatment_names) {
273 foreach my $obs_unit (@
$data){
274 my $entry_type = $obs_unit->{obsunit_is_a_control
} ?
'check' : 'test';
275 my $synonyms = $obs_unit->{germplasm_synonyms
};
276 my $synonym_string = $synonyms ?
join ("," , @
$synonyms) : '';
277 my $available_germplasm_seedlots = $obs_unit->{available_germplasm_seedlots
};
278 my %available_germplasm_seedlots_uniquenames;
279 foreach (@
$available_germplasm_seedlots){
280 $available_germplasm_seedlots_uniquenames{$_->{stock_uniquename
}}++;
282 my $available_germplasm_seedlots_uniquenames = join ' AND ', (keys %available_germplasm_seedlots_uniquenames);
284 my $trial_name = $obs_unit->{trial_name
};
285 my $trial_desc = $obs_unit->{trial_description
};
287 $trial_name =~ s/\s+$//g;
288 $trial_desc =~ s/\s+$//g;
290 my @line = ($obs_unit->{year
}, $obs_unit->{breeding_program_id
}, $obs_unit->{breeding_program_name
}, $obs_unit->{breeding_program_description
}, $obs_unit->{trial_id
}, $trial_name, $trial_desc, $obs_unit->{design
}, $obs_unit->{plot_width
}, $obs_unit->{plot_length
}, $obs_unit->{field_size
}, $obs_unit->{field_trial_is_planned_to_be_genotyped
}, $obs_unit->{field_trial_is_planned_to_cross
}, $obs_unit->{planting_date
}, $obs_unit->{harvest_date
}, $obs_unit->{trial_location_id
}, $obs_unit->{trial_location_name
}, $obs_unit->{germplasm_stock_id
}, $obs_unit->{germplasm_uniquename
}, $synonym_string, $obs_unit->{observationunit_type_name
}, $obs_unit->{observationunit_stock_id
}, $obs_unit->{observationunit_uniquename
}, $obs_unit->{obsunit_rep
}, $obs_unit->{obsunit_block
}, $obs_unit->{obsunit_plot_number
}, $obs_unit->{obsunit_row_number
}, $obs_unit->{obsunit_col_number
}, $entry_type, $obs_unit->{obsunit_plant_number
}, $obs_unit->{seedlot_stock_id
}, $obs_unit->{seedlot_uniquename
}, $obs_unit->{seedlot_current_count
}, $obs_unit->{seedlot_current_weight_gram
}, $obs_unit->{seedlot_box_name
}, $obs_unit->{seedlot_transaction_amount
}, $obs_unit->{seedlot_transaction_weight_gram
}, $obs_unit->{seedlot_transaction_description
}, $available_germplasm_seedlots_uniquenames);
292 if ($include_pedigree_parents) {
293 my $germplasm = CXGN
::Stock
->new({schema
=> $self->bcs_schema, stock_id
=>$obs_unit->{germplasm_stock_id
}});
294 my $parents = $germplasm->get_parents();
295 push @line, ($parents->{'mother'}, $parents->{'mother_id'}, $parents->{'father'}, $parents->{'father_id'});
298 my $observations = $obs_unit->{observations
};
299 # print STDERR "OBSERVATIONS =".Dumper($observations)."\n";
300 my $include_timestamp = $self->include_timestamp;
301 my %trait_observations;
303 my $dataset_exluded_outliers_ref = $self->dataset_exluded_outliers;
304 foreach my $observation (@
$observations){
305 my $collect_date = $observation->{collect_date
};
306 my $timestamp = $observation->{timestamp
};
308 if ($include_timestamp && $timestamp) {
309 $trait_observations{$observation->{trait_name
}} = "$observation->{value},$timestamp";
311 elsif ($include_timestamp && $collect_date) {
312 $trait_observations{$observation->{trait_name
}} = "$observation->{value},$collect_date";
315 $trait_observations{$observation->{trait_name
}} = $observation->{value
};
318 # an observation whose phenotype_id appears in @$dataset_exluded_outliers_ref is a dataset outlier; its field is left empty
319 if(grep {$_ == $observation->{'phenotype_id'}} @
$dataset_exluded_outliers_ref) {
320 $trait_observations{$observation->{trait_name
}} = ''; # empty field for outlier NA
324 if ($include_phenotype_primary_key) {
325 foreach my $observation (@
$observations) {
326 $phenotype_ids{$observation->{trait_name
}} = $observation->{phenotype_id
};
329 foreach my $trait (@sorted_traits) {
330 push @line, $trait_observations{$trait};
331 if ($include_phenotype_primary_key) {
332 push @line, $phenotype_ids{$trait};
335 push @line, $obs_unit->{notes
};
337 # add treatment values to each obsunit line
339 if ($treatment_details->{$obs_unit->{observationunit_stock_id
}}) {
340 %unit_treatments = %{$treatment_details->{$obs_unit->{observationunit_stock_id
}}};
342 foreach my $name (@
$treatment_names) {
343 push @line, $unit_treatments{$name};
349 $data = $phenotypes_search->search();
350 #print STDERR "DOWNLOAD DATA =".Dumper($data)."\n";
355 print STDERR
"No of lines retrieved: ".scalar(@
$data)."\n";
356 print STDERR
"Construct Pheno Matrix Start:".localtime."\n";
357 my @unique_obsunit_list = ();
360 foreach my $d (@
$data) {
361 my $cvterm = $d->{trait_name
};
363 my $obsunit_id = $d->{obsunit_stock_id
};
364 if (!exists($seen_obsunits{$obsunit_id})) {
365 push @unique_obsunit_list, $obsunit_id;
366 $seen_obsunits{$obsunit_id} = 1;
369 my $timestamp_value = $d->{timestamp
};
370 my $value = $d->{phenotype_value
};
371 #my $cvterm = $trait."|".$cvterm_accession;
372 if ($include_timestamp && $timestamp_value) {
373 $obsunit_data{$obsunit_id}->{$cvterm} = "$value,$timestamp_value";
375 $obsunit_data{$obsunit_id}->{$cvterm} = $value;
377 $obsunit_data{$obsunit_id}->{'notes'} = $d->{notes
};
379 my $synonyms = $d->{synonyms
};
380 my $synonym_string = $synonyms ?
join ("," , @
$synonyms) : '';
381 my $entry_type = $d->{is_a_control
} ?
'check' : 'test';
383 my $trial_name = $d->{trial_name
};
384 my $trial_desc = $d->{trial_description
};
386 $trial_name =~ s/\s+$//g;
387 $trial_desc =~ s/\s+$//g;
389 $obsunit_data{$obsunit_id}->{metadata
} = [
391 $d->{breeding_program_id
},
392 $d->{breeding_program_name
},
393 $d->{breeding_program_description
},
401 $d->{field_trial_is_planned_to_be_genotyped
},
402 $d->{field_trial_is_planned_to_cross
},
407 $d->{accession_stock_id
},
408 $d->{accession_uniquename
},
410 $d->{obsunit_type_name
},
411 $d->{obsunit_stock_id
},
412 $d->{obsunit_uniquename
},
424 #print STDERR Dumper \%plot_data;
425 #print STDERR Dumper \%traits;
427 # retrieve treatments
428 my $project_object = CXGN
::BreedersToolbox
::Projects
->new( { schema
=> $self->bcs_schema });
429 my $treatment_info = {};
430 if ($self->trial_list) {
431 $treatment_info = $project_object->get_related_treatments($self->trial_list, \
%seen_obsunits);
433 my $treatment_names = $treatment_info->{treatment_names
};
434 my $treatment_details = $treatment_info->{treatment_details
};
436 my @line = @metadata_headers;
438 my @sorted_traits = sort keys(%traits);
439 foreach my $trait (@sorted_traits) {
444 # add treatment names to header
445 foreach my $name (@
$treatment_names) {
451 foreach my $p (@unique_obsunit_list) {
452 my @line = @
{$obsunit_data{$p}->{metadata
}};
454 foreach my $trait (@sorted_traits) {
455 push @line, $obsunit_data{$p}->{$trait};
457 push @line, $obsunit_data{$p}->{'notes'};
459 # add treatment values to each obsunit line
461 if ($treatment_details->{$p}) {
462 %unit_treatments = %{$treatment_details->{$p}};
464 foreach my $name (@
$treatment_names) {
465 push @line, $unit_treatments{$name};
471 #print STDERR Dumper \@info;
472 print STDERR
"Construct Pheno Matrix End:".localtime."\n";