1 package CXGN
::Phenotypes
::PhenotypeMatrix
;
5 CXGN::Phenotypes::PhenotypeMatrix - an object that builds the phenotype matrix. Uses SearchFactory to search either the native database or the materialized views.
9 my $phenotypes_search = CXGN::Phenotypes::PhenotypeMatrix->new(
11 search_type=>$search_type,
12 data_level=>$data_level,
13 trait_list=>$trait_list,
14 trial_list=>$trial_list,
15 program_list=>$self->program_list,
16 folder_list=>$self->folder_list,
17 year_list=>$year_list,
18 location_list=>$location_list,
19 accession_list=>$accession_list,
20 plot_list=>$plot_list,
21 plant_list=>$plant_list,
22 include_timestamp=>$include_timestamp,
23 include_pedigree_parents=>$include_pedigree_parents,
24 exclude_phenotype_outlier=>0,
25 trait_contains=>$trait_contains,
26 phenotype_min_value=>$phenotype_min_value,
27 phenotype_max_value=>$phenotype_max_value,
31 my @data = $phenotypes_search->get_phenotype_matrix();
45 use SGN
::Model
::Cvterm
;
46 use CXGN
::Stock
::StockLookup
;
47 use CXGN
::Phenotypes
::SearchFactory
;
48 use CXGN
::BreedersToolbox
::Projects
;
51 isa
=> 'Bio::Chado::Schema',
56 # search_type selects the search backend: 'MaterializedViewTable' (preferred) or 'Native'.
57 has
'search_type' => (
63 #(plot, plant, or all)
70 isa
=> 'ArrayRef[Int]|Undef',
74 has
'program_list' => (
75 isa
=> 'ArrayRef[Int]|Undef',
79 has
'folder_list' => (
80 isa
=> 'ArrayRef[Int]|Undef',
85 isa
=> 'ArrayRef[Int]|Undef',
89 has
'accession_list' => (
90 isa
=> 'ArrayRef[Int]|Undef',
95 isa
=> 'ArrayRef[Int]|Undef',
100 isa
=> 'ArrayRef[Int]|Undef',
104 has
'subplot_list' => (
105 isa
=> 'ArrayRef[Int]|Undef',
109 has
'location_list' => (
110 isa
=> 'ArrayRef[Int]|Undef',
115 isa
=> 'ArrayRef[Int]|Undef',
119 has
'include_pedigree_parents' => (
125 has
'include_timestamp' => (
131 has
'exclude_phenotype_outlier' => (
137 has
'trait_contains' => (
138 isa
=> 'ArrayRef[Str]|Undef',
142 has
'phenotype_min_value' => (
147 has
'phenotype_max_value' => (
162 sub get_phenotype_matrix
{
164 my $include_pedigree_parents = $self->include_pedigree_parents();
165 my $include_timestamp = $self->include_timestamp;
167 print STDERR
"GET PHENOMATRIX ".$self->search_type."\n";
169 my $phenotypes_search = CXGN
::Phenotypes
::SearchFactory
->instantiate(
172 bcs_schema
=>$self->bcs_schema,
173 data_level
=>$self->data_level,
174 trait_list
=>$self->trait_list,
175 trial_list
=>$self->trial_list,
176 program_list
=>$self->program_list,
177 folder_list
=>$self->folder_list,
178 year_list
=>$self->year_list,
179 location_list
=>$self->location_list,
180 accession_list
=>$self->accession_list,
181 plot_list
=>$self->plot_list,
182 plant_list
=>$self->plant_list,
183 subplot_list
=>$self->subplot_list,
184 include_timestamp
=>$include_timestamp,
185 exclude_phenotype_outlier
=>$self->exclude_phenotype_outlier,
186 trait_contains
=>$self->trait_contains,
187 phenotype_min_value
=>$self->phenotype_min_value,
188 phenotype_max_value
=>$self->phenotype_max_value,
190 offset
=>$self->offset
194 my ($data, $unique_traits);
196 my @metadata_headers = ( 'studyYear', 'programDbId', 'programName', 'programDescription', 'studyDbId', 'studyName', 'studyDescription', 'studyDesign', 'plotWidth', 'plotLength', 'fieldSize', 'fieldTrialIsPlannedToBeGenotyped', 'fieldTrialIsPlannedToCross', 'plantingDate', 'harvestDate', 'locationDbId', 'locationName', 'germplasmDbId', 'germplasmName', 'germplasmSynonyms', 'observationLevel', 'observationUnitDbId', 'observationUnitName', 'replicate', 'blockNumber', 'plotNumber', 'rowNumber', 'colNumber', 'entryType', 'plantNumber');
198 if ($self->search_type eq 'MaterializedViewTable'){
199 ($data, $unique_traits) = $phenotypes_search->search();
201 print STDERR
"No of lines retrieved: ".scalar(@
$data)."\n";
202 print STDERR
"Construct Pheno Matrix Start:".localtime."\n";
204 my @line = @metadata_headers;
205 push @line, ('plantedSeedlotStockDbId', 'plantedSeedlotStockUniquename', 'plantedSeedlotCurrentCount', 'plantedSeedlotCurrentWeightGram', 'plantedSeedlotBoxName', 'plantedSeedlotTransactionCount', 'plantedSeedlotTransactionWeight', 'plantedSeedlotTransactionDescription', 'availableGermplasmSeedlotUniquenames');
207 if ($include_pedigree_parents){
208 push @line, ('germplasmPedigreeFemaleParentName', 'germplasmPedigreeFemaleParentDbId', 'germplasmPedigreeMaleParentName', 'germplasmPedigreeMaleParentDbId');
211 my @sorted_traits = sort keys(%$unique_traits);
212 foreach my $trait (@sorted_traits) {
218 my @observationunit_ids = map { $_->{observationunit_stock_id
} } @
$data;
220 # retrieve treatments and add to header
221 my $project_object = CXGN
::BreedersToolbox
::Projects
->new( { schema
=> $self->bcs_schema });
222 my ($treatment_info, $unique_treatments) = $project_object->get_treatments_by_observationunit_ids(\
@observationunit_ids);
223 my @sorted_treatments = sort keys(%$unique_treatments);
224 foreach my $treatment (@sorted_treatments) {
225 push @line, $treatment;
230 foreach my $obs_unit (@
$data){
231 my $entry_type = $obs_unit->{obsunit_is_a_control
} ?
'check' : 'test';
232 my $synonyms = $obs_unit->{germplasm_synonyms
};
233 my $synonym_string = $synonyms ?
join ("," , @
$synonyms) : '';
234 my $available_germplasm_seedlots = $obs_unit->{available_germplasm_seedlots
};
235 my %available_germplasm_seedlots_uniquenames;
236 foreach (@
$available_germplasm_seedlots){
237 $available_germplasm_seedlots_uniquenames{$_->{stock_uniquename
}}++;
239 my $available_germplasm_seedlots_uniquenames = join ' AND ', (keys %available_germplasm_seedlots_uniquenames);
241 my $trial_name = $obs_unit->{trial_name
};
242 my $trial_desc = $obs_unit->{trial_description
};
244 $trial_name =~ s/\s+$//g;
245 $trial_desc =~ s/\s+$//g;
247 my @line = ($obs_unit->{year
}, $obs_unit->{breeding_program_id
}, $obs_unit->{breeding_program_name
}, $obs_unit->{breeding_program_description
}, $obs_unit->{trial_id
}, $trial_name, $trial_desc, $obs_unit->{design
}, $obs_unit->{plot_width
}, $obs_unit->{plot_length
}, $obs_unit->{field_size
}, $obs_unit->{field_trial_is_planned_to_be_genotyped
}, $obs_unit->{field_trial_is_planned_to_cross
}, $obs_unit->{planting_date
}, $obs_unit->{harvest_date
}, $obs_unit->{trial_location_id
}, $obs_unit->{trial_location_name
}, $obs_unit->{germplasm_stock_id
}, $obs_unit->{germplasm_uniquename
}, $synonym_string, $obs_unit->{observationunit_type_name
}, $obs_unit->{observationunit_stock_id
}, $obs_unit->{observationunit_uniquename
}, $obs_unit->{obsunit_rep
}, $obs_unit->{obsunit_block
}, $obs_unit->{obsunit_plot_number
}, $obs_unit->{obsunit_row_number
}, $obs_unit->{obsunit_col_number
}, $entry_type, $obs_unit->{obsunit_plant_number
}, $obs_unit->{seedlot_stock_id
}, $obs_unit->{seedlot_uniquename
}, $obs_unit->{seedlot_current_count
}, $obs_unit->{seedlot_current_weight_gram
}, $obs_unit->{seedlot_box_name
}, $obs_unit->{seedlot_transaction_amount
}, $obs_unit->{seedlot_transaction_weight_gram
}, $obs_unit->{seedlot_transaction_description
}, $available_germplasm_seedlots_uniquenames);
249 if ($include_pedigree_parents) {
250 my $germplasm = CXGN
::Stock
->new({schema
=> $self->bcs_schema, stock_id
=>$obs_unit->{germplasm_stock_id
}});
251 my $parents = $germplasm->get_parents();
252 push @line, ($parents->{'mother'}, $parents->{'mother_id'}, $parents->{'father'}, $parents->{'father_id'});
255 my $observations = $obs_unit->{observations
};
256 # print STDERR "OBSERVATIONS =".Dumper($observations)."\n";
257 my $include_timestamp = $self->include_timestamp;
258 my %trait_observations;
259 foreach (@
$observations){
260 my $collect_date = $_->{collect_date
};
261 my $timestamp = $_->{timestamp
};
262 if ($include_timestamp && $timestamp) {
263 $trait_observations{$_->{trait_name
}} = "$_->{value},$timestamp";
265 elsif ($include_timestamp && $collect_date) {
266 $trait_observations{$_->{trait_name
}} = "$_->{value},$collect_date";
269 $trait_observations{$_->{trait_name
}} = $_->{value
};
272 foreach my $trait (@sorted_traits) {
273 push @line, $trait_observations{$trait};
275 push @line, $obs_unit->{notes
};
279 my %unit_treatments = %{$treatment_info->{$obs_unit->{observationunit_stock_id
}}};
280 foreach my $treatment (@sorted_treatments) {
281 push @line, $unit_treatments{$treatment};
287 $data = $phenotypes_search->search();
288 #print STDERR "DOWNLOAD DATA =".Dumper($data)."\n";
293 print STDERR
"No of lines retrieved: ".scalar(@
$data)."\n";
294 print STDERR
"Construct Pheno Matrix Start:".localtime."\n";
295 my @unique_obsunit_list = ();
298 foreach my $d (@
$data) {
299 my $cvterm = $d->{trait_name
};
301 my $obsunit_id = $d->{obsunit_stock_id
};
302 if (!exists($seen_obsunits{$obsunit_id})) {
303 push @unique_obsunit_list, $obsunit_id;
304 $seen_obsunits{$obsunit_id} = 1;
307 my $timestamp_value = $d->{timestamp
};
308 my $value = $d->{phenotype_value
};
309 #my $cvterm = $trait."|".$cvterm_accession;
310 if ($include_timestamp && $timestamp_value) {
311 $obsunit_data{$obsunit_id}->{$cvterm} = "$value,$timestamp_value";
313 $obsunit_data{$obsunit_id}->{$cvterm} = $value;
315 $obsunit_data{$obsunit_id}->{'notes'} = $d->{notes
};
317 my $synonyms = $d->{synonyms
};
318 my $synonym_string = $synonyms ?
join ("," , @
$synonyms) : '';
319 my $entry_type = $d->{is_a_control
} ?
'check' : 'test';
321 my $trial_name = $d->{trial_name
};
322 my $trial_desc = $d->{trial_description
};
324 $trial_name =~ s/\s+$//g;
325 $trial_desc =~ s/\s+$//g;
327 $obsunit_data{$obsunit_id}->{metadata
} = [
329 $d->{breeding_program_id
},
330 $d->{breeding_program_name
},
331 $d->{breeding_program_description
},
339 $d->{field_trial_is_planned_to_be_genotyped
},
340 $d->{field_trial_is_planned_to_cross
},
345 $d->{accession_stock_id
},
346 $d->{accession_uniquename
},
348 $d->{obsunit_type_name
},
349 $d->{obsunit_stock_id
},
350 $d->{obsunit_uniquename
},
362 #print STDERR Dumper \%plot_data;
363 #print STDERR Dumper \%traits;
365 # retrieve treatments
366 my $project_object = CXGN
::BreedersToolbox
::Projects
->new( { schema
=> $self->bcs_schema });
367 my ($treatment_info, $unique_treatments) = $project_object->get_treatments_by_observationunit_ids(\
@unique_obsunit_list);
368 my @sorted_treatments = sort keys(%$unique_treatments);
370 my @line = @metadata_headers;
372 my @sorted_traits = sort keys(%traits);
373 foreach my $trait (@sorted_traits) {
377 foreach my $treatment (@sorted_treatments) {
378 push @line, $treatment;
382 foreach my $p (@unique_obsunit_list) {
383 my @line = @
{$obsunit_data{$p}->{metadata
}};
385 foreach my $trait (@sorted_traits) {
386 push @line, $obsunit_data{$p}->{$trait};
388 push @line, $obsunit_data{$p}->{'notes'};
390 my %unit_treatments = %{$treatment_info->{$p}};
391 foreach my $treatment (@sorted_treatments) {
392 push @line, $unit_treatments{$treatment};
398 #print STDERR Dumper \@info;
399 print STDERR
"Construct Pheno Matrix End:".localtime."\n";