Merge pull request #5163 from solgenomics/audit-error-checking
[sgn.git] / lib / CXGN / Trial / TrialLayoutDownload.pm
blob7d7107e0ae1cfb05210e411c41ee62f0892ec7ff
1 package CXGN::Trial::TrialLayoutDownload;
3 =head1 NAME
5 CXGN::Trial::TrialLayoutDownload
7 =head1 SYNOPSIS
9 Module to format layout info for trial based on which columns user wants to see. Selected columns can be:
10 - for plot level layout: 'plot_name','plot_id','accession_name','accession_id','plot_number','block_number','is_a_control','rep_number','range_number','row_number','col_number','seedlot_name','seed_transaction_operator','num_seed_per_plot','pedigree','location_name','trial_name','year','synonyms','tier','plot_geo_json'
11 - for plant level layout: 'plant_name','plant_id','subplot_name','subplot_id','plot_name','plot_id','accession_name','accession_id','plot_number','block_number','is_a_control','range_number','rep_number','row_number','col_number','seedlot_name','seed_transaction_operator','num_seed_per_plot','subplot_number','plant_number','pedigree','location_name','trial_name','year','synonyms','tier','plot_geo_json'
12 - for subplot level layout:
13 'subplot_name','subplot_id','plot_name','plot_id','accession_name','accession_id','plot_number','block_number','is_a_control','rep_number','range_number','row_number','col_number','seedlot_name','seed_transaction_operator','num_seed_per_plot','subplot_number','pedigree','location_name','trial_name','year','synonyms','tier','plot_geo_json'
14 - for tissue sample field trial level layout:
15 'tissue_sample_name','tissue_sample_id','plant_name','plant_id','subplot_name','subplot_id','plot_name','plot_id','accession_name','accession_id','plot_number','block_number','is_a_control','range_number','rep_number','row_number','col_number','seedlot_name','seed_transaction_operator','num_seed_per_plot','subplot_number','plant_number','tissue_sample_number','pedigree','location_name','trial_name','year','synonyms','tier','plot_geo_json'
17 This module can also optionally include treatments into the output.
18 This module can also optionally include accession trait performance summaries into the output.
20 This module is used from CXGN::Trial::Download::Plugin::TrialLayoutExcel, CXGN::Trial::Download::Plugin::TrialLayoutCSV, CXGN::Fieldbook::DownloadFile, CXGN::Trial->get_plots, CXGN::Trial->get_plants, CXGN::Trial->get_subplots, CXGN::Trial->get_tissue_samples
22 my $trial_layout_download = CXGN::Trial::TrialLayoutDownload->new({
23 schema => $schema,
24 trial_id => $trial_id,
25 data_level => 'plots',
26 treatment_project_ids => [1,2],
27 selected_columns => {"plot_name"=>1,"plot_number"=>1,"block_number"=>1},
28 selected_trait_ids => [1,2,3],
29 });
30 my $output = $trial_layout_download->get_layout_output();
32 Output is an ArrayRef of ArrayRefs where the first entry is the Header and subsequent entries are the layout entries.
35 If you don't need treatments or phenotype summaries included you can ignore those keys like:
37 my $trial_layout_download = CXGN::Trial::TrialLayoutDownload->new({
38 schema => $schema,
39 trial_id => $trial_id,
40 data_level => 'plots',
41 selected_columns => {"plot_name"=>1,"plot_number"=>1,"block_number"=>1},
42 });
43 my $output = $trial_layout_download->get_layout_output();
45 Data Level can be plots, plants, subplots (for splitplot design), field_trial_tissue_samples (for seeing tissue_samples linked to plants in a field trial), plate (for seeing genotyping_layout plate tissue_samples)
47 =head1 AUTHORS
50 =cut
53 use Moose;
54 use Moose::Util::TypeConstraints;
55 use Try::Tiny;
56 use Data::Dumper;
57 use CXGN::Trial;
58 use CXGN::Trial::TrialLayout;
59 use SGN::Model::Cvterm;
60 use CXGN::Stock;
61 use CXGN::Stock::Accession;
62 use JSON;
63 use CXGN::List::Transform;
64 use CXGN::Phenotypes::Summary;
65 use CXGN::Phenotypes::Exact;
66 use CXGN::Trial::TrialLayoutDownload::PlotLayout;
67 use CXGN::Trial::TrialLayoutDownload::PlantLayout;
68 use CXGN::Trial::TrialLayoutDownload::SubplotLayout;
69 use CXGN::Trial::TrialLayoutDownload::TissueSampleLayout;
70 use CXGN::Trial::TrialLayoutDownload::GenotypingPlateLayout;
71 use CXGN::Trial::TrialLayoutDownload::SamplingTrialLayout;
# ---------------------------------------------------------------------------
# Constructor arguments
# ---------------------------------------------------------------------------

# Database schema handle used for every query made by this module.
has 'schema' => (
    is       => 'rw',
    isa      => 'DBIx::Class::Schema',
    required => 1,
);

# Id of the trial whose layout is being downloaded.
has 'trial_id' => (
    is       => 'ro',
    isa      => 'Int',
    required => 1,
);

# Which kind of layout to produce; get_layout_output compares this value
# against the data level names it knows about.
has 'data_level' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'plots',
);

# Optional list of treatment project ids to add to the output.
has 'treatment_project_ids' => (
    is  => 'rw',
    isa => 'ArrayRef[Int]|Undef',
);

# Hash of column name => 1 for the columns the caller wants to see.
has 'selected_columns' => (
    is      => 'ro',
    isa     => 'HashRef',
    default => sub { return { "plot_name" => 1, "plot_number" => 1 }; },
);

# String flag; get_layout_output fetches exact trait values only when this
# is the string 'true'.
has 'include_measured' => (
    is      => 'rw',
    isa     => 'Str',
    default => 'false',
);

# String flag passed on to the layout objects.
has 'all_stats' => (
    is      => 'rw',
    isa     => 'Str',
    default => 'true',
);

# String flag; when 'true', get_layout_output tries to replace trait names
# in the header with their synonyms.
has 'use_synonyms' => (
    is      => 'rw',
    isa     => 'Str',
    default => 'true',
);

# Optional list of trait ids for which performance summaries are included.
has 'selected_trait_ids' => (
    is  => 'ro',
    isa => 'ArrayRef[Int]|Undef',
);

# Stock type of the trial entries; passed on to the layout objects.
has 'trial_stock_type' => (
    is       => 'rw',
    isa      => 'Str',
    required => 0,
    default  => 'accession',
);

# ---------------------------------------------------------------------------
# The attributes below are populated when get_layout_output is run, so they
# should not be supplied when the object is instantiated.
# ---------------------------------------------------------------------------

# Hashref of the cached trial_layout_json that comes from
# CXGN::Trial::TrialLayout.
has 'design' => (
    is  => 'rw',
    isa => 'HashRef',
);

# Type constraint for the 'trial' attribute: any reference whose string form
# mentions one of the known trial-like class names.
subtype 'Trial',
    as 'Ref',
    where {
        $_ =~ /CXGN::(?:Trial|PhenotypingTrial|GenotypingTrial|Folder|CrossingTrial|ManagementFactor|SamplingTrial)/
    },
    message { "The string, $_, was not a valid trial object type" };

has 'trial' => (
    is  => 'rw',
    isa => 'Trial',
);

# Everything needed to create and fill the treatment (management factor)
# columns. All of the lists are in the same order:
#   treatment_trial_list       - arrayref of the CXGN::Trial entries that
#                                represent the treatments in this trial
#   treatment_trial_names_list - arrayref of just the treatment names
#   treatment_units_hash_list  - arrayref of hashrefs indicating the stocks
#                                each treatment was applied to
has 'treatment_info_hash' => (
    is  => 'rw',
    isa => 'HashRef',
);

# Trait names used for the trait columns of the header.
has 'trait_header' => (
    is  => 'rw',
    isa => 'ArrayRef[Str]|Undef',
);

# Exact (measured) trait values, as returned by CXGN::Phenotypes::Exact.
has 'exact_performance_hash' => (
    is  => 'rw',
    isa => 'HashRef',
);

# Per-trait summary rows built from CXGN::Phenotypes::Summary results.
has 'overall_performance_hash' => (
    is  => 'rw',
    isa => 'HashRef',
);
# Build the layout download for this object's trial at the configured
# data_level.
#
# Returns a hashref in one of three shapes:
#   { error_messages => [ ... ] }
#       the layout could not be produced (unsupported data level, no valid
#       design, or the trial lacks the requested kind of observation unit);
#   { output => \%grid, rows => \@rows, cols => \@cols }
#       for data_level 'plot_fieldMap', where $grid{row}{col} is the
#       accession name found in the design at that position;
#   { output => $output }
#       for every other data level, where $output is whatever the
#       level-specific layout object's retrieve() returns.
sub get_layout_output {
    my $self = shift;
    my $trial_id = $self->trial_id();
    my $schema = $self->schema();
    my $data_level = $self->data_level();
    my $include_measured = $self->include_measured();
    my $all_stats = $self->all_stats();
    my $use_synonyms = $self->use_synonyms();
    my %selected_cols = %{$self->selected_columns};
    my $treatments = $self->treatment_project_ids();
    my @selected_traits = $self->selected_trait_ids() ? @{$self->selected_trait_ids} : ();
    my $trial_stock_type = $self->trial_stock_type();
    print STDERR "TrialLayoutDownload for Trial id: ($trial_id) ".localtime()."\n";

    # All failures are reported to the caller in this one shape.
    my $error_result = sub {
        my @messages = @_;
        return { error_messages => \@messages };
    };

    # Layout class that renders each supported data level.
    my %layout_class_for = (
        plots                      => 'CXGN::Trial::TrialLayoutDownload::PlotLayout',
        plants                     => 'CXGN::Trial::TrialLayoutDownload::PlantLayout',
        subplots                   => 'CXGN::Trial::TrialLayoutDownload::SubplotLayout',
        field_trial_tissue_samples => 'CXGN::Trial::TrialLayoutDownload::TissueSampleLayout',
        plate                      => 'CXGN::Trial::TrialLayoutDownload::GenotypingPlateLayout',
        samplingtrial              => 'CXGN::Trial::TrialLayoutDownload::SamplingTrialLayout',
    );

    # Reject unknown data levels up front. Without this check no layout object
    # would be created and the retrieve() call at the end would die on undef.
    if ($data_level ne 'plot_fieldMap' && !exists $layout_class_for{$data_level}) {
        return $error_result->("Data level '$data_level' is not supported for layout download.");
    }

    my $trial_layout;
    try {
        my %param = (
            schema          => $schema,
            trial_id        => $trial_id,
            experiment_type => ($data_level eq 'plate') ? 'genotyping_layout' : 'field_layout',
        );
        $trial_layout = CXGN::Trial::TrialLayout->new(\%param);
    }
    catch {
        # Keep the best-effort behaviour (the missing layout is reported to the
        # caller below) but do not lose the underlying reason.
        print STDERR "TrialLayoutDownload could not load layout for Trial id: ($trial_id): $_\n";
    };
    if (!$trial_layout) {
        return $error_result->("Trial does not have valid field design.");
    }

    print STDERR "TrialLayoutDownload retrieving deisgn ".localtime()."\n";
    my $design = $trial_layout->get_design();
    if (!$design){
        return $error_result->("Trial does not have valid field design. Please contact us.");
    }

    # The field map only needs row/column/accession from the design, so it is
    # answered here without building any layout object.
    if ($data_level eq 'plot_fieldMap') {
        my %accession_at;
        my @rows;
        my @cols;
        foreach my $design_info (values %$design) {
            my $row_num = $design_info->{row_number};
            my $col_num = $design_info->{col_number};
            $accession_at{$row_num}->{$col_num} = $design_info->{accession_name};
            push @rows, $row_num;
            push @cols, $col_num;
        }
        print STDERR "TrialLayoutDownload End for Trial id: ($trial_id) ".localtime()."\n";
        return {output => \%accession_at, rows => \@rows, cols => \@cols};
    }

    print STDERR "TrialLayoutDownload running stock type checks ".localtime()."\n";

    my $selected_trial = CXGN::Trial->new({bcs_schema => $schema, trial_id => $trial_id});
    my $has_plants = $selected_trial->has_plant_entries();
    my $has_subplots = $selected_trial->has_subplot_entries();
    my $has_tissue_samples = $selected_trial->has_tissue_sample_entries();

    print STDERR "TrialLayoutDownload retrieving accessions ".localtime()."\n";

    my $accessions = $selected_trial->get_accessions();
    my @accession_ids = map { $_->{stock_id} } @$accessions;

    print STDERR "TrialLayoutDownload retrieving trait performance if requested ".localtime()."\n";
    my $summary_values = [];
    if (scalar(@selected_traits) > 0){
        my $summary = CXGN::Phenotypes::Summary->new({
            bcs_schema     => $schema,
            trait_list     => \@selected_traits,
            accession_list => \@accession_ids,
        });
        $summary_values = $summary->search();
    }
    # Index the summary rows by column 0, then by column 8 of each row.
    my %overall_performance_hash;
    foreach my $summary_row (@$summary_values){
        $overall_performance_hash{$summary_row->[0]}->{$summary_row->[8]} = $summary_row;
    }

    my @treatment_trials;
    my @treatment_names;
    foreach my $treatment_project_id (@{ $treatments || [] }){
        my $treatment_trial = CXGN::Trial->new({bcs_schema => $schema, trial_id => $treatment_project_id});
        push @treatment_trials, $treatment_trial;
        push @treatment_names, $treatment_trial->get_name();
    }

    # For the field data levels: the observation unit type to query, whether
    # the trial actually has such units, and the error to return when it does
    # not. Plots carry no 'present' key because they are never rejected.
    my %observation_unit_for = (
        plots => {
            type => 'plot',
        },
        plants => {
            type            => 'plant',
            present         => $has_plants,
            missing_message => "Trial does not have plants, so you should not try to download a plant level layout.",
        },
        subplots => {
            type            => 'subplot',
            present         => $has_subplots,
            missing_message => "Trial does not have subplots, so you should not try to download a subplot level layout.",
        },
        field_trial_tissue_samples => {
            type            => 'tissue_sample',
            present         => $has_tissue_samples,
            missing_message => "Trial does not have tissue samples, so you should not try to download a tissue sample level layout.",
        },
    );

    my $exact_performance_hash;
    my @treatment_units_array;
    if (my $unit = $observation_unit_for{$data_level}) {
        if (exists $unit->{present} && !$unit->{present}) {
            return $error_result->($unit->{missing_message});
        }
        if ($include_measured eq 'true') {
            print STDERR "Getting exact trait values\n";
            my $exact = CXGN::Phenotypes::Exact->new({
                bcs_schema => $schema,
                trial_id   => $trial_id,
                data_level => $unit->{type},
            });
            $exact_performance_hash = $exact->search();
        }
        foreach my $treatment_trial (@treatment_trials){
            my $treatment_units = $treatment_trial
                ? $treatment_trial->get_observation_units_direct($unit->{type}, ['treatment_experiment'])
                : [];
            push @treatment_units_array, $treatment_units;
        }
    } elsif ($data_level eq 'plate') {
        #to make the download in the header for genotyping trials more easily understood, the terms change here
        if (exists($selected_cols{'plot_name'})){
            $selected_cols{'tissue_sample_name'} = 1;
            delete $selected_cols{'plot_name'};
        }
        if (exists($selected_cols{'plot_number'})){
            $selected_cols{'well_A01'} = 1;
            delete $selected_cols{'plot_number'};
        }
        $selected_cols{'exported_tissue_sample_name'} = 1;
    }
    # The layout objects declare exact_performance_hash as a HashRef, so always
    # hand them a hash even when no exact values were requested.
    $exact_performance_hash ||= {};

    print STDERR "Treatment stock hashes\n";
    # One lookup hash per treatment, keyed by element 1 of each unit row.
    my @treatment_stock_hashes;
    foreach my $treatment_units (@treatment_units_array){
        my %treatment_stock_hash;
        foreach my $unit_row (@$treatment_units){
            $treatment_stock_hash{$unit_row->[1]}++;
        }
        push @treatment_stock_hashes, \%treatment_stock_hash;
    }

    my %treatment_info_hash = (
        treatment_trial_list       => \@treatment_trials,
        treatment_trial_names_list => \@treatment_names,
        treatment_units_hash_list  => \@treatment_stock_hashes,
    );

    #combine sorted exact and overall trait names and if requested convert to synonyms
    my @exact_trait_names = sort keys %$exact_performance_hash;
    my @overall_trait_names = sort keys %overall_performance_hash;
    my @traits = (@exact_trait_names, @overall_trait_names);

    # Nothing to translate when there are no trait columns at all.
    if ($use_synonyms eq 'true' && @traits) {
        print STDERR "Getting synonyms\n";
        my $t = CXGN::List::Transform->new();
        my $trait_id_list = $t->transform($schema, 'traits_2_trait_ids', \@traits);
        my $synonym_list = $t->transform($schema, 'trait_ids_2_synonyms', $trait_id_list->{'transform'});
        my @missing = @{$synonym_list->{'missing'}};

        # Only switch to synonyms when every trait has one; otherwise keep the
        # full trait names for the whole header.
        if (!scalar @missing) {
            @traits = @{$synonym_list->{'transform'}};
        }
    }

    my $layout_build = {
        schema                   => $schema,
        trial_id                 => $trial_id,
        data_level               => $data_level,
        selected_columns         => \%selected_cols,
        treatment_project_ids    => $treatments,
        design                   => $design,
        trial                    => $selected_trial,
        treatment_info_hash      => \%treatment_info_hash,
        trait_header             => \@traits,
        exact_performance_hash   => $exact_performance_hash,
        overall_performance_hash => \%overall_performance_hash,
        all_stats                => $all_stats,
        trial_stock_type         => $trial_stock_type,
    };

    print STDERR "TrialLayoutDownload getting output object".localtime()."\n";

    my $layout_class = $layout_class_for{$data_level};
    my $layout_output = $layout_class->new($layout_build);

    print STDERR "TrialLayoutDownload retrieving output ".localtime()."\n";

    my $output = $layout_output->retrieve();

    print STDERR "TrialLayoutDownload End for Trial id: ($trial_id) ".localtime()."\n";
    return {output => $output};
}
# Append one cell per treatment to $line: 1 when $design_unit_name is a key of
# that treatment's stock hash, an empty string otherwise. The cells are added
# in the order of @$treatment_stock_hashes. Returns the same $line reference.
sub _add_treatment_to_line {
    my ($self, $treatment_stock_hashes, $line, $design_unit_name) = @_;

    for my $stock_hash (@$treatment_stock_hashes) {
        my $cell = exists($stock_hash->{$design_unit_name}) ? 1 : '';
        push @$line, $cell;
    }

    return $line;
}
# Append one summary cell per trait name to $line and return $line.
#
# The summary row is looked up in $overall_performance_hash by trait name and
# then by the "accession_id" of $design_info. When a row exists the cell lists
# Avg and Count, plus Min, Max and StdDev when $all_stats is the string 'true'.
# When no row exists the cell is an empty string.
sub _add_overall_performance_to_line {
    my ($self, $overall_trait_names, $line, $overall_performance_hash, $design_info, $all_stats) = @_;

    for my $trait_name (@$overall_trait_names) {
        my $stats = $overall_performance_hash->{$trait_name}->{$design_info->{"accession_id"}};

        my $cell = '';
        if ($stats) {
            if ($all_stats eq 'true') {
                $cell = sprintf(
                    "Avg: %s Min: %s Max: %s Count: %s StdDev: %s",
                    $stats->[3], $stats->[5], $stats->[4], $stats->[2], $stats->[6],
                );
            }
            else {
                $cell = sprintf("Avg: %s Count: %s", $stats->[3], $stats->[2]);
            }
        }
        push @$line, $cell;
    }

    return $line;
}
# Look up the pedigree of every accession used in $design in one batch.
#
# $design is a hashref whose values are design entries carrying an
# 'accession_id'. Returns a hashref mapping the first field of each pedigree
# row to the string "female_parent/male_parent", with 'NA' standing in for a
# parent that is missing. The hashref is empty when the design contains no
# accession ids.
sub _get_all_pedigrees {
    my $self = shift;
    my $design = shift;
    my $schema = $self->schema();

    print STDERR "TrialLayoutDownload running get_all_pedigrees ".localtime()."\n";

    # collect all unique accession ids for pedigree retrieval
    my %seen_accession_id;
    foreach my $design_info (values %{$design}) {
        my $accession_id = $design_info->{'accession_id'};
        # Entries without an accession id have no pedigree to look up, and an
        # undefined id must not be sent to the batch query as an empty string.
        next unless defined $accession_id && length $accession_id;
        $seen_accession_id{$accession_id} = 1;
    }
    my @accession_ids = keys %seen_accession_id;

    my %pedigree_strings;
    if (scalar(@accession_ids) > 0) {
        # retrieve pedigree info using batch download (fastest method), then extract pedigree strings from download rows.
        my $stock = CXGN::Stock->new( schema => $schema );
        my $pedigree_rows = $stock->get_pedigree_rows(\@accession_ids, 'parents_only');
        foreach my $row (@$pedigree_rows) {
            # Rows are tab separated; only the first three fields are used.
            my ($progeny, $female_parent, $male_parent) = split "\t", $row;
            $pedigree_strings{$progeny} = join(
                '/',
                $female_parent ? $female_parent : 'NA',
                $male_parent   ? $male_parent   : 'NA',
            );
        }
    }

    print STDERR "TrialLayoutDownload get_all_pedigrees finished at ".localtime()."\n";

    return \%pedigree_strings;
}
# Append one cell per trait name to $line holding the exact value stored for
# $observationunit_name in $exact_performance_hash (keyed by trait name, then
# by observation unit name). A trait with no stored value contributes an empty
# string. Returns the same $line reference.
sub _add_exact_performance_to_line {
    my $self = shift;
    my $exact_trait_names = shift;
    my $line = shift;
    my $exact_performance_hash = shift;
    my $observationunit_name = shift;

    foreach my $trait (@$exact_trait_names){
        my $value = $exact_performance_hash->{$trait}->{$observationunit_name};
        # Test definedness rather than truth so that a measured value of 0 is
        # written out instead of being replaced by a blank cell.
        push @$line, defined($value) ? $value : '';
    }
    return $line;
}