From eac268bba5190fbf6124cf5a59b72cf9eecee841 Mon Sep 17 00:00:00 2001 From: nickmorales Date: Sat, 18 Nov 2017 15:35:25 +0000 Subject: [PATCH] make phenotype download backward compatible. if you want row and column, specify that you do --- lib/CXGN/Phenotypes/PhenotypeMatrix.pm | 456 +++++++++++---------- lib/CXGN/Phenotypes/Search/MaterializedView.pm | 2 +- lib/CXGN/Trial/Download.pm | 2 + .../Trial/Download/Plugin/TrialPhenotypeCSV.pm | 1 + .../Trial/Download/Plugin/TrialPhenotypeExcel.pm | 2 + lib/SGN/Controller/BreedersToolbox/Download.pm | 3 + 6 files changed, 244 insertions(+), 222 deletions(-) rewrite lib/CXGN/Phenotypes/PhenotypeMatrix.pm (78%) diff --git a/lib/CXGN/Phenotypes/PhenotypeMatrix.pm b/lib/CXGN/Phenotypes/PhenotypeMatrix.pm dissimilarity index 78% index 8cf950f4a..8c50d0b92 100644 --- a/lib/CXGN/Phenotypes/PhenotypeMatrix.pm +++ b/lib/CXGN/Phenotypes/PhenotypeMatrix.pm @@ -1,221 +1,235 @@ -package CXGN::Phenotypes::PhenotypeMatrix; - -=head1 NAME - -CXGN::Phenotypes::PhenotypeMatrix - an object to handle creating the phenotype matrix. Uses SearchFactory to handle searching native database or materialized views. - -=head1 USAGE - -my $phenotypes_search = CXGN::Phenotypes::PhenotypeMatrix->new( - bcs_schema=>$schema, - search_type=>$search_type, - data_level=>$data_level, - trait_list=>$trait_list, - trial_list=>$trial_list, - year_list=>$year_list, - location_list=>$location_list, - accession_list=>$accession_list, - plot_list=>$plot_list, - plant_list=>$plant_list, - include_timestamp=>$include_timestamp, - trait_contains=>$trait_contains, - phenotype_min_value=>$phenotype_min_value, - phenotype_max_value=>$phenotype_max_value, - limit=>$limit, - offset=>$offset -); -my @data = $phenotypes_search->get_phenotype_matrix(); - -=head1 DESCRIPTION - - -=head1 AUTHORS - - -=cut - -use strict; -use warnings; -use Moose; -use Data::Dumper; -use SGN::Model::Cvterm; -use CXGN::Stock::StockLookup; -use CXGN::Phenotypes::SearchFactory; - -has 'bcs_schema' => ( - isa => 'Bio::Chado::Schema', - is => 'rw', - required => 1, -); - -#(Native or MaterializedView) -has 'search_type' => ( - isa => 'Str', - is => 'rw', - required => 1, -); - -#(plot, plant, or all) -has 'data_level' => ( - isa => 'Str|Undef', - is => 'ro', -); - -has 'trial_list' => ( - isa => 'ArrayRef[Int]|Undef', - is => 'rw', -); - -has 'trait_list' => ( - isa => 'ArrayRef[Int]|Undef', - is => 'rw', -); - -has 'accession_list' => ( - isa => 'ArrayRef[Int]|Undef', - is => 'rw', -); - -has 'plot_list' => ( - isa => 'ArrayRef[Int]|Undef', - is => 'rw', -); - -has 'plant_list' => ( - isa => 'ArrayRef[Int]|Undef', - is => 'rw', -); - -has 'subplot_list' => ( - isa => 'ArrayRef[Int]|Undef', - is => 'rw', -); - -has 'location_list' => ( - isa => 'ArrayRef[Int]|Undef', - is => 'rw', -); - -has 'year_list' => ( - isa => 'ArrayRef[Int]|Undef', - is => 'rw', -); - -has 'include_timestamp' => ( - isa => 'Bool|Undef', - is => 'ro', - default => 0 -); - -has 'trait_contains' => ( - isa => 'ArrayRef[Str]|Undef', - is => 'rw' -); - -has 'phenotype_min_value' => ( - isa => 'Str|Undef', - is => 'rw' -); - -has 'phenotype_max_value' => ( - isa => 'Str|Undef', - is => 'rw' -); - -has 'limit' => ( - isa => 'Int|Undef', - is => 'rw' -); - -has 'offset' => ( - isa => 'Int|Undef', - is => 'rw' -); - -sub get_phenotype_matrix { - my $self = shift; - - my $phenotypes_search = CXGN::Phenotypes::SearchFactory->instantiate( - $self->search_type, - { - bcs_schema=>$self->bcs_schema, - data_level=>$self->data_level, - trait_list=>$self->trait_list, - trial_list=>$self->trial_list, - year_list=>$self->year_list, - location_list=>$self->location_list, - accession_list=>$self->accession_list, - plot_list=>$self->plot_list, - plant_list=>$self->plant_list, - subplot_list=>$self->subplot_list, - include_timestamp=>$self->include_timestamp, - trait_contains=>$self->trait_contains, - phenotype_min_value=>$self->phenotype_min_value, - phenotype_max_value=>$self->phenotype_max_value, - limit=>$self->limit, - offset=>$self->offset - } - ); - - my $data = $phenotypes_search->search(); - #print STDERR Dumper $data; - my %plot_data; - my %traits; - my $include_timestamp = $self->include_timestamp; - - print STDERR "No of lines retrieved: ".scalar(@$data)."\n"; - print STDERR "Construct Pheno Matrix Start:".localtime."\n"; - my @unique_plot_list = (); - my %seen_plots; - foreach my $d (@$data) { - - my ($year, $project_name, $stock_name, $location, $cvterm, $value, $plot_name, $rep, $block_number, $plot_number, $row_number, $col_number, $trait_id, $project_id, $location_id, $stock_id, $plot_id, $timestamp_value, $synonyms, $design, $stock_type_name, $phenotype_id) = @$d; - - if ($cvterm){ - if (!exists($seen_plots{$plot_id})) { - push @unique_plot_list, $plot_id; - $seen_plots{$plot_id} = 1; - } - - #my $cvterm = $trait."|".$cvterm_accession; - if ($include_timestamp && $timestamp_value) { - $plot_data{$plot_id}->{$cvterm} = "$value,$timestamp_value"; - } else { - $plot_data{$plot_id}->{$cvterm} = $value; - } - my $synonym_string = $synonyms ? join ("," , @$synonyms) : ''; - $plot_data{$plot_id}->{metadata} = [$year,$project_id,$project_name,$design,$location_id,$location,$stock_id,$stock_name,$synonym_string,$stock_type_name,$plot_id,$plot_name,$rep,$block_number,$plot_number,$row_number,$col_number]; - $traits{$cvterm}++; - } - } - #print STDERR Dumper \%plot_data; - #print STDERR Dumper \%traits; - - my @info = (); - my @line = ( 'studyYear', 'studyDbId', 'studyName', 'studyDesign', 'locationDbId', 'locationName', 'germplasmDbId', 'germplasmName', 'germplasmSynonyms', 'observationLevel', 'observationUnitDbId', 'observationUnitName', 'replicate', 'blockNumber', 'plotNumber', 'rowNumber', 'colNumber' ); - - # generate header line - # - my @sorted_traits = sort keys(%traits); - foreach my $trait (@sorted_traits) { - push @line, $trait; - } - push @info, \@line; - - #print STDERR Dumper \@unique_plot_list; - - foreach my $p (@unique_plot_list) { - my @line = @{$plot_data{$p}->{metadata}}; - - foreach my $trait (@sorted_traits) { - push @line, $plot_data{$p}->{$trait}; - } - push @info, \@line; - } - - #print STDERR Dumper \@info; - print STDERR "Construct Pheno Matrix End:".localtime."\n"; - return @info; -} - -1; +package CXGN::Phenotypes::PhenotypeMatrix; + +=head1 NAME + +CXGN::Phenotypes::PhenotypeMatrix - an object to handle creating the phenotype matrix. Uses SearchFactory to handle searching native database or materialized views. + +=head1 USAGE + +my $phenotypes_search = CXGN::Phenotypes::PhenotypeMatrix->new( + bcs_schema=>$schema, + search_type=>$search_type, + data_level=>$data_level, + trait_list=>$trait_list, + trial_list=>$trial_list, + year_list=>$year_list, + location_list=>$location_list, + accession_list=>$accession_list, + plot_list=>$plot_list, + plant_list=>$plant_list, + include_timestamp=>$include_timestamp, + trait_contains=>$trait_contains, + phenotype_min_value=>$phenotype_min_value, + phenotype_max_value=>$phenotype_max_value, + limit=>$limit, + offset=>$offset +); +my @data = $phenotypes_search->get_phenotype_matrix(); + +=head1 DESCRIPTION + + +=head1 AUTHORS + + +=cut + +use strict; +use warnings; +use Moose; +use Data::Dumper; +use SGN::Model::Cvterm; +use CXGN::Stock::StockLookup; +use CXGN::Phenotypes::SearchFactory; + +has 'bcs_schema' => ( + isa => 'Bio::Chado::Schema', + is => 'rw', + required => 1, +); + +#(Native or MaterializedView) +has 'search_type' => ( + isa => 'Str', + is => 'rw', + required => 1, +); + +#(plot, plant, or all) +has 'data_level' => ( + isa => 'Str|Undef', + is => 'ro', +); + +has 'trial_list' => ( + isa => 'ArrayRef[Int]|Undef', + is => 'rw', +); + +has 'trait_list' => ( + isa => 'ArrayRef[Int]|Undef', + is => 'rw', +); + +has 'accession_list' => ( + isa => 'ArrayRef[Int]|Undef', + is => 'rw', +); + +has 'plot_list' => ( + isa => 'ArrayRef[Int]|Undef', + is => 'rw', +); + +has 'plant_list' => ( + isa => 'ArrayRef[Int]|Undef', + is => 'rw', +); + +has 'subplot_list' => ( + isa => 'ArrayRef[Int]|Undef', + is => 'rw', +); + +has 'location_list' => ( + isa => 'ArrayRef[Int]|Undef', + is => 'rw', +); + +has 'year_list' => ( + isa => 'ArrayRef[Int]|Undef', + is => 'rw', +); + +has 'include_timestamp' => ( + isa => 'Bool|Undef', + is => 'ro', + default => 0 +); + +has 'include_row_and_column_numbers' => ( + isa => 'Bool|Undef', + is => 'ro', + default => 0 +); + +has 'trait_contains' => ( + isa => 'ArrayRef[Str]|Undef', + is => 'rw' +); + +has 'phenotype_min_value' => ( + isa => 'Str|Undef', + is => 'rw' +); + +has 'phenotype_max_value' => ( + isa => 'Str|Undef', + is => 'rw' +); + +has 'limit' => ( + isa => 'Int|Undef', + is => 'rw' +); + +has 'offset' => ( + isa => 'Int|Undef', + is => 'rw' +); + +sub get_phenotype_matrix { + my $self = shift; + + my $phenotypes_search = CXGN::Phenotypes::SearchFactory->instantiate( + $self->search_type, + { + bcs_schema=>$self->bcs_schema, + data_level=>$self->data_level, + trait_list=>$self->trait_list, + trial_list=>$self->trial_list, + year_list=>$self->year_list, + location_list=>$self->location_list, + accession_list=>$self->accession_list, + plot_list=>$self->plot_list, + plant_list=>$self->plant_list, + subplot_list=>$self->subplot_list, + include_timestamp=>$self->include_timestamp, + trait_contains=>$self->trait_contains, + phenotype_min_value=>$self->phenotype_min_value, + phenotype_max_value=>$self->phenotype_max_value, + limit=>$self->limit, + offset=>$self->offset + } + ); + + my $data = $phenotypes_search->search(); + #print STDERR Dumper $data; + my %plot_data; + my %traits; + my $include_timestamp = $self->include_timestamp; + + print STDERR "No of lines retrieved: ".scalar(@$data)."\n"; + print STDERR "Construct Pheno Matrix Start:".localtime."\n"; + my @unique_plot_list = (); + my %seen_plots; + foreach my $d (@$data) { + my ($year, $project_name, $stock_name, $location, $cvterm, $value, $plot_name, $rep, $block_number, $plot_number, $row_number, $col_number, $trait_id, $project_id, $location_id, $stock_id, $plot_id, $timestamp_value, $synonyms, $design, $stock_type_name, $phenotype_id) = @$d; + + if ($cvterm){ + if (!exists($seen_plots{$plot_id})) { + push @unique_plot_list, $plot_id; + $seen_plots{$plot_id} = 1; + } + + #my $cvterm = $trait."|".$cvterm_accession; + if ($include_timestamp && $timestamp_value) { + $plot_data{$plot_id}->{$cvterm} = "$value,$timestamp_value"; + } else { + $plot_data{$plot_id}->{$cvterm} = $value; + } + my $synonym_string = $synonyms ? join ("," , @$synonyms) : ''; + if ($self->include_row_and_column_numbers){ + $plot_data{$plot_id}->{metadata} = [$year,$project_id,$project_name,$design,$location_id,$location,$stock_id,$stock_name,$synonym_string,$stock_type_name,$plot_id,$plot_name,$rep,$block_number,$plot_number,$row_number,$col_number]; + } else { + $plot_data{$plot_id}->{metadata} = [$year,$project_id,$project_name,$design,$location_id,$location,$stock_id,$stock_name,$synonym_string,$stock_type_name,$plot_id,$plot_name,$rep,$block_number,$plot_number]; + } + $traits{$cvterm}++; + } + } + #print STDERR Dumper \%plot_data; + #print STDERR Dumper \%traits; + + my @info = (); + my @line; + if ($self->include_row_and_column_numbers){ + @line = ( 'studyYear', 'studyDbId', 'studyName', 'studyDesign', 'locationDbId', 'locationName', 'germplasmDbId', 'germplasmName', 'germplasmSynonyms', 'observationLevel', 'observationUnitDbId', 'observationUnitName', 'replicate', 'blockNumber', 'plotNumber', 'rowNumber', 'colNumber' ); + } else { + @line = ( 'studyYear', 'studyDbId', 'studyName', 'studyDesign', 'locationDbId', 'locationName', 'germplasmDbId', 'germplasmName', 'germplasmSynonyms', 'observationLevel', 'observationUnitDbId', 'observationUnitName', 'replicate', 'blockNumber', 'plotNumber' ); + } + + # generate header line + # + my @sorted_traits = sort keys(%traits); + foreach my $trait (@sorted_traits) { + push @line, $trait; + } + push @info, \@line; + + #print STDERR Dumper \@unique_plot_list; + + foreach my $p (@unique_plot_list) { + my @line = @{$plot_data{$p}->{metadata}}; + + foreach my $trait (@sorted_traits) { + push @line, $plot_data{$p}->{$trait}; + } + push @info, \@line; + } + + #print STDERR Dumper \@info; + print STDERR "Construct Pheno Matrix End:".localtime."\n"; + return @info; +} + +1; diff --git a/lib/CXGN/Phenotypes/Search/MaterializedView.pm b/lib/CXGN/Phenotypes/Search/MaterializedView.pm index 7322cdc26..c5597305c 100644 --- a/lib/CXGN/Phenotypes/Search/MaterializedView.pm +++ b/lib/CXGN/Phenotypes/Search/MaterializedView.pm @@ -260,7 +260,7 @@ sub search { } } my $synonyms = $synonym_hash_lookup{$stock_name}; - push @$result, [ $year, $project_name, $stock_name, $location, $trait, $value, $plot_name, $rep, $block_number, $plot_number, $trait_id, $project_id, $location_id, $stock_id, $plot_id, $timestamp_value, $synonyms, $design, $stock_type_name, $phenotype_id, $full_count ]; + push @$result, [ $year, $project_name, $stock_name, $location, $trait, $value, $plot_name, $rep, $block_number, $plot_number, '', '', $trait_id, $project_id, $location_id, $stock_id, $plot_id, $timestamp_value, $synonyms, $design, $stock_type_name, $phenotype_id, $full_count ]; } print STDERR "Search End:".localtime."\n"; diff --git a/lib/CXGN/Trial/Download.pm b/lib/CXGN/Trial/Download.pm index 0038847e5..0369265bb 100644 --- a/lib/CXGN/Trial/Download.pm +++ b/lib/CXGN/Trial/Download.pm @@ -63,6 +63,7 @@ my $download = CXGN::Trial::Download->new({ format => $plugin, data_level => $data_level, include_timestamp => $timestamp_option, + include_row_and_column_numbers => $include_row_and_column_numbers, trait_contains => \@trait_contains_list, phenotype_min_value => $phenotype_min_value, phenotype_max_value => $phenotype_max_value, @@ -212,6 +213,7 @@ has 'plant_list' => (isa => 'ArrayRef[Int]|Undef', is => 'rw' ); has 'location_list' => (isa => 'ArrayRef[Int]|Undef', is => 'rw' ); has 'year_list' => (isa => 'ArrayRef[Int]|Undef', is => 'rw' ); has 'include_timestamp' => (isa => 'Bool', is => 'ro', default => 0); +has 'include_row_and_column_numbers' => (isa => 'Bool', is => 'ro', default => 0); has 'has_header' => (isa => 'Bool', is => 'ro', default => 1); has 'trait_contains' => (isa => 'ArrayRef[Str]|Undef', is => 'rw'); has 'phenotype_min_value' => (isa => 'Str', is => 'rw'); diff --git a/lib/CXGN/Trial/Download/Plugin/TrialPhenotypeCSV.pm b/lib/CXGN/Trial/Download/Plugin/TrialPhenotypeCSV.pm index bcd29d5bf..69bd2659c 100644 --- a/lib/CXGN/Trial/Download/Plugin/TrialPhenotypeCSV.pm +++ b/lib/CXGN/Trial/Download/Plugin/TrialPhenotypeCSV.pm @@ -113,6 +113,7 @@ sub download { plot_list=>$plot_list, plant_list=>$plant_list, include_timestamp=>$include_timestamp, + include_row_and_column_numbers=>$self->include_row_and_column_numbers, trait_contains=>$trait_contains, phenotype_min_value=>$phenotype_min_value, phenotype_max_value=>$phenotype_max_value, diff --git a/lib/CXGN/Trial/Download/Plugin/TrialPhenotypeExcel.pm b/lib/CXGN/Trial/Download/Plugin/TrialPhenotypeExcel.pm index 3f672733d..664b93efe 100644 --- a/lib/CXGN/Trial/Download/Plugin/TrialPhenotypeExcel.pm +++ b/lib/CXGN/Trial/Download/Plugin/TrialPhenotypeExcel.pm @@ -40,6 +40,7 @@ my $download = CXGN::Trial::Download->new({ format => $plugin, data_level => $data_level, include_timestamp => $timestamp_option, + include_row_and_column_numbers => $include_row_and_column_numbers, trait_contains => \@trait_contains_list, phenotype_min_value => $phenotype_min_value, phenotype_max_value => $phenotype_max_value, @@ -112,6 +113,7 @@ sub download { plot_list=>$plot_list, plant_list=>$plant_list, include_timestamp=>$include_timestamp, + include_row_and_column_numbers=>$self->include_row_and_column_numbers, trait_contains=>$trait_contains, phenotype_min_value=>$phenotype_min_value, phenotype_max_value=>$phenotype_max_value, diff --git a/lib/SGN/Controller/BreedersToolbox/Download.pm b/lib/SGN/Controller/BreedersToolbox/Download.pm index ce99d29be..76a798128 100644 --- a/lib/SGN/Controller/BreedersToolbox/Download.pm +++ b/lib/SGN/Controller/BreedersToolbox/Download.pm @@ -211,6 +211,7 @@ sub download_phenotypes_action : Path('/breeders/trials/phenotype/download') Arg my $format = $c->req->param("format") && $c->req->param("format") ne 'null' ? $c->req->param("format") : "xls"; my $data_level = $c->req->param("dataLevel") && $c->req->param("dataLevel") ne 'null' ? $c->req->param("dataLevel") : "plot"; my $timestamp_option = $c->req->param("timestamp") && $c->req->param("timestamp") ne 'null' ? $c->req->param("timestamp") : 0; + my $include_row_and_column_numbers = $c->req->param("include_row_and_column_numbers") && $c->req->param("include_row_and_column_numbers") ne 'null' ? $c->req->param("include_row_and_column_numbers") : 0; my $trait_list = $c->req->param("trait_list"); my $trait_component_list = $c->req->param("trait_component_list"); my $year_list = $c->req->param("year_list"); @@ -354,6 +355,7 @@ sub download_phenotypes_action : Path('/breeders/trials/phenotype/download') Arg format => $plugin, data_level => $data_level, include_timestamp => $timestamp_option, + include_row_and_column_numbers => $include_row_and_column_numbers, trait_contains => \@trait_contains_list, phenotype_min_value => $phenotype_min_value, phenotype_max_value => $phenotype_max_value, @@ -495,6 +497,7 @@ sub download_action : Path('/breeders/download_action') Args(0) { trial_list=>$trial_id_data->{transform}, accession_list=>$accession_id_data->{transform}, include_timestamp=>$timestamp_included, + include_row_and_column_numbers=>1, data_level=>$datalevel, ); my @data = $phenotypes_search->get_phenotype_matrix(); -- 2.11.4.GIT