do not attempt get the person object when no one is logged in...
[sgn.git] / lib / CXGN / Phenotypes / HighDimensionalPhenotypesRelationshipMatrix.pm
blob699fef1e9f6da492e22e08cc8351871625f0bdee
1 package CXGN::Phenotypes::HighDimensionalPhenotypesRelationshipMatrix;
3 =head1 NAME
5 CXGN::Phenotypes::HighDimensionalPhenotypesRelationshipMatrix - an object to handle creating a relationship matrix from high dimensional phenotypes (NIRS, Metabolomics, Transcriptomics).
7 =head1 USAGE
9 my $phenotypes_search = CXGN::Phenotypes::HighDimensionalPhenotypesRelationshipMatrix->new({
10 bcs_schema=>$schema,
11 nd_protocol_id=>$nd_protocol_id,
12 temporary_data_file=>$temp_data_file, #A temporary file path where to store the data used to create the relationship matrix
13 relationship_matrix_file=>$relationship_matrix_file, #A file path where the relationship matrix will be written
14 high_dimensional_phenotype_type=>$high_dimensional_phenotype_type, #NIRS, Transcriptomics, or Metabolomics
15 high_dimensional_phenotype_identifier_list=>\@high_dimensional_phenotype_identifier_list,
16 query_associated_stocks=>$query_associated_stocks, #Query associated plots, plants, tissue samples, etc for accessions that are given
17 accession_list=>$accession_ids,
18 plot_list=>$plot_ids,
19 plant_list=>$plant_ids,
20 });
21 my (\%relationship_matrix_data, \@data, \%identifier_metadata, \@identifier_names) = $phenotypes_search->search();
23 =head1 DESCRIPTION
26 =head1 AUTHORS
29 =cut
31 use strict;
32 use warnings;
33 use Moose;
34 use Try::Tiny;
35 use Data::Dumper;
36 use SGN::Model::Cvterm;
37 use CXGN::Stock::StockLookup;
38 use CXGN::Trial::TrialLayout;
39 use CXGN::Calendar;
40 use JSON;
41 use CXGN::Phenotypes::HighDimensionalPhenotypeProtocol;
42 use CXGN::Phenotypes::HighDimensionalPhenotypesSearch;
43 use Text::CSV;
45 has 'bcs_schema' => (
46 isa => 'Bio::Chado::Schema',
47 is => 'rw',
48 required => 1
51 has 'high_dimensional_phenotype_type' => (
52 isa => 'Str',
53 is => 'rw',
54 required => 1
57 has 'nd_protocol_id' => (
58 isa => 'Int',
59 is => 'rw',
60 required => 1
63 has 'temporary_data_file' => (
64 isa => 'Str',
65 is => 'rw',
66 required => 1
69 has 'relationship_matrix_file' => (
70 isa => 'Str',
71 is => 'rw',
72 required => 1
75 has 'query_associated_stocks' => (
76 isa => 'Bool|Undef',
77 is => 'ro',
78 default => 1
81 has 'high_dimensional_phenotype_identifier_list' => (
82 isa => 'ArrayRef[Str]|Undef',
83 is => 'rw',
86 has 'accession_list' => (
87 isa => 'ArrayRef[Int]|Undef',
88 is => 'rw',
91 has 'plot_list' => (
92 isa => 'ArrayRef[Int]|Undef',
93 is => 'rw',
96 has 'plant_list' => (
97 isa => 'ArrayRef[Int]|Undef',
98 is => 'rw',
101 sub search {
102 my $self = shift;
103 my $schema = $self->bcs_schema();
104 my $nd_protocol_id = $self->nd_protocol_id();
105 my $high_dimensional_phenotype_type = $self->high_dimensional_phenotype_type();
106 my $high_dimensional_phenotype_identifier_list = $self->high_dimensional_phenotype_identifier_list();
107 my $accession_ids = $self->accession_list();
108 my $plot_ids = $self->plot_list();
109 my $plant_ids = $self->plant_list();
110 my $query_associated_stocks = $self->query_associated_stocks();
111 my $temporary_data_file = $self->temporary_data_file();
112 my $relationship_matrix_file = $self->relationship_matrix_file();
113 my $dbh = $schema->storage->dbh();
115 if (!$accession_ids && !$plot_ids && !$plant_ids) {
116 return { error => "No accessions or plots or plants in your selected dataset!" };
119 my $data_term;
120 if ($high_dimensional_phenotype_type eq 'NIRS') {
121 $data_term = 'spectra';
123 elsif ($high_dimensional_phenotype_type eq 'Transcriptomics') {
124 $data_term = 'transcriptomics';
126 elsif ($high_dimensional_phenotype_type eq 'Metabolomics') {
127 $data_term = 'metabolomics';
129 else {
130 return { error => "$high_dimensional_phenotype_type is not supported!" };
133 my $phenotypes_search = CXGN::Phenotypes::HighDimensionalPhenotypesSearch->new({
134 bcs_schema=>$schema,
135 nd_protocol_id=>$nd_protocol_id,
136 high_dimensional_phenotype_type=>$high_dimensional_phenotype_type, #NIRS, Transcriptomics, or Metabolomics
137 high_dimensional_phenotype_identifier_list=>$high_dimensional_phenotype_identifier_list,
138 query_associated_stocks=>$query_associated_stocks, #Query associated plots, plants, tissue samples, etc for accessions that are given
139 accession_list=>$accession_ids,
140 plot_list=>$plot_ids,
141 plant_list=>$plant_ids,
143 my ($data_hash, $identifier_metadata_hash, $identifier_names_array) = $phenotypes_search->search();
145 my %high_dimensional_phenotype_identifier_list_hash = map {$_ => 1} @$high_dimensional_phenotype_identifier_list;
147 my @data_matrix;
148 while (my ($stock_id, $s) = each %$data_hash) {
149 my @row = ($stock_id);
150 while (my ($ident, $value) = each %{$s->{$data_term}}) {
151 if ($high_dimensional_phenotype_identifier_list && scalar(@$high_dimensional_phenotype_identifier_list) > 0) {
152 if (exists($high_dimensional_phenotype_identifier_list_hash{$ident})) {
153 push @row, $value;
156 else {
157 push @row, $value;
160 push @data_matrix, \@row;
163 open(my $F, ">", $temporary_data_file) || die "Can't open file ".$temporary_data_file;
164 foreach (@data_matrix) {
165 my $line = join "\t", @$_;
166 print $F $line."\n";
168 close($F);
170 my $cmd = 'R -e "library(data.table);
171 mat <- fread(\''.$temporary_data_file.'\', header=FALSE, sep=\'\t\');
172 data_mat <- data.matrix(mat[,-1]);
173 rel_mat <- data_mat %*% t(data_mat);
174 rel_mat <- rel_mat / ncol(data_mat);
175 rownames(rel_mat) <- mat\$V1;
176 colnames(rel_mat) <- mat\$V1;
177 write.table(rel_mat, file=\''.$relationship_matrix_file.'\', row.names=TRUE, col.names=NA, sep=\'\t\');
179 my $cmd_status = system($cmd);
181 my %rel_matrix_data;
182 my $csv = Text::CSV->new({ sep_char => "\t" });
183 open(my $rel_res, '<', $relationship_matrix_file) or die "Could not open file '$relationship_matrix_file' $!";
184 print STDERR "Opened $relationship_matrix_file\n";
185 my $header_row = <$rel_res>;
186 my @header;
187 if ($csv->parse($header_row)) {
188 @header = $csv->fields();
191 while (my $row = <$rel_res>) {
192 my @columns;
193 if ($csv->parse($row)) {
194 @columns = $csv->fields();
196 my $stock_id1 = $columns[0];
197 my $counter = 1;
198 foreach my $stock_id2 (@header) {
199 my $val = $columns[$counter];
200 $rel_matrix_data{$stock_id1}->{$stock_id2} = $val;
201 $counter++;
204 close($rel_res);
206 # print STDERR Dumper \%rel_matrix_data;
207 # print STDERR Dumper \@data_matrix;
208 # print STDERR Dumper $identifier_metadata_hash;
209 # print STDERR Dumper $identifier_names_array;
210 return (\%rel_matrix_data, \@data_matrix, $identifier_metadata_hash, $identifier_names_array);