1 package CXGN
::Phenotypes
::HighDimensionalPhenotypesRelationshipMatrix
;
5 CXGN::Phenotypes::HighDimensionalPhenotypesRelationshipMatrix - an object to handle creating a relationship matrix from high dimensional phenotypes (NIRS, Metabolomics, Transcriptomics).
9 my $phenotypes_search = CXGN::Phenotypes::HighDimensionalPhenotypesRelationshipMatrix->new({
11 nd_protocol_id=>$nd_protocol_id,
12 temporary_data_file=>$temp_data_file, #A temporary file path where to store the data used to create the relationship matrix
13 relationship_matrix_file=>$relationship_matrix_file, #A file path where the relationship matrix will be written
14 high_dimensional_phenotype_type=>$high_dimensional_phenotype_type, #NIRS, Transcriptomics, or Metabolomics
15 high_dimensional_phenotype_identifier_list=>\@high_dimensional_phenotype_identifier_list,
16 query_associated_stocks=>$query_associated_stocks, #Query associated plots, plants, tissue samples, etc for accessions that are given
17 accession_list=>$accession_ids,
19 plant_list=>$plant_ids,
21 my (\%relationship_matrix_data, \@data, \%identifier_metadata, \@identifier_names) = $phenotypes_search->search();
36 use SGN
::Model
::Cvterm
;
37 use CXGN
::Stock
::StockLookup
;
38 use CXGN
::Trial
::TrialLayout
;
41 use CXGN
::Phenotypes
::HighDimensionalPhenotypeProtocol
;
42 use CXGN
::Phenotypes
::HighDimensionalPhenotypesSearch
;
46 isa
=> 'Bio::Chado::Schema',
51 has
'high_dimensional_phenotype_type' => (
57 has
'nd_protocol_id' => (
63 has
'temporary_data_file' => (
69 has
'relationship_matrix_file' => (
75 has
'query_associated_stocks' => (
81 has
'high_dimensional_phenotype_identifier_list' => (
82 isa
=> 'ArrayRef[Str]|Undef',
86 has
'accession_list' => (
87 isa
=> 'ArrayRef[Int]|Undef',
92 isa
=> 'ArrayRef[Int]|Undef',
97 isa
=> 'ArrayRef[Int]|Undef',
103 my $schema = $self->bcs_schema();
104 my $nd_protocol_id = $self->nd_protocol_id();
105 my $high_dimensional_phenotype_type = $self->high_dimensional_phenotype_type();
106 my $high_dimensional_phenotype_identifier_list = $self->high_dimensional_phenotype_identifier_list();
107 my $accession_ids = $self->accession_list();
108 my $plot_ids = $self->plot_list();
109 my $plant_ids = $self->plant_list();
110 my $query_associated_stocks = $self->query_associated_stocks();
111 my $temporary_data_file = $self->temporary_data_file();
112 my $relationship_matrix_file = $self->relationship_matrix_file();
113 my $dbh = $schema->storage->dbh();
115 if (!$accession_ids && !$plot_ids && !$plant_ids) {
116 return { error
=> "No accessions or plots or plants in your selected dataset!" };
120 if ($high_dimensional_phenotype_type eq 'NIRS') {
121 $data_term = 'spectra';
123 elsif ($high_dimensional_phenotype_type eq 'Transcriptomics') {
124 $data_term = 'transcriptomics';
126 elsif ($high_dimensional_phenotype_type eq 'Metabolomics') {
127 $data_term = 'metabolomics';
130 return { error
=> "$high_dimensional_phenotype_type is not supported!" };
133 my $phenotypes_search = CXGN
::Phenotypes
::HighDimensionalPhenotypesSearch
->new({
135 nd_protocol_id
=>$nd_protocol_id,
136 high_dimensional_phenotype_type
=>$high_dimensional_phenotype_type, #NIRS, Transcriptomics, or Metabolomics
137 high_dimensional_phenotype_identifier_list
=>$high_dimensional_phenotype_identifier_list,
138 query_associated_stocks
=>$query_associated_stocks, #Query associated plots, plants, tissue samples, etc for accessions that are given
139 accession_list
=>$accession_ids,
140 plot_list
=>$plot_ids,
141 plant_list
=>$plant_ids,
143 my ($data_hash, $identifier_metadata_hash, $identifier_names_array) = $phenotypes_search->search();
145 my %high_dimensional_phenotype_identifier_list_hash = map {$_ => 1} @
$high_dimensional_phenotype_identifier_list;
148 while (my ($stock_id, $s) = each %$data_hash) {
149 my @row = ($stock_id);
150 while (my ($ident, $value) = each %{$s->{$data_term}}) {
151 if ($high_dimensional_phenotype_identifier_list && scalar(@
$high_dimensional_phenotype_identifier_list) > 0) {
152 if (exists($high_dimensional_phenotype_identifier_list_hash{$ident})) {
160 push @data_matrix, \
@row;
163 open(my $F, ">", $temporary_data_file) || die "Can't open file ".$temporary_data_file;
164 foreach (@data_matrix) {
165 my $line = join "\t", @
$_;
170 my $cmd = 'R -e "library(data.table);
171 mat <- fread(\''.$temporary_data_file.'\', header=FALSE, sep=\'\t\');
172 data_mat <- data.matrix(mat[,-1]);
173 rel_mat <- data_mat %*% t(data_mat);
174 rel_mat <- rel_mat / ncol(data_mat);
175 rownames(rel_mat) <- mat\$V1;
176 colnames(rel_mat) <- mat\$V1;
177 write.table(rel_mat, file=\''.$relationship_matrix_file.'\', row.names=TRUE, col.names=NA, sep=\'\t\');
179 my $cmd_status = system($cmd);
182 my $csv = Text
::CSV
->new({ sep_char
=> "\t" });
183 open(my $rel_res, '<', $relationship_matrix_file) or die "Could not open file '$relationship_matrix_file' $!";
184 print STDERR
"Opened $relationship_matrix_file\n";
185 my $header_row = <$rel_res>;
187 if ($csv->parse($header_row)) {
188 @header = $csv->fields();
191 while (my $row = <$rel_res>) {
193 if ($csv->parse($row)) {
194 @columns = $csv->fields();
196 my $stock_id1 = $columns[0];
198 foreach my $stock_id2 (@header) {
199 my $val = $columns[$counter];
200 $rel_matrix_data{$stock_id1}->{$stock_id2} = $val;
206 # print STDERR Dumper \%rel_matrix_data;
207 # print STDERR Dumper \@data_matrix;
208 # print STDERR Dumper $identifier_metadata_hash;
209 # print STDERR Dumper $identifier_names_array;
210 return (\
%rel_matrix_data, \
@data_matrix, $identifier_metadata_hash, $identifier_names_array);