3 #Commands for running phenotypic correlation analysis.
4 #Correlation coeffiecients are stored in tabular and json formats
7 # Isaak Y Tecle (iyt2@cornell.edu)
18 allargs
<-commandArgs()
20 refererQtl
<- grep("qtl",
27 phenoDataFile
<- grep("phenotype_data",
34 correCoefficientsFile
<- grep("corre_coefficients_table",
41 correCoefficientsJsonFile
<- grep("corre_coefficients_json",
50 if ( length(refererQtl
) != 0 ) {
52 phenoData
<- read
.csv(phenoDataFile
,
57 na
.strings
=c("NA", "-", " ", ".")
61 phenoData
<- read
.table(phenoDataFile
,
65 na
.strings
= c("NA", " ", "--", "-", "."),
71 formattedPhenoData
<- c()
74 if (length(refererQtl
) != 0) {
76 allNames
<- names(phenoData
)
77 nonTraitNames
<- c("ID")
79 allTraitNames
<- allNames
[! allNames
%in% nonTraitNames
]
82 dropColumns
<- c("uniquename", "stock_name")
83 phenoData
<- phenoData
[,!(names(phenoData
) %in% dropColumns
)]
85 allNames
<- names(phenoData
)
86 nonTraitNames
<- c("object_name", "object_id", "stock_id", "design", "block", "replicate")
88 allTraitNames
<- allNames
[! allNames
%in% nonTraitNames
]
92 for (i
in allTraitNames
) {
93 if (all(is
.nan(phenoData$i
))) {
94 phenoData
[, i
] <- sapply(phenoData
[, i
], function(x
) ifelse(is
.numeric(x
), x
, NA))
98 phenoData
<- phenoData
[, colSums(is
.na(phenoData
)) < nrow(phenoData
)]
103 if (length(refererQtl
) == 0) {
104 for (i
in allTraitNames
) {
109 experimentalDesign
<- c()
111 if ('design' %in% colnames(phenoData
)) {
113 phenoTrait
<- subset(phenoData
,
114 select
= c("object_name", "object_id", "design", "block", "replicate", trait
)
117 experimentalDesign
<- phenoTrait
[2, 'design']
119 if (is
.na(experimentalDesign
) == TRUE) {
120 experimentalDesign
<- c('No Design')
124 experimentalDesign
<- c('No Design')
127 if (experimentalDesign
== 'augmented' || experimentalDesign
== 'RCBD') {
129 message("experimental design: ", experimentalDesign
)
131 augData
<- subset(phenoTrait
,
132 select
= c("object_name", "object_id", "block", trait
)
135 colnames(augData
)[1] <- "genotypes"
136 colnames(augData
)[4] <- "trait"
138 ff
<- trait
~ 0 + genotypes
147 if (class(model
) != "try-error") {
148 adjMeans
<- data
.matrix(fixed
.effects(model
))
150 colnames(adjMeans
) <- trait
152 nn
<- gsub('genotypes', '', rownames(adjMeans
))
153 rownames(adjMeans
) <- nn
154 adjMeans
<- round(adjMeans
, digits
= 2)
156 phenoTrait
<- data
.frame(adjMeans
)
158 colnames(phenoTrait
) <- trait
161 formattedPhenoData
<- data
.frame(adjMeans
)
163 formattedPhenoData
<- merge(formattedPhenoData
, phenoTrait
, by
=0, all
=TRUE)
164 row
.names(formattedPhenoData
) <- formattedPhenoData
[, 1]
165 formattedPhenoData
[, 1] <- NULL
169 } else if (experimentalDesign
== 'alpha') {
172 alphaData
<- subset(phenoData
,
173 select
= c("object_name", "object_id","block", "replicate", trait
)
176 colnames(alphaData
)[2] <- "genotypes"
177 colnames(alphaData
)[5] <- "trait"
179 ff
<- trait
~ 0 + genotypes
183 random
= ~1|replicate
/block
,
188 if (class(model
) != "try-error") {
189 adjMeans
<- data
.matrix(fixed
.effects(model
))
190 colnames(adjMeans
) <- trait
192 nn
<- gsub('genotypes', '', rownames(adjMeans
))
193 rownames(adjMeans
) <- nn
194 adjMeans
<- round(adjMeans
, digits
= 2)
196 phenoTrait
<- data
.frame(adjMeans
)
197 colnames(phenoTrait
) <- trait
200 formattedPhenoData
<- data
.frame(adjMeans
)
202 formattedPhenoData
<- merge(formattedPhenoData
, phenoTrait
, by
=0, all
=TRUE)
203 row
.names(formattedPhenoData
) <- formattedPhenoData
[, 1]
204 formattedPhenoData
[, 1] <- NULL
209 message("experimental design: ", experimentalDesign
)
212 dropColumns
<- c("object_id", "stock_id", "design", "block", "replicate")
214 formattedPhenoData
<- phenoData
[, !(names(phenoData
) %in% dropColumns
)]
216 formattedPhenoData
<- ddply(formattedPhenoData
,
218 colwise(mean
, na
.rm
=TRUE)
221 row
.names(formattedPhenoData
) <- formattedPhenoData
[, 1]
222 formattedPhenoData
[, 1] <- NULL
229 formattedPhenoData
<- ddply(phenoData
,
231 colwise(mean
, na
.rm
=TRUE)
234 row
.names(formattedPhenoData
) <- formattedPhenoData
[, 1]
235 formattedPhenoData
[, 1] <- NULL
239 formattedPhenoData
<- round(formattedPhenoData
,
243 coefpvalues
<- rcor
.test(formattedPhenoData
,
249 coefficients
<- coefpvalues$cor
.mat
250 allcordata
<- coefpvalues$cor
.mat
251 allcordata
[lower
.tri(allcordata
)] <- coefpvalues$p
.values
[, 3]
252 diag(allcordata
) <- 1.00
254 pvalues
<- as
.matrix(allcordata
)
256 pvalues
<- round(pvalues
,
260 coefficients
<- round(coefficients
,
264 allcordata
<- round(allcordata
,
268 #remove rows and columns that are all "NA"
269 if ( apply(coefficients
,
271 function(x
)any(is
.na(x
))
276 function(x
)any(is
.na(x
))
281 coefficients
<-coefficients
[-which(apply(coefficients
,
283 function(x
)all(is
.na(x
)))
285 -which(apply(coefficients
,
287 function(x
)all(is
.na(x
)))
293 pvalues
[upper
.tri(pvalues
)]<-NA
294 coefficients
[upper
.tri(coefficients
)]<-NA
296 coefficients2json
<- function(mat
){
297 mat
<- as
.list(as
.data
.frame(t(mat
)))
302 traits
<- colnames(coefficients
)
304 correlationList
<- list(
305 "traits"=toJSON(traits
),
306 "coefficients"=coefficients2json(coefficients
)
309 correlationJson
<- paste("{",paste("\"", names(correlationList
), "\":", correlationList
, collapse
=","), "}")
311 write
.table(coefficients
,
312 file
=correCoefficientsFile
,
319 write
.table(correlationJson
,
320 file
=correCoefficientsJsonFile
,
325 q(save
= "no", runLast
= FALSE)