3 #runs phenotypic correlation analysis.
4 #Correlation coeffiecients are stored in tabular and json formats
7 # Isaak Y Tecle (iyt2@cornell.edu)
21 allargs
<-commandArgs()
23 refererQtl
<- grep("qtl",
30 phenoDataFile
<- grep("\\/phenotype_data",
37 correCoefficientsFile
<- grep("corre_coefficients_table",
44 correCoefficientsJsonFile
<- grep("corre_coefficients_json",
52 formattedPhenoFile
<- grep("formatted_phenotype_data",
60 formattedPhenoData
<- c()
63 if (file
.info(formattedPhenoFile
)$size
> 0 ) {
65 formattedPhenoData
<- as
.data
.frame(fread(formattedPhenoFile
,
66 na
.strings
= c("NA", " ", "--", "-", "."),
69 row
.names(formattedPhenoData
) <- formattedPhenoData
[, 1]
70 formattedPhenoData
[, 1] <- NULL
74 if ( length(refererQtl
) != 0 ) {
76 phenoData
<- as
.data
.frame(fread(phenoDataFile
,
78 na
.strings
=c("NA", "-", " ", ".", "..")
82 phenoData
<- as
.data
.frame(fread(phenoDataFile
,
83 na
.strings
= c("NA", " ", "--", "-", ".", "..")
91 if (length(refererQtl
) != 0) {
93 allNames
<- names(phenoData
)
94 nonTraitNames
<- c("ID")
95 allTraitNames
<- allNames
[! allNames
%in% nonTraitNames
]
97 } else if (file
.info(formattedPhenoFile
)$size
== 0 && length(refererQtl
) == 0) {
99 dropColumns
<- c("uniquename", "stock_name")
100 phenoData
<- phenoData
[,!(names(phenoData
) %in% dropColumns
)]
102 allNames
<- names(phenoData
)
103 nonTraitNames
<- c("object_name", "object_id", "stock_id", "design", "block", "replicate")
104 allTraitNames
<- allNames
[! allNames
%in% nonTraitNames
]
108 if (!is
.null(phenoData
) && length(refererQtl
) == 0) {
110 for (i
in allTraitNames
) {
112 if (class(phenoData
[, i
]) != 'numeric') {
113 phenoData
[, i
] <- as
.numeric(as
.character(phenoData
[, i
]))
116 if (all(is
.nan(phenoData$i
))) {
117 phenoData
[, i
] <- sapply(phenoData
[, i
], function(x
) ifelse(is
.numeric(x
), x
, NA))
122 phenoData
<- phenoData
[, colSums(is
.na(phenoData
)) < nrow(phenoData
)]
123 allTraitNames
<- names(phenoData
)[! names(phenoData
) %in% nonTraitNames
]
125 ###############################
126 if (length(refererQtl
) == 0 ) {
127 if (file
.info(formattedPhenoFile
)$size
== 0) {
131 for (i
in allTraitNames
) {
136 experimentalDesign
<- c()
138 if ('design' %in% colnames(phenoData
)) {
140 experimentalDesign
<- phenoData
[2, 'design']
142 if (is
.na(experimentalDesign
)) {
143 experimentalDesign
<- c('No Design')
147 experimentalDesign
<- c('No Design')
150 if ((experimentalDesign
== 'Augmented' || experimentalDesign
== 'RCBD') && unique(phenoData$block
) > 1) {
152 message("GS experimental design: ", experimentalDesign
)
154 augData
<- subset(phenoData
,
155 select
= c("object_name", "object_id", "block", trait
)
158 colnames(augData
)[1] <- "genotypes"
159 colnames(augData
)[4] <- "trait"
161 model
<- try(lmer(trait
~ 0 + genotypes
+ (1|block
),
167 if (class(model
) != "try-error") {
168 genoEffects
<- data
.frame(fixef(model
))
170 colnames(genoEffects
) <- trait
172 nn
<- gsub('genotypes', '', rownames(genoEffects
))
173 rownames(genoEffects
) <- nn
175 genoEffects
<- round(genoEffects
, digits
= 2)
179 formattedPhenoData
<- data
.frame(genoEffects
)
181 formattedPhenoData
<- merge(formattedPhenoData
, genoEffects
, by
=0, all
=TRUE)
182 row
.names(formattedPhenoData
) <- formattedPhenoData
[, 1]
183 formattedPhenoData
[, 1] <- NULL
186 } else if (experimentalDesign
== 'Alpha') {
189 message("Experimental desgin: ", experimentalDesign
)
191 alphaData
<- subset(phenoData
,
192 select
= c("object_name", "object_id","block", "replicate", trait
)
195 colnames(alphaData
)[1] <- "genotypes"
196 colnames(alphaData
)[5] <- "trait"
198 model
<- try(lmer(trait
~ 0 + genotypes
+ (1|replicate
/block
),
203 if (class(model
) != "try-error") {
204 genoEffects
<- data
.frame(fixef(model
))
206 colnames(genoEffects
) <- trait
208 nn
<- gsub('genotypes', '', rownames(genoEffects
))
209 rownames(genoEffects
) <- nn
211 genoEffects
<- round(genoEffects
, digits
= 2)
216 formattedPhenoData
<- genoEffects
218 formattedPhenoData
<- merge(formattedPhenoData
, genoEffects
, by
=0, all
=TRUE)
219 row
.names(formattedPhenoData
) <- formattedPhenoData
[, 1]
220 formattedPhenoData
[, 1] <- NULL
224 message("GS experimental design: ", experimentalDesign
)
226 dropColumns
<- c("object_id", "stock_id", "design", "block", "replicate")
228 formattedPhenoData
<- phenoData
[, !(names(phenoData
) %in% dropColumns
)]
230 formattedPhenoData
<- ddply(formattedPhenoData
,
232 colwise(mean
, na
.rm
=TRUE)
235 row
.names(formattedPhenoData
) <- formattedPhenoData
[, 1]
236 formattedPhenoData
[, 1] <- NULL
243 formattedPhenoData
<- ddply(phenoData
,
245 colwise(mean
, na
.rm
=TRUE)
248 row
.names(formattedPhenoData
) <- formattedPhenoData
[, 1]
249 formattedPhenoData
[, 1] <- NULL
251 formattedPhenoData
<- round(formattedPhenoData
, digits
= 2)
254 coefpvalues
<- rcor
.test(formattedPhenoData
,
259 coefficients
<- coefpvalues$cor
.mat
260 allcordata
<- coefpvalues$cor
.mat
262 allcordata
[lower
.tri(allcordata
)] <- coefpvalues$p
.values
[, 3]
263 diag(allcordata
) <- 1.00
265 pvalues
<- as
.matrix(allcordata
)
267 pvalues
<- round(pvalues
,
271 coefficients
<- round(coefficients
,
275 allcordata
<- round(allcordata
,
279 #remove rows and columns that are all "NA"
280 if ( apply(coefficients
,
282 function(x
)any(is
.na(x
))
287 function(x
)any(is
.na(x
))
292 coefficients
<-coefficients
[-which(apply(coefficients
,
294 function(x
)all(is
.na(x
)))
296 -which(apply(coefficients
,
298 function(x
)all(is
.na(x
)))
304 pvalues
[upper
.tri(pvalues
)] <- NA
305 coefficients
[upper
.tri(coefficients
)] <- NA
307 coefficients2json
<- function(mat
) {
308 mat
<- as
.list(as
.data
.frame(t(mat
)))
313 traits
<- colnames(coefficients
)
315 correlationList
<- list(
316 "traits" = toJSON(traits
),
317 "coefficients " =coefficients2json(coefficients
)
320 correlationJson
<- paste("{",paste("\"", names(correlationList
), "\":", correlationList
, collapse
=","), "}")
322 write
.table(coefficients
,
323 file
=correCoefficientsFile
,
330 write
.table(correlationJson
,
331 file
=correCoefficientsJsonFile
,
337 if (file
.info(formattedPhenoFile
)$size
== 0 && !is
.null(formattedPhenoData
) ) {
338 write
.table(formattedPhenoData
,
339 file
= formattedPhenoFile
,
347 q(save
= "no", runLast
= FALSE)