build jbrowse url by indexed feature name
[sgn.git] / R / phenotypic_correlation.r
blob7eb2701c954be8aeb223360c75d72260f35e47e3
1 #SNOPSIS
3 #Commands for running phenotypic correlation analysis.
4 #Correlation coeffiecients are stored in tabular and json formats
6 #AUTHOR
7 # Isaak Y Tecle (iyt2@cornell.edu)
10 options(echo = FALSE)
12 library(gplots)
13 library(ltm)
14 library(plyr)
15 library(rjson)
16 library(nlme)
18 allargs<-commandArgs()
20 refererQtl <- grep("qtl",
21 allargs,
22 ignore.case=TRUE,
23 perl=TRUE,
24 value=TRUE
27 phenoDataFile <- grep("phenotype_data",
28 allargs,
29 ignore.case=TRUE,
30 perl=TRUE,
31 value=TRUE
34 correCoefficientsFile <- grep("corre_coefficients_table",
35 allargs,
36 ignore.case=TRUE,
37 perl=TRUE,
38 value=TRUE
41 correCoefficientsJsonFile <- grep("corre_coefficients_json",
42 allargs,
43 ignore.case=TRUE,
44 perl=TRUE,
45 value=TRUE
48 phenoData <- c()
50 if ( length(refererQtl) != 0 ) {
52 phenoData <- read.csv(phenoDataFile,
53 header=TRUE,
54 row.names = NULL,
55 dec=".",
56 sep=",",
57 na.strings=c("NA", "-", " ", ".")
60 } else {
61 phenoData <- read.table(phenoDataFile,
62 header = TRUE,
63 row.names = NULL,
64 sep = "\t",
65 na.strings = c("NA", " ", "--", "-", "."),
66 dec = "."
71 formattedPhenoData <- c()
72 allTraitNames <- c()
74 if (length(refererQtl) != 0) {
76 allNames <- names(phenoData)
77 nonTraitNames <- c("ID")
79 allTraitNames <- allNames[! allNames %in% nonTraitNames]
81 } else {
82 dropColumns <- c("uniquename", "stock_name")
83 phenoData <- phenoData[,!(names(phenoData) %in% dropColumns)]
85 allNames <- names(phenoData)
86 nonTraitNames <- c("object_name", "object_id", "stock_id", "design", "block", "replicate")
88 allTraitNames <- allNames[! allNames %in% nonTraitNames]
92 for (i in allTraitNames) {
93 if (all(is.nan(phenoData$i))) {
94 phenoData[, i] <- sapply(phenoData[, i], function(x) ifelse(is.numeric(x), x, NA))
98 phenoData <- phenoData[, colSums(is.na(phenoData)) < nrow(phenoData)]
100 trait <- c()
101 cnt <- 0
103 if (length(refererQtl) == 0) {
104 for (i in allTraitNames) {
105 cnt <- cnt + 1
106 trait <- i
108 phenoTrait <- c()
109 experimentalDesign <- c()
111 if ('design' %in% colnames(phenoData)) {
113 phenoTrait <- subset(phenoData,
114 select = c("object_name", "object_id", "design", "block", "replicate", trait)
117 experimentalDesign <- phenoTrait[2, 'design']
119 if (is.na(experimentalDesign) == TRUE) {
120 experimentalDesign <- c('No Design')
123 } else {
124 experimentalDesign <- c('No Design')
127 if (experimentalDesign == 'augmented' || experimentalDesign == 'RCBD') {
129 message("experimental design: ", experimentalDesign)
131 augData <- subset(phenoTrait,
132 select = c("object_name", "object_id", "block", trait)
135 colnames(augData)[1] <- "genotypes"
136 colnames(augData)[4] <- "trait"
138 ff <- trait ~ 0 + genotypes
140 model <- try(lme(ff,
141 data=augData,
142 random = ~1|block,
143 method="REML",
144 na.action = na.omit
147 if (class(model) != "try-error") {
148 adjMeans <- data.matrix(fixed.effects(model))
150 colnames(adjMeans) <- trait
152 nn <- gsub('genotypes', '', rownames(adjMeans))
153 rownames(adjMeans) <- nn
154 adjMeans <- round(adjMeans, digits = 2)
156 phenoTrait <- data.frame(adjMeans)
158 colnames(phenoTrait) <- trait
160 if(cnt == 1 ) {
161 formattedPhenoData <- data.frame(adjMeans)
162 } else {
163 formattedPhenoData <- merge(formattedPhenoData, phenoTrait, by=0, all=TRUE)
164 row.names(formattedPhenoData) <- formattedPhenoData[, 1]
165 formattedPhenoData[, 1] <- NULL
169 } else if (experimentalDesign == 'alpha') {
171 trait <- i
172 alphaData <- subset(phenoData,
173 select = c("object_name", "object_id","block", "replicate", trait)
176 colnames(alphaData)[2] <- "genotypes"
177 colnames(alphaData)[5] <- "trait"
179 ff <- trait ~ 0 + genotypes
181 model <- try(lme(ff,
182 data = alphaData,
183 random = ~1|replicate/block,
184 method = "REML",
185 na.action = na.omit
188 if (class(model) != "try-error") {
189 adjMeans <- data.matrix(fixed.effects(model))
190 colnames(adjMeans) <- trait
192 nn <- gsub('genotypes', '', rownames(adjMeans))
193 rownames(adjMeans) <- nn
194 adjMeans <- round(adjMeans, digits = 2)
196 phenoTrait <- data.frame(adjMeans)
197 colnames(phenoTrait) <- trait
199 if(cnt == 1 ) {
200 formattedPhenoData <- data.frame(adjMeans)
201 } else {
202 formattedPhenoData <- merge(formattedPhenoData, phenoTrait, by=0, all=TRUE)
203 row.names(formattedPhenoData) <- formattedPhenoData[, 1]
204 formattedPhenoData[, 1] <- NULL
208 } else {
209 message("experimental design: ", experimentalDesign)
210 message("GS stuff")
212 dropColumns <- c("object_id", "stock_id", "design", "block", "replicate")
214 formattedPhenoData <- phenoData[, !(names(phenoData) %in% dropColumns)]
216 formattedPhenoData <- ddply(formattedPhenoData,
217 "object_name",
218 colwise(mean, na.rm=TRUE)
221 row.names(formattedPhenoData) <- formattedPhenoData[, 1]
222 formattedPhenoData[, 1] <- NULL
227 } else {
228 message("qtl stuff")
229 formattedPhenoData <- ddply(phenoData,
230 "ID",
231 colwise(mean, na.rm=TRUE)
234 row.names(formattedPhenoData) <- formattedPhenoData[, 1]
235 formattedPhenoData[, 1] <- NULL
239 formattedPhenoData <- round(formattedPhenoData,
240 digits = 2
243 coefpvalues <- rcor.test(formattedPhenoData,
244 method="pearson",
245 use="pairwise"
249 coefficients <- coefpvalues$cor.mat
250 allcordata <- coefpvalues$cor.mat
251 allcordata[lower.tri(allcordata)] <- coefpvalues$p.values[, 3]
252 diag(allcordata) <- 1.00
254 pvalues <- as.matrix(allcordata)
256 pvalues <- round(pvalues,
257 digits=2
260 coefficients <- round(coefficients,
261 digits=3
264 allcordata <- round(allcordata,
265 digits=3
268 #remove rows and columns that are all "NA"
269 if ( apply(coefficients,
271 function(x)any(is.na(x))
274 apply(coefficients,
276 function(x)any(is.na(x))
281 coefficients<-coefficients[-which(apply(coefficients,
283 function(x)all(is.na(x)))
285 -which(apply(coefficients,
287 function(x)all(is.na(x)))
293 pvalues[upper.tri(pvalues)]<-NA
294 coefficients[upper.tri(coefficients)]<-NA
296 coefficients2json <- function(mat){
297 mat <- as.list(as.data.frame(t(mat)))
298 names(mat) <- NULL
299 toJSON(mat)
302 traits <- colnames(coefficients)
304 correlationList <- list(
305 "traits"=toJSON(traits),
306 "coefficients"=coefficients2json(coefficients)
309 correlationJson <- paste("{",paste("\"", names(correlationList), "\":", correlationList, collapse=","), "}")
311 write.table(coefficients,
312 file=correCoefficientsFile,
313 col.names=TRUE,
314 row.names=TRUE,
315 quote=FALSE,
316 dec="."
319 write.table(correlationJson,
320 file=correCoefficientsJsonFile,
321 col.names=FALSE,
322 row.names=FALSE,
325 q(save = "no", runLast = FALSE)