Merge pull request #3618 from solgenomics/topic/progress_tool
[sgn.git] / R / solGS / genetic_correlation.r
blob7c0dd48e3362a76cd083998b6e9ca9fa1150ddfb
1 #SNOPSIS
3 #runs genetic correlation analyis.
4 #correlation coeffiecients are stored in tabular and json formats
6 #AUTHOR
7 # Isaak Y Tecle (iyt2@cornell.edu)
10 options(echo = FALSE)
12 library(ltm)
13 #library(rjson)
14 library(jsonlite)
15 library(methods)
16 library(dplyr)
17 library(tibble)
19 allArgs <- commandArgs()
21 outputFiles <- scan(grep("output_files", allArgs, value = TRUE),
22 what = "character")
24 inputFiles <- scan(grep("input_files", allArgs, value = TRUE),
25 what = "character")
27 correTableFile <- grep("genetic_corre_table", outputFiles, value=TRUE)
28 correJsonFile <- grep("genetic_corre_json", outputFiles, value=TRUE)
30 geneticDataFile <- grep("combined_gebvs", inputFiles, value=TRUE)
32 selectionIndexFile <- grep("selection_index", inputFiles, value=TRUE)
34 geneticData <- read.table(geneticDataFile,
35 header = TRUE,
36 row.names = 1,
37 sep = "\t",
38 na.strings = c("NA"),
39 dec = "."
42 indexData <- c()
44 if (length(selectionIndexFile) != 0
45 && file.info(selectionIndexFile)$size != 0) {
46 indexData <- read.table(selectionIndexFile,
47 header = TRUE,
48 row.names = 1,
49 sep = "\t",
50 na.strings = c("NA"),
51 dec = "."
55 corrData <- c()
57 if (!is.null(indexData)) {
58 geneticData <- rownames_to_column(geneticData, var="genotypes")
59 indexData <- rownames_to_column(indexData, var="genotypes")
61 geneticData <- geneticData %>% arrange(genotypes)
62 indexData <- indexData %>% arrange(genotypes)
64 corrData <- full_join(geneticData, indexData)
65 corrData <- column_to_rownames(corrData, var="genotypes")
67 } else {
68 corrData <- geneticData
72 coefpvalues <- rcor.test(corrData,
73 method="pearson",
74 use="pairwise"
77 coefficients <- coefpvalues$cor.mat
78 allcordata <- coefpvalues$cor.mat
80 allcordata[lower.tri(allcordata)] <- coefpvalues$p.values[, 3]
81 diag(allcordata) <- 1.00
83 pvalues <- as.matrix(allcordata)
84 pvalues <- round(pvalues,
85 digits=2
88 coefficients <- round(coefficients,
89 digits=3
92 allcordata <- round(allcordata,
93 digits=3
96 #remove rows and columns that are all "NA"
97 if (apply(coefficients, 1, function(x) any(is.na(x))) ||
98 apply(coefficients, 2, function(x) any(is.na(x)))) {
100 coefficients <- coefficients[-which(apply(coefficients, 1, function(x) all(is.na(x)))),
101 -which(apply(coefficients, 2, function(x) all(is.na(x))))]
105 pvalues[upper.tri(pvalues)] <- NA
106 coefficients[upper.tri(coefficients)] <- NA
107 coefficients <- data.frame(coefficients)
109 coefficients2json <- coefficients
110 names(coefficients2json) <- NULL
112 traits <- colnames(coefficients)
114 correlationList <- list(
115 labels = traits,
116 values = coefficients
119 correlationJson <- jsonlite::toJSON(correlationList)
121 write.table(coefficients,
122 file=correTableFile,
123 col.names=TRUE,
124 row.names=TRUE,
125 quote=FALSE,
126 dec="."
129 write(correlationJson,
130 file = correJsonFile)
133 q(save = "no", runLast = FALSE)