Merge pull request #4272 from solgenomics/topic/fix-vcf-download
[sgn.git] / R / solGS / selection_index.r
blobbc3ddda626ff8ab0fcc3239b35d0d3ad3f1ece4c
1 # calculates selection index
2 # and ranks genotypes accordingly
3 # Isaak Y Tecle iyt2cornell.edu
6 options(echo = FALSE)
8 library(data.table)
9 library(stats)
10 library(stringi)
11 library(dplyr)
13 allArgs <- commandArgs()
15 inputFiles <- scan(grep("input_files", allArgs, value = TRUE),
16 what = "character")
18 relWeightsFile <- grep("rel_weights", inputFiles, value = TRUE)
20 outputFiles <- scan(grep("output_files", allArgs, value = TRUE),
21 what = "character")
23 traitsFiles <- grep("gebv_files_of_traits", inputFiles, value = TRUE)
25 gebvsSelectionIndexFile <- grep("gebvs_selection_index",
26 outputFiles,
27 value = TRUE)
29 selectionIndexFile <- grep("selection_index_only",
30 outputFiles,
31 value=TRUE)
33 inTraitFiles <- scan(traitsFiles, what = "character")
35 traitFilesList <- strsplit(inTraitFiles, "\t");
36 traitsTotal <- length(traitFilesList)
38 if (traitsTotal == 0)
39 stop("There are no traits with GEBV data.")
40 if (length(relWeightsFile) == 0)
41 stop("There is no file with relative weights of traits.")
44 relWeights <- data.frame(fread(relWeightsFile, header = TRUE))
45 rownames(relWeights) <- relWeights[, 1]
46 relWeights[, 1] <- NULL
48 if (is.null(relWeights)) {
49 stop('There were no relative weights for all the traits.')
52 combinedRelGebvs <- c()
54 for (i in 1:traitsTotal) {
55 traitFile <- traitFilesList[[i]]
56 traitGEBV <- data.frame(fread(traitFile, header = TRUE))
57 rownames(traitGEBV) <- traitGEBV[, 1]
58 traitGEBV[, 1] <- NULL
59 traitGEBV <- traitGEBV[order(rownames(traitGEBV)),,drop=FALSE]
60 trait <- colnames(traitGEBV)
62 relWeight <- relWeights[trait, ]
64 if (is.na(relWeight) == FALSE && relWeight != 0 ) {
66 weightedTraitGEBV <- apply(traitGEBV, 1,
67 function(x) x*relWeight)
69 weightedTraitGEBV <- data.frame(weightedTraitGEBV)
70 colnames(weightedTraitGEBV) <- paste0(trait, '_weighted')
72 combinedRelGebvs <- merge(combinedRelGebvs, weightedTraitGEBV,
73 by = 0,
74 all = TRUE)
77 rownames(combinedRelGebvs) <- combinedRelGebvs[, 1]
78 combinedRelGebvs[, 1] <- NULL
82 sumRelWeights <- apply(relWeights, 2, sum)
83 sumRelWeights <- sumRelWeights[[1]]
85 combinedRelGebvs$Index <- apply(combinedRelGebvs, 1, function (x) sum(x))
87 combinedRelGebvs <- combinedRelGebvs[ with(combinedRelGebvs,
88 order(-combinedRelGebvs$Index)
92 combinedRelGebvs <- round(combinedRelGebvs, 2)
94 selectionIndex <-c()
96 if (!is.null(combinedRelGebvs)) {
97 selectionIndex <- subset(combinedRelGebvs,
98 select = 'Index'
102 if (gebvsSelectionIndexFile != 0) {
103 if (!is.null(combinedRelGebvs)) {
104 fwrite(combinedRelGebvs,
105 file = gebvsSelectionIndexFile,
106 sep = "\t",
107 row.names = TRUE,
108 quote = FALSE,
113 if (!is.null(selectionIndexFile)) {
114 if (!is.null(selectionIndex)) {
115 fwrite(selectionIndex,
116 file = selectionIndexFile,
117 row.names = TRUE,
118 quote = FALSE,
119 sep = "\t",
124 q(save = "no", runLast = FALSE)