more conflicts
[sgn.git] / R / selection_index.r
bloba7b0ee0d1eb15fb9e70acecc42511024c60e94af
1 # a script for calculating selection index
2 # and ranking genotypes accordingly
3 # Isaak Y Tecle iyt2cornell.edu
6 options(echo = FALSE)
8 library(data.table)
9 library(stats)
11 allArgs <- commandArgs()
13 inFile <- grep("input_rank_genotypes",
14 allArgs,
15 ignore.case = TRUE,
16 perl = TRUE,
17 value = TRUE
20 inputFiles <- scan(inFile, what = "character")
22 relWeightsFile <- grep("rel_weights",
23 inputFiles,
24 ignore.case = TRUE,
25 perl = TRUE,
26 value = TRUE
29 outFile <- grep("output_rank_genotypes",
30 allArgs,
31 ignore.case = TRUE,
32 perl = TRUE,
33 value = TRUE
36 outputFiles <- scan(outFile, what = "character")
38 traitsFiles <- grep("gebv_files_of_traits",
39 inputFiles,
40 ignore.case = TRUE,
41 perl = TRUE,
42 value = TRUE
45 rankedGenotypesFile <- grep("ranked_genotypes",
46 outputFiles,
47 ignore.case = TRUE,
48 perl = TRUE,
49 value = TRUE
52 selectionIndexFile <- grep("selection_index",
53 outputFiles,
54 ignore.case = TRUE,
55 perl = TRUE,
56 value = TRUE
59 inTraitFiles <- scan(traitsFiles, what = "character")
60 traitFilesList <- strsplit(inTraitFiles, "\t");
61 traitsTotal <- length(traitFilesList)
63 if (traitsTotal == 0)
64 stop("There are no traits with GEBV data.")
65 if (length(relWeightsFile) == 0)
66 stop("There is no file with relative weights of traits.")
69 relWeights <- data.frame(fread(relWeightsFile))
70 rownames(relWeights) <- relWeights[, 1]
71 relWeights[, 1] <- NULL
73 combinedRelGebvs <- c()
75 for (i in 1:traitsTotal) {
76 traitFile <- traitFilesList[[i]]
77 traitGEBV <- data.frame(fread(traitFile))
78 rownames(traitGEBV) <- traitGEBV[, 1]
79 traitGEBV[, 1] <- NULL
80 traitGEBV <- traitGEBV[order(rownames(traitGEBV)),,drop=FALSE]
81 trait <- colnames(traitGEBV)
83 relWeight <- relWeights[trait, ]
85 if (is.na(relWeight) == FALSE && relWeight != 0 ) {
86 weightedTraitGEBV <- apply(traitGEBV, 1,
87 function(x) x*relWeight
90 combinedRelGebvs <- merge(combinedRelGebvs, weightedTraitGEBV,
91 by = 0,
92 all = TRUE
95 rownames(combinedRelGebvs) <- combinedRelGebvs[, 1]
96 combinedRelGebvs[, 1] <- NULL
100 sumRelWeights <- apply(relWeights, 2, sum)
101 sumRelWeights <- sumRelWeights[[1]]
103 combinedRelGebvs$Index <- apply(combinedRelGebvs, 1, function (x) sum(x)/sumRelWeights)
105 combinedRelGebvs <- combinedRelGebvs[ with(combinedRelGebvs,
106 order(-combinedRelGebvs$Index)
110 combinedRelGebvs <- round(combinedRelGebvs,
111 digits = 2
114 selectionIndex <-c()
116 if (is.null(combinedRelGebvs) == FALSE) {
117 selectionIndex <- subset(combinedRelGebvs,
118 select = 'Index'
122 if (length(rankedGenotypesFile) != 0) {
123 if (is.null(combinedRelGebvs) == FALSE) {
124 fwrite(combinedRelGebvs,
125 file = rankedGenotypesFile,
126 sep = "\t",
127 row.names = TRUE,
128 quote = FALSE,
133 if (length(selectionIndexFile) != 0) {
134 if (is.null(selectionIndex) == FALSE) {
135 fwrite(selectionIndex,
136 file = selectionIndexFile,
137 row.names = TRUE,
138 quote = FALSE,
139 sep = "\t",
144 q(save = "no", runLast = FALSE)