10 ##### Getting data #####
11 args = commandArgs(trailingOnly = TRUE)
13 pheno <- read.table(args[1], sep = "\t", header = TRUE)
14 study_trait <- args[2]
16 stability_method <- args[4]
19 messageFile <- args[7]
20 jsonSummary <- args[8]
23 study_trait <- gsub("\\."," ",study_trait)
25 #Making names standard
26 names <- colnames(pheno)
27 new_names <- gsub(".CO.*","", names)
28 new_names <- gsub("\\."," ",new_names)
29 colnames(pheno) <- new_names
31 ## Function for pheno imputation
32 pheno_imputation <- function(inData){
35 dat_imput <- spread(inData, Env, Yield)
36 colnames(dat_imput) <- gsub(" ", "", colnames(dat_imput))
37 #pmm is is the predictive mean matching
38 pre_imputed <- mice(dat_imput, method = "mean", m = 3, maxit = 10)
39 final_imputed <- complete(pre_imputed, 1)
41 final_imputed <- gather(final_imputed, "Env", "Yield", -c("Gen","Rep"))
46 imputation_accuracy <- function(checkDF, fMissing){
47 checkDF$Gen <- as.character(checkDF$Gen)
48 checkDF$Env <- as.character(checkDF$Env)
50 realData <- pheno_imputation(checkDF)
52 # Subseting to check imputation accuracy
53 realData <- realData[order(realData$Gen, realData$Env), ]
54 data_missing <- sample(rownames(realData), nrow(realData)*fMissing)
56 testindDF <- testindDF[order(testindDF$Gen, testindDF$Env), ]
57 testindDF[rownames(testindDF)%in% data_missing, "Yield"] <- NA
59 testindDF$Gen <- as.factor(testindDF$Gen)
60 testindDF$Env <- as.factor(testindDF$Env)
61 testindDF$Rep <- as.factor(testindDF$Rep)
63 imputed_test <- pheno_imputation(testindDF)
64 imputed_test <- imputed_test[order(imputed_test$Gen, imputed_test$Env), ]
65 cat("Imput Acc ok!\n")
66 corrImput <- cor(realData$Yield,imputed_test$Yield, use = "complete")
73 # Setting dataframe to required format
74 pheno <- pheno[,colnames(pheno) %in% c("locationName", "germplasmName", "replicate", "entryType", study_trait[1])]
78 ########################################
80 # Checking conditions to run analysis #
82 ########################################
84 # Getting locations, accessions and reps
85 accessions_sel <- unique(pheno$germplasmName)
86 # Some breeding programs are using accession named Filler
87 accessions_sel <- accessions_sel[!accessions_sel == "Filler" | !accessions_sel == "filler"]
89 # Taking test accessions
90 dat <- pheno %>% dplyr::select(germplasmName, locationName, replicate, entryType, trait=study_trait[1])
91 dat <- dat[dat$entryType == "test",]
92 nLoc <- length(unique(dat$locationName))
93 nReps <- max(dat$replicate)
95 # Number of reps mat be different for checks
97 pheno <- pheno[pheno$replicate <= nReps & pheno$germplasmName %in% accessions_sel,]
99 # Preparing dataset for for analysis
100 dat1 <- pheno %>% dplyr::select(germplasmName, locationName, replicate, trait=study_trait[1])
102 fMissing <- nrow(dat1[is.na(dat1$trait),])/nrow(dat1)
103 message2 <- paste0("The frequency of missing data is too high. Please, check the dataset.")
105 summaryData <- data.frame(mean = mean(dat1[,4], na.rm=TRUE),
106 min = min(dat1[,4], na.rm=TRUE),
107 maxV = max(dat1[,4], na.rm=TRUE),
108 sdV = sd(dat1[,4], na.rm=TRUE),
112 myJson <- jsonlite::toJSON(summaryData)
113 jsonlite::write_json(myJson, jsonSummary)
116 find_duplications <- function(inData){
117 # Step 1: Identify duplicate rows based on the combination of four columns
118 duplicated_rows <- duplicated(inData[, c("Gen", "Env", "Rep")])
120 # Step 2: Calculate the average of 'Yield' for each unique combination of the four columns
121 averages <- aggregate(Yield ~ Gen + Env + Rep, data = dat1, FUN = mean)
123 # Step 3: Replace the duplicated rows with the calculated average
124 pheno_unique <- inData[!duplicated_rows, ] # Keep only unique rows
125 pheno_unique <- merge(pheno_unique, averages, by = c("Gen", "Env", "Rep"), all.x = TRUE)
127 # If there are any missing values (for rows that were not duplicated), fill them with the original 'Yield'
128 pheno_unique$Yield <- ifelse(is.na(pheno_unique$Yield.y), pheno_unique$Yield.x, pheno_unique$Yield.y)
130 # Remove unnecessary columns (if needed)
131 pheno_unique <- pheno_unique[, !(names(pheno_unique) %in% c("Yield.x", "Yield.y"))]
135 # View the resulting data frame
139 dat1$germplasmName <- as.factor(dat1$germplasmName)
140 dat1$locationName <- as.factor(dat1$locationName)
141 dat1$replicate <- as.factor(dat1$replicate)
142 dat1$trait <- as.double(dat1$trait)
144 # Running model from stability package
145 # Assuming dat1 is your data frame
146 dat1 <- dat1[order(dat1$germplasmName, dat1$locationName), ]
147 colnames(dat1) <- c("Gen", "Env", "Rep", "Yield")
150 dat2 <- find_duplications(dat1)
152 if(imputPheno == "imput_yes"){
153 testCor <- imputation_accuracy(dat2, fMissing)
154 dat2 <- pheno_imputation(dat2)
155 cat("The imputation accuracy is: ", sprintf("%.3f",testCor), "\n")
158 # testing if all accessions have the same number of reps in all locations
160 replicate_df <- data.frame(replicate = tapply(dat$Yield, list(dat$Gen, dat$Env), length))
161 replicate_df$germplasmName <- rownames(replicate_df)
162 gathered_df <- gather(replicate_df, key = "replicate_locations", value = "value", -germplasmName)
163 repAcc <- unique(gathered_df$value)
164 message1 <- paste0("Please, check dataset for accessions per locations and reps.")
166 # Saving error message
169 errorMessages <- append(errorMessages, message2)
170 }else if(length(repAcc)>1){
171 errorMessages <- append(errorMessages, message1)
174 # Setting second rep to compare augmented design with 1 rep per location
175 if(nReps == 1 && stability_method == "ammi"){
177 dat2.1$Rep <- as.numeric(dat2.1$Rep)
179 dat2$Rep <- as.numeric(dat2$Rep)
180 dat2 <- rbind(dat2,dat2.1)
181 dat2$Rep <- as.factor(dat2$Rep)
182 }else if (nReps == 1 && stability_method == "gge"){
183 errorMessages <- append(errorMessages, "The number of replication must be greater than 1 for gge.")
187 if(stability_method=="ammi" && length(errorMessages) == 0 ){
193 # Attempt to execute the code
194 model<- with(dat2,AMMI(Env, Gen, Rep, Yield, console=FALSE))
195 }, error = function(e) {
196 # Handle the error by printing a message
197 cat("An error occurred:", conditionMessage(e), "\n")
198 append(errorMessages, paste0("An error occurred:", conditionMessage(e)))
201 index<-index.AMMI(model)
202 index <- tibble::rownames_to_column(index, "Accession")
203 indexDF <- data.frame(Accession = index$Accession, ASV = index$ASV, Rank = index$rASV)
205 # Adding a column to scale ASV
206 # This column will be used to plot stability lines
207 slopes <- data.frame(Accession = index$Accession, slope = index$ASV, scaled= "NA")
208 slopes$scaled <-as.vector(scale(slopes$slope, center = T, scale = F))
209 indexDF <- left_join(indexDF, slopes, by="Accession")
211 indexDF <- as.data.frame(indexDF)
212 indexDF <- indexDF[order(indexDF[,3]),]
214 ## Preparing table with GxE effects
215 list_data <- list(model$genXenv)
216 dataGraphic <- data.frame(plyr::ldply(list_data))
217 rownames(dataGraphic) <- model$means$GEN[1:(nrow(dataGraphic))]
218 dataGraphic <- tibble::rownames_to_column(dataGraphic, "Accession")
220 myMeans <- data.frame(Accession = model$means$GEN, location = model$means$ENV, means = model$means$Yield)
221 myMeans$average <- myMeans$means
222 myMeans$means <- as.vector(scale(myMeans$means, center = T, scale = FALSE))
223 meanDF <- data.frame(tapply(myMeans$means, myMeans$Accession, mean))
224 meanDF$Accession <- rownames(meanDF)
225 colnames(meanDF) <- c("means", "Accession")
226 meanDF <- left_join(meanDF, slopes, by = "Accession") %>% select(Accession, means, slope)
228 meanDF$end <- length(unique(pheno$locationName))
229 minDF <- data.frame(tapply(myMeans$means, myMeans$Accession, min))
230 minDF$Accession <- rownames(minDF)
231 colnames(minDF) <- c("minimum", "Accession")
233 meanDFF <- left_join(meanDF, minDF, by = "Accession")
234 meanDFF$value <- meanDFF$minimum*meanDF$slope
235 meanDFF <- gather(meanDFF, key = "T", value = "X",start,end) %>% select(Accession, minimum, value, X) %>% arrange(Accession)
236 meanDFF <- left_join(meanDFF, slopes, by = "Accession")
238 for ( i in 1:nrow(meanDFF)){
239 if(meanDFF$X[i] == 0){
240 meanDFF$value[i] <- 0
242 meanDFF$value[i] <- (-1)*meanDFF$slope[i]
246 scaled_values <- (meanDFF$value - min(meanDFF$value)) / (max(meanDFF$value) - min(meanDFF$value))
247 meanDFF$value <- scaled_values
249 myData <- data.frame(pivot_longer(dataGraphic, 2:ncol(dataGraphic), names_to = "location", values_to = "Effect"))
250 myData$location <- gsub("\\."," ", myData$location)
252 preFinal <- left_join(myData, myMeans, by = c("Accession", "location"))
253 selLocations <- data.frame(locName = unique(preFinal$location), locNumber = c(1:length(unique(preFinal$location))))
254 preFinal <- left_join(preFinal, selLocations, by = c("location"="locName"))
256 finalDF <- left_join(preFinal, indexDF, by="Accession") %>% dplyr::select(Accession, location, Effect, Rank, means, slope, scaled)
257 finalDF <- finalDF[order(finalDF$Rank, -finalDF$Effect),]
258 finalDF$Effect <- sprintf("%.3f", finalDF$Effect)
259 finalDF$means <- sprintf("%.3f", finalDF$means)
260 finalDF$slope <- sprintf("%.3f", finalDF$slope)
261 finalDF$scaled <- sprintf("%.3f", finalDF$scaled)
263 graphicJson <- jsonlite::toJSON(meanDFF)
264 jsonlite::write_json(graphicJson, graphFile)
266 # Parsing files to combine Accession, locations, effects, rank, averages, slopes
267 preFinal <- left_join(myData, myMeans, by = c("Accession", "location"))
268 finalDF <- left_join(preFinal, indexDF, by="Accession") %>% dplyr::select(Accession, location, Effect, Rank, average, slope, scaled)
269 colnames(finalDF)[5] <- "means"
270 if(imputPheno == "imput_yes"){finalDF$imputAcc <- testCor}
271 finalDF <- finalDF[order(finalDF$Rank, -finalDF$Effect),]
273 myJson <- jsonlite::toJSON(finalDF)
274 jsonlite::write_json(myJson, jsonFile)
276 }else if( stability_method=="gge" && length(errorMessages) == 0){
278 cat("running gge", "\n")
279 # dat2 <- pheno_imputation(dat2)
281 dat2$Env <- as.character(dat2$Env)
283 # Use tryCatch to handle potential errors
285 # Attempt to execute the code
286 effectsDF <- stability::ge_means(.data = dat2, .y = Yield, .gen = Gen, .env = Env)
287 }, error = function(e) {
288 # Handle the error by printing a message
289 cat("An error occurred:", conditionMessage(e), "\n")
290 append(errorMessages, paste0("An error occurred:", conditionMessage(e)))
293 effectsDF <- stability::ge_means(.data=dat2, .y= Yield, .gen=Gen, .env=Env)
295 test <- data.frame(effectsDF$ge_ranks)
296 test$location <- rownames(test)
297 gathered_df <- gather(test, key = "rank", value = "genotypes", -location)
298 gathered_df$rank <- gsub("X", "", gathered_df$rank)
299 gathered_df$rank <- as.numeric(gathered_df$rank)
301 test1 <- data.frame(effectsDF$ge_means)
302 gathered_mean <- gather(test1, key = "locations", value = "means", -Gen)
305 gathered_df$track <- paste0(gathered_df$genotypes,"_",gathered_df$location)
306 gathered_mean$track <- paste0(gathered_mean$Gen,"_",gathered_mean$locations)
308 gathered_df$track <- gsub(" ","",gathered_df$track)
309 gathered_df$track <- gsub("\\.","",gathered_df$track)
311 gathered_mean$track <- gsub(" ","",gathered_mean$track)
312 gathered_mean$track <- gsub("\\.","",gathered_mean$track)
315 finalDF <- left_join(gathered_df, gathered_mean, by = "track") %>% dplyr::select(
316 genotypes, locations, means, rank
318 finalDF$locations <- gsub("\\."," ",finalDF$locations)
320 rankDF <- data.frame(sumRank=tapply(finalDF$rank, finalDF$genotypes, sum))
321 rankDF$genotypes <- rownames(rankDF)
322 rankDF$genotypesRank <- rank(rankDF$sumRank, ties.method = "min")
323 finalDF <- left_join(finalDF, rankDF, by = "genotypes") %>% dplyr::select(genotypes, locations, means, rank, genotypesRank)
324 colnames(finalDF) <- c("genotypes","locations", "means", "locationRank","genotypeRank")
325 finalDF$means <- sprintf("%.2f", finalDF$means)
327 colnames(finalDF) <- c("Accession", "location", "means", "locationRank", "genotypeRank")
328 if(imputPheno == "imput_yes"){finalDF$imputAcc <- testCor}
330 myJson <- jsonlite::toJSON(finalDF)
331 jsonlite::write_json(myJson, jsonFile)
334 ## Preparing Graphic dataset
335 prepGraph <- as.data.frame(tapply(dat2$Yield, list(dat2$Gen, dat2$Env), mean))
336 prepGraph <- tibble::rownames_to_column(prepGraph, var="Accession")
337 meanGraph <- gather(prepGraph, key = "location", value = "mean", -Accession)
338 prepGraph2 <- as.data.frame(tapply(dat2$Yield, list(dat2$Gen, dat2$Env), sd))
339 prepGraph2 <- tibble::rownames_to_column(prepGraph2, var="Accession")
340 sdGraph <- gather(prepGraph2, key = "location", value = "sd", -Accession)
341 finalGraph <- left_join(meanGraph, sdGraph, by=c("Accession","location"))
343 graphicJson <- jsonlite::toJSON(finalGraph)
344 jsonlite::write_json(graphicJson, graphFile)
348 if ( is.null(errorMessages) == F ) {
349 print(sprintf("Writing Error Messages to file: %s", messageFile))
351 write(errorMessages, messageFile)