3 #runs quality control analysis using st4gi.
6 # Christiano Simoes (ccs263@cornell.edu)
14 library(phenoAnalysis)
23 allArgs <- commandArgs()
26 outputFiles <- scan(grep("output_files", allArgs, value = TRUE),
29 inputFiles <- scan(grep("input_files", allArgs, value = TRUE),
32 #Preparing the phenodata
33 phenoDataFile <- grep("\\/phenotype_data", inputFiles, value=TRUE)
34 formattedPhenoFile <- grep("formatted_phenotype_data", inputFiles, fixed = FALSE, value = TRUE)
35 metadataFile <- grep("metadata", inputFiles, value=TRUE)
37 qcMessagesFile <- grep("qc_messages_table", outputFiles, value=TRUE)
38 qcMessagesJsonFile <- grep("qc_messages_json", outputFiles, value=TRUE)
40 formattedPhenoData <- c()
43 phenoData <- as.data.frame(fread(phenoDataFile, sep="\t",
44 na.strings = c("NA", "", "--", "-", ".", "..")
47 metaData <- scan(metadataFile, what="character")
49 message('pheno file ', phenoDataFile)
50 if (colnames(phenoData[ncol(phenoData)]) =="notes"){
51 mydata<-select(phenoData, -c("notes"))
56 mycolnames = tolower(colnames(mydata))
58 myNewdata <- data.frame(mydata[,23],
64 for (i in 40:ncol(mydata)){
65 myNewdata<-cbind(myNewdata, mydata[,i])
69 colnames(myNewdata) <- c("cipno","geno","row","col","rep")
70 traits = c(6:ncol(myNewdata))
72 for(i in 6:ncol(myNewdata)){
73 names(myNewdata)[i] <- mycolnames[i+34]
74 traits[j] <- mycolnames[i+34]
79 #This part I have to add to breedbase
81 testit <- suppressWarnings(catch_expr(check.data(myNewdata), warning = c(collect)))
82 message <-testit$warning
84 curationMessage <- function(original){
86 message2 <- str_replace_all(message1, "[[:punct:]]", " ")
87 message3 <- unlist(str_split(message2," "))
88 message4 <<- message3[2]
89 message5 <<- message3[3]
92 if (length(message)==0){
93 curation <- capture.output(check.data(myNewdata))
94 cicle <- length(curation)/6
96 black_list <- c(1:length(cicle))
97 black_list_message <- c(1:length(cicle))
99 curationMessage(curation[2+j])
100 black_list[i] <- message5
101 black_list_message[i] <- message4
102 cat(message4,"\n",message5,"\n")
106 message<-paste( unlist(testit$warning), collapse = '')
107 black_list <- c(1:length(traits))
108 black_list_message <- c(1:length(traits))
111 #formating the result
112 result <- unlist(str_split(message,":"))
114 result <- str_replace_all(result, "[[:punct:]]", " ")
115 result <- unlist(str_split(result," "))
116 result <- gsub(" ","",result)
118 #preparing the black list of traits that not passed on the test
119 if (length(result)>=1){
120 for (i in 1:length(result)){
122 cat("removing ",result[i],"\n" )
123 }else if (result[i] == ""){
124 cat("removing empty spaces ",i,"\n")
126 black_list[i] = result[i]
127 black_list_message[i] = "This trait is not in st4gi"
132 black_list<-black_list[!is.na(black_list)]
133 result_traits <- c(1:length(traits))
134 for (i in 1:length(traits)){
135 result_traits[i] <- "Trait passed on QC"
138 for (i in 1:length(result_traits)){
139 for (z in 1:length(black_list)){
140 if (traits[i] == black_list[z]){
141 result_traits[i] <- black_list_message[z]
146 if (length(result)>1){
147 for (i in 1:length(traits)){
149 for (j in 1:length(black_list)){
150 if (traits[i] == black_list[j]){
151 result_traits[i] = "Trait is not in st4gi"
152 cat("Found the wrong trait!", "\n")
161 Message = data.frame(traits,result_traits)
164 Message = Message %>%
167 "QC - comments" = result_traits,
170 qualityControlList <- list(
171 "traits" = toJSON(traits),
172 "messages" = toJSON(result_traits)
176 qualityControlJson <- paste("{",paste("\"", names(qualityControlList), "\":", qualityControlList, collapse=","), "}")
177 qualityControlJson <- list(qualityControlJson)
180 file = qcMessagesFile,
186 fwrite(qualityControlJson,
187 file = qcMessagesJsonFile,
194 # write.table(Message, file = qcMessagesFile, append = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE)
196 q(save = "no", runLast = FALSE)