Merge pull request #4719 from solgenomics/topic/fixing_histogram
[sgn.git] / R / solGS / qtl_analysis.r
blobdc199b9db6bae82db14bcb2a21799bce6b73cfee
2 ##SNOPSIS
4 #Qtl analysis based on rqtl.
7 ##AUTHOR
8 ## Isaak Y Tecle (iyt2@cornell.edu)
11 options(echo = FALSE)
13 library(qtl)
15 allargs <- commandArgs()
17 infile <- grep("infile_list", allargs, value=TRUE)
18 outfile <- grep("outfile_list", allargs, value=TRUE)
20 infile <- scan(infile, what="character")
21 statfile <- grep("stat", infile, value=TRUE)
23 ##### stat files
24 statfiles <- scan(statfile, what="character")
26 ###### QTL mapping method ############
27 qtlmethodfile <- grep("stat_qtl_method", statfiles, value=TRUE)
28 qtlmethod <- scan(qtlmethodfile, what="character", sep="\n")
30 if (qtlmethod == "Maximum Likelihood") {
31 qtlmethod <- c("em")
32 } else if (qtlmethod == "Haley-Knott Regression") {
33 qtlmethod <- c("hk")
34 } else if (qtlmethod == "Multiple Imputation") {
35 qtlmethod <- c("imp")
36 } else if (qtlmethod == "Marker Regression") {
37 qtlmethod <- c("mr")
40 ###### QTL model ############
41 qtlmodelfile <- grep("stat_qtl_model", statfiles, value=TRUE)
42 qtlmodel <- scan(qtlmodelfile, what="character", sep="\n")
44 if (qtlmodel == "Single-QTL Scan") {
45 qtlmodel <- c("scanone")
46 } else if (qtlmodel == "Two-QTL Scan") {
47 qtlmodel<-c("scantwo")
50 ###### permutation############
51 userpermufile <- grep("stat_permu_test", statfiles, value=TRUE)
52 userpermuvalue <- scan(userpermufile, what="numeric", dec= ".", sep="\n")
54 if (userpermuvalue == "None") {
55 userpermuvalue<-c(0)
58 userpermuvalue <- as.numeric(userpermuvalue)
60 #####for test only
61 #userpermuvalue<-c(100)
62 #####
65 ######genome step size############
66 stepsizefile <- grep("stat_step_size", statfiles, value=TRUE)
68 stepsize <- scan(stepsizefile, what="numeric", dec = ".", sep="\n")
70 if (qtlmethod == 'mr') {
71 stepsize <- c(0)
72 } else if (qtlmethod != 'mr' & stepsize == "zero") {
73 stepsize <- c(0)
76 stepsize <- as.numeric(stepsize)
78 ######genotype calculation method############
79 genoprobmethodfile <- grep("stat_prob_method", statfiles, value=TRUE)
81 genoprobmethod <- scan(genoprobmethodfile, what="character", dec=".", sep="\n")
84 ########No. of draws for sim.geno method###########
85 drawsnofile <- c()
86 drawsno <- c()
87 if (qtlmethod == 'imp') {
88 if (is.null(grep("stat_no_draws", statfiles))==FALSE) {
89 drawsnofile <- grep("stat_no_draws", statfiles, value=TRUE)
92 if (is.null(drawsnofile)==FALSE) {
93 drawsno <- scan(drawsnofile, what="numeric", dec = ".", sep="\n")
94 drawsno <- as.numeric(drawsno)
97 ########significance level for genotype
98 #######probablity calculation
99 genoproblevelfile <- grep("stat_prob_level", statfiles, value=TRUE)
100 genoproblevel <- scan(genoproblevelfile, what="numeric", dec = ".", sep="\n")
102 if (qtlmethod == 'mr') {
103 if (is.logical(genoproblevel) == FALSE) {
104 genoproblevel <- c(0)
107 if (is.logical(genoprobmethod) ==FALSE) {
108 genoprobmethod <- c('Calculate')
112 genoproblevel <- as.numeric(genoproblevel)
114 ########significance level for permutation test
115 permuproblevelfile <- grep("stat_permu_level", statfiles, value=TRUE)
117 permuproblevel <- scan(permuproblevelfile, what="numeric", dec = ".", sep="\n")
119 permuproblevel <- as.numeric(permuproblevel)
121 #########
122 cvtermfile <- grep("cvterm", infile, value=TRUE)
124 popidfile <- grep("popid", infile, value=TRUE)
126 genodata <- grep("genodata", infile, value=TRUE)
128 phenodata <- grep("phenodata", infile, value=TRUE)
130 permufile <- grep("permu", infile, value=TRUE)
132 crossfile <- grep("cross", infile, value=TRUE)
134 popid <- scan(popidfile, what="integer", sep="\n")
136 cross <- scan(crossfile,what="character", sep="\n")
138 popdata<-c()
140 if (cross == "f2")
142 popdata <- read.cross("csvs",
143 genfile=genodata,
144 phefile=phenodata,
145 na.strings=c("NA", "-"),
146 genotypes=c("1", "2", "3", "4", "5"),
149 popdata <-jittermap(popdata)
150 } else if (cross == "bc" | cross == "rilsib" | cross == "rilself") {
152 popdata <- read.cross("csvs",
153 genfile=genodata,
154 phefile=phenodata,
155 na.strings=c("NA", "-"),
156 genotypes=c("1", "2"),
159 popdata<-jittermap(popdata)
162 if (cross == "rilself") {
163 popdata<-convert2riself(popdata)
164 } else if (cross == "rilsib") {
165 popdata<-convert2risib(popdata)
168 #calculates the qtl genotype probablity at
169 #the specififed step size and probability level
170 genotypetype <- c()
172 if (genoprobmethod == "Calculate") {
173 popdata <- calc.genoprob(popdata,
174 step=stepsize,
175 error.prob=genoproblevel
177 genotypetype<-c('prob')
178 } else if (genoprobmethod == "Simulate") {
179 popdata <- sim.geno(popdata,
180 n.draws=drawsno,
181 step=stepsize,
182 error.prob=genoproblevel,
183 stepwidth="fixed"
186 genotypetype <- c('draws')
189 cvterm <- scan(cvtermfile, what="character") #reads the cvterm
191 cv <- find.pheno(popdata, cvterm)#returns the col no. of the cvterm
193 permuvalues <- scan(permufile, what="character")
195 permuvalue1 <- permuvalues[1]
196 permuvalue2 <- permuvalues[2]
197 permu <- c()
199 if (is.logical(permuvalue1) == FALSE) {
200 if (qtlmodel == "scanone") {
201 if (userpermuvalue == 0 ) {
202 popdataperm <- scanone(popdata,
203 pheno.col=cv,
204 model="normal",
205 method=qtlmethod
208 } else if (userpermuvalue != 0) {
209 popdataperm <- scanone(popdata,
210 pheno.col=cv,
211 model="normal",
212 n.perm = userpermuvalue,
213 method=qtlmethod
216 permu <- summary(popdataperm, alpha=permuproblevel)
218 } else if (qtlmethod != "mr") {
219 if (qtlmodel == "scantwo") {
220 if (userpermuvalue == 0 ) {
221 popdataperm <- scantwo(popdata,
222 pheno.col=cv,
223 model="normal",
224 method=qtlmethod
226 } else if (userpermuvalue != 0) {
227 popdataperm <- scantwo(popdata,
228 pheno.col=cv,
229 model="normal",
230 n.perm=userpermuvalue,
231 method=qtlmethod
234 permu <- summary(popdataperm, alpha=permuproblevel)
241 ##########set the LOD cut-off for singificant qtls ##############
242 LodThreshold <- c()
244 if(is.null(permu) == FALSE) {
245 LodThreshold <- permu[1,1]
247 ##########QTL EFFECTS ##############
249 chrlist <- c("chr1")
251 for (no in 2:12) {
252 chr <- paste("chr", no, sep="")
254 chrlist <- append(chrlist, chr)
257 chrdata <- paste(cvterm, popid, "chr1", sep="_")
259 chrtest <- c("chr1")
261 for (ch in chrlist) {
262 if (ch=="chr1") {
263 chrdata <- paste(cvterm, popid, ch, sep="_")
264 } else {
265 n <- paste(cvterm, popid, ch, sep="_")
266 chrdata <- append(chrdata, n)
270 chrno <- 1
272 datasummary <- c()
273 confidenceints <- c()
274 lodconfidenceints <- c()
275 QtlChrs <- c()
276 QtlPositions <- c()
277 QtlLods <- c()
279 for (i in chrdata) {
280 filedata <- paste(cvterm, popid, chrno, sep="_")
281 filedata <- paste(filedata,"txt",sep=".")
283 i <- scanone(popdata,
284 chr=chrno,
285 pheno.col=cv,
286 model = "normal",
287 method= qtlmethod
290 position <- max(i,chr=chrno)
292 p <- position[["pos"]]
293 LodScore <- position[["lod"]]
294 QtlChr <- levels(position[["chr"]])
296 if ( is.null(LodThreshold)==FALSE ) {
297 if (LodScore >=LodThreshold ) {
298 QtlChrs <- append(QtlChrs, QtlChr)
299 QtlLods <- append(QtlLods, LodScore)
300 QtlPositions <- append(QtlPositions, round(p, 0))
304 peakmarker <- find.marker(popdata, chr=chrno, pos=p)
306 lodpeakmarker <- i[peakmarker, ]
308 lodconfidenceint <- bayesint(i, chr=chrno, prob=0.95, expandtomarkers=TRUE)
310 if (is.na(lodconfidenceint[peakmarker, ])){
311 lodconfidenceint <- rbind(lodconfidenceint, lodpeakmarker)
314 peakmarker <- c(chrno, peakmarker)
316 if (chrno==1) {
317 datasummary <- i
318 peakmarkers <- peakmarker
319 lodconfidenceints <- lodconfidenceint
322 if (chrno > 1 ) {
323 datasummary <- rbind(datasummary, i)
324 peakmarkers <- rbind(peakmarkers, peakmarker)
325 lodconfidenceints <- rbind(lodconfidenceints, lodconfidenceint)
328 chrno <- chrno + 1;
332 ##########QTL EFFECTS ##############
333 ResultDrop <- c()
334 ResultFull <- c()
335 Effects <- c()
337 if (is.null(LodThreshold) == FALSE) {
338 if ( max(QtlLods) >= LodThreshold ) {
339 QtlObj <- makeqtl(popdata,
340 QtlChrs,
341 QtlPositions,
342 what=genotypetype
345 QtlsNo <- length(QtlPositions)
346 Eq <- c("y~")
348 for (i in 1:QtlsNo) {
349 q <- paste("Q", i, sep="")
351 if (i==1) {
352 Eq <- paste(Eq, q, sep="")
353 } else if (i>1) {
354 Eq <- paste(Eq, q, sep="*")
358 QtlEffects <- try(fitqtl(popdata,
359 pheno.col=cv,
360 QtlObj,
361 formula=Eq,
362 method="hk",
363 get.ests=TRUE
367 if(class(QtlEffects) != 'try-error') {
369 ResultModel <- attr(QtlEffects, "formula")
370 Effects <- QtlEffects$ests$ests
371 QtlLodAnova <- QtlEffects$lod
372 ResultFull <- QtlEffects$result.full
373 ResultDrop <- QtlEffects$result.drop
375 if (is.numeric(Effects)) {
376 Effects <- round(Effects, 2)
379 if (is.numeric(ResultFull)) {
380 ResultFull <- round(ResultFull,2)
383 if (is.numeric(ResultDrop)) {
384 ResultDrop<-round(ResultDrop, 2)
390 ##########creating vectors for the outfiles##############
392 outfiles <- scan(file=outfile, what="character")
394 qtlfile <- grep("qtl_summary", outfiles, value=TRUE)
395 peakmarkersfile <- grep("peak_marker", outfiles, value=TRUE)
396 confidencelodfile <- grep("confidence", outfiles, value=TRUE)
397 QtlEffectsFile <- grep("qtl_effects", outfiles, value=TRUE)
398 VariationFile <- grep("explained_variation", outfiles, value=TRUE)
400 ##### writing outputs to their respective files
402 write.table(datasummary,
403 file=qtlfile,
404 sep="\t",
405 col.names=NA,
406 quote=FALSE,
407 append=FALSE
410 write.table(peakmarkers,
411 file=peakmarkersfile,
412 sep="\t",
413 col.names=NA,
414 quote=FALSE,
415 append=FALSE
418 write.table(lodconfidenceints,
419 file=confidencelodfile,
420 sep="\t",
421 col.names=NA,
422 quote=FALSE,
423 append=FALSE
426 if (is.null(ResultDrop)==FALSE) {
427 write.table(ResultDrop,
428 file=VariationFile,
429 sep="\t",
430 col.names=NA,
431 quote=FALSE,
432 append=FALSE
434 } else {
435 if (is.null(ResultFull)==FALSE) {
436 write.table(ResultFull,
437 file=VariationFile,
438 sep="\t",
439 col.names=NA,
440 quote=FALSE,
441 append=FALSE
446 write.table(Effects,
447 file=QtlEffectsFile,
448 sep="\t",
449 col.names=NA,
450 quote=FALSE,
451 append=FALSE
454 write.table(permu,
455 file=permufile,
456 sep="\t",
457 col.names=NA,
458 quote=FALSE,
459 append=FALSE
462 q(runLast = FALSE)