R/solGS/qtl_analysis.r

   1
   2 ##SNOPSIS
   3
   4 #Qtl analysis based on rqtl.
   5
   6
   7 ##AUTHOR
   8 ## Isaak Y Tecle (iyt2@cornell.edu)
   9
  10
  11 options(echo = FALSE)
  12
  13 library(qtl)
  14
  15 allargs <- commandArgs()
  16
  17 infile   <- grep("infile_list", allargs, value=TRUE)
  18 outfile  <- grep("outfile_list", allargs, value=TRUE)
  19
  20 infile   <- scan(infile, what="character")
  21 statfile <- grep("stat", infile, value=TRUE)
  22
  23 ##### stat files
  24 statfiles <- scan(statfile, what="character")
  25
  26 ###### QTL mapping method ############
  27 qtlmethodfile <- grep("stat_qtl_method", statfiles, value=TRUE)
  28 qtlmethod <- scan(qtlmethodfile, what="character", sep="\n")
  29
  30 if (qtlmethod == "Maximum Likelihood") {
  31   qtlmethod <- c("em")
  32 } else if (qtlmethod == "Haley-Knott Regression") {
  33   qtlmethod <- c("hk")
  34 } else if (qtlmethod == "Multiple Imputation") {
  35   qtlmethod <- c("imp")
  36 } else if (qtlmethod == "Marker Regression") {
  37   qtlmethod <- c("mr")
  38 }
  39
  40 ###### QTL model ############
  41 qtlmodelfile <- grep("stat_qtl_model",  statfiles, value=TRUE)
  42 qtlmodel <- scan(qtlmodelfile, what="character", sep="\n")
  43
  44 if (qtlmodel == "Single-QTL Scan") {
  45   qtlmodel <- c("scanone")
  46 } else if  (qtlmodel == "Two-QTL Scan") {
  47   qtlmodel<-c("scantwo")
  48 }
  49
  50 ###### permutation############
  51 userpermufile  <- grep("stat_permu_test", statfiles, value=TRUE)
  52 userpermuvalue <- scan(userpermufile, what="numeric", dec= ".", sep="\n")
  53
  54 if (userpermuvalue == "None") {
  55   userpermuvalue<-c(0)
  56 }
  57
  58 userpermuvalue <- as.numeric(userpermuvalue)
  59
  60 #####for test only
  61 #userpermuvalue<-c(100)
  62 #####
  63
  64
  65 ######genome step size############
  66 stepsizefile <- grep("stat_step_size",  statfiles, value=TRUE)
  67
  68 stepsize <- scan(stepsizefile, what="numeric", dec = ".", sep="\n")
  69
  70 if (qtlmethod == 'mr') {
  71     stepsize <- c(0)
  72 } else if (qtlmethod != 'mr' & stepsize == "zero") {
  73       stepsize <- c(0)
  74   }
  75
  76 stepsize <- as.numeric(stepsize)
  77
  78 ######genotype calculation method############
  79 genoprobmethodfile <- grep("stat_prob_method", statfiles, value=TRUE)
  80
  81 genoprobmethod <- scan(genoprobmethodfile, what="character", dec=".", sep="\n")
  82
  83
  84 ########No. of draws for sim.geno method###########
  85 drawsnofile <- c()
  86 drawsno <- c()
  87 if (qtlmethod == 'imp') {
  88     if (is.null(grep("stat_no_draws", statfiles))==FALSE) {
  89         drawsnofile <- grep("stat_no_draws", statfiles, value=TRUE)
  90     }
  91
  92     if (is.null(drawsnofile)==FALSE) {
  93         drawsno <- scan(drawsnofile, what="numeric", dec = ".", sep="\n")
  94         drawsno <- as.numeric(drawsno)
  95       }
  96   }
  97 ########significance level for genotype
  98 #######probablity calculation
  99 genoproblevelfile <- grep("stat_prob_level", statfiles, value=TRUE)
 100 genoproblevel     <- scan(genoproblevelfile, what="numeric", dec = ".", sep="\n")
 101
 102 if (qtlmethod == 'mr') {
 103     if (is.logical(genoproblevel) == FALSE) {
 104         genoproblevel <- c(0)
 105     }
 106
 107     if (is.logical(genoprobmethod) ==FALSE) {
 108         genoprobmethod <- c('Calculate')
 109       }
 110   }
 111
 112 genoproblevel <- as.numeric(genoproblevel)
 113
 114 ########significance level for permutation test
 115 permuproblevelfile <- grep("stat_permu_level", statfiles, value=TRUE)
 116
 117 permuproblevel <- scan(permuproblevelfile, what="numeric", dec = ".", sep="\n")
 118
 119 permuproblevel <- as.numeric(permuproblevel)
 120
 121 #########
 122 cvtermfile <- grep("cvterm", infile, value=TRUE)
 123
 124 popidfile <- grep("popid", infile, value=TRUE)
 125
 126 genodata <- grep("genodata", infile, value=TRUE)
 127
 128 phenodata <- grep("phenodata", infile, value=TRUE)
 129
 130 permufile <- grep("permu", infile, value=TRUE)
 131
 132 crossfile <- grep("cross", infile, value=TRUE)
 133
 134 popid <- scan(popidfile, what="integer", sep="\n")
 135
 136 cross <- scan(crossfile,what="character", sep="\n")
 137
 138 popdata<-c()
 139
 140 if (cross == "f2")
 141 {
 142   popdata <- read.cross("csvs",
 143                        genfile=genodata,
 144                        phefile=phenodata,
 145                        na.strings=c("NA", "-"),
 146                        genotypes=c("1", "2", "3", "4", "5"),
 147                        )
 148
 149   popdata <-jittermap(popdata)
 150 } else if (cross == "bc" | cross == "rilsib" | cross == "rilself") {
 151
 152   popdata <- read.cross("csvs",
 153                        genfile=genodata,
 154                        phefile=phenodata,
 155                        na.strings=c("NA", "-"),
 156                        genotypes=c("1", "2"),
 157                        )
 158
 159   popdata<-jittermap(popdata)
 160 }
 161
 162 if (cross == "rilself") {
 163     popdata<-convert2riself(popdata)
 164 } else if (cross == "rilsib") {
 165     popdata<-convert2risib(popdata)
 166 }
 167
 168 #calculates the qtl genotype probablity at
 169 #the specififed step size and probability level
 170 genotypetype <- c()
 171
 172 if (genoprobmethod == "Calculate") {
 173     popdata <- calc.genoprob(popdata,
 174                            step=stepsize,
 175                            error.prob=genoproblevel
 176                            )
 177     genotypetype<-c('prob')
 178 } else if (genoprobmethod == "Simulate") {
 179     popdata <- sim.geno(popdata,
 180                       n.draws=drawsno,
 181                       step=stepsize,
 182                       error.prob=genoproblevel,
 183                       stepwidth="fixed"
 184                         )
 185
 186     genotypetype <- c('draws')
 187 }
 188
 189 cvterm <- scan(cvtermfile, what="character") #reads the cvterm
 190
 191 cv <- find.pheno(popdata, cvterm)#returns the col no. of the cvterm
 192
 193 permuvalues <- scan(permufile, what="character")
 194
 195 permuvalue1 <- permuvalues[1]
 196 permuvalue2 <- permuvalues[2]
 197 permu <- c()
 198
 199 if (is.logical(permuvalue1) == FALSE) {
 200   if (qtlmodel == "scanone") {
 201     if (userpermuvalue == 0 ) {
 202       popdataperm <- scanone(popdata,
 203                              pheno.col=cv,
 204                              model="normal",
 205                              method=qtlmethod
 206                              )
 207
 208     } else if (userpermuvalue != 0) {
 209         popdataperm <- scanone(popdata,
 210                                pheno.col=cv,
 211                                model="normal",
 212                                n.perm = userpermuvalue,
 213                                method=qtlmethod
 214                                )
 215
 216         permu <- summary(popdataperm, alpha=permuproblevel)
 217     }
 218   } else if (qtlmethod != "mr") {
 219     if (qtlmodel == "scantwo") {
 220       if (userpermuvalue == 0 ) {
 221         popdataperm <- scantwo(popdata,
 222                                pheno.col=cv,
 223                                model="normal",
 224                                method=qtlmethod
 225                                )
 226       } else if (userpermuvalue != 0) {
 227           popdataperm <- scantwo(popdata,
 228                                  pheno.col=cv,
 229                                  model="normal",
 230                                  n.perm=userpermuvalue,
 231                                  method=qtlmethod
 232                                  )
 233
 234         permu <- summary(popdataperm, alpha=permuproblevel)
 235
 236       }
 237     }
 238   }
 239 }
 240
 241 ##########set the LOD cut-off for singificant qtls ##############
 242 LodThreshold <- c()
 243
 244 if(is.null(permu) == FALSE) {
 245   LodThreshold <- permu[1,1]
 246 }
 247 ##########QTL EFFECTS ##############
 248
 249 chrlist <- c("chr1")
 250
 251 for (no in 2:12) {
 252     chr <- paste("chr", no, sep="")
 253
 254     chrlist <- append(chrlist, chr)
 255 }
 256
 257 chrdata <- paste(cvterm, popid, "chr1", sep="_")
 258
 259 chrtest <- c("chr1")
 260
 261 for (ch in chrlist) {
 262   if (ch=="chr1") {
 263     chrdata <- paste(cvterm, popid, ch, sep="_")
 264   } else {
 265     n <- paste(cvterm, popid, ch, sep="_")
 266     chrdata <- append(chrdata, n)
 267   }
 268 }
 269
 270 chrno <- 1
 271
 272 datasummary <- c()
 273 confidenceints <- c()
 274 lodconfidenceints <- c()
 275 QtlChrs <- c()
 276 QtlPositions <- c()
 277 QtlLods <- c()
 278
 279 for (i in chrdata) {
 280   filedata <- paste(cvterm, popid, chrno, sep="_")
 281   filedata <- paste(filedata,"txt",sep=".")
 282
 283   i <- scanone(popdata,
 284                chr=chrno,
 285                pheno.col=cv,
 286                model = "normal",
 287                method= qtlmethod
 288                )
 289
 290   position <- max(i,chr=chrno)
 291
 292   p <- position[["pos"]]
 293   LodScore <- position[["lod"]]
 294   QtlChr <- levels(position[["chr"]])
 295
 296       if ( is.null(LodThreshold)==FALSE ) {
 297           if (LodScore >=LodThreshold ) {
 298               QtlChrs <- append(QtlChrs, QtlChr)
 299               QtlLods <- append(QtlLods, LodScore)
 300               QtlPositions <- append(QtlPositions, round(p, 0))
 301             }
 302         }
 303
 304   peakmarker <- find.marker(popdata, chr=chrno, pos=p)
 305
 306   lodpeakmarker <- i[peakmarker, ]
 307
 308   lodconfidenceint <- bayesint(i, chr=chrno, prob=0.95, expandtomarkers=TRUE)
 309
 310   if (is.na(lodconfidenceint[peakmarker, ])){
 311       lodconfidenceint <- rbind(lodconfidenceint, lodpeakmarker)
 312     }
 313
 314   peakmarker <- c(chrno, peakmarker)
 315
 316   if (chrno==1) {
 317     datasummary <- i
 318     peakmarkers <- peakmarker
 319     lodconfidenceints <- lodconfidenceint
 320   }
 321
 322   if (chrno > 1 ) {
 323       datasummary <- rbind(datasummary, i)
 324       peakmarkers <- rbind(peakmarkers, peakmarker)
 325       lodconfidenceints <- rbind(lodconfidenceints, lodconfidenceint)
 326   }
 327
 328 chrno <- chrno + 1;
 329
 330 }
 331
 332 ##########QTL EFFECTS ##############
 333  ResultDrop <- c()
 334  ResultFull <- c()
 335  Effects <- c()
 336
 337 if (is.null(LodThreshold) == FALSE) {
 338     if ( max(QtlLods) >= LodThreshold ) {
 339         QtlObj <- makeqtl(popdata,
 340                           QtlChrs,
 341                           QtlPositions,
 342                           what=genotypetype
 343                           )
 344
 345         QtlsNo <- length(QtlPositions)
 346         Eq <- c("y~")
 347
 348         for (i in 1:QtlsNo) {
 349                 q <- paste("Q", i, sep="")
 350
 351                 if (i==1) {
 352                     Eq <- paste(Eq, q, sep="")
 353                   } else if (i>1) {
 354                       Eq <- paste(Eq, q, sep="*")
 355                   }
 356               }
 357
 358         QtlEffects <- try(fitqtl(popdata,
 359                                  pheno.col=cv,
 360                                  QtlObj,
 361                                  formula=Eq,
 362                                  method="hk",
 363                                  get.ests=TRUE
 364                                  )
 365                           )
 366
 367         if(class(QtlEffects) != 'try-error') {
 368
 369                 ResultModel <- attr(QtlEffects, "formula")
 370                 Effects     <- QtlEffects$ests$ests
 371                 QtlLodAnova <- QtlEffects$lod
 372                 ResultFull  <- QtlEffects$result.full
 373                 ResultDrop  <- QtlEffects$result.drop
 374
 375                 if (is.numeric(Effects)) {
 376                     Effects <- round(Effects, 2)
 377                 }
 378
 379                 if (is.numeric(ResultFull)) {
 380                     ResultFull <- round(ResultFull,2)
 381                 }
 382
 383                 if (is.numeric(ResultDrop)) {
 384                     ResultDrop<-round(ResultDrop, 2)
 385                 }
 386             }
 387     }
 388 }
 389
 390 ##########creating vectors for the outfiles##############
 391
 392 outfiles <- scan(file=outfile, what="character")
 393
 394 qtlfile           <- grep("qtl_summary", outfiles, value=TRUE)
 395 peakmarkersfile   <- grep("peak_marker", outfiles, value=TRUE)
 396 confidencelodfile <- grep("confidence", outfiles, value=TRUE)
 397 QtlEffectsFile    <- grep("qtl_effects", outfiles, value=TRUE)
 398 VariationFile     <- grep("explained_variation", outfiles, value=TRUE)
 399
 400 ##### writing outputs to their respective files
 401
 402 write.table(datasummary,
 403             file=qtlfile,
 404             sep="\t",
 405             col.names=NA,
 406             quote=FALSE,
 407             append=FALSE
 408             )
 409
 410 write.table(peakmarkers,
 411             file=peakmarkersfile,
 412             sep="\t",
 413             col.names=NA,
 414             quote=FALSE,
 415             append=FALSE
 416             )
 417
 418 write.table(lodconfidenceints,
 419             file=confidencelodfile,
 420             sep="\t",
 421             col.names=NA,
 422             quote=FALSE,
 423             append=FALSE
 424             )
 425
 426 if (is.null(ResultDrop)==FALSE) {
 427   write.table(ResultDrop,
 428               file=VariationFile,
 429               sep="\t",
 430               col.names=NA,
 431               quote=FALSE,
 432               append=FALSE
 433               )
 434 } else {
 435   if (is.null(ResultFull)==FALSE) {
 436       write.table(ResultFull,
 437               file=VariationFile,
 438               sep="\t",
 439               col.names=NA,
 440               quote=FALSE,
 441               append=FALSE
 442               )
 443     }
 444 }
 445
 446 write.table(Effects,
 447             file=QtlEffectsFile,
 448             sep="\t",
 449             col.names=NA,
 450             quote=FALSE,
 451             append=FALSE
 452             )
 453
 454 write.table(permu,
 455             file=permufile,
 456             sep="\t",
 457             col.names=NA,
 458             quote=FALSE,
 459             append=FALSE
 460                 )
 461
 462 q(runLast = FALSE)