R/qtl_analysis.r

   1
   2 ##SNOPSIS
   3
   4 ##A bunch of r and r/qtl commands for reading input data,
   5 ##running qtl analysis, and writing output data. Input data are feed to R
   6 ##from and output data from R feed to a perl script called
   7 ##../phenome/population_indls.
   8 ##QTL mapping visualization is done by the perl script mentioned above.
   9 ## r/qtl has functions for data visualization but the perl script
  10 ## allows more control for publishing on the browser,
  11 ##cross-referencing to other SGN datasets and CONSISTENT formatting/appearance
  12
  13
  14 ##AUTHOR
  15 ## Isaak Y Tecle (iyt2@cornell.edu)
  16
  17
  18 options(echo = FALSE)
  19
  20 library(qtl)
  21
  22 allargs<-commandArgs()
  23
  24 infile <- grep("infile_list",
  25                allargs, ignore.case=TRUE,
  26                perl=TRUE,
  27                value=TRUE
  28                )
  29
  30 outfile<-grep("outfile_list",
  31               allargs,
  32               ignore.case=TRUE,
  33               perl=TRUE,
  34               value=TRUE
  35               )
  36
  37 statfile<-grep("stat",
  38                allargs,
  39                ignore.case=TRUE,
  40                perl=TRUE,
  41                value=TRUE
  42                )
  43
  44
  45 ##### stat files
  46 statfiles<-scan(statfile,
  47                 what="character"
  48                 )
  49
  50
  51 ###### QTL mapping method ############
  52 qtlmethodfile<-grep("stat_qtl_method",
  53                     statfiles,
  54                     ignore.case=TRUE,
  55                     fixed = FALSE,
  56                     value=TRUE
  57                     )
  58 qtlmethod<-scan(qtlmethodfile,
  59                 what="character",
  60                 sep="\n"
  61                 )
  62
  63 if (qtlmethod == "Maximum Likelihood")
  64 {
  65   qtlmethod<-c("em")
  66 } else
  67
  68 if (qtlmethod == "Haley-Knott Regression")
  69 {
  70   qtlmethod<-c("hk")
  71
  72 } else
  73
  74 if (qtlmethod == "Multiple Imputation")
  75 {
  76   qtlmethod<-c("imp")
  77
  78 } else
  79 if (qtlmethod == "Marker Regression")
  80 {
  81   qtlmethod<-c("mr")
  82 }
  83
  84 ###### QTL model ############
  85 qtlmodelfile<-grep("stat_qtl_model",
  86                    statfiles,
  87                    ignore.case=TRUE,
  88                    fixed = FALSE,
  89                    value=TRUE
  90                    )
  91 qtlmodel<-scan(qtlmodelfile,
  92                what="character",
  93                sep="\n"
  94                )
  95 if (qtlmodel == "Single-QTL Scan")
  96 {
  97   qtlmodel<-c("scanone")
  98 } else
  99 if  (qtlmodel == "Two-QTL Scan")
 100 {
 101   qtlmodel<-c("scantwo")
 102 }
 103
 104 ###### permutation############
 105 userpermufile<-grep("stat_permu_test",
 106                     statfiles,
 107                     ignore.case=TRUE,
 108                     fixed = FALSE,
 109                     value=TRUE
 110                     )
 111 userpermuvalue<-scan(userpermufile,
 112                      what="numeric",
 113                      dec = ".",
 114                      sep="\n"
 115                      )
 116
 117 if (userpermuvalue == "None")
 118 {
 119   userpermuvalue<-c(0)
 120 }
 121 userpermuvalue<-as.numeric(userpermuvalue)
 122
 123 #####for test only
 124 #userpermuvalue<-c(100)
 125 #####
 126
 127
 128 ######genome step size############
 129 stepsizefile<-grep("stat_step_size",
 130                    statfiles,
 131                    ignore.case=TRUE,
 132                    fixed = FALSE,
 133                    value=TRUE)
 134
 135 stepsize<-scan(stepsizefile,
 136                what="numeric",
 137                dec = ".",
 138                sep="\n"
 139                )
 140
 141 if (qtlmethod == 'mr')
 142   {
 143     stepsize<-c(0)
 144   }else
 145 if
 146   (qtlmethod != 'mr' & stepsize == "zero")
 147     {
 148       stepsize<-c(0)
 149     }
 150
 151 stepsize<-as.numeric(stepsize)
 152
 153
 154
 155 ######genotype calculation method############
 156 genoprobmethodfile<-grep("stat_prob_method",
 157                          statfiles,
 158                          ignore.case=TRUE,
 159                          fixed = FALSE,
 160                          value=TRUE
 161                          )
 162
 163 genoprobmethod<-scan(genoprobmethodfile,
 164                      what="character",
 165                      dec = ".",
 166                      sep="\n"
 167                      )
 168
 169
 170 ########No. of draws for sim.geno method###########
 171 drawsnofile<-c()
 172 drawsno<-c()
 173 if (qtlmethod == 'imp')
 174   {
 175     if (is.null(grep("stat_no_draws", statfiles))==FALSE)
 176       {
 177         drawsnofile<-grep("stat_no_draws",
 178                           statfiles,
 179                           ignore.case=TRUE,
 180                           fixed = FALSE,
 181                           value=TRUE
 182                           )
 183       }
 184
 185     if (is.null(drawsnofile)==FALSE)
 186       {
 187         drawsno<-scan(drawsnofile,
 188                       what="numeric",
 189                       dec = ".",
 190                       sep="\n"
 191                       )
 192         drawsno<-as.numeric(drawsno)
 193
 194       }
 195   }
 196 ########significance level for genotype
 197 #######probablity calculation
 198 genoproblevelfile<-grep("stat_prob_level",
 199                         statfiles,
 200                         ignore.case=TRUE,
 201                         fixed = FALSE,
 202                         value=TRUE
 203                         )
 204
 205 genoproblevel<-scan(genoproblevelfile,
 206                     what="numeric",
 207                     dec = ".",
 208                     sep="\n"
 209                     )
 210
 211
 212 if (qtlmethod == 'mr')
 213   {
 214     if (is.logical(genoproblevel) == FALSE)
 215       {
 216         genoproblevel<-c(0)
 217       }
 218
 219     if (is.logical(genoprobmethod) ==FALSE)
 220       {
 221         genoprobmethod<-c('Calculate')
 222       }
 223   }
 224
 225 genoproblevel<-as.numeric(genoproblevel)
 226
 227 ########significance level for permutation test
 228 permuproblevelfile<-grep("stat_permu_level",
 229                          statfiles,
 230                          ignore.case=TRUE,
 231                          fixed = FALSE,
 232                          value=TRUE
 233                          )
 234
 235 permuproblevel<-scan(permuproblevelfile,
 236                      what="numeric",
 237                      dec = ".",
 238                      sep="\n"
 239                      )
 240
 241 permuproblevel<-as.numeric(permuproblevel)
 242
 243 #########
 244 infile<-scan(file=infile,
 245              what="character"
 246              )
 247
 248 cvtermfile<-grep("cvterm",
 249                  infile,
 250                  ignore.case=TRUE,
 251                  fixed = FALSE,
 252                  value=TRUE
 253                  )
 254
 255 popidfile<-grep("popid",
 256                  infile,
 257                  ignore.case=TRUE,
 258                  fixed = FALSE,
 259                  value=TRUE
 260                  )
 261
 262 genodata<-grep("genodata",
 263                  infile,
 264                  ignore.case=TRUE,
 265                  fixed = FALSE,
 266                  value=TRUE
 267                  )
 268
 269 phenodata<-grep("phenodata",
 270                  infile,
 271                  ignore.case=TRUE,
 272                  fixed = FALSE,
 273                  value=TRUE
 274                  )
 275
 276 permufile<-grep("permu",
 277                  infile,
 278                  ignore.case=TRUE,
 279                  fixed = FALSE,
 280                  value=TRUE
 281                  )
 282
 283 crossfile<-grep("cross",
 284                  infile,
 285                  ignore.case=TRUE,
 286                  fixed = FALSE,
 287                  value=TRUE
 288                  )
 289
 290 popid<-scan(popidfile,
 291             what="integer",
 292             sep="\n"
 293             )
 294
 295 cross<-scan(crossfile,
 296             what="character",
 297             sep="\n"
 298             )
 299
 300
 301 popdata<-c()
 302
 303 if (cross == "f2")
 304 {
 305   popdata<- read.cross("csvs",
 306                        genfile=genodata,
 307                        phefile=phenodata,
 308                        na.strings=c("NA", "-"),
 309                        genotypes=c("1", "2", "3", "4", "5"),
 310                        )
 311
 312   popdata<-jittermap(popdata)
 313 } else
 314 if (cross == "bc" | cross == "rilsib" | cross == "rilself")
 315 {
 316   popdata<- read.cross("csvs",
 317                        genfile=genodata,
 318                        phefile=phenodata,
 319                        na.strings=c("NA", "-"),
 320                        genotypes=c("1", "2"),
 321                        )
 322
 323   popdata<-jittermap(popdata)
 324 }
 325
 326 if (cross == "rilself")
 327   {
 328     popdata<-convert2riself(popdata)
 329   } else
 330 if (cross == "rilsib")
 331   {
 332     popdata<-convert2risib(popdata)
 333   }
 334
 335 #calculates the qtl genotype probablity at
 336 #the specififed step size and probability level
 337 genotypetype<-c()
 338 if (genoprobmethod == "Calculate")
 339   {
 340     popdata<-calc.genoprob(popdata,
 341                            step=stepsize,
 342                            error.prob=genoproblevel
 343                            )
 344     genotypetype<-c('prob')
 345   } else
 346   if (genoprobmethod == "Simulate")
 347     {
 348     popdata<-sim.geno(popdata,
 349                       n.draws=drawsno,
 350                       step=stepsize,
 351                       error.prob=genoproblevel,
 352                       stepwidth="fixed"
 353                       )
 354     genotypetype<-c('draws')
 355   }
 356
 357
 358
 359 cvterm<-scan(file=cvtermfile,
 360              what="character"
 361              ) #reads the cvterm
 362
 363 cv<-find.pheno(popdata,
 364                cvterm
 365                )#returns the col no. of the cvterm
 366
 367 permuvalues<-scan(file=permufile,
 368                   what="character"
 369                   )
 370
 371 permuvalue1<-permuvalues[1]
 372 permuvalue2<-permuvalues[2]
 373 permu<-c()
 374
 375 if (is.logical(permuvalue1) == FALSE)
 376 {
 377   if (qtlmodel == "scanone")
 378     {
 379     if (userpermuvalue == 0 )
 380       {
 381       popdataperm<-scanone(popdata,
 382                            pheno.col=cv,
 383                            model="normal",
 384                            method=qtlmethod
 385                            )
 386
 387     } else
 388     if (userpermuvalue != 0)
 389       {
 390         popdataperm<-scanone(popdata,
 391                            pheno.col=cv,
 392                            model="normal",
 393                            n.perm = userpermuvalue,
 394                            method=qtlmethod
 395                            )
 396
 397         permu<-summary(popdataperm,
 398                        alpha=permuproblevel
 399                        )
 400     }
 401   }else
 402   if (qtlmethod != "mr")
 403     {
 404     if (qtlmodel == "scantwo")
 405       {
 406       if (userpermuvalue == 0 )
 407         {
 408         popdataperm<-scantwo(popdata,
 409                              pheno.col=cv,
 410                              model="normal",
 411                              method=qtlmethod
 412                              )
 413       } else
 414       if (userpermuvalue != 0)
 415         {
 416         popdataperm<-scantwo(popdata,
 417                              pheno.col=cv,
 418                              model="normal",
 419                              n.perm = userpermuvalue,
 420                              method=qtlmethod
 421                              )
 422         permu<-summary(popdataperm,
 423                        alpha=permuproblevel
 424                        )
 425
 426       }
 427     }
 428   }
 429 }
 430
 431 ##########set the LOD cut-off for singificant qtls ##############
 432 LodThreshold<-c()
 433
 434 if(is.null(permu) == FALSE)
 435 {
 436   LodThreshold<-permu[1,1]
 437 }
 438 ##########QTL EFFECTS ##############
 439
 440 chrlist<-c("chr1")
 441
 442 for (no in 2:12)
 443 {
 444   chr<-paste("chr",
 445              no,
 446              sep=""
 447              )
 448   chrlist<-append(chrlist,
 449                   chr
 450                   )
 451 }
 452
 453 chrdata<-paste(cvterm,
 454                popid,
 455                "chr1",
 456                sep="_"
 457                )
 458 chrtest<-c("chr1")
 459
 460 for (ch in chrlist) {
 461   if (ch=="chr1"){
 462     chrdata<-paste(cvterm,
 463                    popid,
 464                    ch,
 465                    sep="_"
 466                    )
 467   }
 468
 469   else
 470     {
 471     n<-paste(cvterm,
 472              popid,
 473              ch,
 474              sep="_"
 475              )
 476     chrdata<-append(chrdata,
 477                     n
 478                     )
 479
 480   }
 481 }
 482
 483 chrno<-1
 484
 485 datasummary<-c()
 486 confidenceints<-c()
 487 lodconfidenceints<-c()
 488 QtlChrs<-c()
 489 QtlPositions<-c()
 490 QtlLods<-c()
 491
 492 for (i in chrdata)
 493 {
 494   filedata<-paste(cvterm,
 495                   popid,
 496                   chrno,
 497                   sep="_"
 498                   )
 499   filedata<-paste(filedata,
 500                   "txt",
 501                   sep="."
 502                   )
 503
 504   i<-scanone(popdata,
 505              chr=chrno,
 506              pheno.col=cv,
 507              model = "normal",
 508              method= qtlmethod
 509              )
 510   position<-max(i,
 511                 chr=chrno
 512                 )
 513
 514   p<-position[["pos"]]
 515   LodScore<-position[["lod"]]
 516   QtlChr<-levels(position[["chr"]])
 517
 518       if ( is.null(LodThreshold)==FALSE )
 519         {
 520           if (LodScore >=LodThreshold )
 521             {
 522               QtlChrs<-append(QtlChrs,
 523                               QtlChr
 524                               )
 525               QtlLods<-append(QtlLods,
 526                               LodScore
 527                               )
 528               QtlPositions<-append(QtlPositions,
 529                                    round(p,
 530                                          0
 531                                          )
 532                                    )
 533             }
 534         }
 535
 536
 537   peakmarker<-find.marker(popdata,
 538                           chr=chrno,
 539                           pos=p
 540                           )
 541
 542   lodpeakmarker<-i[peakmarker, ]
 543
 544   lodconfidenceint<-bayesint(i,
 545                           chr=chrno,
 546                           prob=0.95,
 547                           expandtomarkers=TRUE
 548                           )
 549
 550   if (is.na(lodconfidenceint[peakmarker, ]))
 551     {
 552       lodconfidenceint<-rbind(lodconfidenceint,
 553                               lodpeakmarker
 554                               )
 555     }
 556
 557   peakmarker<-c(chrno,
 558                 peakmarker
 559                 )
 560
 561   if (chrno==1)
 562     {
 563     datasummary<-i
 564     peakmarkers<-peakmarker
 565     lodconfidenceints<-lodconfidenceint
 566   }
 567
 568   if (chrno > 1 )
 569     {
 570       datasummary<-rbind(datasummary,
 571                          i
 572                          )
 573       peakmarkers<-rbind(peakmarkers,
 574                          peakmarker
 575                          )
 576       lodconfidenceints<-rbind(lodconfidenceints,
 577                                lodconfidenceint
 578                                )
 579   }
 580
 581 chrno<-chrno + 1;
 582
 583 }
 584
 585 ##########QTL EFFECTS ##############
 586  ResultDrop<-c()
 587  ResultFull<-c()
 588  Effects<-c()
 589
 590     if (is.null(LodThreshold) == FALSE)
 591       {
 592         if ( max(QtlLods) >= LodThreshold )
 593           {
 594             QtlObj<-makeqtl(popdata,
 595                             QtlChrs,
 596                             QtlPositions,
 597                             what=genotypetype
 598                             )
 599
 600             QtlsNo<-length(QtlPositions)
 601             Eq<-c("y~")
 602
 603             for (i in 1:QtlsNo)
 604               {
 605                 q<-paste("Q",
 606                          i,
 607                          sep=""
 608                          )
 609
 610                 if (i==1)
 611                   {
 612                     Eq<-paste(Eq,
 613                               q,
 614                               sep=""
 615                               )
 616                   }
 617                 else
 618                   if (i>1)
 619                     {
 620                       Eq<-paste(Eq,
 621                                 q,
 622                                 sep="*"
 623                                 )
 624                     }
 625               }
 626
 627             QtlEffects<-try(fitqtl(popdata,
 628                                    pheno.col=cv,
 629                                    QtlObj,
 630                                    formula=Eq,
 631                                    method="hk",
 632                                    get.ests=TRUE
 633                                    )
 634                             )
 635
 636             if(class(QtlEffects) != 'try-error')
 637               {
 638
 639                 ResultModel<-attr(QtlEffects,
 640                                   "formula"
 641                                   )
 642
 643                 Effects<-QtlEffects$ests$ests
 644                 QtlLodAnova<-QtlEffects$lod
 645                 ResultFull<-QtlEffects$result.full
 646                 ResultDrop<-QtlEffects$result.drop
 647
 648                 if (is.numeric(Effects))
 649                   {
 650                     Effects<-round(Effects,
 651                                    2
 652                                    )
 653                   }
 654
 655                 if (is.numeric(ResultFull))
 656                   {
 657                     ResultFull<-round(ResultFull,
 658                                       2
 659                                       )
 660                   }
 661
 662                 if (is.numeric(ResultDrop))
 663                   {
 664                     ResultDrop<-round(ResultDrop,
 665                                       2
 666                                       )
 667                   }
 668               }
 669           }
 670       }
 671
 672 ##########creating vectors for the outfiles##############
 673
 674 outfiles<-scan(file=outfile,
 675                what="character"
 676                )
 677
 678 qtlfile<-grep("qtl_summary",
 679                  outfiles,
 680                  ignore.case=TRUE,
 681                  fixed = FALSE,
 682                  value=TRUE
 683                  )
 684
 685 peakmarkersfile<-grep("peak_marker",
 686                  outfiles,
 687                  ignore.case=TRUE,
 688                  fixed = FALSE,
 689                  value=TRUE
 690                  )
 691
 692 confidencelodfile<-grep("confidence",
 693                  outfiles,
 694                  ignore.case=TRUE,
 695                  fixed = FALSE,
 696                  value=TRUE
 697                  )
 698 QtlEffectsFile<-grep("qtl_effects",
 699                  outfiles,
 700                  ignore.case=TRUE,
 701                  fixed = FALSE,
 702                  value=TRUE
 703                  )
 704 VariationFile<-grep("explained_variation",
 705                  outfiles,
 706                  ignore.case=TRUE,
 707                  fixed = FALSE,
 708                  value=TRUE
 709                  )
 710
 711 ##### writing outputs to their respective files
 712
 713 write.table(datasummary,
 714             file=qtlfile,
 715             sep="\t",
 716             col.names=NA,
 717             quote=FALSE,
 718             append=FALSE
 719             )
 720
 721 write.table(peakmarkers,
 722             file=peakmarkersfile,
 723             sep="\t",
 724             col.names=NA,
 725             quote=FALSE,
 726             append=FALSE
 727             )
 728
 729 write.table(lodconfidenceints,
 730             file=confidencelodfile,
 731             sep="\t",
 732             col.names=NA,
 733             quote=FALSE,
 734             append=FALSE
 735             )
 736
 737 if (is.null(ResultDrop)==FALSE)
 738 {
 739   write.table(ResultDrop,
 740               file=VariationFile,
 741               sep="\t",
 742               col.names=NA,
 743               quote=FALSE,
 744               append=FALSE
 745               )
 746 } else
 747 {
 748   if(is.null(ResultFull)==FALSE)
 749     {
 750       write.table(ResultFull,
 751               file=VariationFile,
 752               sep="\t",
 753               col.names=NA,
 754               quote=FALSE,
 755               append=FALSE
 756               )
 757     }
 758 }
 759
 760     write.table(Effects,
 761                 file=QtlEffectsFile,
 762                 sep="\t",
 763                 col.names=NA,
 764                 quote=FALSE,
 765                 append=FALSE
 766                 )
 767
 768     write.table(permu,
 769                 file=permufile,
 770                 sep="\t",
 771                 col.names=NA,
 772                 quote=FALSE,
 773                 append=FALSE
 774                 )
 775
 776 q(runLast = FALSE)