seedlot upload with accession synonyms. seedlot upload works to update existing seedlots
[sgn.git] / R / solGS / qtl_analysis.r
blobb013a8b341e1e2f4c8ea7c0ead2f29e80b54a571
2 ##SNOPSIS
4 ##A bunch of r and r/qtl commands for reading input data,
5 ##running qtl analysis, and writing output data. Input data are feed to R
6 ##from and output data from R feed to a perl script called
7 ##../phenome/population_indls.
8 ##QTL mapping visualization is done by the perl script mentioned above.
9 ## r/qtl has functions for data visualization but the perl script
10 ## allows more control for publishing on the browser,
11 ##cross-referencing to other SGN datasets and CONSISTENT formatting/appearance
14 ##AUTHOR
15 ## Isaak Y Tecle (iyt2@cornell.edu)
18 options(echo = FALSE)
20 library(qtl)
22 allargs<-commandArgs()
24 infile <- grep("infile_list",
25 allargs, ignore.case=TRUE,
26 perl=TRUE,
27 value=TRUE
30 outfile<-grep("outfile_list",
31 allargs,
32 ignore.case=TRUE,
33 perl=TRUE,
34 value=TRUE
37 statfile<-grep("stat",
38 allargs,
39 ignore.case=TRUE,
40 perl=TRUE,
41 value=TRUE
45 ##### stat files
46 statfiles<-scan(statfile,
47 what="character"
51 ###### QTL mapping method ############
52 qtlmethodfile<-grep("stat_qtl_method",
53 statfiles,
54 ignore.case=TRUE,
55 fixed = FALSE,
56 value=TRUE
58 qtlmethod<-scan(qtlmethodfile,
59 what="character",
60 sep="\n"
63 if (qtlmethod == "Maximum Likelihood")
65 qtlmethod<-c("em")
66 } else
68 if (qtlmethod == "Haley-Knott Regression")
70 qtlmethod<-c("hk")
72 } else
74 if (qtlmethod == "Multiple Imputation")
76 qtlmethod<-c("imp")
78 } else
79 if (qtlmethod == "Marker Regression")
81 qtlmethod<-c("mr")
84 ###### QTL model ############
85 qtlmodelfile<-grep("stat_qtl_model",
86 statfiles,
87 ignore.case=TRUE,
88 fixed = FALSE,
89 value=TRUE
91 qtlmodel<-scan(qtlmodelfile,
92 what="character",
93 sep="\n"
95 if (qtlmodel == "Single-QTL Scan")
97 qtlmodel<-c("scanone")
98 } else
99 if (qtlmodel == "Two-QTL Scan")
101 qtlmodel<-c("scantwo")
104 ###### permutation############
105 userpermufile<-grep("stat_permu_test",
106 statfiles,
107 ignore.case=TRUE,
108 fixed = FALSE,
109 value=TRUE
111 userpermuvalue<-scan(userpermufile,
112 what="numeric",
113 dec = ".",
114 sep="\n"
117 if (userpermuvalue == "None")
119 userpermuvalue<-c(0)
121 userpermuvalue<-as.numeric(userpermuvalue)
123 #####for test only
124 #userpermuvalue<-c(100)
125 #####
128 ######genome step size############
129 stepsizefile<-grep("stat_step_size",
130 statfiles,
131 ignore.case=TRUE,
132 fixed = FALSE,
133 value=TRUE)
135 stepsize<-scan(stepsizefile,
136 what="numeric",
137 dec = ".",
138 sep="\n"
141 if (qtlmethod == 'mr')
143 stepsize<-c(0)
144 }else
146 (qtlmethod != 'mr' & stepsize == "zero")
148 stepsize<-c(0)
151 stepsize<-as.numeric(stepsize)
155 ######genotype calculation method############
156 genoprobmethodfile<-grep("stat_prob_method",
157 statfiles,
158 ignore.case=TRUE,
159 fixed = FALSE,
160 value=TRUE
163 genoprobmethod<-scan(genoprobmethodfile,
164 what="character",
165 dec = ".",
166 sep="\n"
170 ########No. of draws for sim.geno method###########
171 drawsnofile<-c()
172 drawsno<-c()
173 if (qtlmethod == 'imp')
175 if (is.null(grep("stat_no_draws", statfiles))==FALSE)
177 drawsnofile<-grep("stat_no_draws",
178 statfiles,
179 ignore.case=TRUE,
180 fixed = FALSE,
181 value=TRUE
185 if (is.null(drawsnofile)==FALSE)
187 drawsno<-scan(drawsnofile,
188 what="numeric",
189 dec = ".",
190 sep="\n"
192 drawsno<-as.numeric(drawsno)
196 ########significance level for genotype
197 #######probablity calculation
198 genoproblevelfile<-grep("stat_prob_level",
199 statfiles,
200 ignore.case=TRUE,
201 fixed = FALSE,
202 value=TRUE
205 genoproblevel<-scan(genoproblevelfile,
206 what="numeric",
207 dec = ".",
208 sep="\n"
212 if (qtlmethod == 'mr')
214 if (is.logical(genoproblevel) == FALSE)
216 genoproblevel<-c(0)
219 if (is.logical(genoprobmethod) ==FALSE)
221 genoprobmethod<-c('Calculate')
225 genoproblevel<-as.numeric(genoproblevel)
227 ########significance level for permutation test
228 permuproblevelfile<-grep("stat_permu_level",
229 statfiles,
230 ignore.case=TRUE,
231 fixed = FALSE,
232 value=TRUE
235 permuproblevel<-scan(permuproblevelfile,
236 what="numeric",
237 dec = ".",
238 sep="\n"
241 permuproblevel<-as.numeric(permuproblevel)
243 #########
244 infile<-scan(file=infile,
245 what="character"
248 cvtermfile<-grep("cvterm",
249 infile,
250 ignore.case=TRUE,
251 fixed = FALSE,
252 value=TRUE
255 popidfile<-grep("popid",
256 infile,
257 ignore.case=TRUE,
258 fixed = FALSE,
259 value=TRUE
262 genodata<-grep("genodata",
263 infile,
264 ignore.case=TRUE,
265 fixed = FALSE,
266 value=TRUE
269 phenodata<-grep("phenodata",
270 infile,
271 ignore.case=TRUE,
272 fixed = FALSE,
273 value=TRUE
276 permufile<-grep("permu",
277 infile,
278 ignore.case=TRUE,
279 fixed = FALSE,
280 value=TRUE
283 crossfile<-grep("cross",
284 infile,
285 ignore.case=TRUE,
286 fixed = FALSE,
287 value=TRUE
290 popid<-scan(popidfile,
291 what="integer",
292 sep="\n"
295 cross<-scan(crossfile,
296 what="character",
297 sep="\n"
301 popdata<-c()
303 if (cross == "f2")
305 popdata<- read.cross("csvs",
306 genfile=genodata,
307 phefile=phenodata,
308 na.strings=c("NA", "-"),
309 genotypes=c("1", "2", "3", "4", "5"),
312 popdata<-jittermap(popdata)
313 } else
314 if (cross == "bc" | cross == "rilsib" | cross == "rilself")
316 popdata<- read.cross("csvs",
317 genfile=genodata,
318 phefile=phenodata,
319 na.strings=c("NA", "-"),
320 genotypes=c("1", "2"),
323 popdata<-jittermap(popdata)
326 if (cross == "rilself")
328 popdata<-convert2riself(popdata)
329 } else
330 if (cross == "rilsib")
332 popdata<-convert2risib(popdata)
335 #calculates the qtl genotype probablity at
336 #the specififed step size and probability level
337 genotypetype<-c()
338 if (genoprobmethod == "Calculate")
340 popdata<-calc.genoprob(popdata,
341 step=stepsize,
342 error.prob=genoproblevel
344 genotypetype<-c('prob')
345 } else
346 if (genoprobmethod == "Simulate")
348 popdata<-sim.geno(popdata,
349 n.draws=drawsno,
350 step=stepsize,
351 error.prob=genoproblevel,
352 stepwidth="fixed"
354 genotypetype<-c('draws')
359 cvterm<-scan(file=cvtermfile,
360 what="character"
361 ) #reads the cvterm
363 cv<-find.pheno(popdata,
364 cvterm
365 )#returns the col no. of the cvterm
367 permuvalues<-scan(file=permufile,
368 what="character"
371 permuvalue1<-permuvalues[1]
372 permuvalue2<-permuvalues[2]
373 permu<-c()
375 if (is.logical(permuvalue1) == FALSE)
377 if (qtlmodel == "scanone")
379 if (userpermuvalue == 0 )
381 popdataperm<-scanone(popdata,
382 pheno.col=cv,
383 model="normal",
384 method=qtlmethod
387 } else
388 if (userpermuvalue != 0)
390 popdataperm<-scanone(popdata,
391 pheno.col=cv,
392 model="normal",
393 n.perm = userpermuvalue,
394 method=qtlmethod
397 permu<-summary(popdataperm,
398 alpha=permuproblevel
401 }else
402 if (qtlmethod != "mr")
404 if (qtlmodel == "scantwo")
406 if (userpermuvalue == 0 )
408 popdataperm<-scantwo(popdata,
409 pheno.col=cv,
410 model="normal",
411 method=qtlmethod
413 } else
414 if (userpermuvalue != 0)
416 popdataperm<-scantwo(popdata,
417 pheno.col=cv,
418 model="normal",
419 n.perm = userpermuvalue,
420 method=qtlmethod
422 permu<-summary(popdataperm,
423 alpha=permuproblevel
431 ##########set the LOD cut-off for singificant qtls ##############
432 LodThreshold<-c()
434 if(is.null(permu) == FALSE)
436 LodThreshold<-permu[1,1]
438 ##########QTL EFFECTS ##############
440 chrlist<-c("chr1")
442 for (no in 2:12)
444 chr<-paste("chr",
446 sep=""
448 chrlist<-append(chrlist,
453 chrdata<-paste(cvterm,
454 popid,
455 "chr1",
456 sep="_"
458 chrtest<-c("chr1")
460 for (ch in chrlist) {
461 if (ch=="chr1"){
462 chrdata<-paste(cvterm,
463 popid,
465 sep="_"
469 else
471 n<-paste(cvterm,
472 popid,
474 sep="_"
476 chrdata<-append(chrdata,
483 chrno<-1
485 datasummary<-c()
486 confidenceints<-c()
487 lodconfidenceints<-c()
488 QtlChrs<-c()
489 QtlPositions<-c()
490 QtlLods<-c()
492 for (i in chrdata)
494 filedata<-paste(cvterm,
495 popid,
496 chrno,
497 sep="_"
499 filedata<-paste(filedata,
500 "txt",
501 sep="."
504 i<-scanone(popdata,
505 chr=chrno,
506 pheno.col=cv,
507 model = "normal",
508 method= qtlmethod
510 position<-max(i,
511 chr=chrno
514 p<-position[["pos"]]
515 LodScore<-position[["lod"]]
516 QtlChr<-levels(position[["chr"]])
518 if ( is.null(LodThreshold)==FALSE )
520 if (LodScore >=LodThreshold )
522 QtlChrs<-append(QtlChrs,
523 QtlChr
525 QtlLods<-append(QtlLods,
526 LodScore
528 QtlPositions<-append(QtlPositions,
529 round(p,
537 peakmarker<-find.marker(popdata,
538 chr=chrno,
539 pos=p
542 lodpeakmarker<-i[peakmarker, ]
544 lodconfidenceint<-bayesint(i,
545 chr=chrno,
546 prob=0.95,
547 expandtomarkers=TRUE
550 if (is.na(lodconfidenceint[peakmarker, ]))
552 lodconfidenceint<-rbind(lodconfidenceint,
553 lodpeakmarker
557 peakmarker<-c(chrno,
558 peakmarker
561 if (chrno==1)
563 datasummary<-i
564 peakmarkers<-peakmarker
565 lodconfidenceints<-lodconfidenceint
568 if (chrno > 1 )
570 datasummary<-rbind(datasummary,
573 peakmarkers<-rbind(peakmarkers,
574 peakmarker
576 lodconfidenceints<-rbind(lodconfidenceints,
577 lodconfidenceint
581 chrno<-chrno + 1;
585 ##########QTL EFFECTS ##############
586 ResultDrop<-c()
587 ResultFull<-c()
588 Effects<-c()
590 if (is.null(LodThreshold) == FALSE)
592 if ( max(QtlLods) >= LodThreshold )
594 QtlObj<-makeqtl(popdata,
595 QtlChrs,
596 QtlPositions,
597 what=genotypetype
600 QtlsNo<-length(QtlPositions)
601 Eq<-c("y~")
603 for (i in 1:QtlsNo)
605 q<-paste("Q",
607 sep=""
610 if (i==1)
612 Eq<-paste(Eq,
614 sep=""
617 else
618 if (i>1)
620 Eq<-paste(Eq,
622 sep="*"
627 QtlEffects<-try(fitqtl(popdata,
628 pheno.col=cv,
629 QtlObj,
630 formula=Eq,
631 method="hk",
632 get.ests=TRUE
636 if(class(QtlEffects) != 'try-error')
639 ResultModel<-attr(QtlEffects,
640 "formula"
643 Effects<-QtlEffects$ests$ests
644 QtlLodAnova<-QtlEffects$lod
645 ResultFull<-QtlEffects$result.full
646 ResultDrop<-QtlEffects$result.drop
648 if (is.numeric(Effects))
650 Effects<-round(Effects,
655 if (is.numeric(ResultFull))
657 ResultFull<-round(ResultFull,
662 if (is.numeric(ResultDrop))
664 ResultDrop<-round(ResultDrop,
672 ##########creating vectors for the outfiles##############
674 outfiles<-scan(file=outfile,
675 what="character"
678 qtlfile<-grep("qtl_summary",
679 outfiles,
680 ignore.case=TRUE,
681 fixed = FALSE,
682 value=TRUE
685 peakmarkersfile<-grep("peak_marker",
686 outfiles,
687 ignore.case=TRUE,
688 fixed = FALSE,
689 value=TRUE
692 confidencelodfile<-grep("confidence",
693 outfiles,
694 ignore.case=TRUE,
695 fixed = FALSE,
696 value=TRUE
698 QtlEffectsFile<-grep("qtl_effects",
699 outfiles,
700 ignore.case=TRUE,
701 fixed = FALSE,
702 value=TRUE
704 VariationFile<-grep("explained_variation",
705 outfiles,
706 ignore.case=TRUE,
707 fixed = FALSE,
708 value=TRUE
711 ##### writing outputs to their respective files
713 write.table(datasummary,
714 file=qtlfile,
715 sep="\t",
716 col.names=NA,
717 quote=FALSE,
718 append=FALSE
721 write.table(peakmarkers,
722 file=peakmarkersfile,
723 sep="\t",
724 col.names=NA,
725 quote=FALSE,
726 append=FALSE
729 write.table(lodconfidenceints,
730 file=confidencelodfile,
731 sep="\t",
732 col.names=NA,
733 quote=FALSE,
734 append=FALSE
737 if (is.null(ResultDrop)==FALSE)
739 write.table(ResultDrop,
740 file=VariationFile,
741 sep="\t",
742 col.names=NA,
743 quote=FALSE,
744 append=FALSE
746 } else
748 if(is.null(ResultFull)==FALSE)
750 write.table(ResultFull,
751 file=VariationFile,
752 sep="\t",
753 col.names=NA,
754 quote=FALSE,
755 append=FALSE
760 write.table(Effects,
761 file=QtlEffectsFile,
762 sep="\t",
763 col.names=NA,
764 quote=FALSE,
765 append=FALSE
768 write.table(permu,
769 file=permufile,
770 sep="\t",
771 col.names=NA,
772 quote=FALSE,
773 append=FALSE
776 q(runLast = FALSE)