modified: myjupyterlab.sh
[GalaxyCodeBases.git] / BioInfo / BS-Seq / bwa-meth / scripts / bsseq-caller.R
bloba43d9042d100b7868cc77a0011a08d2b057e51a3
1 suppressPackageStartupMessages(library(bsseq))
2 suppressPackageStartupMessages(library(data.table))
4 args = commandArgs(TRUE)
6 cov=data.frame(Sample=c(paste("2", 1:8, sep="_"), paste("1", 1:8, sep="_")),
7                grp_treat=as.factor(c(rep("tol", 8), rep("nontol", 8))))
9 #cov = read.delim("data/Mouse_allergen_clinical_data.txt")
10 #cov = cov[tolower(cov$Methylation) == "yes",]
11 #cov$grp_treat = paste(cov$Group, cov$Treatment, sep="_")
12 #cov = cov[cov$grp_treat %in% c('MTHFR-/-_HDM', 'Control_HDM'),]
13 #cov$grp_treat = relevel(cov$grp_treat, ref="Control_HDM")
16 rownames(cov) = cov$Sample
18 read.bs = function(fname, sample){
19     dat = as.data.frame(fread(fname))
20     chr = as.matrix(dat[,1], drop=FALSE)
21     pos = as.matrix(dat[,3], drop=FALSE)
23     M = as.matrix(dat$n_same, drop=FALSE)
24     C = as.matrix(dat$n_same + dat$n_converted, drop=FALSE)
25     BSseq(chr=chr, pos=pos, M=M,
26                C=C, sampleNames=sample)
30 bobjs = list()
32 for(i in 1:nrow(cov)){
33     sample = cov[i, "Sample"]
34     f = paste(sample, "methylation.txt", sep=".")
36     bobjs[i] = read.bs(f, sample)
39 bs = do.call("combine", unlist(bobjs))
41 bs.smooth = BSmooth(bs, mc.cores=8, verbose=TRUE, maxGap=4000)
42 bs.cov = getCoverage(bs.smooth)
44 keep = rowMeans(bs.cov) > 2
45 keep = rowSums(which(bs.cov > 2)) > 2
46 keep = rowSums(bs.cov >= 2) >= 2
47 bs.smooth = bs.smooth[keep,]
49 bs.tstat = BSmooth.tstat(bs.smooth, group1=as.character(cov$Sample[1:8]),
50                                     group2=as.character(cov$Sample[9:16]), 
51                                     estimate.var="group2",
52                                     local.correct=TRUE, mc.cores=4)
54 dmrs0 = dmrFinder(bs.tstat, cutoff = c(-5, 5), maxGap=200, qcutoff=c(0.01, 0.99))
55 dmrs = subset(dmrs0, n >= 5 & abs(meanDiff) >= 0.1)
57 pData(bs.smooth)$col = ifelse(cov$grp_treat == "tol", "red", "blue")
59 pdf('regions.pdf')
60 plotManyRegions(bs.smooth, dmrs, extend=100, addRegions=dmrs, BSseqTstat=bs.tstat, stat.ylim=c(-7, 7), addPoints=TRUE)
61 dev.off()