2 if (is
.null(argv
) | length(argv
)<3) {
3 cat("Usage: kmerfreq.r maxcnt hist_file coverage [scaledmin] [scaledmax]\n")
6 testXrange
<- as
.numeric(argv
[1])
8 avgcvg
<- as
.numeric(argv
[3])
9 scaledmin
<- as
.numeric(argv
[4])
10 scaledmax
<- as
.numeric(argv
[5])
12 if(!exists("D__myfit"))
15 #inid <- unlist(strsplit(infile,'.',TRUE))[1]
16 inid
<- strsplit(infile
,'.',TRUE)[[1]][1]
17 TheIDRelshp
<- c('Donor','MALBAC Sperm01','MALBAC Sperm02','MALBAC Sperm03','MDA Sperm23','MDA Sperm24','MDA Sperm28','Donor')
18 names(TheIDRelshp
) <- c('blood','S1','S2','S3','S23','S24','S28','mlbacDonor')
23 infile <- 'blood.fq.k25.gz.hist.fixed'
28 aa
=read
.table(infile
,skip
=8)
30 cnt
<- as
.numeric(aa
[,2])
33 thedata
<- cbind(thefreq
,cnt
)
34 flag
<- thedata
[,1] <= testXrange
35 #usedata <- cbind(cnt[flag],thefreq[flag])
36 usedata
<- cbind(theratio
[flag
],thefreq
[flag
])
38 scaled
<- scale2pois(usedata
,avgcvg
,cntmin
=scaledmin
,cntmax
=scaledmax
)
39 scaledata
<- scaled$ret
40 scaleres
<- myfit(scaledata
, par
= list(lambda
= avgcvg
))
41 print('===scaledmyfit===')
43 scalesum
<- summary(scaleres
)
45 scalechi
<- chisq
.test(cbind(scaleres$observed
,scaleres$fitted
))
48 fitres
<- goodfit(usedata
,type
='poisson', par
= list(lambda
= avgcvg
))
49 #fitres <- goodfit(usedata,type='poisson', method = 'MinChisq')
50 #sumres <- summary(fitres)
52 selfres
<- myfit(usedata
, par
= list(lambda
= avgcvg
))
55 selfsum
<- summary(selfres
)
58 newres
<-list(observed
= fitres$observed
[c(-1,-2)], count
= fitres$count
[c(-1,-2)], fitted
= fitres$fitted
[c(-1,-2)],
59 type
= "poisson", method
= fitres$method
, df
= fitres$df
- 2, par
= fitres$par
)
60 class(newres
) <- "goodfit"
61 print('===goodfit===')
63 sumres
<- summary(newres
)
67 arg
<- deparse(substitute(x
))
68 RVAL
<- cbind(x$observed
,x$fitted
,(x$observed
-x$fitted
)/x$fitted
)
69 #RVAL <- round(RVAL,digits = 6)
70 colnames(RVAL
) <- c(paste0(arg
,'.ob'),paste0(arg
,'.fi'),'(O-E)/E')
71 rownames(RVAL
) <- 1+ 1:nrow(RVAL
)
74 print(cbind(dbg1(scaleres
),dbg1(newres
),dbg1(selfres
)))
76 tiff(filename
= paste0(infile
,".tiff"), compression
="lzw", width
= 683, height
= 683)#, units = "px", pointsize = 52)
77 #par(mar=c(5, 6, 2, 2),ps=20,family='sans')
78 par(mar
=c(4.5, 6, 3, 2),ps
=26,family
='sans')
86 maxY
=max(zz
,yy
,zz
*selfres$zoom
)
88 #lines(xx,zz,xlim=c(0,60),type='l');
90 #barplot(yy,xlim=c(0,20),ylim=c(0,0.212),xlab="K-mer count",ylab='Ratio',col='navy',cex.lab=1)
91 plot(xx
,yy
,type
='h',lwd
=19,xlim
=c(1,maxXrange
),ylim
=c(0,maxY
),
92 main
= paste0("Pearson goodness of fit, p=",signif(scalesum
[1,3],digits
= 6)),
93 xlab
="K-mer count",ylab
='Ratio',col
='navy',cex
.lab
=1)
94 # dpois(3,3.62)=0.2117524
95 legend("topright",pch
=-1,lty
=1,col
=c('navy','green3','red'), x
.intersp
= 1, y
.intersp
= 2,cex
=1,lwd
=c(11,7,8),
96 legend
= c( TheIDRelshp
[inid
],paste0("lambda=",signif(avgcvg
,digits
=6)),
97 paste0("scaledby=",signif(scaled$zoom
, digits
=6)) )
99 lines(scaleres$count
,scaleres$observed
,xlim
=c(0,60),type
='h',col
=rgb(1,1,0,1),lwd
=11)
101 axis(1, at
= seq(0, maxXrange
, by
= 5),lwd
=3,cex
=1)
102 lines(xxx
,zz
,xlim
=c(0,maxXrange
),type
='l',col
=rgb(0,0.804,0,0.8),lwd
=7)
103 #lines(xx,yy*scaled$zoom,xlim=c(0,60),type='l',col=rgb(1,0,0,0.7),lwd=3)
104 #lines(scaleres$count,scaleres$observed,xlim=c(0,60),type='l',col=rgb(1,0,0,0.9),lwd=5)
106 lines(xx
,yy
*scaled$zoom
,xlim
=c(0,60),type
='h',col
=rgb(1,0,0,1),lwd
=7)
110 outline
<- paste0(TheIDRelshp
[inid
],':[',scaled$cntmin
,',',scaled$cntmax
,"] cvg=",avgcvg
," scaleR=",scaled$zoom
)
112 write(outline
,paste0(infile
,".txt"),append
=FALSE)
114 outline
<- paste0('X^2= ',scalesum
[1,1],', df= ',scalesum
[1,2],', p= ',scalesum
[1,3])
116 write(outline
,paste0(infile
,".txt"),append
=TRUE)
121 #./kmerfreq.r mlbacDonor.k25.lz4.hist 6.85
122 #./kmerfreq.r mdaS23.k25.lz4.hist 4.34
123 #./kmerfreq.r S01.k25.lz4.hist 3.62
126 #Pearson 1.56832025 18 0.9999998
127 #Likelihood Ratio -0.06964308 18 1.0000000
130 #Pearson 7285.4140548 18 0
131 #Likelihood Ratio 0.3987449 18 1
134 #Pearson 2.511278e+05 18 0.0000000
135 #Likelihood Ratio 2.540676e+00 18 0.9999924
137 ./kmerfreq.r 15 mlbacDonor.k25.lz4.hist.fixed 5.67811
138 ./kmerfreq.r 15 S1.fq.k25.gz.hist.fixed 3.29571
139 ./kmerfreq.r 15 S2.fq.k25.gz.hist.fixed 3.28174
140 ./kmerfreq.r 15 S3.fq.k25.gz.hist.fixed 2.94459
141 ./kmerfreq.r 15 S23.fq.k25.gz.hist.fixed 3.26044
142 ./kmerfreq.r 15 S24.fq.k25.gz.hist.fixed 3.27885
143 ./kmerfreq.r 15 S28.fq.k25.gz.hist.fixed 3.30884
144 ./kmerfreq.r 15 blood.fq.k25.gz.hist.fixed 3.27885