modified: makefile
[GalaxyCodeBases.git] / tools / genotyping / runMPR.r
bloba1581287b5ba59728fbbb6fafe674ce0a5ed40a4
1 #!/opt/blc/genome/biosoft/R/bin/Rscript
3 self='./runMPR.r';
4 argv = commandArgs(T);
5 #argv=c('Chr09.psnp','./t1/ttp.Chr09')
7 if (is.null(argv) | length(argv)<2) {
8 cat("Error: No options. Run",self,"<input> <output_prefix>.{pGT,rgt,bin,png,block} [number_of_sample_to_use]\n")
9 q(status=1)
12 sink(paste(argv[2],'.log',sep=''))
14 source('./mpr.R');
16 InDat=read.table(argv[1],T,na.strings='-',comment.char = '',as.is=TRUE)
17 #InDat=read.table('psnp/Chr01.add_ref',T,na.strings='-',comment.char = '',as.is=TRUE)
18 Ref=data.frame(InDat$Ref,row.names=InDat$Pos,check.names=F)
19 SNP=as.matrix(data.frame(InDat[-(1:3)],row.names=InDat$Pos))
20 #InDat='';
22 SampleCount=length(colnames(SNP))
23 if (length(argv)>2) {
24 ChoosedCount = min( SampleCount,max(as.numeric(argv[3]),3) );
25 #myBaseData <- SNP[,1:ChoosedCount];
26 myBaseData <- SNP[,sample.int(SampleCount,ChoosedCount)];
27 } else { myBaseData <- SNP; }
28 cat(length(colnames(myBaseData)),"\t[",colnames(myBaseData),"]\n")
30 snpT=base2Allele(myBaseData)
31 Tmp=sort(as.numeric(rownames(snpT)))
32 myBaseData <- myBaseData[as.character(Tmp),]
34 allele.MPR <- localMPR(baseData=myBaseData,maxIterate=50,maxNStep=5,showDetail=TRUE,verbose=TRUE)
35 warnings()
37 #snpSet <- sort(as.numeric(rownames(na.omit(allele.MPR))))
38 #allele.MPR <- allele.MPR[match(snpSet,rownames(allele.MPR)),]
40 write.table(allele.MPR,paste(argv[2],'.r0pGT',sep=''),col.names=TRUE,quote=F,sep='\t')
42 system.time(all.res <- globalMPRRefine(myBaseData,alleleA=allele.MPR[,1],groupSort=TRUE,
43 numPerm=20,numTry=3,
44 numBaseStep=50,numBaseCandidateStep=100,numKnownStep=30,
45 numKnownCandidateStep=50,useMedianToFindKnown=TRUE,maxIterate=150,
46 maxNStep=3,scoreMin=0.8,saveMidData=TRUE,verbose=TRUE))
47 warnings()
49 write.table(allele.MPR,paste(argv[2],'.r1pGT',sep=''),col.names=TRUE,quote=F,sep='\t')
51 perm <- 10
52 res <- all.res$midData[[perm]]
53 table(geno.res <- genotypeCallsBayes(res$call,errorRate=1e-11,eps=1e-10,maxIterate=100,verbose=FALSE)$type)
55 allele.MPR <- res$allele
56 allele.MPR[geno.res==2|rowMin(res$call)>=perm,] <- NA
57 allele.MPR[geno.res==3,] <- allele.MPR[geno.res==3,c(2,1)]
59 table(is.na(alleleA <- allele.MPR[,1]))
60 #table(is.na(alleleB <- allele.MPR[,2]))
61 idsA <- match(names(alleleA),rownames(Ref))
62 #idsB <- match(names(alleleB),rownames(Ref))
63 table(Ref[idsA,1]==alleleA)
64 #table(Ref[idsB,1]==alleleB)
66 snpSet <- sort(as.numeric(rownames(na.omit(allele.MPR))))
67 #alleleMPR=allele.MPR[match(snpSet,rownames(allele.MPR)),]
69 #argv=c('iChr01','oChr01')
71 write.table(allele.MPR,paste(argv[2],'.pGT',sep=''),col.names=TRUE,quote=F,sep='\t')
73 ids <- match(snpSet,rownames(myBaseData))
74 table(is.na(geno.data <- base2Geno(myBaseData[ids,],allele.MPR[ids,])))
75 write.table(geno.data,paste(argv[2],'.rgt',sep=''),col.names=TRUE,quote=F,sep='\t')
77 theBin=genoToBin(geno.data,as.numeric(rownames(geno.data)),corrected=F,heterozygote=TRUE,minBinsize=250000,geno.probability=c(0.49951171875, 0.49951171875,0.0009765625))
78 warnings()
80 write.table(theBin$bin,paste(argv[2],'.bin',sep=''),col.names=TRUE,quote=F,sep='\t')
81 write.table(theBin$border,paste(argv[2],'.border',sep=''),col.names=F,row.names=F,quote=F,sep='\t')
82 write.table(theBin$block$'1'$block,paste(argv[2],'.block',sep=''),col.names=F,quote=F,sep='\t')
84 geno.bin=theBin$bin
86 # A:(0,179,0) B:(0,51,179) AB:(230,230,0) NA:(205,0,0)
87 # A:(0,0.7,0) B:(0,0.2,0.7) AB:(0.9,0.9,0) NA:(0.8,0,0)
89 geno.colors <- geno.bin;geno.colors[is.na(geno.colors)] <- rgb(0.8,0,0)
90 geno.colors[geno.colors==0]=rgb(0,0.7,0)
91 geno.colors[geno.colors==1]=rgb(0,0.2,0.7)
92 geno.colors[geno.colors==0.5]=rgb(0.9,0.9,0)
93 rils=matrix(theBin$border,nrow=nrow(geno.bin),ncol=ncol(geno.bin)) # 1:nrow(geno.bin) or theBin$border
94 poses=matrix(rep(1:ncol(geno.bin),each=nrow(geno.bin)),nrow=nrow(geno.bin),ncol=ncol(geno.bin))
96 theX=min(1200,180+ncol(geno.bin)*16)
97 theY=min(2400,180+nrow(geno.bin)*32)
99 png(paste(argv[2],'.png',sep=''), theX, theY, bg='white')
100 plot(poses, rils, col=geno.colors,pch=20, ylab='',xlab="RIL index",mar=c(1.1,2.1,2.1,1.1))
102 for(j in 1:ncol(geno.colors)) {
103 for(i in 1:(nrow(geno.colors)-1)) {
104 if(geno.colors[i,j]==geno.colors[i+1,j])
105 segments(j,rils[i,j],j,rils[i+1,j],col=geno.colors[i,j],lwd=2)
109 #for(i in 1:nrow(rils)) {
110 # segments(1,rils[i,1],ncol(rils),rils[i,ncol(rils)],lwd=1,col=rgb(0,0,0.2,0.4))
113 dev.off()