Merge pull request #4719 from solgenomics/topic/fixing_histogram
[sgn.git] / R / GBS_QC.R
blob9cc63334568a2b58b8e573e8aec36e0f169bf33e
1 #to use
2 #R --slave --args output_test00.txt qc_output.txt < ~code/code_R/GBS_QC.R
4 myarg <- commandArgs()
5 cat(myarg,"\n");
6 m=length(myarg)
7 cat(m,"\n");
9 f_in<-myarg[4:4]
10 f_out<-myarg[5:5]
11 #f_plot<-myarg[6:6]
12 #f_output<-myarg[7:7]
14 #cat(f_index,"\n")
15 #cat(f_acc,"\n")
16 #cat(f_plot,"\n")
17 #cat(f_output,"\n")
20 #f_acc="WEMA_6x1122_entry_number_accession.csv_tail.csv"
21 #f_plot="Clean data 6x1122_WET11B-MARS-EVALTC-10-8_rep2_sorted.csv_tail.csv"
23 data.gbs<-read.csv(f_in,sep="\t",header=F)
26 m=dim(data.gbs)[1];
27 n=dim(data.gbs)[2];
28 nn=n-1;
30 #cat("There are",nn,"accession\n",file=f_out,sep=" ",append=TRUE);
31 #cat("Each accession has",m,"markers\n",file=f_out,sep=" ",append=TRUE);
34 data.cnr<-array();
36 s=1;
38 for (i in 2:n){
40 cn=length(which(data.gbs[,i]=="-9"));
41 cnr=cn/length(data.gbs[,i]);
42 j=i-1;
43 data.cnr[s]=cnr;
44 s=s+1;
45 cat("Sample",j,"missing rate is",cnr,"\n",file=f_out,sep=" ",append=TRUE);
49 png(file = "./documents/img/MissingRate.png")
50 hist(data.cnr)
51 dev.off()
53 #data.plot<-read.csv(f_plot,sep="\t",header=F)
55 #colnames(data.plot)[1]="ENTRY"
57 #V1 V2 V3  V4 V5 V6 V7   V8  V9 V10   V11  V12
61 #diff_acc_plot=setdiff(data.plot[,1],data.acc[,1])
62 #same_acc_plot=intersect(data.plot[,1],data.acc[,1])
64 #diff_acc_plot=diff_acc_plot[order(diff_acc_plot)]
65 #same_acc_plot=same_acc_plot[order(same_acc_plot)]
67 #dn=length(diff_acc_plot)
68 #sn=length(same_acc_plot)
71 #WEMA6x1008_WET10B-EVALTC-08-1_ungenotyped1_tester_CML395_CML444 
73 #mp=gregexpr("_rep",f_plot)
75 #data_acc_tester=as.character(data.acc[1,2])
77 #acc_tester=substr(data_acc_tester,gregexpr("tester",data_acc_tester)[[1]][1],nchar(data_acc_tester))
79 #ungenotyped=paste("WEMA_",substr(f_plot,12,mp[[1]][1]+4),"_ungenotyped_",acc_tester,"_",diff_acc_plot[1],sep="")
81 #for(i in 2:dn){
83 #ungenotyped=c(ungenotyped,paste("WEMA_",substr(f_plot,12,mp[[1]][1]+4),"_ungenotyped_",acc_tester,"_",diff_acc_plot[i],sep=""))
87 #ungenotyped=paste("WEMA_",substr(f_plot,12,17),"_ungenotyped_",1,"_",acc_tester,sep="")
89 #for(i in 2:dn){
91 #ungenotyped=c(ungenotyped,paste("WEMA_",substr(f_plot,12,17),"_ungenotyped_",i,"_",acc_tester,sep=""))
95 #diff_acc_plot_ungenotyped<-cbind(diff_acc_plot,ungenotyped)
96 #colnames(diff_acc_plot_ungenotyped)=c("ENTRY","DESIG")
98 #data.acc.sorted=data.acc[order(data.acc[,1]),]
99 #colnames(data.acc.sorted)=c("ENTRY","DESIG")
101 #data.acc.plus.ungenotyped<-rbind(data.acc.sorted,diff_acc_plot_ungenotyped)
103 #data.acc.plus.ungenotyped.plot<-merge(data.acc.plus.ungenotyped,data.plot,by="ENTRY",sort=F)
105 #cn=dim(data.acc.plus.ungenotyped.plot)[2]
107 #data.acc.plus.ungenotyped.plot.2<-data.acc.plus.ungenotyped.plot[,c(1,3,4,5,2,7:cn)]
109 #f_output=paste("WEMA_",substr(f_plot,12,mp[[1]][1]+3),"_plot_accession",sep="")
111 #acc_plot_file_name=paste(f_output,"_output.csv",sep="")
113 #cat(acc_plot_file_name,"\n")
115 #write.table(data.acc.plus.ungenotyped.plot.2,file=acc_plot_file_name,append = F, quote = F, sep = "\t",eol = "\n",row.names = F,col.names = F,na=" ");
118 #ff <- myarg[5:m]
120 #f<-paste(ff,collapse=" ")
122 #cat(f_index,"\n")
123 #cat(f,"\n")
124 #cat(length(f),"\n")
126 #cat(m,"\n")
128 #library(affy)
129 #eset=justRMA(celfile.path=f)
130 #write.exprs(eset,file=paste(f_index,"_exprs.txt",sep=""))
131 #save.image(file=paste(f,".RData",sep=""))
132 #q()
135 #list_trait<-function(file.name){
136 #getwd()
138 #library(gdata)
139 #file_name=paste("~/DataFromXuecai/Link genotypes with phenotypes/",f_index,sep="");
140 #cat(file_name,"\n");
142 #data.for.read<-read.csv(file_name,header=T,sep="\t")
144 #print(colnames(data.for.read))
146 #write.table(data.for.read[2:length(data.for.read[,2]),2],file="F3_name.txt",append = T, quote = F, s#ep = "\t",eol = "\n",row.names = F,col.names = F);
149 quit("yes")