R/GBS_QC.R

   1 #to use
   2 #R --slave --args output_test00.txt qc_output.txt < ~code/code_R/GBS_QC.R
   3
   4 myarg <- commandArgs()
   5 cat(myarg,"\n");
   6 m=length(myarg)
   7 cat(m,"\n");
   8
   9 f_in<-myarg[4:4]
  10 f_out<-myarg[5:5]
  11 #f_plot<-myarg[6:6]
  12 #f_output<-myarg[7:7]
  13
  14 #cat(f_index,"\n")
  15 #cat(f_acc,"\n")
  16 #cat(f_plot,"\n")
  17 #cat(f_output,"\n")
  18
  19
  20 #f_acc="WEMA_6x1122_entry_number_accession.csv_tail.csv"
  21 #f_plot="Clean data 6x1122_WET11B-MARS-EVALTC-10-8_rep2_sorted.csv_tail.csv"
  22
  23 data.gbs<-read.csv(f_in,sep="\t",header=F)
  24
  25
  26 m=dim(data.gbs)[1];
  27 n=dim(data.gbs)[2];
  28 nn=n-1;
  29
  30 #cat("There are",nn,"accession\n",file=f_out,sep=" ",append=TRUE);
  31 #cat("Each accession has",m,"markers\n",file=f_out,sep=" ",append=TRUE);
  32
  33
  34 data.cnr<-array();
  35
  36 s=1;
  37
  38 for (i in 2:n){
  39
  40 cn=length(which(data.gbs[,i]=="-9"));
  41 cnr=cn/length(data.gbs[,i]);
  42 j=i-1;
  43 data.cnr[s]=cnr;
  44 s=s+1;
  45 cat("Sample",j,"missing rate is",cnr,"\n",file=f_out,sep=" ",append=TRUE);
  46
  47 }
  48
  49 png(file = "./documents/img/MissingRate.png")
  50 hist(data.cnr)
  51 dev.off()
  52
  53 #data.plot<-read.csv(f_plot,sep="\t",header=F)
  54
  55 #colnames(data.plot)[1]="ENTRY"
  56
  57 #V1 V2 V3  V4 V5 V6 V7   V8  V9 V10   V11  V12
  58
  59
  60
  61 #diff_acc_plot=setdiff(data.plot[,1],data.acc[,1])
  62 #same_acc_plot=intersect(data.plot[,1],data.acc[,1])
  63
  64 #diff_acc_plot=diff_acc_plot[order(diff_acc_plot)]
  65 #same_acc_plot=same_acc_plot[order(same_acc_plot)]
  66
  67 #dn=length(diff_acc_plot)
  68 #sn=length(same_acc_plot)
  69
  70
  71 #WEMA6x1008_WET10B-EVALTC-08-1_ungenotyped1_tester_CML395_CML444
  72
  73 #mp=gregexpr("_rep",f_plot)
  74
  75 #data_acc_tester=as.character(data.acc[1,2])
  76
  77 #acc_tester=substr(data_acc_tester,gregexpr("tester",data_acc_tester)[[1]][1],nchar(data_acc_tester))
  78
  79 #ungenotyped=paste("WEMA_",substr(f_plot,12,mp[[1]][1]+4),"_ungenotyped_",acc_tester,"_",diff_acc_plot[1],sep="")
  80
  81 #for(i in 2:dn){
  82 #
  83 #ungenotyped=c(ungenotyped,paste("WEMA_",substr(f_plot,12,mp[[1]][1]+4),"_ungenotyped_",acc_tester,"_",diff_acc_plot[i],sep=""))
  84 #
  85 #}
  86
  87 #ungenotyped=paste("WEMA_",substr(f_plot,12,17),"_ungenotyped_",1,"_",acc_tester,sep="")
  88
  89 #for(i in 2:dn){
  90
  91 #ungenotyped=c(ungenotyped,paste("WEMA_",substr(f_plot,12,17),"_ungenotyped_",i,"_",acc_tester,sep=""))
  92
  93 #}
  94
  95 #diff_acc_plot_ungenotyped<-cbind(diff_acc_plot,ungenotyped)
  96 #colnames(diff_acc_plot_ungenotyped)=c("ENTRY","DESIG")
  97
  98 #data.acc.sorted=data.acc[order(data.acc[,1]),]
  99 #colnames(data.acc.sorted)=c("ENTRY","DESIG")
 100
 101 #data.acc.plus.ungenotyped<-rbind(data.acc.sorted,diff_acc_plot_ungenotyped)
 102
 103 #data.acc.plus.ungenotyped.plot<-merge(data.acc.plus.ungenotyped,data.plot,by="ENTRY",sort=F)
 104
 105 #cn=dim(data.acc.plus.ungenotyped.plot)[2]
 106
 107 #data.acc.plus.ungenotyped.plot.2<-data.acc.plus.ungenotyped.plot[,c(1,3,4,5,2,7:cn)]
 108
 109 #f_output=paste("WEMA_",substr(f_plot,12,mp[[1]][1]+3),"_plot_accession",sep="")
 110
 111 #acc_plot_file_name=paste(f_output,"_output.csv",sep="")
 112
 113 #cat(acc_plot_file_name,"\n")
 114
 115 #write.table(data.acc.plus.ungenotyped.plot.2,file=acc_plot_file_name,append = F, quote = F, sep = "\t",eol = "\n",row.names = F,col.names = F,na=" ");
 116
 117
 118 #ff <- myarg[5:m]
 119
 120 #f<-paste(ff,collapse=" ")
 121
 122 #cat(f_index,"\n")
 123 #cat(f,"\n")
 124 #cat(length(f),"\n")
 125
 126 #cat(m,"\n")
 127
 128 #library(affy)
 129 #eset=justRMA(celfile.path=f)
 130 #write.exprs(eset,file=paste(f_index,"_exprs.txt",sep=""))
 131 #save.image(file=paste(f,".RData",sep=""))
 132 #q()
 133
 134
 135 #list_trait<-function(file.name){
 136 #getwd()
 137
 138 #library(gdata)
 139 #file_name=paste("~/DataFromXuecai/Link genotypes with phenotypes/",f_index,sep="");
 140 #cat(file_name,"\n");
 141
 142 #data.for.read<-read.csv(file_name,header=T,sep="\t")
 143
 144 #print(colnames(data.for.read))
 145
 146 #write.table(data.for.read[2:length(data.for.read[,2]),2],file="F3_name.txt",append = T, quote = F, s#ep = "\t",eol = "\n",row.names = F,col.names = F);
 147
 148 #}
 149 quit("yes")