2 # run this in the output directory for rnaseq_pipeline.sh
3 # passing the pheno data csv file as the only argument
4 args = commandArgs(trailingOnly=TRUE)
6 # assume no output directory argument was given to rnaseq_pipeline.sh
7 pheno_data_file <- paste0(getwd(),"/sample.csv")
9 pheno_data_file <- args[1]
13 library(RSkittleBrewer)
18 ## Read phenotype sample data
19 pheno_data <- read.csv(pheno_data_file)
21 ## Read in expression data
22 out <- ballgown(dataDir = "out", samplePattern= "", pData=pheno_data)
24 ## Filter low abundance genes
25 out_filt <- subset(out, "rowVars(texpr(out)) > 1", genomesubset=TRUE)
28 sampleL <- subset(out_filt,"tissue %in% 'L'", genomesubset=FALSE)
29 sampleB <- subset(out_filt,"tissue %in% 'B'", genomesubset=FALSE)
30 sampleE <- subset(out_filt,"tissue %in% 'E'", genomesubset=FALSE)
33 LA <- subset(sampleL,"gko %in% 'WT'", genomesubset=FALSE)
34 LB <- subset(sampleL,"exp %in% 'CD'", genomesubset=FALSE)
35 LC <- subset(sampleL,"tko %in% c('CD-WT','HFD-KO')", genomesubset=FALSE)
36 LD <- subset(sampleL,"exp %in% 'HFD'", genomesubset=FALSE)
37 BA <- subset(sampleB,"gko %in% 'WT'", genomesubset=FALSE)
38 BB <- subset(sampleB,"exp %in% 'CD'", genomesubset=FALSE)
39 BC <- subset(sampleB,"tko %in% c('CD-WT','HFD-KO')", genomesubset=FALSE)
40 BD <- subset(sampleB,"exp %in% 'HFD'", genomesubset=FALSE)
41 EA <- subset(sampleE,"gko %in% 'WT'", genomesubset=FALSE)
42 EB <- subset(sampleE,"exp %in% 'CD'", genomesubset=FALSE)
43 EC <- subset(sampleE,"tko %in% c('CD-WT','HFD-KO')", genomesubset=FALSE)
44 ED <- subset(sampleE,"exp %in% 'HFD'", genomesubset=FALSE)
45 LA_data <- subset(pheno_data,tissue=="L"&gko=="WT",genomesubset=FALSE)
47 WA <- subset(out_filt,"gko %in% 'WT'", genomesubset=FALSE)
48 WB <- subset(out_filt,"exp %in% 'CD'", genomesubset=FALSE)
49 WC <- subset(out_filt,"tko %in% c('CD-WT','HFD-KO')", genomesubset=FALSE)
50 WD <- subset(out_filt,"exp %in% 'HFD'", genomesubset=FALSE)
53 LAresults_transcripts <- stattest(LA, feature='transcript', covariate="tko", getFC=TRUE, meas='FPKM')
54 LBresults_transcripts <- stattest(LB, feature='transcript', covariate="tko", getFC=TRUE, meas='FPKM')
55 LCresults_transcripts <- stattest(LC, feature='transcript', covariate="tko", getFC=TRUE, meas='FPKM')
56 LDresults_transcripts <- stattest(LD, feature='transcript', covariate="tko", getFC=TRUE, meas='FPKM')
57 BAresults_transcripts <- stattest(BA, feature='transcript', covariate="tko", getFC=TRUE, meas='FPKM')
58 BBresults_transcripts <- stattest(BB, feature='transcript', covariate="tko", getFC=TRUE, meas='FPKM')
59 BCresults_transcripts <- stattest(BC, feature='transcript', covariate="tko", getFC=TRUE, meas='FPKM')
60 BDresults_transcripts <- stattest(BD, feature='transcript', covariate="tko", getFC=TRUE, meas='FPKM')
61 EAresults_transcripts <- stattest(EA, feature='transcript', covariate="tko", getFC=TRUE, meas='FPKM')
62 EBresults_transcripts <- stattest(EB, feature='transcript', covariate="tko", getFC=TRUE, meas='FPKM')
63 ECresults_transcripts <- stattest(EC, feature='transcript', covariate="tko", getFC=TRUE, meas='FPKM')
64 EDresults_transcripts <- stattest(ED, feature='transcript', covariate="tko", getFC=TRUE, meas='FPKM')
66 WAresults_transcripts <- stattest(WA, feature='transcript', covariate="tko", adjustvars=c('tissue'), getFC=TRUE, meas='FPKM')
67 WBresults_transcripts <- stattest(WB, feature='transcript', covariate="tko", adjustvars=c('tissue'), getFC=TRUE, meas='FPKM')
68 WCresults_transcripts <- stattest(WC, feature='transcript', covariate="tko", adjustvars=c('tissue'), getFC=TRUE, meas='FPKM')
69 WDresults_transcripts <- stattest(WD, feature='transcript', covariate="tko", adjustvars=c('tissue'), getFC=TRUE, meas='FPKM')
72 LAresults_genes <- stattest(LA, feature='gene', covariate='tko',getFC=TRUE, meas='FPKM')
73 LBresults_genes <- stattest(LB, feature='gene', covariate='tko',getFC=TRUE, meas='FPKM')
74 LCresults_genes <- stattest(LC, feature='gene', covariate='tko',getFC=TRUE, meas='FPKM')
75 LDresults_genes <- stattest(LD, feature='gene', covariate='tko',getFC=TRUE, meas='FPKM')
76 BAresults_genes <- stattest(BA, feature='gene', covariate='tko',getFC=TRUE, meas='FPKM')
77 BBresults_genes <- stattest(BB, feature='gene', covariate='tko',getFC=TRUE, meas='FPKM')
78 BCresults_genes <- stattest(BC, feature='gene', covariate='tko',getFC=TRUE, meas='FPKM')
79 BDresults_genes <- stattest(BD, feature='gene', covariate='tko',getFC=TRUE, meas='FPKM')
80 EAresults_genes <- stattest(EA, feature='gene', covariate='tko',getFC=TRUE, meas='FPKM')
81 EBresults_genes <- stattest(EB, feature='gene', covariate='tko',getFC=TRUE, meas='FPKM')
82 ECresults_genes <- stattest(EC, feature='gene', covariate='tko',getFC=TRUE, meas='FPKM')
83 EDresults_genes <- stattest(ED, feature='gene', covariate='tko',getFC=TRUE, meas='FPKM')
86 LAresults_transcripts <- data.frame(chr=LA@expr$trans$chr,strand=LA@expr$trans$strand,start=LA@expr$trans$start,end=LA@expr$trans$end,geneNames=ballgown::geneNames(LA),geneIDs=ballgown::geneIDs(LA), LAresults_transcripts)
87 LBresults_transcripts <- data.frame(chr=LB@expr$trans$chr,strand=LB@expr$trans$strand,start=LB@expr$trans$start,end=LB@expr$trans$end,geneNames=ballgown::geneNames(LB),geneIDs=ballgown::geneIDs(LB), LBresults_transcripts)
88 LCresults_transcripts <- data.frame(chr=LC@expr$trans$chr,strand=LC@expr$trans$strand,start=LC@expr$trans$start,end=LC@expr$trans$end,geneNames=ballgown::geneNames(LC),geneIDs=ballgown::geneIDs(LC), LCresults_transcripts)
89 LDresults_transcripts <- data.frame(chr=LD@expr$trans$chr,strand=LD@expr$trans$strand,start=LD@expr$trans$start,end=LD@expr$trans$end,geneNames=ballgown::geneNames(LD),geneIDs=ballgown::geneIDs(LD), LDresults_transcripts)
90 BAresults_transcripts <- data.frame(chr=BA@expr$trans$chr,strand=BA@expr$trans$strand,start=BA@expr$trans$start,end=BA@expr$trans$end,geneNames=ballgown::geneNames(BA),geneIDs=ballgown::geneIDs(BA), BAresults_transcripts)
91 BBresults_transcripts <- data.frame(chr=BB@expr$trans$chr,strand=BB@expr$trans$strand,start=BB@expr$trans$start,end=BB@expr$trans$end,geneNames=ballgown::geneNames(BB),geneIDs=ballgown::geneIDs(BB), BBresults_transcripts)
92 BCresults_transcripts <- data.frame(chr=BC@expr$trans$chr,strand=BC@expr$trans$strand,start=BC@expr$trans$start,end=BC@expr$trans$end,geneNames=ballgown::geneNames(BC),geneIDs=ballgown::geneIDs(BC), BCresults_transcripts)
93 BDresults_transcripts <- data.frame(chr=BD@expr$trans$chr,strand=BD@expr$trans$strand,start=BD@expr$trans$start,end=BD@expr$trans$end,geneNames=ballgown::geneNames(BD),geneIDs=ballgown::geneIDs(BD), BDresults_transcripts)
94 EAresults_transcripts <- data.frame(chr=EA@expr$trans$chr,strand=EA@expr$trans$strand,start=EA@expr$trans$start,end=EA@expr$trans$end,geneNames=ballgown::geneNames(EA),geneIDs=ballgown::geneIDs(EA), EAresults_transcripts)
95 EBresults_transcripts <- data.frame(chr=EB@expr$trans$chr,strand=EB@expr$trans$strand,start=EB@expr$trans$start,end=EB@expr$trans$end,geneNames=ballgown::geneNames(EB),geneIDs=ballgown::geneIDs(EB), EBresults_transcripts)
96 ECresults_transcripts <- data.frame(chr=EC@expr$trans$chr,strand=EC@expr$trans$strand,start=EC@expr$trans$start,end=EC@expr$trans$end,geneNames=ballgown::geneNames(EC),geneIDs=ballgown::geneIDs(EC), ECresults_transcripts)
97 EDresults_transcripts <- data.frame(chr=ED@expr$trans$chr,strand=ED@expr$trans$strand,start=ED@expr$trans$start,end=ED@expr$trans$end,geneNames=ballgown::geneNames(ED),geneIDs=ballgown::geneIDs(ED), EDresults_transcripts)
99 WAresults_transcripts <- data.frame(chr=WA@expr$trans$chr,strand=WA@expr$trans$strand,start=WA@expr$trans$start,end=WA@expr$trans$end,geneNames=ballgown::geneNames(WA),geneIDs=ballgown::geneIDs(WA), WAresults_transcripts)
100 WBresults_transcripts <- data.frame(chr=WB@expr$trans$chr,strand=WB@expr$trans$strand,start=WB@expr$trans$start,end=WB@expr$trans$end,geneNames=ballgown::geneNames(WB),geneIDs=ballgown::geneIDs(WB), WBresults_transcripts)
101 WCresults_transcripts <- data.frame(chr=WC@expr$trans$chr,strand=WC@expr$trans$strand,start=WC@expr$trans$start,end=WC@expr$trans$end,geneNames=ballgown::geneNames(WC),geneIDs=ballgown::geneIDs(WC), WCresults_transcripts)
102 WDresults_transcripts <- data.frame(chr=WD@expr$trans$chr,strand=WD@expr$trans$strand,start=WD@expr$trans$start,end=WD@expr$trans$end,geneNames=ballgown::geneNames(WD),geneIDs=ballgown::geneIDs(WD), WDresults_transcripts)
104 ## Sort results from smallest p-value
105 LAresults_transcripts <- arrange(LAresults_transcripts, pval)
106 LAresults_genes <- arrange(LAresults_genes, pval)
107 LBresults_transcripts <- arrange(LBresults_transcripts, pval)
108 LBresults_genes <- arrange(LBresults_genes, pval)
109 LCresults_transcripts <- arrange(LCresults_transcripts, pval)
110 LCresults_genes <- arrange(LCresults_genes, pval)
111 LDresults_transcripts <- arrange(LDresults_transcripts, pval)
112 LDresults_genes <- arrange(LDresults_genes, pval)
113 BAresults_transcripts <- arrange(BAresults_transcripts, pval)
114 BAresults_genes <- arrange(BAresults_genes, pval)
115 BBresults_transcripts <- arrange(BBresults_transcripts, pval)
116 BBresults_genes <- arrange(BBresults_genes, pval)
117 BCresults_transcripts <- arrange(BCresults_transcripts, pval)
118 BCresults_genes <- arrange(BCresults_genes, pval)
119 BDresults_transcripts <- arrange(BDresults_transcripts, pval)
120 BDresults_genes <- arrange(BDresults_genes, pval)
121 EAresults_transcripts <- arrange(EAresults_transcripts, pval)
122 EAresults_genes <- arrange(EAresults_genes, pval)
123 EBresults_transcripts <- arrange(EBresults_transcripts, pval)
124 EBresults_genes <- arrange(EBresults_genes, pval)
125 ECresults_transcripts <- arrange(ECresults_transcripts, pval)
126 ECresults_genes <- arrange(ECresults_genes, pval)
127 EDresults_transcripts <- arrange(EDresults_transcripts, pval)
128 EDresults_genes <- arrange(EDresults_genes, pval)
130 WAresults_transcripts <- arrange(WAresults_transcripts, pval)
131 WBresults_transcripts <- arrange(WBresults_transcripts, pval)
132 WCresults_transcripts <- arrange(WCresults_transcripts, pval)
133 WDresults_transcripts <- arrange(WDresults_transcripts, pval)
135 ## Write results to CSV
136 write.csv(LAresults_transcripts, "LA_transcripts_results.csv", row.names=FALSE)
137 write.csv(LAresults_genes, "LA_genes_results.csv", row.names=FALSE)
138 write.csv(LBresults_transcripts, "LB_transcripts_results.csv", row.names=FALSE)
139 write.csv(LBresults_genes, "LB_genes_results.csv", row.names=FALSE)
140 write.csv(LCresults_transcripts, "LC_transcripts_results.csv", row.names=FALSE)
141 write.csv(LCresults_genes, "LC_genes_results.csv", row.names=FALSE)
142 write.csv(LDresults_transcripts, "LD_transcripts_results.csv", row.names=FALSE)
143 write.csv(LDresults_genes, "LD_genes_results.csv", row.names=FALSE)
144 write.csv(BAresults_transcripts, "BA_transcripts_results.csv", row.names=FALSE)
145 write.csv(BAresults_genes, "BA_genes_results.csv", row.names=FALSE)
146 write.csv(BBresults_transcripts, "BB_transcripts_results.csv", row.names=FALSE)
147 write.csv(BBresults_genes, "BB_genes_results.csv", row.names=FALSE)
148 write.csv(BCresults_transcripts, "BC_transcripts_results.csv", row.names=FALSE)
149 write.csv(BCresults_genes, "BC_genes_results.csv", row.names=FALSE)
150 write.csv(BDresults_transcripts, "BD_transcripts_results.csv", row.names=FALSE)
151 write.csv(BDresults_genes, "BD_genes_results.csv", row.names=FALSE)
152 write.csv(EAresults_transcripts, "EA_transcripts_results.csv", row.names=FALSE)
153 write.csv(EAresults_genes, "EA_genes_results.csv", row.names=FALSE)
154 write.csv(EBresults_transcripts, "EB_transcripts_results.csv", row.names=FALSE)
155 write.csv(EBresults_genes, "EB_genes_results.csv", row.names=FALSE)
156 write.csv(ECresults_transcripts, "EC_transcripts_results.csv", row.names=FALSE)
157 write.csv(ECresults_genes, "EC_genes_results.csv", row.names=FALSE)
158 write.csv(EDresults_transcripts, "ED_transcripts_results.csv", row.names=FALSE)
159 write.csv(EDresults_genes, "ED_genes_results.csv", row.names=FALSE)
161 write.csv(WAresults_transcripts, "WA_transcripts_results.csv", row.names=FALSE)
162 write.csv(WBresults_transcripts, "WB_transcripts_results.csv", row.names=FALSE)
163 write.csv(WCresults_transcripts, "WC_transcripts_results.csv", row.names=FALSE)
164 write.csv(WDresults_transcripts, "WD_transcripts_results.csv", row.names=FALSE)
166 ## Filter for genes with q-val <0.05
167 subset(WAresults_transcripts, WAresults_transcripts$qval <=0.05)
168 #subset(LAresults_genes, LAresults_genes$qval <=0.05)
171 #tropical <- c('darkorange', 'dodgerblue', 'hotpink', 'limegreen', 'yellow')
174 ## Plotting gene abundance distribution
175 #fpkm <- texpr(bg_chrX, meas='FPKM')
176 #fpkm <- log2(fpkm +1)
177 #boxplot(fpkm, col=as.numeric(pheno_data$sex), las=2,ylab='log2(FPKM+1)')
179 ## Plot individual transcripts
180 #ballgown::transcriptNames(bg_chrX)[12]
181 #plot(fpkm[12,] ~ pheno_data$sex, border=c(1,2),
182 # main=paste(ballgown::geneNames(bg_chrX)[12], ' : ',ballgown::transcriptNames(bg_chrX)[12]),
183 # pch=19, xlab="Sex", ylab='log2(FPKM+1)')
184 #points(fpkm[12,] ~ jitter(as.numeric(pheno_data$sex)), col=as.numeric(pheno_data$sex))
186 ## Plot gene of transcript 1729
187 #plotTranscripts(ballgown::geneIDs(bg_chrX)[1729], bg_chrX,
188 # main=c('Gene XIST in sample ERR188234'), sample=c('ERR188234'))
190 ## Plot average expression
191 #plotMeans(ballgown::geneIDs(bg_chrX)[203], bg_chrX_filt, groupvar="sex", legend=FALSE)
193 ##print gene abundance distribution in your screen
194 pdf(file="1.Gene_abundance_distribution.pdf",width=15,height=9)
195 par(mai=c(1.8,1,1,1))
196 tropical <- c('darkorange', 'dodgerblue', 'hotpink', 'limegreen', 'yellow')
198 fpkm <- texpr(out_filt, meas='FPKM')
199 fpkm <- log2(fpkm +1)
200 boxplot(fpkm, col=as.numeric(pheno_data$tko), las=2,ylab='log2(FPKM+1)')
203 ##print individual transcripts
204 pdf(file="2.Individual_transcripts.pdf")
205 ballgown::transcriptNames(out_filt)[15]
206 plot(fpkm[15,] ~ pheno_data$gko, border=c(1,2),main=paste(ballgown::geneNames(out_filt)[15], ' : ',ballgown::transcriptNames(out_filt)[15]),pch=19, xlab="Gko", ylab='log2(FPKM+1)')
207 points(fpkm[15,] ~ jitter(as.numeric(pheno_data$gko)), col=as.numeric(pheno_data$gko))
210 ##print Plot gene of transcript
211 pdf(file="3.Plot_gene_of_transcript.pdf")
212 plotTranscripts(ballgown::geneIDs(out_filt)[8219], out_filt,main=c('Gene XIST in sample HFD-L14'), sample=c('HFD-L14'))
215 ##print the compare of average expression
216 pdf(file="4.Comparison_of_average_expression.pdf")
217 plotTranscripts(ballgown::geneIDs(out_filt)[8219], out_filt,main=c('Gene XIST in sample HFD-L14'), sample=c('HFD-L14'))
218 plotMeans('MSTRG.7196', out_filt,groupvar="gko",legend=FALSE)
221 #print the first picture in sequence
222 fpkm2 <- texpr(out_filt, meas='FPKM')
223 tbl_fpkm2<-tbl_df(fpkm2)
224 colnames(tbl_fpkm2) <- c("CD-B1","CD-B2","CD-B3","CD-B4","CD-B5","CD-E1","CD-E2","CD-E4","CD-E5","CD-KO-B10","CD-KO-B6","CD-KO-B7","CD-KO-B8","CD-KO-E10","CD-KO-E6","CD-KO-E7","CD-KO-E8","CD-KO-E9","CD-KO-L10","CD-KO-L6","CD-KO-L7","CD-KO-L8","CD-KO-L9","CD-L1","CD-L2","CD-L3","CD-L4","CD-L5","HFD-B11","HFD-B12","HFD-B13","HFD-B14","HFD-B15","HFD-E11","HFD-E12","HFD-E13","HFD-E14","HFD-E15","HFD-KO-B16","HFD-KO-B17","HFD-KO-B18","HFD-KO-B19","HFD-KO-E16","HFD-KO-E17","HFD-KO-E18","HFD-KO-E19","HFD-KO-E20","HFD-KO-L16","HFD-KO-L17","HFD-KO-L18","HFD-KO-L19","HFD-KO-L20","HFD-L11","HFD-L12","HFD-L13","HFD-L14","HFD-L15")
225 newdf <- select(tbl_fpkm2,"CD-L1","CD-L2","CD-L3","CD-L4","CD-L5","CD-KO-L6","CD-KO-L7","CD-KO-L8","CD-KO-L9","CD-KO-L10","HFD-L11","HFD-L12","HFD-L13","HFD-L14","HFD-L15","HFD-KO-L16","HFD-KO-L17","HFD-KO-L18","HFD-KO-L19","HFD-KO-L20","CD-B1","CD-B2","CD-B3","CD-B4","CD-B5","CD-KO-B6","CD-KO-B7","CD-KO-B8","CD-KO-B10","HFD-B11","HFD-B12","HFD-B13","HFD-B14","HFD-B15","HFD-KO-B16","HFD-KO-B17","HFD-KO-B18","HFD-KO-B19","CD-E1","CD-E2","CD-E4","CD-E5","CD-KO-E6","CD-KO-E7","CD-KO-E8","CD-KO-E9","CD-KO-E10","HFD-E11","HFD-E12","HFD-E13","HFD-E14","HFD-E15","HFD-KO-E16","HFD-KO-E17","HFD-KO-E18","HFD-KO-E19","HFD-KO-E20")
226 pdf(file="5.Gene_abundance_distribution_insequnce.pdf",width=15,height=9)
227 par(mai=c(1.8,1,1,1))
228 tropical <- c('darkorange', 'dodgerblue', 'hotpink', 'limegreen', 'yellow')
230 newdf <- log2(newdf +1)
231 boxplot(newdf, col=as.numeric(pheno_data$tko), las=2,ylab='log2(FPKM+1)')