2 # R CMD BATCH --no-save --no-restore '--args phenotype_file="blabla.txt" output_file="blalba.png" ' analyze_phenotype.r output.txt
7 args
=(commandArgs(TRUE))
10 print("No arguments supplied.")
11 ##supply default values
12 phenotype_file
= 'phenotypes.txt'
13 output_file
= paste0(phenotype_file
, ".png", sep
="")
15 for(i
in 1:length(args
)){
16 eval(parse(text
=args
[[i
]]))
20 write(paste("phenotype file: ", phenotype_file
), stderr())
21 write(paste("output_file: ", output_file
), stderr())
23 errorfile
= paste(phenotype_file
, ".err", sep
="");
25 phenodata
= read
.csv(phenotype_file
,fill
=TRUE, sep
=",", header
= TRUE, stringsAsFactors
= T, na
.strings
="NA");
27 blocks
= unique(phenodata$blockNumber
)
28 print(paste("blocks: ", blocks
));
29 studyNames
= unique(phenodata$studyName
)
30 accessions
= unique(phenodata$germplasmName
)
33 trial_accessions
<- c()
34 all_accessions
= unique(phenodata$germplasmName
)
36 datamatrix
= matrix(nrow
= length(all_accessions
), ncol
=length(studyNames
)) # * length(blocks))
37 wfAlltrialsdata
<- c()
38 for (i
in 1:(length(studyNames
))) {
40 trialdata
<- phenodata
[phenodata
[,"studyName"]==studyNames
[i
], ] # & phenodata[,"blockNumber"]==n, ]
42 metadata
<- c('studyYear', 'studyDbId', 'studyName', 'studyDesign', 'locationDbId', 'locationName', 'germplasmDbId', 'germplasmSynonyms', 'observationLevel', 'observationUnitDbId', 'observationUnitName', 'plotNumber')
44 trialdata
<- trialdata
[, !(names(trialdata
) %in% metadata
)]
46 trialdata
<- trialdata
[, !(names(trialdata
) %in% c('replicate', 'blockNumber'))]
48 trialdata
<- ddply(trialdata
,
50 colwise(mean
, na
.rm
=TRUE)
53 trialdata
<- data
.frame(trialdata
)
55 trialdata
<- trialdata
[complete
.cases(trialdata
), ]
57 colnames(trialdata
)[2]<- make
.names(studyNames
[i
])
60 wfAllTrialsData
<- trialdata
62 wfAllTrialsData
<- merge(wfAllTrialsData
, trialdata
, by
="germplasmName")
66 #create a fake 4 trials dataset
67 #wfAllTrialsData <- merge(wfAllTrialsData, wfAllTrialsData, by="germplasmName")
69 names(wfAllTrialsData
) <- make
.names(names(wfAllTrialsData
))
71 lfTrialsData
<- function (wfTrialsData
) {
72 lfTrDa
<- gather(wfTrialsData
, Trials
, Trait
,
73 2:length(wfTrialsData
),
80 datamatrix
<- data
.matrix(wfAllTrialsData
)
83 if (nrow(datamatrix
)==0) {
84 write("No data was retrieved from the database for this combination of trials: ", file
= errorfile
);
86 if (ncol(datamatrix
) < 2) {
87 write("No data. Try again", file
= errorfile
);
92 panel
.cor
<- function(x
, y
, digits
=2, cex
.cor
)
94 usr
<- par("usr"); on
.exit(par(usr
))
95 par(usr
= c(0, 1, 0, 1))
96 r
<- abs(cor(x
, y
, use
="na.or.complete"))
97 txt
<- format(c(r
, 0.123456789), digits
=digits
)[1]
98 test
<- cor
.test(x
,y
,use
="na.or.complete")
99 Signif
<- ifelse(round(test$p
.value
,3)<0.001,"p<0.001",paste("p=",round(test$p
.value
,3)))
100 text(0.5, 0.25, paste("r=",txt
))
101 text(.5, .75, Signif
)
104 #pairs(data_test,lower.panel=panel.smooth,upper.panel=panel.cor)
108 panel
.smooth
<-function (x
, y
, col
= "black", bg
= NA, pch
= 18, cex
= 0.8, col
.smooth
= "red", span
= 2/3, iter
= 3, ...)
110 points(x
, y
, pch
= pch
, col
= col
, bg
= bg
, cex
= cex
)
111 ok
<- is
.finite(x
) & is
.finite(y
)
113 lines(stats
::lowess(x
[ok
], y
[ok
], f
= span
, iter
= iter
),
114 col
= col
.smooth
, ...)
118 #pairs(data,lower.panel=panel.smooth,upper.panel=panel.smooth)
121 panel
.hist
<- function(x
, ...)
123 usr
<- par("usr"); on
.exit(par(usr
))
124 par(usr
= c(usr
[1:2], 0, 1.5) )
125 h
<- hist(x
, plot
= TRUE)
126 breaks
<- h$breaks
; nB
<- length(breaks
)
127 y
<- h$counts
; y
<- y
/max(y
)
128 rect(breaks
[-nB
], 0, breaks
[-1], y
, col
="red", ...)
131 scatterPlot
<- function (wfTrialsData
) {
133 lfTrDa
<- lfTrialsData(wfTrialsData
)
135 scatter
<- ggplot(wfTrialsData
, aes_string(x
=names(wfTrialsData
)[2], y
=names(wfTrialsData
)[3])) +
136 theme(plot
.title
= element_text(size
=18, face
="bold", color
="olivedrab4", margin
= margin(40, 40, 40, 40)),
137 plot
.margin
= unit(c(0.75, 1, 0.75, 1), "cm"),
138 axis
.title
.x
= element_text(size
=14, face
="bold", color
="olivedrab4"),
139 axis
.title
.y
= element_text(size
=14, face
="bold", color
="olivedrab4"),
140 axis
.text
.x
= element_text(angle
=90, vjust
=0.5, size
=10, color
="olivedrab4"),
141 axis
.text
.y
= element_text(size
=10, color
="olivedrab4")) +
142 geom_point(shape
=1, color
='DodgerBlue') +
143 scale_x_continuous(breaks
= round(seq(min(lfTrDa$Trait
), max(lfTrDa$Trait
), by
= 2),1)) +
144 scale_y_continuous(breaks
= round(seq(min(lfTrDa$Trait
), max(lfTrDa$Trait
), by
= 2),1)) +
145 geom_smooth(method
=lm
, se
=FALSE)
152 freqPlot
<- function (wfTrialsData
) {
154 lfTrDa
<- lfTrialsData(wfTrialsData
)
156 averages
<- ddply(lfTrDa
, "Trials", summarise
, traitAverage
= mean(Trait
))
158 freq
<- ggplot(lfTrDa
, aes(x
=Trait
, fill
=Trials
)) +
159 xlab("Trait values") +
161 theme(plot
.title
= element_text(size
=18, face
="bold", color
="olivedrab4", margin
= margin(40, 40, 40, 40)),
162 plot
.margin
= unit(c(0.75, 1, 0.75, 1), "cm"),
163 axis
.title
.x
= element_text(size
=14, face
="bold", color
="olivedrab4"),
164 axis
.title
.y
= element_text(size
=14, face
="bold", color
="olivedrab4"),
165 axis
.text
.x
= element_text(angle
=90, size
=10, color
="olivedrab4"),
166 axis
.text
.y
= element_text(size
=10, color
="olivedrab4"),
167 legend
.title
=element_blank(),
168 legend
.text
=element_text(size
=12, color
="olivedrab4"),
169 legend
.position
="bottom") +
170 geom_histogram(binwidth
=2, alpha
=.5, position
="identity") +
171 scale_x_continuous(breaks
= round(seq(min(lfTrDa$Trait
), max(lfTrDa$Trait
), by
= 2),1)) +
172 scale_fill_manual(values
=c("ForestGreen", "DodgerBlue")) +
173 geom_vline(data
=averages
,
174 aes(xintercept
=traitAverage
, colour
=Trials
),
175 linetype
="dashed", size
=2)
181 # Multiple plot function: for how to use this function go here:
182 # http://www.cookbook-r.com/Graphs/Multiple_graphs_on_one_page_(ggplot2)/
184 multiplot
<- function(..., plotlist
=NULL, file
, cols
=1, layout
=NULL) {
187 plots
<- c(list(...), plotlist
)
189 numPlots
= length(plots
)
191 if (is
.null(layout
)) {
192 layout
<- matrix(seq(1, cols
* ceiling(numPlots
/cols
)),
193 ncol
= cols
, nrow
= ceiling(numPlots
/cols
))
201 pushViewport(viewport(layout
= grid
.layout(nrow(layout
), ncol(layout
))))
203 for (i
in 1:numPlots
) {
204 matchidx
<- as
.data
.frame(which(layout
== i
, arr
.ind
= TRUE))
206 print(plots
[[i
]], vp
= viewport(layout
.pos
.row
= matchidx$row
,
207 layout
.pos
.col
= matchidx$col
))
213 getTrialsPairs
<- function (wfAllTrialsData
) {
214 combiTrMx
<- combn(names(wfAllTrialsData
[, 2:length(names(wfAllTrialsData
))]), 2)
215 nPairs
<- dim(combiTrMx
)[2]
218 for (i
in 1:nPairs
) {
219 pairs
<- combiTrMx
[, i
]
220 message("pair " , i
, " ", pairs
)
221 allPairs
[i
] <- list(i
=pairs
)
224 return (list("trialsPairs"= allPairs
, "pairsCount"= nPairs
))
228 prTr
<- getTrialsPairs(wfAllTrialsData
)
229 trialsPairs
<- prTr
[["trialsPairs"]]
230 pairsCount
<- prTr
[["pairsCount"]]
232 message("pairs count ", pairsCount
)
234 createGraphNames
<- function (pairsCount
) {
237 for (i
in 1:pairsCount
) {
238 pf
<- paste("freq", i
, sep
="")
239 ps
<- paste("scatter", i
, sep
="")
241 graphNames
[i
] <- list(i
=c(ps
, pf
))
248 png(output_file
, height
= pairsCount
* 400, width
=800)
250 graphNames
<- createGraphNames(pairsCount
)
252 for (i
in 1:pairsCount
) {
253 pnames
<- graphNames
[[i
]]
254 message(pnames
, " ", pnames
[1], " ", pnames
[2])
256 scatter
<- scatterPlot(wfAllTrialsData
[, c("germplasmName", trialsPairs
[[i
]])])
257 freq
<- freqPlot(wfAllTrialsData
[, c("germplasmName", trialsPairs
[[i
]])])
259 assign(pnames
[1], scatter
)
260 assign(pnames
[2], freq
)
264 if (pairsCount
== 1) {
265 multiplot(scatter1
, freq1
, cols
=2)
266 } else if (pairsCount
== 3) {
267 multiplot(scatter1
, freq1
, scatter2
, freq2
, scatter3
, freq3
, cols
=2)
268 } else if (pairsCount
== 6) {
269 multiplot(scatter1
, freq1
, scatter2
, freq2
, scatter3
, freq3
, scatter4
, freq4
, scatter5
, freq5
, scatter6
, freq6
, cols
=2)