# Prepares trait phenotype data for histogram plotting.
# Isaak Y Tecle (iyt2@cornell.edu)
# Collect the arguments that follow the script name on the command line.
# They are read positionally: input file, trait name, output file.
allArgs <- commandArgs(trailingOnly = TRUE)

# Remove the "key=" label from each argument, leaving only its value.
allTraitsPhenoFile <- sub(pattern = "input_file=", replacement = "", x = allArgs[1])
trait              <- sub(pattern = "trait_name=", replacement = "", x = allArgs[2])
traitPhenoFile     <- sub(pattern = "output_file=", replacement = "", x = allArgs[3])
# Report the parsed arguments on stderr so a run can be traced in the logs.
message(paste0("population phenotype file: ", allTraitsPhenoFile))
message(paste0("pheno data file: ", traitPhenoFile))
message(paste0("trait: ", trait))
# Sanity checks on the command-line arguments.
#
# grep() returns integer(0) -- never NULL -- when nothing matches, so the
# previous is.null(grep(...)) tests could never fire. grepl() yields a plain
# TRUE/FALSE (FALSE for NA input); isTRUE() keeps the condition a single,
# non-missing logical value.
if (!isTRUE(grepl("phenotype_data", allTraitsPhenoFile, fixed = TRUE))) {
  stop("Phenotype dataset missing.")
}

if (!isTRUE(grepl("phenotype_trait", traitPhenoFile, fixed = TRUE))) {
  stop("Output file is missing.")
}

# The trait name is read from the second argument (trait_name=...); the third
# argument is the output file, so it is the second one that must be checked.
if (!isTRUE(grepl("trait_name", allArgs[2], fixed = TRUE))) {
  stop("trait name is missing.")
}
# Reads the table stored at allTraitsPhenoFile into allTraitsPhenoData,
# treating "NA", " ", "--", "-", "." and ".." as missing values.
# NOTE(review): the remaining read.table() arguments (e.g. header/separator)
# and the closing parenthesis are not visible in this view -- confirm against
# the complete file before relying on how the table is parsed.
41 allTraitsPhenoData
<- read
.table(allTraitsPhenoFile
,
45 na
.strings
= c("NA", " ", "--", "-", ".", ".."),
# Narrow the full phenotype table down to the requested trait.
# Step 1: pull the object_name, object_id and stock_id columns together with
# the trait column (this fails if any of them is absent from the table).
selectColumns <- c("object_name", "object_id", "stock_id", trait)
traitPhenoData <- allTraitsPhenoData[selectColumns]

# Step 2: discard the two id columns again, keeping every column whose name
# is not listed in dropColumns.
dropColumns <- c("object_id", "stock_id")
traitPhenoData <- traitPhenoData[, is.na(match(names(traitPhenoData), dropColumns))]
# Make sure the trait column is numeric before it is summarised.
#
# Converting through as.character() makes factor columns translate by their
# labels rather than by their internal integer codes. Values that cannot be
# parsed as numbers become NA (R reports them with a coercion warning).
#
# identical() compares the whole class vector at once, so a column carrying
# more than one class (e.g. an ordered factor) no longer produces a
# length-2 condition, which if() rejects with an error on R >= 4.2.
#
# The former follow-up test on is.numeric() was removed: after the conversion
# above the column is always numeric, so that branch could never run.
if (!identical(class(traitPhenoData[, trait]), "numeric")) {
  traitPhenoData[, trait] <- as.numeric(
    as.character(traitPhenoData[, trait])
  )
}
# When the trait column holds at least one non-missing value, the data are
# passed through ddply() with colwise(mean, na.rm = TRUE), the first column is
# moved into the row names and then removed, and the result is rounded.
# Further down traitPhenoData is set to NULL.
# NOTE(review): several argument lines and the closing brackets of the
# ddply(), round() and if constructs are not visible in this view, so the
# grouping variable, the rounding precision and the condition under which NULL
# is assigned cannot be confirmed here (presumably an else branch -- verify).
# NOTE(review): ddply() and colwise() are not base R; presumably they come
# from a package loaded outside the visible lines -- confirm.
63 if (!all(is
.na(traitPhenoData
[, trait
]))) {
64 traitPhenoData
<- ddply(traitPhenoData
,
66 colwise(mean
, na
.rm
= TRUE)
70 row
.names(traitPhenoData
) <- traitPhenoData
[, 1]
71 traitPhenoData
[, 1] <- NULL
73 traitPhenoData
<- round(traitPhenoData
,
77 traitPhenoData
<- NULL
# Writes traitPhenoData to the path held in traitPhenoFile.
# NOTE(review): the remaining write.table() arguments and the closing
# parenthesis are not visible in this view, so the output format (separator,
# quoting, row/column names) cannot be confirmed here.
80 write
.table(traitPhenoData
,
81 file
= traitPhenoFile
,
# End the R session: do not save the workspace and do not run .Last().
quit(save = "no", runLast = FALSE)