# clean
# [sgn.git] / R / histogram.r
# blob 095c248d0316c1f3bb016b8aa0fb85b21577d864
# SYNOPSIS
#
# Prepares trait phenotype data for histogram plotting.
#
# AUTHOR
# Isaak Y Tecle (iyt2@cornell.edu)
# Do not echo the script's own statements when it runs non-interactively.
options(echo = FALSE)

library(plyr)

# The script is driven by three positional "key=value" arguments:
#   1. input_file=<path>   2. trait_name=<name>   3. output_file=<path>
allArgs <- commandArgs(trailingOnly = TRUE)

# Remove the first occurrence of `prefix` from `arg`, leaving the bare value.
stripPrefix <- function(arg, prefix) {
  sub(prefix, "", arg)
}

allTraitsPhenoFile <- stripPrefix(allArgs[1], "input_file=")
trait              <- stripPrefix(allArgs[2], "trait_name=")
traitPhenoFile     <- stripPrefix(allArgs[3], "output_file=")

# Report what was received so a failed run can be diagnosed from the log.
message("population phenotype file: ", allTraitsPhenoFile)
message("pheno data file: ", traitPhenoFile)
message("trait: ", trait)
# Sanity-check the arguments before any file is touched.
#
# grep() returns integer(0) -- never NULL -- when nothing matches, so an
# is.null(grep(...)) test can never detect a miss.  grepl() returns a
# logical instead (FALSE for a missing/NA argument); isTRUE() keeps the
# condition a strict scalar.
if (!isTRUE(grepl("phenotype_data", allTraitsPhenoFile, fixed = TRUE))) {
  stop("Phenotype dataset missing.")
}

if (!isTRUE(grepl("phenotype_trait", traitPhenoFile, fixed = TRUE))) {
  stop("Output file is missing.")
}

# The trait name arrives as the second argument ("trait_name=..."); the
# third argument is the output file, so it must not be inspected here.
if (!isTRUE(grepl("trait_name", allArgs[2], fixed = TRUE))) {
  stop("trait name is missing.")
}
# Load the tab-delimited table holding every trait of the population.
# A single space and the dash/dot placeholders listed below are read as NA;
# row.names = NULL makes R number the rows instead of using a data column.
allTraitsPhenoData <- read.table(
  file       = allTraitsPhenoFile,
  sep        = "\t",
  dec        = ".",
  header     = TRUE,
  row.names  = NULL,
  na.strings = c("NA", " ", "--", "-", ".", "..")
)

# Pull out the identifier columns together with the requested trait.
# Indexing by name fails early if any of these columns is absent.
selectColumns  <- c("object_name", "object_id", "stock_id", trait)
traitPhenoData <- allTraitsPhenoData[selectColumns]

# Discard the two id columns, leaving object_name and the trait column.
dropColumns    <- c("object_id", "stock_id")
keepColumns    <- !(names(traitPhenoData) %in% dropColumns)
traitPhenoData <- traitPhenoData[, keepColumns]
# Make sure the trait column is numeric before it is averaged.
#
# is.numeric() accepts both integer and double columns, so only genuinely
# non-numeric columns (character, factor, all-NA logical) are converted.
# The conversion goes through as.character() so that, for a factor, the
# level labels -- not the internal integer codes -- are parsed; values that
# cannot be parsed become NA (R emits its usual coercion warning).
#
# No second per-element is.numeric() pass is needed afterwards:
# as.numeric() always returns a numeric vector.
if (!is.numeric(traitPhenoData[[trait]])) {
  traitPhenoData[[trait]] <- as.numeric(as.character(traitPhenoData[[trait]]))
}
if (all(is.na(traitPhenoData[, trait]))) {
  # The trait has no observed value at all, so there is nothing to average.
  traitPhenoData <- NULL
} else {
  # Collapse repeated records: one mean per object_name, ignoring NAs.
  traitMeans <- ddply(
    .data      = traitPhenoData,
    .variables = "object_name",
    .fun       = colwise(mean, na.rm = TRUE)
  )

  # Move the first column of the summary (expected to be the object_name
  # grouping key) into the row names, then drop it from the columns.
  row.names(traitMeans) <- traitMeans[, 1]
  traitMeans[, 1]       <- NULL

  # Two decimal places are enough for the output table.
  traitPhenoData <- round(traitMeans, digits = 2)
}
# Save the result as a tab-delimited file, overwriting any existing file.
# col.names = NA writes an empty header cell above the row-name column so
# the header lines up with the data rows.
write.table(
  x         = traitPhenoData,
  file      = traitPhenoFile,
  append    = FALSE,
  quote     = FALSE,
  sep       = "\t",
  col.names = NA
)

# Terminate the R session without saving the workspace or running .Last().
q(save = "no", runLast = FALSE)