Merge pull request #4719 from solgenomics/topic/fixing_histogram
[sgn.git] / R / solGS / histogram.r
blob0f4d65426465536cb6844a9d53de320338a48b91
1 #SNOPSIS
3 #prepares trait phenotype data for histogram plotting
6 #AUTHOR
7 # Isaak Y Tecle (iyt2@cornell.edu)
10 options(echo = FALSE)
12 library(phenoAnalysis)
13 library(dplyr)
15 allArgs <- commandArgs()
17 outputFiles <- scan(grep("output_files", allArgs, value = TRUE),
18 what = "character")
20 inputFiles <- scan(grep("input_files", allArgs, value = TRUE),
21 what = "character")
23 allTraitsPhenoFile <- grep("phenotype_data", inputFiles, value = TRUE)
24 message('pheno file: ', allTraitsPhenoFile)
26 metaDataFile <- grep("metadata_file", inputFiles, value = TRUE)
27 message('metadata file: ', metaDataFile)
29 traitsFile <- grep("traits", inputFiles, value = TRUE)
30 message('traits file: ', traitsFile)
31 traits <- scan(traitsFile, what = "character")
32 trait <- strsplit(traits, "\t")
33 message("trait: ", trait)
35 traitPhenoMeansFile <- grep("phenotype_data", outputFiles, value = TRUE)
36 message('pheno file: ', traitPhenoMeansFile)
38 traitRawPhenoFile <- grep("trait_raw_phenodata", outputFiles, value = TRUE)
39 message('raw pheno file: ', traitRawPhenoFile)
42 if (is.null(grep("phenotype_data", allTraitsPhenoFile))) {
43 stop("Phenotype dataset missing.")
46 if (is.null(grep("phenotype_trait", traitPhenoMeansFile))) {
47 stop("Output file is missing.")
50 if (is.null(trait)) {
51 stop("trait name is missing.")
54 traitRawPhenoData <- extractPhenotypes(inputFiles, metadata_file)
56 allTraitsPhenoData <- read.table(allTraitsPhenoFile,
57 header = TRUE,
58 row.names = NULL,
59 sep = "\t",
60 na.strings = c("NA", " ", "--", "-", ".", ".."),
63 allTraitsPhenoData <- data.frame(fread(allTraitsPhenoFile,
64 na.strings = c("NA", "", "--", "-", ".")
67 traitPhenoMeansData <- getAdjMeans(allTraitsPhenoData,
68 traitName=trait,
69 calcAverages=TRUE)
71 keepMetaCols <- c('observationUnitName', 'germplasmName', 'studyDbId', 'locationName',
72 'studyYear', 'replicate', 'blockNumber')
74 traitRawPhenoData <- allTraitsPhenoData %>%
75 select(c(keepMetaCols, trait))
78 write.table(traitPhenoMeansData,
79 file = traitPhenoMeansFile,
80 sep = "\t",
81 quote = FALSE,
84 write.table(traitRawPhenoData,
85 file = traitRawPhenoFile,
86 sep = "\t",
87 na = 'NA',
88 quote = FALSE,
91 q(save = "no", runLast = FALSE)