Merge pull request #2091 from solgenomics/topic/update_fixture_dump
[sgn.git] / R / anova.r
blob56381629cc05dca5f25256cf1c05c64c9fb4f7ae
1 #SNOPSIS
3 #runs ANOVA.
5 #AUTHOR
6 # Isaak Y Tecle (iyt2@cornell.edu)
9 options(echo = FALSE)
12 #library(dplyr)
13 library(data.table)
14 library(phenoAnalysis)
15 library(methods)
18 allArgs <- commandArgs()
20 outputFiles <- scan(grep("output_files", allArgs, value = TRUE),
21 what = "character")
23 inputFiles <- scan(grep("input_files", allArgs, value = TRUE),
24 what = "character")
26 phenoDataFile <- grep("phenotype_data", inputFiles, value = TRUE)
27 message('pheno file: ', phenoDataFile)
29 traitsFile <- grep("traits", inputFiles, value = TRUE)
30 message('traits file: ', traitsFile)
32 #phenoDataFile = '/export/prod/tmp/localhost/GBSApeKIgenotypingv4/anova/cache/phenotype_data_3639.txt';
33 #phenoDataFile = '/mnt/hgfs/cxgn/phenotype-3639.txt';
35 dropCols <- c('programDbId','programName', 'programDescription', 'studyDbId', 'studyDescription', 'plotWidth', 'plotLength')
36 dropCols <- c(dropCols, 'fieldSize', 'fieldTrialIsPlannedToBeGenotyped', 'fieldTrialIsPlannedToCross', 'plantingDate', 'harvestDate')
37 dropCols <- c(dropCols,'entryType', 'plantNumber', 'plantedSeedlotStockDbId', 'plantedSeedlotStockUniquename')
38 dropCols <- c(dropCols, 'plantedSeedlotCurrentCount', 'plantedSeedlotCurrentWeightGram', 'plantedSeedlotBoxName')
39 dropCols <- c(dropCols, 'plantedSeedlotTransactionCount','plantedSeedlotTransactionWeight')
40 dropCols <- c(dropCols, 'plantedSeedlotTransactionDescription', 'availableGermplasmSeedlotUniquenames')
42 phenoData <- fread(phenoDataFile, sep="\t", drop=dropCols,
43 na.strings=c("NA", "-", " ", ".", ".."))
45 phenoData <- data.frame(phenoData)
47 traits <- scan(traitsFile,
48 what = "character")
50 traits <- strsplit(traits, "\t")
53 #needs more work for multi traits anova
54 for (trait in traits) {
56 message('trait: ', trait)
57 anovaFiles <- grep("anova_table",
58 outputFiles,
59 value = TRUE)
61 message('anova file: ', anovaFiles)
62 anovaHtmlFile <- grep("html",
63 anovaFiles,
64 value = TRUE)
66 message('anova html file: ', anovaHtmlFile)
67 anovaTxtFile <- grep("txt",
68 anovaFiles,
69 value = TRUE)
71 message('anova txt file: ', anovaTxtFile)
72 modelSummFile <- grep("anova_model",
73 outputFiles,
74 value = TRUE)
76 message('model file: ', modelSummFile)
77 adjMeansFile <- grep("adj_means",
78 outputFiles,
79 value = TRUE)
81 message('means file: ', adjMeansFile)
84 diagnosticsFile <- grep("anova_diagnostics",
85 outputFiles,
86 value = TRUE)
88 errorFile <- grep("anova_error",
89 outputFiles,
90 value = TRUE)
92 anovaOut <- runAnova(phenoData, trait)
93 if (class(anovaOut) == 'character') {
94 cat(anovaOut, file=errorFile)
95 } else if (is.null(anovaOut)) {
96 cat('Error occured fitting anova model to this trait data. Please check the trait data and design factors.', file=errorFile)
97 } else if (class(anovaOut)[1] == 'lmerModLmerTest' || class(anovaOut)[1] == 'merModLmerTest') {
99 png(diagnosticsFile, 960, 480)
100 par(mfrow=c(1,2))
101 plot(fitted(anovaOut), resid(anovaOut),
102 xlab="Fitted values",
103 ylab="Residuals",
104 main="Fitted values vs Residuals")
105 abline(0,0)
106 qqnorm(resid(anovaOut))
107 dev.off()
109 anovaTable <- getAnovaTable(anovaOut,
110 tableType="html",
111 traitName=trait,
112 out=anovaHtmlFile)
114 anovaTable <- getAnovaTable(anovaOut,
115 tableType="text",
116 traitName=trait,
117 out=anovaTxtFile)
120 adjMeans <- getAdjMeans(phenoData, trait)
122 fwrite(adjMeans,
123 file = adjMeansFile,
124 row.names = FALSE,
125 sep = "\t",
126 quote = FALSE,
129 sink(modelSummFile)
130 print(anovaOut)
131 sink()
137 q(save = "no", runLast = FALSE)