Merge pull request #4272 from solgenomics/topic/fix-vcf-download
[sgn.git] / R / solGS / gs.r
blobab180cbc4073710642e46724f6f96e7379d02ecb
1 #SNOPSIS
2 #calculates genomic estimated breeding values (GEBVs) using rrBLUP,
3 #GBLUP method
5 #AUTHOR
6 # Isaak Y Tecle (iyt2@cornell.edu)
8 options(echo = FALSE)
10 library(methods)
11 library(rrBLUP)
12 library(plyr)
13 library(stringr)
14 #library(lme4)
15 library(randomForest)
16 library(parallel)
17 library(genoDataFilter)
18 library(phenoAnalysis)
19 library(caret)
20 library(dplyr)
21 library(tibble)
22 library(rlang)
23 library(jsonlite)
24 library(data.table)
27 allArgs <- commandArgs()
30 inputFiles <- scan(grep("input_files", allArgs, value = TRUE),
31 what = "character")
33 outputFiles <- scan(grep("output_files", allArgs, value = TRUE),
34 what = "character")
36 traitsFile <- grep("traits", inputFiles, value = TRUE)
37 modelInfoFile <- grep("model_info", inputFiles, value = TRUE)
38 message('model_info_file ', modelInfoFile)
40 modelInfo <- read.table(modelInfoFile,
41 header=TRUE, sep ="\t",
42 as.is = c('Value'))
44 modelInfo <- column_to_rownames(modelInfo, var="Name")
45 traitId <- modelInfo["trait_id", 1]
46 traitAbbr <- modelInfo["trait_abbr", 1]
47 modelId <- modelInfo["model_id", 1]
48 protocolId <- modelInfo["protocol_id", 1]
50 message('class ', class(traitAbbr))
51 message('trait_id ', traitId)
52 message('trait_abbr ', traitAbbr)
53 message('protocol_id ', protocolId)
54 message('model_id ', modelId)
56 datasetInfoFile <- grep("dataset_info", inputFiles, value = TRUE)
57 datasetInfo <- c()
59 if (length(datasetInfoFile) != 0 ) {
60 datasetInfo <- scan(datasetInfoFile, what = "character")
61 datasetInfo <- paste(datasetInfo, collapse = " ")
62 } else {
63 datasetInfo <- c('single population')
66 #validationTrait <- paste("validation", trait, sep = "_")
67 validationFile <- grep('validation', outputFiles, value = TRUE)
69 if (is.null(validationFile)) {
70 stop("Validation output file is missing.")
73 #kinshipTrait <- paste("rrblup_training_gebvs", trait, sep = "_")
74 blupFile <- grep('rrblup_training_gebvs', outputFiles, value = TRUE)
76 if (is.null(blupFile)) {
77 stop("GEBVs file is missing.")
80 #markerTrait <- paste("marker_effects", trait, sep = "_")
81 markerFile <- grep('marker_effects', outputFiles, value = TRUE)
83 #traitPhenoFile <- paste("trait_phenotype_data", traitId, sep = "_")
84 modelPhenoFile <- grep('model_phenodata', outputFiles, value = TRUE)
85 message('model input trait pheno file ', modelPhenoFile)
86 modelGenoFile <- grep('model_genodata', outputFiles, value = TRUE)
87 message('model input trait geno file ', modelPhenoFile)
88 traitRawPhenoFile <- grep('trait_raw_phenodata', outputFiles, value = TRUE)
89 varianceComponentsFile <- grep("variance_components", outputFiles, value = TRUE)
90 filteredTrainingGenoFile <- grep("filtered_training_genotype_data", outputFiles, value = TRUE)
91 filteredSelGenoFile <- grep("filtered_selection_genotype_data", outputFiles, value = TRUE)
92 formattedPhenoFile <- grep("formatted_phenotype_data", inputFiles, value = TRUE)
94 genoFile <- grep("genotype_data_", inputFiles, value = TRUE)
96 if (is.null(genoFile)) {
97 stop("genotype data file is missing.")
100 if (file.info(genoFile)$size == 0) {
101 stop("genotype data file is empty.")
104 readfilteredTrainingGenoData <- c()
105 filteredTrainingGenoData <- c()
106 formattedPhenoData <- c()
107 phenoData <- c()
108 genoData <- c()
110 if (length(filteredTrainingGenoFile) != 0 && file.info(filteredTrainingGenoFile)$size != 0) {
111 filteredTrainingGenoData <- fread(filteredTrainingGenoFile,
112 na.strings = c("NA", "", "--", "-"),
113 header = TRUE)
115 genoData <- data.frame(filteredTrainingGenoData)
116 genoData <- column_to_rownames(genoData, 'V1')
117 readfilteredTrainingGenoData <- 1
120 if (is.null(filteredTrainingGenoData)) {
121 genoData <- fread(genoFile,
122 na.strings = c("NA", "", "--", "-"),
123 header = TRUE)
124 genoData <- unique(genoData, by='V1')
125 genoData <- data.frame(genoData)
126 genoData <- column_to_rownames(genoData, 'V1')
127 #genoDataFilter::filterGenoData
128 genoData <- convertToNumeric(genoData)
129 genoData <- filterGenoData(genoData, maf=0.01)
130 genoData <- roundAlleleDosage(genoData)
132 filteredTrainingGenoData <- genoData
135 genoData <- genoData[order(row.names(genoData)), ]
137 if (length(formattedPhenoFile) != 0 && file.info(formattedPhenoFile)$size != 0) {
138 formattedPhenoData <- data.frame(fread(formattedPhenoFile,
139 header = TRUE,
140 na.strings = c("NA", "", "--", "-", ".")
143 } else {
145 if (datasetInfo == 'combined populations') {
147 phenoFile <- grep("model_phenodata", inputFiles, value = TRUE)
148 } else {
150 phenoFile <- grep("\\/phenotype_data", inputFiles, value = TRUE)
153 if (is.null(phenoFile)) {
154 stop("phenotype data file is missing.")
157 if (file.info(phenoFile)$size == 0) {
158 stop("phenotype data file is empty.")
161 phenoData <- data.frame(fread(phenoFile,
162 sep = "\t",
163 na.strings = c("NA", "", "--", "-", "."),
164 header = TRUE))
169 phenoTrait <- c()
170 traitRawPhenoData <- c()
172 if (datasetInfo == 'combined populations') {
174 if (!is.null(formattedPhenoData)) {
175 phenoTrait <- subset(formattedPhenoData, select = traitAbbr)
176 phenoTrait <- na.omit(phenoTrait)
178 } else {
180 if (any(grepl('Average', names(phenoData)))) {
181 phenoTrait <- phenoData %>% select(V1, Average) %>% data.frame
182 } else {
183 phenoTrait <- phenoData
186 colnames(phenoTrait) <- c('genotypes', traitAbbr)
188 } else {
190 if (!is.null(formattedPhenoData)) {
191 phenoTrait <- subset(formattedPhenoData, select = c('V1', traitAbbr))
192 phenoTrait <- as.data.frame(phenoTrait)
193 phenoTrait <- na.omit(phenoTrait)
194 colnames(phenoTrait)[1] <- 'genotypes'
196 } else if (length(grep('list', phenoFile)) != 0) {
197 message('phenoTrait traitAbbr ', traitAbbr)
198 phenoTrait <- averageTrait(phenoData, traitAbbr)
200 } else {
201 message('phenoTrait trait_abbr ', traitAbbr)
202 phenoTrait <- getAdjMeans(phenoData,
203 traitName = traitAbbr,
204 calcAverages = TRUE)
207 keepMetaCols <- c('observationUnitName', 'germplasmName', 'studyDbId', 'locationName',
208 'studyYear', 'replicate', 'blockNumber', traitAbbr)
210 traitRawPhenoData <- phenoData %>%
211 select(all_of(keepMetaCols))
216 meanType <- names(phenoTrait)[2]
217 names(phenoTrait) <- c('genotypes', traitAbbr)
219 selectionTempFile <- grep("selection_population", inputFiles, value = TRUE)
221 selectionFile <- c()
222 filteredPredGenoFile <- c()
223 selectionAllFiles <- c()
225 if (length(selectionTempFile) !=0 ) {
226 selectionAllFiles <- scan(selectionTempFile, what = "character")
228 selectionFile <- grep("\\/genotype_data", selectionAllFiles, value = TRUE)
230 #filteredPredGenoFile <- grep("filtered_genotype_data_", selectionAllFiles, value = TRUE)
233 selectionPopGEBVsFile <- grep("rrblup_selection_gebvs", outputFiles, value = TRUE)
235 selectionData <- c()
236 readFilteredPredGenoData <- c()
237 filteredPredGenoData <- c()
239 ## if (length(filteredPredGenoFile) != 0 && file.info(filteredPredGenoFile)$size != 0) {
240 ## selectionData <- fread(filteredPredGenoFile, na.strings = c("NA", " ", "--", "-"),)
241 ## readFilteredPredGenoData <- 1
243 ## selectionData <- data.frame(selectionData)
244 ## rownames(selectionData) <- selectionData[, 1]
245 ## selectionData[, 1] <- NULL
247 ## } else
249 if (length(selectionFile) != 0) {
251 selectionData <- fread(selectionFile,
252 header = TRUE,
253 na.strings = c("NA", "", "--", "-"))
255 selectionData <- data.frame(selectionData)
258 selectionData <- unique(selectionData, by='V1')
260 selectionData <- column_to_rownames(selectionData, 'V1')
261 selectionData <- convertToNumeric(selectionData)
262 selectionData <- filterGenoData(selectionData, maf=0.01)
263 selectionData <- roundAlleleDosage(selectionData)
268 #impute genotype values for obs with missing values,
269 genoDataMissing <- c()
271 if (sum(is.na(genoData)) > 0) {
272 genoDataMissing<- c('yes')
274 genoData <- na.roughfix(genoData)
275 genoData <- data.frame(genoData)
278 #create phenotype and genotype datasets with
279 #common stocks only
281 #extract observation lines with both
282 #phenotype and genotype data only.
283 commonObs <- intersect(phenoTrait$genotypes, row.names(genoData))
284 commonObs <- data.frame(commonObs)
285 rownames(commonObs) <- commonObs[, 1]
287 #include in the genotype dataset only phenotyped lines
288 genoDataFilteredObs <- genoData[(rownames(genoData) %in% rownames(commonObs)), ]
290 #drop phenotyped lines without genotype data
291 phenoTrait <- phenoTrait[(phenoTrait$genotypes %in% rownames(commonObs)), ]
293 phenoTraitMarker <- data.frame(phenoTrait)
294 rownames(phenoTraitMarker) <- phenoTraitMarker[, 1]
295 phenoTraitMarker[, 1] <- NULL
297 #impute missing data in prediction data
298 selectionDataMissing <- c()
299 if (length(selectionData) != 0) {
300 #purge markers unique to both populations
301 commonMarkers <- intersect(names(data.frame(genoDataFilteredObs)), names(selectionData))
302 selectionData <- subset(selectionData, select = commonMarkers)
303 genoDataFilteredObs <- subset(genoDataFilteredObs, select= commonMarkers)
305 if (sum(is.na(selectionData)) > 0) {
306 selectionDataMissing <- c('yes')
307 selectionData <- na.roughfix(selectionData)
308 selectionData <- data.frame(selectionData)
311 #change genotype coding to [-1, 0, 1], to use the A.mat ) if [0, 1, 2]
312 genoTrCode <- grep("2", genoDataFilteredObs[1, ], value = TRUE)
313 if(length(genoTrCode)) {
314 genoData <- genoData - 1
315 genoDataFilteredObs <- genoDataFilteredObs - 1
318 if (length(selectionData) != 0 ) {
319 genoSlCode <- grep("2", selectionData[1, ], value = TRUE)
320 if (length(genoSlCode) != 0 ) {
321 selectionData <- selectionData - 1
325 ordered.markerEffects <- c()
326 trGEBV <- c()
327 validationAll <- c()
328 combinedGebvsFile <- c()
329 allGebvs <- c()
330 modelPhenoData <- c()
331 relationshipMatrix <- c()
333 #additive relationship model
334 #calculate the inner products for
335 #genotypes (realized relationship matrix)
336 relationshipMatrixFile <- grep("relationship_matrix_table", outputFiles, value = TRUE)
337 relationshipMatrixJsonFile <- grep("relationship_matrix_json", outputFiles, value = TRUE)
339 traitRelationshipMatrixFile <- grep("relationship_matrix_adjusted_table", outputFiles, value = TRUE)
340 traitRelationshipMatrixJsonFile <- grep("relationship_matrix_adjusted_json", outputFiles, value = TRUE)
342 inbreedingFile <- grep('inbreeding_coefficients', outputFiles, value=TRUE)
343 aveKinshipFile <- grep('average_kinship', outputFiles, value=TRUE)
345 inbreeding <- c()
346 aveKinship <- c()
348 if (length(relationshipMatrixFile) != 0) {
349 if (file.info(relationshipMatrixFile)$size > 0 ) {
350 relationshipMatrix <- data.frame(fread(relationshipMatrixFile,
351 header = TRUE))
353 rownames(relationshipMatrix) <- relationshipMatrix[, 1]
354 relationshipMatrix[, 1] <- NULL
355 colnames(relationshipMatrix) <- rownames(relationshipMatrix)
356 relationshipMatrix <- data.matrix(relationshipMatrix)
358 } else {
359 relationshipMatrix <- A.mat(genoData)
360 diag(relationshipMatrix) <- diag(relationshipMatrix) %>% replace(., . < 1, 1)
361 relationshipMatrix <- relationshipMatrix %>% replace(., . < 0, 0)
363 inbreeding <- diag(relationshipMatrix)
364 inbreeding <- inbreeding - 1
365 inbreeding <- data.frame(inbreeding)
367 inbreeding <- inbreeding %>%
368 rownames_to_column('genotypes') %>%
369 rename(Inbreeding = inbreeding) %>%
370 arrange(Inbreeding) %>%
371 mutate_at('Inbreeding', round, 3) %>%
372 column_to_rownames('genotypes')
376 relationshipMatrix <- data.frame(relationshipMatrix)
377 colnames(relationshipMatrix) <- rownames(relationshipMatrix)
379 relationshipMatrix <- rownames_to_column(relationshipMatrix, var="genotypes")
380 relationshipMatrix <- relationshipMatrix %>% mutate_if(is.numeric, round, 3)
381 relationshipMatrix <- column_to_rownames(relationshipMatrix, var="genotypes")
383 traitRelationshipMatrix <- relationshipMatrix[(rownames(relationshipMatrix) %in% rownames(commonObs)), ]
384 traitRelationshipMatrix <- traitRelationshipMatrix[, (colnames(traitRelationshipMatrix) %in% rownames(commonObs))]
386 traitRelationshipMatrix <- data.matrix(traitRelationshipMatrix)
388 nCores <- detectCores()
390 if (nCores > 1) {
391 nCores <- (nCores %/% 2)
392 } else {
393 nCores <- 1
396 if (length(selectionData) == 0) {
398 trModel <- kin.blup(data = phenoTrait,
399 geno = 'genotypes',
400 pheno = traitAbbr,
401 K = traitRelationshipMatrix,
402 n.core = nCores,
403 PEV = TRUE
406 trGEBV <- trModel$g
407 trGEBVPEV <- trModel$PEV
408 trGEBVSE <- sqrt(trGEBVPEV)
409 trGEBVSE <- data.frame(round(trGEBVSE, 2))
411 trGEBV <- data.frame(round(trGEBV, 2))
413 colnames(trGEBVSE) <- c('SE')
414 colnames(trGEBV) <- traitAbbr
416 trGEBVSE <- rownames_to_column(trGEBVSE, var="genotypes")
417 trGEBV <- rownames_to_column(trGEBV, var="genotypes")
419 trGEBVSE <- full_join(trGEBV, trGEBVSE)
421 trGEBVSE <- trGEBVSE %>% arrange_(.dots= paste0('desc(', traitAbbr, ')'))
423 trGEBVSE <- column_to_rownames(trGEBVSE, var="genotypes")
425 trGEBV <- trGEBV %>% arrange_(.dots = paste0('desc(', traitAbbr, ')'))
426 trGEBV <- column_to_rownames(trGEBV, var="genotypes")
428 phenoTraitMarker <- data.matrix(phenoTraitMarker)
429 genoDataFilteredObs <- data.matrix(genoDataFilteredObs)
431 markerEffects <- mixed.solve(y = phenoTraitMarker,
432 Z = genoDataFilteredObs
435 ordered.markerEffects <- data.matrix(markerEffects$u)
436 ordered.markerEffects <- data.matrix(ordered.markerEffects [order (-ordered.markerEffects[, 1]), ])
437 ordered.markerEffects <- round(ordered.markerEffects, 5)
439 colnames(ordered.markerEffects) <- c("Marker Effects")
440 ordered.markerEffects <- data.frame(ordered.markerEffects)
443 modelPhenoData <- data.frame(round(phenoTraitMarker, 2))
445 heritability <- round((trModel$Vg/(trModel$Ve + trModel$Vg)), 2)
446 additiveVar <- round(trModel$Vg, 2)
447 errorVar <- round(trModel$Ve, 2)
449 cat("\n", file = varianceComponentsFile, append = FALSE)
450 cat('Additive genetic variance', additiveVar , file = varianceComponentsFile, sep = '\t', append = TRUE)
451 cat("\n", file = varianceComponentsFile, append = TRUE)
452 cat('Error variance', errorVar, file = varianceComponentsFile, sep = "\t", append = TRUE)
453 cat("\n", file = varianceComponentsFile, append = TRUE)
454 cat('SNP heritability (h)', heritability, file = varianceComponentsFile, sep = '\t', append = TRUE)
456 combinedGebvsFile <- grep('selected_traits_gebv', outputFiles, ignore.case = TRUE,value = TRUE)
458 if (length(combinedGebvsFile) != 0) {
459 fileSize <- file.info(combinedGebvsFile)$size
460 if (fileSize != 0 ) {
461 combinedGebvs <- data.frame(fread(combinedGebvsFile,
462 header = TRUE))
464 rownames(combinedGebvs) <- combinedGebvs[,1]
465 combinedGebvs[,1] <- NULL
467 allGebvs <- merge(combinedGebvs, trGEBV,
468 by = 0,
469 all = TRUE
472 rownames(allGebvs) <- allGebvs[,1]
473 allGebvs[,1] <- NULL
477 #cross-validation
479 if (is.null(selectionFile)) {
480 genoNum <- nrow(phenoTrait)
482 if (genoNum < 20 ) {
483 warning(genoNum, " is too small number of genotypes.")
486 set.seed(4567)
489 k <- 10
490 times <- 2
491 cvFolds <- createMultiFolds(phenoTrait[, 2], k=k, times=times)
493 for ( r in 1:times) {
494 re <- paste0('Rep', r)
496 for (i in 1:k) {
497 fo <- ifelse(i < 10, 'Fold0', 'Fold')
499 trFoRe <- paste0(fo, i, '.', re)
500 trG <- cvFolds[[trFoRe]]
501 slG <- as.numeric(rownames(phenoTrait[-trG,]))
503 kblup <- paste("rKblup", i, sep = ".")
505 result <- kin.blup(data = phenoTrait[trG,],
506 geno = 'genotypes',
507 pheno = traitAbbr,
508 K = traitRelationshipMatrix,
509 n.core = nCores,
510 PEV = TRUE
513 assign(kblup, result)
515 #calculate cross-validation accuracy
516 valBlups <- result$g
518 valBlups <- data.frame(valBlups)
520 slG <- slG[which(slG <= nrow(phenoTrait))]
522 slGDf <- phenoTrait[(rownames(phenoTrait) %in% slG),]
523 rownames(slGDf) <- slGDf[, 1]
524 slGDf[, 1] <- NULL
526 valBlups <- rownames_to_column(valBlups, var="genotypes")
527 slGDf <- rownames_to_column(slGDf, var="genotypes")
529 valCorData <- inner_join(slGDf, valBlups, by="genotypes")
530 valCorData$genotypes <- NULL
532 accuracy <- try(cor(valCorData))
533 validation <- paste("validation", trFoRe, sep = ".")
534 cvTest <- paste("CV", trFoRe, sep = " ")
536 if (inherits(accuracy, 'try-error') == FALSE)
538 accuracy <- round(accuracy[1,2], digits = 3)
539 accuracy <- data.matrix(accuracy)
541 colnames(accuracy) <- c("correlation")
542 rownames(accuracy) <- cvTest
544 assign(validation, accuracy)
546 if (!is.na(accuracy[1,1])) {
547 validationAll <- rbind(validationAll, accuracy)
553 validationAll <- data.frame(validationAll[order(-validationAll[, 1]), ])
554 colnames(validationAll) <- c('Correlation')
558 selectionPopResult <- c()
559 selectionPopGEBVs <- c()
560 selectionPopGEBVSE <- c()
562 #selection pop geno data after cleaning up and removing unique markers to selection pop
563 filteredSelGenoData <- selectionData
564 if (length(selectionData) != 0) {
566 genoDataTrSl <- rbind(genoDataFilteredObs, selectionData)
567 rTrSl <- A.mat(genoDataTrSl)
569 selectionPopResult <- kin.blup(data = phenoTrait,
570 geno = 'genotypes',
571 pheno = traitAbbr,
572 K = rTrSl,
573 n.core = nCores,
574 PEV = TRUE
577 selectionPopGEBVs <- round(data.frame(selectionPopResult$g), 2)
578 colnames(selectionPopGEBVs) <- traitAbbr
579 selectionPopGEBVs <- rownames_to_column(selectionPopGEBVs, var="genotypes")
581 selectionPopPEV <- selectionPopResult$PEV
582 selectionPopSE <- sqrt(selectionPopPEV)
583 selectionPopSE <- data.frame(round(selectionPopSE, 2))
584 colnames(selectionPopSE) <- 'SE'
585 genotypesSl <- rownames(selectionData)
587 selectionPopSE <- rownames_to_column(selectionPopSE, var="genotypes")
588 selectionPopSE <- selectionPopSE %>% filter(genotypes %in% genotypesSl)
590 selectionPopGEBVs <- selectionPopGEBVs %>% filter(genotypes %in% genotypesSl)
592 selectionPopGEBVSE <- inner_join(selectionPopGEBVs, selectionPopSE, by="genotypes")
594 sortVar <- parse_expr(traitAbbr)
595 selectionPopGEBVs <- selectionPopGEBVs %>% arrange(desc((!!sortVar)))
596 selectionPopGEBVs <- column_to_rownames(selectionPopGEBVs, var="genotypes")
598 selectionPopGEBVSE <- selectionPopGEBVSE %>% arrange(desc((!!sortVar)))
599 selectionPopGEBVSE <- column_to_rownames(selectionPopGEBVSE, var="genotypes")
602 if (!is.null(selectionPopGEBVs) & length(selectionPopGEBVsFile) != 0) {
603 fwrite(selectionPopGEBVs,
604 file = selectionPopGEBVsFile,
605 row.names = TRUE,
606 sep = "\t",
607 quote = FALSE,
611 if(!is.null(validationAll)) {
612 fwrite(validationAll,
613 file = validationFile,
614 row.names = TRUE,
615 sep = "\t",
616 quote = FALSE,
621 if (!is.null(ordered.markerEffects)) {
622 fwrite(ordered.markerEffects,
623 file = markerFile,
624 row.names = TRUE,
625 sep = "\t",
626 quote = FALSE,
631 if (!is.null(trGEBV)) {
632 fwrite(trGEBV,
633 file = blupFile,
634 row.names = TRUE,
635 sep = "\t",
636 quote = FALSE,
640 if (length(combinedGebvsFile) != 0 ) {
641 if(file.info(combinedGebvsFile)$size == 0) {
642 fwrite(trGEBV,
643 file = combinedGebvsFile,
644 row.names = TRUE,
645 sep = "\t",
646 quote = FALSE,
648 } else {
649 fwrite(allGebvs,
650 file = combinedGebvsFile,
651 row.names = TRUE,
652 sep = "\t",
653 quote = FALSE,
659 if (!is.null(modelPhenoData) && length(modelPhenoFile) != 0) {
661 if (!is.null(meanType)) {
662 colnames(modelPhenoData) <- meanType
665 fwrite(modelPhenoData,
666 file = modelPhenoFile,
667 row.names = TRUE,
668 sep = "\t",
669 quote = FALSE,
673 if (!is.null(genoDataFilteredObs) && length(modelGenoFile) != 0) {
675 fwrite(genoDataFilteredObs,
676 file = modelGenoFile,
677 row.names = TRUE,
678 sep = "\t",
679 quote = FALSE,
683 if (!is.null(traitRawPhenoData) && length(traitRawPhenoFile) != 0) {
685 fwrite(traitRawPhenoData,
686 file = traitRawPhenoFile,
687 row.names = FALSE,
688 sep = "\t",
689 na = 'NA',
690 quote = FALSE,
694 message('filteredTrainingGenoFile: ', filteredTrainingGenoFile)
696 if (!is.null(filteredTrainingGenoData) && file.info(filteredTrainingGenoFile)$size == 0) {
697 fwrite(filteredTrainingGenoData,
698 file = filteredTrainingGenoFile,
699 row.names = TRUE,
700 sep = "\t",
701 quote = FALSE,
706 if (length(filteredSelGenoFile) != 0 && file.info(filteredSelGenoFile)$size == 0) {
707 fwrite(filteredSelGenoData,
708 file = filteredSelGenoFile,
709 row.names = TRUE,
710 sep = "\t",
711 quote = FALSE,
715 ## if (!is.null(genoDataMissing)) {
716 ## write.table(genoData,
717 ## file = genoFile,
718 ## sep = "\t",
719 ## col.names = NA,
720 ## quote = FALSE,
721 ## )
723 ## }
725 ## if (!is.null(predictionDataMissing)) {
726 ## write.table(predictionData,
727 ## file = predictionFile,
728 ## sep = "\t",
729 ## col.names = NA,
730 ## quote = FALSE,
731 ## )
732 ## }
735 if (file.info(relationshipMatrixFile)$size == 0) {
737 fwrite(relationshipMatrix,
738 file = relationshipMatrixFile,
739 row.names = TRUE,
740 sep = "\t",
741 quote = FALSE,
745 if (file.info(relationshipMatrixJsonFile)$size == 0) {
747 relationshipMatrixJson <- relationshipMatrix
748 relationshipMatrixJson[upper.tri(relationshipMatrixJson)] <- NA
751 relationshipMatrixJson <- data.frame(relationshipMatrixJson)
753 relationshipMatrixList <- list(labels = names(relationshipMatrixJson),
754 values = relationshipMatrixJson)
756 relationshipMatrixJson <- jsonlite::toJSON(relationshipMatrixList)
759 write(relationshipMatrixJson,
760 file = relationshipMatrixJsonFile,
765 if (file.info(traitRelationshipMatrixFile)$size == 0) {
767 inbre <- diag(traitRelationshipMatrix)
768 inbre <- inbre - 1
770 diag(traitRelationshipMatrix) <- inbre
772 traitRelationshipMatrix <- data.frame(traitRelationshipMatrix) %>% replace(., . < 0, 0)
774 fwrite(traitRelationshipMatrix,
775 file = traitRelationshipMatrixFile,
776 row.names = TRUE,
777 sep = "\t",
778 quote = FALSE,
781 if (file.info(traitRelationshipMatrixJsonFile)$size == 0) {
783 traitRelationshipMatrixJson <- traitRelationshipMatrix
784 traitRelationshipMatrixJson[upper.tri(traitRelationshipMatrixJson)] <- NA
786 traitRelationshipMatrixJson <- data.frame(traitRelationshipMatrixJson)
788 traitRelationshipMatrixList <- list(labels = names(traitRelationshipMatrixJson),
789 values = traitRelationshipMatrixJson)
791 traitRelationshipMatrixJson <- jsonlite::toJSON(traitRelationshipMatrixList)
793 write(traitRelationshipMatrixJson,
794 file = traitRelationshipMatrixJsonFile,
800 if (file.info(inbreedingFile)$size == 0) {
802 fwrite(inbreeding,
803 file = inbreedingFile,
804 row.names = TRUE,
805 sep = "\t",
806 quote = FALSE,
811 if (file.info(aveKinshipFile)$size == 0) {
813 aveKinship <- data.frame(apply(traitRelationshipMatrix, 1, mean))
815 aveKinship<- aveKinship %>%
816 rownames_to_column('genotypes') %>%
817 rename(Mean_kinship = contains('traitRe')) %>%
818 arrange(Mean_kinship) %>%
819 mutate_at('Mean_kinship', round, 3) %>%
820 column_to_rownames('genotypes')
822 fwrite(aveKinship,
823 file = aveKinshipFile,
824 row.names = TRUE,
825 sep = "\t",
826 quote = FALSE,
831 if (file.info(formattedPhenoFile)$size == 0 && !is.null(formattedPhenoData) ) {
832 fwrite(formattedPhenoData,
833 file = formattedPhenoFile,
834 row.names = TRUE,
835 sep = "\t",
836 quote = FALSE,
840 message("Done.")
842 q(save = "no", runLast = FALSE)