3 #runs population structure analysis using singular values decomposition (SVD)
6 # Isaak Y Tecle (iyt2@cornell.edu)
16 allArgs
<- commandArgs()
18 outFile
<- grep("output_files",
25 outFiles
<- scan(outFile
,
29 genoDataFile
<- grep("genotype_data",
36 scoresFile
<- grep("pca_scores",
43 loadingsFile
<- grep("pca_loadings",
50 varianceFile
<- grep("pca_variance",
57 message("genotype file: ", genoDataFile
)
58 message("pca scores file: ", scoresFile
)
59 message("pca loadings file: ", loadingsFile
)
60 message("pca variance file: ", varianceFile
)
62 if (is
.null(genoDataFile
))
64 stop("genotype dataset missing.")
68 if (is
.null(scoresFile
))
70 stop("Scores output file is missing.")
74 if (is
.null(loadingsFile
))
76 stop("Laodings file is missing.")
80 genoData
<- as
.data
.frame(fread(genoDataFile
,
81 na
.strings
= c("NA", " ", "--", "-", "."),
84 rownames(genoData
) <- genoData
[, 1]
90 message("No. of geno missing values, ", sum(is
.na(genoData
)) )
92 genoData
<- genoData
[, colSums(is
.na(genoData
)) < nrow(genoData
) * 0.5]
94 #genoData <- as.data.frame(round(genoData, digits=0))
96 #change genotype coding to [-1, 0, 1], to use the A.mat ) if [0, 1, 2]
97 #genoTrCode <- grep("2", genoData[1, ], fixed=TRUE, value=TRUE)
98 #if(length(genoTrCode) != 0) {
99 # genoData <- genoData - 1
102 message("No. of geno missing values, ", sum(is
.na(genoData
)) )
103 genoDataMissing
<- c()
104 if (sum(is
.na(genoData
)) > 0) {
105 genoDataMissing
<- c('yes')
106 genoData
<- na
.roughfix(genoData
)
111 genotypes
<- rownames(genoData
)
112 svdOut
<- irlba(scale(genoData
, TRUE, FALSE), nu
=10, nv
=10)
113 scores
<- round(svdOut$u
%*% diag(svdOut$d
), digits
=2)
114 loadings
<- round(svdOut$v
, digits
=5)
115 totalVar
<- sum(svdOut$d
)
119 round((x
/ totalVar
)*100, digits
=2)
123 variances
<- data
.frame(variances
)
124 scores
<- data
.frame(scores
)
125 rownames(scores
) <- genotypes
129 headers
[i
] <- paste("PC", i
, sep
='')
132 colnames(scores
) <- c(headers
)
142 write
.table(loadings
,
150 write
.table(variances
,
159 if (!is
.null(genoDataMissing
)) {
160 write
.table(genoData
,
169 q(save
= "no", runLast
= FALSE)