2 #calculates kinship, indbreeding coefficients
5 # Isaak Y Tecle (iyt2@cornell.edu)
16 library(genoDataFilter
)
25 allArgs
<- commandArgs()
28 inputFiles
<- scan(grep("input_files", allArgs
, value
= TRUE),
31 outputFiles
<- scan(grep("output_files", allArgs
, value
= TRUE),
36 createGenoData
<- function(inputFiles
) {
38 genoFiles
<- grep("genotype_data", inputFiles
, value
= TRUE)
41 filteredGenoFile
<- c()
43 if (length(genoFiles
) > 1) {
44 genoData
<- combineGenoData(genoFiles
)
46 genoMetaData
<- genoData$trial
47 genoData$trial
<- NULL
52 genoData
<- fread(genoFile
,
54 na
.strings
= c("NA", " ", "--", "-", "."))
56 if (is
.null(genoData
)) {
57 filteredGenoFile
<- grep("filtered_genotype_data_", genoFile
, value
= TRUE)
58 genoData
<- fread(filteredGenoFile
, header
= TRUE)
62 genoData
<- unique(genoData
, by
= 'V1')
63 genoData
<- data
.frame(genoData
)
64 genoData
<- column_to_rownames(genoData
, 'V1')
67 if (is
.null(genoData
)) {
68 stop("There is no genotype dataset.")
72 ##genoDataFilter::filterGenoData
73 genoData
<- convertToNumeric(genoData
)
74 genoData
<- filterGenoData(genoData
, maf
=0.01)
75 genoData
<- roundAlleleDosage(genoData
)
77 message("No. of geno missing values, ", sum(is
.na(genoData
)))
78 if (sum(is
.na(genoData
)) > 0) {
79 genoData
<- na
.roughfix(genoData
)
82 genoData
<- data
.frame(genoData
)
86 genoData
<- createGenoData(inputFiles
)
87 genoData
<- genoData
[order(row
.names(genoData
)), ]
89 #change genotype coding to [-1, 0, 1], to use the A.mat ) if [0, 1, 2]
90 genoTrCode
<- grep("2", genoData
[1, ], value
= TRUE)
91 if(length(genoTrCode
) != 0) {
92 genoData
<- genoData
- 1
95 relationshipMatrixFile
<- grep("relationship_matrix_adjusted_table", outputFiles
, value
= TRUE)
96 relationshipMatrixJsonFile
<- grep("relationship_matrix_adjusted_json", outputFiles
, value
= TRUE)
98 message('matrix file ', relationshipMatrixFile
)
99 message('json file ', relationshipMatrixJsonFile
)
101 inbreedingFile
<- grep('inbreeding_coefficients', outputFiles
, value
=TRUE)
102 aveKinshipFile
<- grep('average_kinship', outputFiles
, value
=TRUE)
104 message('inbreeding file ', inbreedingFile
)
105 message('ave file ', aveKinshipFile
)
107 relationshipMatrix
<- c()
110 relationshipMatrixJson
<- c()
113 relationshipMatrix
<- A
.mat(genoData
)
114 diag(relationshipMatrix
) <- diag(relationshipMatrix
) + 1e-6
115 genos
<- rownames(relationshipMatrix
)
117 relationshipMatrix
<- data
.frame(relationshipMatrix
)
119 colnames(relationshipMatrix
) <- genos
120 rownames(relationshipMatrix
) <- genos
122 relationshipMatrix
<- relationshipMatrix
%>%
123 rownames_to_column('genotypes') %>%
124 mutate_if(is
.numeric
, round
, 3) %>%
125 column_to_rownames('genotypes')
129 inbreeding
<- diag(data
.matrix(relationshipMatrix
))
130 inbreeding
<- inbreeding
- 1
131 diag(relationshipMatrix
) <- inbreeding
133 relationshipMatrix
<- relationshipMatrix
%>% replace(., . < 0, 0)
134 relationshipMatrix
<- relationshipMatrix
%>% replace(., . > 1, 0.99)
136 inbreeding
<- inbreeding
%>% replace(., . < 0, 0)
137 inbreeding
<- inbreeding
%>% replace(., . > 1, . - 1)
138 inbreeding
<- data
.frame(inbreeding
)
140 inbreeding
<- inbreeding
%>%
141 rownames_to_column('genotypes') %>%
142 rename(Inbreeding
= inbreeding
) %>%
143 arrange(Inbreeding
) %>%
144 mutate_at('Inbreeding', round
, 3) %>%
145 column_to_rownames('genotypes')
148 aveKinship
<- data
.frame(apply(relationshipMatrix
, 1, mean
))
150 aveKinship
<- aveKinship
%>%
151 rownames_to_column('genotypes') %>%
152 rename(Mean_kinship
= contains('apply')) %>%
153 arrange(Mean_kinship
) %>%
154 mutate_at('Mean_kinship', round
, 3) %>%
155 column_to_rownames('genotypes')
157 relationshipMatrixJson
<- relationshipMatrix
158 relationshipMatrixJson
[upper
.tri(relationshipMatrixJson
)] <- NA
161 #relationshipMatrixJson <- data.frame(relationshipMatrixJson)
162 relationshipMatrixList
<- list(labels
= names(relationshipMatrixJson
),
163 values
= relationshipMatrixJson
)
164 relationshipMatrixJson
<- jsonlite
::toJSON(relationshipMatrixList
)
167 #if (file.info(relationshipMatrixFile)$size == 0) {
169 fwrite(relationshipMatrix
,
170 file
= relationshipMatrixFile
,
177 #if (file.info(relationshipMatrixJsonFile)$size == 0) {
179 write(relationshipMatrixJson
,
180 file
= relationshipMatrixJsonFile
,
185 message('inbreedingfile ', inbreedingFile
)
186 message('ave file', aveKinshipFile
)
187 message('kinshipfile ', relationshipMatrixFile
)
188 #if (file.info(inbreedingFile)$size == 0) {
191 file
= inbreedingFile
,
199 #if (file.info(aveKinshipFile)$size == 0) {
202 file
= aveKinshipFile
,
212 q(save
= "no", runLast
= FALSE)