3 function (genoData, allele.matrix, numStep = 1, ...)
5 rawGenoSum <- NumRecomEvents(genoData = genoData)
6 for (x in 1:(nrow(allele.matrix) - numStep + 1)) {
7 ids <- x:(x + numStep - 1)
8 new.genoData <- genoData
9 new.genoData[ids, ] <- abs(new.genoData[ids, ] - 1)
10 newGenoSum <- NumRecomEvents(genoData = new.genoData)
11 if (newGenoSum < rawGenoSum) {
12 allele.matrix[ids, ] <- allele.matrix[ids, c(2, 1)]
13 genoData <- new.genoData
14 rawGenoSum <- newGenoSum
17 list(rawGenoSum, allele.matrix, genoData)
25 `._rice_phy2get_factor_` <-
30 function (baseData, allele.matrix, genoData = NULL)
32 if (is.null(genoData)) {
33 genoData <- base2Geno(baseData, allele.matrix)
36 idsBorder <- cumsum(colSums(y))
37 idsBorder <- idsBorder[idsBorder < sum(y)]
38 sum(diff(as.numeric(genoData[y]))[-idsBorder] != 0, na.rm = TRUE)
52 # if (length(x[x == 'R'])) {
58 .IUPAC=c('R','Y','S','W','K','M','B','D','H','V','N');
59 .Decode=list(R=c('A','G'),Y=c('C','T'),S=c('G','C'),W=c('A','T'),K=c('G','T'),M=c('A','C'),B=c('C','G','T'),D=c('A','G','T'),H=c('A','C','T'),V=c('A','C','G'),N=NA);
62 function (baseData = NULL)
64 allele.matrix <- t(apply(baseData, 1, function(x) {
65 x <- unique(x[!is.na(x)])
67 for(i in 1:length(.IUPAC)) {
68 if ( length(grep(.IUPAC[i],x)) ) {
69 x=x[-(grep(.IUPAC[i],x))]
74 x <- unique(x[!is.na(x)])
77 else warning('SNP sit is not biallelic!')
80 colnames(allele.matrix) <- c('P1', 'P2')
81 na.exclude(allele.matrix)
86 function (baseData = NULL, allele.matrix = NULL)
88 if (nrow(baseData) == ncol(allele.matrix))
89 allele.matrix <- t(allele.matrix)
90 if (nrow(baseData) != nrow(allele.matrix))
91 stop('nrow(baseData)!=nrow(allele.matrix), allele.matrix error!!!')
93 genoData[baseData == allele.matrix[, 1]] <- 0
94 genoData[baseData == allele.matrix[, 2]] <- 1
95 genoData[genoData != 1 & genoData != 0] <- NA
96 genoData <- matrix(as.numeric(genoData), ncol = ncol(genoData))
97 dimnames(genoData) <- dimnames(baseData)
101 # calculateLODScore.R
102 `calculateLODScore` <-
103 function (geno.data, pheno.data, mapInfo)
105 geno.data.tmp <- geno.data > 0.5
106 geno.data.tmp[geno.data.tmp == FALSE] <- NA
107 ph1 <- matrix(t(matrix(pheno.data, nrow = length(pheno.data),
108 ncol = nrow(geno.data)))[geno.data.tmp], ncol = ncol(geno.data))
109 geno.data.tmp <- geno.data < 0.5
110 geno.data.tmp[geno.data.tmp == FALSE] <- NA
111 ph0 <- matrix(t(matrix(pheno.data, nrow = length(pheno.data),
112 ncol = nrow(geno.data)))[geno.data.tmp], ncol = ncol(geno.data))
113 g1 <- rowMeans(ph1, na.rm = T)
114 n1 <- rowSums(!is.na(ph1))
115 s1 <- rowVars(ph1, na.rm = T)
116 g0 <- rowMeans(ph0, na.rm = T)
117 n0 <- rowSums(!is.na(ph0))
118 s0 <- rowVars(ph0, na.rm = T)
119 n <- rowSums(!is.na(geno.data))
120 ss <- (s1 * (n1 - 1) + s0 * (n0 - 1))/(n1 + n0 - 2)
121 t <- (g1 - g0)/sqrt(ss/n1 + ss/n0)
122 res <- data.frame(mapInfo, '-log10 P' = -log10(pt(abs(t),
123 df = pmax(1, n1 + n0 - 2), lower.tail = F, log.p = FALSE) *
124 2), t = t, d = g1 - g0, n0 = n0, n1 = n1, check.names = FALSE)
125 class(res) <- c('scanone', 'data.frame')
131 function (x, base.position = as.numeric(sub('[^0-9]*([0-9]*)[^0-9]*',
132 '\\1', names(x))), hmmFUN = hmm.vitFUN.rils, geno.probability = c(0.495,
133 0.495, 0.01), transitionFUN = phy2get.haldane.rils, emissionFUN = makeEmissionFUN(errorRate = 0.01),
136 x.cr <- hmmFUN(geno = x + 1, position = base.position, geno.probability = geno.probability,
137 transitionFUN = transitionFUN, emissionFUN = emissionFUN,
139 x.cr[x.cr == 2] <- .MPR_hetero_
145 function (geno.data, base.position = as.numeric(sub('[^0-9]*([0-9]*)[^0-9]*',
146 '\\1', rownames(geno.data))), correct.FUN = correctFUNHMM,
147 minInterval = 1, verbose = TRUE, ...)
150 geno.data.cr <- apply(geno.data, 2, function(x, ...) {
152 cat('\r', i <<- i + 1)
153 x.nna <- which(!is.na(x))
154 ids <- sort(unique(x.nna[c(1, which(diff(x[x.nna]) !=
155 0 | diff(base.position[x.nna]) >= minInterval) +
157 x.cr <- correct.FUN(x[ids], base.position = base.position[ids],
168 # findBlockAndFilter.R
169 `findBlockAndFilter` <-
170 function (x, base.position = 1:length(x), size = sum(blocks[,
171 'size'], na.rm = TRUE)/100, num = sum(blocks[, 'num'], na.rm = TRUE)/100,
172 extendNA = TRUE, fillSmallNA = FALSE)
177 block_start <- base.position[1]
178 block_end <- base.position[t]
182 x.val[is.na(x)] <- max(x, na.rm = TRUE) + 1
183 x.diff <- sort(unique(c(1, which(x.val[-1] != x.val[-t]) +
185 x.diff <- x.diff[x.diff <= t]
186 block.num <- length(x.diff)
188 return(blocks <- cbind(start = block_start, end = block_end,
189 size = block_end - block_start + 1, num = blockNumItem,
191 block_start <- base.position[x.diff]
192 block_end <- base.position[c(x.diff[-1] - 1, t)]
193 blockNumItem <- c(diff(x.diff), t - x.diff[block.num] + 1)
194 blockType <- x[x.diff]
195 blocks <- cbind(start = block_start, end = block_end, size = block_end -
196 block_start + 1, num = blockNumItem, type = blockType)
198 block.ids <- which(is.na(blocks[, 'type']) & blocks[,
200 block.ids.next <- block.ids[block.ids < nrow(blocks)]
201 block.ids.pre <- block.ids[block.ids > 1]
202 if (length(block.ids.next) > 0)
203 blocks[block.ids.next, 'end'] <- blocks[block.ids.next +
205 if (length(block.ids.pre) > 0)
206 blocks[block.ids.pre, 'start'] <- blocks[block.ids.pre -
208 blocks[, 'size'] <- blocks[, 'end'] - blocks[, 'start'] +
211 blocks.margin <- block_start[-1] - block_end[-length(block_end)] -
213 blocks.extentSize <- as.numeric(blocks[, 'size']) + c(blocks.margin,
214 0) + c(0, blocks.margin)
215 filter.block <- (blocks.extentSize < size & as.numeric(blocks[,
217 if (sum(filter.block, na.rm = T) > 0) {
218 blocks[filter.block, 'type'] <- NA
220 blocks <- mergeBlocks(blocks)
221 if (nrow(blocks) == 1)
224 blocks.margin <- blocks[-1, 'start'] - blocks[-nrow(blocks),
226 blocks.extentSize <- as.numeric(blocks[, 'size']) + c(blocks.margin,
227 0) + c(0, blocks.margin)
228 filter.block <- blocks.extentSize < size
229 block.na.ids <- which(is.na(blocks[, 'type']) & filter.block)
230 block.na.num <- length(block.na.ids)
231 if (block.na.num > 0) {
232 blocks[block.na.ids, 'type'] <- sapply(block.na.ids,
235 return(blocks[2, 'type'])
236 if (i == nrow(blocks))
237 return(blocks[nrow(blocks) - 1, 'type'])
238 if (blocks[i - 1, 'type'] == blocks[i + 1,
240 return(blocks[i - 1, 'type'])
245 blocks <- mergeBlocks(blocks)
246 if (nrow(blocks) == 1)
248 block.ids <- which(is.na(blocks[, 'type']))
249 block.ids.next <- block.ids[block.ids < nrow(blocks)]
250 block.ids.pre <- block.ids[block.ids > 1]
251 if (length(block.ids.next) > 0)
252 blocks[block.ids.next, 'end'] <- blocks[block.ids.next +
254 if (length(block.ids.pre) > 0)
255 blocks[block.ids.pre, 'start'] <- blocks[block.ids.pre -
258 for (i in 2:nrow(blocks)) if (blocks[i, 1] - blocks[i - 1,
260 tmp.blocks <- rbind(tmp.blocks, c(blocks[i - 1, 'end'] +
261 1, blocks[i, 'start'] - 1, blocks[i, 'start'] - blocks[i -
262 1, 'end'] - 1, 0, NA))
263 blocks <- tmp.blocks[order(tmp.blocks[, 'end']), ]
269 function (geno.data, pheno.data, cm_unit = 250000)
271 myGeno.data <- geno.data
272 geno.value <- sort(unique(na.omit(c(myGeno.data))))
273 if (identical(as.numeric(geno.value), c(0, 1))) {
274 myGeno.data <- myGeno.data + 1
275 geno.value <- sort(unique(na.omit(c(myGeno.data))))
277 if (!identical(as.numeric(geno.value), c(1, 2)))
278 stop('the value of geno.data should be 1, 2, or NA.')
279 ids.RILs <- match(colnames(myGeno.data), rownames(pheno.data))
280 myGeno.site <- data.frame(Chr = as.numeric(substr(rownames(myGeno.data),
281 1, 2)), Position = as.numeric(substr(rownames(myGeno.data),
283 myCrossData <- list()
284 myCrossData$geno <- lapply(split(1:nrow(myGeno.data), myGeno.site$Chr),
286 myMarkers <- myGeno.site$Position[ids]/cm_unit
287 names(myMarkers) <- rownames(myGeno.data)[ids]
288 myMap <- list(data = t(myGeno.data[ids, ]), map = myMarkers)
292 myCrossData$pheno <- as.data.frame(pheno.data[ids.RILs, ])
293 class(myCrossData) <- c('riself', 'cross')
294 attr(myCrossData, 'alleles') <- c('A', 'B')
300 function (genoData, base.position = 1:nrow(genoData), corrected = FALSE,
301 correct.FUN = correctFUNHMM, size = 250000, num = 5, fillSmallNA = TRUE,
302 minBinsize = 0, seqERR = 0.01, heterozygote = FALSE, ...)
304 SNPbyChr <- split(1:nrow(genoData), 1)
306 res <- lapply(SNPbyChr, function(ids) {
309 blocks <- apply(genoData[ids, ], 2, function(x) {
310 cat('chr: ', i.chr, '\tline: ', i.line <<- i.line +
311 1, '\t', colnames(genoData)[i.line], '\r')
313 if (corrected == FALSE)
314 x.correct <- correct.FUN(x[x.nna], base.position[ids][x.nna],
316 else x.correct <- x[x.nna]
317 blocks.mat <- findBlockAndFilter(x.correct, base.position = base.position[ids][x.nna],
318 size = size, num = num, fillSmallNA = fillSmallNA)
319 blocks.mat[nrow(blocks.mat), 'end'] <- max(base.position[ids])
320 blocks.mat[nrow(blocks.mat), 'size'] <- blocks.mat[nrow(blocks.mat),
321 'end'] - blocks.mat[nrow(blocks.mat), 'start'] +
323 if (heterozygote == FALSE)
324 blocks.mat[blocks.mat[, 'type'] == .MPR_hetero_, 'type'] <- NA
325 blocks.mat <- mergeBlocks(blocks.mat)
328 blocks.mat <- matrix(unlist(blocks, recursive = TRUE,
329 use.names = FALSE), ncol = 5, byrow = TRUE)
330 bin.border <- sort(unique(as.numeric(blocks.mat[, 2])))
331 cat('\rchr: ', i.chr, '\tTotal', length(bin.border),
333 bin.border <- sort(unique(bin.border[c(1, which(diff(bin.border) >=
335 cat(length(bin.border), 'borders after filtering out bins less than',
336 round(minBinsize/1000, 1), 'kb.\n')
337 geno.bin <- sapply(blocks, function(blocks.line) {
338 blocks.end <- rbind(blocks.line)[2, ]
339 ids <- match(bin.border, blocks.end)
340 ids[is.na(ids)] <- findInterval(bin.border[is.na(ids)],
341 blocks.end, rightmost.closed = FALSE) + 1
342 filter <- ids > ncol(blocks.line)
343 ids[filter] <- ncol(blocks.line)
344 rbind(blocks.line)[5, ids]
346 rownames(geno.bin) <- sprintf('%010d', bin.border)
347 list(block = blocks, bin = geno.bin, border = bin.border)
350 for (i in 1:length(res)) geno.bin <- rbind(geno.bin, res[[i]][[2]])
352 list(block = res, bin = geno.bin, border = as.numeric(rownames(geno.bin)))
355 # genotypeCallsBayes.R
356 `genotypeCallsBayes` <-
357 function (ALLELE.num, errorRate = 5e-04, eps = 1e-10, maxIterate = 100,
361 P0.p1 <- P0.p2 <- (1 - P0.n)/2
364 P.p1 <- P.p2 <- (1 - P.n)/2
367 while (abs(P0.n - P.n) > eps && numIterate <= maxIterate) {
371 n <- rowSums(ALLELE.num)
373 ALLELE.prob <- cbind((1 - E)^k * E^(n - k), 0.5^n, (1 -
374 E)^(n - k) * E^k) %*% diag(c(P0.p1, P0.n, P0.p2))
375 ALLELE.type <- rep(NA, nrow(ALLELE.prob))
376 ALLELE.maxprob <- rowMax(ALLELE.prob)
377 ALLELE.type[ALLELE.prob[, 1] == ALLELE.maxprob] <- 1
378 ALLELE.type[ALLELE.prob[, 3] == ALLELE.maxprob] <- 3
379 ALLELE.type[ALLELE.prob[, 2] == ALLELE.maxprob | ALLELE.prob[,
380 1] == ALLELE.prob[, 3]] <- 2
381 P.n <- mean(ALLELE.type == 2)
382 P.p1 <- P.p2 <- (1 - P.n)/2
384 cat(P.p1, P.n, P.p2, table(filter.markers <- ALLELE.type !=
385 2), '\n', sep = '\t')
386 numIterate <- numIterate + 1
388 ALLELE.prob <- ALLELE.prob/rowSums(ALLELE.prob, na.rm = TRUE)
389 list(prop = c(P.p1, P.n, P.p2), prob = ALLELE.prob, type = ALLELE.type)
392 # globalMPRByMarkers.R
393 `globalMPRByMarkers` <-
394 function (baseData, markers = NULL, alleleA = NULL, numTry = 3,
395 numBaseStep = 50, numBaseCandidateStep = numBaseStep * 2,
396 numKnownStep = pmax(numBaseStep/5, 10), numKnownCandidateStep = numKnownStep *
397 1.5, useMedianToFindKnown = TRUE, maxIterate = 150, maxNStep = 3,
398 scoreMin = 0.8, verbose = FALSE, strSTART = '\r', strEND = '',
401 if (is.null(alleleA))
403 ALLELE.mat <- matrix(NA, nrow = nrow(baseData), ncol = 2)
404 rownames(ALLELE.mat) <- rownames(baseData)
405 alleleA.base <- alleleA[match(rownames(ALLELE.mat), names(alleleA))]
406 alleleA.ids <- which(!is.na(alleleA.base))
407 if (numKnownCandidateStep > length(alleleA.ids))
408 numKnownCandidateStep <- length(alleleA.ids)
409 if (numKnownStep > numKnownCandidateStep)
410 numKnownStep <- numKnownCandidateStep
412 ids.RILrows <- which(rowSums(!is.na(cbind(baseData))) > 0)
413 rowN <- length(ids.RILrows)
414 ids.times <- rep(0, rowN)
415 ids.ok <- rep(0, rowN)
416 ids.candidate <- na.omit(which(ids.times < numTry & ids.ok ==
417 0)[1:numBaseCandidateStep])
418 n <- length(ids.candidate)
420 if (length(ids.candidate) > numBaseStep) {
421 filter.dis <- ids.candidate < (median(ids.candidate) -
422 numBaseCandidateStep)
423 ids.dis <- ids.candidate[filter.dis]
424 if (length(ids.dis) < numBaseStep)
425 ids.candidate <- c(ids.dis, sample(ids.candidate[!filter.dis],
426 numBaseStep - length(ids.dis)))
427 else ids.candidate <- ids.dis
429 ids.times[ids.candidate] <- ids.times[ids.candidate] +
431 ids <- ids.RILrows[ids.candidate]
432 ids.point <- ifelse(useMedianToFindKnown == TRUE, median(ids),
434 is.known <- !is.na(alleleA.base[ids])
435 if (sum(is.known) < numKnownStep) {
436 ids.known <- na.omit(alleleA.ids[order(abs(alleleA.ids -
437 ids.point))[sample(numKnownCandidateStep, numKnownStep)]])
438 if (length(ids.known) > (numKnownStep - sum(is.known)))
439 ids.know <- ids.known[1:(numKnownStep - sum(is.known))]
440 ids <- unique(c(ids, ids.known))
443 is.known <- !is.na(alleleA.base[ids])
444 iResult <- localMPR(baseData[ids, ], maxIterate = maxIterate,
445 maxNStep = maxNStep, returnNumIterate = TRUE, verbose = 0)
446 allele.matrix <- iResult[['allele']]
447 a <- allele.matrix[is.known, ]
448 b <- alleleA.base[ids[is.known]]
449 a1 <- colSums(a == b, na.rm = T)/length(b)
450 if (sum(a1 >= scoreMin) > 0) {
452 allele.matrix <- allele.matrix[, c(2, 1)]
454 a <- ALLELE.mat[ids, ]
455 ids.na <- rowSums(is.na(a)) > 0
457 ALLELE.mat[ids[ids.na], ] <- allele.matrix[ids.na,
459 ids.ok[ids.candidate] <- 1
461 ids.all <- which(ids.times < numTry & ids.ok == 0)
462 ids.candidate <- na.omit(ids.all[1:numBaseCandidateStep])
463 n <- length(ids.candidate)
465 cat(strSTART, length(ids.all), j, strEND, sep = '\t')
467 invisible(ALLELE.mat)
470 # globalMPRByMarkersPlus.R
471 `globalMPRByMarkersPlus` <-
472 function (baseData, markers = NULL, alleleA = NULL, numGroup = 1,
473 groupSort = FALSE, numPerm = 1, numTry = 3, numBaseStep = 50,
474 numBaseCandidateStep = numBaseStep * 2, numKnownStep = pmax(numBaseStep/5,
475 10), numKnownCandidateStep = numKnownStep * 2, useMedianToFindKnown = TRUE,
476 maxIterate = 150, maxNStep = 3, scoreMin = 0.8, saveMidData = FALSE,
477 verbose = FALSE, strSTART = '\r', strEND = '', ...)
479 if (is.null(alleleA))
481 ALLELE.mat <- base2Allele(baseData)
482 ALLELE.num <- matrix(0, nrow = nrow(ALLELE.mat), ncol = ncol(ALLELE.mat))
483 dimnames(ALLELE.num) <- dimnames(ALLELE.mat)
484 alleleA.base <- alleleA[match(rownames(ALLELE.mat), names(alleleA))]
485 alleleA.ids <- which(!is.na(alleleA.base))
486 if (numKnownCandidateStep > length(alleleA.ids))
487 numKnownCandidateStep <- length(alleleA.ids)
488 if (numKnownStep > numKnownCandidateStep)
489 numKnownStep <- numKnownCandidateStep
492 for (perm in 1:numPerm) {
495 groupRIL <- split(order(colSums(!is.na(baseData)), decreasing = TRUE), cut(1:ncol(baseData), numGroup))
496 else groupRIL <- split(sample(ncol(baseData)), cut(1:ncol(baseData), numGroup))
497 names(groupRIL) <- 1:numGroup
500 groupRIL <- list('1' = 1:ncol(baseData))
502 groupRIL <- groupRIL[sapply(groupRIL, length) > 0]
503 for (ids.cols in groupRIL) {
504 ids.RILrows <- which(rowSums(!is.na(cbind(baseData[, ids.cols]))) > 0)
505 rowN <- length(ids.RILrows)
506 ids.times <- rep(0, rowN)
507 ids.ok <- rep(0, rowN)
508 ids.candidate <- na.omit(which(ids.times < numTry &
509 ids.ok == 0)[1:numBaseCandidateStep])
510 n <- length(ids.candidate)
512 if (length(ids.candidate) > numBaseStep) {
513 filter.dis <- ids.candidate < (median(ids.candidate) - numBaseCandidateStep)
514 ids.dis <- ids.candidate[filter.dis]
515 if (length(ids.dis) < numBaseStep)
516 ids.candidate <- c(ids.dis, sample(ids.candidate[!filter.dis], numBaseStep - length(ids.dis)))
517 else ids.candidate <- ids.dis
519 ids.times[ids.candidate] <- ids.times[ids.candidate] + 1
520 ids <- ids.RILrows[ids.candidate]
521 ids.point <- ifelse(useMedianToFindKnown == TRUE, median(ids), ids[1])
522 is.known <- !is.na(alleleA.base[ids])
523 if (sum(is.known) < numKnownStep) {
524 ids.known <- na.omit(alleleA.ids[order(abs(alleleA.ids - ids.point))[sample(numKnownCandidateStep, numKnownStep)]])
525 if (length(ids.known) > (numKnownStep - sum(is.known)))
526 ids.know <- ids.known[1:(numKnownStep - sum(is.known))]
527 ids <- unique(c(ids, ids.known))
530 is.known <- !is.na(alleleA.base[ids])
531 iResult <- localMPR(baseData[ids, ], allele.matrix = ALLELE.mat[ids,
532 ], maxIterate = maxIterate, maxNStep = maxNStep,
533 returnNumIterate = TRUE, verbose = 0)
534 allele.matrix <- iResult[['allele']]
535 a <- allele.matrix[is.known, ]
536 b <- alleleA.base[ids[is.known]]
537 a1 <- colSums(a == b, na.rm = T)/length(b)
538 if (sum(a1 >= scoreMin) > 0) {
540 allele.matrix <- allele.matrix[, c(2, 1)]
542 a <- ALLELE.mat[ids, ]
543 a1 <- rowSums(a == allele.matrix) == 2
544 ALLELE.num[ids, ] <- ALLELE.num[ids, ] + cbind(a1,
546 ids.ok[ids.candidate] <- 1
548 ids.all <- which(ids.times < numTry & ids.ok ==
550 ids.candidate <- na.omit(ids.all[1:numBaseCandidateStep])
551 n <- length(ids.candidate)
553 cat(strSTART, 'perm', perm, 'unprocessed',
554 length(ids.all), j, strEND, sep = '\t')
557 filter.exchange <- ALLELE.num[, 1] < ALLELE.num[, 2]
559 tmp[filter.exchange, ] <- tmp[filter.exchange, c(2, 1)]
562 tmp[filter.exchange, ] <- tmp[filter.exchange, c(2, 1)]
565 midData[[perm]] <- list(allele = ALLELE.mat, call = ALLELE.num)
567 invisible(list(allele = ALLELE.mat, call = ALLELE.num, midData = midData))
572 function (baseData, markers = NULL, alleleA = NULL, numGroup = ncol(baseData),
573 groupSort = FALSE, numPerm = 10, numTry = 3, numBaseStep = 50,
574 numBaseCandidateStep = numBaseStep * 2, numKnownStep = numBaseStep/2,
575 numKnownCandidateStep = numBaseStep * 2, useMedianToFindKnown = TRUE,
576 maxIterate = 150, maxNStep = 3, scoreMin = 0.8, useOnlyKnownToType = FALSE,
577 useBayes = FALSE, errorRate = 5e-04, saveMidData = FALSE,
578 verbose = FALSE, strSTART = '\r', strEND = '', ...)
580 if (is.null(alleleA))
582 ALLELE.mat <- base2Allele(baseData)
583 ALLELE.num <- matrix(0, nrow = nrow(ALLELE.mat), ncol = ncol(ALLELE.mat))
584 dimnames(ALLELE.num) <- dimnames(ALLELE.mat)
585 alleleA.base <- alleleA[match(rownames(ALLELE.mat), names(alleleA))]
586 alleleA.ids <- which(!is.na(alleleA.base))
587 ALLELE.num[ALLELE.mat == alleleA.base] <- 1
588 tmpALLELE.num <- ALLELE.num
589 if (numKnownCandidateStep > length(alleleA.ids))
590 numKnownCandidateStep <- length(alleleA.ids)
591 if (numKnownStep > numKnownCandidateStep)
592 numKnownStep <- numKnownCandidateStep
595 for (perm in 1:numPerm) {
598 groupRIL <- split(order(colSums(!is.na(baseData)),
599 decreasing = TRUE), cut(1:ncol(baseData), numGroup))
600 else groupRIL <- split(sample(ncol(baseData)), cut(1:ncol(baseData),
602 names(groupRIL) <- 1:numGroup
605 groupRIL <- list('1' = 1:ncol(baseData))
607 groupRIL <- groupRIL[sapply(groupRIL, length) > 0]
608 for (g in 1:length(groupRIL)) {
609 ids.cols <- groupRIL[[g]]
610 ids.RILrows <- which(rowSums(!is.na(cbind(baseData[,
612 rowN <- length(ids.RILrows)
613 ids.times <- rep(0, rowN)
614 ids.ok <- rep(0, rowN)
615 ids.candidate <- na.omit(which(ids.times < numTry &
616 ids.ok == 0)[1:numBaseCandidateStep])
617 n <- length(ids.candidate)
619 if (length(ids.candidate) > numBaseStep) {
620 filter.dis <- ids.candidate < (median(ids.candidate) -
621 numBaseCandidateStep)
622 ids.dis <- ids.candidate[filter.dis]
623 if (length(ids.dis) < numBaseStep)
624 ids.candidate <- c(ids.dis, sample(ids.candidate[!filter.dis],
625 numBaseStep - length(ids.dis)))
626 else ids.candidate <- ids.dis
628 ids.times[ids.candidate] <- ids.times[ids.candidate] +
630 ids <- ids.RILrows[ids.candidate]
631 ids.point <- ifelse(useMedianToFindKnown == TRUE,
633 is.known <- !is.na(alleleA.base[ids])
634 if (sum(is.known) < numKnownStep) {
635 ids.known <- na.omit(alleleA.ids[order(abs(alleleA.ids -
636 ids.point))[sample(numKnownCandidateStep,
638 if (length(ids.known) > (numKnownStep - sum(is.known)))
639 ids.know <- ids.known[1:(numKnownStep - sum(is.known))]
640 ids <- unique(c(ids, ids.known))
642 ids <- sort(as.numeric(ids))
643 is.known <- !is.na(alleleA.base[ids])
644 iResult <- localMPR(baseData[ids, ], allele.matrix = ALLELE.mat[ids,
645 ], maxIterate = maxIterate, maxNStep = maxNStep,
646 returnNumIterate = TRUE, verbose = 0)
647 allele.matrix <- iResult[['allele']]
648 if (useOnlyKnownToType) {
649 a <- allele.matrix[is.known, ]
650 b <- alleleA.base[ids[is.known]]
651 a1 <- colSums(a == b, na.rm = T)/length(b)
654 a <- ALLELE.mat[ids, ]
655 b <- ALLELE.num[ids, ]
657 b.f <- b[, 1] < b[, 2]
658 a.x[b.f, ] <- a.x[b.f, c(2, 1)]
659 a1 <- colMeans(allele.matrix == a.x[, 1], na.rm = T)
661 if (sum(a1 >= scoreMin) > 0) {
663 allele.matrix <- allele.matrix[, c(2, 1)]
665 a <- ALLELE.mat[ids, ]
666 a1 <- rowSums(a == allele.matrix) == 2
667 ALLELE.num[ids, ] <- ALLELE.num[ids, ] + cbind(a1,
669 ids.ok[ids.candidate] <- 1
671 ids.all <- which(ids.times < numTry & ids.ok ==
673 ids.candidate <- na.omit(ids.all[1:numBaseCandidateStep])
674 n <- length(ids.candidate)
676 cat(strSTART, 'perm', perm, 'group', g, 'unprocessed',
677 length(ids.all), 'MPR', j, strEND, sep = '\t')
680 geno.res <- genotypeCallsBayes(ALLELE.num, errorRate = errorRate,
681 eps = 1e-10, maxIterate = 100, verbose = FALSE)$type
683 tmp[geno.res == 2, ] <- NA
684 tmp[geno.res == 3, ] <- tmp[geno.res == 3, c(2,
689 filter <- ALLELE.num[, 1] < ALLELE.num[, 2]
690 tmp[filter, ] <- tmp[filter, c(2, 1)]
692 alleleA.base <- tmp[, 1]
693 alleleA.ids <- which(!is.na(alleleA.base))
696 midData[[perm]] <- list(allele = ALLELE.mat, call = ALLELE.num -
697 tmpALLELE.num, base = alleleA.base)
699 invisible(list(allele = ALLELE.mat, call = ALLELE.num - tmpALLELE.num,
700 base = alleleA.base, midData = midData))
705 function (geno, position, geno.probability, transitionFUN = phy2get.haldane.rils,
706 emissionFUN = makeEmissionFUN(errorRate = 0.01), ...)
708 n.obs <- length(geno)
709 n.state <- length(geno.probability)
710 psi <- delta <- matrix(0, nrow = n.state, ncol = n.obs)
711 n.con <- geno.cr <- numeric(n.obs)
712 geno.dis <- abs(diff(position))
716 n.con[i] <- ifelse(geno[i] == g, n.con[i - 1] + 1, 1)
719 for (i in 1:n.state) delta[i, 1] <- log(geno.probability[i]) +
720 emissionFUN(i, geno[1], n.con[1])
721 preProb <- numeric(n.state)
723 for (j in 1:n.state) {
724 for (i in 1:n.state) preProb[i] <- delta[i, t - 1] +
725 log(transitionFUN(i, j, geno.dis[t - 1]))
726 psi[j, t] <- which.max(preProb)
727 delta[j, t] <- max(preProb) + emissionFUN(j, geno[t],
731 geno.cr[n.obs] <- which.max(delta[, n.obs])
732 for (t in seq(n.obs - 1, 1, by = -1)) geno.cr[t] <- psi[geno.cr[t +
739 function (baseData, allele.matrix = NULL, maxIterate = 50, returnNumIterate = FALSE,
740 maxNStep = 5, verbose = FALSE, strEND = '\n', ...)
742 if (is.null(allele.matrix))
743 allele.matrix <- base2Allele(baseData)
744 if (nrow(baseData) == ncol(allele.matrix))
745 allele.matrix <- t(allele.matrix)
746 if (nrow(baseData) != nrow(allele.matrix))
747 stop('nrow(baseData)!=nrow(allele.matrix), allele.matrix error!!!')
748 newALLELE <- ALLELE <- allele.matrix
749 genoData <- base2Geno(baseData, allele.matrix)
750 newRSum <- oriRSum <- length(baseData)
753 while (numStepSize <= maxNStep) {
754 numIterate <- numIterate + 1
756 cat('\r', numIterate, '\t', newRSum, '\t', numStepSize,
759 loopRes <- .loopMPR(genoData, allele.matrix = ALLELE,
760 numStep = numStepSize)
761 newRSum <- loopRes[[1]]
762 if (oriRSum > newRSum) {
763 ALLELE <- loopRes[[2]]
764 genoData <- loopRes[[3]]
768 numStepSize <- numStepSize + 1
772 cat('\tDone.', strEND)
773 rownames(ALLELE) <- rownames(baseData)
774 if (returnNumIterate)
775 list(allele = ALLELE, num_iterate = numIterate, numR = newRSum)
781 function (errorRate = 0.01)
784 E2 <- log(1 - errorRate)
788 return(ifelse(h == x, E2, E))
799 IsUniq <- rep(1, nrow(geno.fill))
800 for (j in 1:nrow(geno.fill)) {
801 cur.nna <- which(!is.na(geno.fill[j, ]))
802 cur <- geno.fill[j, cur.nna]
803 if (identical(pre.nna, cur.nna) && identical(pre, cur))
810 geno.fill[unique(c(which(IsUniq == 1)[-1] - 1, nrow(geno.fill))),
818 if (nrow(blocks) < 2)
820 type.val <- blocks[, 'type']
821 t <- length(type.val)
822 type.val[is.na(blocks[, 'type'])] <- as.numeric(max(blocks[,
823 'type'], na.rm = TRUE)) + 1
824 block.start.ids <- sort(unique(c(1, which(type.val[-1] !=
826 block.start.ids <- block.start.ids[block.start.ids <= nrow(blocks)]
827 block.block.num <- length(block.start.ids)
828 blocks[-block.start.ids, 'start'] <- NA
829 blocks[block.start.ids, c('end', 'num')] <- t(sapply(1:block.block.num,
831 ids <- block.start.ids[i]:ifelse(i >= block.block.num,
832 nrow(blocks), block.start.ids[i + 1] - 1)
833 c(blocks[ids[length(ids)], 'end'], sum(blocks[ids,
834 'num'], na.rm = TRUE))
836 blocks <- rbind(blocks[!is.na(blocks[, 'start']), ])
837 blocks[, 'size'] <- blocks[, 'end'] - blocks[, 'start'] +
842 # phy2get.haldane.rils.R
843 `phy2get.haldane.rils` <-
846 d <- l/(._rice_phy2get_factor_ * 100)
847 p <- (1 - exp(-2 * d))
849 ifelse(a == b, 1 - p, p)
852 # Append for de-dependence rowQ from package:Biobase
856 rowmax <-apply(matrix,1,function(x){
863 rowmin <-apply(matrix,1,function(x){