1 hapMap2numeric <- function(file){
\r
2 hapmap <- as.matrix(read.table(file, header=TRUE, row.names=1, sep="\t",
\r
3 stringsAsFactors=FALSE)[,-(2:10)])
\r
5 samples <- dimnames(hapmap)[[2]][-1]
\r
6 loci <- dimnames(hapmap)[[1]]
\r
8 #make sure sites are biallelic
\r
9 siteCheck <- names(table(hapmap[ ,1]))
\r
10 if(sum(grepl("/", siteCheck)) != length(siteCheck)){
\r
11 stop("There appear to be monomorphic sites in your file. Please remove them.")
\r
14 siteCheck2 <- strsplit(siteCheck, "/")
\r
15 siteCheck2 <- unlist(lapply(siteCheck2, length))
\r
16 siteCheck2 <- siteCheck2 == 2
\r
17 if(sum(siteCheck2) != length(siteCheck2)){
\r
18 stop("There appear to be sites with more than two allele states in your file.
\r
19 Please allow a maximum of two alleles per site in your file.")
\r
23 #check whether the file uses IUPAC coding or double letters
\r
24 nFor <- nchar(hapmap[1, 2])
\r
27 #use the appropriate conversions
\r
29 # set up conversion table
\r
30 s <- as.integer(c(0,1,1,2,NA))
\r
41 names(ac) <- c("AA","AC","CA","CC","NN")
\r
42 names(ag) <- c("AA","AG","GA","GG","NN")
\r
43 names(at) <- c("AA","AT","TA","TT","NN")
\r
44 names(cg) <- c("CC","CG","GC","GG","NN")
\r
45 names(ct) <- c("CC","CT","TC","TT","NN")
\r
46 names(gt) <- c("GG","GT","TG","TT","NN")
\r
47 names(cx) <- c("CC","C-","-C","--","NN")
\r
48 names(gx) <- c("GG","G-","-G","--","NN")
\r
49 names(tx) <- c("TT","T-","-T","--","NN")
\r
50 names(ax) <- c("AA","A-","-A","--","NN")
\r
51 conv <- list(ac,ac,ag,ag,at,at,cg,cg,ct,ct,gt,gt,cx,cx,gx,gx,tx,tx,ax,ax)
\r
52 names(conv) <- c("A/C","C/A","A/G","G/A","A/T","T/A","C/G","G/C",
\r
53 "C/T","T/C","G/T","T/G","C/-","-/C","G/-","-/G","T/-","-/T","A/-","-/A")
\r
57 # set up conversion table
\r
58 s <- as.integer(c(0,1,2,NA))
\r
69 names(ac) <- c("A","M","C","N")
\r
70 names(ag) <- c("A","R","G","N")
\r
71 names(at) <- c("A","W","T","N")
\r
72 names(cg) <- c("C","S","G","N")
\r
73 names(ct) <- c("C","Y","T","N")
\r
74 names(gt) <- c("G","K","T","N")
\r
75 names(cx) <- c("C","0","-","N")
\r
76 names(gx) <- c("G","0","-","N")
\r
77 names(tx) <- c("T","0","-","N")
\r
78 names(ax) <- c("A","0","-","N")
\r
79 conv <- list(ac,ac,ag,ag,at,at,cg,cg,ct,ct,gt,gt,cx,cx,gx,gx,tx,tx,ax,ax)
\r
80 names(conv) <- c("A/C","C/A","A/G","G/A","A/T","T/A","C/G","G/C",
\r
81 "C/T","T/C","G/T","T/G","C/-","-/C","G/-","-/G","T/-","-/T","A/-","-/A")
\r
84 # matrix to hold output
\r
85 x <- matrix(NA, nrow=length(samples), ncol=length(loci),
\r
86 dimnames=list(samples, loci))
\r
88 for(L in 1:length(loci)){
\r
89 thisconv <- conv[[hapmap[L, 1]]]
\r
90 x[,L] <- thisconv[hapmap[L, -1]]
\r