new file: pixi.toml
[GalaxyCodeBases.git] / projects / recombineMap / sperm_by_fw / crossover_prediction / HMM_method / sumData.py
blobfd87ae652d817f1aad2bfbca547bb4d7f1e1a6ce
1 '''
2 Created on 2012-7-12
4 @author: LiJinsen
5 '''
7 import sys, os
# read data file
# Paths collected by mydir(); filled while the input directory is walked.
filename1 = []


def mydir(arg, dirname, names):
    """Directory-walk callback that records every "FaMo" entry.

    Each entry of *names* is joined onto *dirname* and normalised; the
    resulting path is appended to the module-level list ``filename1`` when
    it contains the substring "FaMo".

    The substring test is applied to the whole joined path, not only to the
    entry name, so a "FaMo" in *dirname* makes every entry match.

    arg     -- unused; present because the walker passes it through.
    dirname -- directory currently being visited.
    names   -- names of the entries found in *dirname*.
    """
    for name in names:
        full_path = os.path.normpath(os.path.join(dirname, name))
        if "FaMo" in full_path:
            filename1.append(full_path)
# Directory to scan for the "FaMo" data files: the first command-line
# argument when one is given, otherwise the current working directory.
if len(sys.argv) > 1:
    path = sys.argv[1]
else:
    path = os.getcwd()

# os.path.walk() no longer exists in Python 3; os.walk() is available in
# both major versions.  The callback receives sub-directories and files
# together, as os.path.walk() used to pass them.
for dirname, subdirs, files in os.walk(path):
    mydir(0, dirname, subdirs + files)

# Maps chromosome name -> {position string: FaHap + MoHap}.
data = {}
# Load every collected "FaMo" file into `data`, one entry per file keyed by
# the chromosome name found in the file's first data row.
#
# Expected layout (tab-separated), e.g.:
#   #Chr   Pos       FaHap MoHap SpNum S01 S02 ... S99
#   chr13  19020145  T     G     9     N   T   ... N
#   chr13  19020627  G     A     39    N   G   ... N
for filename in filename1:
    print("Loading File: %s" % filename)
    # Close the handle as soon as the file has been read.
    with open(filename, 'r') as handle:
        content = handle.read()
    tempdata = {}
    itemkey = []  # first four header names; not read by the visible code
    chrName = ''
    for line in content.split('\n'):
        if line == '':
            continue
        tmp = line.split('\t')
        if tmp[0] == '#Chr':
            itemkey = tmp[:4]
            continue
        if chrName == '':
            # The first data row names the chromosome for the whole file.
            chrName = tmp[0]
        # position -> father haplotype followed by mother haplotype
        tempdata[tmp[1]] = tmp[2] + tmp[3]

    # A later file with the same chromosome name replaces the earlier one.
    data[chrName] = tempdata
# read result file
# Start a fresh list: from here on `filename1` holds the result-file paths.
filename1 = []


def mydir2(arg, dirname, names):
    """Directory-walk callback that records every "result" entry.

    Each entry of *names* is joined onto *dirname* and normalised; the
    resulting path is appended to the module-level list ``filename1`` when
    it contains the substring "result".

    The substring test is applied to the whole joined path, not only to the
    entry name, so a "result" in *dirname* makes every entry match.

    arg     -- unused; present because the walker passes it through.
    dirname -- directory currently being visited.
    names   -- names of the entries found in *dirname*.
    """
    for name in names:
        full_path = os.path.normpath(os.path.join(dirname, name))
        if "result" in full_path:
            filename1.append(full_path)
# Result files are always searched for under ./data, independent of the
# command-line argument used for the "FaMo" files.
path = 'data'

# Run-wide counters, updated while the result files are read.
allSNP = 0        # every data row
mayErrorSNP = 0   # rows with guess 'F' and observation '0', or 'M' and '1'
unsolvedSNP = 0   # rows with guess 'U'

# os.path.walk() no longer exists in Python 3; os.walk() is available in
# both major versions.  The callback receives sub-directories and files
# together, as os.path.walk() used to pass them.
for dirname, subdirs, files in os.walk(path):
    mydir2(0, dirname, subdirs + files)

# Output file that receives one line per reported change of guess.
w = open('Result.all.txt.TRASH', 'w')
def _write_crossover(out, left, right, haplotypes, unsolved):
    """Write one tab-separated record for a change of guess to *out*.

    left, right -- the result rows (lists of fields: sperm, chromosome,
                   position, observation, guess) on either side of the change.
    haplotypes  -- mapping of position string to parental haplotype string
                   for the chromosome being processed.
    unsolved    -- number written in the last column.

    Columns written: sperm, chromosome, midpoint of the two positions,
    distance between them, "pos,haplotypes" for each side, a literal 1,
    *unsolved*, followed by a trailing tab.
    """
    left_pos = int(left[2])
    right_pos = int(right[2])
    fields = [
        left[0],
        left[1],
        # Floor division keeps the midpoint an integer on Python 3 as well.
        str((left_pos + right_pos) // 2),
        str(right_pos - left_pos),
        left[2] + ',' + haplotypes[left[2]],
        right[2] + ',' + haplotypes[right[2]],
        '1',
        str(unsolved),
    ]
    out.write('\t'.join(fields) + '\t\n')


# Scan every result file and report each position where the guess switches
# between 'F' and 'M', either directly or across a run of 'U' rows.
#
# Expected layout (tab-separated), e.g.:
#   #Sperm  Chr   Pos     Observation  Guess
#   S86     chrX  62304   1            F
#   S86     chrX  173892  1            F
#
# NOTE(review): the copy this block was restored from had lost its
# indentation.  The two `count` resets are placed inside the branches that
# write a record; confirm against the original nesting.
for filename in filename1:
    print('Reading File: %s' % filename)
    # Third dot-separated field of the whole path, so dots in directory
    # names shift which field is taken.
    chrName = filename.split('.')[2]
    with open(filename, 'r') as result_file:
        content = result_file.read().split('\n')

    count = {'F': 0, 'M': 0, 'U': 0}
    last = ''    # previous data row ('' until the first one is seen)
    change = []  # last non-'U' row before the current run of 'U' rows
    for line in content:
        item = line.split('\t')
        if item[0] == '' or item[0][0] == '#':
            continue

        guess = item[4]
        if ((guess == 'F' and item[3] == '0')
                or (guess == 'M' and item[3] == '1')):
            mayErrorSNP += 1
        if guess == 'U':
            unsolvedSNP += 1
        allSNP += 1

        if last == '':
            # First data row of the file: nothing to compare against yet.
            last = item
            count[guess] = 1
            continue

        count[guess] += 1
        previous = last[4]

        if guess == 'U' and previous != 'U':
            # Entering a run of 'U': remember the row just before it.
            change = last

        if previous == 'U' and guess != 'U':
            # Leaving a run of 'U': report only if the guess differs from
            # the one that preceded the run.
            if change != [] and change[4] != guess:
                _write_crossover(w, change, item, data[chrName], count['U'])
                count = {'F': 0, 'M': 0, 'U': 0}
                change = []

        if guess != previous and guess != 'U' and previous != 'U':
            # Direct switch between 'F' and 'M' with no 'U' rows in between.
            _write_crossover(w, last, item, data[chrName], 0)
            count = {'F': 0, 'M': 0, 'U': 0}

        last = item

# All records are written; flush and release the output file.
w.close()
# Final summary of the counters gathered while reading the result files.
# A single parenthesised string prints the same text on Python 2 and 3.
print("All SNP %s" % allSNP)
print('Error SNP %s' % mayErrorSNP)
print('Illegible SNP %s' % unsolvedSNP)