# Baum_Welch(obs, states, start_p, trans_p, emit_p)
# NOTE(review): this listing is fragmentary. The leading integer on each
# statement is the statement's line number in the original file, and each
# statement is split across several physical lines. Only original lines 10
# and 13 of this function are present: the statement that binds `state`,
# the creation of `T`, and everything after the assignment below are not
# visible, so what the function computes or returns cannot be stated here.
10 def Baum_Welch(obs
, states
, start_p
, trans_p
, emit_p
):
# Store the start probability of `state` in table T under that state.
13 T
[state
] = start_p
[state
]
# forward_viterbi(obs, states, start_p, trans_p, emit_p)
# Returns the 3-tuple (total, argmax, valmax).
# NOTE(review): this listing is fragmentary. The leading integer on each
# statement is its line number in the original file, and the gaps in that
# numbering (e.g. 23 -> 26, 35 -> 41, 45 -> 50) mark lines that are not
# visible. Missing pieces include the loops that bind `state` and `output`,
# the initialisation of `U`, `total`, `argmax` and `valmax`, and the way
# `tmp1`/`tmp2` are combined with `prob`/`v_prob`. Do not rely on this
# listing for the exact recurrence.
19 def forward_viterbi(obs
, states
, start_p
, trans_p
, emit_p
):
22 ## prob. V.path V.prob.
# Each T entry is a 3-tuple: start_p[state], the one-element path [state],
# and start_p[state] again (labelled prob / V.path / V.prob above).
23 T
[state
] = (start_p
[state
], [state
], start_p
[state
])
# For every candidate next state, every source state is examined.
26 for next_state
in states
:
30 for source_state
in states
:
31 (prob
, v_path
, v_prob
) = T
[source_state
]
# `output` is not bound in the visible lines; presumably it is the current
# element of `obs` -- TODO confirm against the full file.
# tmp1: emission probability of `output` from the source state.
34 tmp1
= emit_p
[source_state
][output
]
# tmp2: transition probability from the source state to the next state.
35 tmp2
= trans_p
[source_state
][next_state
]
# Candidate path: the source state's path extended by next_state. The
# condition under which this assignment runs is not visible.
41 argmax
= v_path
+ [next_state
]
43 U
[next_state
] = (total
, argmax
, valmax
)
45 ## apply sum/max to the final states:
50 (prob
, v_path
, v_prob
) = T
[state
]
55 return (total
, argmax
, valmax
)
def mydir(arg, dirname, names):
    """Collect the paths under *dirname* whose normalized form contains "FaMo".

    The signature matches the ``visit(arg, dirname, names)`` callback that
    Python 2's ``os.path.walk`` invokes once per directory; the script passes
    this function to ``os.path.walk``.

    Args:
        arg: Opaque value forwarded by the walker; not used here.
        dirname: Directory that the entries in *names* belong to.
        names: Entry names (files and sub-directories) inside *dirname*.

    Side effects:
        Appends every matching path to the module-level list ``filename1``,
        in the order the entries appear in *names*. Returns ``None``.
    """
    # Build every path before touching filename1, as the original did, so a
    # failure while joining leaves the shared list unmodified.
    candidates = [
        os.path.normpath(os.path.join(dirname, entry)) for entry in names
    ]
    # The substring test runs on the full normalized path, so a directory
    # component containing "FaMo" also counts as a match.
    for candidate in candidates:
        if "FaMo" in candidate:
            filename1.append(candidate)
# NOTE(review): fragmentary listing -- the leading integer on each statement
# is its line number in the original file, and gaps in that numbering mark
# lines that are not visible. `path` and `filename1` are defined outside the
# visible lines.
# Walk the tree rooted at `path`, calling mydir(0, dirname, names) for each
# directory. os.path.walk exists only in Python 2.
69 os
.path
.walk(path
, mydir
, 0)
# Disabled code: would load 'difference.txt' into `needReRun`.
71 #diffFile = open('difference.txt','r').read().split('\n')
73 #for line in diffFile:
74 # tmp = line.split('\t')
75 # needReRun.append(tmp)
# Process every path collected in filename1.
76 for filename
in filename1
:
77 print "Loading File:", filename
# Read the whole file into memory.
# NOTE(review): the file object is never bound to a name, so it is not
# closed explicitly.
78 content
= open(filename
,'r').read()
82 #Chr Pos FaHap MoHap SpNum S01 S02 S03 S04 S05 S06 S07 S08 S09 S10 S11 S12 S13 S14 S15 S16 S17 S18 S19 S20 S21 S22 S23 S24 S25 S26 S27 S28 S29 S30 S31 S32 S33 S34 S35 S36 S37 S38 S39 S40 S41 S42 S43 S44 S45 S46 S47 S48 S49 S50 S51 S52 S53 S54 S55 S56 S57 S58 S59 S60 S61 S62 S63 S64 S65 S66 S67 S68 S69 S70 S71 S72 S73 S74 S75 S76 S77 S78 S79 S80 S81 S82 S83 S84 S85 S86 S87 S88 S89 S90 S91 S92 S93 S94 S95 S96 S97 S98 S99
83 #chr13 19020145 T G 9 N T N N N N N N N N N N N G N N N N N N N G N G N N N N N T N N N N N N N N N N N N N N G N N N N T N N N G N N N N N N N N N N N N N N N N N N N N N N N N N N N N N N N N N N N N N N G N N N N N N
84 #chr13 19020627 G A 39 N G N A G N N N G N A G A A A N N G N N N A N A G G N G N G G G N N N G N G N G N N A N N N N N N G A A N A N N G N N A N N N G N N A G G N N A N N N G N N N N N N A N N N N A N N N N A G N G N N N
85 #chr13 19020776 G T 24 N G N T G N N N N N T G N T T T N N N G N T N T G N N N G G N N N N N G N N N N N N N N N N T N N N N N N N N N G N N T N N N N N N T G N T N N T N N N N N N N N G N N N N N T N N N N N N N N N N N
# NOTE(review): fragmentary listing -- the leading integer on each statement
# is its line number in the original file; the lines that bind `item` and
# `itemkey`, create `tempdata`/`posData`, and guard the two recoding
# assignments below are not visible.
# Split the file content into lines and each line into tab-separated fields.
88 for line
in content
.split('\n'):
91 tmp
= line
.split('\t')
# Start an empty value list for `item` in both per-column tables.
95 tempdata
.update({item
:[]})
96 posData
.update({item
:[]})
# Visit every column index of the current row.
98 for i
in range(len(itemkey
)):
# Field i is overwritten with '1' or '-1'; which condition selects each
# branch is not visible in this listing.
101 tmp
[i
]='1'#father is 1
103 tmp
[i
]='-1'#mother is -1
# Append field i to the list of the column named itemkey[i].
106 tempdata
[itemkey
[i
]].append(tmp
[i
])
# Append the row's second field (tmp[1]) to the same column's list in
# posData. Presumably this is the position, given the "#Chr Pos ..." sample
# header in the comments above -- TODO confirm.
107 posData
[itemkey
[i
]].append(tmp
[1])
# First value stored under the '#Chr' column.
108 chrName
= tempdata
['#Chr'][0]
# Model tables over the three states 'F', 'M' and 'U'; these are passed to
# forward_viterbi further down the script.
# NOTE(review): fragmentary listing -- the leading integer on each statement
# is its line number in the original file.
# Transition table, indexed [from_state][to_state].
# NOTE(review): the 'F' and 'M' rows sum to 0.99999991, not 1.0 -- confirm
# whether that is intentional.
109 transition_probability
= {'F' : {'F': 0.9999998,'U': 0.00000009,'M': 0.00000002},
110 'M' : {'M': 0.9999998,'U': 0.00000009,'F': 0.00000002},
111 'U' : {'U': 0.9, 'F': 0.05, 'M': 0.05}}
# Emission table, indexed [state][symbol] with symbols '1' and '-1' (the
# script's own comments tag '1' as father and '-1' as mother).
112 emission_probability
= {'F' : {'1': 0.9, '-1': 0.1},
113 'M' : {'-1': 0.9, '1': 0.1},
114 'U' : {'1':0.5, '-1':0.5}}
# Start probabilities. Only the 'F' entry is visible; the rest of this
# literal (original lines 116-118) is missing from the listing.
115 start_probability
= {'F' : 0.3,
# NOTE(review): fragmentary listing -- the leading integer on each statement
# is its line number in the original file; the loop that binds `item` and
# most of the write loop's body (original lines 132-137 and 139-142) are not
# visible.
# Keep only column names that start with 'S' and whose second character is
# not 'p'. Presumably this selects the S01..S99 columns and skips 'SpNum',
# given the sample header in the comments above -- TODO confirm.
119 if item
[0]=='S' and item
[1]!='p':
120 #if [item, chrName, 'More'] not in needReRun:
122 states
= ('F','U','M')
# The recoded values collected for this column form the observation sequence.
123 observations
= tempdata
[item
]
124 print 'Calculating',item
# forward_viterbi returns (total, argmax, valmax); index 1 is used below.
125 tmpdt
= forward_viterbi(observations
, states
, start_probability
, transition_probability
, emission_probability
)
126 print 'Calculating',item
, 'Finished!'
127 print 'Writing File',"result."+item
+'.'+chrName
# Output file is named result.<item>.<chrName>, relative to the current
# working directory.
# NOTE(review): no w.close() appears in the visible lines -- confirm the
# file is closed in the full source.
128 w
= open("result."+item
+'.'+chrName
,'w')
129 w
.write('#Sperm\tChr\tPos\tObservation\tGuess\n')
130 posTemp
= posData
[item
]
# Iterates one fewer time than the length of tmpdt[1].
131 for i
in range(len(tmpdt
[1])-1):
138 w
.write(observations
[i
])
143 print 'Writing Completed.'