8 // new/delete hmm_par_t
10 hmm_par_t
*hmm_new_par(int m
, int n
)
14 assert(m
> 0 && n
> 0);
15 hp
= (hmm_par_t
*)calloc(1, sizeof(hmm_par_t
));
17 hp
->a0
= (FLOAT
*)calloc(n
, sizeof(FLOAT
));
18 hp
->a
= (FLOAT
**)calloc2(n
, n
, sizeof(FLOAT
));
19 hp
->e
= (FLOAT
**)calloc2(m
+ 1, n
, sizeof(FLOAT
));
20 hp
->ae
= (FLOAT
**)calloc2((m
+ 1) * n
, n
, sizeof(FLOAT
));
21 for (i
= 0; i
!= n
; ++i
) hp
->e
[m
][i
] = 1.0;
24 void hmm_delete_par(hmm_par_t
*hp
)
28 for (i
= 0; i
!= hp
->n
; ++i
) free(hp
->a
[i
]);
29 for (i
= 0; i
<= hp
->m
; ++i
) free(hp
->e
[i
]);
30 for (i
= 0; i
< (hp
->m
+ 1) * hp
->n
; ++i
) free(hp
->ae
[i
]);
31 free(hp
->a
); free(hp
->e
); free(hp
->a0
); free(hp
->ae
);
35 // new/delete hmm_data_t
37 hmm_data_t
*hmm_new_data(int L
, const char *seq
, const hmm_par_t
*hp
)
40 hd
= (hmm_data_t
*)calloc(1, sizeof(hmm_data_t
));
42 hd
->seq
= (char*)malloc(L
+ 1);
43 memcpy(hd
->seq
+ 1, seq
, L
);
46 void hmm_delete_data(hmm_data_t
*hd
)
50 for (i
= 0; i
<= hd
->L
; ++i
) {
51 if (hd
->f
) free(hd
->f
[i
]);
52 if (hd
->b
) free(hd
->b
[i
]);
54 free(hd
->f
); free(hd
->b
); free(hd
->s
); free(hd
->v
); free(hd
->p
); free(hd
->seq
);
58 // new/delete hmm_exp_t
60 hmm_exp_t
*hmm_new_exp(const hmm_par_t
*hp
)
64 he
= (hmm_exp_t
*)calloc(1, sizeof(hmm_exp_t
));
65 he
->m
= hp
->m
; he
->n
= hp
->n
;
66 he
->A0
= (FLOAT
*)calloc(hp
->n
, sizeof(FLOAT
));
67 he
->A
= (FLOAT
**)calloc2(hp
->n
, hp
->n
, sizeof(FLOAT
));
68 he
->E
= (FLOAT
**)calloc2(hp
->m
+ 1, hp
->n
, sizeof(FLOAT
));
71 void hmm_delete_exp(hmm_exp_t
*he
)
75 for (i
= 0; i
!= he
->n
; ++i
) free(he
->A
[i
]);
76 for (i
= 0; i
<= he
->m
; ++i
) free(he
->E
[i
]);
77 free(he
->A
); free(he
->E
); free(he
->A0
);
83 FLOAT
hmm_Viterbi(const hmm_par_t
*hp
, hmm_data_t
*hd
)
85 FLOAT
**la
, **le
, *preV
, *curV
, max
;
86 int **Vmax
, max_l
; // backtrace matrix
89 if (hd
->v
) free(hd
->v
);
90 hd
->v
= (int*)calloc(hd
->L
+1, sizeof(int));
91 la
= (FLOAT
**)calloc2(hp
->n
, hp
->n
, sizeof(FLOAT
));
92 le
= (FLOAT
**)calloc2(hp
->m
+ 1, hp
->n
, sizeof(FLOAT
));
93 Vmax
= (int**)calloc2(hd
->L
+1, hp
->n
, sizeof(int));
94 preV
= (FLOAT
*)malloc(sizeof(FLOAT
) * hp
->n
);
95 curV
= (FLOAT
*)malloc(sizeof(FLOAT
) * hp
->n
);
96 for (k
= 0; k
!= hp
->n
; ++k
)
97 for (l
= 0; l
!= hp
->n
; ++l
)
98 la
[k
][l
] = log(hp
->a
[l
][k
]); // this is not a bug
99 for (b
= 0; b
!= hp
->m
; ++b
)
100 for (k
= 0; k
!= hp
->n
; ++k
)
101 le
[b
][k
] = log(hp
->e
[b
][k
]);
102 for (k
= 0; k
!= hp
->n
; ++k
) le
[hp
->m
][k
] = 0.0;
104 for (k
= 0; k
!= hp
->n
; ++k
) {
105 preV
[k
] = le
[(int)hd
->seq
[1]][k
] + log(hp
->a0
[k
]);
109 for (u
= 2; u
<= hd
->L
; ++u
) {
110 FLOAT
*tmp
, *leu
= le
[(int)hd
->seq
[u
]];
111 for (k
= 0; k
!= hp
->n
; ++k
) {
113 for (l
= 0, max
= -HMM_INF
, max_l
= -1; l
!= hp
->n
; ++l
) {
114 if (max
< preV
[l
] + laa
[l
]) {
115 max
= preV
[l
] + laa
[l
];
119 assert(max_l
>= 0); // cannot be zero
120 curV
[k
] = leu
[k
] + max
;
123 tmp
= curV
; curV
= preV
; preV
= tmp
; // swap
126 for (k
= 0, max_l
= -1, max
= -HMM_INF
; k
!= hp
->n
; ++k
) {
128 max
= preV
[k
]; max_l
= k
;
131 assert(max_l
>= 0); // cannot be zero
132 hd
->v
[hd
->L
] = max_l
;
133 for (u
= hd
->L
; u
>= 1; --u
)
134 hd
->v
[u
-1] = Vmax
[u
][hd
->v
[u
]];
135 for (k
= 0; k
!= hp
->n
; ++k
) free(la
[k
]);
136 for (b
= 0; b
< hp
->m
; ++b
) free(le
[b
]);
137 for (u
= 0; u
<= hd
->L
; ++u
) free(Vmax
[u
]);
138 free(la
); free(le
); free(Vmax
); free(preV
); free(curV
);
139 hd
->status
|= HMM_VITERBI
;
145 void hmm_forward(const hmm_par_t
*hp
, hmm_data_t
*hd
)
147 FLOAT sum
, tmp
, **at
;
151 // allocate memory for hd->f and hd->s
152 n
= hp
->n
; m
= hp
->m
; L
= hd
->L
;
153 if (hd
->s
) free(hd
->s
);
155 for (k
= 0; k
<= hd
->L
; ++k
) free(hd
->f
[k
]);
158 hd
->f
= (FLOAT
**)calloc2(hd
->L
+1, hp
->n
, sizeof(FLOAT
));
159 hd
->s
= (FLOAT
*)calloc(hd
->L
+1, sizeof(FLOAT
));
160 hd
->status
&= ~(unsigned)HMM_FORWARD
;
161 // at[][] array helps to improve the cache efficiency
162 at
= (FLOAT
**)calloc2(n
, n
, sizeof(FLOAT
));
164 for (k
= 0; k
!= n
; ++k
)
165 for (l
= 0; l
!= n
; ++l
)
166 at
[k
][l
] = hp
->a
[l
][k
];
167 // f[0], but it should never be used
169 for (k
= 0; k
!= n
; ++k
) hd
->f
[0][k
] = 0.0;
171 for (k
= 0, sum
= 0.0; k
!= n
; ++k
)
172 sum
+= (hd
->f
[1][k
] = hp
->a0
[k
] * hp
->e
[(int)hd
->seq
[1]][k
]);
173 for (k
= 0; k
!= n
; ++k
) hd
->f
[1][k
] /= sum
;
175 // f[2..hmmL], the core loop
176 for (u
= 2; u
<= L
; ++u
) {
177 FLOAT
*fu
= hd
->f
[u
], *fu1
= hd
->f
[u
-1], *eu
= hp
->e
[(int)hd
->seq
[u
]];
178 for (k
= 0, sum
= 0.0; k
!= n
; ++k
) {
180 for (l
= 0, tmp
= 0.0; l
!= n
; ++l
) tmp
+= fu1
[l
] * aa
[l
];
181 sum
+= (fu
[k
] = eu
[k
] * tmp
);
183 for (k
= 0; k
!= n
; ++k
) fu
[k
] /= sum
;
187 for (k
= 0; k
!= hp
->n
; ++k
) free(at
[k
]);
189 hd
->status
|= HMM_FORWARD
;
192 // precalculate hp->ae
194 void hmm_pre_backward(hmm_par_t
*hp
)
198 m
= hp
->m
; n
= hp
->n
;
199 for (b
= 0; b
<= m
; ++b
) {
200 for (k
= 0; k
!= n
; ++k
) {
201 FLOAT
*p
= hp
->ae
[b
* hp
->n
+ k
];
202 for (l
= 0; l
!= n
; ++l
)
203 p
[l
] = hp
->e
[b
][l
] * hp
->a
[k
][l
];
208 // backward algorithm
210 void hmm_backward(const hmm_par_t
*hp
, hmm_data_t
*hd
)
216 assert(hd
->status
& HMM_FORWARD
);
217 // allocate memory for hd->b
218 m
= hp
->m
; n
= hp
->n
; L
= hd
->L
;
220 for (k
= 0; k
<= hd
->L
; ++k
) free(hd
->b
[k
]);
223 hd
->status
&= ~(unsigned)HMM_BACKWARD
;
224 hd
->b
= (FLOAT
**)calloc2(L
+1, hp
->n
, sizeof(FLOAT
));
226 for (k
= 0; k
!= hp
->n
; ++k
) hd
->b
[L
][k
] = 1.0 / hd
->s
[L
];
227 // b[1..L-1], the core loop
228 for (u
= L
-1; u
>= 1; --u
) {
229 FLOAT
*bu1
= hd
->b
[u
+1], **p
= hp
->ae
+ (int)hd
->seq
[u
+1] * n
;
230 for (k
= 0; k
!= n
; ++k
) {
232 for (l
= 0, tmp
= 0.0; l
!= n
; ++l
) tmp
+= q
[l
] * bu1
[l
];
233 hd
->b
[u
][k
] = tmp
/ hd
->s
[u
];
236 hd
->status
|= HMM_BACKWARD
;
237 for (l
= 0, tmp
= 0.0; l
!= n
; ++l
)
238 tmp
+= hp
->a0
[l
] * hd
->b
[1][l
] * hp
->e
[(int)hd
->seq
[1]][l
];
239 if (tmp
> 1.0 + 1e-6 || tmp
< 1.0 - 1e-6) // in theory, tmp should always equal to 1
240 fprintf(stderr
, "++ Underflow may have happened (%lg).\n", tmp
);
243 // log-likelihood of the observation
245 FLOAT
hmm_lk(const hmm_data_t
*hd
)
247 FLOAT sum
= 0.0, prod
= 1.0;
250 assert(hd
->status
& HMM_FORWARD
);
251 for (u
= 1; u
<= L
; ++u
) {
253 if (prod
< HMM_TINY
|| prod
>= 1.0/HMM_TINY
) { // reset
262 // posterior decoding
264 void hmm_post_decode(const hmm_par_t
*hp
, hmm_data_t
*hd
)
267 assert(hd
->status
&& HMM_BACKWARD
);
268 if (hd
->p
) free(hd
->p
);
269 hd
->p
= (int*)calloc(hd
->L
+ 1, sizeof(int));
270 for (u
= 1; u
<= hd
->L
; ++u
) {
271 FLOAT prob
, max
, *fu
= hd
->f
[u
], *bu
= hd
->b
[u
], su
= hd
->s
[u
];
273 for (k
= 0, max
= -1.0, max_k
= -1; k
!= hp
->n
; ++k
) {
274 if (max
< (prob
= fu
[k
] * bu
[k
] * su
)) {
275 max
= prob
; max_k
= k
;
281 hd
->status
|= HMM_POSTDEC
;
284 // posterior probability of states
286 FLOAT
hmm_post_state(const hmm_par_t
*hp
, const hmm_data_t
*hd
, int u
, FLOAT
*prob
)
288 FLOAT sum
= 0.0, ss
= hd
->s
[u
], *fu
= hd
->f
[u
], *bu
= hd
->b
[u
];
290 for (k
= 0; k
!= hp
->n
; ++k
)
291 sum
+= (prob
[k
] = fu
[k
] * bu
[k
] * ss
);
292 return sum
; // in theory, this should always equal to 1.0
297 hmm_exp_t
*hmm_expect(const hmm_par_t
*hp
, const hmm_data_t
*hd
)
299 int k
, l
, u
, b
, m
, n
;
301 assert(hd
->status
& HMM_BACKWARD
);
302 he
= hmm_new_exp(hp
);
304 m
= hp
->m
; n
= hp
->n
;
305 for (k
= 0; k
!= n
; ++k
)
306 for (l
= 0; l
!= n
; ++l
) he
->A
[k
][l
] = HMM_TINY
;
307 for (b
= 0; b
<= m
; ++b
)
308 for (l
= 0; l
!= n
; ++l
) he
->E
[b
][l
] = HMM_TINY
;
309 // calculate A_{kl} and E_k(b), k,l\in[0,n)
310 for (u
= 1; u
< hd
->L
; ++u
) {
311 FLOAT
*fu
= hd
->f
[u
], *bu
= hd
->b
[u
], *bu1
= hd
->b
[u
+1], ss
= hd
->s
[u
];
312 FLOAT
*Ec
= he
->E
[(int)hd
->seq
[u
]], **p
= hp
->ae
+ (int)hd
->seq
[u
+1] * n
;
313 for (k
= 0; k
!= n
; ++k
) {
314 FLOAT
*q
= p
[k
], *AA
= he
->A
[k
], fuk
= fu
[k
];
315 for (l
= 0; l
!= n
; ++l
) // this is cache-efficient
316 AA
[l
] += fuk
* q
[l
] * bu1
[l
];
317 Ec
[k
] += fuk
* bu
[k
] * ss
;
321 for (l
= 0; l
!= n
; ++l
)
322 he
->A0
[l
] += hp
->a0
[l
] * hp
->e
[(int)hd
->seq
[1]][l
] * hd
->b
[1][l
];
326 FLOAT
hmm_Q0(const hmm_par_t
*hp
, hmm_exp_t
*he
)
330 for (k
= 0; k
!= hp
->n
; ++k
) {
332 for (b
= 0, tmp
= 0.0; b
!= hp
->m
; ++b
) tmp
+= he
->E
[b
][k
];
333 for (b
= 0; b
!= hp
->m
; ++b
)
334 sum
+= he
->E
[b
][k
] * log(he
->E
[b
][k
] / tmp
);
336 for (k
= 0; k
!= hp
->n
; ++k
) {
337 FLOAT tmp
, *A
= he
->A
[k
];
338 for (l
= 0, tmp
= 0.0; l
!= hp
->n
; ++l
) tmp
+= A
[l
];
339 for (l
= 0; l
!= hp
->n
; ++l
) sum
+= A
[l
] * log(A
[l
] / tmp
);
341 return (he
->Q0
= sum
);
346 void hmm_add_expect(const hmm_exp_t
*he0
, hmm_exp_t
*he1
)
349 assert(he0
->m
== he1
->m
&& he0
->n
== he1
->n
);
350 for (k
= 0; k
!= he1
->n
; ++k
) {
351 he1
->A0
[k
] += he0
->A0
[k
];
352 for (l
= 0; l
!= he1
->n
; ++l
)
353 he1
->A
[k
][l
] += he0
->A
[k
][l
];
355 for (b
= 0; b
!= he1
->m
; ++b
) {
356 for (l
= 0; l
!= he1
->n
; ++l
)
357 he1
->E
[b
][l
] += he0
->E
[b
][l
];
363 FLOAT
hmm_Q(const hmm_par_t
*hp
, const hmm_exp_t
*he
)
367 for (bb
= 0; bb
!= he
->m
; ++bb
) {
368 FLOAT
*eb
= hp
->e
[bb
], *Eb
= he
->E
[bb
];
369 for (k
= 0; k
!= hp
->n
; ++k
) {
370 if (eb
[k
] <= 0.0) return -HMM_INF
;
371 sum
+= Eb
[k
] * log(eb
[k
]);
374 for (k
= 0; k
!= he
->n
; ++k
) {
375 FLOAT
*Ak
= he
->A
[k
], *ak
= hp
->a
[k
];
376 for (l
= 0; l
!= he
->n
; ++l
) {
377 if (ak
[l
] <= 0.0) return -HMM_INF
;
378 sum
+= Ak
[l
] * log(ak
[l
]);
381 return (sum
-= he
->Q0
);
386 char *hmm_simulate(const hmm_par_t
*hp
, int L
)
391 seq
= (char*)calloc(L
+1, 1);
392 // calculate the transpose of hp->e[][]
393 et
= (FLOAT
**)calloc2(hp
->n
, hp
->m
, sizeof(FLOAT
));
394 for (k
= 0; k
!= hp
->n
; ++k
)
395 for (b
= 0; b
!= hp
->m
; ++b
)
396 et
[k
][b
] = hp
->e
[b
][k
];
397 // the initial state, drawn from a0[]
399 for (k
= 0, y
= 0.0; k
!= hp
->n
; ++k
) {
404 for (i
= 0; i
!= L
; ++i
) {
405 FLOAT
*el
, *ak
= hp
->a
[k
];
407 for (l
= 0, y
= 0.0; l
!= hp
->n
; ++l
) {
413 for (b
= 0, y
= 0.0; b
!= hp
->m
; ++b
) {
420 for (k
= 0; k
!= hp
->n
; ++k
) free(et
[k
]);