1 # cython: language_level=3
3 # Time-stamp: <2022-10-04 15:14:15 Tao Liu>
7 This code is free software; you can redistribute it and/or modify it
8 under the terms of the BSD License (see the file LICENSE included with
11 # ------------------------------------
13 # ------------------------------------
16 # ------------------------------------
18 # ------------------------------------
22 from cpython cimport bool
23 from hmmlearn
import hmm
25 # from hmmlearn cimport hmm
27 # ------------------------------------
29 # ------------------------------------
30 from MACS3
.Signal
.Prob
import pnorm2
32 # ------------------------------------
34 # ------------------------------------
36 # https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.norm.html
37 cdef inline
float get_weighted_density
( int x
, float m
, float v
, w
):
41 1. x: the observed value
42 2. m: the mean of gaussian
43 3. v: the variance of the gaussian
47 return w
* pnorm2
( float(x
), m
, v
)
49 # ------------------------------------
51 # ------------------------------------
52 # ------------------------------------
54 # ------------------------------------
57 cpdef hmm_training
( list training_data
, list training_data_lengths
, int n_states
= 3, int random_seed
= 12345, covar
= 'full' ):
58 # training data should be in array like format: X = np.array([[.5, .3, .1, .1], [.6, .4, 0, 0]])
59 # if we do not want init_prob to be updated through learning, set params = 'tmc' and init_prob = initial_state otherwise it will be overwritten
60 # according to base documentation, if init_prob not stated, it is set to be equally likely for any state (1/ # of components)
61 # if we have other known parameters, we should set these (ie: means_weights, covariance_type etc.)
62 rs
= np
.random
.RandomState
(np
.random
.MT19937
(np
.random
.SeedSequence
(random_seed
)))
63 hmm_model
= hmm
.GaussianHMM
( n_components
= n_states
, covariance_type
= covar
, random_state
= rs
, verbose
= False
)
64 hmm_model
= hmm_model
.fit
( training_data
, training_data_lengths
)
65 assert hmm_model
.n_features
== 4
68 cpdef hmm_predict
( list signals
, list lens
, hmm_model
):
69 predictions
= hmm_model
.predict_proba
( signals
, lens
)
72 cpdef
void hmm_model_save
( str model_file
, object hmm_model
, int hmm_binsize
, int i_open_region
, int i_nucleosomal_region
, int i_background_region
):
73 if hmm_model
.covariance_type
== "diag":
74 covars
= hmm_model
.covars_
.diagonal
(axis1
=1, axis2
=2)
75 elif hmm_model
.covariance_type
== "full":
76 covars
= hmm_model
.covars_
78 raise Exception(f
"Unknown covariance type {hmm_model.covariance_type}")
79 with
open( model_file
, "w" ) as f
:
80 json
.dump
( {"startprob":hmm_model
.startprob_
.tolist
(),
81 "transmat":hmm_model
.transmat_
.tolist
(),
82 "means":hmm_model
.means_
.tolist
(),
83 "covars":covars
.tolist
(),
84 "covariance_type":hmm_model
.covariance_type
,
85 "n_features":int(hmm_model
.n_features
),
86 "i_open_region":int(i_open_region
),
87 "i_background_region":int(i_background_region
),
88 "i_nucleosomal_region":int(i_nucleosomal_region
),
89 "hmm_binsize":int(hmm_binsize
)}, f
)
91 cpdef
list hmm_model_init
( str model_file
):
92 with
open( model_file
) as f
:
94 hmm_model
= hmm
.GaussianHMM
( n_components
=3, covariance_type
=m
["covariance_type"] )
95 hmm_model
.startprob_
= np
.array
(m
["startprob"])
96 hmm_model
.transmat_
= np
.array
(m
["transmat"])
97 hmm_model
.means_
= np
.array
(m
["means"])
98 hmm_model
.covars_
= np
.array
(m
["covars"])
99 hmm_model
.covariance_type
= m
["covariance_type"]
100 hmm_model
.n_features
= m
["n_features"]
101 return [ hmm_model
, m
["i_open_region"], m
["i_background_region"], m
["i_nucleosomal_region"], m
["hmm_binsize"] ]