1 # cython: language_level=3
3 # Time-stamp: <2024-10-04 11:45:30 Tao Liu>
7 This code is free software; you can redistribute it and/or modify it
8 under the terms of the BSD License (see the file LICENSE included with
11 # ------------------------------------
13 # ------------------------------------
16 from cython
.cimports
import numpy
as cnp
17 from hmmlearn
.hmm
import GaussianHMM
, PoissonHMM
20 # ------------------------------------
22 # ------------------------------------
23 from MACS3
.Signal
.Prob
import pnorm2
25 # ------------------------------------
27 # ------------------------------------
29 # https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.norm.html
34 def get_weighted_density(x
: cython
.int, m
: cython
.float,
35 v
: cython
.float, w
: cython
.float) -> cython
.float:
39 1. x: the observed value
40 2. m: the mean of gaussian
41 3. v: the variance of the gaussian
45 return w
* pnorm2(float(x
), m
, v
)
47 # ------------------------------------
49 # ------------------------------------
51 # ------------------------------------
53 # ------------------------------------
57 def hmm_training(training_data
: list, training_data_lengths
: list,
58 n_states
: cython
.int = 3, random_seed
: cython
.int = 12345,
59 covar
: str = 'full', hmm_type
: str = 'gaussian'):
60 # training data should be in array like format: X = np.array([[.5, .3, .1, .1], [.6, .4, 0, 0]])
61 # if we do not want init_prob to be updated through learning, set params = 'tmc' and init_prob = initial_state otherwise it will be overwritten
62 # according to base documentation, if init_prob not stated, it is set to be equally likely for any state (1/ # of components)
63 # if we have other known parameters, we should set these (ie: means_weights, covariance_type etc.)
64 rs
= np
.random
.RandomState(np
.random
.MT19937(np
.random
.SeedSequence(random_seed
)))
65 if hmm_type
== "gaussian":
66 hmm_model
= GaussianHMM(n_components
=n_states
, covariance_type
=covar
, random_state
=rs
, verbose
=False)
67 if hmm_type
== "poisson":
68 hmm_model
= PoissonHMM(n_components
=n_states
, random_state
=rs
, verbose
=False)
69 hmm_model
= hmm_model
.fit(training_data
, training_data_lengths
)
70 assert hmm_model
.n_features
== 4
75 @cython.returns(cnp
.ndarray
)
76 def hmm_predict(signals
: list, lens
: list, hmm_model
):
77 predictions
= hmm_model
.predict_proba(signals
, lens
)
82 def hmm_model_save(model_file
: str, hmm_model
, hmm_binsize
: cython
.int,
83 i_open_region
: cython
.int, i_nucleosomal_region
: cython
.int,
84 i_background_region
: cython
.int, hmm_type
: str):
85 if hmm_type
== "gaussian":
86 if hmm_model
.covariance_type
== "diag":
87 covars
= hmm_model
.covars_
.diagonal(axis1
=1, axis2
=2)
88 elif hmm_model
.covariance_type
== "full":
89 covars
= hmm_model
.covars_
91 raise Exception(f
"Unknown covariance type {hmm_model.covariance_type}")
92 with
open(model_file
, "w") as f
:
93 json
.dump({"startprob": hmm_model
.startprob_
.tolist(),
94 "transmat": hmm_model
.transmat_
.tolist(),
95 "means": hmm_model
.means_
.tolist(),
96 "covars": covars
.tolist(),
97 "covariance_type": hmm_model
.covariance_type
,
98 "n_features": int(hmm_model
.n_features
),
99 "i_open_region": int(i_open_region
),
100 "i_background_region": int(i_background_region
),
101 "i_nucleosomal_region": int(i_nucleosomal_region
),
102 "hmm_binsize": int(hmm_binsize
),
103 "hmm_type": hmm_type
}, f
)
104 if hmm_type
== "poisson":
105 with
open(model_file
, "w") as f
:
106 json
.dump({"startprob": hmm_model
.startprob_
.tolist(),
107 "transmat": hmm_model
.transmat_
.tolist(),
108 "lambdas": hmm_model
.lambdas_
.tolist(),
109 "n_features": int(hmm_model
.n_features
),
110 "i_open_region": int(i_open_region
),
111 "i_background_region": int(i_background_region
),
112 "i_nucleosomal_region": int(i_nucleosomal_region
),
113 "hmm_binsize": int(hmm_binsize
),
114 "hmm_type": hmm_type
}, f
)
118 def hmm_model_init(model_file
: str) -> list:
119 with
open(model_file
) as f
:
121 hmm_type
= m
.get("hmm_type", "gaussian") # if no model_type written, assume older file and use gaussian
122 if hmm_type
== "gaussian":
123 hmm_model
= GaussianHMM(n_components
=3, covariance_type
=m
["covariance_type"])
124 hmm_model
.means_
= np
.array(m
["means"])
125 hmm_model
.covars_
= np
.array(m
["covars"])
126 hmm_model
.covariance_type
= m
["covariance_type"]
127 if hmm_type
== "poisson":
128 hmm_model
= PoissonHMM(n_components
=3)
129 hmm_model
.lambdas_
= np
.array(m
["lambdas"])
130 hmm_model
.startprob_
= np
.array(m
["startprob"])
131 hmm_model
.transmat_
= np
.array(m
["transmat"])
132 hmm_model
.n_features
= m
["n_features"]
133 return [hmm_model
, m
["i_open_region"], m
["i_background_region"],
134 m
["i_nucleosomal_region"], m
["hmm_binsize"], hmm_type
]