implement PETrackII for fragment files from scATAC experiments
[MACS.git] / MACS3 / Signal / HMMR_HMM.py
blob8471d56772a5569f0ed62d0482a089555d08d763
1 # cython: language_level=3
2 # cython: profile=True
3 # Time-stamp: <2024-10-04 11:45:30 Tao Liu>
5 """Module description:
7 This code is free software; you can redistribute it and/or modify it
8 under the terms of the BSD License (see the file LICENSE included with
9 the distribution).
10 """
11 # ------------------------------------
12 # python modules
13 # ------------------------------------
14 import cython
15 import numpy as np
16 from cython.cimports import numpy as cnp
17 from hmmlearn.hmm import GaussianHMM, PoissonHMM
18 import json
20 # ------------------------------------
21 # MACS3 modules
22 # ------------------------------------
23 from MACS3.Signal.Prob import pnorm2
25 # ------------------------------------
26 # Misc functions
27 # ------------------------------------
29 # https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.norm.html
32 @cython.cfunc
33 @cython.inline
34 def get_weighted_density(x: cython.int, m: cython.float,
35 v: cython.float, w: cython.float) -> cython.float:
36 """Description:
38 parameters:
39 1. x: the observed value
40 2. m: the mean of gaussian
41 3. v: the variance of the gaussian
42 4. w: the weight
43 return value:
44 """
45 return w * pnorm2(float(x), m, v)
47 # ------------------------------------
48 # Classes
49 # ------------------------------------
51 # ------------------------------------
52 # public functions
53 # ------------------------------------
56 @cython.ccall
57 def hmm_training(training_data: list, training_data_lengths: list,
58 n_states: cython.int = 3, random_seed: cython.int = 12345,
59 covar: str = 'full', hmm_type: str = 'gaussian'):
60 # training data should be in array like format: X = np.array([[.5, .3, .1, .1], [.6, .4, 0, 0]])
61 # if we do not want init_prob to be updated through learning, set params = 'tmc' and init_prob = initial_state otherwise it will be overwritten
62 # according to base documentation, if init_prob not stated, it is set to be equally likely for any state (1/ # of components)
63 # if we have other known parameters, we should set these (ie: means_weights, covariance_type etc.)
64 rs = np.random.RandomState(np.random.MT19937(np.random.SeedSequence(random_seed)))
65 if hmm_type == "gaussian":
66 hmm_model = GaussianHMM(n_components=n_states, covariance_type=covar, random_state=rs, verbose=False)
67 if hmm_type == "poisson":
68 hmm_model = PoissonHMM(n_components=n_states, random_state=rs, verbose=False)
69 hmm_model = hmm_model.fit(training_data, training_data_lengths)
70 assert hmm_model.n_features == 4
71 return hmm_model
74 @cython.ccall
75 @cython.returns(cnp.ndarray)
76 def hmm_predict(signals: list, lens: list, hmm_model):
77 predictions = hmm_model.predict_proba(signals, lens)
78 return predictions
81 @cython.ccall
82 def hmm_model_save(model_file: str, hmm_model, hmm_binsize: cython.int,
83 i_open_region: cython.int, i_nucleosomal_region: cython.int,
84 i_background_region: cython.int, hmm_type: str):
85 if hmm_type == "gaussian":
86 if hmm_model.covariance_type == "diag":
87 covars = hmm_model.covars_.diagonal(axis1=1, axis2=2)
88 elif hmm_model.covariance_type == "full":
89 covars = hmm_model.covars_
90 else:
91 raise Exception(f"Unknown covariance type {hmm_model.covariance_type}")
92 with open(model_file, "w") as f:
93 json.dump({"startprob": hmm_model.startprob_.tolist(),
94 "transmat": hmm_model.transmat_.tolist(),
95 "means": hmm_model.means_.tolist(),
96 "covars": covars.tolist(),
97 "covariance_type": hmm_model.covariance_type,
98 "n_features": int(hmm_model.n_features),
99 "i_open_region": int(i_open_region),
100 "i_background_region": int(i_background_region),
101 "i_nucleosomal_region": int(i_nucleosomal_region),
102 "hmm_binsize": int(hmm_binsize),
103 "hmm_type": hmm_type}, f)
104 if hmm_type == "poisson":
105 with open(model_file, "w") as f:
106 json.dump({"startprob": hmm_model.startprob_.tolist(),
107 "transmat": hmm_model.transmat_.tolist(),
108 "lambdas": hmm_model.lambdas_.tolist(),
109 "n_features": int(hmm_model.n_features),
110 "i_open_region": int(i_open_region),
111 "i_background_region": int(i_background_region),
112 "i_nucleosomal_region": int(i_nucleosomal_region),
113 "hmm_binsize": int(hmm_binsize),
114 "hmm_type": hmm_type}, f)
117 @cython.ccall
118 def hmm_model_init(model_file: str) -> list:
119 with open(model_file) as f:
120 m = json.load(f)
121 hmm_type = m.get("hmm_type", "gaussian") # if no model_type written, assume older file and use gaussian
122 if hmm_type == "gaussian":
123 hmm_model = GaussianHMM(n_components=3, covariance_type=m["covariance_type"])
124 hmm_model.means_ = np.array(m["means"])
125 hmm_model.covars_ = np.array(m["covars"])
126 hmm_model.covariance_type = m["covariance_type"]
127 if hmm_type == "poisson":
128 hmm_model = PoissonHMM(n_components=3)
129 hmm_model.lambdas_ = np.array(m["lambdas"])
130 hmm_model.startprob_ = np.array(m["startprob"])
131 hmm_model.transmat_ = np.array(m["transmat"])
132 hmm_model.n_features = m["n_features"]
133 return [hmm_model, m["i_open_region"], m["i_background_region"],
134 m["i_nucleosomal_region"], m["hmm_binsize"], hmm_type]