Merge pull request #589 from tjni/clean-up-build-dependencies
[MACS.git] / MACS3 / Signal / HMMR_HMM.pyx
blobfb74cbc88a902b2b16f8f33b47b55a8f9888baf0
1 # cython: language_level=3
2 # cython: profile=True
3 # Time-stamp: <2022-10-04 15:14:15 Tao Liu>
5 """Module description:
7 This code is free software; you can redistribute it and/or modify it
8 under the terms of the BSD License (see the file LICENSE included with
9 the distribution).
10 """
11 # ------------------------------------
12 # python modules
13 # ------------------------------------
14 from math import sqrt
16 # ------------------------------------
17 # Other modules
18 # ------------------------------------
20 import numpy as np
21 cimport numpy as np
22 from cpython cimport bool
23 from hmmlearn import hmm
24 import json
25 # from hmmlearn cimport hmm
27 # ------------------------------------
28 # MACS3 modules
29 # ------------------------------------
30 from MACS3.Signal.Prob import pnorm2
32 # ------------------------------------
33 # Misc functions
34 # ------------------------------------
36 # https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.norm.html
37 cdef inline float get_weighted_density( int x, float m, float v, w ):
38 """Description:
40 parameters:
41 1. x: the observed value
42 2. m: the mean of gaussian
43 3. v: the variance of the gaussian
44 4. w: the weight
45 return value:
46 """
47 return w * pnorm2( float(x), m, v )
49 # ------------------------------------
50 # Classes
51 # ------------------------------------
52 # ------------------------------------
53 # public functions
54 # ------------------------------------
57 cpdef hmm_training( list training_data, list training_data_lengths, int n_states = 3, int random_seed = 12345, covar = 'full' ):
58 # training data should be in array like format: X = np.array([[.5, .3, .1, .1], [.6, .4, 0, 0]])
59 # if we do not want init_prob to be updated through learning, set params = 'tmc' and init_prob = initial_state otherwise it will be overwritten
60 # according to base documentation, if init_prob not stated, it is set to be equally likely for any state (1/ # of components)
61 # if we have other known parameters, we should set these (ie: means_weights, covariance_type etc.)
62 rs = np.random.RandomState(np.random.MT19937(np.random.SeedSequence(random_seed)))
63 hmm_model = hmm.GaussianHMM( n_components= n_states, covariance_type = covar, random_state = rs, verbose = False )
64 hmm_model = hmm_model.fit( training_data, training_data_lengths )
65 assert hmm_model.n_features == 4
66 return hmm_model
68 cpdef hmm_predict( list signals, list lens, hmm_model ):
69 predictions = hmm_model.predict_proba( signals, lens )
70 return predictions
72 cpdef void hmm_model_save( str model_file, object hmm_model, int hmm_binsize, int i_open_region, int i_nucleosomal_region, int i_background_region ):
73 if hmm_model.covariance_type == "diag":
74 covars = hmm_model.covars_.diagonal(axis1=1, axis2=2)
75 elif hmm_model.covariance_type == "full":
76 covars = hmm_model.covars_
77 else:
78 raise Exception(f"Unknown covariance type {hmm_model.covariance_type}")
79 with open( model_file, "w" ) as f:
80 json.dump( {"startprob":hmm_model.startprob_.tolist(),
81 "transmat":hmm_model.transmat_.tolist(),
82 "means":hmm_model.means_.tolist(),
83 "covars":covars.tolist(),
84 "covariance_type":hmm_model.covariance_type,
85 "n_features":int(hmm_model.n_features),
86 "i_open_region":int(i_open_region),
87 "i_background_region":int(i_background_region),
88 "i_nucleosomal_region":int(i_nucleosomal_region),
89 "hmm_binsize":int(hmm_binsize)}, f )
91 cpdef list hmm_model_init( str model_file ):
92 with open( model_file ) as f:
93 m = json.load( f )
94 hmm_model = hmm.GaussianHMM( n_components=3, covariance_type=m["covariance_type"] )
95 hmm_model.startprob_ = np.array(m["startprob"])
96 hmm_model.transmat_ = np.array(m["transmat"])
97 hmm_model.means_ = np.array(m["means"])
98 hmm_model.covars_ = np.array(m["covars"])
99 hmm_model.covariance_type = m["covariance_type"]
100 hmm_model.n_features = m["n_features"]
101 return [ hmm_model, m["i_open_region"], m["i_background_region"], m["i_nucleosomal_region"], m["hmm_binsize"] ]