1 # cython: language_level=3
3 # Time-stamp: <2022-09-15 16:13:48 Tao Liu>
5 """Module for Region classe.
7 This code is free software; you can redistribute it and/or modify it
8 under the terms of the BSD License (see the file LICENSE included with
12 # ------------------------------------
14 # ------------------------------------
19 # ------------------------------------
21 # ------------------------------------
23 from MACS3
.Utilities
.Constants
import *
25 # ------------------------------------
27 # ------------------------------------
29 from cpython cimport bool
31 # ------------------------------------
33 # ------------------------------------
34 __version__
= "Region $Revision$"
35 __author__
= "Tao Liu <vladimir.liu@gmail.com>"
36 __doc__
= "Region class"
38 # ------------------------------------
40 # ------------------------------------
42 # ------------------------------------
44 # ------------------------------------
46 """For plain region of chrom, start and end
55 self.__flag_sorted
= False
58 def __getitem__
(self, chrom
):
59 return self.regions
[ chrom
]
61 cpdef pop
( self, int n
):
62 # when called, pop the first n regions in Regions class. Self will be modified.
64 list clist
# for chromosomes
67 int n_taken
# remember the number of regions prepared
68 object ret
# returned Regions
71 raise Exception("None left")
73 clist
= sorted
(list(self.regions
.keys
()))
76 for chrom
in sorted
(clist
):
77 ret
.regions
[chrom
] = self.regions
[chrom
][:n_taken
]
78 self.regions
[chrom
] = self.regions
[chrom
][n_taken
:]
79 if not self.regions
[chrom
]:
80 # remove this chromosome if there is none left
81 self.regions
.pop
( chrom
)
82 tmp_l
= len( ret
.regions
[chrom
] )
86 #print( ret.total, self.total )
89 # when there is no need, quit loop
93 cpdef init_from_PeakIO
( self, object peaks
):
94 """Initialize the object with a PeakIO object.
96 Note: I intentionally forgot to check if peakio is actually a
107 for chrom
in sorted
(peaks
.get_chr_names
()):
108 ps
= peaks
.get_data_from_chrom
( chrom
)
109 self.regions
[chrom
] = []
110 for i
in range( len( ps
) ):
112 self.regions
[chrom
].append
( ( p
['start'], p
['end'] ) )
116 cpdef add_loc
( self, bytes chrom
, int start
, int end
):
117 if self.regions
.has_key
(chrom
):
118 self.regions
[chrom
].append
( (start
,end
) )
120 self.regions
[chrom
] = [ (start
,end
), ]
122 self.__flag_sorted
= False
129 if self.__flag_sorted
:
131 for chrom
in sorted
(self.regions
.keys
()):
132 self.regions
[chrom
].sort
()
133 self.__flag_sorted
= True
135 cpdef set get_chr_names
(self):
136 return set
( sorted
(self.regions
.keys
()) )
138 cpdef expand
( self, int flanking
):
145 for chrom
in sorted
(self.regions
.keys
()):
146 ps
= self.regions
[ chrom
]
147 for i
in range( len( ps
) ):
148 ps
[i
] = ( max(0, ps
[i
][0] - flanking
), ps
[i
][1] + flanking
)
150 self.regions
[chrom
] = ps
152 cpdef merge_overlap
( self ):
154 merge overlapping regions
158 int s_new_region
, e_new_region
, i
, j
159 dict regions
, new_regions
165 regions
= self.regions
168 chrs
= list(regions
.keys
())
171 for i
in range(len(chrs
)):
173 new_regions
[chrom
]=[]
174 n_append
= new_regions
[chrom
].append
176 regions_chr
= regions
[chrom
]
177 for i
in range( len(regions_chr
) ):
179 prev_region
= regions_chr
[i
]
182 if regions_chr
[i
][0] <= prev_region
[1]:
183 s_new_region
= prev_region
[0]
184 e_new_region
= regions_chr
[i
][1]
185 prev_region
= (s_new_region
,e_new_region
)
187 n_append
(prev_region
)
188 prev_region
= regions_chr
[i
]
190 n_append
(prev_region
)
191 self.total
+= len( new_regions
[chrom
] )
192 self.regions
= new_regions
196 cpdef write_to_bed
(self, fhd
):
202 chrs
= list(self.regions
.keys
())
204 for i
in range( len(chrs
) ):
206 for region
in self.regions
[chrom
]:
207 fhd
.write
( "%s\t%d\t%d\n" % (chrom
.decode
(),region
[0],region
[1] ) )
209 def __str__
( self ):
217 chrs
= list(self.regions
.keys
())
219 for i
in range( len(chrs
) ):
221 for region
in self.regions
[chrom
]:
222 ret
+= "%s\t%d\t%d\n" % (chrom
.decode
(),region
[0],region
[1] )
225 # cpdef object randomly_pick ( self, int n, int seed = 12345 ):
226 # """Shuffle the regions and get n regions out of it. Return a
227 # new Regions object.
234 # object ret_peakio, p
236 # chrs = sorted(list(self.peaks.keys()))
239 # all_pc.extend(self.peaks[chrom])
240 # random.seed( seed )
241 # all_pc = random.shuffle( all_pc )[:n]
242 # ret_peakio = PeakIO()
244 # ret_peakio.add_PeakContent ( p["chrom"], p )
248 cpdef
void exclude
(self, object regionio2
):
249 """ Remove overlapping regions in regionio2, another Region
254 dict regions1
, regions2
263 regions1
= self.regions
265 assert isinstance(regionio2
,Regions
)
267 regions2
= regionio2
.regions
270 chrs1
= list(regions1
.keys
())
271 chrs2
= list(regions2
.keys
())
273 #print(f"chromosome {k}")
274 if not chrs2
.count
(k
):
275 # no such chromosome in peaks1, then don't touch the peaks in this chromosome
276 ret_regions
[ k
] = regions1
[ k
]
277 self.total
+= len( ret_regions
[ k
] )
279 ret_regions
[ k
] = []
280 n_rl1
= len( regions1
[k
] )
281 n_rl2
= len( regions2
[k
] )
282 rl1_k
= iter
( regions1
[k
] ).__next__
283 rl2_k
= iter
( regions2
[k
] ).__next__
284 overlap_found
= False
290 # we do this until there is no r1 or r2 left.
291 if r2
[0] < r1
[1] and r1
[0] < r2
[1]:
292 # since we found an overlap, r1 will be skipped/excluded
293 # and move to the next r1
298 overlap_found
= False
303 # in this case, we need to move to the next r1,
304 # we will check if overlap_found is true, if not, we put r1 in a new dict
305 if not overlap_found
:
306 ret_regions
[ k
].append
( r1
)
310 overlap_found
= False
315 # in this case, we need to move the next r2
323 ret_regions
[ k
].extend
( regions1
[ k
][-n_rl1
-1:] )
324 self.total
+= len( ret_regions
[ k
] )
326 self.regions
= ret_regions
327 self.__flag_sorted
= False