1 # Time-stamp: <2024-10-11 11:11:00 Tao Liu>
3 """Description: refine peak summits
5 This code is free software; you can redistribute it and/or modify it
6 under the terms of the BSD License (see the file LICENSE included with
10 # ------------------------------------
12 # ------------------------------------
15 from collections
import Counter
17 # ------------------------------------
19 # ------------------------------------
20 # from MACS3.Utilities.Constants import *
21 from MACS3
.Utilities
.OptValidator
import opt_validate_refinepeak
22 # from MACS3.Signal.Prob import binomial_cdf_inv
23 from MACS3
.IO
.PeakIO
import PeakIO
25 # ------------------------------------
27 # ------------------------------------
31 """The Main function/pipeline for duplication filter.
35 options
= opt_validate_refinepeak(o_options
)
36 # end of parsing commandline options
39 # debug = options.debug
40 # error = options.error
43 outputfile
= open(os
.path
.join(options
.outdir
, options
.ofile
), 'w')
44 options
.oprefix
= options
.ofile
46 outputfile
= open(os
.path
.join(options
.outdir
, "%s_refinepeak.bed" % options
.oprefix
), "w")
48 peakio
= open(options
.bedfile
, "rb")
51 fs
= l_p
.rstrip().split()
52 peaks
.add(fs
[0], int(fs
[1]), int(fs
[2]), name
=fs
[3])
58 info("read tag files...")
59 fwtrack
= load_tag_files_options(options
)
61 retval
= fwtrack
.compute_region_tags_from_peaks(peaks
, find_summit
, window_size
=options
.windowsize
, cutoff
=options
.cutoff
)
62 outputfile
.write((b
"\n".join([b
"%s\t%d\t%d\t%s\t%.2f" % x
for x
in retval
])).decode())
67 def find_summit(chrom
, plus
, minus
, peak_start
, peak_end
, name
=b
"peak", window_size
=100, cutoff
=5):
68 def left_sum(strand
, pos
, width
=window_size
):
69 return sum([strand
[x
] for x
in strand
if x
<= pos
and x
>= pos
- width
])
71 def right_sum(strand
, pos
, width
=window_size
):
72 return sum([strand
[x
] for x
in strand
if x
>= pos
and x
<= pos
+ width
])
74 def left_forward(strand
, pos
):
75 return strand
.get(pos
, 0) - strand
.get(pos
-window_size
, 0)
77 def right_forward(strand
, pos
):
78 return strand
.get(pos
+ window_size
, 0) - strand
.get(pos
, 0)
80 watson
, crick
= (Counter(plus
), Counter(minus
))
81 watson_left
= left_sum(watson
, peak_start
)
82 crick_left
= left_sum(crick
, peak_start
)
83 watson_right
= right_sum(watson
, peak_start
)
84 crick_right
= right_sum(crick
, peak_start
)
87 for j
in range(peak_start
, peak_end
+1):
88 wtd_list
.append(2 * (watson_left
* crick_right
)**0.5 - watson_right
- crick_left
)
89 watson_left
+= left_forward(watson
, j
)
90 watson_right
+= right_forward(watson
, j
)
91 crick_left
+= left_forward(crick
, j
)
92 crick_right
+= right_forward(crick
, j
)
94 wtd_max_val
= max(wtd_list
)
95 wtd_max_pos
= wtd_list
.index(wtd_max_val
) + peak_start
97 # return (chrom, wtd_max_pos, wtd_max_pos+1, wtd_max_val)
99 if wtd_max_val
> cutoff
:
100 return (chrom
, wtd_max_pos
, wtd_max_pos
+1, name
+b
"_R", wtd_max_val
) # 'R'efined
102 return (chrom
, wtd_max_pos
, wtd_max_pos
+1, name
+b
"_F", wtd_max_val
) # 'F'ailed
105 def load_tag_files_options(options
):
106 """From the options, load alignment tags.
109 options
.info("# read treatment tags...")
110 tp
= options
.parser(options
.ifile
[0], buffer_size
=options
.buffer_size
)
111 treat
= tp
.build_fwtrack()
113 if len(options
.ifile
) > 1:
115 for tfile
in options
.ifile
[1:]:
116 tp
= options
.parser(tfile
, buffer_size
=options
.buffer_size
)
117 treat
= tp
.append_fwtrack(treat
)