implement PETrackII for fragment files from scATAC experiments
[MACS.git] / MACS3 / Commands / refinepeak_cmd.py
blobba9a4939e9d83e70b8c2c5f8b8d5d15c01121d43
1 # Time-stamp: <2024-10-11 11:11:00 Tao Liu>
3 """Description: refine peak summits
5 This code is free software; you can redistribute it and/or modify it
6 under the terms of the BSD License (see the file LICENSE included with
7 the distribution).
8 """
10 # ------------------------------------
11 # python modules
12 # ------------------------------------
14 import os
15 from collections import Counter
17 # ------------------------------------
18 # own python modules
19 # ------------------------------------
20 # from MACS3.Utilities.Constants import *
21 from MACS3.Utilities.OptValidator import opt_validate_refinepeak
22 # from MACS3.Signal.Prob import binomial_cdf_inv
23 from MACS3.IO.PeakIO import PeakIO
25 # ------------------------------------
26 # Main function
27 # ------------------------------------
30 def run(o_options):
31 """The Main function/pipeline for duplication filter.
33 """
34 # Parse options...
35 options = opt_validate_refinepeak(o_options)
36 # end of parsing commandline options
37 info = options.info
38 # warn = options.warn
39 # debug = options.debug
40 # error = options.error
42 if options.ofile:
43 outputfile = open(os.path.join(options.outdir, options.ofile), 'w')
44 options.oprefix = options.ofile
45 else:
46 outputfile = open(os.path.join(options.outdir, "%s_refinepeak.bed" % options.oprefix), "w")
48 peakio = open(options.bedfile, "rb")
49 peaks = PeakIO()
50 for l_p in peakio:
51 fs = l_p.rstrip().split()
52 peaks.add(fs[0], int(fs[1]), int(fs[2]), name=fs[3])
54 peaks.sort()
55 peakio.close()
57 # 1 Read tag files
58 info("read tag files...")
59 fwtrack = load_tag_files_options(options)
61 retval = fwtrack.compute_region_tags_from_peaks(peaks, find_summit, window_size=options.windowsize, cutoff=options.cutoff)
62 outputfile.write((b"\n".join([b"%s\t%d\t%d\t%s\t%.2f" % x for x in retval])).decode())
63 outputfile.close()
64 info("Done!")
67 def find_summit(chrom, plus, minus, peak_start, peak_end, name=b"peak", window_size=100, cutoff=5):
68 def left_sum(strand, pos, width=window_size):
69 return sum([strand[x] for x in strand if x <= pos and x >= pos - width])
71 def right_sum(strand, pos, width=window_size):
72 return sum([strand[x] for x in strand if x >= pos and x <= pos + width])
74 def left_forward(strand, pos):
75 return strand.get(pos, 0) - strand.get(pos-window_size, 0)
77 def right_forward(strand, pos):
78 return strand.get(pos + window_size, 0) - strand.get(pos, 0)
80 watson, crick = (Counter(plus), Counter(minus))
81 watson_left = left_sum(watson, peak_start)
82 crick_left = left_sum(crick, peak_start)
83 watson_right = right_sum(watson, peak_start)
84 crick_right = right_sum(crick, peak_start)
86 wtd_list = []
87 for j in range(peak_start, peak_end+1):
88 wtd_list.append(2 * (watson_left * crick_right)**0.5 - watson_right - crick_left)
89 watson_left += left_forward(watson, j)
90 watson_right += right_forward(watson, j)
91 crick_left += left_forward(crick, j)
92 crick_right += right_forward(crick, j)
94 wtd_max_val = max(wtd_list)
95 wtd_max_pos = wtd_list.index(wtd_max_val) + peak_start
97 # return (chrom, wtd_max_pos, wtd_max_pos+1, wtd_max_val)
99 if wtd_max_val > cutoff:
100 return (chrom, wtd_max_pos, wtd_max_pos+1, name+b"_R", wtd_max_val) # 'R'efined
101 else:
102 return (chrom, wtd_max_pos, wtd_max_pos+1, name+b"_F", wtd_max_val) # 'F'ailed
105 def load_tag_files_options(options):
106 """From the options, load alignment tags.
109 options.info("# read treatment tags...")
110 tp = options.parser(options.ifile[0], buffer_size=options.buffer_size)
111 treat = tp.build_fwtrack()
112 # treat.sort()
113 if len(options.ifile) > 1:
114 # multiple input
115 for tfile in options.ifile[1:]:
116 tp = options.parser(tfile, buffer_size=options.buffer_size)
117 treat = tp.append_fwtrack(treat)
118 # treat.sort()
119 treat.finalize()
120 return treat