# MACS2/refinepeak_cmd.py
# Time-stamp: <2019-09-25 10:03:13 taoliu>

"""Description: refine peak summits

This code is free software; you can redistribute it and/or modify it
under the terms of the BSD License (see the file LICENSE included with
the distribution).
"""

# ------------------------------------
# python modules
# ------------------------------------

import os
import sys
import logging
from collections import Counter

# ------------------------------------
# own python modules
# ------------------------------------
from MACS2.OptValidator import opt_validate_refinepeak as opt_validate
from MACS2.Prob import binomial_cdf_inv
from MACS2.IO.BedGraphIO import bedGraphIO, genericBedIO
from MACS2.IO.PeakIO import PeakIO
from MACS2.Constants import *

# ------------------------------------
# Main function
# ------------------------------------
def run( o_options ):
    """The Main function/pipeline for refining peak summits.

    """
    # Parse options...
    options = opt_validate( o_options )
    # end of parsing commandline options
    info = options.info
    warn = options.warn
    debug = options.debug
    error = options.error

    if options.ofile:
        outputfile = open( os.path.join( options.outdir, options.ofile ), 'w' )
        options.oprefix = options.ofile
    else:
        outputfile = open( os.path.join( options.outdir, "%s_refinepeak.bed" % options.oprefix), "w" )
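
    # Candidate peak regions come from a BED-like file; only the first four
    # columns (chrom, start, end, name) are read below. A minimal input line
    # looks like (illustrative): chr1<TAB>900<TAB>1100<TAB>peak_1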
    peakio = open(options.bedfile,"rb")
    peaks = PeakIO()
    for l in peakio:
        fs = l.rstrip().split()
        peaks.add( fs[0], int(fs[1]), int(fs[2]), name=fs[3] )

    peaks.sort()
    peakio.close()

    #1 Read tag files
    info("read tag files...")
    fwtrack = load_tag_files_options (options)

    retval = fwtrack.compute_region_tags_from_peaks( peaks, find_summit, window_size = options.windowsize, cutoff = options.cutoff )
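    # Each refined summit is written as one BED5 line
    # (chrom, start, end, name, score), e.g. (illustrative values):
    #   chr1    1000    1001    peak_1_R    12.35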
    outputfile.write( (b"\n".join( [b"%s\t%d\t%d\t%s\t%.2f" % x for x in retval] )).decode() )
    outputfile.close()
    info("Done!")

def find_summit(chrom, plus, minus, peak_start, peak_end, name = b"peak", window_size=100, cutoff = 5):
    # Windowed tag counts: left_sum/right_sum count tags within `width` bp to
    # the left/right of `pos`; left_forward/right_forward give the change in
    # those counts when the window slides one bp to the right.
    left_sum = lambda strand, pos, width = window_size: sum([strand[x] for x in strand if x <= pos and x >= pos - width])
    right_sum = lambda strand, pos, width = window_size: sum([strand[x] for x in strand if x >= pos and x <= pos + width])
    left_forward = lambda strand, pos: strand.get(pos,0) - strand.get(pos-window_size, 0)
    right_forward = lambda strand, pos: strand.get(pos + window_size, 0) - strand.get(pos, 0)
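    # For example (illustrative numbers): with window_size=100 and
    # strand = Counter({950: 2, 1000: 3, 1100: 1}),
    # left_sum(strand, 1000) == 5 and right_sum(strand, 1000) == 4.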

    watson, crick = (Counter(plus), Counter(minus))
    watson_left = left_sum(watson, peak_start)
    crick_left = left_sum(crick, peak_start)
    watson_right = right_sum(watson, peak_start)
    crick_right = right_sum(crick, peak_start)

    # Slide a one-bp step window across the peak, scoring each position with
    # an SPP-style strand-asymmetry statistic: forward-strand tags should pile
    # up just upstream (left) of a true summit and reverse-strand tags just
    # downstream (right), so the score rewards watson_left/crick_right and
    # penalizes watson_right/crick_left.
    wtd_list = []
    for j in range(peak_start, peak_end+1):
        wtd_list.append(2 * (watson_left * crick_right)**0.5 - watson_right - crick_left)
        watson_left += left_forward(watson, j)
        watson_right += right_forward(watson, j)
        crick_left += left_forward(crick, j)
        crick_right += right_forward(crick,j)

    wtd_max_val = max(wtd_list)
    wtd_max_pos = wtd_list.index(wtd_max_val) + peak_start

    #return (chrom, wtd_max_pos, wtd_max_pos+1, wtd_max_val)

    if wtd_max_val > cutoff:
        return (chrom, wtd_max_pos, wtd_max_pos+1, name+b"_R" , wtd_max_val) # 'R'efined
    else:
        return (chrom, wtd_max_pos, wtd_max_pos+1, name+b"_F" , wtd_max_val) # 'F'ailed

    #return "{}\t{}\t{}\tRefinePeak_summit\t{:.2f}\n".format(chrom,
    #                                                        wtd_max_pos,
    #                                                        wtd_max_pos+1,
    #                                                        wtd_max_val,)
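
# Illustrative sketch, not part of the pipeline: `plus` and `minus` are
# assumed to be sequences of forward-/reverse-strand tag 5' positions
# (anything Counter() can tally), as supplied via
# compute_region_tags_from_peaks. With toy data such as
#
#   plus  = [995, 995, 1000, 1000, 1000, 1005]
#   minus = [1095, 1100, 1100, 1100, 1105]
#   find_summit(b"chr1", plus, minus, 990, 1110, name=b"peak_1")
#
# the summit lands where forward tags to the left and reverse tags to the
# right are jointly maximal; the name is suffixed with b"_R" when the best
# score exceeds `cutoff` and b"_F" otherwise.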

# def find_summit(bed_file, sam_file, window_size, output_file):
#     def count_by_strand(ialign):
#         pred = lambda x:x.is_reverse
#         watson_5_end = lambda x:x.pos
#         crick_5_end = lambda x:x.aend
#         ialign1, ialign2 = tee(ialign)
#
#         return (Counter(map(watson_5_end,
#                             ifilterfalse(pred, ialign1))),
#                 Counter(map(crick_5_end,
#                             ifilter(pred, ialign2))))
#
#     left_sum = lambda strand, pos, width = window_size: sum([strand[x] for x in strand if x <= pos and x >= pos - width])
#     right_sum = lambda strand, pos, width = window_size: sum([strand[x] for x in strand if x >= pos and x <= pos + width])
#     left_forward = lambda strand, pos: strand.get(pos,0) - strand.get(pos-window_size, 0)
#     right_forward = lambda strand, pos: strand.get(pos + window_size, 0) - strand.get(pos, 0)
#     samfile = pysam.Samfile(sam_file, "rb" )
#
#     cnt = 0
#     with open(bed_file) as bfile, open(output_file,"w") as ofile:
#         for i in bfile:
#             i = i.split("\t")
#             chrom = i[0]
#             peak_start = int(i[1])
#             peak_end = int(i[2])
#
#             watson, crick = count_by_strand(samfile.fetch(chrom, peak_start-window_size, peak_end+window_size))
#             watson_left = left_sum(watson, peak_start)
#             crick_left = left_sum(crick, peak_start)
#             watson_right = right_sum(watson, peak_start)
#             crick_right = right_sum(crick, peak_start)
#
#             wtd_list = []
#             for j in range(peak_start, peak_end+1):
#                 wtd_list.append(2 * sqrt(watson_left * crick_right) - watson_right - crick_left)
#                 watson_left += left_forward(watson, j)
#                 watson_right += right_forward(watson, j)
#                 crick_left += left_forward(crick, j)
#                 crick_right += right_forward(crick,j)
#
#             wtd_max_val = max(wtd_list)
#             wtd_max_pos = wtd_list.index(wtd_max_val) + peak_start
#             cnt += 1
#
#             ofile.write("{}\t{}\t{}\tSPP_summit_{}\t{:.2f}\n".format(chrom,
#                                                                      wtd_max_pos,
#                                                                      wtd_max_pos+1,
#                                                                      cnt,
#                                                                      wtd_max_val,))
#     samfile.close()

def load_tag_files_options ( options ):
    """From the options, load alignment tags.

    """
    options.info("# read treatment tags...")
    tp = options.parser(options.ifile[0], buffer_size=options.buffer_size)
    treat = tp.build_fwtrack()
    #treat.sort()
    if len(options.ifile) > 1:
        # multiple input
        for tfile in options.ifile[1:]:
            tp = options.parser(tfile, buffer_size=options.buffer_size)
            treat = tp.append_fwtrack( treat )
            #treat.sort()
    treat.finalize()
    return treat
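
# Typical use goes through the `macs2 refinepeak` subcommand rather than
# importing this module directly; a sketch, assuming BAM input and the flag
# names listed in the refinepeak help text:
#
#   macs2 refinepeak -b candidate_peaks.bed -i treatment.bam -f BAM \
#                    -w 100 -o refined_peaks.bed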