1 # Time-stamp: <2019-09-25 10:03:13 taoliu>
"""Description: refine peak summits

This code is free software; you can redistribute it and/or modify it
under the terms of the BSD License (see the file LICENSE included with
the distribution).
"""
# ------------------------------------
# python modules
# ------------------------------------
import os
from collections import Counter

# ------------------------------------
# own python modules
# ------------------------------------
from MACS2.OptValidator import opt_validate_refinepeak as opt_validate
from MACS2.Prob import binomial_cdf_inv
from MACS2.IO.BedGraphIO import bedGraphIO, genericBedIO
from MACS2.IO.PeakIO import PeakIO
from MACS2.Constants import *
29 # ------------------------------------
31 # ------------------------------------
def run( o_options ):
    """The Main function/pipeline for refining peak summits.

    Reads peak regions from a BED file, loads alignment tags from the
    input file(s), computes a refined summit per peak via ``find_summit``,
    and writes the results as a BED-like text file.

    :param o_options: raw command-line options namespace; validated here
        through ``opt_validate``.
    """
    # Parse options...
    options = opt_validate( o_options )
    # end of parsing commandline options
    info = options.info

    # Choose the output file: an explicit ofile wins, otherwise build the
    # name from the output prefix.
    # NOTE(review): the branch condition was not visible in the damaged
    # source; `options.ofile` as the selector is assumed -- verify.
    if options.ofile:
        outputfile = open( os.path.join( options.outdir, options.ofile ), 'w' )
        options.oprefix = options.ofile
    else:
        outputfile = open( os.path.join( options.outdir, "%s_refinepeak.bed" % options.oprefix ), "w" )

    # Load peak regions (chrom, start, end, name) from the BED file.
    # Opened in binary mode: downstream code works with bytes fields.
    peaks = PeakIO()
    with open( options.bedfile, "rb" ) as peakio:
        for l in peakio:
            fs = l.rstrip().split()
            peaks.add( fs[0], int(fs[1]), int(fs[2]), name=fs[3] )

    info("read tag files...")
    fwtrack = load_tag_files_options( options )

    # Apply find_summit to each peak's plus/minus strand tag positions.
    retval = fwtrack.compute_region_tags_from_peaks( peaks, find_summit, window_size = options.windowsize, cutoff = options.cutoff )
    # retval items are (chrom, start, end, name, score) tuples with bytes
    # fields, hence the b"..." format string and the final decode().
    outputfile.write( (b"\n".join( [b"%s\t%d\t%d\t%s\t%.2f" % x for x in retval] )).decode() )
    # Close explicitly: the close was not visible in the damaged source but
    # is required to flush results and avoid a file-handle leak.
    outputfile.close()
def find_summit(chrom, plus, minus, peak_start, peak_end, name = b"peak", window_size = 100, cutoff = 5):
    """Locate the summit of one peak by balancing plus/minus strand tags.

    For every position j in [peak_start, peak_end], scores the balance of
    Watson (plus-strand) tags immediately left of j against Crick
    (minus-strand) tags immediately right of j within ``window_size``:

        score(j) = 2*sqrt(watson_left * crick_right) - watson_right - crick_left

    The position with the maximal score is reported as the summit.

    :param chrom: chromosome name (bytes), passed through to the result.
    :param plus: iterable of plus-strand tag 5' positions.
    :param minus: iterable of minus-strand tag 5' positions.
    :param peak_start: first candidate summit position (inclusive).
    :param peak_end: last candidate summit position (inclusive).
    :param name: peak name (bytes); "_R" or "_F" is appended.
    :param window_size: half-window (bp) for the strand sums.
    :param cutoff: minimal score for the summit to count as refined.
    :returns: (chrom, summit, summit+1, name+b"_R"|b"_F", max_score);
        "_R" means refined (score > cutoff), "_F" means failed.
    """
    # Windowed tag counts around pos; width binds window_size at def time.
    left_sum = lambda strand, pos, width = window_size: sum([strand[x] for x in strand if x <= pos and x >= pos - width])
    right_sum = lambda strand, pos, width = window_size: sum([strand[x] for x in strand if x >= pos and x <= pos + width])
    # Incremental deltas for sliding the window one base to the right.
    left_forward = lambda strand, pos: strand.get(pos, 0) - strand.get(pos - window_size, 0)
    right_forward = lambda strand, pos: strand.get(pos + window_size, 0) - strand.get(pos, 0)

    watson, crick = (Counter(plus), Counter(minus))
    # Initialize the four window sums at peak_start; the loop below keeps
    # them up to date with O(1) sliding-window updates per position.
    watson_left = left_sum(watson, peak_start)
    crick_left = left_sum(crick, peak_start)
    watson_right = right_sum(watson, peak_start)
    crick_right = right_sum(crick, peak_start)

    # Restored: this initializer was missing in the damaged source, which
    # made the .append below a NameError.
    wtd_list = []
    for j in range(peak_start, peak_end + 1):
        # Geometric-mean reward for balanced flanking tags, penalized by
        # same-side tags (watson right of j, crick left of j).
        wtd_list.append(2 * (watson_left * crick_right)**0.5 - watson_right - crick_left)
        watson_left += left_forward(watson, j)
        watson_right += right_forward(watson, j)
        crick_left += left_forward(crick, j)
        crick_right += right_forward(crick, j)

    wtd_max_val = max(wtd_list)
    # .index returns the FIRST maximal position on ties.
    wtd_max_pos = wtd_list.index(wtd_max_val) + peak_start

    if wtd_max_val > cutoff:
        return (chrom, wtd_max_pos, wtd_max_pos + 1, name + b"_R", wtd_max_val)  # 'R'efined
    else:
        return (chrom, wtd_max_pos, wtd_max_pos + 1, name + b"_F", wtd_max_val)  # 'F'ailed
107 # def find_summit(bed_file, sam_file, window_size, output_file):
108 # def count_by_strand(ialign):
109 # pred = lambda x:x.is_reverse
110 # watson_5_end = lambda x:x.pos
111 # crick_5_end = lambda x:x.aend
112 # ialign1, ialign2 = tee(ialign)
114 # return (Counter(map(watson_5_end,
115 # ifilterfalse(pred, ialign1))),
116 # Counter(map(crick_5_end,
117 # ifilter(pred, ialign2))))
119 # left_sum = lambda strand, pos, width = window_size: sum([strand[x] for x in strand if x <= pos and x >= pos - width])
120 # right_sum = lambda strand, pos, width = window_size: sum([strand[x] for x in strand if x >= pos and x <= pos + width])
121 # left_forward = lambda strand, pos: strand.get(pos,0) - strand.get(pos-window_size, 0)
122 # right_forward = lambda strand, pos: strand.get(pos + window_size, 0) - strand.get(pos, 0)
123 # samfile = pysam.Samfile(sam_file, "rb" )
126 # with open(bed_file) as bfile, open(output_file,"w") as ofile:
130 # peak_start = int(i[1])
131 # peak_end = int(i[2])
133 # watson, crick = count_by_strand(samfile.fetch(chrom, peak_start-window_size, peak_end+window_size))
134 # watson_left = left_sum(watson, peak_start)
135 # crick_left = left_sum(crick, peak_start)
136 # watson_right = right_sum(watson, peak_start)
137 # crick_right = right_sum(crick, peak_start)
140 # for j in range(peak_start, peak_end+1):
141 # wtd_list.append(2 * sqrt(watson_left * crick_right) - watson_right - crick_left)
142 # watson_left += left_forward(watson, j)
143 # watson_right += right_forward(watson, j)
144 # crick_left += left_forward(crick, j)
145 # crick_right += right_forward(crick,j)
147 # wtd_max_val = max(wtd_list)
148 # wtd_max_pos = wtd_list.index(wtd_max_val) + peak_start
151 # ofile.write("{}\t{}\t{}\tSPP_summit_{}\t{:.2f}\n".format(chrom,
160 def load_tag_files_options ( options
):
161 """From the options, load alignment tags.
164 options
.info("# read treatment tags...")
165 tp
= options
.parser(options
.ifile
[0], buffer_size
=options
.buffer_size
)
166 treat
= tp
.build_fwtrack()
168 if len(options
.ifile
) > 1:
170 for tfile
in options
.ifile
[1:]:
171 tp
= options
.parser(tfile
, buffer_size
=options
.buffer_size
)
172 treat
= tp
.append_fwtrack( treat
)