1 # Time-stamp: <2019-09-25 10:03:13 taoliu>
"""Description: refine peak summits

This code is free software; you can redistribute it and/or modify it
under the terms of the BSD License (see the file LICENSE included with
the distribution).
"""
# ------------------------------------
# python modules
# ------------------------------------
import os
from collections import Counter

# ------------------------------------
# own python modules
# ------------------------------------
from MACS2.OptValidator import opt_validate_refinepeak as opt_validate
from MACS2.Prob import binomial_cdf_inv
from MACS2.IO.BedGraphIO import bedGraphIO, genericBedIO
from MACS2.IO.PeakIO import PeakIO
from MACS2.Constants import *
29 # ------------------------------------
31 # ------------------------------------
def run( o_options ):
    """The Main function/pipeline for refining peak summits.

    Reads peak regions from a BED file, loads alignment tags from the
    input file(s), computes a refined summit per peak via ``find_summit``,
    and writes the results as a BED-like text file.

    :param o_options: raw command-line options namespace; validated here
        through ``opt_validate``.
    """
    # Parse options...
    options = opt_validate( o_options )
    # end of parsing commandline options
    info = options.info

    # Choose the output file: an explicit ofile wins, otherwise build the
    # name from the output prefix.
    # NOTE(review): the branch condition was not visible in the damaged
    # source; `options.ofile` as the selector is assumed -- verify.
    if options.ofile:
        outputfile = open( os.path.join( options.outdir, options.ofile ), 'w' )
        options.oprefix = options.ofile
    else:
        outputfile = open( os.path.join( options.outdir, "%s_refinepeak.bed" % options.oprefix ), "w" )

    # Load peak regions (chrom, start, end, name) from the BED file.
    # Opened in binary mode: downstream code works with bytes fields.
    peaks = PeakIO()
    with open( options.bedfile, "rb" ) as peakio:
        for l in peakio:
            fs = l.rstrip().split()
            peaks.add( fs[0], int(fs[1]), int(fs[2]), name=fs[3] )

    info("read tag files...")
    fwtrack = load_tag_files_options( options )

    # Apply find_summit to each peak's plus/minus strand tag positions.
    retval = fwtrack.compute_region_tags_from_peaks( peaks, find_summit, window_size = options.windowsize, cutoff = options.cutoff )
    # retval items are (chrom, start, end, name, score) tuples with bytes
    # fields, hence the b"..." format string and the final decode().
    outputfile.write( (b"\n".join( [b"%s\t%d\t%d\t%s\t%.2f" % x for x in retval] )).decode() )
    # Close explicitly: the close was not visible in the damaged source but
    # is required to flush results and avoid a file-handle leak.
    outputfile.close()
def find_summit(chrom, plus, minus, peak_start, peak_end, name = b"peak", window_size = 100, cutoff = 5):
    """Locate the summit of one peak by balancing plus/minus strand tags.

    For every position j in [peak_start, peak_end], scores the balance of
    Watson (plus-strand) tags immediately left of j against Crick
    (minus-strand) tags immediately right of j within ``window_size``:

        score(j) = 2*sqrt(watson_left * crick_right) - watson_right - crick_left

    The position with the maximal score is reported as the summit.

    :param chrom: chromosome name (bytes), passed through to the result.
    :param plus: iterable of plus-strand tag 5' positions.
    :param minus: iterable of minus-strand tag 5' positions.
    :param peak_start: first candidate summit position (inclusive).
    :param peak_end: last candidate summit position (inclusive).
    :param name: peak name (bytes); "_R" or "_F" is appended.
    :param window_size: half-window (bp) for the strand sums.
    :param cutoff: minimal score for the summit to count as refined.
    :returns: (chrom, summit, summit+1, name+b"_R"|b"_F", max_score);
        "_R" means refined (score > cutoff), "_F" means failed.
    """
    # Windowed tag counts around pos; width binds window_size at def time.
    left_sum = lambda strand, pos, width = window_size: sum([strand[x] for x in strand if x <= pos and x >= pos - width])
    right_sum = lambda strand, pos, width = window_size: sum([strand[x] for x in strand if x >= pos and x <= pos + width])
    # Incremental deltas for sliding the window one base to the right.
    left_forward = lambda strand, pos: strand.get(pos, 0) - strand.get(pos - window_size, 0)
    right_forward = lambda strand, pos: strand.get(pos + window_size, 0) - strand.get(pos, 0)

    watson, crick = (Counter(plus), Counter(minus))
    # Initialize the four window sums at peak_start; the loop below keeps
    # them up to date with O(1) sliding-window updates per position.
    watson_left = left_sum(watson, peak_start)
    crick_left = left_sum(crick, peak_start)
    watson_right = right_sum(watson, peak_start)
    crick_right = right_sum(crick, peak_start)

    # Restored: this initializer was missing in the damaged source, which
    # made the .append below a NameError.
    wtd_list = []
    for j in range(peak_start, peak_end + 1):
        # Geometric-mean reward for balanced flanking tags, penalized by
        # same-side tags (watson right of j, crick left of j).
        wtd_list.append(2 * (watson_left * crick_right)**0.5 - watson_right - crick_left)
        watson_left += left_forward(watson, j)
        watson_right += right_forward(watson, j)
        crick_left += left_forward(crick, j)
        crick_right += right_forward(crick, j)

    wtd_max_val = max(wtd_list)
    # .index returns the FIRST maximal position on ties.
    wtd_max_pos = wtd_list.index(wtd_max_val) + peak_start

    if wtd_max_val > cutoff:
        return (chrom, wtd_max_pos, wtd_max_pos + 1, name + b"_R", wtd_max_val)  # 'R'efined
    else:
        return (chrom, wtd_max_pos, wtd_max_pos + 1, name + b"_F", wtd_max_val)  # 'F'ailed
107 # def find_summit(bed_file, sam_file, window_size, output_file):
108 # def count_by_strand(ialign):
109 # pred = lambda x:x.is_reverse
110 # watson_5_end = lambda x:x.pos
111 # crick_5_end = lambda x:x.aend
112 # ialign1, ialign2 = tee(ialign)
114 # return (Counter(map(watson_5_end,
115 # ifilterfalse(pred, ialign1))),
116 # Counter(map(crick_5_end,
117 # ifilter(pred, ialign2))))
119 # left_sum = lambda strand, pos, width = window_size: sum([strand[x] for x in strand if x <= pos and x >= pos - width])
120 # right_sum = lambda strand, pos, width = window_size: sum([strand[x] for x in strand if x >= pos and x <= pos + width])
121 # left_forward = lambda strand, pos: strand.get(pos,0) - strand.get(pos-window_size, 0)
122 # right_forward = lambda strand, pos: strand.get(pos + window_size, 0) - strand.get(pos, 0)
123 # samfile = pysam.Samfile(sam_file, "rb" )
126 # with open(bed_file) as bfile, open(output_file,"w") as ofile:
130 # peak_start = int(i[1])
131 # peak_end = int(i[2])
133 # watson, crick = count_by_strand(samfile.fetch(chrom, peak_start-window_size, peak_end+window_size))
134 # watson_left = left_sum(watson, peak_start)
135 # crick_left = left_sum(crick, peak_start)
136 # watson_right = right_sum(watson, peak_start)
137 # crick_right = right_sum(crick, peak_start)
140 # for j in range(peak_start, peak_end+1):
141 # wtd_list.append(2 * sqrt(watson_left * crick_right) - watson_right - crick_left)
142 # watson_left += left_forward(watson, j)
143 # watson_right += right_forward(watson, j)
144 # crick_left += left_forward(crick, j)
145 # crick_right += right_forward(crick,j)
147 # wtd_max_val = max(wtd_list)
148 # wtd_max_pos = wtd_list.index(wtd_max_val) + peak_start
151 # ofile.write("{}\t{}\t{}\tSPP_summit_{}\t{:.2f}\n".format(chrom,
160 def load_tag_files_options ( options
):
161 """From the options, load alignment tags.
164 options
.info("# read treatment tags...")
165 tp
= options
.parser(options
.ifile
[0], buffer_size
=options
.buffer_size
)
166 treat
= tp
.build_fwtrack()
168 if len(options
.ifile
) > 1:
170 for tfile
in options
.ifile
[1:]:
171 tp
= options
.parser(tfile
, buffer_size
=options
.buffer_size
)
172 treat
= tp
.append_fwtrack( treat
)