Merge pull request #378 from taoliu/fix_setup_script_364
[MACS.git] / MACS2 / randsample_cmd.py
blob01b92247874719876450199c20b0d865baf6939a
1 # Time-stamp: <2019-09-20 11:36:33 taoliu>
3 """Description: Random sample certain number/percentage of tags.
5 This code is free software; you can redistribute it and/or modify it
6 under the terms of the BSD License (see the file LICENSE included with
7 the distribution).
8 """
10 # ------------------------------------
11 # python modules
12 # ------------------------------------
14 import os
15 import sys
16 import logging
18 # ------------------------------------
19 # own python modules
20 # ------------------------------------
21 from MACS2.OptValidator import opt_validate_randsample as opt_validate
22 from MACS2.Constants import *
24 # ------------------------------------
25 # Main function
26 # ------------------------------------
def run( options0 ):
    """Randomly sample tags/fragments from the input and write them as BED.

    The raw options are passed through opt_validate(); the validated
    object supplies the logging callables, the input format, the output
    location and the sampling parameters used below.

    options0 -- command line options object, as accepted by opt_validate().

    The sampled track is written to the file
    os.path.join(options.outdir, options.outputfile) when
    options.outputfile is set, otherwise to sys.stdout. An output file
    opened here is always closed before this function returns or exits.

    Calls sys.exit(1) when options.number is larger than the total number
    of tags/fragments read from the input.
    """
    options = opt_validate( options0 )
    # end of parsing commandline options
    info = options.info
    error = options.error

    options.PE_MODE = options.format in ('BAMPE','BEDPE')

    #0 check output file
    # Opened before any input is read, so an unwritable destination is
    # reported before the alignment files are loaded.
    if options.outputfile:
        outfhd = open( os.path.join( options.outdir, options.outputfile ), "w" )
    else:
        outfhd = sys.stdout

    try:
        #1 Read tag files
        if options.PE_MODE:
            info("# read input file in Paired-end mode.")
            treat = load_frag_files_options ( options ) # return PETrackI object
            t0 = treat.total # total fragments
            info("# total fragments/pairs in alignment file: %d" % (t0) )
        else:
            info("read tag files...")
            treat = load_tag_files_options (options)

            info("tag size = %d" % options.tsize)
            treat.fw = options.tsize

            t0 = treat.total
            info(" total tags in alignment file: %d" % (t0))

        if options.number:
            # An absolute count was requested: convert it to the
            # percentage that sample_percent() works with.
            if options.number > t0:
                error(" Number you want is bigger than total number of tags in alignment file! Please specify a smaller number and try again!")
                error(" %.2e > %.2e" % (options.number, t0))
                # SystemExit still runs the finally clause below, so the
                # output file is closed on this path as well.
                sys.exit(1)
            info(" Number of tags you want to keep: %.2e" % (options.number))
            options.percentage = float(options.number)/t0*100
        info(" Percentage of tags you want to keep: %.2f%%" % (options.percentage))

        if options.seed >= 0:
            info(" Random seed has been set as: %d" % options.seed )

        # sample_percent() is given a fraction, not a percentage
        treat.sample_percent(options.percentage/100.0, options.seed )

        info(" tags after random sampling in alignment file: %d" % (treat.total))

        info("Write to BED file")
        treat.print_to_bed(fhd=outfhd)
    finally:
        # Only close what was opened here; sys.stdout belongs to the caller.
        if outfhd is not sys.stdout:
            outfhd.close()
    info("finished! Check %s." % options.outputfile)

def load_tag_files_options ( options ):
    """Load the tags of every file in options.ifile into one track.

    The first file is parsed with options.parser and turned into a track
    via build_fwtrack(); each remaining file is parsed the same way and
    appended with append_fwtrack(). The track is finalized before it is
    returned.

    When options.tsize is not set (falsy), it is filled in from the
    parser of the first file via tsize(); a value that is already set is
    left untouched.

    options -- validated options object providing info, parser, ifile,
               buffer_size and tsize.

    Returns the finalized track object.
    """
    options.info("# read treatment tags...")

    reader = options.parser(options.ifile[0], buffer_size=options.buffer_size)
    # only ask the parser for the tag size when none was supplied
    if not options.tsize:
        options.tsize = reader.tsize()
    track = reader.build_fwtrack()

    # any additional inputs are merged into the same track
    for extra_file in options.ifile[1:]:
        reader = options.parser(extra_file, buffer_size=options.buffer_size)
        track = reader.append_fwtrack( track )
    track.finalize()

    options.info("tag size is determined as %d bps" % options.tsize)
    return track

def load_frag_files_options ( options ):
    """Load the fragments of every file in options.ifile into one track.

    The first file is parsed with options.parser and turned into a track
    via build_petrack(); each remaining file is parsed the same way and
    appended with append_petrack(). The track is finalized before it is
    returned.

    options -- validated options object providing info, parser, ifile
               and buffer_size.

    Returns the finalized track object.
    """
    options.info("# read treatment fragments...")

    reader = options.parser(options.ifile[0], buffer_size=options.buffer_size)
    track = reader.build_petrack()

    # any additional inputs are merged into the same track
    for extra_file in options.ifile[1:]:
        reader = options.parser(extra_file, buffer_size=options.buffer_size)
        track = reader.append_petrack( track )
    track.finalize()
    return track