1 # Time-stamp: <2019-09-20 11:36:33 taoliu>
3 """Description: Random sample certain number/percentage of tags.
5 This code is free software; you can redistribute it and/or modify it
6 under the terms of the BSD License (see the file LICENSE included with
10 # ------------------------------------
12 # ------------------------------------
18 # ------------------------------------
20 # ------------------------------------
21 from MACS2
.OptValidator
import opt_validate_randsample
as opt_validate
22 from MACS2
.Constants
import *
24 # ------------------------------------
26 # ------------------------------------
# Body of the random-sampling command: validate options, load the alignment
# data, down-sample it, and write the survivors as BED.
# NOTE(review): the enclosing def line and several guarding statements
# (if/else lines, some assignments) are not present in this listing; the
# comments below describe only what is visible.
# Validate the raw options object (options0); everything below reads the validated result.
28 options
= opt_validate( options0
)
29 # end of parsing commandline options
# Paired-end mode is on exactly when the input format is BAMPE or BEDPE.
35 options
.PE_MODE
= options
.format
in ('BAMPE','BEDPE')
# When an output file name is given, open <outdir>/<outputfile> for writing.
# NOTE(review): no assignment of outfhd for the case without an output file,
# and no close of the handle, is visible in this listing - confirm.
38 if options
.outputfile
:
39 outfhd
= open( os
.path
.join( options
.outdir
, options
.outputfile
), "w" )
# Fragment (pair) loading path - presumably taken when options.PE_MODE is set;
# the guarding if-line is not visible. t0 holds the total number of fragments.
45 info("# read input file in Paired-end mode.")
46 treat
= load_frag_files_options ( options
) # return PETrackI object
47 t0
= treat
.total
# total fragments
48 info("# total fragments/pairs in alignment file: %d" % (t0
) )
# Tag loading path - presumably the non-paired-end alternative; the else-line
# is not visible.
50 info("read tag files...")
51 treat
= load_tag_files_options (options
)
53 info("tag size = %d" % options
.tsize
)
# Store the tag size on the loaded track as its fw attribute.
# NOTE(review): no assignment of t0 for this path is visible in this listing -
# confirm it is set before the log call below.
54 treat
.fw
= options
.tsize
57 info(" total tags in alignment file: %d" % (t0
))
# A requested absolute count larger than the available total is reported
# through two error() calls.
# NOTE(review): no exit/raise is visible after them - confirm.
60 if options
.number
> t0
:
61 error(" Number you want is bigger than total number of tags in alignment file! Please specify a smaller number and try again!")
62 error(" %.2e > %.2e" % (options
.number
, t0
))
64 info(" Number of tags you want to keep: %.2e" % (options
.number
))
# Convert the requested count into a percentage of the total (number / t0 * 100)
# so the sampling call below only has to deal with options.percentage.
65 options
.percentage
= float(options
.number
)/t0
*100
66 info(" Percentage of tags you want to keep: %.2f%%" % (options
.percentage
))
69 info(" Random seed has been set as: %d" % options
.seed
)
# sample_percent receives a fraction (percentage / 100.0) together with the seed.
71 treat
.sample_percent(options
.percentage
/100.0, options
.seed
)
73 info(" tags after random sampling in alignment file: %d" % (treat
.total
))
# Write the sampled track to the output handle opened above.
75 info("Write to BED file")
76 treat
.print_to_bed(fhd
=outfhd
)
77 info("finished! Check %s." % options
.outputfile
)
def load_tag_files_options ( options ):
    """Load the alignment tags of every input file into one track.

    The first file in ``options.ifile`` is parsed with ``options.parser``
    and turned into a track through ``build_fwtrack()``. Each remaining
    file is parsed the same way and merged into that track through
    ``append_fwtrack()``.

    Args:
        options: validated command-line options. This function reads
            ``ifile`` (list of input paths), ``parser`` (callable that
            builds a parser from a path and a ``buffer_size`` keyword),
            ``buffer_size``, ``tsize`` and ``info`` (logging callable).
            ``tsize`` is filled in when it is unset.

    Returns:
        The track returned by the last ``build_fwtrack()`` /
        ``append_fwtrack()`` call. The caller assigns attributes on it, so
        it must be returned rather than dropped.
    """
    options.info("# read treatment tags...")
    input_files = options.ifile
    tp = options.parser(input_files[0], buffer_size=options.buffer_size)
    if not options.tsize:
        # A user-specified --tsize wins; only detect the tag size when none
        # was given. The previous code read an unassigned name here.
        # NOTE(review): assumes the parser exposes tsize() for tag-size
        # detection - confirm against the parser class.
        options.tsize = tp.tsize()
    treat = tp.build_fwtrack()

    # Slicing from index 1 yields nothing for a single input file, so no
    # separate length check is needed for the multiple-input case.
    for ifile in input_files[1:]:
        tp = options.parser(ifile, buffer_size=options.buffer_size)
        treat = tp.append_fwtrack(treat)

    options.info("tag size is determined as %d bps" % options.tsize)
    return treat
101 def load_frag_files_options ( options
):
102 """From the options, load treatment fragments and control fragments (if available).
105 options
.info("# read treatment fragments...")
107 tp
= options
.parser(options
.ifile
[0], buffer_size
=options
.buffer_size
)
108 treat
= tp
.build_petrack()
110 if len(options
.ifile
) > 1:
112 for ifile
in options
.ifile
[1:]:
113 tp
= options
.parser(ifile
, buffer_size
=options
.buffer_size
)
114 treat
= tp
.append_petrack( treat
)