1 # Time-stamp: <2022-06-10 10:24:34 Tao Liu>
5 This code is free software; you can redistribute it and/or modify it
6 under the terms of the BSD License (see the file LICENSE included with
10 # ------------------------------------
12 # ------------------------------------
16 import resource
# we turn on memory monitoring inside of logging
17 from argparse
import ArgumentError
18 from subprocess
import Popen
, PIPE
21 # ------------------------------------
23 # ------------------------------------
24 from MACS3
.IO
.Parser
import BEDParser
, ELANDResultParser
, ELANDMultiParser
, \
25 ELANDExportParser
, SAMParser
, BAMParser
, BAMPEParser
,\
26 BEDPEParser
, BowtieParser
, guess_parser
28 from MACS3
.Utilities
.Constants
import EFFECTIVEGS
as efgsize
29 # ------------------------------------
31 # ------------------------------------
34 import MACS3
.Utilities
.Logger
35 logger
= logging
.getLogger(__name__
)
37 # ------------------------------------
39 # ------------------------------------
41 logger
= logging
.getLogger(__name__
)
43 def opt_validate_callpeak ( options
):
44 """Validate options from a OptParser object.
46 Ret: Validated options object.
49 logger
.setLevel((4-options
.verbose
)*10)
51 options
.error
= logger
.critical
# function alias
52 options
.warn
= logger
.warning
53 options
.debug
= logger
.debug
54 options
.info
= logger
.info
58 options
.gsize
= efgsize
[options
.gsize
]
61 options
.gsize
= float(options
.gsize
)
63 logger
.error("Error when interpreting --gsize option: %s" % options
.gsize
)
64 logger
.error("Available shortcuts of effective genome sizes are %s" % ",".join(list(efgsize
.keys())))
68 options
.gzip_flag
= False # if the input is gzip file
70 options
.format
= options
.format
.upper()
71 if options
.format
== "ELAND":
72 options
.parser
= ELANDResultParser
73 elif options
.format
== "BED":
74 options
.parser
= BEDParser
75 elif options
.format
== "ELANDMULTI":
76 options
.parser
= ELANDMultiParser
77 elif options
.format
== "ELANDEXPORT":
78 options
.parser
= ELANDExportParser
79 elif options
.format
== "SAM":
80 options
.parser
= SAMParser
81 elif options
.format
== "BAM":
82 options
.parser
= BAMParser
83 options
.gzip_flag
= True
84 elif options
.format
== "BAMPE":
85 options
.parser
= BAMPEParser
86 options
.gzip_flag
= True
87 options
.nomodel
= True
88 elif options
.format
== "BEDPE":
89 options
.parser
= BEDPEParser
90 options
.nomodel
= True
91 elif options
.format
== "BOWTIE":
92 options
.parser
= BowtieParser
93 elif options
.format
== "AUTO":
94 options
.parser
= guess_parser
96 logger
.error("Format \"%s\" cannot be recognized!" % (options
.format
))
100 if options
.keepduplicates
!= "auto" and options
.keepduplicates
!= "all":
101 if not options
.keepduplicates
.isdigit():
102 logger
.error("--keep-dup should be 'auto', 'all' or an integer!")
106 #if options.shiftsize: # only if --shiftsize is set, it's true
107 # options.extsize = 2 * options.shiftsize
108 #else: # if --shiftsize is not set
109 # options.shiftsize = options.extsize / 2
110 if options
.extsize
< 1 :
111 logger
.error("--extsize must >= 1!")
114 # refine_peaks, call_summits can't be combined with --broad
115 #if options.broad and (options.refine_peaks or options.call_summits):
116 # logger.error("--broad can't be combined with --refine-peaks or --call-summits!")
119 if options
.broad
and options
.call_summits
:
120 logger
.error("--broad can't be combined with --call-summits!")
124 # if set, ignore qvalue cutoff
125 options
.log_qvalue
= None
126 options
.log_pvalue
= log(options
.pvalue
,10)*-1
128 options
.log_qvalue
= log(options
.qvalue
,10)*-1
129 options
.log_pvalue
= None
131 options
.log_broadcutoff
= log(options
.broadcutoff
,10)*-1
133 # uppercase the format string
134 options
.format
= options
.format
.upper()
136 # d_min is non-negative
137 if options
.d_min
< 0:
138 logger
.error("Minimum fragment size shouldn't be negative!" % options
.d_min
)
141 # upper and lower mfold
142 options
.lmfold
= options
.mfold
[0]
143 options
.umfold
= options
.mfold
[1]
144 if options
.lmfold
> options
.umfold
:
145 logger
.error("Upper limit of mfold should be greater than lower limit!" % options
.mfold
)
149 options
.peakxls
= os
.path
.join( options
.outdir
, options
.name
+"_peaks.xls" )
150 options
.peakbed
= os
.path
.join( options
.outdir
, options
.name
+"_peaks.bed" )
151 options
.peakNarrowPeak
= os
.path
.join( options
.outdir
, options
.name
+"_peaks.narrowPeak" )
152 options
.peakBroadPeak
= os
.path
.join( options
.outdir
, options
.name
+"_peaks.broadPeak" )
153 options
.peakGappedPeak
= os
.path
.join( options
.outdir
, options
.name
+"_peaks.gappedPeak" )
154 options
.summitbed
= os
.path
.join( options
.outdir
, options
.name
+"_summits.bed" )
155 options
.bdg_treat
= os
.path
.join( options
.outdir
, options
.name
+"_treat_pileup.bdg" )
156 options
.bdg_control
= os
.path
.join( options
.outdir
, options
.name
+"_control_lambda.bdg" )
157 if options
.cutoff_analysis
:
158 options
.cutoff_analysis_file
= os
.path
.join( options
.outdir
, options
.name
+"_cutoff_analysis.txt" )
160 options
.cutoff_analysis_file
= "None"
161 #options.negxls = os.path.join( options.name+"_negative_peaks.xls" )
162 #options.diagxls = os.path.join( options.name+"_diag.xls" )
163 options
.modelR
= os
.path
.join( options
.outdir
, options
.name
+"_model.r" )
164 #options.pqtable = os.path.join( options.outdir, options.name+"_pq_table.txt" )
166 options
.argtxt
= "\n".join((
167 "# Command line: %s" % " ".join(sys
.argv
[1:]),\
168 "# ARGUMENTS LIST:",\
169 "# name = %s" % (options
.name
),\
170 "# format = %s" % (options
.format
),\
171 "# ChIP-seq file = %s" % (options
.tfile
),\
172 "# control file = %s" % (options
.cfile
),\
173 "# effective genome size = %.2e" % (options
.gsize
),\
174 #"# tag size = %d" % (options.tsize),\
175 "# band width = %d" % (options
.bw
),\
176 "# model fold = %s\n" % (options
.mfold
),\
181 options
.argtxt
+= "# pvalue cutoff for narrow/strong regions = %.2e\n" % (options
.pvalue
)
182 options
.argtxt
+= "# pvalue cutoff for broad/weak regions = %.2e\n" % (options
.broadcutoff
)
183 options
.argtxt
+= "# qvalue will not be calculated and reported as -1 in the final output.\n"
185 options
.argtxt
+= "# pvalue cutoff = %.2e\n" % (options
.pvalue
)
186 options
.argtxt
+= "# qvalue will not be calculated and reported as -1 in the final output.\n"
189 options
.argtxt
+= "# qvalue cutoff for narrow/strong regions = %.2e\n" % (options
.qvalue
)
190 options
.argtxt
+= "# qvalue cutoff for broad/weak regions = %.2e\n" % (options
.broadcutoff
)
192 options
.argtxt
+= "# qvalue cutoff = %.2e\n" % (options
.qvalue
)
195 options
.argtxt
+= "# The maximum gap between significant sites = %d\n" % options
.maxgap
197 options
.argtxt
+= "# The maximum gap between significant sites is assigned as the read length/tag size.\n"
199 options
.argtxt
+= "# The minimum length of peaks = %d\n" % options
.minlen
201 options
.argtxt
+= "# The minimum length of peaks is assigned as the predicted fragment length \"d\".\n"
203 if options
.downsample
:
204 options
.argtxt
+= "# Larger dataset will be randomly sampled towards smaller dataset.\n"
205 if options
.seed
>= 0:
206 options
.argtxt
+= "# Random seed has been set as: %d\n" % options
.seed
208 if options
.scaleto
== "large":
209 options
.argtxt
+= "# Smaller dataset will be scaled towards larger dataset.\n"
211 options
.argtxt
+= "# Larger dataset will be scaled towards smaller dataset.\n"
213 if options
.ratio
!= 1.0:
214 options
.argtxt
+= "# Using a custom scaling factor: %.2e\n" % (options
.ratio
)
217 options
.argtxt
+= "# Range for calculating regional lambda is: %d bps and %d bps\n" % (options
.smalllocal
,options
.largelocal
)
219 options
.argtxt
+= "# Range for calculating regional lambda is: %d bps\n" % (options
.largelocal
)
222 options
.argtxt
+= "# Broad region calling is on\n"
224 options
.argtxt
+= "# Broad region calling is off\n"
226 if options
.fecutoff
!= 1.0:
227 options
.argtxt
+= "# Additional cutoff on fold-enrichment is: %.2f\n" % (options
.fecutoff
)
229 if options
.format
in ["BAMPE", "BEDPE"]:
232 options
.argtxt
+= "# Paired-End mode is on\n"
234 options
.argtxt
+= "# Paired-End mode is off\n"
236 #if options.refine_peaks:
237 # options.argtxt += "# Refining peak for read balance is on\n"
238 if options
.call_summits
:
239 options
.argtxt
+= "# Searching for subpeak summits is on\n"
241 if options
.do_SPMR
and options
.store_bdg
:
242 options
.argtxt
+= "# MACS will save fragment pileup signal per million reads\n"
246 def opt_validate_diffpeak ( options
):
247 """Validate options from a OptParser object.
249 Ret: Validated options object.
252 logger
.setLevel((4-options
.verbose
)*10)
254 options
.error
= logger
.critical
# function alias
255 options
.warn
= logger
.warning
256 options
.debug
= logger
.debug
257 options
.info
= logger
.info
260 options
.gzip_flag
= False # if the input is gzip file
262 # options.format = options.format.upper()
265 # elif options.format == "AUTO":
266 # options.parser = guess_parser
268 # logger.error("Format \"%s\" cannot be recognized!" % (options.format))
271 if options
.peaks_pvalue
:
272 # if set, ignore qvalue cutoff
273 options
.peaks_log_qvalue
= None
274 options
.peaks_log_pvalue
= log(options
.peaks_pvalue
,10)*-1
275 options
.track_score_method
= 'p'
277 options
.peaks_log_qvalue
= log(options
.peaks_qvalue
,10)*-1
278 options
.peaks_log_pvalue
= None
279 options
.track_score_method
= 'q'
281 if options
.diff_pvalue
:
282 # if set, ignore qvalue cutoff
283 options
.log_qvalue
= None
284 options
.log_pvalue
= log(options
.diff_pvalue
,10)*-1
285 options
.score_method
= 'p'
287 options
.log_qvalue
= log(options
.diff_qvalue
,10)*-1
288 options
.log_pvalue
= None
289 options
.score_method
= 'q'
292 options
.peakxls
= options
.name
+"_diffpeaks.xls"
293 options
.peakbed
= options
.name
+"_diffpeaks.bed"
294 options
.peak1xls
= options
.name
+"_diffpeaks_by_peaks1.xls"
295 options
.peak2xls
= options
.name
+"_diffpeaks_by_peaks2.xls"
296 options
.bdglogLR
= options
.name
+"_logLR.bdg"
297 options
.bdgpvalue
= options
.name
+"_logLR.bdg"
298 options
.bdglogFC
= options
.name
+"_logLR.bdg"
300 options
.call_peaks
= True
301 if not (options
.peaks1
== '' or options
.peaks2
== ''):
302 if options
.peaks1
== '':
303 raise ArgumentError('peaks1', 'Must specify both peaks1 and peaks2, or neither (to call peaks again)')
304 elif options
.peaks2
== '':
305 raise ArgumentError('peaks2', 'Must specify both peaks1 and peaks2, or neither (to call peaks again)')
306 options
.call_peaks
= False
307 options
.argtxt
= "\n".join((
308 "# ARGUMENTS LIST:",\
309 "# name = %s" % (options
.name
),\
310 # "# format = %s" % (options.format),\
311 "# ChIP-seq file 1 = %s" % (options
.t1bdg
),\
312 "# control file 1 = %s" % (options
.c1bdg
),\
313 "# ChIP-seq file 2 = %s" % (options
.t2bdg
),\
314 "# control file 2 = %s" % (options
.c2bdg
),\
315 "# Peaks, condition 1 = %s" % (options
.peaks1
),\
316 "# Peaks, condition 2 = %s" % (options
.peaks2
),\
320 options
.argtxt
= "\n".join((
321 "# ARGUMENTS LIST:",\
322 "# name = %s" % (options
.name
),\
323 # "# format = %s" % (options.format),\
324 "# ChIP-seq file 1 = %s" % (options
.t1bdg
),\
325 "# control file 1 = %s" % (options
.c1bdg
),\
326 "# ChIP-seq file 2 = %s" % (options
.t2bdg
),\
327 "# control file 2 = %s" % (options
.c2bdg
),\
331 if options
.peaks_pvalue
:
332 options
.argtxt
+= "# treat/control -log10(pvalue) cutoff = %.2e\n" % (options
.peaks_log_pvalue
)
333 options
.argtxt
+= "# treat/control -log10(qvalue) will not be calculated and reported as -1 in the final output.\n"
335 options
.argtxt
+= "# treat/control -log10(qvalue) cutoff = %.2e\n" % (options
.peaks_log_qvalue
)
337 if options
.diff_pvalue
:
338 options
.argtxt
+= "# differential pvalue cutoff = %.2e\n" % (options
.log_pvalue
)
339 options
.argtxt
+= "# differential qvalue will not be calculated and reported as -1 in the final output.\n"
341 options
.argtxt
+= "# differential qvalue cutoff = %.2e\n" % (options
.log_qvalue
)
345 def opt_validate_filterdup ( options
):
346 """Validate options from a OptParser object.
348 Ret: Validated options object.
351 logger
.setLevel((4-options
.verbose
)*10)
353 options
.error
= logger
.critical
# function alias
354 options
.warn
= logger
.warning
355 options
.debug
= logger
.debug
356 options
.info
= logger
.info
360 options
.gsize
= efgsize
[options
.gsize
]
363 options
.gsize
= float(options
.gsize
)
365 logger
.error("Error when interpreting --gsize option: %s" % options
.gsize
)
366 logger
.error("Available shortcuts of effective genome sizes are %s" % ",".join(list(efgsize
.keys())))
371 options
.gzip_flag
= False # if the input is gzip file
373 options
.format
= options
.format
.upper()
374 if options
.format
== "ELAND":
375 options
.parser
= ELANDResultParser
376 elif options
.format
== "BED":
377 options
.parser
= BEDParser
378 elif options
.format
== "BEDPE":
379 options
.parser
= BEDPEParser
380 elif options
.format
== "ELANDMULTI":
381 options
.parser
= ELANDMultiParser
382 elif options
.format
== "ELANDEXPORT":
383 options
.parser
= ELANDExportParser
384 elif options
.format
== "SAM":
385 options
.parser
= SAMParser
386 elif options
.format
== "BAM":
387 options
.parser
= BAMParser
388 options
.gzip_flag
= True
389 elif options
.format
== "BOWTIE":
390 options
.parser
= BowtieParser
391 elif options
.format
== "BAMPE":
392 options
.parser
= BAMPEParser
393 options
.gzip_flag
= True
394 elif options
.format
== "BEDPE":
395 options
.parser
= BEDPEParser
396 elif options
.format
== "AUTO":
397 options
.parser
= guess_parser
399 logger
.error("Format \"%s\" cannot be recognized!" % (options
.format
))
403 if options
.keepduplicates
!= "auto" and options
.keepduplicates
!= "all":
404 if not options
.keepduplicates
.isdigit():
405 logger
.error("--keep-dup should be 'auto', 'all' or an integer!")
408 # uppercase the format string
409 options
.format
= options
.format
.upper()
413 def opt_validate_randsample ( options
):
414 """Validate options from a OptParser object.
416 Ret: Validated options object.
419 logger
.setLevel((4-options
.verbose
)*10)
421 options
.error
= logger
.critical
# function alias
422 options
.warn
= logger
.warning
423 options
.debug
= logger
.debug
424 options
.info
= logger
.info
428 options
.gzip_flag
= False # if the input is gzip file
430 options
.format
= options
.format
.upper()
431 if options
.format
== "ELAND":
432 options
.parser
= ELANDResultParser
433 elif options
.format
== "BED":
434 options
.parser
= BEDParser
435 elif options
.format
== "ELANDMULTI":
436 options
.parser
= ELANDMultiParser
437 elif options
.format
== "ELANDEXPORT":
438 options
.parser
= ELANDExportParser
439 elif options
.format
== "SAM":
440 options
.parser
= SAMParser
441 elif options
.format
== "BAM":
442 options
.parser
= BAMParser
443 options
.gzip_flag
= True
444 elif options
.format
== "BOWTIE":
445 options
.parser
= BowtieParser
446 elif options
.format
== "BAMPE":
447 options
.parser
= BAMPEParser
448 options
.gzip_flag
= True
449 elif options
.format
== "BEDPE":
450 options
.parser
= BEDPEParser
451 elif options
.format
== "AUTO":
452 options
.parser
= guess_parser
454 logger
.error("Format \"%s\" cannot be recognized!" % (options
.format
))
457 # uppercase the format string
458 options
.format
= options
.format
.upper()
460 # percentage or number
461 if options
.percentage
:
462 if options
.percentage
> 100.0:
463 logger
.error("Percentage can't be bigger than 100.0. Please check your options and retry!")
466 if options
.number
<= 0:
467 logger
.error("Number of tags can't be smaller than or equal to 0. Please check your options and retry!")
472 def opt_validate_refinepeak ( options
):
473 """Validate options from a OptParser object.
475 Ret: Validated options object.
478 logger
.setLevel((4-options
.verbose
)*10)
480 options
.error
= logger
.critical
# function alias
481 options
.warn
= logger
.warning
482 options
.debug
= logger
.debug
483 options
.info
= logger
.info
487 options
.gzip_flag
= False # if the input is gzip file
489 options
.format
= options
.format
.upper()
490 if options
.format
== "ELAND":
491 options
.parser
= ELANDResultParser
492 elif options
.format
== "BED":
493 options
.parser
= BEDParser
494 elif options
.format
== "ELANDMULTI":
495 options
.parser
= ELANDMultiParser
496 elif options
.format
== "ELANDEXPORT":
497 options
.parser
= ELANDExportParser
498 elif options
.format
== "SAM":
499 options
.parser
= SAMParser
500 elif options
.format
== "BAM":
501 options
.parser
= BAMParser
502 options
.gzip_flag
= True
503 elif options
.format
== "BOWTIE":
504 options
.parser
= BowtieParser
505 elif options
.format
== "AUTO":
506 options
.parser
= guess_parser
508 logger
.error("Format \"%s\" cannot be recognized!" % (options
.format
))
511 # uppercase the format string
512 options
.format
= options
.format
.upper()
516 def opt_validate_predictd ( options
):
517 """Validate options from a OptParser object.
519 Ret: Validated options object.
522 logger
.setLevel((4-options
.verbose
)*10)
524 options
.error
= logger
.critical
# function alias
525 options
.warn
= logger
.warning
526 options
.debug
= logger
.debug
527 options
.info
= logger
.info
531 options
.gsize
= efgsize
[options
.gsize
]
534 options
.gsize
= float(options
.gsize
)
536 logger
.error("Error when interpreting --gsize option: %s" % options
.gsize
)
537 logger
.error("Available shortcuts of effective genome sizes are %s" % ",".join(list(efgsize
.keys())))
541 options
.gzip_flag
= False # if the input is gzip file
543 options
.format
= options
.format
.upper()
544 if options
.format
== "ELAND":
545 options
.parser
= ELANDResultParser
546 elif options
.format
== "BED":
547 options
.parser
= BEDParser
548 elif options
.format
== "ELANDMULTI":
549 options
.parser
= ELANDMultiParser
550 elif options
.format
== "ELANDEXPORT":
551 options
.parser
= ELANDExportParser
552 elif options
.format
== "SAM":
553 options
.parser
= SAMParser
554 elif options
.format
== "BAM":
555 options
.parser
= BAMParser
556 options
.gzip_flag
= True
557 elif options
.format
== "BAMPE":
558 options
.parser
= BAMPEParser
559 options
.gzip_flag
= True
560 options
.nomodel
= True
561 elif options
.format
== "BEDPE":
562 options
.parser
= BEDPEParser
563 options
.nomodel
= True
564 elif options
.format
== "BOWTIE":
565 options
.parser
= BowtieParser
566 elif options
.format
== "AUTO":
567 options
.parser
= guess_parser
569 logger
.error("Format \"%s\" cannot be recognized!" % (options
.format
))
572 # uppercase the format string
573 options
.format
= options
.format
.upper()
575 # d_min is non-negative
576 if options
.d_min
< 0:
577 logger
.error("Minimum fragment size shouldn't be negative!" % options
.d_min
)
580 # upper and lower mfold
581 options
.lmfold
= options
.mfold
[0]
582 options
.umfold
= options
.mfold
[1]
583 if options
.lmfold
> options
.umfold
:
584 logger
.error("Upper limit of mfold should be greater than lower limit!" % options
.mfold
)
587 options
.modelR
= os
.path
.join( options
.outdir
, options
.rfile
)
592 def opt_validate_pileup ( options
):
593 """Validate options from a OptParser object.
595 Ret: Validated options object.
598 logger
.setLevel((4-options
.verbose
)*10)
600 options
.error
= logger
.critical
# function alias
601 options
.warn
= logger
.warning
602 options
.debug
= logger
.debug
603 options
.info
= logger
.info
607 options
.gzip_flag
= False # if the input is gzip file
609 options
.format
= options
.format
.upper()
610 if options
.format
== "ELAND":
611 options
.parser
= ELANDResultParser
612 elif options
.format
== "BED":
613 options
.parser
= BEDParser
614 elif options
.format
== "ELANDMULTI":
615 options
.parser
= ELANDMultiParser
616 elif options
.format
== "ELANDEXPORT":
617 options
.parser
= ELANDExportParser
618 elif options
.format
== "SAM":
619 options
.parser
= SAMParser
620 elif options
.format
== "BAM":
621 options
.parser
= BAMParser
622 options
.gzip_flag
= True
623 elif options
.format
== "BOWTIE":
624 options
.parser
= BowtieParser
625 elif options
.format
== "BAMPE":
626 options
.parser
= BAMPEParser
627 options
.gzip_flag
= True
628 elif options
.format
== "BEDPE":
629 options
.parser
= BEDPEParser
631 logger
.error("Format \"%s\" cannot be recognized!" % (options
.format
))
634 # uppercase the format string
635 options
.format
= options
.format
.upper()
638 if options
.extsize
<= 0 :
639 logger
.error("--extsize must > 0!")
644 def opt_validate_bdgcmp ( options
):
645 """Validate options from a OptParser object.
647 Ret: Validated options object.
650 logger
.setLevel((4-options
.verbose
)*10)
652 options
.error
= logger
.critical
# function alias
653 options
.warn
= logger
.warning
654 options
.debug
= logger
.debug
655 options
.info
= logger
.info
657 # methods should be valid:
659 for method
in set(options
.method
):
660 if method
not in [ 'ppois', 'qpois', 'subtract', 'logFE', 'FE', 'logLR', 'slogLR', 'max' ]:
661 logger
.error( "Invalid method: %s" % method
)
664 # # of --ofile must == # of -m
667 if len(options
.method
) != len(options
.ofile
):
668 logger
.error("The number and the order of arguments for --ofile must be the same as for -m.")
674 def opt_validate_cmbreps ( options
):
675 """Validate options from a OptParser object.
677 Ret: Validated options object.
680 logger
.setLevel((4-options
.verbose
)*10)
682 options
.error
= logger
.critical
# function alias
683 options
.warn
= logger
.warning
684 options
.debug
= logger
.debug
685 options
.info
= logger
.info
687 # methods should be valid:
690 if options
.method
not in [ 'fisher', 'max', 'mean']:
691 logger
.error( "Invalid method: %s" % options
.method
)
694 if len( options
.ifile
) < 2:
695 logger
.error("Combining replicates needs at least two replicates!")
698 # # of -i must == # of -w
700 # if not options.weights:
701 # options.weights = [ 1.0 ] * len( options.ifile )
703 # if len( options.ifile ) != len( options.weights ):
704 # logger.error("Must provide same number of weights as number of input files.")
707 # if options.method == "fisher" and len( options.ifile ) > 3:
708 # logger.error("NOT IMPLEMENTED! Can't combine more than 3 replicates using Fisher's method.")
714 def opt_validate_bdgopt ( options
):
715 """Validate options from a OptParser object.
717 Ret: Validated options object.
720 logger
.setLevel((4-options
.verbose
)*10)
722 options
.error
= logger
.critical
# function alias
723 options
.warn
= logger
.warning
724 options
.debug
= logger
.debug
725 options
.info
= logger
.info
727 # methods should be valid:
729 if options
.method
.lower() not in [ 'multiply', 'add', 'p2q', 'max', 'min']:
730 logger
.error( "Invalid method: %s" % options
.method
)
733 if options
.method
.lower() in [ 'multiply', 'add' ] and not options
.extraparam
:
734 logger
.error( "Need EXTRAPARAM for method multiply or add!")
739 def opt_validate_callvar ( options
):
740 """Validate options from a OptParser object.
742 Ret: Validated options object.
745 logger
.setLevel((4-options
.verbose
)*10)
747 options
.error
= logger
.critical
# function alias
748 options
.warn
= logger
.warning
749 options
.debug
= logger
.debug
750 options
.info
= logger
.info
752 # methods should be valid:
759 def opt_validate_hmmratac ( options
):
760 """Validate options from a OptParser object.
762 Ret: Validated options object.
765 logger
.setLevel((4-options
.verbose
)*10)
767 options
.error
= logger
.critical
# function alias
768 options
.warn
= logger
.warning
769 options
.debug
= logger
.debug
770 options
.info
= logger
.info
772 # input options.argtxt for hmmratac
773 options
.argtxt
= "# Command line: %s\n" % " ".join(sys
.argv
[1:])
774 # "# ARGUMENTS LIST:",\
775 # "# outfile = %s" % (options.ofile),\
776 # "# input file = %s\n" % (options.bam_file),\
780 #if options.store_bdg:
781 # options.argtxt += "# HMMRATAC will report whole genome bedgraph of all state annotations. \n"
783 #if options.store_bgscore:
784 # options.argtxt += "# HMMRATAC score will be added to each state annotation in bedgraph. \n"
786 #if options.store_peaks:
787 # options.argtxt += "# Peaks not reported in bed format\n"
789 #if options.print_exclude:
790 # options.print_exclude = os.path.join(options.outdir, options.ofile+"Output_exclude.bed")
792 # options.print_exclude = "None"
794 #if options.print_train:
795 # options.print_train = os.path.join(options.outdir, options.ofile+"Output_training.bed")
797 # options.print_train = "None"
803 options
.argtxt
+= "# EM training not performed on fragment distribution. \n"
804 # em_means non-negative
805 if sum( [ x
< 0 for x
in options
.em_means
] ):
806 logger
.error(" --means should not be negative! ")
808 # em_stddev non-negative
809 if sum( [ x
< 0 for x
in options
.em_stddevs
] ):
810 logger
.error(" --stddev should not be negative! ")
815 # hmm_states non-negative int, warn if not k=3
816 #if options.hmm_states <=0:
817 # logger.error(" -s, --states must be an integer >= 0.")
819 #elif options.hmm_states != 3 and options.hmm_states > 0 and options.store_peaks == False:
820 # logger.warn(" If -s, --states not k=3, recommend NOT calling peaks, use bedgraph.")
823 if options
.hmm_binsize
<=0:
824 logger
.error(" --binsize must be larger than 0.")
827 # hmm_lower less than hmm_upper, non-negative
828 if options
.hmm_lower
<0:
829 logger
.error(" -l, --lower should not be negative! ")
831 if options
.hmm_upper
<0:
832 logger
.error(" -u, --upper should not be negative! ")
834 if options
.hmm_lower
> options
.hmm_upper
:
835 logger
.error("Upper limit of fold change range should be greater than lower limit!" % options
.mfold
)
838 # hmm_maxTrain non-negative
839 if options
.hmm_maxTrain
<= 0:
840 logger
.error(" --maxTrain should be larger than 0!")
843 # hmm_training_regions
844 if options
.hmm_training_regions
:
845 options
.argtxt
+= "# Using -t, --training input to train HMM instead of using fold change settings to select. \n"
847 # hmm_zscore non-negative
848 #if options.hmm_zscore <0:
849 # logger.error(" -z, --zscore should not be negative!")
853 if options
.hmm_randomSeed
:
854 options
.argtxt
+= "# Random seed selected as: %d\n" % options
.hmm_randomSeed
856 # hmm_window non-negative
857 #if options.hmm_window <0:
858 # logger.error(" --window should not be negative! ")
862 #if options.hmm_file:
863 # options.argtxt += "# HMM training will be skipped, --model input used instead. \n"
866 if options
.hmm_modelonly
:
867 options
.argtxt
+= "# Program will stop after generating model, which can be later applied with '--model'. \n"
871 if options
.prescan_cutoff
<= 1:
872 logger
.error(" In order to use -c or --prescan-cutoff, the cutoff must be larger than 1.")
875 if options
.openregion_minlen
< 0: # and options.store_peaks == True:
876 logger
.error(" In order to use --minlen, the length should not be negative.")
879 #if options.call_score.lower() not in [ 'max', 'ave', 'med', 'fc', 'zscore', 'all']:
880 # logger.error( " Invalid method: %s" % options.call_score )
883 # call_threshold non-negative
884 #if options.call_threshold <0:
885 # logger.error(" --threshold should not be negative! ")
891 #if options.misc_keep_duplicates:
892 # options.argtxt += "# Duplicate reads from analysis will be stored. \n"
894 # misc_trim non-negative
895 #if options.misc_trim <0:
896 # logger.error(" --trim should not be negative! ")
899 # np # should this be mp? non-negative
901 # logger.error(" -m, --multiple-processing should not be negative! ")
904 # min_map_quality non-negative
905 #if options.min_map_quality <0:
906 # logger.error(" -q, --minmapq should not be negative! ")