add -t and --modelonly
[MACS.git] / MACS3 / Utilities / OptValidator.py
bloba714f9538f9e92077497ed7418af53529d81236d
1 # Time-stamp: <2022-06-10 10:24:34 Tao Liu>
3 """Module Description
5 This code is free software; you can redistribute it and/or modify it
6 under the terms of the BSD License (see the file LICENSE included with
7 the distribution).
8 """
10 # ------------------------------------
11 # python modules
12 # ------------------------------------
13 import sys
14 import os
15 import re
16 import resource # we turn on memory monitoring inside of logging
17 from argparse import ArgumentError
18 from subprocess import Popen, PIPE
19 from math import log
21 # ------------------------------------
22 # MACS3 modules
23 # ------------------------------------
24 from MACS3.IO.Parser import BEDParser, ELANDResultParser, ELANDMultiParser, \
25 ELANDExportParser, SAMParser, BAMParser, BAMPEParser,\
26 BEDPEParser, BowtieParser, guess_parser
28 from MACS3.Utilities.Constants import EFFECTIVEGS as efgsize
29 # ------------------------------------
30 # constants
31 # ------------------------------------
33 import logging
34 import MACS3.Utilities.Logger
35 logger = logging.getLogger(__name__)
37 # ------------------------------------
38 # Misc functions
39 # ------------------------------------
41 logger = logging.getLogger(__name__)
43 def opt_validate_callpeak ( options ):
44 """Validate options from a OptParser object.
46 Ret: Validated options object.
47 """
48 # logging object
49 logger.setLevel((4-options.verbose)*10)
51 options.error = logger.critical # function alias
52 options.warn = logger.warning
53 options.debug = logger.debug
54 options.info = logger.info
56 # gsize
57 try:
58 options.gsize = efgsize[options.gsize]
59 except:
60 try:
61 options.gsize = float(options.gsize)
62 except:
63 logger.error("Error when interpreting --gsize option: %s" % options.gsize)
64 logger.error("Available shortcuts of effective genome sizes are %s" % ",".join(list(efgsize.keys())))
65 sys.exit(1)
67 # format
68 options.gzip_flag = False # if the input is gzip file
70 options.format = options.format.upper()
71 if options.format == "ELAND":
72 options.parser = ELANDResultParser
73 elif options.format == "BED":
74 options.parser = BEDParser
75 elif options.format == "ELANDMULTI":
76 options.parser = ELANDMultiParser
77 elif options.format == "ELANDEXPORT":
78 options.parser = ELANDExportParser
79 elif options.format == "SAM":
80 options.parser = SAMParser
81 elif options.format == "BAM":
82 options.parser = BAMParser
83 options.gzip_flag = True
84 elif options.format == "BAMPE":
85 options.parser = BAMPEParser
86 options.gzip_flag = True
87 options.nomodel = True
88 elif options.format == "BEDPE":
89 options.parser = BEDPEParser
90 options.nomodel = True
91 elif options.format == "BOWTIE":
92 options.parser = BowtieParser
93 elif options.format == "AUTO":
94 options.parser = guess_parser
95 else:
96 logger.error("Format \"%s\" cannot be recognized!" % (options.format))
97 sys.exit(1)
99 # duplicate reads
100 if options.keepduplicates != "auto" and options.keepduplicates != "all":
101 if not options.keepduplicates.isdigit():
102 logger.error("--keep-dup should be 'auto', 'all' or an integer!")
103 sys.exit(1)
105 # shiftsize>0
106 #if options.shiftsize: # only if --shiftsize is set, it's true
107 # options.extsize = 2 * options.shiftsize
108 #else: # if --shiftsize is not set
109 # options.shiftsize = options.extsize / 2
110 if options.extsize < 1 :
111 logger.error("--extsize must >= 1!")
112 sys.exit(1)
114 # refine_peaks, call_summits can't be combined with --broad
115 #if options.broad and (options.refine_peaks or options.call_summits):
116 # logger.error("--broad can't be combined with --refine-peaks or --call-summits!")
117 # sys.exit(1)
119 if options.broad and options.call_summits:
120 logger.error("--broad can't be combined with --call-summits!")
121 sys.exit(1)
123 if options.pvalue:
124 # if set, ignore qvalue cutoff
125 options.log_qvalue = None
126 options.log_pvalue = log(options.pvalue,10)*-1
127 else:
128 options.log_qvalue = log(options.qvalue,10)*-1
129 options.log_pvalue = None
130 if options.broad:
131 options.log_broadcutoff = log(options.broadcutoff,10)*-1
133 # uppercase the format string
134 options.format = options.format.upper()
136 # d_min is non-negative
137 if options.d_min < 0:
138 logger.error("Minimum fragment size shouldn't be negative!" % options.d_min)
139 sys.exit(1)
141 # upper and lower mfold
142 options.lmfold = options.mfold[0]
143 options.umfold = options.mfold[1]
144 if options.lmfold > options.umfold:
145 logger.error("Upper limit of mfold should be greater than lower limit!" % options.mfold)
146 sys.exit(1)
148 # output filenames
149 options.peakxls = os.path.join( options.outdir, options.name+"_peaks.xls" )
150 options.peakbed = os.path.join( options.outdir, options.name+"_peaks.bed" )
151 options.peakNarrowPeak = os.path.join( options.outdir, options.name+"_peaks.narrowPeak" )
152 options.peakBroadPeak = os.path.join( options.outdir, options.name+"_peaks.broadPeak" )
153 options.peakGappedPeak = os.path.join( options.outdir, options.name+"_peaks.gappedPeak" )
154 options.summitbed = os.path.join( options.outdir, options.name+"_summits.bed" )
155 options.bdg_treat = os.path.join( options.outdir, options.name+"_treat_pileup.bdg" )
156 options.bdg_control= os.path.join( options.outdir, options.name+"_control_lambda.bdg" )
157 if options.cutoff_analysis:
158 options.cutoff_analysis_file = os.path.join( options.outdir, options.name+"_cutoff_analysis.txt" )
159 else:
160 options.cutoff_analysis_file = "None"
161 #options.negxls = os.path.join( options.name+"_negative_peaks.xls" )
162 #options.diagxls = os.path.join( options.name+"_diag.xls" )
163 options.modelR = os.path.join( options.outdir, options.name+"_model.r" )
164 #options.pqtable = os.path.join( options.outdir, options.name+"_pq_table.txt" )
166 options.argtxt = "\n".join((
167 "# Command line: %s" % " ".join(sys.argv[1:]),\
168 "# ARGUMENTS LIST:",\
169 "# name = %s" % (options.name),\
170 "# format = %s" % (options.format),\
171 "# ChIP-seq file = %s" % (options.tfile),\
172 "# control file = %s" % (options.cfile),\
173 "# effective genome size = %.2e" % (options.gsize),\
174 #"# tag size = %d" % (options.tsize),\
175 "# band width = %d" % (options.bw),\
176 "# model fold = %s\n" % (options.mfold),\
179 if options.pvalue:
180 if options.broad:
181 options.argtxt += "# pvalue cutoff for narrow/strong regions = %.2e\n" % (options.pvalue)
182 options.argtxt += "# pvalue cutoff for broad/weak regions = %.2e\n" % (options.broadcutoff)
183 options.argtxt += "# qvalue will not be calculated and reported as -1 in the final output.\n"
184 else:
185 options.argtxt += "# pvalue cutoff = %.2e\n" % (options.pvalue)
186 options.argtxt += "# qvalue will not be calculated and reported as -1 in the final output.\n"
187 else:
188 if options.broad:
189 options.argtxt += "# qvalue cutoff for narrow/strong regions = %.2e\n" % (options.qvalue)
190 options.argtxt += "# qvalue cutoff for broad/weak regions = %.2e\n" % (options.broadcutoff)
191 else:
192 options.argtxt += "# qvalue cutoff = %.2e\n" % (options.qvalue)
194 if options.maxgap:
195 options.argtxt += "# The maximum gap between significant sites = %d\n" % options.maxgap
196 else:
197 options.argtxt += "# The maximum gap between significant sites is assigned as the read length/tag size.\n"
198 if options.minlen:
199 options.argtxt += "# The minimum length of peaks = %d\n" % options.minlen
200 else:
201 options.argtxt += "# The minimum length of peaks is assigned as the predicted fragment length \"d\".\n"
203 if options.downsample:
204 options.argtxt += "# Larger dataset will be randomly sampled towards smaller dataset.\n"
205 if options.seed >= 0:
206 options.argtxt += "# Random seed has been set as: %d\n" % options.seed
207 else:
208 if options.scaleto == "large":
209 options.argtxt += "# Smaller dataset will be scaled towards larger dataset.\n"
210 else:
211 options.argtxt += "# Larger dataset will be scaled towards smaller dataset.\n"
213 if options.ratio != 1.0:
214 options.argtxt += "# Using a custom scaling factor: %.2e\n" % (options.ratio)
216 if options.cfile:
217 options.argtxt += "# Range for calculating regional lambda is: %d bps and %d bps\n" % (options.smalllocal,options.largelocal)
218 else:
219 options.argtxt += "# Range for calculating regional lambda is: %d bps\n" % (options.largelocal)
221 if options.broad:
222 options.argtxt += "# Broad region calling is on\n"
223 else:
224 options.argtxt += "# Broad region calling is off\n"
226 if options.fecutoff != 1.0:
227 options.argtxt += "# Additional cutoff on fold-enrichment is: %.2f\n" % (options.fecutoff)
229 if options.format in ["BAMPE", "BEDPE"]:
230 # neutralize SHIFT
231 options.shift = 0
232 options.argtxt += "# Paired-End mode is on\n"
233 else:
234 options.argtxt += "# Paired-End mode is off\n"
236 #if options.refine_peaks:
237 # options.argtxt += "# Refining peak for read balance is on\n"
238 if options.call_summits:
239 options.argtxt += "# Searching for subpeak summits is on\n"
241 if options.do_SPMR and options.store_bdg:
242 options.argtxt += "# MACS will save fragment pileup signal per million reads\n"
244 return options
246 def opt_validate_diffpeak ( options ):
247 """Validate options from a OptParser object.
249 Ret: Validated options object.
251 # logging object
252 logger.setLevel((4-options.verbose)*10)
254 options.error = logger.critical # function alias
255 options.warn = logger.warning
256 options.debug = logger.debug
257 options.info = logger.info
259 # format
260 options.gzip_flag = False # if the input is gzip file
262 # options.format = options.format.upper()
263 # fox this stuff
264 # if True: pass
265 # elif options.format == "AUTO":
266 # options.parser = guess_parser
267 # else:
268 # logger.error("Format \"%s\" cannot be recognized!" % (options.format))
269 # sys.exit(1)
271 if options.peaks_pvalue:
272 # if set, ignore qvalue cutoff
273 options.peaks_log_qvalue = None
274 options.peaks_log_pvalue = log(options.peaks_pvalue,10)*-1
275 options.track_score_method = 'p'
276 else:
277 options.peaks_log_qvalue = log(options.peaks_qvalue,10)*-1
278 options.peaks_log_pvalue = None
279 options.track_score_method = 'q'
281 if options.diff_pvalue:
282 # if set, ignore qvalue cutoff
283 options.log_qvalue = None
284 options.log_pvalue = log(options.diff_pvalue,10)*-1
285 options.score_method = 'p'
286 else:
287 options.log_qvalue = log(options.diff_qvalue,10)*-1
288 options.log_pvalue = None
289 options.score_method = 'q'
291 # output filenames
292 options.peakxls = options.name+"_diffpeaks.xls"
293 options.peakbed = options.name+"_diffpeaks.bed"
294 options.peak1xls = options.name+"_diffpeaks_by_peaks1.xls"
295 options.peak2xls = options.name+"_diffpeaks_by_peaks2.xls"
296 options.bdglogLR = options.name+"_logLR.bdg"
297 options.bdgpvalue = options.name+"_logLR.bdg"
298 options.bdglogFC = options.name+"_logLR.bdg"
300 options.call_peaks = True
301 if not (options.peaks1 == '' or options.peaks2 == ''):
302 if options.peaks1 == '':
303 raise ArgumentError('peaks1', 'Must specify both peaks1 and peaks2, or neither (to call peaks again)')
304 elif options.peaks2 == '':
305 raise ArgumentError('peaks2', 'Must specify both peaks1 and peaks2, or neither (to call peaks again)')
306 options.call_peaks = False
307 options.argtxt = "\n".join((
308 "# ARGUMENTS LIST:",\
309 "# name = %s" % (options.name),\
310 # "# format = %s" % (options.format),\
311 "# ChIP-seq file 1 = %s" % (options.t1bdg),\
312 "# control file 1 = %s" % (options.c1bdg),\
313 "# ChIP-seq file 2 = %s" % (options.t2bdg),\
314 "# control file 2 = %s" % (options.c2bdg),\
315 "# Peaks, condition 1 = %s" % (options.peaks1),\
316 "# Peaks, condition 2 = %s" % (options.peaks2),\
319 else:
320 options.argtxt = "\n".join((
321 "# ARGUMENTS LIST:",\
322 "# name = %s" % (options.name),\
323 # "# format = %s" % (options.format),\
324 "# ChIP-seq file 1 = %s" % (options.t1bdg),\
325 "# control file 1 = %s" % (options.c1bdg),\
326 "# ChIP-seq file 2 = %s" % (options.t2bdg),\
327 "# control file 2 = %s" % (options.c2bdg),\
331 if options.peaks_pvalue:
332 options.argtxt += "# treat/control -log10(pvalue) cutoff = %.2e\n" % (options.peaks_log_pvalue)
333 options.argtxt += "# treat/control -log10(qvalue) will not be calculated and reported as -1 in the final output.\n"
334 else:
335 options.argtxt += "# treat/control -log10(qvalue) cutoff = %.2e\n" % (options.peaks_log_qvalue)
337 if options.diff_pvalue:
338 options.argtxt += "# differential pvalue cutoff = %.2e\n" % (options.log_pvalue)
339 options.argtxt += "# differential qvalue will not be calculated and reported as -1 in the final output.\n"
340 else:
341 options.argtxt += "# differential qvalue cutoff = %.2e\n" % (options.log_qvalue)
343 return options
345 def opt_validate_filterdup ( options ):
346 """Validate options from a OptParser object.
348 Ret: Validated options object.
350 # logging object
351 logger.setLevel((4-options.verbose)*10)
353 options.error = logger.critical # function alias
354 options.warn = logger.warning
355 options.debug = logger.debug
356 options.info = logger.info
358 # gsize
359 try:
360 options.gsize = efgsize[options.gsize]
361 except:
362 try:
363 options.gsize = float(options.gsize)
364 except:
365 logger.error("Error when interpreting --gsize option: %s" % options.gsize)
366 logger.error("Available shortcuts of effective genome sizes are %s" % ",".join(list(efgsize.keys())))
367 sys.exit(1)
369 # format
371 options.gzip_flag = False # if the input is gzip file
373 options.format = options.format.upper()
374 if options.format == "ELAND":
375 options.parser = ELANDResultParser
376 elif options.format == "BED":
377 options.parser = BEDParser
378 elif options.format == "BEDPE":
379 options.parser = BEDPEParser
380 elif options.format == "ELANDMULTI":
381 options.parser = ELANDMultiParser
382 elif options.format == "ELANDEXPORT":
383 options.parser = ELANDExportParser
384 elif options.format == "SAM":
385 options.parser = SAMParser
386 elif options.format == "BAM":
387 options.parser = BAMParser
388 options.gzip_flag = True
389 elif options.format == "BOWTIE":
390 options.parser = BowtieParser
391 elif options.format == "BAMPE":
392 options.parser = BAMPEParser
393 options.gzip_flag = True
394 elif options.format == "BEDPE":
395 options.parser = BEDPEParser
396 elif options.format == "AUTO":
397 options.parser = guess_parser
398 else:
399 logger.error("Format \"%s\" cannot be recognized!" % (options.format))
400 sys.exit(1)
402 # duplicate reads
403 if options.keepduplicates != "auto" and options.keepduplicates != "all":
404 if not options.keepduplicates.isdigit():
405 logger.error("--keep-dup should be 'auto', 'all' or an integer!")
406 sys.exit(1)
408 # uppercase the format string
409 options.format = options.format.upper()
411 return options
413 def opt_validate_randsample ( options ):
414 """Validate options from a OptParser object.
416 Ret: Validated options object.
418 # logging object
419 logger.setLevel((4-options.verbose)*10)
421 options.error = logger.critical # function alias
422 options.warn = logger.warning
423 options.debug = logger.debug
424 options.info = logger.info
426 # format
428 options.gzip_flag = False # if the input is gzip file
430 options.format = options.format.upper()
431 if options.format == "ELAND":
432 options.parser = ELANDResultParser
433 elif options.format == "BED":
434 options.parser = BEDParser
435 elif options.format == "ELANDMULTI":
436 options.parser = ELANDMultiParser
437 elif options.format == "ELANDEXPORT":
438 options.parser = ELANDExportParser
439 elif options.format == "SAM":
440 options.parser = SAMParser
441 elif options.format == "BAM":
442 options.parser = BAMParser
443 options.gzip_flag = True
444 elif options.format == "BOWTIE":
445 options.parser = BowtieParser
446 elif options.format == "BAMPE":
447 options.parser = BAMPEParser
448 options.gzip_flag = True
449 elif options.format == "BEDPE":
450 options.parser = BEDPEParser
451 elif options.format == "AUTO":
452 options.parser = guess_parser
453 else:
454 logger.error("Format \"%s\" cannot be recognized!" % (options.format))
455 sys.exit(1)
457 # uppercase the format string
458 options.format = options.format.upper()
460 # percentage or number
461 if options.percentage:
462 if options.percentage > 100.0:
463 logger.error("Percentage can't be bigger than 100.0. Please check your options and retry!")
464 sys.exit(1)
465 elif options.number:
466 if options.number <= 0:
467 logger.error("Number of tags can't be smaller than or equal to 0. Please check your options and retry!")
468 sys.exit(1)
470 return options
472 def opt_validate_refinepeak ( options ):
473 """Validate options from a OptParser object.
475 Ret: Validated options object.
477 # logging object
478 logger.setLevel((4-options.verbose)*10)
480 options.error = logger.critical # function alias
481 options.warn = logger.warning
482 options.debug = logger.debug
483 options.info = logger.info
485 # format
487 options.gzip_flag = False # if the input is gzip file
489 options.format = options.format.upper()
490 if options.format == "ELAND":
491 options.parser = ELANDResultParser
492 elif options.format == "BED":
493 options.parser = BEDParser
494 elif options.format == "ELANDMULTI":
495 options.parser = ELANDMultiParser
496 elif options.format == "ELANDEXPORT":
497 options.parser = ELANDExportParser
498 elif options.format == "SAM":
499 options.parser = SAMParser
500 elif options.format == "BAM":
501 options.parser = BAMParser
502 options.gzip_flag = True
503 elif options.format == "BOWTIE":
504 options.parser = BowtieParser
505 elif options.format == "AUTO":
506 options.parser = guess_parser
507 else:
508 logger.error("Format \"%s\" cannot be recognized!" % (options.format))
509 sys.exit(1)
511 # uppercase the format string
512 options.format = options.format.upper()
514 return options
516 def opt_validate_predictd ( options ):
517 """Validate options from a OptParser object.
519 Ret: Validated options object.
521 # logging object
522 logger.setLevel((4-options.verbose)*10)
524 options.error = logger.critical # function alias
525 options.warn = logger.warning
526 options.debug = logger.debug
527 options.info = logger.info
529 # gsize
530 try:
531 options.gsize = efgsize[options.gsize]
532 except:
533 try:
534 options.gsize = float(options.gsize)
535 except:
536 logger.error("Error when interpreting --gsize option: %s" % options.gsize)
537 logger.error("Available shortcuts of effective genome sizes are %s" % ",".join(list(efgsize.keys())))
538 sys.exit(1)
540 # format
541 options.gzip_flag = False # if the input is gzip file
543 options.format = options.format.upper()
544 if options.format == "ELAND":
545 options.parser = ELANDResultParser
546 elif options.format == "BED":
547 options.parser = BEDParser
548 elif options.format == "ELANDMULTI":
549 options.parser = ELANDMultiParser
550 elif options.format == "ELANDEXPORT":
551 options.parser = ELANDExportParser
552 elif options.format == "SAM":
553 options.parser = SAMParser
554 elif options.format == "BAM":
555 options.parser = BAMParser
556 options.gzip_flag = True
557 elif options.format == "BAMPE":
558 options.parser = BAMPEParser
559 options.gzip_flag = True
560 options.nomodel = True
561 elif options.format == "BEDPE":
562 options.parser = BEDPEParser
563 options.nomodel = True
564 elif options.format == "BOWTIE":
565 options.parser = BowtieParser
566 elif options.format == "AUTO":
567 options.parser = guess_parser
568 else:
569 logger.error("Format \"%s\" cannot be recognized!" % (options.format))
570 sys.exit(1)
572 # uppercase the format string
573 options.format = options.format.upper()
575 # d_min is non-negative
576 if options.d_min < 0:
577 logger.error("Minimum fragment size shouldn't be negative!" % options.d_min)
578 sys.exit(1)
580 # upper and lower mfold
581 options.lmfold = options.mfold[0]
582 options.umfold = options.mfold[1]
583 if options.lmfold > options.umfold:
584 logger.error("Upper limit of mfold should be greater than lower limit!" % options.mfold)
585 sys.exit(1)
587 options.modelR = os.path.join( options.outdir, options.rfile )
589 return options
592 def opt_validate_pileup ( options ):
593 """Validate options from a OptParser object.
595 Ret: Validated options object.
597 # logging object
598 logger.setLevel((4-options.verbose)*10)
600 options.error = logger.critical # function alias
601 options.warn = logger.warning
602 options.debug = logger.debug
603 options.info = logger.info
605 # format
607 options.gzip_flag = False # if the input is gzip file
609 options.format = options.format.upper()
610 if options.format == "ELAND":
611 options.parser = ELANDResultParser
612 elif options.format == "BED":
613 options.parser = BEDParser
614 elif options.format == "ELANDMULTI":
615 options.parser = ELANDMultiParser
616 elif options.format == "ELANDEXPORT":
617 options.parser = ELANDExportParser
618 elif options.format == "SAM":
619 options.parser = SAMParser
620 elif options.format == "BAM":
621 options.parser = BAMParser
622 options.gzip_flag = True
623 elif options.format == "BOWTIE":
624 options.parser = BowtieParser
625 elif options.format == "BAMPE":
626 options.parser = BAMPEParser
627 options.gzip_flag = True
628 elif options.format == "BEDPE":
629 options.parser = BEDPEParser
630 else:
631 logger.error("Format \"%s\" cannot be recognized!" % (options.format))
632 sys.exit(1)
634 # uppercase the format string
635 options.format = options.format.upper()
637 # extsize
638 if options.extsize <= 0 :
639 logger.error("--extsize must > 0!")
640 sys.exit(1)
642 return options
644 def opt_validate_bdgcmp ( options ):
645 """Validate options from a OptParser object.
647 Ret: Validated options object.
649 # logging object
650 logger.setLevel((4-options.verbose)*10)
652 options.error = logger.critical # function alias
653 options.warn = logger.warning
654 options.debug = logger.debug
655 options.info = logger.info
657 # methods should be valid:
659 for method in set(options.method):
660 if method not in [ 'ppois', 'qpois', 'subtract', 'logFE', 'FE', 'logLR', 'slogLR', 'max' ]:
661 logger.error( "Invalid method: %s" % method )
662 sys.exit( 1 )
664 # # of --ofile must == # of -m
666 if options.ofile:
667 if len(options.method) != len(options.ofile):
668 logger.error("The number and the order of arguments for --ofile must be the same as for -m.")
669 sys.exit(1)
671 return options
674 def opt_validate_cmbreps ( options ):
675 """Validate options from a OptParser object.
677 Ret: Validated options object.
679 # logging object
680 logger.setLevel((4-options.verbose)*10)
682 options.error = logger.critical # function alias
683 options.warn = logger.warning
684 options.debug = logger.debug
685 options.info = logger.info
687 # methods should be valid:
690 if options.method not in [ 'fisher', 'max', 'mean']:
691 logger.error( "Invalid method: %s" % options.method )
692 sys.exit( 1 )
694 if len( options.ifile ) < 2:
695 logger.error("Combining replicates needs at least two replicates!")
696 sys.exit( 1 )
698 # # of -i must == # of -w
700 # if not options.weights:
701 # options.weights = [ 1.0 ] * len( options.ifile )
703 # if len( options.ifile ) != len( options.weights ):
704 # logger.error("Must provide same number of weights as number of input files.")
705 # sys.exit( 1 )
707 # if options.method == "fisher" and len( options.ifile ) > 3:
708 # logger.error("NOT IMPLEMENTED! Can't combine more than 3 replicates using Fisher's method.")
709 # sys.exit( 1 )
711 return options
714 def opt_validate_bdgopt ( options ):
715 """Validate options from a OptParser object.
717 Ret: Validated options object.
719 # logging object
720 logger.setLevel((4-options.verbose)*10)
722 options.error = logger.critical # function alias
723 options.warn = logger.warning
724 options.debug = logger.debug
725 options.info = logger.info
727 # methods should be valid:
729 if options.method.lower() not in [ 'multiply', 'add', 'p2q', 'max', 'min']:
730 logger.error( "Invalid method: %s" % options.method )
731 sys.exit( 1 )
733 if options.method.lower() in [ 'multiply', 'add' ] and not options.extraparam:
734 logger.error( "Need EXTRAPARAM for method multiply or add!")
735 sys.exit( 1 )
737 return options
739 def opt_validate_callvar ( options ):
740 """Validate options from a OptParser object.
742 Ret: Validated options object.
744 # logging object
745 logger.setLevel((4-options.verbose)*10)
747 options.error = logger.critical # function alias
748 options.warn = logger.warning
749 options.debug = logger.debug
750 options.info = logger.info
752 # methods should be valid:
754 if options.np <= 0:
755 options.np = 1
756 return options
759 def opt_validate_hmmratac ( options ):
760 """Validate options from a OptParser object.
762 Ret: Validated options object.
764 # logging object
765 logger.setLevel((4-options.verbose)*10)
767 options.error = logger.critical # function alias
768 options.warn = logger.warning
769 options.debug = logger.debug
770 options.info = logger.info
772 # input options.argtxt for hmmratac
773 options.argtxt = "# Command line: %s\n" % " ".join(sys.argv[1:])
774 # "# ARGUMENTS LIST:",\
775 # "# outfile = %s" % (options.ofile),\
776 # "# input file = %s\n" % (options.bam_file),\
777 # ... add additional
779 # Output options
780 #if options.store_bdg:
781 # options.argtxt += "# HMMRATAC will report whole genome bedgraph of all state annotations. \n"
783 #if options.store_bgscore:
784 # options.argtxt += "# HMMRATAC score will be added to each state annotation in bedgraph. \n"
786 #if options.store_peaks:
787 # options.argtxt += "# Peaks not reported in bed format\n"
789 #if options.print_exclude:
790 # options.print_exclude = os.path.join(options.outdir, options.ofile+"Output_exclude.bed")
791 #else:
792 # options.print_exclude = "None"
794 #if options.print_train:
795 # options.print_train = os.path.join(options.outdir, options.ofile+"Output_training.bed")
796 #else:
797 # options.print_train = "None"
800 # EM
801 # em_skip
802 if options.em_skip:
803 options.argtxt += "# EM training not performed on fragment distribution. \n"
804 # em_means non-negative
805 if sum( [ x < 0 for x in options.em_means ] ):
806 logger.error(" --means should not be negative! ")
807 sys.exit( 1 )
808 # em_stddev non-negative
809 if sum( [ x < 0 for x in options.em_stddevs ] ):
810 logger.error(" --stddev should not be negative! ")
811 sys.exit( 1 )
814 # HMM
815 # hmm_states non-negative int, warn if not k=3
816 #if options.hmm_states <=0:
817 # logger.error(" -s, --states must be an integer >= 0.")
818 # sys.exit( 1 )
819 #elif options.hmm_states != 3 and options.hmm_states > 0 and options.store_peaks == False:
820 # logger.warn(" If -s, --states not k=3, recommend NOT calling peaks, use bedgraph.")
822 # hmm_binsize > 0
823 if options.hmm_binsize <=0:
824 logger.error(" --binsize must be larger than 0.")
825 sys.exit( 1 )
827 # hmm_lower less than hmm_upper, non-negative
828 if options.hmm_lower <0:
829 logger.error(" -l, --lower should not be negative! ")
830 sys.exit( 1 )
831 if options.hmm_upper <0:
832 logger.error(" -u, --upper should not be negative! ")
833 sys.exit( 1 )
834 if options.hmm_lower > options.hmm_upper:
835 logger.error("Upper limit of fold change range should be greater than lower limit!" % options.mfold)
836 sys.exit(1)
838 # hmm_maxTrain non-negative
839 if options.hmm_maxTrain <= 0:
840 logger.error(" --maxTrain should be larger than 0!")
841 sys.exit( 1 )
843 # hmm_training_regions
844 if options.hmm_training_regions:
845 options.argtxt += "# Using -t, --training input to train HMM instead of using fold change settings to select. \n"
847 # hmm_zscore non-negative
848 #if options.hmm_zscore <0:
849 # logger.error(" -z, --zscore should not be negative!")
850 # sys.exit( 1 )
852 # hmm_randomSeed
853 if options.hmm_randomSeed:
854 options.argtxt += "# Random seed selected as: %d\n" % options.hmm_randomSeed
856 # hmm_window non-negative
857 #if options.hmm_window <0:
858 # logger.error(" --window should not be negative! ")
859 # sys.exit( 1 )
861 # hmm_file
862 #if options.hmm_file:
863 # options.argtxt += "# HMM training will be skipped, --model input used instead. \n"
865 # hmm_modelonly
866 if options.hmm_modelonly:
867 options.argtxt += "# Program will stop after generating model, which can be later applied with '--model'. \n"
870 # Peak Calling
871 if options.prescan_cutoff <= 1:
872 logger.error(" In order to use -c or --prescan-cutoff, the cutoff must be larger than 1.")
873 sys.exit( 1 )
875 if options.openregion_minlen < 0: # and options.store_peaks == True:
876 logger.error(" In order to use --minlen, the length should not be negative.")
877 sys.exit( 1 )
879 #if options.call_score.lower() not in [ 'max', 'ave', 'med', 'fc', 'zscore', 'all']:
880 # logger.error( " Invalid method: %s" % options.call_score )
881 # sys.exit( 1 )
883 # call_threshold non-negative
884 #if options.call_threshold <0:
885 # logger.error(" --threshold should not be negative! ")
886 # sys.exit( 1 )
889 # Misc
890 # misc_blacklist
891 #if options.misc_keep_duplicates:
892 # options.argtxt += "# Duplicate reads from analysis will be stored. \n"
894 # misc_trim non-negative
895 #if options.misc_trim <0:
896 # logger.error(" --trim should not be negative! ")
897 # sys.exit( 1 )
899 # np # should this be mp? non-negative
900 #if options.np <0:
901 # logger.error(" -m, --multiple-processing should not be negative! ")
902 # sys.exit( 1 )
904 # min_map_quality non-negative
905 #if options.min_map_quality <0:
906 # logger.error(" -q, --minmapq should not be negative! ")
907 # sys.exit( 1 )
910 return options