MACS3/Utilities/OptValidator.py

   1 # Time-stamp: <2022-06-10 10:24:34 Tao Liu>
   2
   3 """Module Description
   4
   5 This code is free software; you can redistribute it and/or modify it
   6 under the terms of the BSD License (see the file LICENSE included with
   7 the distribution).
   8 """
   9
  10 # ------------------------------------
  11 # python modules
  12 # ------------------------------------
  13 import sys
  14 import os
  15 import re
  16 import resource # we turn on memory monitoring inside of logging
  17 from argparse import ArgumentError
  18 from subprocess import Popen, PIPE
  19 from math import log
  20
  21 # ------------------------------------
  22 # MACS3 modules
  23 # ------------------------------------
  24 from MACS3.IO.Parser import BEDParser, ELANDResultParser, ELANDMultiParser, \
  25     ELANDExportParser, SAMParser, BAMParser, BAMPEParser,\
  26     BEDPEParser, BowtieParser,  guess_parser
  27
  28 from MACS3.Utilities.Constants import EFFECTIVEGS as efgsize
  29 # ------------------------------------
  30 # constants
  31 # ------------------------------------
  32
  33 import logging
  34 import MACS3.Utilities.Logger
  35 logger = logging.getLogger(__name__)
  36
  37 # ------------------------------------
  38 # Misc functions
  39 # ------------------------------------
  40
  41 logger = logging.getLogger(__name__)
  42
  43 def opt_validate_callpeak ( options ):
  44     """Validate options from a OptParser object.
  45
  46     Ret: Validated options object.
  47     """
  48     # logging object
  49     logger.setLevel((4-options.verbose)*10)
  50
  51     options.error   = logger.critical        # function alias
  52     options.warn    = logger.warning
  53     options.debug   = logger.debug
  54     options.info    = logger.info
  55
  56     # gsize
  57     try:
  58         options.gsize = efgsize[options.gsize]
  59     except:
  60         try:
  61             options.gsize = float(options.gsize)
  62         except:
  63             logger.error("Error when interpreting --gsize option: %s" % options.gsize)
  64             logger.error("Available shortcuts of effective genome sizes are %s" % ",".join(list(efgsize.keys())))
  65             sys.exit(1)
  66
  67     # format
  68     options.gzip_flag = False           # if the input is gzip file
  69
  70     options.format = options.format.upper()
  71     if options.format == "ELAND":
  72         options.parser = ELANDResultParser
  73     elif options.format == "BED":
  74         options.parser = BEDParser
  75     elif options.format == "ELANDMULTI":
  76         options.parser = ELANDMultiParser
  77     elif options.format == "ELANDEXPORT":
  78         options.parser = ELANDExportParser
  79     elif options.format == "SAM":
  80         options.parser = SAMParser
  81     elif options.format == "BAM":
  82         options.parser = BAMParser
  83         options.gzip_flag = True
  84     elif options.format == "BAMPE":
  85         options.parser = BAMPEParser
  86         options.gzip_flag = True
  87         options.nomodel = True
  88     elif options.format == "BEDPE":
  89         options.parser = BEDPEParser
  90         options.nomodel = True
  91     elif options.format == "BOWTIE":
  92         options.parser = BowtieParser
  93     elif options.format == "AUTO":
  94         options.parser = guess_parser
  95     else:
  96         logger.error("Format \"%s\" cannot be recognized!" % (options.format))
  97         sys.exit(1)
  98
  99     # duplicate reads
 100     if options.keepduplicates != "auto" and options.keepduplicates != "all":
 101         if not options.keepduplicates.isdigit():
 102             logger.error("--keep-dup should be 'auto', 'all' or an integer!")
 103             sys.exit(1)
 104
 105     # shiftsize>0
 106     #if options.shiftsize:               # only if --shiftsize is set, it's true
 107     #    options.extsize = 2 * options.shiftsize
 108     #else:                               # if --shiftsize is not set
 109     #    options.shiftsize = options.extsize / 2
 110     if options.extsize < 1 :
 111         logger.error("--extsize must >= 1!")
 112         sys.exit(1)
 113
 114     # refine_peaks, call_summits can't be combined with --broad
 115     #if options.broad and (options.refine_peaks or options.call_summits):
 116     #    logger.error("--broad can't be combined with --refine-peaks or --call-summits!")
 117     #    sys.exit(1)
 118
 119     if options.broad and options.call_summits:
 120         logger.error("--broad can't be combined with --call-summits!")
 121         sys.exit(1)
 122
 123     if options.pvalue:
 124         # if set, ignore qvalue cutoff
 125         options.log_qvalue = None
 126         options.log_pvalue = log(options.pvalue,10)*-1
 127     else:
 128         options.log_qvalue = log(options.qvalue,10)*-1
 129         options.log_pvalue = None
 130     if options.broad:
 131         options.log_broadcutoff = log(options.broadcutoff,10)*-1
 132
 133     # uppercase the format string
 134     options.format = options.format.upper()
 135
 136     # d_min is non-negative
 137     if options.d_min < 0:
 138         logger.error("Minimum fragment size shouldn't be negative!" % options.d_min)
 139         sys.exit(1)
 140
 141     # upper and lower mfold
 142     options.lmfold = options.mfold[0]
 143     options.umfold = options.mfold[1]
 144     if options.lmfold > options.umfold:
 145         logger.error("Upper limit of mfold should be greater than lower limit!" % options.mfold)
 146         sys.exit(1)
 147
 148     # output filenames
 149     options.peakxls = os.path.join( options.outdir, options.name+"_peaks.xls" )
 150     options.peakbed = os.path.join( options.outdir, options.name+"_peaks.bed" )
 151     options.peakNarrowPeak = os.path.join( options.outdir, options.name+"_peaks.narrowPeak" )
 152     options.peakBroadPeak = os.path.join( options.outdir, options.name+"_peaks.broadPeak" )
 153     options.peakGappedPeak = os.path.join( options.outdir, options.name+"_peaks.gappedPeak" )
 154     options.summitbed = os.path.join( options.outdir, options.name+"_summits.bed" )
 155     options.bdg_treat = os.path.join( options.outdir, options.name+"_treat_pileup.bdg" )
 156     options.bdg_control= os.path.join( options.outdir, options.name+"_control_lambda.bdg" )
 157     if options.cutoff_analysis:
 158         options.cutoff_analysis_file = os.path.join( options.outdir, options.name+"_cutoff_analysis.txt" )
 159     else:
 160         options.cutoff_analysis_file = "None"
 161     #options.negxls  = os.path.join( options.name+"_negative_peaks.xls" )
 162     #options.diagxls = os.path.join( options.name+"_diag.xls" )
 163     options.modelR  = os.path.join( options.outdir, options.name+"_model.r" )
 164     #options.pqtable  = os.path.join( options.outdir, options.name+"_pq_table.txt" )
 165
 166     options.argtxt = "\n".join((
 167         "# Command line: %s" % " ".join(sys.argv[1:]),\
 168         "# ARGUMENTS LIST:",\
 169         "# name = %s" % (options.name),\
 170         "# format = %s" % (options.format),\
 171         "# ChIP-seq file = %s" % (options.tfile),\
 172         "# control file = %s" % (options.cfile),\
 173         "# effective genome size = %.2e" % (options.gsize),\
 174         #"# tag size = %d" % (options.tsize),\
 175         "# band width = %d" % (options.bw),\
 176         "# model fold = %s\n" % (options.mfold),\
 177         ))
 178
 179     if options.pvalue:
 180         if options.broad:
 181             options.argtxt +=  "# pvalue cutoff for narrow/strong regions = %.2e\n" % (options.pvalue)
 182             options.argtxt +=  "# pvalue cutoff for broad/weak regions = %.2e\n" % (options.broadcutoff)
 183             options.argtxt +=  "# qvalue will not be calculated and reported as -1 in the final output.\n"
 184         else:
 185             options.argtxt +=  "# pvalue cutoff = %.2e\n" % (options.pvalue)
 186             options.argtxt +=  "# qvalue will not be calculated and reported as -1 in the final output.\n"
 187     else:
 188         if options.broad:
 189             options.argtxt +=  "# qvalue cutoff for narrow/strong regions = %.2e\n" % (options.qvalue)
 190             options.argtxt +=  "# qvalue cutoff for broad/weak regions = %.2e\n" % (options.broadcutoff)
 191         else:
 192             options.argtxt +=  "# qvalue cutoff = %.2e\n" % (options.qvalue)
 193
 194     if options.maxgap:
 195         options.argtxt += "# The maximum gap between significant sites = %d\n" % options.maxgap
 196     else:
 197         options.argtxt += "# The maximum gap between significant sites is assigned as the read length/tag size.\n"
 198     if options.minlen:
 199         options.argtxt += "# The minimum length of peaks = %d\n" % options.minlen
 200     else:
 201         options.argtxt += "# The minimum length of peaks is assigned as the predicted fragment length \"d\".\n"
 202
 203     if options.downsample:
 204         options.argtxt += "# Larger dataset will be randomly sampled towards smaller dataset.\n"
 205         if options.seed >= 0:
 206             options.argtxt += "# Random seed has been set as: %d\n" % options.seed
 207     else:
 208         if options.scaleto == "large":
 209             options.argtxt += "# Smaller dataset will be scaled towards larger dataset.\n"
 210         else:
 211             options.argtxt += "# Larger dataset will be scaled towards smaller dataset.\n"
 212
 213     if options.ratio != 1.0:
 214         options.argtxt += "# Using a custom scaling factor: %.2e\n" % (options.ratio)
 215
 216     if options.cfile:
 217         options.argtxt += "# Range for calculating regional lambda is: %d bps and %d bps\n" % (options.smalllocal,options.largelocal)
 218     else:
 219         options.argtxt += "# Range for calculating regional lambda is: %d bps\n" % (options.largelocal)
 220
 221     if options.broad:
 222         options.argtxt += "# Broad region calling is on\n"
 223     else:
 224         options.argtxt += "# Broad region calling is off\n"
 225
 226     if options.fecutoff != 1.0:
 227         options.argtxt += "# Additional cutoff on fold-enrichment is: %.2f\n" % (options.fecutoff)
 228
 229     if options.format in ["BAMPE", "BEDPE"]:
 230         # neutralize SHIFT
 231         options.shift = 0
 232         options.argtxt += "# Paired-End mode is on\n"
 233     else:
 234         options.argtxt += "# Paired-End mode is off\n"
 235
 236     #if options.refine_peaks:
 237     #    options.argtxt += "# Refining peak for read balance is on\n"
 238     if options.call_summits:
 239         options.argtxt += "# Searching for subpeak summits is on\n"
 240
 241     if options.do_SPMR and options.store_bdg:
 242         options.argtxt += "# MACS will save fragment pileup signal per million reads\n"
 243
 244     return options
 245
 246 def opt_validate_diffpeak ( options ):
 247     """Validate options from a OptParser object.
 248
 249     Ret: Validated options object.
 250     """
 251     # logging object
 252     logger.setLevel((4-options.verbose)*10)
 253
 254     options.error   = logger.critical        # function alias
 255     options.warn    = logger.warning
 256     options.debug   = logger.debug
 257     options.info    = logger.info
 258
 259     # format
 260     options.gzip_flag = False           # if the input is gzip file
 261
 262 #    options.format = options.format.upper()
 263     # fox this stuff
 264 #    if True: pass
 265 #    elif options.format == "AUTO":
 266 #        options.parser = guess_parser
 267 #    else:
 268 #        logger.error("Format \"%s\" cannot be recognized!" % (options.format))
 269 #        sys.exit(1)
 270
 271     if options.peaks_pvalue:
 272         # if set, ignore qvalue cutoff
 273         options.peaks_log_qvalue = None
 274         options.peaks_log_pvalue = log(options.peaks_pvalue,10)*-1
 275         options.track_score_method = 'p'
 276     else:
 277         options.peaks_log_qvalue = log(options.peaks_qvalue,10)*-1
 278         options.peaks_log_pvalue = None
 279         options.track_score_method = 'q'
 280
 281     if options.diff_pvalue:
 282         # if set, ignore qvalue cutoff
 283         options.log_qvalue = None
 284         options.log_pvalue = log(options.diff_pvalue,10)*-1
 285         options.score_method = 'p'
 286     else:
 287         options.log_qvalue = log(options.diff_qvalue,10)*-1
 288         options.log_pvalue = None
 289         options.score_method = 'q'
 290
 291     # output filenames
 292     options.peakxls = options.name+"_diffpeaks.xls"
 293     options.peakbed = options.name+"_diffpeaks.bed"
 294     options.peak1xls = options.name+"_diffpeaks_by_peaks1.xls"
 295     options.peak2xls = options.name+"_diffpeaks_by_peaks2.xls"
 296     options.bdglogLR = options.name+"_logLR.bdg"
 297     options.bdgpvalue = options.name+"_logLR.bdg"
 298     options.bdglogFC = options.name+"_logLR.bdg"
 299
 300     options.call_peaks = True
 301     if not (options.peaks1 == '' or options.peaks2 == ''):
 302         if options.peaks1 == '':
 303             raise ArgumentError('peaks1', 'Must specify both peaks1 and peaks2, or neither (to call peaks again)')
 304         elif options.peaks2 == '':
 305             raise ArgumentError('peaks2', 'Must specify both peaks1 and peaks2, or neither (to call peaks again)')
 306         options.call_peaks = False
 307         options.argtxt = "\n".join((
 308             "# ARGUMENTS LIST:",\
 309             "# name = %s" % (options.name),\
 310 #            "# format = %s" % (options.format),\
 311             "# ChIP-seq file 1 = %s" % (options.t1bdg),\
 312             "# control file 1 = %s" % (options.c1bdg),\
 313             "# ChIP-seq file 2 = %s" % (options.t2bdg),\
 314             "# control file 2 = %s" % (options.c2bdg),\
 315             "# Peaks, condition 1 = %s" % (options.peaks1),\
 316             "# Peaks, condition 2 = %s" % (options.peaks2),\
 317             ""
 318             ))
 319     else:
 320         options.argtxt = "\n".join((
 321             "# ARGUMENTS LIST:",\
 322             "# name = %s" % (options.name),\
 323 #            "# format = %s" % (options.format),\
 324             "# ChIP-seq file 1 = %s" % (options.t1bdg),\
 325             "# control file 1 = %s" % (options.c1bdg),\
 326             "# ChIP-seq file 2 = %s" % (options.t2bdg),\
 327             "# control file 2 = %s" % (options.c2bdg),\
 328             ""
 329             ))
 330
 331         if options.peaks_pvalue:
 332             options.argtxt +=  "# treat/control -log10(pvalue) cutoff = %.2e\n" % (options.peaks_log_pvalue)
 333             options.argtxt +=  "# treat/control -log10(qvalue) will not be calculated and reported as -1 in the final output.\n"
 334         else:
 335             options.argtxt +=  "# treat/control -log10(qvalue) cutoff = %.2e\n" % (options.peaks_log_qvalue)
 336
 337     if options.diff_pvalue:
 338         options.argtxt +=  "# differential pvalue cutoff = %.2e\n" % (options.log_pvalue)
 339         options.argtxt +=  "# differential qvalue will not be calculated and reported as -1 in the final output.\n"
 340     else:
 341         options.argtxt +=  "# differential qvalue cutoff = %.2e\n" % (options.log_qvalue)
 342
 343     return options
 344
 345 def opt_validate_filterdup ( options ):
 346     """Validate options from a OptParser object.
 347
 348     Ret: Validated options object.
 349     """
 350     # logging object
 351     logger.setLevel((4-options.verbose)*10)
 352
 353     options.error   = logger.critical        # function alias
 354     options.warn    = logger.warning
 355     options.debug   = logger.debug
 356     options.info    = logger.info
 357
 358     # gsize
 359     try:
 360         options.gsize = efgsize[options.gsize]
 361     except:
 362         try:
 363             options.gsize = float(options.gsize)
 364         except:
 365             logger.error("Error when interpreting --gsize option: %s" % options.gsize)
 366             logger.error("Available shortcuts of effective genome sizes are %s" % ",".join(list(efgsize.keys())))
 367             sys.exit(1)
 368
 369     # format
 370
 371     options.gzip_flag = False           # if the input is gzip file
 372
 373     options.format = options.format.upper()
 374     if options.format == "ELAND":
 375         options.parser = ELANDResultParser
 376     elif options.format == "BED":
 377         options.parser = BEDParser
 378     elif options.format == "BEDPE":
 379         options.parser = BEDPEParser
 380     elif options.format == "ELANDMULTI":
 381         options.parser = ELANDMultiParser
 382     elif options.format == "ELANDEXPORT":
 383         options.parser = ELANDExportParser
 384     elif options.format == "SAM":
 385         options.parser = SAMParser
 386     elif options.format == "BAM":
 387         options.parser = BAMParser
 388         options.gzip_flag = True
 389     elif options.format == "BOWTIE":
 390         options.parser = BowtieParser
 391     elif options.format == "BAMPE":
 392         options.parser = BAMPEParser
 393         options.gzip_flag = True
 394     elif options.format == "BEDPE":
 395         options.parser = BEDPEParser
 396     elif options.format == "AUTO":
 397         options.parser = guess_parser
 398     else:
 399         logger.error("Format \"%s\" cannot be recognized!" % (options.format))
 400         sys.exit(1)
 401
 402     # duplicate reads
 403     if options.keepduplicates != "auto" and options.keepduplicates != "all":
 404         if not options.keepduplicates.isdigit():
 405             logger.error("--keep-dup should be 'auto', 'all' or an integer!")
 406             sys.exit(1)
 407
 408     # uppercase the format string
 409     options.format = options.format.upper()
 410
 411     return options
 412
 413 def opt_validate_randsample ( options ):
 414     """Validate options from a OptParser object.
 415
 416     Ret: Validated options object.
 417     """
 418     # logging object
 419     logger.setLevel((4-options.verbose)*10)
 420
 421     options.error   = logger.critical        # function alias
 422     options.warn    = logger.warning
 423     options.debug   = logger.debug
 424     options.info    = logger.info
 425
 426     # format
 427
 428     options.gzip_flag = False           # if the input is gzip file
 429
 430     options.format = options.format.upper()
 431     if options.format == "ELAND":
 432         options.parser = ELANDResultParser
 433     elif options.format == "BED":
 434         options.parser = BEDParser
 435     elif options.format == "ELANDMULTI":
 436         options.parser = ELANDMultiParser
 437     elif options.format == "ELANDEXPORT":
 438         options.parser = ELANDExportParser
 439     elif options.format == "SAM":
 440         options.parser = SAMParser
 441     elif options.format == "BAM":
 442         options.parser = BAMParser
 443         options.gzip_flag = True
 444     elif options.format == "BOWTIE":
 445         options.parser = BowtieParser
 446     elif options.format == "BAMPE":
 447         options.parser = BAMPEParser
 448         options.gzip_flag = True
 449     elif options.format == "BEDPE":
 450         options.parser = BEDPEParser
 451     elif options.format == "AUTO":
 452         options.parser = guess_parser
 453     else:
 454         logger.error("Format \"%s\" cannot be recognized!" % (options.format))
 455         sys.exit(1)
 456
 457     # uppercase the format string
 458     options.format = options.format.upper()
 459
 460     # percentage or number
 461     if options.percentage:
 462         if options.percentage > 100.0:
 463             logger.error("Percentage can't be bigger than 100.0. Please check your options and retry!")
 464             sys.exit(1)
 465     elif options.number:
 466         if options.number <= 0:
 467             logger.error("Number of tags can't be smaller than or equal to 0. Please check your options and retry!")
 468             sys.exit(1)
 469
 470     return options
 471
 472 def opt_validate_refinepeak ( options ):
 473     """Validate options from a OptParser object.
 474
 475     Ret: Validated options object.
 476     """
 477     # logging object
 478     logger.setLevel((4-options.verbose)*10)
 479
 480     options.error   = logger.critical        # function alias
 481     options.warn    = logger.warning
 482     options.debug   = logger.debug
 483     options.info    = logger.info
 484
 485     # format
 486
 487     options.gzip_flag = False           # if the input is gzip file
 488
 489     options.format = options.format.upper()
 490     if options.format == "ELAND":
 491         options.parser = ELANDResultParser
 492     elif options.format == "BED":
 493         options.parser = BEDParser
 494     elif options.format == "ELANDMULTI":
 495         options.parser = ELANDMultiParser
 496     elif options.format == "ELANDEXPORT":
 497         options.parser = ELANDExportParser
 498     elif options.format == "SAM":
 499         options.parser = SAMParser
 500     elif options.format == "BAM":
 501         options.parser = BAMParser
 502         options.gzip_flag = True
 503     elif options.format == "BOWTIE":
 504         options.parser = BowtieParser
 505     elif options.format == "AUTO":
 506         options.parser = guess_parser
 507     else:
 508         logger.error("Format \"%s\" cannot be recognized!" % (options.format))
 509         sys.exit(1)
 510
 511     # uppercase the format string
 512     options.format = options.format.upper()
 513
 514     return options
 515
 516 def opt_validate_predictd ( options ):
 517     """Validate options from a OptParser object.
 518
 519     Ret: Validated options object.
 520     """
 521     # logging object
 522     logger.setLevel((4-options.verbose)*10)
 523
 524     options.error   = logger.critical        # function alias
 525     options.warn    = logger.warning
 526     options.debug   = logger.debug
 527     options.info    = logger.info
 528
 529     # gsize
 530     try:
 531         options.gsize = efgsize[options.gsize]
 532     except:
 533         try:
 534             options.gsize = float(options.gsize)
 535         except:
 536             logger.error("Error when interpreting --gsize option: %s" % options.gsize)
 537             logger.error("Available shortcuts of effective genome sizes are %s" % ",".join(list(efgsize.keys())))
 538             sys.exit(1)
 539
 540     # format
 541     options.gzip_flag = False           # if the input is gzip file
 542
 543     options.format = options.format.upper()
 544     if options.format == "ELAND":
 545         options.parser = ELANDResultParser
 546     elif options.format == "BED":
 547         options.parser = BEDParser
 548     elif options.format == "ELANDMULTI":
 549         options.parser = ELANDMultiParser
 550     elif options.format == "ELANDEXPORT":
 551         options.parser = ELANDExportParser
 552     elif options.format == "SAM":
 553         options.parser = SAMParser
 554     elif options.format == "BAM":
 555         options.parser = BAMParser
 556         options.gzip_flag = True
 557     elif options.format == "BAMPE":
 558         options.parser = BAMPEParser
 559         options.gzip_flag = True
 560         options.nomodel = True
 561     elif options.format == "BEDPE":
 562         options.parser = BEDPEParser
 563         options.nomodel = True
 564     elif options.format == "BOWTIE":
 565         options.parser = BowtieParser
 566     elif options.format == "AUTO":
 567         options.parser = guess_parser
 568     else:
 569         logger.error("Format \"%s\" cannot be recognized!" % (options.format))
 570         sys.exit(1)
 571
 572     # uppercase the format string
 573     options.format = options.format.upper()
 574
 575     # d_min is non-negative
 576     if options.d_min < 0:
 577         logger.error("Minimum fragment size shouldn't be negative!" % options.d_min)
 578         sys.exit(1)
 579
 580     # upper and lower mfold
 581     options.lmfold = options.mfold[0]
 582     options.umfold = options.mfold[1]
 583     if options.lmfold > options.umfold:
 584         logger.error("Upper limit of mfold should be greater than lower limit!" % options.mfold)
 585         sys.exit(1)
 586
 587     options.modelR  = os.path.join( options.outdir, options.rfile )
 588
 589     return options
 590
 591
 592 def opt_validate_pileup ( options ):
 593     """Validate options from a OptParser object.
 594
 595     Ret: Validated options object.
 596     """
 597     # logging object
 598     logger.setLevel((4-options.verbose)*10)
 599
 600     options.error   = logger.critical        # function alias
 601     options.warn    = logger.warning
 602     options.debug   = logger.debug
 603     options.info    = logger.info
 604
 605     # format
 606
 607     options.gzip_flag = False           # if the input is gzip file
 608
 609     options.format = options.format.upper()
 610     if options.format == "ELAND":
 611         options.parser = ELANDResultParser
 612     elif options.format == "BED":
 613         options.parser = BEDParser
 614     elif options.format == "ELANDMULTI":
 615         options.parser = ELANDMultiParser
 616     elif options.format == "ELANDEXPORT":
 617         options.parser = ELANDExportParser
 618     elif options.format == "SAM":
 619         options.parser = SAMParser
 620     elif options.format == "BAM":
 621         options.parser = BAMParser
 622         options.gzip_flag = True
 623     elif options.format == "BOWTIE":
 624         options.parser = BowtieParser
 625     elif options.format == "BAMPE":
 626         options.parser = BAMPEParser
 627         options.gzip_flag = True
 628     elif options.format == "BEDPE":
 629         options.parser = BEDPEParser
 630     else:
 631         logger.error("Format \"%s\" cannot be recognized!" % (options.format))
 632         sys.exit(1)
 633
 634     # uppercase the format string
 635     options.format = options.format.upper()
 636
 637     # extsize
 638     if options.extsize <= 0 :
 639         logger.error("--extsize must > 0!")
 640         sys.exit(1)
 641
 642     return options
 643
 644 def opt_validate_bdgcmp ( options ):
 645     """Validate options from a OptParser object.
 646
 647     Ret: Validated options object.
 648     """
 649     # logging object
 650     logger.setLevel((4-options.verbose)*10)
 651
 652     options.error   = logger.critical        # function alias
 653     options.warn    = logger.warning
 654     options.debug   = logger.debug
 655     options.info    = logger.info
 656
 657     # methods should be valid:
 658
 659     for method in set(options.method):
 660         if method not in [ 'ppois', 'qpois', 'subtract', 'logFE', 'FE', 'logLR', 'slogLR', 'max' ]:
 661             logger.error( "Invalid method: %s" % method )
 662             sys.exit( 1 )
 663
 664     # # of --ofile must == # of -m
 665
 666     if options.ofile:
 667         if len(options.method) != len(options.ofile):
 668             logger.error("The number and the order of arguments for --ofile must be the same as for -m.")
 669             sys.exit(1)
 670
 671     return options
 672
 673
 674 def opt_validate_cmbreps ( options ):
 675     """Validate options from a OptParser object.
 676
 677     Ret: Validated options object.
 678     """
 679     # logging object
 680     logger.setLevel((4-options.verbose)*10)
 681
 682     options.error   = logger.critical        # function alias
 683     options.warn    = logger.warning
 684     options.debug   = logger.debug
 685     options.info    = logger.info
 686
 687     # methods should be valid:
 688
 689
 690     if options.method not in [ 'fisher', 'max', 'mean']:
 691         logger.error( "Invalid method: %s" % options.method )
 692         sys.exit( 1 )
 693
 694     if len( options.ifile ) < 2:
 695         logger.error("Combining replicates needs at least two replicates!")
 696         sys.exit( 1 )
 697
 698     # # of -i must == # of -w
 699
 700     # if not options.weights:
 701     #     options.weights = [ 1.0 ] * len( options.ifile )
 702
 703     # if len( options.ifile ) != len( options.weights ):
 704     #     logger.error("Must provide same number of weights as number of input files.")
 705     #     sys.exit( 1 )
 706
 707     # if options.method == "fisher" and len( options.ifile ) > 3:
 708     #     logger.error("NOT IMPLEMENTED! Can't combine more than 3 replicates using Fisher's method.")
 709     #     sys.exit( 1 )
 710
 711     return options
 712
 713
 714 def opt_validate_bdgopt ( options ):
 715     """Validate options from a OptParser object.
 716
 717     Ret: Validated options object.
 718     """
 719     # logging object
 720     logger.setLevel((4-options.verbose)*10)
 721
 722     options.error   = logger.critical        # function alias
 723     options.warn    = logger.warning
 724     options.debug   = logger.debug
 725     options.info    = logger.info
 726
 727     # methods should be valid:
 728
 729     if options.method.lower() not in [ 'multiply', 'add', 'p2q', 'max', 'min']:
 730         logger.error( "Invalid method: %s" % options.method )
 731         sys.exit( 1 )
 732
 733     if options.method.lower() in [ 'multiply', 'add' ] and not options.extraparam:
 734         logger.error( "Need EXTRAPARAM for method multiply or add!")
 735         sys.exit( 1 )
 736
 737     return options
 738
 739 def opt_validate_callvar ( options ):
 740     """Validate options from a OptParser object.
 741
 742     Ret: Validated options object.
 743     """
 744     # logging object
 745     logger.setLevel((4-options.verbose)*10)
 746
 747     options.error   = logger.critical        # function alias
 748     options.warn    = logger.warning
 749     options.debug   = logger.debug
 750     options.info    = logger.info
 751
 752     # methods should be valid:
 753
 754     if options.np <= 0:
 755         options.np = 1
 756     return options
 757
 758
 759 def opt_validate_hmmratac ( options ):
 760     """Validate options from a OptParser object.
 761
 762     Ret: Validated options object.
 763     """
 764     # logging object
 765     logger.setLevel((4-options.verbose)*10)
 766
 767     options.error   = logger.critical        # function alias
 768     options.warn    = logger.warning
 769     options.debug   = logger.debug
 770     options.info    = logger.info
 771
 772     # input options.argtxt for hmmratac
 773     options.argtxt = "# Command line: %s\n" % " ".join(sys.argv[1:])
 774     #        "# ARGUMENTS LIST:",\
 775     #        "# outfile = %s" % (options.ofile),\
 776     #        "# input file = %s\n" % (options.bam_file),\
 777     # ... add additional
 778
 779     # Output options
 780     #if options.store_bdg:
 781     #    options.argtxt += "# HMMRATAC will report whole genome bedgraph of all state annotations. \n"
 782
 783     #if options.store_bgscore:
 784     #    options.argtxt += "# HMMRATAC score will be added to each state annotation in bedgraph. \n"
 785
 786     #if options.store_peaks:
 787     #    options.argtxt += "# Peaks not reported in bed format\n"
 788
 789     #if options.print_exclude:
 790     #    options.print_exclude = os.path.join(options.outdir, options.ofile+"Output_exclude.bed")
 791     #else:
 792     #    options.print_exclude = "None"
 793
 794     #if options.print_train:
 795     #    options.print_train = os.path.join(options.outdir, options.ofile+"Output_training.bed")
 796     #else:
 797     #    options.print_train = "None"
 798
 799
 800     # EM
 801     # em_skip
 802     if options.em_skip:
 803         options.argtxt += "# EM training not performed on fragment distribution. \n"
 804     # em_means non-negative
 805     if sum( [ x < 0 for x in options.em_means ] ):
 806         logger.error(" --means should not be negative! ")
 807         sys.exit( 1 )
 808     # em_stddev non-negative
 809     if sum( [ x < 0 for x in options.em_stddevs ] ):
 810         logger.error(" --stddev should not be negative! ")
 811         sys.exit( 1 )
 812
 813
 814     # HMM
 815     # hmm_states non-negative int, warn if not k=3
 816     #if options.hmm_states <=0:
 817     #    logger.error(" -s, --states must be an integer >= 0.")
 818     #    sys.exit( 1 )
 819     #elif options.hmm_states != 3 and options.hmm_states > 0 and options.store_peaks == False:
 820     #    logger.warn(" If -s, --states not k=3, recommend NOT calling peaks, use bedgraph.")
 821
 822     # hmm_binsize > 0
 823     if options.hmm_binsize <=0:
 824         logger.error(" --binsize must be larger than 0.")
 825         sys.exit( 1 )
 826
 827     # hmm_lower less than hmm_upper, non-negative
 828     if options.hmm_lower <0:
 829         logger.error(" -l, --lower should not be negative! ")
 830         sys.exit( 1 )
 831     if options.hmm_upper <0:
 832         logger.error(" -u, --upper should not be negative! ")
 833         sys.exit( 1 )
 834     if options.hmm_lower > options.hmm_upper:
 835         logger.error("Upper limit of fold change range should be greater than lower limit!" % options.mfold)
 836         sys.exit(1)
 837
 838     # hmm_maxTrain non-negative
 839     if options.hmm_maxTrain <= 0:
 840         logger.error(" --maxTrain should be larger than 0!")
 841         sys.exit( 1 )
 842
 843     # hmm_training_regions
 844     if options.hmm_training_regions:
 845        options.argtxt += "# Using -t, --training input to train HMM instead of using fold change settings to select. \n"
 846
 847     # hmm_zscore non-negative
 848     #if options.hmm_zscore <0:
 849     #    logger.error(" -z, --zscore should not be negative!")
 850     #    sys.exit( 1 )
 851
 852     # hmm_randomSeed
 853     if options.hmm_randomSeed:
 854         options.argtxt += "# Random seed selected as: %d\n" % options.hmm_randomSeed
 855
 856     # hmm_window non-negative
 857     #if options.hmm_window <0:
 858     #    logger.error(" --window should not be negative! ")
 859     #    sys.exit( 1 )
 860
 861     # hmm_file
 862     #if options.hmm_file:
 863     #    options.argtxt += "# HMM training will be skipped, --model input used instead. \n"
 864
 865     # hmm_modelonly
 866     if options.hmm_modelonly:
 867        options.argtxt += "# Program will stop after generating model, which can be later applied with '--model'. \n"
 868
 869
 870     # Peak Calling
 871     if options.prescan_cutoff <= 1:
 872         logger.error(" In order to use -c or --prescan-cutoff, the cutoff must be larger than 1.")
 873         sys.exit( 1 )
 874
 875     if options.openregion_minlen < 0: # and options.store_peaks == True:
 876         logger.error(" In order to use --minlen, the length should not be negative.")
 877         sys.exit( 1 )
 878
 879     #if options.call_score.lower() not in [ 'max', 'ave', 'med', 'fc', 'zscore', 'all']:
 880     #    logger.error( " Invalid method: %s" % options.call_score )
 881     #    sys.exit( 1 )
 882
 883     # call_threshold non-negative
 884     #if options.call_threshold <0:
 885     #    logger.error(" --threshold should not be negative! ")
 886     #    sys.exit( 1 )
 887
 888
 889     # Misc
 890     # misc_blacklist
 891     #if options.misc_keep_duplicates:
 892     #    options.argtxt += "# Duplicate reads from analysis will be stored. \n"
 893
 894     # misc_trim non-negative
 895     #if options.misc_trim <0:
 896     #    logger.error(" --trim should not be negative! ")
 897     #    sys.exit( 1 )
 898
 899     # np # should this be mp? non-negative
 900     #if options.np <0:
 901     #    logger.error(" -m, --multiple-processing should not be negative! ")
 902     #    sys.exit( 1 )
 903
 904     # min_map_quality non-negative
 905     #if options.min_map_quality <0:
 906     #    logger.error(" -q, --minmapq should not be negative! ")
 907     #    sys.exit( 1 )
 908
 909
 910     return options
 911
 912