etc/gatk-wdl/fm2/tasks/cutadapt.wdl

   1 version 1.0
   2
   3 # Copyright (c) 2017 Leiden University Medical Center
   4 #
   5 # Permission is hereby granted, free of charge, to any person obtaining a copy
   6 # of this software and associated documentation files (the "Software"), to deal
   7 # in the Software without restriction, including without limitation the rights
   8 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
   9 # copies of the Software, and to permit persons to whom the Software is
  10 # furnished to do so, subject to the following conditions:
  11 #
  12 # The above copyright notice and this permission notice shall be included in
  13 # all copies or substantial portions of the Software.
  14 #
  15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21 # SOFTWARE.
  22
  23 task Cutadapt {  # Runs cutadapt on single-end or paired-end fastq input.
  24     input {
  25         File read1
  26         File? read2
  27         String read1output = "cut_r1.fq.gz"
  28         String? read2output
  29         Array[String] adapter = []
  30         Array[String] front = []
  31         Array[String] anywhere = []
  32         Array[String] adapterRead2 = []
  33         Array[String] frontRead2 = []
  34         Array[String] anywhereRead2 = []
  35         String reportPath = "cutadapt_report.txt"
  36         # Cutadapt compresses the zipped output files with a ridiculously
  37         # high compression level (5 or 6).
  38         # This is not the fast compression preset. It takes up to 400% more
  39         # CPU time for a 20% reduction in file size.
  40         # Hence we use compression level 1 here.
  41         Int compressionLevel = 1  # This only affects outputs with the .gz suffix.
  42
  43         Boolean? interleaved
  44         String? pairFilter
  45         Float? errorRate
  46         Boolean? noIndels
  47         Int? times
  48         Int? overlap
  49         Boolean? matchReadWildcards
  50         Boolean? noMatchAdapterWildcards
  51         Boolean? noTrim
  52         Boolean? maskAdapter
  53         Int? cut
  54         String? nextseqTrim
  55         String? qualityCutoff
  56         Int? qualityBase
  57         Int? length
  58         Boolean? trimN
  59         String? lengthTag
  60         String? stripSuffix
  61         String? prefix
  62         String? suffix
  63         Int? minimumLength = 2 # Necessary to prevent creation of empty reads or 1 base reads.
  64         Int? maximumLength
  65         Int? maxN
  66         Boolean? discardTrimmed
  67         Boolean? discardUntrimmed
  68         String? infoFilePath
  69         String? restFilePath
  70         String? wildcardFilePath
  71         String? tooShortOutputPath
  72         String? tooLongOutputPath
  73         String? untrimmedOutputPath
  74         String? tooShortPairedOutputPath
  75         String? tooLongPairedOutputPath
  76         String? untrimmedPairedOutputPath
  77         Boolean? colorspace
  78         Boolean? doubleEncode
  79         Boolean? stripF3
  80         Boolean? maq
  81         Boolean? bwa
  82         Boolean? zeroCap
  83         Boolean? noZeroCap
  84
  85         Int cores = 4
  86         String memory = "~{300 + 100 * cores}M"
  87         Int timeMinutes = 1 + ceil(size([read1, read2], "G")  * 12.0 / cores)
  88         String dockerImage = "quay.io/biocontainers/cutadapt:2.10--py37hf01694f_1"
  89     }
  90     # Read 2 output path that is really used: read2output, or a default name when it is unset.
  91     String realRead2output = select_first([read2output, "cut_r2.fq.gz"])
  92     String read2outputArg = if (defined(read2))
  93         then "mkdir -p $(dirname " + realRead2output + ")"
  94         else ""
  95
  96     # FIXME: Use prefix() function for adapter, adapterRead2, etc.
  97     command {
  98         set -e
  99         ~{"mkdir -p $(dirname " + read1output + ")"}
 100         ~{read2outputArg}
 101         cutadapt \
 102         ~{"--cores=" + cores} \
 103         ~{true="-a" false="" length(adapter) > 0} ~{sep=" -a " adapter} \
 104         ~{true="-A" false="" length(adapterRead2) > 0} ~{sep=" -A " adapterRead2} \
 105         ~{true="-g" false="" length(front) > 0} ~{sep=" -g " front} \
 106         ~{true="-G" false="" length(frontRead2) > 0} ~{sep=" -G " frontRead2} \
 107         ~{true="-b" false="" length(anywhere) > 0} ~{sep=" -b " anywhere} \
 108         ~{true="-B" false="" length(anywhereRead2) > 0} ~{sep=" -B " anywhereRead2} \
 109         --output ~{read1output} ~{if defined(read2) then "-p " + realRead2output else ""} \
 110         --compression-level ~{compressionLevel} \
 111         ~{"--too-short-output " + tooShortOutputPath} \
 112         ~{"--too-short-paired-output " + tooShortPairedOutputPath} \
 113         ~{"--too-long-output " + tooLongOutputPath} \
 114         ~{"--too-long-paired-output " + tooLongPairedOutputPath} \
 115         ~{"--untrimmed-output " + untrimmedOutputPath} \
 116         ~{"--untrimmed-paired-output " + untrimmedPairedOutputPath} \
 117         ~{"--pair-filter " + pairFilter} \
 118         ~{"--error-rate " + errorRate} \
 119         ~{"--times " + times} \
 120         ~{"--overlap " + overlap} \
 121         ~{"--cut " + cut} \
 122         ~{"--nextseq-trim " + nextseqTrim} \
 123         ~{"--quality-cutoff " + qualityCutoff} \
 124         ~{"--quality-base " + qualityBase} \
 125         ~{"--length " + length} \
 126         ~{"--length-tag " + lengthTag} \
 127         ~{"--strip-suffix " + stripSuffix} \
 128         ~{"--prefix " + prefix} \
 129         ~{"--suffix " + suffix} \
 130         ~{"--minimum-length " + minimumLength} \
 131         ~{"--maximum-length " + maximumLength} \
 132         ~{"--max-n " + maxN} \
 133         ~{true="--discard-untrimmed" false="" discardUntrimmed} \
 134         ~{"--info-file " + infoFilePath } \
 135         ~{"--rest-file " + restFilePath } \
 136         ~{"--wildcard-file " + wildcardFilePath} \
 137         ~{true="--match-read-wildcards" false="" matchReadWildcards} \
 138         ~{true="--no-match-adapter-wildcards" false="" noMatchAdapterWildcards} \
 139         ~{true="--no-trim" false="" noTrim} \
 140         ~{true="--mask-adapter" false="" maskAdapter} \
 141         ~{true="--no-indels" false="" noIndels} \
 142         ~{true="--trim-n" false="" trimN} \
 143         ~{true="--interleaved" false="" interleaved} \
 144         ~{true="--discard-trimmed" false="" discardTrimmed } \
 145         ~{true="--colorspace" false="" colorspace} \
 146         ~{true="--double-encode" false="" doubleEncode} \
 147         ~{true="--strip-f3" false="" stripF3} \
 148         ~{true="--maq" false="" maq} \
 149         ~{true="--bwa" false="" bwa} \
 150         ~{true="--zero-cap" false="" zeroCap} \
 151         ~{true="--no-zero-cap" false="" noZeroCap} \
 152         ~{read1} \
 153         ~{read2} \
 154         ~{"> " + reportPath}
 155     }
 156
 157     output{
 158         File cutRead1 = read1output
 159         File report = reportPath
 160         File? cutRead2 = realRead2output  # The path given to -p (read2output or its default name).
 161         File? tooLongOutput=tooLongOutputPath
 162         File? tooShortOutput=tooShortOutputPath
 163         File? untrimmedOutput=untrimmedOutputPath
 164         File? tooLongPairedOutput=tooLongPairedOutputPath
 165         File? tooShortPairedOutput=tooShortPairedOutputPath
 166         File? untrimmedPairedOutput=untrimmedPairedOutputPath
 167         File? infoFile=infoFilePath
 168         File? restFile=restFilePath
 169         File? wildcardFile=wildcardFilePath
 170     }
 171
 172     runtime {
 173         cpu: cores
 174         memory: memory
 175         time_minutes: timeMinutes
 176         #docker: dockerImage
 177     }
 178
 179     parameter_meta {
 180         # inputs
 181         read1: {description: "The first or single end fastq file to be run through cutadapt.", category: "required"}
 182         read2: {description: "An optional second end fastq file to be run through cutadapt.", category: "common"}
 183         read1output: {description: "The name of the resulting first or single end fastq file.", category: "common"}
 184         read2output: {description: "The name of the resulting second end fastq file.", category: "common"}
 185         adapter: {description: "A list of 3' ligated adapter sequences to be cut from the given first or single end fastq file.", category: "common"}
 186         front: {description: "A list of 5' ligated adapter sequences to be cut from the given first or single end fastq file.", category: "advanced"}
 187         anywhere: {description: "A list of 3' or 5' ligated adapter sequences to be cut from the given first or single end fastq file.", category: "advanced"}
 188         adapterRead2: {description: "A list of 3' ligated adapter sequences to be cut from the given second end fastq file.", category: "common"}
 189         frontRead2: {description: "A list of 5' ligated adapter sequences to be cut from the given second end fastq file.", category: "advanced"}
 190         anywhereRead2: {description: "A list of 3' or 5' ligated adapter sequences to be cut from the given second end fastq file.", category: "advanced"}
 191         reportPath: {description: "The name of the file to write cutadapt's stdout to, this contains some metrics.", category: "common"}
 192         compressionLevel: {description: "The compression level if gzipped output is used.", category: "advanced"}
 193         interleaved: {description: "Equivalent to cutadapt's --interleaved flag.", category: "advanced"}
 194         pairFilter: {description: "Equivalent to cutadapt's --pair-filter option.", category: "advanced"}
 195         errorRate: {description: "Equivalent to cutadapt's --error-rate option.", category: "advanced"}
 196         noIndels: {description: "Equivalent to cutadapt's --no-indels flag.", category: "advanced"}
 197         times: {description: "Equivalent to cutadapt's --times option.", category: "advanced"}
 198         overlap: {description: "Equivalent to cutadapt's --overlap option.", category: "advanced"}
 199         matchReadWildcards: {description: "Equivalent to cutadapt's --match-read-wildcards flag.", category: "advanced"}
 200         noMatchAdapterWildcards: {description: "Equivalent to cutadapt's --no-match-adapter-wildcards flag.", category: "advanced"}
 201         noTrim: {description: "Equivalent to cutadapt's --no-trim flag.", category: "advanced"}
 202         maskAdapter: {description: "Equivalent to cutadapt's --mask-adapter flag.", category: "advanced"}
 203         cut: {description: "Equivalent to cutadapt's --cut option.", category: "advanced"}
 204         nextseqTrim: {description: "Equivalent to cutadapt's --nextseq-trim option.", category: "advanced"}
 205         qualityCutoff: {description: "Equivalent to cutadapt's --quality-cutoff option.", category: "advanced"}
 206         qualityBase: {description: "Equivalent to cutadapt's --quality-base option.", category: "advanced"}
 207         length: {description: "Equivalent to cutadapt's --length option.", category: "advanced"}
 208         trimN: {description: "Equivalent to cutadapt's --trim-n flag.", category: "advanced"}
 209         lengthTag: {description: "Equivalent to cutadapt's --length-tag option.", category: "advanced"}
 210         stripSuffix: {description: "Equivalent to cutadapt's --strip-suffix option.", category: "advanced"}
 211         prefix: {description: "Equivalent to cutadapt's --prefix option.", category: "advanced"}
 212         suffix: {description: "Equivalent to cutadapt's --suffix option.", category: "advanced"}
 213         minimumLength: {description: "Equivalent to cutadapt's --minimum-length option.", category: "advanced"}
 214         maximumLength: {description: "Equivalent to cutadapt's --maximum-length option.", category: "advanced"}
 215         maxN: {description: "Equivalent to cutadapt's --max-n option.", category: "advanced"}
 216         discardTrimmed: {description: "Equivalent to cutadapt's --discard-trimmed flag.", category: "advanced"}
 217         discardUntrimmed: {description: "Equivalent to cutadapt's --discard-untrimmed option.", category: "advanced"}
 218         infoFilePath: {description: "Equivalent to cutadapt's --info-file option.", category: "advanced"}
 219         restFilePath: {description: "Equivalent to cutadapt's --rest-file option.", category: "advanced"}
 220         wildcardFilePath: {description: "Equivalent to cutadapt's --wildcard-file option.", category: "advanced"}
 221         tooShortOutputPath: {description: "Equivalent to cutadapt's --too-short-output option.", category: "advanced"}
 222         tooLongOutputPath: {description: "Equivalent to cutadapt's --too-long-output option.", category: "advanced"}
 223         untrimmedOutputPath: {description: "Equivalent to cutadapt's --untrimmed-output option.", category: "advanced"}
 224         tooShortPairedOutputPath: {description: "Equivalent to cutadapt's --too-short-paired-output option.", category: "advanced"}
 225         tooLongPairedOutputPath: {description: "Equivalent to cutadapt's --too-long-paired-output option.", category: "advanced"}
 226         untrimmedPairedOutputPath: {description: "Equivalent to cutadapt's --untrimmed-paired-output option.", category: "advanced"}
 227         colorspace: {description: "Equivalent to cutadapt's --colorspace flag.", category: "advanced"}
 228         doubleEncode: {description: "Equivalent to cutadapt's --double-encode flag.", category: "advanced"}
 229         stripF3: {description: "Equivalent to cutadapt's --strip-f3 flag.", category: "advanced"}
 230         maq: {description: "Equivalent to cutadapt's --maq flag.", category: "advanced"}
 231         bwa: {description: "Equivalent to cutadapt's --bwa flag.", category: "advanced"}
 232         zeroCap: {description: "Equivalent to cutadapt's --zero-cap flag.", category: "advanced"}
 233         noZeroCap: {description: "Equivalent to cutadapt's --no-zero-cap flag.", category: "advanced"}
 234         cores: {description: "The number of cores to use.", category: "advanced"}
 235         memory: {description: "The amount of memory this job will use.", category: "advanced"}
 236         timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
 237         dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
 238
 239         # outputs
 240         cutRead1: {description: "Trimmed read one."}
 241         report: {description: "Per-adapter statistics file."}
 242         cutRead2: {description: "Trimmed read two in pair."}
 243         tooLongOutput: {description: "Reads that are too long according to -M."}
 244         tooShortOutput: {description: "Reads that are too short according to -m."}
 245         untrimmedOutput: {description: "All reads without adapters (instead of the regular output file)."}
 246         tooLongPairedOutput: {description: "Second reads of the pairs written to the too-long output."}
 247         tooShortPairedOutput: {description: "Second reads of the pairs written to the too-short output."}
 248         untrimmedPairedOutput: {description: "The second reads in a pair that were not trimmed."}
 249         infoFile: {description: "Detailed information about where adapters were found in each read."}
 250         restFile: {description: "The rest file."}
 251         wildcardFile: {description: "The wildcard file."}
 252     }
 253 }