3 # Copyright (c) 2017 Leiden University Medical Center
5 # Permission is hereby granted, free of charge, to any person obtaining a copy
6 # of this software and associated documentation files (the "Software"), to deal
7 # in the Software without restriction, including without limitation the rights
8 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 # copies of the Software, and to permit persons to whom the Software is
10 # furnished to do so, subject to the following conditions:
12 # The above copyright notice and this permission notice shall be included in
13 # all copies or substantial portions of the Software.
15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# Path of the trimmed first (or single-end) read file; passed to --output
# and exposed as the cutRead1 output.
27 String read1output = "cut_r1.fq.gz"
# Adapter sequences. Each array is expanded into one repeated option per
# element on the command line: -a / -g / -b for the first read and
# -A / -G / -B for the second read. An empty array adds no option.
29 Array[String] adapter = []
30 Array[String] front = []
31 Array[String] anywhere = []
32 Array[String] adapterRead2 = []
33 Array[String] frontRead2 = []
34 Array[String] anywhereRead2 = []
# Path exposed as the report output.
35 String reportPath = "cutadapt_report.txt"
36 # Cutadapt compresses the zipped output files with a ridiculously
37 # high compression level (5 or 6).
38 # This is not the fast compression preset. It takes up to 400% more
39 # CPU time for a 20% reduction in file size.
40 # Hence we use compression level 1 here.
41 Int compressionLevel = 1 # This only affects outputs with the .gz suffix.
# Optional switches: the matching cutadapt flag is only emitted when the
# value is true.
49 Boolean? matchReadWildcards
50 Boolean? noMatchAdapterWildcards
# Passed as --minimum-length.
63 Int? minimumLength = 2 # Necessary to prevent creation of empty reads or 1 base reads.
66 Boolean? discardTrimmed
67 Boolean? discardUntrimmed
# Optional side-output paths. When set, each one is interpolated into the
# command line and is also exposed as an optional File output of the task.
70 String? wildcardFilePath
71 String? tooShortOutputPath
72 String? tooLongOutputPath
73 String? untrimmedOutputPath
74 String? tooShortPairedOutputPath
75 String? tooLongPairedOutputPath
76 String? untrimmedPairedOutputPath
# Memory request scales with the core count: 300 plus 100 per core, with an
# "M" suffix.
86 String memory = "~{300 + 100 * cores}M"
# One minute plus 12 minutes per "G" of combined input size divided by the
# number of cores, rounded up.
87 Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 12.0 / cores)
88 String dockerImage = "quay.io/biocontainers/cutadapt:2.10--py37hf01694f_1"
# Resolved path for the trimmed second reads: read2output when it is set,
# otherwise the default name "cut_r2.fq.gz".
91 String realRead2output = select_first([read2output, "cut_r2.fq.gz"])
92 String read2outputArg = if (defined(read2))
93 then "mkdir -p $(dirname " + realRead2output + ")"
96 # FIXME: Use prefix() function for adapter, adapterRead2, etc.
# Optional placeholders expand to nothing when their input is unset; option
# names must match the cutadapt command line exactly (--too-short-output, ...).
99 ~{"mkdir -p $(dirname " + read1output + ")"}
102 ~{"--cores=" + cores} \
103 ~{true="-a" false="" length(adapter) > 0} ~{sep=" -a " adapter} \
104 ~{true="-A" false="" length(adapterRead2) > 0} ~{sep=" -A " adapterRead2} \
105 ~{true="-g" false="" length(front) > 0} ~{sep=" -g " front} \
106 ~{true="-G" false="" length(frontRead2) > 0} ~{sep=" -G " frontRead2} \
107 ~{true="-b" false="" length(anywhere) > 0} ~{sep=" -b " anywhere} \
108 ~{true="-B" false="" length(anywhereRead2) > 0} ~{sep=" -B " anywhereRead2} \
109 --output ~{read1output} ~{if defined(read2) then "-p " + realRead2output else ""} \
110 --compression-level ~{compressionLevel} \
111 ~{"--too-short-output " + tooShortOutputPath} \
112 ~{"--too-short-paired-output " + tooShortPairedOutputPath} \
113 ~{"--too-long-output " + tooLongOutputPath} \
114 ~{"--too-long-paired-output " + tooLongPairedOutputPath} \
115 ~{"--untrimmed-output " + untrimmedOutputPath} \
116 ~{"--untrimmed-paired-output " + untrimmedPairedOutputPath} \
117 ~{"--pair-filter " + pairFilter} \
118 ~{"--error-rate " + errorRate} \
119 ~{"--times " + times} \
120 ~{"--overlap " + overlap} \
122 ~{"--nextseq-trim " + nextseqTrim} \
123 ~{"--quality-cutoff " + qualityCutoff} \
124 ~{"--quality-base " + qualityBase} \
125 ~{"--length " + length} \
126 ~{"--length-tag " + lengthTag} \
127 ~{"--strip-suffix " + stripSuffix} \
128 ~{"--prefix " + prefix} \
129 ~{"--suffix " + suffix} \
130 ~{"--minimum-length " + minimumLength} \
131 ~{"--maximum-length " + maximumLength} \
132 ~{"--max-n " + maxN} \
133 ~{true="--discard-untrimmed" false="" discardUntrimmed} \
134 ~{"--info-file " + infoFilePath } \
135 ~{"--rest-file " + restFilePath } \
136 ~{"--wildcard-file " + wildcardFilePath} \
137 ~{true="--match-read-wildcards" false="" matchReadWildcards} \
138 ~{true="--no-match-adapter-wildcards" false="" noMatchAdapterWildcards} \
139 ~{true="--no-trim" false="" noTrim} \
140 ~{true="--mask-adapter" false="" maskAdapter} \
141 ~{true="--no-indels" false="" noIndels} \
142 ~{true="--trim-n" false="" trimN} \
143 ~{true="--interleaved" false="" interleaved} \
144 ~{true="--discard-trimmed" false="" discardTrimmed } \
145 ~{true="--colorspace" false="" colorspace} \
146 ~{true="--double-encode" false="" doubleEncode} \
147 ~{true="--strip-f3" false="" stripF3} \
148 ~{true="--maq" false="" maq} \
149 ~{true="--bwa" false="" bwa} \
150 ~{true="--zero-cap" false="" zeroCap} \
151 ~{true="--no-zero-cap" false="" noZeroCap} \
# Task outputs. The optional side outputs are only defined when the
# corresponding path input was supplied.
158 File cutRead1 = read1output
159 File report = reportPath
# Bind to the resolved read 2 path (read2output, or the default
# "cut_r2.fq.gz") so that paired-end runs relying on the default file name
# still expose their trimmed second reads. In single-end runs the file is
# not written; NOTE(review): this relies on the engine evaluating a missing
# optional File output to null - confirm for the target backend.
160 File? cutRead2 = realRead2output
161 File? tooLongOutput = tooLongOutputPath
162 File? tooShortOutput = tooShortOutputPath
163 File? untrimmedOutput = untrimmedOutputPath
164 File? tooLongPairedOutput = tooLongPairedOutputPath
165 File? tooShortPairedOutput = tooShortPairedOutputPath
166 File? untrimmedPairedOutput = untrimmedPairedOutputPath
167 File? infoFile = infoFilePath
168 File? restFile = restFilePath
169 File? wildcardFile = wildcardFilePath
175 time_minutes: timeMinutes
# Input descriptions. Each "Equivalent to ..." entry names the cutadapt
# option or flag that the input is forwarded to on the command line.
181 read1: {description: "The first or single end fastq file to be run through cutadapt.", category: "required"}
182 read2: {description: "An optional second end fastq file to be run through cutadapt.", category: "common"}
183 read1output: {description: "The name of the resulting first or single end fastq file.", category: "common"}
184 read2output: {description: "The name of the resulting second end fastq file.", category: "common"}
185 adapter: {description: "A list of 3' ligated adapter sequences to be cut from the given first or single end fastq file.", category: "common"}
186 front: {description: "A list of 5' ligated adapter sequences to be cut from the given first or single end fastq file.", category: "advanced"}
187 anywhere: {description: "A list of 3' or 5' ligated adapter sequences to be cut from the given first or single end fastq file.", category: "advanced"}
188 adapterRead2: {description: "A list of 3' ligated adapter sequences to be cut from the given second end fastq file.", category: "common"}
189 frontRead2: {description: "A list of 5' ligated adapter sequences to be cut from the given second end fastq file.", category: "advanced"}
190 anywhereRead2: {description: "A list of 3' or 5' ligated adapter sequences to be cut from the given second end fastq file.", category: "advanced"}
191 reportPath: {description: "The name of the file to write cutadapt's stdout to, this contains some metrics.", category: "common"}
192 compressionLevel: {description: "The compression level if gzipped output is used.", category: "advanced"}
193 interleaved: {description: "Equivalent to cutadapt's --interleaved flag.", category: "advanced"}
194 pairFilter: {description: "Equivalent to cutadapt's --pair-filter option.", category: "advanced"}
195 errorRate: {description: "Equivalent to cutadapt's --error-rate option.", category: "advanced"}
196 noIndels: {description: "Equivalent to cutadapt's --no-indels flag.", category: "advanced"}
197 times: {description: "Equivalent to cutadapt's --times option.", category: "advanced"}
198 overlap: {description: "Equivalent to cutadapt's --overlap option.", category: "advanced"}
199 matchReadWildcards: {description: "Equivalent to cutadapt's --match-read-wildcards flag.", category: "advanced"}
200 noMatchAdapterWildcards: {description: "Equivalent to cutadapt's --no-match-adapter-wildcards flag.", category: "advanced"}
201 noTrim: {description: "Equivalent to cutadapt's --no-trim flag.", category: "advanced"}
202 maskAdapter: {description: "Equivalent to cutadapt's --mask-adapter flag.", category: "advanced"}
203 cut: {description: "Equivalent to cutadapt's --cut option.", category: "advanced"}
204 nextseqTrim: {description: "Equivalent to cutadapt's --nextseq-trim option.", category: "advanced"}
205 qualityCutoff: {description: "Equivalent to cutadapt's --quality-cutoff option.", category: "advanced"}
206 qualityBase: {description: "Equivalent to cutadapt's --quality-base option.", category: "advanced"}
207 length: {description: "Equivalent to cutadapt's --length option.", category: "advanced"}
208 trimN: {description: "Equivalent to cutadapt's --trim-n flag.", category: "advanced"}
209 lengthTag: {description: "Equivalent to cutadapt's --length-tag option.", category: "advanced"}
210 stripSuffix: {description: "Equivalent to cutadapt's --strip-suffix option.", category: "advanced"}
211 prefix: {description: "Equivalent to cutadapt's --prefix option.", category: "advanced"}
212 suffix: {description: "Equivalent to cutadapt's --suffix option.", category: "advanced"}
213 minimumLength: {description: "Equivalent to cutadapt's --minimum-length option.", category: "advanced"}
214 maximumLength: {description: "Equivalent to cutadapt's --maximum-length option.", category: "advanced"}
215 maxN: {description: "Equivalent to cutadapt's --max-n option.", category: "advanced"}
216 discardTrimmed: {description: "Equivalent to cutadapt's --discard-trimmed flag.", category: "advanced"}
217 discardUntrimmed: {description: "Equivalent to cutadapt's --discard-untrimmed flag.", category: "advanced"}
218 infoFilePath: {description: "Equivalent to cutadapt's --info-file option.", category: "advanced"}
219 restFilePath: {description: "Equivalent to cutadapt's --rest-file option.", category: "advanced"}
220 wildcardFilePath: {description: "Equivalent to cutadapt's --wildcard-file option.", category: "advanced"}
221 tooShortOutputPath: {description: "Equivalent to cutadapt's --too-short-output option.", category: "advanced"}
222 tooLongOutputPath: {description: "Equivalent to cutadapt's --too-long-output option.", category: "advanced"}
223 untrimmedOutputPath: {description: "Equivalent to cutadapt's --untrimmed-output option.", category: "advanced"}
224 tooShortPairedOutputPath: {description: "Equivalent to cutadapt's --too-short-paired-output option.", category: "advanced"}
225 tooLongPairedOutputPath: {description: "Equivalent to cutadapt's --too-long-paired-output option.", category: "advanced"}
226 untrimmedPairedOutputPath: {description: "Equivalent to cutadapt's --untrimmed-paired-output option.", category: "advanced"}
227 colorspace: {description: "Equivalent to cutadapt's --colorspace flag.", category: "advanced"}
228 doubleEncode: {description: "Equivalent to cutadapt's --double-encode flag.", category: "advanced"}
229 stripF3: {description: "Equivalent to cutadapt's --strip-f3 flag.", category: "advanced"}
230 maq: {description: "Equivalent to cutadapt's --maq flag.", category: "advanced"}
231 bwa: {description: "Equivalent to cutadapt's --bwa flag.", category: "advanced"}
232 zeroCap: {description: "Equivalent to cutadapt's --zero-cap flag.", category: "advanced"}
233 noZeroCap: {description: "Equivalent to cutadapt's --no-zero-cap flag.", category: "advanced"}
234 cores: {description: "The number of cores to use.", category: "advanced"}
235 memory: {description: "The amount of memory this job will use.", category: "advanced"}
236 timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
237 dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
# Output descriptions. The *PairedOutput entries hold the second reads of
# the pairs whose first reads go to the matching non-paired output.
240 cutRead1: {description: "Trimmed read one."}
241 report: {description: "Per-adapter statistics file."}
242 cutRead2: {description: "Trimmed read two in pair."}
243 tooLongOutput: {description: "Reads that are too long according to -M."}
244 tooShortOutput: {description: "Reads that are too short according to -m."}
245 untrimmedOutput: {description: "All reads without adapters (instead of the regular output file)."}
246 tooLongPairedOutput: {description: "The second reads of pairs that are too long according to -M."}
247 tooShortPairedOutput: {description: "The second reads of pairs that are too short according to -m."}
248 untrimmedPairedOutput: {description: "The second reads in a pair that were not trimmed."}
249 infoFile: {description: "Detailed information about where adapters were found in each read."}
250 restFile: {description: "The rest file: the remainder of each read after an adapter that matched in the middle of the read."}
251 wildcardFile: {description: "The wildcard file: the read bases that matched wildcard (N) positions in an adapter."}