[GalaxyCodeBases.git] / etc / gatk-wdl / fm2 / tasks / cutadapt.wdl
1 version 1.0
3 # Copyright (c) 2017 Leiden University Medical Center
5 # Permission is hereby granted, free of charge, to any person obtaining a copy
6 # of this software and associated documentation files (the "Software"), to deal
7 # in the Software without restriction, including without limitation the rights
8 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 # copies of the Software, and to permit persons to whom the Software is
10 # furnished to do so, subject to the following conditions:
12 # The above copyright notice and this permission notice shall be included in
13 # all copies or substantial portions of the Software.
23 task Cutadapt {
    input {
        # Reads to trim. read2 is only supplied for paired-end data.
        File read1
        File? read2

        # Where the trimmed reads are written. When read2output is left unset
        # the task falls back to "cut_r2.fq.gz" for read 2.
        String read1output = "cut_r1.fq.gz"
        String? read2output

        # Adapter sequences. Every entry is handed to cutadapt with its own
        # -a/-g/-b flag (read 1) or -A/-G/-B flag (read 2).
        Array[String] adapter = []
        Array[String] front = []
        Array[String] anywhere = []
        Array[String] adapterRead2 = []
        Array[String] frontRead2 = []
        Array[String] anywhereRead2 = []

        # File that receives cutadapt's stdout.
        String reportPath = "cutadapt_report.txt"

        # Cutadapt compresses the zipped output files with a ridiculously
        # high compression level (5 or 6).
        # This is not the fast compression preset. It takes up to 400% more
        # CPU time for a 20% reduction in file size.
        # Hence we use compression level 1 here.
        Int compressionLevel = 1  # This only affects outputs with the .gz suffix.

        # Optional cutadapt settings. Each maps onto the cutadapt option named
        # in parameter_meta; values left unset are meant to be omitted from
        # the command line.
        Boolean? interleaved
        String? pairFilter
        Float? errorRate
        Boolean? noIndels
        Int? times
        Int? overlap
        Boolean? matchReadWildcards
        Boolean? noMatchAdapterWildcards
        Boolean? noTrim
        Boolean? maskAdapter
        Int? cut
        String? nextseqTrim
        String? qualityCutoff
        Int? qualityBase
        Int? length
        Boolean? trimN
        String? lengthTag
        String? stripSuffix
        String? prefix
        String? suffix
        Int? minimumLength = 2 # Necessary to prevent creation of empty reads or 1 base reads.
        Int? maximumLength
        Int? maxN
        Boolean? discardTrimmed
        Boolean? discardUntrimmed

        # Optional extra output files. Each path is passed to cutadapt and is
        # also exposed as an optional output of this task.
        String? infoFilePath
        String? restFilePath
        String? wildcardFilePath
        String? tooShortOutputPath
        String? tooLongOutputPath
        String? untrimmedOutputPath
        String? tooShortPairedOutputPath
        String? tooLongPairedOutputPath
        String? untrimmedPairedOutputPath

        # Further optional flags.
        Boolean? colorspace
        Boolean? doubleEncode
        Boolean? stripF3
        Boolean? maq
        Boolean? bwa
        Boolean? zeroCap
        Boolean? noZeroCap

        # Resource requests. Memory defaults to 300M plus 100M per core; the
        # time limit is 1 minute plus 12 minutes per GB of input, divided by
        # the number of cores and rounded up.
        Int cores = 4
        String memory = "~{300 + 100 * cores}M"
        Int timeMinutes = 1 + ceil(size([read1, read2], "G")  * 12.0 / cores)
        # NOTE(review): the runtime section has its docker line commented out,
        # so this value currently appears to be unused.
        String dockerImage = ""
    }
91     String realRead2output = select_first([read2output, "cut_r2.fq.gz"])
92     String read2outputArg = if (defined(read2))
93         then "mkdir -p $(dirname " + realRead2output + ")"
94         else ""
96     # FIXME: Use prefix() function for adapter, adapterRead2, etc.
97     command {
98         set -e
99         ~{"mkdir -p $(dirname " + read1output + ")"}
100         ~{read2outputArg}
101         cutadapt \
102         ~{"--cores=" + cores} \
103         ~{true="-a" false="" length(adapter) > 0} ~{sep=" -a " adapter} \
104         ~{true="-A" false="" length(adapterRead2) > 0} ~{sep=" -A " adapterRead2} \
105         ~{true="-g" false="" length(front) > 0} ~{sep=" -g " front} \
106         ~{true="-G" false="" length(frontRead2) > 0} ~{sep=" -G " frontRead2} \
107         ~{true="-b" false="" length(anywhere) > 0} ~{sep=" -b " anywhere} \
108         ~{true="-B" false="" length(anywhereRead2) > 0} ~{sep=" -B " anywhereRead2} \
109         --output ~{read1output} ~{if defined(read2) then "-p " + realRead2output else ""} \
110         --compression-level ~{compressionLevel} \
111         ~{"--to-short-output " + tooShortOutputPath} \
112         ~{"--to-short-paired-output " + tooShortPairedOutputPath} \
113         ~{"--to-long-output " + tooLongOutputPath} \
114         ~{"--to-long-paired-output " + tooLongPairedOutputPath} \
115         ~{"--untrimmed-output " + untrimmedOutputPath} \
116         ~{"--untrimmed-paired-output " + untrimmedPairedOutputPath} \
117         ~{"--pair-filter " + pairFilter} \
118         ~{"--error-rate " + errorRate} \
119         ~{"--times " + times} \
120         ~{"--overlap " + overlap} \
121         ~{"--cut " + cut} \
122         ~{"--nextseq-trim " + nextseqTrim} \
123         ~{"--quality-cutoff " + qualityCutoff} \
124         ~{"--quality-base " + qualityBase} \
125         ~{"--length " + length} \
126         ~{"--length-tag " + lengthTag} \
127         ~{"--strip-suffix " + stripSuffix} \
128         ~{"--prefix " + prefix} \
129         ~{"--suffix " + suffix} \
130         ~{"--minimum-length " + minimumLength} \
131         ~{"--maximum-length " + maximumLength} \
132         ~{"--max-n " + maxN} \
133         ~{true="--discard-untrimmed" false="" discardUntrimmed} \
134         ~{"--info-file " + infoFilePath } \
135         ~{"--rest-file " + restFilePath } \
136         ~{"--wildcard-file " + wildcardFilePath} \
137         ~{true="--match-read-wildcards" false="" matchReadWildcards} \
138         ~{true="--no-match-adapter-wildcards" false="" noMatchAdapterWildcards} \
139         ~{true="--no-trim" false="" noTrim} \
140         ~{true="--mask-adapter" false="" maskAdapter} \
141         ~{true="--no-indels" false="" noIndels} \
142         ~{true="--trim-n" false="" trimN} \
143         ~{true="--interleaved" false="" interleaved} \
144         ~{true="--discard-trimmed" false="" discardTrimmed } \
145         ~{true="--colorspace" false="" colorspace} \
146         ~{true="--double-encode" false="" doubleEncode} \
147         ~{true="--strip-f3" false="" stripF3} \
148         ~{true="--maq" false="" maq} \
149         ~{true="--bwa" false="" bwa} \
150         ~{true="--zero-cap" false="" zeroCap} \
151         ~{true="--no-zero-cap" false="" noZeroCap} \
152         ~{read1} \
153         ~{read2} \
154         ~{"> " + reportPath}
155     }
157     output{
158         File cutRead1 = read1output
159         File report = reportPath
160         File? cutRead2 = read2output
161         File? tooLongOutput=tooLongOutputPath
162         File? tooShortOutput=tooShortOutputPath
163         File? untrimmedOutput=untrimmedOutputPath
164         File? tooLongPairedOutput=tooLongPairedOutputPath
165         File? tooShortPairedOutput=tooShortPairedOutputPath
166         File? untrimmedPairedOutput=untrimmedPairedOutputPath
167         File? infoFile=infoFilePath
168         File? restFile=restFilePath
169         File? wildcardFile=wildcardFilePath
170     }
    runtime {
        # Resource requests are taken directly from the task inputs.
        cpu: cores
        memory: memory
        time_minutes: timeMinutes
        # NOTE(review): the docker attribute is disabled here, so the
        # dockerImage input has no effect while this line stays commented out.
        #docker: dockerImage
    }
179     parameter_meta {
180         # inputs
181         read1: {description: "The first or single end fastq file to be run through cutadapt.", category: "required"}
182         read2: {description: "An optional second end fastq file to be run through cutadapt.", category: "common"}
183         read1output: {description: "The name of the resulting first or single end fastq file.", category: "common"}
184         read2output: {description: "The name of the resulting second end fastq file.", category: "common"}
185         adapter: {description: "A list of 3' ligated adapter sequences to be cut from the given first or single end fastq file.", category: "common"}
186         front: {description: "A list of 5' ligated adapter sequences to be cut from the given first or single end fastq file.", category: "advanced"}
187         anywhere: {description: "A list of 3' or 5' ligated adapter sequences to be cut from the given first or single end fastq file.", category: "advanced"}
188         adapterRead2: {description: "A list of 3' ligated adapter sequences to be cut from the given second end fastq file.", category: "common"}
189         frontRead2: {description: "A list of 5' ligated adapter sequences to be cut from the given second end fastq file.", category: "advanced"}
190         anywhereRead2: {description: "A list of 3' or 5' ligated adapter sequences to be cut from the given second end fastq file.", category: "advanced"}
191         reportPath: {description: "The name of the file to write cutadapts's stdout to, this contains some metrics.", category: "common"}
192         compressionLevel: {description: "The compression level if gzipped output is used.", category: "advanced"}
193         interleaved: {description: "Equivalent to cutadapt's --interleaved flag.", category: "advanced"}
194         pairFilter: {description: "Equivalent to cutadapt's --pair-filter option.", category: "advanced"}
195         errorRate: {description: "Equivalent to cutadapt's --error-rate option.", category: "advanced"}
196         noIndels: {description: "Equivalent to cutadapt's --no-indels flag.", category: "advanced"}
197         times: {description: "Equivalent to cutadapt's --times option.", category: "advanced"}
198         overlap: {description: "Equivalent to cutadapt's --overlap option.", category: "advanced"}
199         matchReadWildcards: {description: "Equivalent to cutadapt's --match-read-wildcards flag.", category: "advanced"}
200         noMatchAdapterWildcards: {description: "Equivalent to cutadapt's --no-match-adapter-wildcards flag.", category: "advanced"}
201         noTrim: {description: "Equivalent to cutadapt's --no-trim flag.", category: "advanced"}
202         maskAdapter: {description: "Equivalent to cutadapt's --mask-adapter flag.", category: "advanced"}
203         cut: {description: "Equivalent to cutadapt's --cut option.", category: "advanced"}
204         nextseqTrim: {description: "Equivalent to cutadapt's --nextseq-trim option.", category: "advanced"}
205         qualityCutoff: {description: "Equivalent to cutadapt's --quality-cutoff option.", category: "advanced"}
206         qualityBase: {description: "Equivalent to cutadapt's --quality-base option.", category: "advanced"}
207         length: {description: "Equivalent to cutadapt's --length option.", category: "advanced"}
208         trimN: {description: "Equivalent to cutadapt's --trim-n flag.", category: "advanced"}
209         lengthTag: {description: "Equivalent to cutadapt's --length-tag option.", category: "advanced"}
210         stripSuffix: {description: "Equivalent to cutadapt's --strip-suffix option.", category: "advanced"}
211         prefix: {description: "Equivalent to cutadapt's --prefix option.", category: "advanced"}
212         suffix: {description: "Equivalent to cutadapt's --suffix option.", category: "advanced"}
213         minimumLength: {description: "Equivalent to cutadapt's --minimum-length option.", category: "advanced"}
214         maximumLength: {description: "Equivalent to cutadapt's --maximum-length option.", category: "advanced"}
215         maxN: {description: "Equivalent to cutadapt's --max-n option.", category: "advanced"}
216         discardTrimmed: {description: "Equivalent to cutadapt's --quality-cutoff option.", category: "advanced"}
217         discardUntrimmed: {description: "Equivalent to cutadapt's --discard-untrimmed option.", category: "advanced"}
218         infoFilePath: {description: "Equivalent to cutadapt's --info-file option.", category: "advanced"}
219         restFilePath: {description: "Equivalent to cutadapt's --rest-file option.", category: "advanced"}
220         wildcardFilePath: {description: "Equivalent to cutadapt's --wildcard-file option.", category: "advanced"}
221         tooShortOutputPath: {description: "Equivalent to cutadapt's --too-short-output option.", category: "advanced"}
222         tooLongOutputPath: {description: "Equivalent to cutadapt's --too-long-output option.", category: "advanced"}
223         untrimmedOutputPath: {description: "Equivalent to cutadapt's --untrimmed-output option.", category: "advanced"}
224         tooShortPairedOutputPath: {description: "Equivalent to cutadapt's --too-short-paired-output option.", category: "advanced"}
225         tooLongPairedOutputPath: {description: "Equivalent to cutadapt's --too-long-paired-output option.", category: "advanced"}
226         untrimmedPairedOutputPath: {description: "Equivalent to cutadapt's --untrimmed-paired-output option.", category: "advanced"}
227         colorspace: {description: "Equivalent to cutadapt's --colorspace flag.", category: "advanced"}
228         doubleEncode: {description: "Equivalent to cutadapt's --double-encode flag.", category: "advanced"}
229         stripF3: {description: "Equivalent to cutadapt's --strip-f3 flag.", category: "advanced"}
230         maq: {description: "Equivalent to cutadapt's --maq flag.", category: "advanced"}
231         bwa: {description: "Equivalent to cutadapt's --bwa flag.", category: "advanced"}
232         zeroCap: {description: "Equivalent to cutadapt's --zero-cap flag.", category: "advanced"}
233         noZeroCap: {description: "Equivalent to cutadapt's --no-zero-cap flag.", category: "advanced"}
234         cores: {description: "The number of cores to use.", category: "advanced"}
235         memory: {description: "The amount of memory this job will use.", category: "advanced"}
236         timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
237         dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
239         # outputs
240         cutRead1: {description: "Trimmed read one."}
241         report: {description: "Per-adapter statistics file."}
242         cutRead2: {description: "Trimmed read two in pair."}
243         tooLongOutput: {description: "Reads that are too long according to -M."}
244         tooShortOutput: {description: "Reads that are too short according to -m."}
245         untrimmedOutput: {description: "All reads without adapters (instead of the regular output file)."}
246         tooLongPairedOutput: {description: "Second reads in a pair."}
247         tooShortPairedOutput: {description: "Second reads in a pair."}
248         untrimmedPairedOutput: {description: "The second reads in a pair that were not trimmed."}
249         infoFile: {description: "Detailed information about where adapters were found in each read."}
250         restFile: {description: "The rest file."}
251         wildcardFile: {description: "The wildcard file."}
252     }