3 # Copyright (c) 2017 Leiden University Medical Center
5 # Permission is hereby granted, free of charge, to any person obtaining a copy
6 # of this software and associated documentation files (the "Software"), to deal
7 # in the Software without restriction, including without limitation the rights
8 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 # copies of the Software, and to permit persons to whom the Software is
10 # furnished to do so, subject to the following conditions:
12 # The above copyright notice and this permission notice shall be included in
13 # all copies or substantial portions of the Software.
15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 Int sortMemoryPerThreadGb = 2
34 Int compressionLevel = 1
36 Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 200 / threads)
37 # This container contains: samtools (1.10), bwa (0.7.17-r1188)
38 String dockerImage = "quay.io/biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:eabfac3657eda5818bae4090db989e3d41b01542-0"
41 # Samtools sort may block the pipe while it is writing data to disk.
42 # This can lead to CPU underutilization.
43 # Use 1 sort thread if threads is 1, 2 sort threads for 2-4 threads, 3 sort threads for 5-8 threads, etc.
44 Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0)
45 Int totalSortThreads = select_first([sortThreads, estimatedSortThreads])
46 # BWA needs slightly more memory than the size of the index files (~10%). A factor of 1.2 is used as a safety margin.
47 Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads
51 mkdir -p "$(dirname ~{outputPath})"
54 ~{"-R '" + readgroup}~{true="'" false="" defined(readgroup)} \
55 ~{bwaIndex.fastaFile} \
59 ~{"-@ " + totalSortThreads} \
60 -m ~{sortMemoryPerThreadGb}G \
61 -l ~{compressionLevel} \
67 File outputBam = outputPath
72 memory: "~{select_first([memoryGb, estimatedMemoryGb])}G"
73 time_minutes: timeMinutes
78 read1: {description: "The first or single end fastq file.", category: "required"}
79 read2: {description: "The second end fastq file.", category: "common"}
80 bwaIndex: {description: "The BWA index files.", category: "required"}
81 outputPath: {description: "The location the output BAM file should be written to.", category: "required"}
82 readgroup: {description: "The readgroup to be assigned to the reads. See BWA mem's `-R` option.", category: "common"}
84 threads: {description: "The number of threads to use.", category: "advanced"}
85 memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"}
86 sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"}
87 sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"}
88 compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"}
89 timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
90 dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
102 Boolean sixtyFour = false
106 Int sortMemoryPerThreadGb = 2
107 Int compressionLevel = 1
109 Int timeMinutes = 1 + ceil(size([read1, read2], "G") * 220 / threads)
110 # This container contains: bwa (0.7.17), bwakit (0.7.17.dev1), samtools (1.10)
111 String dockerImage = "quay.io/biocontainers/mulled-v2-ad317f19f5881324e963f6a6d464d696a2825ab6:c59b7a73c87a9fe81737d5d628e10a3b5807f453-0"
114 # Samtools sort may block the pipe while it is writing data to disk.
115 # This can lead to CPU underutilization.
116 # Use 1 sort thread if threads is 1, 2 sort threads for 2-4 threads, 3 sort threads for 5-8 threads, etc.
117 Int estimatedSortThreads = if threads == 1 then 1 else 1 + ceil(threads / 4.0)
118 Int totalSortThreads = select_first([sortThreads, estimatedSortThreads])
119 # BWA needs slightly more memory than the size of the index files (~10%). A factor of 1.2 is used as a safety margin.
120 Int estimatedMemoryGb = 1 + ceil(size(bwaIndex.indexFiles, "G") * 1.2) + sortMemoryPerThreadGb * totalSortThreads
124 mkdir -p "$(dirname ~{outputPrefix})"
127 ~{"-R '" + readgroup}~{true="'" false="" defined(readgroup)} \
128 ~{bwaIndex.fastaFile} \
131 2> ~{outputPrefix}.log.bwamem | \
133 -p ~{outputPrefix}.hla \
134 ~{bwaIndex.fastaFile}~{true=".64.alt" false=".alt" sixtyFour} | \
136 ~{"-@ " + totalSortThreads} \
137 -m ~{sortMemoryPerThreadGb}G \
138 -l ~{compressionLevel} \
140 -o ~{outputPrefix}.aln.bam
144 File outputBam = outputPrefix + ".aln.bam"
148 # An extra thread for bwa-postalt and samtools is not needed:
149 # they only use 5-10% of the compute power, and not always simultaneously.
151 memory: "~{select_first([memoryGb, estimatedMemoryGb])}G"
152 time_minutes: timeMinutes
158 read1: {description: "The first-end fastq file.", category: "required"}
159 read2: {description: "The second-end fastq file.", category: "common"}
160 bwaIndex: {description: "The BWA index, including a .alt file.", category: "required"}
161 outputPrefix: {description: "The prefix of the output files, including any parent directories.", category: "required"}
162 readgroup: {description: "A readgroup identifier.", category: "common"}
163 sixtyFour: {description: "Whether or not the index uses the '.64' suffixes.", category: "common"}
164 threads: {description: "The number of threads to use for alignment.", category: "advanced"}
165 memoryGb: {description: "The amount of memory this job will use in gigabytes.", category: "advanced"}
166 sortThreads: {description: "The number of threads to use for sorting.", category: "advanced"}
167 sortMemoryPerThreadGb: {description: "The amount of memory for each sorting thread in gigabytes.", category: "advanced"}
168 compressionLevel: {description: "The compression level of the output BAM.", category: "advanced"}
169 timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
170 dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.",
171 category: "advanced"}
174 outputBam: "The produced BAM file."
180 Array[File] indexFiles