3 # Copyright (c) 2017 Leiden University Medical Center
5 # Permission is hereby granted, free of charge, to any person obtaining a copy
6 # of this software and associated documentation files (the "Software"), to deal
7 # in the Software without restriction, including without limitation the rights
8 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 # copies of the Software, and to permit persons to whom the Software is
10 # furnished to do so, subject to the following conditions:
12 # The above copyright notice and this permission notice shall be included in
13 # all copies or substantial portions of the Software.
15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28 # Sambamba scales like this: 1 thread is fully utilized (1.0); 2 threads -> 1.8 utilized, 3 -> 2.4, 4 -> 2.7.
29 # 2 threads reduces wall clock time by more than 40%.
31 Int compressionLevel = 1
34 # sortBufferSize and ioBufferSize taken from markdup defaults as of sambamba 0.7.1
35 Int sortBufferSize = 2048
36 Int ioBufferSize = 128
37 Boolean removeDuplicates = false
39 # According to the manual, sambamba markdup uses sortBufferSize + 2 * ioBufferSize of memory.
40 # 1024 MB is added as a margin of safety. Real-life use with this setting uses 2.7 GiB.
41 Int memoryMb = 1024 + sortBufferSize + 2 * ioBufferSize
42 String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2"
43 # The time estimate (in minutes) does not work well for higher numbers of threads.
44 Int timeMinutes = 1 + ceil(size(inputBams, "G") * 8) / threads
46 String bamIndexPath = sub(outputPath, "\.bam$", ".bai")
50 mkdir -p "$(dirname ~{outputPath})"
52 --nthreads ~{threads} \
53 -l ~{compressionLevel} \
54 ~{true="-r" false="" removeDuplicates} \
55 ~{"--hash-table-size " + hashTableSize} \
56 ~{"--overflow-list-size " + overFlowListSize} \
57 ~{"--sort-buffer-size " + sortBufferSize} \
58 ~{"--io-buffer-size " + ioBufferSize} \
59 ~{sep=' ' inputBams} ~{outputPath}
60 # sambamba creates an index for us
61 mv ~{outputPath}.bai ~{bamIndexPath}
65 File outputBam = outputPath
66 File outputBamIndex = bamIndexPath
70 memory: "~{memoryMb}M"
72 time_minutes: timeMinutes
78 inputBams: {description: "The input BAM files.", category: "required"}
79 outputPath: {description: "Output directory path + output file.", category: "required"}
80 compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"}
81 memoryMb: {description: "The amount of memory available to the job in megabytes.", category: "advanced"}
82 removeDuplicates: {description: "Whether to remove the duplicates (instead of only marking them).", category: "advanced"}
83 hashTableSize: {description: "Sets sambamba's hash table size", category: "advanced"}
84 overFlowListSize: {description: "Sets sambamba's overflow list size", category: "advanced"}
85 sortBufferSize: {description: "The amount of mb allocated to the sort buffer", category: "advanced"}
86 ioBufferSize: {description: "The amount of mb allocated to each IO buffer. Sambamba uses two IO buffers.", category: "advanced"}
87 dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
88 threads: {description: "The number of threads that will be used for this task.", category: "advanced"}
89 timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
91 outputBam: {description: "Sorted BAM file."}
98 String outputPath = basename(inputBam, "\.bam") + ".sorted.bam"
99 Boolean sortByName = false
100 Int compressionLevel = 1
102 Int memoryPerThreadGb = 4
103 Int memoryGb = 1 + threads * memoryPerThreadGb
104 String dockerImage = "quay.io/biocontainers/sambamba:0.7.1--h148d290_2"
105 Int timeMinutes = 1 + ceil(size(inputBam, "G") * 3)
108 # select_first is needed because outputPath is treated as an optional input (bug in Cromwell).
109 String bamIndexPath = sub(select_first([outputPath]), "\.bam$", ".bai")
113 mkdir -p "$(dirname ~{outputPath})"
115 -l ~{compressionLevel} \
116 ~{true="-n" false="" sortByName} \
117 ~{"--nthreads " + threads} \
118 -m ~{memoryPerThreadGb}G \
121 # sambamba creates an index for us
122 mv ~{outputPath}.bai ~{bamIndexPath}
126 File outputBam = outputPath
127 File outputBamIndex = bamIndexPath
132 memory: "~{memoryGb}G"
134 time_minutes: timeMinutes
139 inputBam: {description: "The input SAM file.", category: "required"}
140 outputPath: {description: "Output directory path + output file.", category: "required"}
141 sortByName: {description: "Sort the inputBam by read name instead of position.", category: "advanced"}
142 compressionLevel: {description: "Compression level from 0 (uncompressed) to 9 (best).", category: "advanced"}
143 memoryGb: {description: "The amount of memory available to the job in gigabytes.", category: "advanced"}
144 memoryPerThreadGb: {description: "The amount of memory used per sort thread in gigabytes", category: "advanced"}
145 dockerImage: {description: "The docker image used for this task. Changing this may result in errors which the developers may choose not to address.", category: "advanced"}
146 threads: {description: "The number of threads that will be used for this task.", category: "advanced"}
147 timeMinutes: {description: "The maximum amount of time the job will run in minutes.", category: "advanced"}
149 outputBam: {description: "Sorted BAM file."}