3 # Copyright (c) 2018 Leiden University Medical Center
5 # Permission is hereby granted, free of charge, to any person obtaining a copy
6 # of this software and associated documentation files (the "Software"), to deal
7 # in the Software without restriction, including without limitation the rights
8 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 # copies of the Software, and to permit persons to whom the Software is
10 # furnished to do so, subject to the following conditions:
12 # The above copyright notice and this permission notice shall be included in
13 # all copies or substantial portions of the Software.
15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 import "sample.wdl" as sampleWf
24 import "structural-variantcalling/structural-variantcalling.wdl" as structuralVariantCalling
25 import "gatk-variantcalling/single-sample-variantcalling.wdl" as variantCallingWorkflow
26 import "gatk-variantcalling/jointgenotyping.wdl" as jgwf
27 import "gatk-variantcalling/calculate-regions.wdl" as calcRegions
28 import "structs.wdl" as structs
29 import "tasks/biowdl.wdl" as biowdl
30 import "tasks/common.wdl" as common
31 import "tasks/multiqc.wdl" as multiqc
32 import "tasks/chunked-scatter.wdl" as chunkedScatter
37 String outputDir = "."
39 File referenceFastaFai
40 File referenceFastaDict
48 Boolean jointgenotyping = true
49 Boolean singleSampleGvcf = false
50 String? adapterForward = "AGATCGGAAGAG" # Illumina universal adapter
51 String? adapterReverse = "AGATCGGAAGAG" # Illumina universal adapter
52 String platform = "illumina"
53 Boolean useBwaKit = false
54 Int scatterSizeMillions = 1000
57 # Only run multiQC if the user specified an outputDir
58 Boolean runSVcalling = false
60 meta {allowNestedInputs: true}
62 Boolean mergeVcfs = !jointgenotyping || singleSampleGvcf
64 # Parse docker Tags configuration and sample sheet
65 call common.YamlToJson as ConvertDockerImagesFile {
67 yaml = dockerImagesFile
69 Map[String, String] dockerImages = read_json(ConvertDockerImagesFile.json)
71 call biowdl.InputConverter as ConvertSampleConfig {
73 samplesheet = sampleConfigFile,
74 dockerImage = dockerImages["biowdl-input-converter"]
76 SampleConfig sampleConfig = read_json(ConvertSampleConfig.json)
78 call calcRegions.CalculateRegions as calculateRegions {
80 referenceFasta = referenceFasta,
81 referenceFastaFai = referenceFastaFai,
82 referenceFastaDict = referenceFastaDict,
83 XNonParRegions = XNonParRegions,
84 YNonParRegions = YNonParRegions,
86 scatterSize = scatterSize,
87 scatterSizeMillions = scatterSizeMillions,
88 dockerImages = dockerImages
91 call chunkedScatter.ScatterRegions as scatterList {
93 inputFile = select_first([regions, referenceFastaFai]),
94 scatterSize = scatterSize,
95 scatterSizeMillions = scatterSizeMillions,
96 dockerImage = dockerImages["chunked-scatter"]
99 # Running sample subworkflow
100 scatter (sample in sampleConfig.samples) {
101 String sampleIds = sample.id
102 String sampleDir = outputDir + "/samples/" + sample.id + "/"
103 call sampleWf.SampleWorkflow as sampleWorkflow {
105 sampleDir = sampleDir,
107 referenceFasta = referenceFasta,
108 referenceFastaFai = referenceFastaFai,
109 referenceFastaDict = referenceFastaDict,
112 dbsnpVCFIndex = dbsnpVCFIndex,
113 adapterForward = adapterForward,
114 adapterReverse = adapterReverse,
115 useBwaKit = useBwaKit,
116 dockerImages = dockerImages,
117 scatters = scatterList.scatters,
118 bwaThreads = bwaThreads,
122 call variantCallingWorkflow.SingleSampleCalling as SingleSampleCalling {
124 bam = sampleWorkflow.recalibratedBam,
125 bamIndex = sampleWorkflow.recalibratedBamIndex,
126 gender = select_first([sample.gender, "unknown"]),
127 sampleName = sample.id,
128 outputDir = sampleDir,
129 referenceFasta = referenceFasta,
130 referenceFastaFai = referenceFastaFai,
131 referenceFastaDict = referenceFastaDict,
133 dbsnpVCFIndex = dbsnpVCFIndex,
134 XNonParRegions = calculateRegions.Xregions,
135 YNonParRegions = calculateRegions.Yregions,
136 statsRegions = regions,
137 autosomalRegionScatters = calculateRegions.autosomalRegionScatters,
138 gvcf = jointgenotyping,
139 mergeVcf = mergeVcfs,
140 dockerImages = dockerImages
145 call structuralVariantCalling.SVcalling as svCalling {
147 bamFile = sampleWorkflow.markdupBam,
148 bamIndex = sampleWorkflow.markdupBamIndex,
149 referenceFasta = referenceFasta,
150 referenceFastaFai = referenceFastaFai,
151 referenceFastaDict = referenceFastaDict,
154 outputDir = sampleDir,
155 dockerImages = dockerImages
160 if (jointgenotyping) {
161 call jgwf.JointGenotyping as JointGenotyping {
163 gvcfFiles = flatten(SingleSampleCalling.vcfScatters),
164 gvcfFilesIndex = flatten(SingleSampleCalling.vcfIndexScatters),
165 outputDir = outputDir,
166 vcfBasename = "multisample",
167 referenceFasta = referenceFasta,
168 referenceFastaFai = referenceFastaFai,
169 referenceFastaDict = referenceFastaDict,
170 sampleIds = sampleIds,
172 dbsnpVCFIndex = dbsnpVCFIndex,
174 scatterSize = scatterSize,
175 dockerImages = dockerImages
179 Array[File] allReports = flatten([
180 flatten(sampleWorkflow.reports), flatten(SingleSampleCalling.reports), select_first([JointGenotyping.reports, []])
183 call multiqc.MultiQC as multiqcTask {
185 reports = allReports,
187 dockerImage = dockerImages["multiqc"]
191 File multiqcReport = multiqcTask.multiqcReport
192 Array[File] reports = allReports
193 File? multiSampleVcf = JointGenotyping.multisampleVcf
194 File? multisampleVcfIndex = JointGenotyping.multisampleVcfIndex
195 File? multisampleGVcf = JointGenotyping.multisampleGVcf
196 File? multisampleGVcfIndex = JointGenotyping.multisampleGVcfIndex
197 Array[File] singleSampleVcfs = if jointgenotyping then [] else select_all(SingleSampleCalling.outputVcf)
198 Array[File] singleSampleVcfsIndex = if jointgenotyping then [] else select_all(SingleSampleCalling.outputVcfIndex)
199 Array[File] singleSampleGvcfs = if jointgenotyping then select_all(SingleSampleCalling.outputVcf) else []
200 Array[File] singleSampleGvcfsIndex = if jointgenotyping then select_all(SingleSampleCalling.outputVcfIndex) else []
201 Array[File] recalibratedBams = sampleWorkflow.recalibratedBam
202 Array[File] recalibratedBamIndexes = sampleWorkflow.recalibratedBamIndex
203 Array[File] markdupBams = sampleWorkflow.markdupBam
204 Array[File] markdupBamIndexes = sampleWorkflow.markdupBamIndex
205 Array[File?] cleverVCFs = svCalling.cleverVcf
206 Array[File?] matecleverVCFs = svCalling.cleverVcf
207 Array[File?] mantaVCFs = svCalling.mantaVcf
208 Array[File?] dellyVCFs = svCalling.dellyVcf
209 Array[File?] survivorVCFs = svCalling.survivorVcf
210 Array[Array[File]?] renamedVCFs = svCalling.renamedVcfs
214 sampleConfigFile: {description: "The samplesheet, including sample ids, library ids, readgroup ids and fastq file locations.",
215 category: "required"}
216 outputDir: {description: "The directory the output should be written to.", category: "common"}
217 referenceFasta: { description: "The reference fasta file", category: "required" }
218 referenceFastaFai: { description: "Fasta index (.fai) file of the reference", category: "required" }
219 referenceFastaDict: { description: "Sequence dictionary (.dict) file of the reference", category: "required" }
220 dbsnpVCF: { description: "dbsnp VCF file used for checking known sites", category: "required"}
221 dbsnpVCFIndex: { description: "Index (.tbi) file for the dbsnp VCF", category: "required"}
222 bwaIndex: {description: "The BWA index files.", category: "required"}
223 dockerImagesFile: {description: "A YAML file describing the docker image used for the tasks. The dockerImages.yml provided with the pipeline is recommended.",
224 category: "advanced"}
225 regions: {description: "A bed file describing the regions to call variants for.", category: "common"}
226 runSVcalling: {description: "Whether or not Structural-variantcalling should be run.", category: "advanced"}
227 runMultiQC: {description: "Whether or not MultiQC should be run.", category: "advanced"}
228 XNonParRegions: {description: "Bed file with the non-PAR regions of X.", category: "common"}
229 YNonParRegions: {description: "Bed file with the non-PAR regions of Y.", category: "common"}
230 useBwaKit: {description: "Whether or not BWA kit should be used. If false BWA mem will be used.", category: "advanced"}
231 adapterForward: {description: "The adapter to be removed from the reads first or single end reads.", category: "common"}
232 adapterReverse: {description: "The adapter to be removed from the reads second end reads.", category: "common"}
233 platform: {description: "The platform used for sequencing.", category: "advanced"}
234 scatterSize: {description: "The size of the scattered regions in bases for the GATK subworkflows. Scattering is used to speed up certain processes. The genome will be seperated into multiple chunks (scatters) which will be processed in their own job, allowing for parallel processing. Higher values will result in a lower number of jobs. The optimal value here will depend on the available resources.",
235 category: "advanced"}
236 scatterSizeMillions:{ description: "Same as scatterSize, but is multiplied by 1000000 to get scatterSize. This allows for setting larger values more easily.",
237 category: "advanced"}
238 jointgenotyping: {description: "Whether to perform jointgenotyping (using HaplotypeCaller to call GVCFs and merge them with GenotypeGVCFs) or not",
240 singleSampleGvcf: {description: "Whether to output single-sample gvcfs", category: "common"}
241 bwaThreads: {description: "The amount of threads for the alignment process", category: "advanced"}