modified: openstmerge.py
[GalaxyCodeBases.git] / etc / gatk-wdl / fm2gatk / germline.wdl
blob9ba8d8c43debf69541c2a978262450cbf3571e12
1 version 1.0
3 # Copyright (c) 2018 Leiden University Medical Center
5 # Permission is hereby granted, free of charge, to any person obtaining a copy
6 # of this software and associated documentation files (the "Software"), to deal
7 # in the Software without restriction, including without limitation the rights
8 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 # copies of the Software, and to permit persons to whom the Software is
10 # furnished to do so, subject to the following conditions:
12 # The above copyright notice and this permission notice shall be included in
13 # all copies or substantial portions of the Software.
15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 # SOFTWARE.
23 import "sample.wdl" as sampleWf
24 import "structural-variantcalling/structural-variantcalling.wdl" as structuralVariantCalling
25 import "gatk-variantcalling/single-sample-variantcalling.wdl" as variantCallingWorkflow
26 import "gatk-variantcalling/jointgenotyping.wdl" as jgwf
27 import "gatk-variantcalling/calculate-regions.wdl" as calcRegions
28 import "structs.wdl" as structs
29 import "tasks/biowdl.wdl" as biowdl
30 import "tasks/common.wdl" as common
31 import "tasks/multiqc.wdl" as multiqc
32 import "tasks/chunked-scatter.wdl" as chunkedScatter
34 workflow Germline {
35     input {
           # Workflow inputs. Inputs typed with `?` are optional; inputs with `= value` fall back to that default.
36         File sampleConfigFile
37         String outputDir = "."
38         File referenceFasta
39         File referenceFastaFai
40         File referenceFastaDict
41         BwaIndex bwaIndex
42         File dockerImagesFile
43         File dbsnpVCF
44         File dbsnpVCFIndex
           # When set, `regions` is also used as the scatter input instead of the fasta index (see scatterList).
45         File? regions
46         File? XNonParRegions
47         File? YNonParRegions
           # mergeVcfs (declared below the input section) is derived from these two flags.
48         Boolean jointgenotyping = true
49         Boolean singleSampleGvcf = false
50         String? adapterForward = "AGATCGGAAGAG"  # Illumina universal adapter
51         String? adapterReverse = "AGATCGGAAGAG"  # Illumina universal adapter
52         String platform = "illumina"
53         Boolean useBwaKit = false
           # Both scatter settings are forwarded to calculateRegions and scatterList.
54         Int scatterSizeMillions = 1000
55         Int? scatterSize
56         Int bwaThreads = 4
57         # Structural variant calling is opt-in: the SVcalling subworkflow is only called when this is true
58         Boolean runSVcalling = false
59     }
60     meta {allowNestedInputs: true}
       # Passed to SingleSampleCalling as `mergeVcf`: true when joint genotyping is
       # disabled or when single-sample gVCFs are requested.
62     Boolean mergeVcfs = !jointgenotyping || singleSampleGvcf
64     # Parse the docker images configuration: convert the YAML file to JSON and
       # read it into a String -> String map that the calls below index by key
       # (e.g. dockerImages["multiqc"]).
65     call common.YamlToJson as ConvertDockerImagesFile {
66         input:
67             yaml = dockerImagesFile
68     }
69     Map[String, String] dockerImages = read_json(ConvertDockerImagesFile.json)
71     call biowdl.InputConverter as ConvertSampleConfig {
           # Convert the samplesheet; the task's `json` output is read into the
           # SampleConfig struct right after this call.
72         input:
73             samplesheet = sampleConfigFile,
74             dockerImage = dockerImages["biowdl-input-converter"]
75     }
       # sampleConfig.samples is what the per-sample scatter iterates over.
76     SampleConfig sampleConfig = read_json(ConvertSampleConfig.json)
78     call calcRegions.CalculateRegions as calculateRegions {
           # Outputs used further down: Xregions, Yregions and autosomalRegionScatters,
           # all of which are passed to SingleSampleCalling for every sample.
79         input:
80             referenceFasta = referenceFasta,
81             referenceFastaFai = referenceFastaFai,
82             referenceFastaDict = referenceFastaDict,
83             XNonParRegions = XNonParRegions,
84             YNonParRegions = YNonParRegions,
85             regions = regions,
86             scatterSize = scatterSize,
87             scatterSizeMillions = scatterSizeMillions,
88             dockerImages = dockerImages
89     }
91     call chunkedScatter.ScatterRegions as scatterList {
           # Scatters over `regions` when it is provided, otherwise over the reference
           # fasta index. The resulting `scatters` are handed to every SampleWorkflow call.
92         input:
93             inputFile = select_first([regions, referenceFastaFai]),
94             scatterSize = scatterSize,
95             scatterSizeMillions = scatterSizeMillions,
96             dockerImage = dockerImages["chunked-scatter"]
97     }
99     # Per-sample processing: run the sample subworkflow, then single-sample
       # variant calling, and optionally structural variant calling.
100     scatter (sample in sampleConfig.samples) {
            # Declared inside the scatter, so outside of it `sampleIds` is the array of
            # all sample ids (it is passed to JointGenotyping as such).
101         String sampleIds = sample.id
            # All per-sample calls below write underneath this directory.
102         String sampleDir = outputDir + "/samples/" + sample.id + "/"
103         call sampleWf.SampleWorkflow as sampleWorkflow {
104             input:
105                 sampleDir = sampleDir,
106                 sample = sample,
107                 referenceFasta = referenceFasta,
108                 referenceFastaFai = referenceFastaFai,
109                 referenceFastaDict = referenceFastaDict,
110                 bwaIndex = bwaIndex,
111                 dbsnpVCF = dbsnpVCF,
112                 dbsnpVCFIndex = dbsnpVCFIndex,
113                 adapterForward = adapterForward,
114                 adapterReverse = adapterReverse,
115                 useBwaKit = useBwaKit,
116                 dockerImages = dockerImages,
117                 scatters = scatterList.scatters,
118                 bwaThreads = bwaThreads,
119                 platform = platform
120         }
121         
            # Variant calling runs on the recalibrated BAM. `gvcf` follows the
            # jointgenotyping flag, and a sample without a gender is passed as "unknown".
122         call variantCallingWorkflow.SingleSampleCalling as SingleSampleCalling {
123             input:
124                 bam = sampleWorkflow.recalibratedBam,
125                 bamIndex = sampleWorkflow.recalibratedBamIndex,
126                 gender = select_first([sample.gender, "unknown"]),
127                 sampleName = sample.id,
128                 outputDir = sampleDir,
129                 referenceFasta = referenceFasta,
130                 referenceFastaFai = referenceFastaFai,
131                 referenceFastaDict = referenceFastaDict,
132                 dbsnpVCF = dbsnpVCF,
133                 dbsnpVCFIndex = dbsnpVCFIndex,
134                 XNonParRegions = calculateRegions.Xregions,
135                 YNonParRegions = calculateRegions.Yregions,
136                 statsRegions = regions,
137                 autosomalRegionScatters = calculateRegions.autosomalRegionScatters,
138                 gvcf = jointgenotyping,
139                 mergeVcf = mergeVcfs,
140                 dockerImages = dockerImages                    
141         }
            # Structural variant calling is optional and uses the markdup BAM, not the
            # recalibrated BAM. Because the call is conditional, its outputs are
            # optional per sample.
144         if (runSVcalling) {
145             call structuralVariantCalling.SVcalling as svCalling {
146                 input:
147                     bamFile = sampleWorkflow.markdupBam,
148                     bamIndex = sampleWorkflow.markdupBamIndex,
149                     referenceFasta = referenceFasta,
150                     referenceFastaFai = referenceFastaFai,
151                     referenceFastaDict = referenceFastaDict,
152                     bwaIndex = bwaIndex,
153                     sample = sample.id,
154                     outputDir = sampleDir,
155                     dockerImages = dockerImages
156             }
157         }
158     }
160     if (jointgenotyping) {
            # Joint genotyping over the scattered per-sample VCFs of all samples.
            # The per-sample arrays of scatters are flattened into one list.
            # NOTE(review): scatterSizeMillions is not forwarded here, unlike in
            # calculateRegions and scatterList - confirm whether that is intentional.
161         call jgwf.JointGenotyping as JointGenotyping {
162             input:
163                 gvcfFiles = flatten(SingleSampleCalling.vcfScatters),
164                 gvcfFilesIndex = flatten(SingleSampleCalling.vcfIndexScatters),
165                 outputDir = outputDir,
166                 vcfBasename = "multisample",
167                 referenceFasta = referenceFasta,
168                 referenceFastaFai = referenceFastaFai,
169                 referenceFastaDict = referenceFastaDict,
170                 sampleIds = sampleIds,
171                 dbsnpVCF = dbsnpVCF,
172                 dbsnpVCFIndex = dbsnpVCFIndex,
173                 regions = regions,
174                 scatterSize = scatterSize,
175                 dockerImages = dockerImages
176         }
177     }
179     Array[File] allReports = flatten([
180         flatten(sampleWorkflow.reports), flatten(SingleSampleCalling.reports), select_first([JointGenotyping.reports, []])
181         ])
        # allReports combines the reports of every sample workflow and every
        # single-sample calling run with the joint genotyping reports. The latter
        # only exist when the conditional JointGenotyping call ran, so select_first
        # falls back to an empty list otherwise.
183     call multiqc.MultiQC as multiqcTask {
            # MultiQC is called unconditionally on all collected reports.
184         input:
185             reports = allReports,
186             outDir = outputDir,
187             dockerImage = dockerImages["multiqc"]
188     }
190     output {
191         File multiqcReport = multiqcTask.multiqcReport
192         Array[File] reports = allReports
            # Optional: JointGenotyping is only called when `jointgenotyping` is true.
193         File? multiSampleVcf = JointGenotyping.multisampleVcf
194         File? multisampleVcfIndex = JointGenotyping.multisampleVcfIndex
195         File? multisampleGVcf = JointGenotyping.multisampleGVcf
196         File? multisampleGVcfIndex = JointGenotyping.multisampleGVcfIndex
            # The same SingleSampleCalling outputs are exposed as VCFs when joint
            # genotyping is off and as gVCFs when it is on; the other pair is empty.
197         Array[File] singleSampleVcfs = if jointgenotyping then [] else select_all(SingleSampleCalling.outputVcf)
198         Array[File] singleSampleVcfsIndex = if jointgenotyping then [] else select_all(SingleSampleCalling.outputVcfIndex)
199         Array[File] singleSampleGvcfs = if jointgenotyping then select_all(SingleSampleCalling.outputVcf) else []
200         Array[File] singleSampleGvcfsIndex = if jointgenotyping then select_all(SingleSampleCalling.outputVcfIndex) else []
201         Array[File] recalibratedBams = sampleWorkflow.recalibratedBam
202         Array[File] recalibratedBamIndexes = sampleWorkflow.recalibratedBamIndex
203         Array[File] markdupBams = sampleWorkflow.markdupBam
204         Array[File] markdupBamIndexes = sampleWorkflow.markdupBamIndex
            # Structural variant outputs hold one optional entry per sample, because
            # svCalling is a conditional call inside the sample scatter.
            # NOTE(review): matecleverVCFs is bound to svCalling.cleverVcf, the same
            # value as cleverVCFs - confirm against the SVcalling outputs whether
            # this duplication is intended.
205         Array[File?] cleverVCFs = svCalling.cleverVcf
206         Array[File?] matecleverVCFs = svCalling.cleverVcf
207         Array[File?] mantaVCFs = svCalling.mantaVcf
208         Array[File?] dellyVCFs = svCalling.dellyVcf
209         Array[File?] survivorVCFs = svCalling.survivorVcf
210         Array[Array[File]?] renamedVCFs = svCalling.renamedVcfs
211     }
213     parameter_meta {
            # One entry per workflow input: a description plus a category
            # ("required", "common" or "advanced"). Keep these keys in sync with the
            # input section; entries for inputs that do not exist do not belong here.
214         sampleConfigFile: {description: "The samplesheet, including sample ids, library ids, readgroup ids and fastq file locations.",
215                            category: "required"}
216         outputDir: {description: "The directory the output should be written to.", category: "common"}
217         referenceFasta: { description: "The reference fasta file", category: "required" }
218         referenceFastaFai: { description: "Fasta index (.fai) file of the reference", category: "required" }
219         referenceFastaDict: { description: "Sequence dictionary (.dict) file of the reference", category: "required" }
220         dbsnpVCF: { description: "dbsnp VCF file used for checking known sites", category: "required"}
221         dbsnpVCFIndex: { description: "Index (.tbi) file for the dbsnp VCF", category: "required"}
222         bwaIndex: {description: "The BWA index files.", category: "required"}
223         dockerImagesFile: {description: "A YAML file describing the docker image used for the tasks. The dockerImages.yml provided with the pipeline is recommended.",
224                            category: "advanced"}
225         regions: {description: "A bed file describing the regions to call variants for.", category: "common"}
226         runSVcalling: {description: "Whether or not Structural-variantcalling should be run.", category: "advanced"}
228         XNonParRegions: {description: "Bed file with the non-PAR regions of X.", category: "common"}
229         YNonParRegions: {description: "Bed file with the non-PAR regions of Y.", category: "common"}
230         useBwaKit: {description: "Whether or not BWA kit should be used. If false BWA mem will be used.", category: "advanced"}
231         adapterForward: {description: "The adapter to be removed from the reads first or single end reads.", category: "common"}
232         adapterReverse: {description: "The adapter to be removed from the reads second end reads.", category: "common"}
233         platform: {description: "The platform used for sequencing.", category: "advanced"}
234         scatterSize: {description: "The size of the scattered regions in bases for the GATK subworkflows. Scattering is used to speed up certain processes. The genome will be seperated into multiple chunks (scatters) which will be processed in their own job, allowing for parallel processing. Higher values will result in a lower number of jobs. The optimal value here will depend on the available resources.",
235               category: "advanced"}
236         scatterSizeMillions:{ description: "Same as scatterSize, but is multiplied by 1000000 to get scatterSize. This allows for setting larger values more easily.",
237                               category: "advanced"}
238         jointgenotyping: {description: "Whether to perform jointgenotyping (using HaplotypeCaller to call GVCFs and merge them with GenotypeGVCFs) or not",
239                   category: "common"}
240         singleSampleGvcf: {description: "Whether to output single-sample gvcfs", category: "common"}
241         bwaThreads: {description: "The amount of threads for the alignment process", category: "advanced"}
242     }