1 ## Copyright Broad Institute, 2017
3 ## This WDL pipeline implements data pre-processing and initial calling for somatic SNP,
4 ## Indel, and copy number variants in human whole-genome sequencing (WGS) data.
6 ## Requirements/expectations :
7 ## - Human whole-genome paired-end sequencing data in unmapped BAM (uBAM) format
8 ## - One or more read groups, one per uBAM file, all belonging to a single sample (SM)
9 ## - Input uBAM files must additionally comply with the following requirements:
10 ## - - filenames all have the same suffix (we use ".unmapped.bam")
11 ## - - files must pass validation by ValidateSamFile
12 ## - - reads are provided in query-sorted order
13 ## - - all reads must have an RG tag
14 ## - Reference genome must be Hg38 with ALT contigs
16 ## Runtime parameters are optimized for Broad's Google Cloud Platform implementation.
17 ## For program versions, see docker containers.
20 ## This script is released under the WDL source code license (BSD-3) (see LICENSE in
21 ## https://github.com/broadinstitute/wdl). Note however that the programs it calls may
22 ## be subject to different licenses. Users are responsible for checking that they are
23 ## authorized to run all programs before running this script. Please see the docker
24 ## page at https://hub.docker.com/r/broadinstitute/genomes-in-the-cloud/ for detailed
25 ## licensing information pertaining to the included programs.
27 ## For documentation on the M2 and CNV parameters, please see the respective WDL files (imported below).
30 import "SomaticPairedSingleSampleWf.wdl" as PreProcess
31 import "mutect2.wdl" as M2
32 import "cnv_somatic_pair_workflow.wdl" as cnvSomaticPairWorkflow
34 workflow FullSomaticPipeline {
36 ### Preprocessing parameters
37 File contamination_sites_ud
38 File contamination_sites_bed
39 File contamination_sites_mu
40 File wgs_coverage_interval_list
42 String tumor_base_file_name
43 Array[File] tumor_flowcell_unmapped_bams
45 String normal_base_file_name
46 Array[File] normal_flowcell_unmapped_bams
47 String unmapped_bam_suffix
63 Array[File] known_indels_sites_VCFs
64 Array[File] known_indels_sites_indices
67 Int agg_preemptible_tries
69 Float cutoff_for_large_rg_in_gb = 20.0
71 # Optional input to increase all disk sizes in case of an outlier sample with unusual size behavior
72 Int? increase_disk_size
74 Int compression_level = 2
75 #########################################
84 File? variants_for_contamination
85 File? variants_for_contamination_index
86 Boolean is_run_orientation_bias_filter = true
87 Boolean is_run_oncotator = true
90 String? onco_ds_local_db_dir
91 Array[String] artifact_modes
93 String? m2_extra_filtering_args
94 String? sequencing_center
95 String? sequence_source
96 File? default_config_file
98 Int? preemptible_attempts
99 String basic_bash_docker = "ubuntu:16.04"
100 String oncotator_docker = "broadinstitute/oncotator:1.9.6.1"
102 #####################################
110 File? gatk4_jar_override
113 Int? mem_gb_for_model_segments
115 call PreProcess.SomaticPairedEndSingleSampleWorkflow as PreProcessTumor {
117 contamination_sites_ud = contamination_sites_ud,
118 contamination_sites_bed = contamination_sites_bed,
119 contamination_sites_mu = contamination_sites_mu,
120 wgs_coverage_interval_list = wgs_coverage_interval_list,
122 base_file_name = tumor_base_file_name,
123 flowcell_unmapped_bams = tumor_flowcell_unmapped_bams,
124 unmapped_bam_suffix = unmapped_bam_suffix,
126 read_length = read_length,
128 ref_fasta = ref_fasta,
129 ref_fasta_index = ref_fasta_index,
138 dbSNP_vcf = dbSNP_vcf,
139 dbSNP_vcf_index = dbSNP_vcf_index,
140 known_indels_sites_VCFs = known_indels_sites_VCFs,
141 known_indels_sites_indices = known_indels_sites_indices,
143 preemptible_tries = preemptible_tries,
144 agg_preemptible_tries = agg_preemptible_tries,
146 cutoff_for_large_rg_in_gb = cutoff_for_large_rg_in_gb,
148 increase_disk_size = increase_disk_size,
150 compression_level = compression_level
153 call PreProcess.SomaticPairedEndSingleSampleWorkflow as PreProcessNormal {
155 contamination_sites_ud = contamination_sites_ud,
156 contamination_sites_bed = contamination_sites_bed,
157 contamination_sites_mu = contamination_sites_mu,
158 wgs_coverage_interval_list = wgs_coverage_interval_list,
160 base_file_name = normal_base_file_name,
161 flowcell_unmapped_bams = normal_flowcell_unmapped_bams,
162 unmapped_bam_suffix = unmapped_bam_suffix,
164 read_length = read_length,
166 ref_fasta = ref_fasta,
167 ref_fasta_index = ref_fasta_index,
176 dbSNP_vcf = dbSNP_vcf,
177 dbSNP_vcf_index = dbSNP_vcf_index,
178 known_indels_sites_VCFs = known_indels_sites_VCFs,
179 known_indels_sites_indices = known_indels_sites_indices,
181 preemptible_tries = preemptible_tries,
182 agg_preemptible_tries = agg_preemptible_tries,
184 cutoff_for_large_rg_in_gb = cutoff_for_large_rg_in_gb,
186 increase_disk_size = increase_disk_size,
188 compression_level = compression_level
191 call M2.Mutect2 as M2Pair {
193 intervals = wgs_coverage_interval_list,
194 tumor_bam = PreProcessTumor.output_bam,
195 tumor_bai = PreProcessTumor.output_bam_index,
196 normal_bam = PreProcessNormal.output_bam,
197 normal_bai = PreProcessNormal.output_bam_index,
199 pon_index = pon_index,
200 scatter_count = scatter_count,
202 gnomad_index = gnomad_index,
203 variants_for_contamination = variants_for_contamination,
204 variants_for_contamination_index = variants_for_contamination_index,
205 run_orientation_bias_filter = is_run_orientation_bias_filter,
206 run_oncotator = is_run_oncotator,
208 gatk_override = gatk4_jar_override,
209 onco_ds_tar_gz = onco_ds_tar_gz,
210 onco_ds_local_db_dir = onco_ds_local_db_dir,
211 artifact_modes = artifact_modes,
212 m2_extra_args = m2_extra_args,
213 m2_extra_filtering_args = m2_extra_filtering_args,
214 sequencing_center = sequencing_center,
215 sequence_source = sequence_source,
216 default_config_file = default_config_file,
218 preemptible_attempts = preemptible_attempts,
219 gatk_docker = gatk_docker,
220 basic_bash_docker = basic_bash_docker,
221 oncotator_docker = oncotator_docker,
223 ref_fasta = ref_fasta,
224 ref_fai = ref_fasta_index,
227 emergency_extra_disk = 20
230 call cnvSomaticPairWorkflow.CNVSomaticPairWorkflow as CNVPair {
232 intervals = wgs_coverage_interval_list,
233 common_sites = common_sites,
234 tumor_bam = PreProcessTumor.output_bam,
235 tumor_bam_idx = PreProcessTumor.output_bam_index,
236 normal_bam = PreProcessNormal.output_bam,
237 normal_bam_idx = PreProcessNormal.output_bam_index,
238 ref_fasta = ref_fasta,
239 ref_fasta_dict = ref_dict,
240 ref_fasta_fai = ref_fasta_index,
241 read_count_pon = read_count_pon,
242 gatk4_jar_override = gatk4_jar_override,
243 gatk_docker = gatk_docker,
244 is_run_oncotator = is_run_oncotator,
245 bin_length = bin_length,
246 mem_gb_for_model_segments = mem_gb_for_model_segments