modified: myjupyterlab.sh
[GalaxyCodeBases.git] / etc / gatk-wdl / FullSomaticPipeline.wdl
blob2741a3b4a3819e46cb4172b3f862261b68eb06e5
1 ## Copyright Broad Institute, 2017
2 ##
3 ## This WDL pipeline implements data pre-processing and initial calling for somatic SNP,
4 ## Indel, and copy number variants in human whole-genome sequencing (WGS) data.
5 ##
6 ## Requirements/expectations :
7 ## - Human whole-genome paired-end sequencing data in unmapped BAM (uBAM) format
8 ## - One or more read groups, one per uBAM file, all belonging to a single sample (SM)
9 ## - Input uBAM files must additionally comply with the following requirements:
10 ## - - filenames all have the same suffix (we use ".unmapped.bam")
11 ## - - files must pass validation by ValidateSamFile
12 ## - - reads are provided in query-sorted order
13 ## - - all reads must have an RG tag
14 ## - Reference genome must be Hg38 with ALT contigs
16 ## Runtime parameters are optimized for Broad's Google Cloud Platform implementation.
17 ## For program versions, see docker containers.
19 ## LICENSING :
20 ## This script is released under the WDL source code license (BSD-3) (see LICENSE in
21 ## https://github.com/broadinstitute/wdl). Note however that the programs it calls may
22 ## be subject to different licenses. Users are responsible for checking that they are
23 ## authorized to run all programs before running this script. Please see the docker
24 ## page at https://hub.docker.com/r/broadinstitute/genomes-in-the-cloud/ for detailed
25 ## licensing information pertaining to the included programs.
27 ## For documentation on the M2 and CNV parameters, please see the respective WDL files (imported below).
30 import "SomaticPairedSingleSampleWf.wdl" as PreProcess
31 import "mutect2.wdl" as M2
32 import "cnv_somatic_pair_workflow.wdl" as cnvSomaticPairWorkflow
34 workflow FullSomaticPipeline {
36     ### Preprocessing parameters
       # Every value in this section is passed to both the PreProcessTumor and
       # PreProcessNormal calls, except the tumor_*/normal_* base names and uBAM
       # lists, which go only to their respective call.
       # wgs_coverage_interval_list, ref_fasta, ref_fasta_index and ref_dict are
       # additionally reused by the M2Pair and CNVPair calls below.
37     File contamination_sites_ud
38     File contamination_sites_bed
39     File contamination_sites_mu
40     File wgs_coverage_interval_list
42     String tumor_base_file_name
43     Array[File] tumor_flowcell_unmapped_bams
45     String normal_base_file_name
46     Array[File] normal_flowcell_unmapped_bams
47     String unmapped_bam_suffix
49     Int read_length = 250
51     File ref_fasta
52     File ref_fasta_index
53     File ref_dict
54     File ref_alt
55     File ref_bwt
56     File ref_sa
57     File ref_amb
58     File ref_ann
59     File ref_pac
61     File dbSNP_vcf
62     File dbSNP_vcf_index
63     Array[File] known_indels_sites_VCFs
64     Array[File] known_indels_sites_indices
66     Int preemptible_tries
67     Int agg_preemptible_tries
69     Float cutoff_for_large_rg_in_gb = 20.0
71     # Optional input to increase all disk sizes in case of outlier sample with strange size behavior
72     Int? increase_disk_size
74     Int compression_level = 2
75     #########################################
78     #### M2 parameters
       # Inputs forwarded to the Mutect2 call (M2Pair). is_run_oncotator is also
       # passed to the CNVPair call.
79     File? pon
80     File? pon_index
81     Int scatter_count
82     File? gnomad
83     File? gnomad_index
84     File? variants_for_contamination
85     File? variants_for_contamination_index
86     Boolean is_run_orientation_bias_filter = true
87     Boolean is_run_oncotator = true
89     File? onco_ds_tar_gz
90     String? onco_ds_local_db_dir
91     Array[String] artifact_modes
92     String? m2_extra_args
93     String? m2_extra_filtering_args
94     String? sequencing_center
95     String? sequence_source
96     File? default_config_file
98     Int? preemptible_attempts
       # preemptible_attempts is used only by M2Pair; it is a separate input from
       # preemptible_tries / agg_preemptible_tries, which go to the preprocessing calls.
99     String basic_bash_docker = "ubuntu:16.04"
100     String oncotator_docker = "broadinstitute/oncotator:1.9.6.1"
102     #####################################
104     ### CNV parameters
        # gatk_docker and gatk4_jar_override are shared by the M2Pair and CNVPair calls.
105     #File intervals
        # (no separate intervals input is declared: both M2Pair and CNVPair receive
        # wgs_coverage_interval_list as their `intervals` input)
106     File common_sites
107     File read_count_pon
109     String gatk_docker
110     File? gatk4_jar_override
111     Int? bin_length
113     Int? mem_gb_for_model_segments
115     call PreProcess.SomaticPairedEndSingleSampleWorkflow as PreProcessTumor {
            # Runs the imported pre-processing workflow (SomaticPairedSingleSampleWf.wdl,
            # alias PreProcess) on the tumor sample. Only base_file_name and
            # flowcell_unmapped_bams are tumor-specific; every other input is the same
            # workflow-level value that PreProcessNormal receives.
            # Its output_bam / output_bam_index are consumed by M2Pair and CNVPair.
116         input:
117             contamination_sites_ud = contamination_sites_ud,
118             contamination_sites_bed = contamination_sites_bed,
119             contamination_sites_mu = contamination_sites_mu,
120             wgs_coverage_interval_list = wgs_coverage_interval_list,
122             base_file_name = tumor_base_file_name,
123             flowcell_unmapped_bams = tumor_flowcell_unmapped_bams,
124             unmapped_bam_suffix = unmapped_bam_suffix,
126             read_length = read_length,
128             ref_fasta = ref_fasta,
129             ref_fasta_index = ref_fasta_index,
130             ref_dict = ref_dict,
131             ref_alt = ref_alt,
132             ref_bwt = ref_bwt,
133             ref_sa = ref_sa,
134             ref_amb = ref_amb,
135             ref_ann = ref_ann,
136             ref_pac = ref_pac,
138             dbSNP_vcf = dbSNP_vcf,
139             dbSNP_vcf_index = dbSNP_vcf_index,
140             known_indels_sites_VCFs = known_indels_sites_VCFs,
141             known_indels_sites_indices = known_indels_sites_indices,
143             preemptible_tries = preemptible_tries,
144             agg_preemptible_tries = agg_preemptible_tries,
146             cutoff_for_large_rg_in_gb = cutoff_for_large_rg_in_gb,
148             increase_disk_size = increase_disk_size,
150             compression_level = compression_level
151     }
153     call PreProcess.SomaticPairedEndSingleSampleWorkflow as PreProcessNormal {
            # Runs the same imported pre-processing workflow as PreProcessTumor, but on
            # the matched normal sample. Only base_file_name and flowcell_unmapped_bams
            # are normal-specific; all remaining inputs are the shared workflow-level values.
            # Its output_bam / output_bam_index are consumed by M2Pair and CNVPair.
154         input:
155             contamination_sites_ud = contamination_sites_ud,
156             contamination_sites_bed = contamination_sites_bed,
157             contamination_sites_mu = contamination_sites_mu,
158             wgs_coverage_interval_list = wgs_coverage_interval_list,
160             base_file_name = normal_base_file_name,
161             flowcell_unmapped_bams = normal_flowcell_unmapped_bams,
162             unmapped_bam_suffix = unmapped_bam_suffix,
164             read_length = read_length,
166             ref_fasta = ref_fasta,
167             ref_fasta_index = ref_fasta_index,
168             ref_dict = ref_dict,
169             ref_alt = ref_alt,
170             ref_bwt = ref_bwt,
171             ref_sa = ref_sa,
172             ref_amb = ref_amb,
173             ref_ann = ref_ann,
174             ref_pac = ref_pac,
176             dbSNP_vcf = dbSNP_vcf,
177             dbSNP_vcf_index = dbSNP_vcf_index,
178             known_indels_sites_VCFs = known_indels_sites_VCFs,
179             known_indels_sites_indices = known_indels_sites_indices,
181             preemptible_tries = preemptible_tries,
182             agg_preemptible_tries = agg_preemptible_tries,
184             cutoff_for_large_rg_in_gb = cutoff_for_large_rg_in_gb,
186             increase_disk_size = increase_disk_size,
188             compression_level = compression_level
189     }
191     call M2.Mutect2 as M2Pair {
            # Calls the Mutect2 workflow imported from mutect2.wdl (alias M2) on the
            # tumor/normal pair. The BAMs and indices are the outputs of PreProcessTumor
            # and PreProcessNormal, so this call depends on both of them.
            # `intervals` reuses wgs_coverage_interval_list, and the reference trio
            # (ref_fasta / ref_fai / ref_dict) reuses the preprocessing reference inputs.
192         input:
193             intervals = wgs_coverage_interval_list,
194             tumor_bam = PreProcessTumor.output_bam,
195             tumor_bai = PreProcessTumor.output_bam_index,
196             normal_bam = PreProcessNormal.output_bam,
197             normal_bai = PreProcessNormal.output_bam_index,
198             pon = pon,
199             pon_index = pon_index,
200             scatter_count = scatter_count,
201             gnomad = gnomad,
202             gnomad_index = gnomad_index,
203             variants_for_contamination = variants_for_contamination,
204             variants_for_contamination_index = variants_for_contamination_index,
205             run_orientation_bias_filter = is_run_orientation_bias_filter,
206             run_oncotator = is_run_oncotator,
            # The GATK jar override is the same optional file that CNVPair receives
            # as gatk4_jar_override.
208             gatk_override = gatk4_jar_override,
209             onco_ds_tar_gz = onco_ds_tar_gz,
210             onco_ds_local_db_dir = onco_ds_local_db_dir,
211             artifact_modes = artifact_modes,
212             m2_extra_args = m2_extra_args,
213             m2_extra_filtering_args = m2_extra_filtering_args,
214             sequencing_center = sequencing_center,
215             sequence_source = sequence_source,
216             default_config_file = default_config_file,
            # Uses preemptible_attempts, not the preemptible_tries value passed to
            # the preprocessing calls.
218             preemptible_attempts = preemptible_attempts,
219             gatk_docker = gatk_docker,
220             basic_bash_docker = basic_bash_docker,
221             oncotator_docker = oncotator_docker,
223             ref_fasta = ref_fasta,
224             ref_fai = ref_fasta_index,
225             ref_dict = ref_dict,
            # Hard-coded literal, not exposed as a workflow input.
            # NOTE(review): units and exact meaning are defined in mutect2.wdl - confirm there.
227             emergency_extra_disk = 20
228     }
230     call cnvSomaticPairWorkflow.CNVSomaticPairWorkflow as CNVPair {
            # Calls the CNV pair workflow imported from cnv_somatic_pair_workflow.wdl on
            # the same pre-processed tumor/normal BAMs that M2Pair uses, so it also
            # depends on both PreProcessTumor and PreProcessNormal.
            # `intervals` reuses wgs_coverage_interval_list. No preemptible setting is
            # passed from this workflow to this call.
231         input:
232             intervals = wgs_coverage_interval_list,
233             common_sites = common_sites,
234             tumor_bam = PreProcessTumor.output_bam,
235             tumor_bam_idx = PreProcessTumor.output_bam_index,
236             normal_bam = PreProcessNormal.output_bam,
237             normal_bam_idx = PreProcessNormal.output_bam_index,
238             ref_fasta = ref_fasta,
239             ref_fasta_dict = ref_dict,
240             ref_fasta_fai = ref_fasta_index,
241             read_count_pon = read_count_pon,
242             gatk4_jar_override = gatk4_jar_override,
243             gatk_docker = gatk_docker,
244             is_run_oncotator = is_run_oncotator,
245             bin_length = bin_length,
246             mem_gb_for_model_segments = mem_gb_for_model_segments
247     }