1 # Workflow for running the GATK CNV pipeline on a matched pair. Supports both WGS and WES.
5 # - The intervals argument is required for both WGS and WES workflows and accepts formats compatible with the
6 # GATK -L argument (see https://gatkforums.broadinstitute.org/gatk/discussion/11009/intervals-and-interval-lists).
7 # These intervals will be padded on both sides by the amount specified by padding (default 250)
8 # and split into bins of length specified by bin_length (default 1000; specify 0 to skip binning,
9 # e.g., for WES). For WGS, the intervals should simply cover the autosomal chromosomes (sex chromosomes may be
10 # included, but care should be taken to 1) avoid creating panels of mixed sex, and 2) denoise case samples only
11 # with panels containing only individuals of the same sex as the case samples).
13 # - Intervals can be blacklisted from coverage collection and all downstream steps by using the blacklist_intervals
14 # argument, which accepts formats compatible with the GATK -XL argument
15 # (see https://gatkforums.broadinstitute.org/gatk/discussion/11009/intervals-and-interval-lists).
16 # This may be useful for excluding centromeric regions, etc. from analysis. Alternatively, these regions may
17 # be manually filtered from the final callset.
19 # A reasonable blacklist for excluded intervals (-XL) can be found at:
20 # hg19: gs://gatk-best-practices/somatic-b37/CNV_and_centromere_blacklist.hg19.list
21 # hg38: gs://gatk-best-practices/somatic-hg38/CNV_and_centromere_blacklist.hg38liftover.list (untested)
23 # - The sites file (common_sites) should be a Picard or GATK-style interval list. This is a list of sites
24 # of known variation at which allelic counts will be collected for use in modeling minor-allele fractions.
26 # - If you opt to run FuncotateSegments (i.e. set `is_run_funcotator` to `true`), then please also ensure that you have
27 # the correct value for `funcotator_ref_version`. Treat `funcotator_ref_version` as required if
28 # `is_run_funcotator` is `true`. Valid values for `funcotator_ref_version` are `hg38` and `hg19`.
29 # The latter includes GRCh37.
32 # - Example invocation:
34 # java -jar cromwell.jar run cnv_somatic_pair_workflow.wdl -i my_parameters.json
38 import "cnv_somatic/cnv_common_tasks.wdl" as CNVTasks
39 import "cnv_somatic/cnv_somatic_oncotator_workflow.wdl" as CNVOncotator
40 import "cnv_somatic/cnv_somatic_funcotate_seg_workflow.wdl" as CNVFuncotateSegments
42 #import "https://raw.githubusercontent.com/gatk-workflows/gatk4-somatic-cnvs/1.4.0/tasks/cnv_common_tasks.wdl" as CNVTasks
43 #import "https://raw.githubusercontent.com/gatk-workflows/gatk4-somatic-cnvs/1.4.0/tasks/cnv_somatic_oncotator_workflow.wdl" as CNVOncotator
44 #import "https://raw.githubusercontent.com/gatk-workflows/gatk4-somatic-cnvs/1.4.0/tasks/cnv_somatic_funcotate_seg_workflow.wdl" as CNVFuncotateSegments
46 workflow CNVSomaticPairWorkflow {
48 ##################################
49 #### required basic arguments ####
50 ##################################
53 File? blacklist_intervals
64 ##################################
65 #### optional basic arguments ####
66 ##################################
67 # For running oncotator
68 Boolean? is_run_oncotator
69 # For running funcotator
70 Boolean? is_run_funcotator
72 File? gatk4_jar_override
73 Int? preemptible_attempts
74 # Use as a last resort to increase the disk given to every task in case of ill-behaved data
75 Int? emergency_extra_disk
77 ####################################################
78 #### optional arguments for PreprocessIntervals ####
79 ####################################################
82 Int? mem_gb_for_preprocess_intervals
84 ##############################################
85 #### optional arguments for CollectCounts ####
86 ##############################################
87 String? collect_counts_format
88 Int? mem_gb_for_collect_counts
90 #####################################################
91 #### optional arguments for CollectAllelicCounts ####
92 #####################################################
93 String? minimum_base_quality
94 Int? mem_gb_for_collect_allelic_counts
96 ##################################################
97 #### optional arguments for DenoiseReadCounts ####
98 ##################################################
99 Int? number_of_eigensamples
100 Int? mem_gb_for_denoise_read_counts
102 ##############################################
103 #### optional arguments for ModelSegments ####
104 ##############################################
105 Int? max_num_segments_per_chromosome
106 Int? min_total_allele_count
107 Int? min_total_allele_count_normal
108 Float? genotyping_homozygous_log_ratio_threshold
109 Float? genotyping_base_error_rate
110 Float? kernel_variance_copy_ratio
111 Float? kernel_variance_allele_fraction
112 Float? kernel_scaling_allele_fraction
113 Int? kernel_approximation_dimension
114 Array[Int]+? window_sizes = [8, 16, 32, 64, 128, 256]
115 Float? num_changepoints_penalty_factor
116 Float? minor_allele_fraction_prior_alpha
117 Int? num_samples_copy_ratio
118 Int? num_burn_in_copy_ratio
119 Int? num_samples_allele_fraction
120 Int? num_burn_in_allele_fraction
121 Float? smoothing_threshold_copy_ratio
122 Float? smoothing_threshold_allele_fraction
123 Int? max_num_smoothing_iterations
124 Int? num_smoothing_iterations_per_fit
125 Int? mem_gb_for_model_segments
127 ######################################################
128 #### optional arguments for CallCopyRatioSegments ####
129 ######################################################
130 Float? neutral_segment_copy_ratio_lower_bound
131 Float? neutral_segment_copy_ratio_upper_bound
132 Float? outlier_neutral_segment_copy_ratio_z_score_threshold
133 Float? calling_copy_ratio_z_score_threshold
134 Int? mem_gb_for_call_copy_ratio_segments
136 #########################################
137 #### optional arguments for plotting ####
138 #########################################
139 Int? minimum_contig_length
140 Int? mem_gb_for_plotting
142 ##########################################
143 #### optional arguments for Oncotator ####
144 ##########################################
145 String? additional_args_for_oncotator
146 String? oncotator_docker
147 Int? mem_gb_for_oncotator
148 Int? boot_disk_space_gb_for_oncotator
150 ##################################################
151 #### optional arguments for FuncotateSegments ####
152 ##################################################
153 String? additional_args_for_funcotator
154 String? funcotator_ref_version
155 Int? mem_gb_for_funcotator
156 File? funcotator_transcript_selection_list
157 File? funcotator_data_sources_tar_gz
158 String? funcotator_transcript_selection_mode
159 Array[String]? funcotator_annotation_defaults
160 Array[String]? funcotator_annotation_overrides
161 Array[String]? funcotator_excluded_fields
162 Boolean? funcotator_is_removing_untared_datasources
163 Int? funcotator_disk_space_gb
164 Boolean? funcotator_use_ssd
167 Int ref_size = ceil(size(ref_fasta, "GB") + size(ref_fasta_dict, "GB") + size(ref_fasta_fai, "GB"))
168 Int read_count_pon_size = ceil(size(read_count_pon, "GB"))
169 Int tumor_bam_size = ceil(size(tumor_bam, "GB") + size(tumor_bam_idx, "GB"))
170 Int normal_bam_size = if defined(normal_bam) then ceil(size(normal_bam, "GB") + size(normal_bam_idx, "GB")) else 0
172 Int gatk4_override_size = if defined(gatk4_jar_override) then ceil(size(gatk4_jar_override, "GB")) else 0
173 # This is added to every task's disk request as padding; increase it if every call systematically needs more disk
174 Int disk_pad = 20 + ceil(size(intervals, "GB")) + ceil(size(common_sites, "GB")) + gatk4_override_size + select_first([emergency_extra_disk, 0])
176 File final_normal_bam = select_first([normal_bam, "null"])
177 File final_normal_bam_idx = select_first([normal_bam_idx, "null"])
179 Int preprocess_intervals_disk = ref_size + disk_pad
180 call CNVTasks.PreprocessIntervals {
182 intervals = intervals,
183 blacklist_intervals = blacklist_intervals,
184 ref_fasta = ref_fasta,
185 ref_fasta_fai = ref_fasta_fai,
186 ref_fasta_dict = ref_fasta_dict,
188 bin_length = bin_length,
189 gatk4_jar_override = gatk4_jar_override,
190 gatk_docker = gatk_docker,
191 mem_gb = mem_gb_for_preprocess_intervals,
192 disk_space_gb = preprocess_intervals_disk,
193 preemptible_attempts = preemptible_attempts
196 Int collect_counts_tumor_disk = tumor_bam_size + ceil(size(PreprocessIntervals.preprocessed_intervals, "GB")) + disk_pad
197 call CNVTasks.CollectCounts as CollectCountsTumor {
199 intervals = PreprocessIntervals.preprocessed_intervals,
201 bam_idx = tumor_bam_idx,
202 ref_fasta = ref_fasta,
203 ref_fasta_fai = ref_fasta_fai,
204 ref_fasta_dict = ref_fasta_dict,
205 format = collect_counts_format,
206 gatk4_jar_override = gatk4_jar_override,
207 gatk_docker = gatk_docker,
208 mem_gb = mem_gb_for_collect_counts,
209 disk_space_gb = collect_counts_tumor_disk,
210 preemptible_attempts = preemptible_attempts
213 Int collect_allelic_counts_tumor_disk = tumor_bam_size + ref_size + disk_pad
214 call CNVTasks.CollectAllelicCounts as CollectAllelicCountsTumor {
216 common_sites = common_sites,
218 bam_idx = tumor_bam_idx,
219 ref_fasta = ref_fasta,
220 ref_fasta_dict = ref_fasta_dict,
221 ref_fasta_fai = ref_fasta_fai,
222 minimum_base_quality = minimum_base_quality,
223 gatk4_jar_override = gatk4_jar_override,
224 gatk_docker = gatk_docker,
225 mem_gb = mem_gb_for_collect_allelic_counts,
226 disk_space_gb = collect_allelic_counts_tumor_disk,
227 preemptible_attempts = preemptible_attempts
230 Int denoise_read_counts_tumor_disk = read_count_pon_size + ceil(size(CollectCountsTumor.counts, "GB")) + disk_pad
231 call DenoiseReadCounts as DenoiseReadCountsTumor {
233 entity_id = CollectCountsTumor.entity_id,
234 read_counts = CollectCountsTumor.counts,
235 read_count_pon = read_count_pon,
236 number_of_eigensamples = number_of_eigensamples,
237 gatk4_jar_override = gatk4_jar_override,
238 gatk_docker = gatk_docker,
239 mem_gb = mem_gb_for_denoise_read_counts,
240 disk_space_gb = denoise_read_counts_tumor_disk,
241 preemptible_attempts = preemptible_attempts
244 Int model_segments_normal_portion = if defined(normal_bam) then ceil(size(CollectAllelicCountsNormal.allelic_counts, "GB")) else 0
245 Int model_segments_tumor_disk = ceil(size(DenoiseReadCountsTumor.denoised_copy_ratios, "GB")) + ceil(size(CollectAllelicCountsTumor.allelic_counts, "GB")) + model_segments_normal_portion + disk_pad
246 call ModelSegments as ModelSegmentsTumor {
248 entity_id = CollectCountsTumor.entity_id,
249 denoised_copy_ratios = DenoiseReadCountsTumor.denoised_copy_ratios,
250 allelic_counts = CollectAllelicCountsTumor.allelic_counts,
251 normal_allelic_counts = CollectAllelicCountsNormal.allelic_counts,
252 max_num_segments_per_chromosome = max_num_segments_per_chromosome,
253 min_total_allele_count = min_total_allele_count,
254 min_total_allele_count_normal = min_total_allele_count_normal,
255 genotyping_homozygous_log_ratio_threshold = genotyping_homozygous_log_ratio_threshold,
256 genotyping_base_error_rate = genotyping_base_error_rate,
257 kernel_variance_copy_ratio = kernel_variance_copy_ratio,
258 kernel_variance_allele_fraction = kernel_variance_allele_fraction,
259 kernel_scaling_allele_fraction = kernel_scaling_allele_fraction,
260 kernel_approximation_dimension = kernel_approximation_dimension,
261 window_sizes = window_sizes,
262 num_changepoints_penalty_factor = num_changepoints_penalty_factor,
263 minor_allele_fraction_prior_alpha = minor_allele_fraction_prior_alpha,
264 num_samples_copy_ratio = num_samples_copy_ratio,
265 num_burn_in_copy_ratio = num_burn_in_copy_ratio,
266 num_samples_allele_fraction = num_samples_allele_fraction,
267 num_burn_in_allele_fraction = num_burn_in_allele_fraction,
268 smoothing_threshold_copy_ratio = smoothing_threshold_copy_ratio,
269 smoothing_threshold_allele_fraction = smoothing_threshold_allele_fraction,
270 max_num_smoothing_iterations = max_num_smoothing_iterations,
271 num_smoothing_iterations_per_fit = num_smoothing_iterations_per_fit,
272 gatk4_jar_override = gatk4_jar_override,
273 gatk_docker = gatk_docker,
274 mem_gb = mem_gb_for_model_segments,
275 disk_space_gb = model_segments_tumor_disk,
276 preemptible_attempts = preemptible_attempts
279 Int copy_ratio_segments_tumor_disk = ceil(size(DenoiseReadCountsTumor.denoised_copy_ratios, "GB")) + ceil(size(ModelSegmentsTumor.copy_ratio_only_segments, "GB")) + disk_pad
280 call CallCopyRatioSegments as CallCopyRatioSegmentsTumor {
282 entity_id = CollectCountsTumor.entity_id,
283 copy_ratio_segments = ModelSegmentsTumor.copy_ratio_only_segments,
284 neutral_segment_copy_ratio_lower_bound = neutral_segment_copy_ratio_lower_bound,
285 neutral_segment_copy_ratio_upper_bound = neutral_segment_copy_ratio_upper_bound,
286 outlier_neutral_segment_copy_ratio_z_score_threshold = outlier_neutral_segment_copy_ratio_z_score_threshold,
287 calling_copy_ratio_z_score_threshold = calling_copy_ratio_z_score_threshold,
288 gatk4_jar_override = gatk4_jar_override,
289 gatk_docker = gatk_docker,
290 mem_gb = mem_gb_for_call_copy_ratio_segments,
291 disk_space_gb = copy_ratio_segments_tumor_disk,
292 preemptible_attempts = preemptible_attempts
295 # The files from other tasks are small enough to just combine into one disk variable and pass to the tumor plotting tasks
296 Int plot_tumor_disk = ref_size + ceil(size(DenoiseReadCountsTumor.standardized_copy_ratios, "GB")) + ceil(size(DenoiseReadCountsTumor.denoised_copy_ratios, "GB")) + ceil(size(ModelSegmentsTumor.het_allelic_counts, "GB")) + ceil(size(ModelSegmentsTumor.modeled_segments, "GB")) + disk_pad
297 call PlotDenoisedCopyRatios as PlotDenoisedCopyRatiosTumor {
299 entity_id = CollectCountsTumor.entity_id,
300 standardized_copy_ratios = DenoiseReadCountsTumor.standardized_copy_ratios,
301 denoised_copy_ratios = DenoiseReadCountsTumor.denoised_copy_ratios,
302 ref_fasta_dict = ref_fasta_dict,
303 minimum_contig_length = minimum_contig_length,
304 gatk4_jar_override = gatk4_jar_override,
305 gatk_docker = gatk_docker,
306 mem_gb = mem_gb_for_plotting,
307 disk_space_gb = plot_tumor_disk,
308 preemptible_attempts = preemptible_attempts
311 call PlotModeledSegments as PlotModeledSegmentsTumor {
313 entity_id = CollectCountsTumor.entity_id,
314 denoised_copy_ratios = DenoiseReadCountsTumor.denoised_copy_ratios,
315 het_allelic_counts = ModelSegmentsTumor.het_allelic_counts,
316 modeled_segments = ModelSegmentsTumor.modeled_segments,
317 ref_fasta_dict = ref_fasta_dict,
318 minimum_contig_length = minimum_contig_length,
319 gatk4_jar_override = gatk4_jar_override,
320 gatk_docker = gatk_docker,
321 mem_gb = mem_gb_for_plotting,
322 disk_space_gb = plot_tumor_disk,
323 preemptible_attempts = preemptible_attempts
326 Int collect_counts_normal_disk = normal_bam_size + ceil(size(PreprocessIntervals.preprocessed_intervals, "GB")) + disk_pad
327 if (defined(normal_bam)) {
328 call CNVTasks.CollectCounts as CollectCountsNormal {
330 intervals = PreprocessIntervals.preprocessed_intervals,
331 bam = final_normal_bam,
332 bam_idx = final_normal_bam_idx,
333 ref_fasta = ref_fasta,
334 ref_fasta_fai = ref_fasta_fai,
335 ref_fasta_dict = ref_fasta_dict,
336 format = collect_counts_format,
337 gatk4_jar_override = gatk4_jar_override,
338 gatk_docker = gatk_docker,
339 mem_gb = mem_gb_for_collect_counts,
340 disk_space_gb = collect_counts_normal_disk,
341 preemptible_attempts = preemptible_attempts
344 Int collect_allelic_counts_normal_disk = normal_bam_size + ref_size + disk_pad
345 call CNVTasks.CollectAllelicCounts as CollectAllelicCountsNormal {
347 common_sites = common_sites,
348 bam = final_normal_bam,
349 bam_idx = final_normal_bam_idx,
350 ref_fasta = ref_fasta,
351 ref_fasta_dict = ref_fasta_dict,
352 ref_fasta_fai = ref_fasta_fai,
353 minimum_base_quality = minimum_base_quality,
354 gatk4_jar_override = gatk4_jar_override,
355 gatk_docker = gatk_docker,
356 mem_gb = mem_gb_for_collect_allelic_counts,
357 disk_space_gb = collect_allelic_counts_normal_disk,
358 preemptible_attempts = preemptible_attempts
361 Int denoise_read_counts_normal_disk = read_count_pon_size + ceil(size(CollectCountsNormal.counts, "GB")) + disk_pad
362 call DenoiseReadCounts as DenoiseReadCountsNormal {
364 entity_id = CollectCountsNormal.entity_id,
365 read_counts = CollectCountsNormal.counts,
366 read_count_pon = read_count_pon,
367 number_of_eigensamples = number_of_eigensamples,
368 gatk4_jar_override = gatk4_jar_override,
369 gatk_docker = gatk_docker,
370 mem_gb = mem_gb_for_denoise_read_counts,
371 disk_space_gb = denoise_read_counts_normal_disk,
372 preemptible_attempts = preemptible_attempts
375 Int model_segments_normal_disk = ceil(size(DenoiseReadCountsNormal.denoised_copy_ratios, "GB")) + ceil(size(CollectAllelicCountsNormal.allelic_counts, "GB")) + disk_pad
376 call ModelSegments as ModelSegmentsNormal {
378 entity_id = CollectCountsNormal.entity_id,
379 denoised_copy_ratios = DenoiseReadCountsNormal.denoised_copy_ratios,
380 allelic_counts = CollectAllelicCountsNormal.allelic_counts,
381 max_num_segments_per_chromosome = max_num_segments_per_chromosome,
382 min_total_allele_count = min_total_allele_count_normal,
383 genotyping_homozygous_log_ratio_threshold = genotyping_homozygous_log_ratio_threshold,
384 genotyping_base_error_rate = genotyping_base_error_rate,
385 kernel_variance_copy_ratio = kernel_variance_copy_ratio,
386 kernel_variance_allele_fraction = kernel_variance_allele_fraction,
387 kernel_scaling_allele_fraction = kernel_scaling_allele_fraction,
388 kernel_approximation_dimension = kernel_approximation_dimension,
389 window_sizes = window_sizes,
390 num_changepoints_penalty_factor = num_changepoints_penalty_factor,
391 minor_allele_fraction_prior_alpha = minor_allele_fraction_prior_alpha,
392 num_samples_copy_ratio = num_samples_copy_ratio,
393 num_burn_in_copy_ratio = num_burn_in_copy_ratio,
394 num_samples_allele_fraction = num_samples_allele_fraction,
395 num_burn_in_allele_fraction = num_burn_in_allele_fraction,
396 smoothing_threshold_copy_ratio = smoothing_threshold_copy_ratio,
397 smoothing_threshold_allele_fraction = smoothing_threshold_allele_fraction,
398 max_num_smoothing_iterations = max_num_smoothing_iterations,
399 num_smoothing_iterations_per_fit = num_smoothing_iterations_per_fit,
400 gatk4_jar_override = gatk4_jar_override,
401 gatk_docker = gatk_docker,
402 mem_gb = mem_gb_for_model_segments,
403 disk_space_gb = model_segments_normal_disk,
404 preemptible_attempts = preemptible_attempts
407 Int copy_ratio_segments_normal_disk = ceil(size(DenoiseReadCountsNormal.denoised_copy_ratios, "GB")) + ceil(size(ModelSegmentsNormal.copy_ratio_only_segments, "GB")) + disk_pad
408 call CallCopyRatioSegments as CallCopyRatioSegmentsNormal {
410 entity_id = CollectCountsNormal.entity_id,
411 copy_ratio_segments = ModelSegmentsNormal.copy_ratio_only_segments,
412 neutral_segment_copy_ratio_lower_bound = neutral_segment_copy_ratio_lower_bound,
413 neutral_segment_copy_ratio_upper_bound = neutral_segment_copy_ratio_upper_bound,
414 outlier_neutral_segment_copy_ratio_z_score_threshold = outlier_neutral_segment_copy_ratio_z_score_threshold,
415 calling_copy_ratio_z_score_threshold = calling_copy_ratio_z_score_threshold,
416 gatk4_jar_override = gatk4_jar_override,
417 gatk_docker = gatk_docker,
418 mem_gb = mem_gb_for_call_copy_ratio_segments,
419 disk_space_gb = copy_ratio_segments_normal_disk,
420 preemptible_attempts = preemptible_attempts
423 # The files from other tasks are small enough to just combine into one disk variable and pass to the normal plotting tasks
424 Int plot_normal_disk = ref_size + ceil(size(DenoiseReadCountsNormal.standardized_copy_ratios, "GB")) + ceil(size(DenoiseReadCountsNormal.denoised_copy_ratios, "GB")) + ceil(size(ModelSegmentsNormal.het_allelic_counts, "GB")) + ceil(size(ModelSegmentsNormal.modeled_segments, "GB")) + disk_pad
425 call PlotDenoisedCopyRatios as PlotDenoisedCopyRatiosNormal {
427 entity_id = CollectCountsNormal.entity_id,
428 standardized_copy_ratios = DenoiseReadCountsNormal.standardized_copy_ratios,
429 denoised_copy_ratios = DenoiseReadCountsNormal.denoised_copy_ratios,
430 ref_fasta_dict = ref_fasta_dict,
431 minimum_contig_length = minimum_contig_length,
432 gatk4_jar_override = gatk4_jar_override,
433 gatk_docker = gatk_docker,
434 mem_gb = mem_gb_for_plotting,
435 disk_space_gb = plot_normal_disk,
436 preemptible_attempts = preemptible_attempts
438 call PlotModeledSegments as PlotModeledSegmentsNormal {
440 entity_id = CollectCountsNormal.entity_id,
441 denoised_copy_ratios = DenoiseReadCountsNormal.denoised_copy_ratios,
442 het_allelic_counts = ModelSegmentsNormal.het_allelic_counts,
443 modeled_segments = ModelSegmentsNormal.modeled_segments,
444 ref_fasta_dict = ref_fasta_dict,
445 minimum_contig_length = minimum_contig_length,
446 gatk4_jar_override = gatk4_jar_override,
447 gatk_docker = gatk_docker,
448 mem_gb = mem_gb_for_plotting,
449 disk_space_gb = plot_normal_disk,
450 preemptible_attempts = preemptible_attempts
454 if (select_first([is_run_oncotator, false])) {
455 call CNVOncotator.CNVOncotatorWorkflow as CNVOncotatorWorkflow {
457 called_file = CallCopyRatioSegmentsTumor.called_copy_ratio_segments,
458 additional_args = additional_args_for_oncotator,
459 oncotator_docker = oncotator_docker,
460 mem_gb_for_oncotator = mem_gb_for_oncotator,
461 boot_disk_space_gb_for_oncotator = boot_disk_space_gb_for_oncotator,
462 preemptible_attempts = preemptible_attempts
465 if (select_first([is_run_funcotator, false])) {
466 call CNVFuncotateSegments.CNVFuncotateSegmentsWorkflow as CNVFuncotateSegmentsWorkflow {
468 input_seg_file = CallCopyRatioSegmentsTumor.called_copy_ratio_segments,
469 funcotator_ref_version = select_first([funcotator_ref_version, "hg19"]),
470 extra_args = additional_args_for_funcotator,
471 ref_fasta = ref_fasta,
472 ref_fasta_fai = ref_fasta_fai,
473 ref_fasta_dict = ref_fasta_dict,
474 transcript_selection_list = funcotator_transcript_selection_list,
475 funcotator_data_sources_tar_gz = funcotator_data_sources_tar_gz,
476 gatk4_jar_override = gatk4_jar_override,
477 gatk_docker = gatk_docker,
478 mem_gb = mem_gb_for_funcotator,
479 preemptible_attempts = preemptible_attempts,
480 transcript_selection_mode = funcotator_transcript_selection_mode,
481 annotation_defaults = funcotator_annotation_defaults,
482 annotation_overrides = funcotator_annotation_overrides,
483 funcotator_excluded_fields = funcotator_excluded_fields,
484 is_removing_untared_datasources = funcotator_is_removing_untared_datasources,
485 disk_space_gb = funcotator_disk_space_gb,
486 use_ssd = funcotator_use_ssd,
492 File preprocessed_intervals = PreprocessIntervals.preprocessed_intervals
494 File read_counts_entity_id_tumor = CollectCountsTumor.entity_id
495 File read_counts_tumor = CollectCountsTumor.counts
496 File allelic_counts_entity_id_tumor = CollectAllelicCountsTumor.entity_id
497 File allelic_counts_tumor = CollectAllelicCountsTumor.allelic_counts
498 File denoised_copy_ratios_tumor = DenoiseReadCountsTumor.denoised_copy_ratios
499 File standardized_copy_ratios_tumor = DenoiseReadCountsTumor.standardized_copy_ratios
500 File het_allelic_counts_tumor = ModelSegmentsTumor.het_allelic_counts
501 File normal_het_allelic_counts_tumor = ModelSegmentsTumor.normal_het_allelic_counts
502 File copy_ratio_only_segments_tumor = ModelSegmentsTumor.copy_ratio_only_segments
503 File copy_ratio_legacy_segments_tumor = ModelSegmentsTumor.copy_ratio_legacy_segments
504 File allele_fraction_legacy_segments_tumor = ModelSegmentsTumor.allele_fraction_legacy_segments
505 File modeled_segments_begin_tumor = ModelSegmentsTumor.modeled_segments_begin
506 File copy_ratio_parameters_begin_tumor = ModelSegmentsTumor.copy_ratio_parameters_begin
507 File allele_fraction_parameters_begin_tumor = ModelSegmentsTumor.allele_fraction_parameters_begin
508 File modeled_segments_tumor = ModelSegmentsTumor.modeled_segments
509 File copy_ratio_parameters_tumor = ModelSegmentsTumor.copy_ratio_parameters
510 File allele_fraction_parameters_tumor = ModelSegmentsTumor.allele_fraction_parameters
511 File called_copy_ratio_segments_tumor = CallCopyRatioSegmentsTumor.called_copy_ratio_segments
512 File called_copy_ratio_legacy_segments_tumor = CallCopyRatioSegmentsTumor.called_copy_ratio_legacy_segments
513 File denoised_copy_ratios_plot_tumor = PlotDenoisedCopyRatiosTumor.denoised_copy_ratios_plot
514 File denoised_copy_ratios_lim_4_plot_tumor = PlotDenoisedCopyRatiosTumor.denoised_copy_ratios_lim_4_plot
515 File standardized_MAD_tumor = PlotDenoisedCopyRatiosTumor.standardized_MAD
516 Float standardized_MAD_value_tumor = PlotDenoisedCopyRatiosTumor.standardized_MAD_value
517 File denoised_MAD_tumor = PlotDenoisedCopyRatiosTumor.denoised_MAD
518 Float denoised_MAD_value_tumor = PlotDenoisedCopyRatiosTumor.denoised_MAD_value
519 File delta_MAD_tumor = PlotDenoisedCopyRatiosTumor.delta_MAD
520 Float delta_MAD_value_tumor = PlotDenoisedCopyRatiosTumor.delta_MAD_value
521 File scaled_delta_MAD_tumor = PlotDenoisedCopyRatiosTumor.scaled_delta_MAD
522 Float scaled_delta_MAD_value_tumor = PlotDenoisedCopyRatiosTumor.scaled_delta_MAD_value
523 File modeled_segments_plot_tumor = PlotModeledSegmentsTumor.modeled_segments_plot
525 File? read_counts_entity_id_normal = CollectCountsNormal.entity_id
526 File? read_counts_normal = CollectCountsNormal.counts
527 File? allelic_counts_entity_id_normal = CollectAllelicCountsNormal.entity_id
528 File? allelic_counts_normal = CollectAllelicCountsNormal.allelic_counts
529 File? denoised_copy_ratios_normal = DenoiseReadCountsNormal.denoised_copy_ratios
530 File? standardized_copy_ratios_normal = DenoiseReadCountsNormal.standardized_copy_ratios
531 File? het_allelic_counts_normal = ModelSegmentsNormal.het_allelic_counts
532 File? normal_het_allelic_counts_normal = ModelSegmentsNormal.normal_het_allelic_counts
533 File? copy_ratio_only_segments_normal = ModelSegmentsNormal.copy_ratio_only_segments
534 File? copy_ratio_legacy_segments_normal = ModelSegmentsNormal.copy_ratio_legacy_segments
535 File? allele_fraction_legacy_segments_normal = ModelSegmentsNormal.allele_fraction_legacy_segments
536 File? modeled_segments_begin_normal = ModelSegmentsNormal.modeled_segments_begin
537 File? copy_ratio_parameters_begin_normal = ModelSegmentsNormal.copy_ratio_parameters_begin
538 File? allele_fraction_parameters_begin_normal = ModelSegmentsNormal.allele_fraction_parameters_begin
539 File? modeled_segments_normal = ModelSegmentsNormal.modeled_segments
540 File? copy_ratio_parameters_normal = ModelSegmentsNormal.copy_ratio_parameters
541 File? allele_fraction_parameters_normal = ModelSegmentsNormal.allele_fraction_parameters
542 File? called_copy_ratio_segments_normal = CallCopyRatioSegmentsNormal.called_copy_ratio_segments
543 File? called_copy_ratio_legacy_segments_normal = CallCopyRatioSegmentsNormal.called_copy_ratio_legacy_segments
544 File? denoised_copy_ratios_plot_normal = PlotDenoisedCopyRatiosNormal.denoised_copy_ratios_plot
545 File? denoised_copy_ratios_lim_4_plot_normal = PlotDenoisedCopyRatiosNormal.denoised_copy_ratios_lim_4_plot
546 File? standardized_MAD_normal = PlotDenoisedCopyRatiosNormal.standardized_MAD
547 Float? standardized_MAD_value_normal = PlotDenoisedCopyRatiosNormal.standardized_MAD_value
548 File? denoised_MAD_normal = PlotDenoisedCopyRatiosNormal.denoised_MAD
549 Float? denoised_MAD_value_normal = PlotDenoisedCopyRatiosNormal.denoised_MAD_value
550 File? delta_MAD_normal = PlotDenoisedCopyRatiosNormal.delta_MAD
551 Float? delta_MAD_value_normal = PlotDenoisedCopyRatiosNormal.delta_MAD_value
552 File? scaled_delta_MAD_normal = PlotDenoisedCopyRatiosNormal.scaled_delta_MAD
553 Float? scaled_delta_MAD_value_normal = PlotDenoisedCopyRatiosNormal.scaled_delta_MAD_value
554 File? modeled_segments_plot_normal = PlotModeledSegmentsNormal.modeled_segments_plot
556 File oncotated_called_file_tumor = select_first([CNVOncotatorWorkflow.oncotated_called_file, "null"])
557 File oncotated_called_gene_list_file_tumor = select_first([CNVOncotatorWorkflow.oncotated_called_gene_list_file, "null"])
558 File funcotated_called_file_tumor = select_first([CNVFuncotateSegmentsWorkflow.funcotated_seg_simple_tsv, "null"])
559 File funcotated_called_gene_list_file_tumor = select_first([CNVFuncotateSegmentsWorkflow.funcotated_gene_list_tsv, "null"])
563 task DenoiseReadCounts {
567 Int? number_of_eigensamples #use all eigensamples in panel by default
568 File? gatk4_jar_override
574 Boolean use_ssd = false
576 Int? preemptible_attempts
578 Int machine_mem_mb = select_first([mem_gb, 13]) * 1000
579 Int command_mem_mb = machine_mem_mb - 1000
583 #export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override}
585 gatk --java-options "-Xmx${command_mem_mb}m" DenoiseReadCounts \
586 --input ${read_counts} \
587 --count-panel-of-normals ${read_count_pon} \
588 ${"--number-of-eigensamples " + number_of_eigensamples} \
589 --standardized-copy-ratios ${entity_id}.standardizedCR.tsv \
590 --denoised-copy-ratios ${entity_id}.denoisedCR.tsv
594 #docker: "${gatk_docker}"
595 memory: machine_mem_mb + " MB"
596 disks: "local-disk " + disk_space_gb + if use_ssd then " SSD" else " HDD"
597 cpu: select_first([cpu, 1])
598 preemptible: select_first([preemptible_attempts, 5])
602 File standardized_copy_ratios = "${entity_id}.standardizedCR.tsv"
603 File denoised_copy_ratios = "${entity_id}.denoisedCR.tsv"
609 File denoised_copy_ratios
611 File? normal_allelic_counts
612 Int? max_num_segments_per_chromosome
613 Int? min_total_allele_count
614 Int? min_total_allele_count_normal
615 Float? genotyping_homozygous_log_ratio_threshold
616 Float? genotyping_base_error_rate
617 Float? kernel_variance_copy_ratio
618 Float? kernel_variance_allele_fraction
619 Float? kernel_scaling_allele_fraction
620 Int? kernel_approximation_dimension
621 Array[Int]+? window_sizes = [8, 16, 32, 64, 128, 256]
622 Float? num_changepoints_penalty_factor
623 Float? minor_allele_fraction_prior_alpha
624 Int? num_samples_copy_ratio
625 Int? num_burn_in_copy_ratio
626 Int? num_samples_allele_fraction
627 Int? num_burn_in_allele_fraction
628 Float? smoothing_threshold_copy_ratio
629 Float? smoothing_threshold_allele_fraction
630 Int? max_num_smoothing_iterations
631 Int? num_smoothing_iterations_per_fit
633 File? gatk4_jar_override
639 Boolean use_ssd = false
641 Int? preemptible_attempts
643 Int machine_mem_mb = select_first([mem_gb, 13]) * 1000
644 # ModelSegments seems to need at least 3GB of overhead to run
645 Int command_mem_mb = machine_mem_mb - 3000
647 # If optional output_dir not specified, use "out"
648 String output_dir_ = select_first([output_dir, "out"])
650 # default values are min_total_allele_count_ = 0 in matched-normal mode
651 # = 30 in case-only mode
652 Int default_min_total_allele_count = if defined(normal_allelic_counts) then 0 else 30
653 Int min_total_allele_count_ = select_first([min_total_allele_count, default_min_total_allele_count])
657 #export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override}
659 gatk --java-options "-Xmx${command_mem_mb}m" ModelSegments \
660 --denoised-copy-ratios ${denoised_copy_ratios} \
661 --allelic-counts ${allelic_counts} \
662 ${"--normal-allelic-counts " + normal_allelic_counts} \
663 --minimum-total-allele-count-case ${min_total_allele_count_} \
664 --minimum-total-allele-count-normal ${default="30" min_total_allele_count_normal} \
665 --genotyping-homozygous-log-ratio-threshold ${default="-10.0" genotyping_homozygous_log_ratio_threshold} \
666 --genotyping-base-error-rate ${default="0.05" genotyping_base_error_rate} \
667 --maximum-number-of-segments-per-chromosome ${default="1000" max_num_segments_per_chromosome} \
668 --kernel-variance-copy-ratio ${default="0.0" kernel_variance_copy_ratio} \
669 --kernel-variance-allele-fraction ${default="0.025" kernel_variance_allele_fraction} \
670 --kernel-scaling-allele-fraction ${default="1.0" kernel_scaling_allele_fraction} \
671 --kernel-approximation-dimension ${default="100" kernel_approximation_dimension} \
672 --window-size ${sep=" --window-size " window_sizes} \
673 --number-of-changepoints-penalty-factor ${default="1.0" num_changepoints_penalty_factor} \
674 --minor-allele-fraction-prior-alpha ${default="25.0" minor_allele_fraction_prior_alpha} \
675 --number-of-samples-copy-ratio ${default="100" num_samples_copy_ratio} \
676 --number-of-burn-in-samples-copy-ratio ${default="50" num_burn_in_copy_ratio} \
677 --number-of-samples-allele-fraction ${default="100" num_samples_allele_fraction} \
678 --number-of-burn-in-samples-allele-fraction ${default="50" num_burn_in_allele_fraction} \
679 --smoothing-credible-interval-threshold-copy-ratio ${default="2.0" smoothing_threshold_copy_ratio} \
680 --smoothing-credible-interval-threshold-allele-fraction ${default="2.0" smoothing_threshold_allele_fraction} \
681 --maximum-number-of-smoothing-iterations ${default="10" max_num_smoothing_iterations} \
682 --number-of-smoothing-iterations-per-fit ${default="0" num_smoothing_iterations_per_fit} \
683 --output ${output_dir_} \
684 --output-prefix ${entity_id}
686 # We need to create the file even if the above command doesn't so we have something to delocalize
687 # If no file is created by the above task then it will copy out an empty file
688 touch ${output_dir_}/${entity_id}.hets.normal.tsv
692 #docker: "${gatk_docker}"
693 memory: machine_mem_mb + " MB"
694 disks: "local-disk " + disk_space_gb + if use_ssd then " SSD" else " HDD"
695 cpu: select_first([cpu, 1])
696 preemptible: select_first([preemptible_attempts, 5])
700 File het_allelic_counts = "${output_dir_}/${entity_id}.hets.tsv"
701 File normal_het_allelic_counts = "${output_dir_}/${entity_id}.hets.normal.tsv"
702 File copy_ratio_only_segments = "${output_dir_}/${entity_id}.cr.seg"
703 File copy_ratio_legacy_segments = "${output_dir_}/${entity_id}.cr.igv.seg"
704 File allele_fraction_legacy_segments = "${output_dir_}/${entity_id}.af.igv.seg"
705 File modeled_segments_begin = "${output_dir_}/${entity_id}.modelBegin.seg"
706 File copy_ratio_parameters_begin = "${output_dir_}/${entity_id}.modelBegin.cr.param"
707 File allele_fraction_parameters_begin = "${output_dir_}/${entity_id}.modelBegin.af.param"
708 File modeled_segments = "${output_dir_}/${entity_id}.modelFinal.seg"
709 File copy_ratio_parameters = "${output_dir_}/${entity_id}.modelFinal.cr.param"
710 File allele_fraction_parameters = "${output_dir_}/${entity_id}.modelFinal.af.param"
# Task CallCopyRatioSegments: runs the GATK CallCopyRatioSegments tool on one
# copy-ratio segments file and exposes the resulting called-segment files.
# NOTE(review): entity_id, mem_gb, disk_space_gb and cpu are used below but their
# declarations are not visible in this excerpt - confirm they are declared as inputs.
714 task CallCopyRatioSegments {
# Segments file handed to the tool's --input argument.
716 File copy_ratio_segments
# Optional calling thresholds; when unset, the command substitutes the inline
# defaults 0.9, 1.1, 2.0 and 2.0 respectively.
717 Float? neutral_segment_copy_ratio_lower_bound
718 Float? neutral_segment_copy_ratio_upper_bound
719 Float? outlier_neutral_segment_copy_ratio_z_score_threshold
720 Float? calling_copy_ratio_z_score_threshold
# In the visible lines this is referenced only on the "#export GATK_LOCAL_JAR"
# line, which is commented out for the shell.
721 File? gatk4_jar_override
# Selects " SSD" instead of " HDD" in the runtime disks string.
727 Boolean use_ssd = false
# Optional preemptible count; the runtime section falls back to 5 when unset.
729 Int? preemptible_attempts
# Machine memory in MB is mem_gb (7 when unset) times 1000; the JVM heap passed
# via -Xmx is 1000 MB smaller than the machine request.
731 Int machine_mem_mb = select_first([mem_gb, 7]) * 1000
732 Int command_mem_mb = machine_mem_mb - 1000
736 #export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override}
738 gatk --java-options "-Xmx${command_mem_mb}m" CallCopyRatioSegments \
739 --input ${copy_ratio_segments} \
740 --neutral-segment-copy-ratio-lower-bound ${default="0.9" neutral_segment_copy_ratio_lower_bound} \
741 --neutral-segment-copy-ratio-upper-bound ${default="1.1" neutral_segment_copy_ratio_upper_bound} \
742 --outlier-neutral-segment-copy-ratio-z-score-threshold ${default="2.0" outlier_neutral_segment_copy_ratio_z_score_threshold} \
743 --calling-copy-ratio-z-score-threshold ${default="2.0" calling_copy_ratio_z_score_threshold} \
744 --output ${entity_id}.called.seg
748 #docker: "${gatk_docker}"
# The docker attribute above is commented out. The attributes below request
# machine_mem_mb of memory, a "local-disk <disk_space_gb> SSD|HDD" disk,
# cpu (1 when unset) and preemptible_attempts (5 when unset).
749 memory: machine_mem_mb + " MB"
750 disks: "local-disk " + disk_space_gb + if use_ssd then " SSD" else " HDD"
751 cpu: select_first([cpu, 1])
752 preemptible: select_first([preemptible_attempts, 5])
# called_copy_ratio_segments is the exact path given to --output above.
# The .called.igv.seg path is not named on the command line - presumably the tool
# writes it alongside the main output; verify against the GATK documentation.
756 File called_copy_ratio_segments = "${entity_id}.called.seg"
757 File called_copy_ratio_legacy_segments = "${entity_id}.called.igv.seg"
# Task PlotDenoisedCopyRatios: runs the GATK PlotDenoisedCopyRatios tool on a
# standardized and a denoised copy-ratio file, then exposes the plot files and
# the MAD (text) files found under the output directory.
# NOTE(review): entity_id, ref_fasta_dict, output_dir, mem_gb, disk_space_gb and cpu
# are used below but their declarations are not visible in this excerpt - confirm
# they are declared as inputs.
761 task PlotDenoisedCopyRatios {
# Passed to --standardized-copy-ratios and --denoised-copy-ratios respectively.
763 File standardized_copy_ratios
764 File denoised_copy_ratios
# Optional value for --minimum-contig-length; the command uses 1000000 when unset.
766 Int? minimum_contig_length
# In the visible lines this is referenced only on the "#export GATK_LOCAL_JAR"
# line, which is commented out for the shell.
768 File? gatk4_jar_override
# Selects " SSD" instead of " HDD" in the runtime disks string.
774 Boolean use_ssd = false
# Optional preemptible count; the runtime section falls back to 5 when unset.
776 Int? preemptible_attempts
# Machine memory in MB is mem_gb (7 when unset) times 1000; the JVM heap passed
# via -Xmx is 1000 MB smaller than the machine request.
778 Int machine_mem_mb = select_first([mem_gb, 7]) * 1000
779 Int command_mem_mb = machine_mem_mb - 1000
781 # If optional output_dir not specified, use "out"
782 String output_dir_ = select_first([output_dir, "out"])
786 #export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override}
788 gatk --java-options "-Xmx${command_mem_mb}m" PlotDenoisedCopyRatios \
789 --standardized-copy-ratios ${standardized_copy_ratios} \
790 --denoised-copy-ratios ${denoised_copy_ratios} \
791 --sequence-dictionary ${ref_fasta_dict} \
792 --minimum-contig-length ${default="1000000" minimum_contig_length} \
793 --output ${output_dir_} \
794 --output-prefix ${entity_id}
798 #docker: "${gatk_docker}"
# The docker attribute above is commented out. The attributes below request
# machine_mem_mb of memory, a "local-disk <disk_space_gb> SSD|HDD" disk,
# cpu (1 when unset) and preemptible_attempts (5 when unset).
799 memory: machine_mem_mb + " MB"
800 disks: "local-disk " + disk_space_gb + if use_ssd then " SSD" else " HDD"
801 cpu: select_first([cpu, 1])
802 preemptible: select_first([preemptible_attempts, 5])
# All output paths are built from output_dir_ and entity_id, the same values
# passed to --output and --output-prefix in the command.
806 File denoised_copy_ratios_plot = "${output_dir_}/${entity_id}.denoised.png"
807 File denoised_copy_ratios_lim_4_plot = "${output_dir_}/${entity_id}.denoisedLimit4.png"
# Each MAD text file below is exposed both as a File and as a Float parsed from
# that file with read_float.
808 File standardized_MAD = "${output_dir_}/${entity_id}.standardizedMAD.txt"
809 Float standardized_MAD_value = read_float(standardized_MAD)
810 File denoised_MAD = "${output_dir_}/${entity_id}.denoisedMAD.txt"
811 Float denoised_MAD_value = read_float(denoised_MAD)
812 File delta_MAD = "${output_dir_}/${entity_id}.deltaMAD.txt"
813 Float delta_MAD_value = read_float(delta_MAD)
# The scaled delta MAD file is first read as a string: the literal "NA" maps to 0,
# any other content is parsed with read_float.
# NOTE(review): the "then" branch is the Int literal 0 inside a Float declaration -
# confirm the execution engine coerces it as intended.
814 File scaled_delta_MAD = "${output_dir_}/${entity_id}.scaledDeltaMAD.txt"
815 String scaled_delta_MAD_str = read_string(scaled_delta_MAD)
816 Float scaled_delta_MAD_value = if scaled_delta_MAD_str == "NA" then 0 else read_float(scaled_delta_MAD)
# Unconditional read_float variant, left commented out.
817 #Float scaled_delta_MAD_value = read_float(scaled_delta_MAD)
# Task PlotModeledSegments: runs the GATK PlotModeledSegments tool on denoised
# copy ratios, het allelic counts and modeled segments, and exposes the plot
# written under the output directory.
# NOTE(review): entity_id, ref_fasta_dict, output_dir, mem_gb, disk_space_gb and cpu
# are used below but their declarations are not visible in this excerpt - confirm
# they are declared as inputs.
821 task PlotModeledSegments {
# Passed to --denoised-copy-ratios, --allelic-counts and --segments respectively.
823 File denoised_copy_ratios
824 File het_allelic_counts
825 File modeled_segments
# Optional value for --minimum-contig-length; the command uses 1000000 when unset.
827 Int? minimum_contig_length
# In the visible lines this is referenced only on the "#export GATK_LOCAL_JAR"
# line, which is commented out for the shell.
829 File? gatk4_jar_override
# Selects " SSD" instead of " HDD" in the runtime disks string.
835 Boolean use_ssd = false
# Optional preemptible count; the runtime section falls back to 5 when unset.
837 Int? preemptible_attempts
# Machine memory in MB is mem_gb (7 when unset) times 1000; the JVM heap passed
# via -Xmx is 1000 MB smaller than the machine request.
839 Int machine_mem_mb = select_first([mem_gb, 7]) * 1000
840 Int command_mem_mb = machine_mem_mb - 1000
842 # If optional output_dir not specified, use "out"
843 String output_dir_ = select_first([output_dir, "out"])
847 #export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override}
849 gatk --java-options "-Xmx${command_mem_mb}m" PlotModeledSegments \
850 --denoised-copy-ratios ${denoised_copy_ratios} \
851 --allelic-counts ${het_allelic_counts} \
852 --segments ${modeled_segments} \
853 --sequence-dictionary ${ref_fasta_dict} \
854 --minimum-contig-length ${default="1000000" minimum_contig_length} \
855 --output ${output_dir_} \
856 --output-prefix ${entity_id}
860 #docker: "${gatk_docker}"
# The docker attribute above is commented out. The attributes below request
# machine_mem_mb of memory, a "local-disk <disk_space_gb> SSD|HDD" disk,
# cpu (1 when unset) and preemptible_attempts (5 when unset).
861 memory: machine_mem_mb + " MB"
862 disks: "local-disk " + disk_space_gb + if use_ssd then " SSD" else " HDD"
863 cpu: select_first([cpu, 1])
864 preemptible: select_first([preemptible_attempts, 5])
# Plot path built from output_dir_ and entity_id, the same values passed to
# --output and --output-prefix in the command.
868 File modeled_segments_plot = "${output_dir_}/${entity_id}.modeled.png"