new file: pixi.toml
[GalaxyCodeBases.git] / etc / gatk-wdl / cnv_somatic_pair_workflow.wdl
blobb3dd37876b6c6bdfa008b7370facff1653e794b0
1 # Workflow for running the GATK CNV pipeline on a matched pair. Supports both WGS and WES.
3 # Notes:
5 # - The intervals argument is required for both WGS and WES workflows and accepts formats compatible with the
6 #   GATK -L argument (see https://gatkforums.broadinstitute.org/gatk/discussion/11009/intervals-and-interval-lists).
7 #   These intervals will be padded on both sides by the amount specified by padding (default 250)
8 #   and split into bins of length specified by bin_length (default 1000; specify 0 to skip binning,
9 #   e.g., for WES).  For WGS, the intervals should simply cover the autosomal chromosomes (sex chromosomes may be
10 #   included, but care should be taken to 1) avoid creating panels of mixed sex, and 2) denoise case samples only
11 #   with panels containing only individuals of the same sex as the case samples).
13 # - Intervals can be blacklisted from coverage collection and all downstream steps by using the blacklist_intervals
14 #   argument, which accepts formats compatible with the GATK -XL argument
15 #   (see https://gatkforums.broadinstitute.org/gatk/discussion/11009/intervals-and-interval-lists).
16 #   This may be useful for excluding centromeric regions, etc. from analysis.  Alternatively, these regions may
17 #   be manually filtered from the final callset.
19 #  A reasonable blacklist for excluded intervals (-XL) can be found at:
20 #   hg19: gs://gatk-best-practices/somatic-b37/CNV_and_centromere_blacklist.hg19.list
21 #   hg38: gs://gatk-best-practices/somatic-hg38/CNV_and_centromere_blacklist.hg38liftover.list (untested)
23 # - The sites file (common_sites) should be a Picard or GATK-style interval list.  This is a list of sites
24 #   of known variation at which allelic counts will be collected for use in modeling minor-allele fractions.
26 # - If you opt to run FuncotateSegments (i.e. set `is_run_funcotator` to `true`), then please also ensure that you have
27 #       the correct value for `funcotator_ref_version`.  Treat `funcotator_ref_version` as required if
28 #       `is_run_funcotator` is `true`.  Valid values for `funcotator_ref_version` are `hg38` and `hg19`.
29 #       The latter includes GRCh37.  If `funcotator_ref_version` is left unset, this workflow passes `hg19`.
32 # - Example invocation:
34 #       java -jar cromwell.jar run cnv_somatic_pair_workflow.wdl -i my_parameters.json
36 #############
38 import "cnv_somatic/cnv_common_tasks.wdl" as CNVTasks
39 import "cnv_somatic/cnv_somatic_oncotator_workflow.wdl" as CNVOncotator
40 import "cnv_somatic/cnv_somatic_funcotate_seg_workflow.wdl" as CNVFuncotateSegments
42 #import "https://raw.githubusercontent.com/gatk-workflows/gatk4-somatic-cnvs/1.4.0/tasks/cnv_common_tasks.wdl" as CNVTasks
43 #import "https://raw.githubusercontent.com/gatk-workflows/gatk4-somatic-cnvs/1.4.0/tasks/cnv_somatic_oncotator_workflow.wdl" as CNVOncotator
44 #import "https://raw.githubusercontent.com/gatk-workflows/gatk4-somatic-cnvs/1.4.0/tasks/cnv_somatic_funcotate_seg_workflow.wdl" as CNVFuncotateSegments
46 workflow CNVSomaticPairWorkflow {
48     ##################################
49     #### required basic arguments ####
50     ##################################
51     File common_sites
52     File intervals
53     File? blacklist_intervals
54     File tumor_bam
55     File tumor_bam_idx
56     File? normal_bam
57     File? normal_bam_idx
58     File read_count_pon
59     File ref_fasta_dict
60     File ref_fasta_fai
61     File ref_fasta
62     String gatk_docker
64     ##################################
65     #### optional basic arguments ####
66     ##################################
67      # For running oncotator
68     Boolean? is_run_oncotator
69      # For running funcotator
70     Boolean? is_run_funcotator
72     File? gatk4_jar_override
73     Int? preemptible_attempts
74     # Use as a last resort to increase the disk given to every task in case of ill-behaved data
75     Int? emergency_extra_disk
77     ####################################################
78     #### optional arguments for PreprocessIntervals ####
79     ####################################################
80     Int? padding
81     Int? bin_length
82     Int? mem_gb_for_preprocess_intervals
84     ##############################################
85     #### optional arguments for CollectCounts ####
86     ##############################################
87     String? collect_counts_format
88     Int? mem_gb_for_collect_counts
90     #####################################################
91     #### optional arguments for CollectAllelicCounts ####
92     #####################################################
93     String? minimum_base_quality
94     Int? mem_gb_for_collect_allelic_counts
96     ##################################################
97     #### optional arguments for DenoiseReadCounts ####
98     ##################################################
99     Int? number_of_eigensamples
100     Int? mem_gb_for_denoise_read_counts
102     ##############################################
103     #### optional arguments for ModelSegments ####
104     ##############################################
105     Int? max_num_segments_per_chromosome
106     Int? min_total_allele_count
107     Int? min_total_allele_count_normal
108     Float? genotyping_homozygous_log_ratio_threshold
109     Float? genotyping_base_error_rate
110     Float? kernel_variance_copy_ratio
111     Float? kernel_variance_allele_fraction
112     Float? kernel_scaling_allele_fraction
113     Int? kernel_approximation_dimension
114     Array[Int]+? window_sizes = [8, 16, 32, 64, 128, 256]
115     Float? num_changepoints_penalty_factor
116     Float? minor_allele_fraction_prior_alpha
117     Int? num_samples_copy_ratio
118     Int? num_burn_in_copy_ratio
119     Int? num_samples_allele_fraction
120     Int? num_burn_in_allele_fraction
121     Float? smoothing_threshold_copy_ratio
122     Float? smoothing_threshold_allele_fraction
123     Int? max_num_smoothing_iterations
124     Int? num_smoothing_iterations_per_fit
125     Int? mem_gb_for_model_segments
127     ######################################################
128     #### optional arguments for CallCopyRatioSegments ####
129     ######################################################
130     Float? neutral_segment_copy_ratio_lower_bound
131     Float? neutral_segment_copy_ratio_upper_bound
132     Float? outlier_neutral_segment_copy_ratio_z_score_threshold
133     Float? calling_copy_ratio_z_score_threshold
134     Int? mem_gb_for_call_copy_ratio_segments
136     #########################################
137     #### optional arguments for plotting ####
138     #########################################
139     Int? minimum_contig_length
140     Int? mem_gb_for_plotting
142     ##########################################
143     #### optional arguments for Oncotator ####
144     ##########################################
145     String? additional_args_for_oncotator
146     String? oncotator_docker
147     Int? mem_gb_for_oncotator
148     Int? boot_disk_space_gb_for_oncotator
150     ##################################################
151     #### optional arguments for FuncotateSegments ####
152     ##################################################
153     String? additional_args_for_funcotator
154     String? funcotator_ref_version
155     Int? mem_gb_for_funcotator
156     File? funcotator_transcript_selection_list
157     File? funcotator_data_sources_tar_gz
158     String? funcotator_transcript_selection_mode
159     Array[String]? funcotator_annotation_defaults
160     Array[String]? funcotator_annotation_overrides
161     Array[String]? funcotator_excluded_fields
162     Boolean? funcotator_is_removing_untared_datasources
163     Int? funcotator_disk_space_gb
164     Boolean? funcotator_use_ssd
165     Int? funcotator_cpu
167     Int ref_size = ceil(size(ref_fasta, "GB") + size(ref_fasta_dict, "GB") + size(ref_fasta_fai, "GB"))
168     Int read_count_pon_size = ceil(size(read_count_pon, "GB"))
169     Int tumor_bam_size = ceil(size(tumor_bam, "GB") + size(tumor_bam_idx, "GB"))
170     Int normal_bam_size = if defined(normal_bam) then ceil(size(normal_bam, "GB") + size(normal_bam_idx, "GB")) else 0
172     Int gatk4_override_size = if defined(gatk4_jar_override) then ceil(size(gatk4_jar_override, "GB")) else 0
173     # This is added to every task's disk request as padding; increase it if every call systematically needs more disk
174     Int disk_pad = 20 + ceil(size(intervals, "GB")) + ceil(size(common_sites, "GB")) + gatk4_override_size + select_first([emergency_extra_disk, 0])
176     File final_normal_bam = select_first([normal_bam, "null"])
177     File final_normal_bam_idx = select_first([normal_bam_idx, "null"])
179     Int preprocess_intervals_disk = ref_size + disk_pad
180     call CNVTasks.PreprocessIntervals {
181         input:
182             intervals = intervals,
183             blacklist_intervals = blacklist_intervals,
184             ref_fasta = ref_fasta,
185             ref_fasta_fai = ref_fasta_fai,
186             ref_fasta_dict = ref_fasta_dict,
187             padding = padding,
188             bin_length = bin_length,
189             gatk4_jar_override = gatk4_jar_override,
190             gatk_docker = gatk_docker,
191             mem_gb = mem_gb_for_preprocess_intervals,
192             disk_space_gb = preprocess_intervals_disk,
193             preemptible_attempts = preemptible_attempts
194     }
196     Int collect_counts_tumor_disk = tumor_bam_size + ceil(size(PreprocessIntervals.preprocessed_intervals, "GB")) + disk_pad
197     call CNVTasks.CollectCounts as CollectCountsTumor {
198         input:
199             intervals = PreprocessIntervals.preprocessed_intervals,
200             bam = tumor_bam,
201             bam_idx = tumor_bam_idx,
202             ref_fasta = ref_fasta,
203             ref_fasta_fai = ref_fasta_fai,
204             ref_fasta_dict = ref_fasta_dict,
205             format = collect_counts_format,
206             gatk4_jar_override = gatk4_jar_override,
207             gatk_docker = gatk_docker,
208             mem_gb = mem_gb_for_collect_counts,
209             disk_space_gb = collect_counts_tumor_disk,
210             preemptible_attempts = preemptible_attempts
211     }
213     Int collect_allelic_counts_tumor_disk = tumor_bam_size + ref_size + disk_pad
214     call CNVTasks.CollectAllelicCounts as CollectAllelicCountsTumor {
215         input:
216             common_sites = common_sites,
217             bam = tumor_bam,
218             bam_idx = tumor_bam_idx,
219             ref_fasta = ref_fasta,
220             ref_fasta_dict = ref_fasta_dict,
221             ref_fasta_fai = ref_fasta_fai,
222             minimum_base_quality =  minimum_base_quality,
223             gatk4_jar_override = gatk4_jar_override,
224             gatk_docker = gatk_docker,
225             mem_gb = mem_gb_for_collect_allelic_counts,
226             disk_space_gb = collect_allelic_counts_tumor_disk,
227             preemptible_attempts = preemptible_attempts
228     }
230     Int denoise_read_counts_tumor_disk = read_count_pon_size + ceil(size(CollectCountsTumor.counts, "GB")) + disk_pad
231     call DenoiseReadCounts as DenoiseReadCountsTumor {
232         input:
233             entity_id = CollectCountsTumor.entity_id,
234             read_counts = CollectCountsTumor.counts,
235             read_count_pon = read_count_pon,
236             number_of_eigensamples = number_of_eigensamples,
237             gatk4_jar_override = gatk4_jar_override,
238             gatk_docker = gatk_docker,
239             mem_gb = mem_gb_for_denoise_read_counts,
240             disk_space_gb = denoise_read_counts_tumor_disk,
241             preemptible_attempts = preemptible_attempts
242     }
244     Int model_segments_normal_portion = if defined(normal_bam) then ceil(size(CollectAllelicCountsNormal.allelic_counts, "GB")) else 0
245     Int model_segments_tumor_disk = ceil(size(DenoiseReadCountsTumor.denoised_copy_ratios, "GB")) + ceil(size(CollectAllelicCountsTumor.allelic_counts, "GB")) + model_segments_normal_portion + disk_pad
246     call ModelSegments as ModelSegmentsTumor {
247         input:
248             entity_id = CollectCountsTumor.entity_id,
249             denoised_copy_ratios = DenoiseReadCountsTumor.denoised_copy_ratios,
250             allelic_counts = CollectAllelicCountsTumor.allelic_counts,
251             normal_allelic_counts = CollectAllelicCountsNormal.allelic_counts,
252             max_num_segments_per_chromosome = max_num_segments_per_chromosome,
253             min_total_allele_count = min_total_allele_count,
254             min_total_allele_count_normal = min_total_allele_count_normal,
255             genotyping_homozygous_log_ratio_threshold = genotyping_homozygous_log_ratio_threshold,
256             genotyping_base_error_rate = genotyping_base_error_rate,
257             kernel_variance_copy_ratio = kernel_variance_copy_ratio,
258             kernel_variance_allele_fraction = kernel_variance_allele_fraction,
259             kernel_scaling_allele_fraction = kernel_scaling_allele_fraction,
260             kernel_approximation_dimension = kernel_approximation_dimension,
261             window_sizes = window_sizes,
262             num_changepoints_penalty_factor = num_changepoints_penalty_factor,
263             minor_allele_fraction_prior_alpha = minor_allele_fraction_prior_alpha,
264             num_samples_copy_ratio = num_samples_copy_ratio,
265             num_burn_in_copy_ratio = num_burn_in_copy_ratio,
266             num_samples_allele_fraction = num_samples_allele_fraction,
267             num_burn_in_allele_fraction = num_burn_in_allele_fraction,
268             smoothing_threshold_copy_ratio = smoothing_threshold_copy_ratio,
269             smoothing_threshold_allele_fraction = smoothing_threshold_allele_fraction,
270             max_num_smoothing_iterations = max_num_smoothing_iterations,
271             num_smoothing_iterations_per_fit = num_smoothing_iterations_per_fit,
272             gatk4_jar_override = gatk4_jar_override,
273             gatk_docker = gatk_docker,
274             mem_gb = mem_gb_for_model_segments,
275             disk_space_gb = model_segments_tumor_disk,
276             preemptible_attempts = preemptible_attempts
277     }
279     Int copy_ratio_segments_tumor_disk = ceil(size(DenoiseReadCountsTumor.denoised_copy_ratios, "GB")) + ceil(size(ModelSegmentsTumor.copy_ratio_only_segments, "GB")) + disk_pad
280     call CallCopyRatioSegments as CallCopyRatioSegmentsTumor {
281         input:
282             entity_id = CollectCountsTumor.entity_id,
283             copy_ratio_segments = ModelSegmentsTumor.copy_ratio_only_segments,
284             neutral_segment_copy_ratio_lower_bound = neutral_segment_copy_ratio_lower_bound,
285             neutral_segment_copy_ratio_upper_bound = neutral_segment_copy_ratio_upper_bound,
286             outlier_neutral_segment_copy_ratio_z_score_threshold = outlier_neutral_segment_copy_ratio_z_score_threshold,
287             calling_copy_ratio_z_score_threshold = calling_copy_ratio_z_score_threshold,
288             gatk4_jar_override = gatk4_jar_override,
289             gatk_docker = gatk_docker,
290             mem_gb = mem_gb_for_call_copy_ratio_segments,
291             disk_space_gb = copy_ratio_segments_tumor_disk,
292             preemptible_attempts = preemptible_attempts
293     }
295     # The files from other tasks are small enough to just combine into one disk variable and pass to the tumor plotting tasks
296     Int plot_tumor_disk = ref_size + ceil(size(DenoiseReadCountsTumor.standardized_copy_ratios, "GB")) + ceil(size(DenoiseReadCountsTumor.denoised_copy_ratios, "GB")) + ceil(size(ModelSegmentsTumor.het_allelic_counts, "GB")) + ceil(size(ModelSegmentsTumor.modeled_segments, "GB")) + disk_pad
297     call PlotDenoisedCopyRatios as PlotDenoisedCopyRatiosTumor {
298         input:
299             entity_id = CollectCountsTumor.entity_id,
300             standardized_copy_ratios = DenoiseReadCountsTumor.standardized_copy_ratios,
301             denoised_copy_ratios = DenoiseReadCountsTumor.denoised_copy_ratios,
302             ref_fasta_dict = ref_fasta_dict,
303             minimum_contig_length = minimum_contig_length,
304             gatk4_jar_override = gatk4_jar_override,
305             gatk_docker = gatk_docker,
306             mem_gb = mem_gb_for_plotting,
307             disk_space_gb = plot_tumor_disk,
308             preemptible_attempts = preemptible_attempts
309     }
311     call PlotModeledSegments as PlotModeledSegmentsTumor {
312         input:
313             entity_id = CollectCountsTumor.entity_id,
314             denoised_copy_ratios = DenoiseReadCountsTumor.denoised_copy_ratios,
315             het_allelic_counts = ModelSegmentsTumor.het_allelic_counts,
316             modeled_segments = ModelSegmentsTumor.modeled_segments,
317             ref_fasta_dict = ref_fasta_dict,
318             minimum_contig_length = minimum_contig_length,
319             gatk4_jar_override = gatk4_jar_override,
320             gatk_docker = gatk_docker,
321             mem_gb = mem_gb_for_plotting,
322             disk_space_gb = plot_tumor_disk,
323             preemptible_attempts = preemptible_attempts
324     }
326     Int collect_counts_normal_disk = normal_bam_size + ceil(size(PreprocessIntervals.preprocessed_intervals, "GB")) + disk_pad
327     if (defined(normal_bam)) {
328         call CNVTasks.CollectCounts as CollectCountsNormal {
329             input:
330                 intervals = PreprocessIntervals.preprocessed_intervals,
331                 bam = final_normal_bam,
332                 bam_idx = final_normal_bam_idx,
333                 ref_fasta = ref_fasta,
334                 ref_fasta_fai = ref_fasta_fai,
335                 ref_fasta_dict = ref_fasta_dict,
336                 format = collect_counts_format,
337                 gatk4_jar_override = gatk4_jar_override,
338                 gatk_docker = gatk_docker,
339                 mem_gb = mem_gb_for_collect_counts,
340                 disk_space_gb = collect_counts_normal_disk,
341                 preemptible_attempts = preemptible_attempts
342         }
344         Int collect_allelic_counts_normal_disk = normal_bam_size + ref_size + disk_pad
345         call CNVTasks.CollectAllelicCounts as CollectAllelicCountsNormal {
346             input:
347                 common_sites = common_sites,
348                 bam = final_normal_bam,
349                 bam_idx = final_normal_bam_idx,
350                 ref_fasta = ref_fasta,
351                 ref_fasta_dict = ref_fasta_dict,
352                 ref_fasta_fai = ref_fasta_fai,
353                 minimum_base_quality =  minimum_base_quality,
354                 gatk4_jar_override = gatk4_jar_override,
355                 gatk_docker = gatk_docker,
356                 mem_gb = mem_gb_for_collect_allelic_counts,
357                 disk_space_gb = collect_allelic_counts_normal_disk,
358                 preemptible_attempts = preemptible_attempts
359         }
361         Int denoise_read_counts_normal_disk = read_count_pon_size + ceil(size(CollectCountsNormal.counts, "GB")) + disk_pad
362         call DenoiseReadCounts as DenoiseReadCountsNormal {
363             input:
364                 entity_id = CollectCountsNormal.entity_id,
365                 read_counts = CollectCountsNormal.counts,
366                 read_count_pon = read_count_pon,
367                 number_of_eigensamples = number_of_eigensamples,
368                 gatk4_jar_override = gatk4_jar_override,
369                 gatk_docker = gatk_docker,
370                 mem_gb = mem_gb_for_denoise_read_counts,
371                 disk_space_gb = denoise_read_counts_normal_disk,
372                 preemptible_attempts = preemptible_attempts
373         }
375         Int model_segments_normal_disk = ceil(size(DenoiseReadCountsNormal.denoised_copy_ratios, "GB")) + ceil(size(CollectAllelicCountsNormal.allelic_counts, "GB")) + disk_pad
376         call ModelSegments as ModelSegmentsNormal {
377             input:
378                 entity_id = CollectCountsNormal.entity_id,
379                 denoised_copy_ratios = DenoiseReadCountsNormal.denoised_copy_ratios,
380                 allelic_counts = CollectAllelicCountsNormal.allelic_counts,
381                 max_num_segments_per_chromosome = max_num_segments_per_chromosome,
382                 min_total_allele_count = min_total_allele_count_normal,
383                 genotyping_homozygous_log_ratio_threshold = genotyping_homozygous_log_ratio_threshold,
384                 genotyping_base_error_rate = genotyping_base_error_rate,
385                 kernel_variance_copy_ratio = kernel_variance_copy_ratio,
386                 kernel_variance_allele_fraction = kernel_variance_allele_fraction,
387                 kernel_scaling_allele_fraction = kernel_scaling_allele_fraction,
388                 kernel_approximation_dimension = kernel_approximation_dimension,
389                 window_sizes = window_sizes,
390                 num_changepoints_penalty_factor = num_changepoints_penalty_factor,
391                 minor_allele_fraction_prior_alpha = minor_allele_fraction_prior_alpha,
392                 num_samples_copy_ratio = num_samples_copy_ratio,
393                 num_burn_in_copy_ratio = num_burn_in_copy_ratio,
394                 num_samples_allele_fraction = num_samples_allele_fraction,
395                 num_burn_in_allele_fraction = num_burn_in_allele_fraction,
396                 smoothing_threshold_copy_ratio = smoothing_threshold_copy_ratio,
397                 smoothing_threshold_allele_fraction = smoothing_threshold_allele_fraction,
398                 max_num_smoothing_iterations = max_num_smoothing_iterations,
399                 num_smoothing_iterations_per_fit = num_smoothing_iterations_per_fit,
400                 gatk4_jar_override = gatk4_jar_override,
401                 gatk_docker = gatk_docker,
402                 mem_gb = mem_gb_for_model_segments,
403                 disk_space_gb = model_segments_normal_disk,
404                 preemptible_attempts = preemptible_attempts
405         }
407         Int copy_ratio_segments_normal_disk = ceil(size(DenoiseReadCountsNormal.denoised_copy_ratios, "GB")) + ceil(size(ModelSegmentsNormal.copy_ratio_only_segments, "GB")) + disk_pad
408         call CallCopyRatioSegments as CallCopyRatioSegmentsNormal {
409             input:
410                 entity_id = CollectCountsNormal.entity_id,
411                 copy_ratio_segments = ModelSegmentsNormal.copy_ratio_only_segments,
412                 neutral_segment_copy_ratio_lower_bound = neutral_segment_copy_ratio_lower_bound,
413                 neutral_segment_copy_ratio_upper_bound = neutral_segment_copy_ratio_upper_bound,
414                 outlier_neutral_segment_copy_ratio_z_score_threshold = outlier_neutral_segment_copy_ratio_z_score_threshold,
415                 calling_copy_ratio_z_score_threshold = calling_copy_ratio_z_score_threshold,
416                 gatk4_jar_override = gatk4_jar_override,
417                 gatk_docker = gatk_docker,
418                 mem_gb = mem_gb_for_call_copy_ratio_segments,
419                 disk_space_gb = copy_ratio_segments_normal_disk,
420                 preemptible_attempts = preemptible_attempts
421         }
423         # The files from other tasks are small enough to just combine into one disk variable and pass to the normal plotting tasks
424         Int plot_normal_disk = ref_size + ceil(size(DenoiseReadCountsNormal.standardized_copy_ratios, "GB")) + ceil(size(DenoiseReadCountsNormal.denoised_copy_ratios, "GB")) + ceil(size(ModelSegmentsNormal.het_allelic_counts, "GB")) + ceil(size(ModelSegmentsNormal.modeled_segments, "GB")) + disk_pad
425         call PlotDenoisedCopyRatios as PlotDenoisedCopyRatiosNormal {
426             input:
427                 entity_id = CollectCountsNormal.entity_id,
428                 standardized_copy_ratios = DenoiseReadCountsNormal.standardized_copy_ratios,
429                 denoised_copy_ratios = DenoiseReadCountsNormal.denoised_copy_ratios,
430                 ref_fasta_dict = ref_fasta_dict,
431                 minimum_contig_length = minimum_contig_length,
432                 gatk4_jar_override = gatk4_jar_override,
433                 gatk_docker = gatk_docker,
434                 mem_gb = mem_gb_for_plotting,
435                 disk_space_gb = plot_normal_disk,
436                 preemptible_attempts = preemptible_attempts
437         }
438         call PlotModeledSegments as PlotModeledSegmentsNormal {
439             input:
440                 entity_id = CollectCountsNormal.entity_id,
441                 denoised_copy_ratios = DenoiseReadCountsNormal.denoised_copy_ratios,
442                 het_allelic_counts = ModelSegmentsNormal.het_allelic_counts,
443                 modeled_segments = ModelSegmentsNormal.modeled_segments,
444                 ref_fasta_dict = ref_fasta_dict,
445                 minimum_contig_length = minimum_contig_length,
446                 gatk4_jar_override = gatk4_jar_override,
447                 gatk_docker = gatk_docker,
448                 mem_gb = mem_gb_for_plotting,
449                 disk_space_gb = plot_normal_disk,
450                 preemptible_attempts = preemptible_attempts
451         }
452     }
454     if (select_first([is_run_oncotator, false])) {  # Oncotator annotation is skipped unless is_run_oncotator is supplied as true
455         call CNVOncotator.CNVOncotatorWorkflow as CNVOncotatorWorkflow {
456             input:
457                  called_file = CallCopyRatioSegmentsTumor.called_copy_ratio_segments,  # only the tumor calls are passed for annotation
458                  additional_args = additional_args_for_oncotator,
459                  oncotator_docker = oncotator_docker,
460                  mem_gb_for_oncotator = mem_gb_for_oncotator,
461                  boot_disk_space_gb_for_oncotator = boot_disk_space_gb_for_oncotator,
462                  preemptible_attempts = preemptible_attempts
463         }
464     }
465     if (select_first([is_run_funcotator, false])) {  # FuncotateSegments is skipped unless is_run_funcotator is supplied as true
466         call CNVFuncotateSegments.CNVFuncotateSegmentsWorkflow as CNVFuncotateSegmentsWorkflow {
467             input:
468                  input_seg_file = CallCopyRatioSegmentsTumor.called_copy_ratio_segments,  # only the tumor calls are passed for annotation
469                  funcotator_ref_version = select_first([funcotator_ref_version, "hg19"]),  # falls back to "hg19" when the caller leaves it unset
470                  extra_args = additional_args_for_funcotator,
471                  ref_fasta = ref_fasta,
472                  ref_fasta_fai = ref_fasta_fai,
473                  ref_fasta_dict = ref_fasta_dict,
474                  transcript_selection_list = funcotator_transcript_selection_list,
475                  funcotator_data_sources_tar_gz = funcotator_data_sources_tar_gz,
476                  gatk4_jar_override = gatk4_jar_override,
477                  gatk_docker = gatk_docker,
478                  mem_gb = mem_gb_for_funcotator,
479                  preemptible_attempts = preemptible_attempts,
480                  transcript_selection_mode = funcotator_transcript_selection_mode,
481                  annotation_defaults = funcotator_annotation_defaults,
482                  annotation_overrides = funcotator_annotation_overrides,
483                  funcotator_excluded_fields = funcotator_excluded_fields,
484                  is_removing_untared_datasources = funcotator_is_removing_untared_datasources,
485                  disk_space_gb = funcotator_disk_space_gb,  # unlike the other calls, no workflow-computed disk size or disk_pad is applied here
486                  use_ssd = funcotator_use_ssd,
487                  cpu = funcotator_cpu
488         }
489     }
491     output {  # tumor outputs first, then the optional normal outputs, then the annotation outputs
492         File preprocessed_intervals = PreprocessIntervals.preprocessed_intervals
494         File read_counts_entity_id_tumor = CollectCountsTumor.entity_id
495         File read_counts_tumor = CollectCountsTumor.counts
496         File allelic_counts_entity_id_tumor = CollectAllelicCountsTumor.entity_id
497         File allelic_counts_tumor = CollectAllelicCountsTumor.allelic_counts
498         File denoised_copy_ratios_tumor = DenoiseReadCountsTumor.denoised_copy_ratios
499         File standardized_copy_ratios_tumor = DenoiseReadCountsTumor.standardized_copy_ratios
500         File het_allelic_counts_tumor = ModelSegmentsTumor.het_allelic_counts
501         File normal_het_allelic_counts_tumor = ModelSegmentsTumor.normal_het_allelic_counts
502         File copy_ratio_only_segments_tumor = ModelSegmentsTumor.copy_ratio_only_segments
503         File copy_ratio_legacy_segments_tumor = ModelSegmentsTumor.copy_ratio_legacy_segments
504         File allele_fraction_legacy_segments_tumor = ModelSegmentsTumor.allele_fraction_legacy_segments
505         File modeled_segments_begin_tumor = ModelSegmentsTumor.modeled_segments_begin
506         File copy_ratio_parameters_begin_tumor = ModelSegmentsTumor.copy_ratio_parameters_begin
507         File allele_fraction_parameters_begin_tumor = ModelSegmentsTumor.allele_fraction_parameters_begin
508         File modeled_segments_tumor = ModelSegmentsTumor.modeled_segments
509         File copy_ratio_parameters_tumor = ModelSegmentsTumor.copy_ratio_parameters
510         File allele_fraction_parameters_tumor = ModelSegmentsTumor.allele_fraction_parameters
511         File called_copy_ratio_segments_tumor = CallCopyRatioSegmentsTumor.called_copy_ratio_segments
512         File called_copy_ratio_legacy_segments_tumor = CallCopyRatioSegmentsTumor.called_copy_ratio_legacy_segments
513         File denoised_copy_ratios_plot_tumor = PlotDenoisedCopyRatiosTumor.denoised_copy_ratios_plot
514         File denoised_copy_ratios_lim_4_plot_tumor = PlotDenoisedCopyRatiosTumor.denoised_copy_ratios_lim_4_plot
515         File standardized_MAD_tumor = PlotDenoisedCopyRatiosTumor.standardized_MAD
516         Float standardized_MAD_value_tumor = PlotDenoisedCopyRatiosTumor.standardized_MAD_value
517         File denoised_MAD_tumor = PlotDenoisedCopyRatiosTumor.denoised_MAD
518         Float denoised_MAD_value_tumor = PlotDenoisedCopyRatiosTumor.denoised_MAD_value
519         File delta_MAD_tumor = PlotDenoisedCopyRatiosTumor.delta_MAD
520         Float delta_MAD_value_tumor = PlotDenoisedCopyRatiosTumor.delta_MAD_value
521         File scaled_delta_MAD_tumor = PlotDenoisedCopyRatiosTumor.scaled_delta_MAD
522         Float scaled_delta_MAD_value_tumor = PlotDenoisedCopyRatiosTumor.scaled_delta_MAD_value
523         File modeled_segments_plot_tumor = PlotModeledSegmentsTumor.modeled_segments_plot
525         File? read_counts_entity_id_normal = CollectCountsNormal.entity_id  # normal outputs are optional: their calls sit inside the defined(normal_bam) conditional
526         File? read_counts_normal = CollectCountsNormal.counts
527         File? allelic_counts_entity_id_normal = CollectAllelicCountsNormal.entity_id
528         File? allelic_counts_normal = CollectAllelicCountsNormal.allelic_counts
529         File? denoised_copy_ratios_normal = DenoiseReadCountsNormal.denoised_copy_ratios
530         File? standardized_copy_ratios_normal = DenoiseReadCountsNormal.standardized_copy_ratios
531         File? het_allelic_counts_normal = ModelSegmentsNormal.het_allelic_counts
532         File? normal_het_allelic_counts_normal = ModelSegmentsNormal.normal_het_allelic_counts
533         File? copy_ratio_only_segments_normal = ModelSegmentsNormal.copy_ratio_only_segments
534         File? copy_ratio_legacy_segments_normal = ModelSegmentsNormal.copy_ratio_legacy_segments
535         File? allele_fraction_legacy_segments_normal = ModelSegmentsNormal.allele_fraction_legacy_segments
536         File? modeled_segments_begin_normal = ModelSegmentsNormal.modeled_segments_begin
537         File? copy_ratio_parameters_begin_normal = ModelSegmentsNormal.copy_ratio_parameters_begin
538         File? allele_fraction_parameters_begin_normal = ModelSegmentsNormal.allele_fraction_parameters_begin
539         File? modeled_segments_normal = ModelSegmentsNormal.modeled_segments
540         File? copy_ratio_parameters_normal = ModelSegmentsNormal.copy_ratio_parameters
541         File? allele_fraction_parameters_normal = ModelSegmentsNormal.allele_fraction_parameters
542         File? called_copy_ratio_segments_normal = CallCopyRatioSegmentsNormal.called_copy_ratio_segments
543         File? called_copy_ratio_legacy_segments_normal = CallCopyRatioSegmentsNormal.called_copy_ratio_legacy_segments
544         File? denoised_copy_ratios_plot_normal = PlotDenoisedCopyRatiosNormal.denoised_copy_ratios_plot
545         File? denoised_copy_ratios_lim_4_plot_normal = PlotDenoisedCopyRatiosNormal.denoised_copy_ratios_lim_4_plot
546         File? standardized_MAD_normal = PlotDenoisedCopyRatiosNormal.standardized_MAD
547         Float? standardized_MAD_value_normal = PlotDenoisedCopyRatiosNormal.standardized_MAD_value
548         File? denoised_MAD_normal = PlotDenoisedCopyRatiosNormal.denoised_MAD
549         Float? denoised_MAD_value_normal = PlotDenoisedCopyRatiosNormal.denoised_MAD_value
550         File? delta_MAD_normal = PlotDenoisedCopyRatiosNormal.delta_MAD
551         Float? delta_MAD_value_normal = PlotDenoisedCopyRatiosNormal.delta_MAD_value
552         File? scaled_delta_MAD_normal = PlotDenoisedCopyRatiosNormal.scaled_delta_MAD
553         Float? scaled_delta_MAD_value_normal = PlotDenoisedCopyRatiosNormal.scaled_delta_MAD_value
554         File? modeled_segments_plot_normal = PlotModeledSegmentsNormal.modeled_segments_plot
556         File oncotated_called_file_tumor = select_first([CNVOncotatorWorkflow.oncotated_called_file, "null"])  # annotation outputs fall back to the literal "null" when the annotator's conditional did not run
557         File oncotated_called_gene_list_file_tumor = select_first([CNVOncotatorWorkflow.oncotated_called_gene_list_file, "null"])
558         File funcotated_called_file_tumor = select_first([CNVFuncotateSegmentsWorkflow.funcotated_seg_simple_tsv, "null"])
559         File funcotated_called_gene_list_file_tumor = select_first([CNVFuncotateSegmentsWorkflow.funcotated_gene_list_tsv, "null"])
560     }
563 task DenoiseReadCounts {
        # Runs GATK DenoiseReadCounts on one sample's read counts against a panel of normals and
        # emits two TSVs named after entity_id: standardized and denoised copy ratios.
        #
        # Inputs:
        #   entity_id              - prefix for both output file names
        #   read_counts            - passed to --input
        #   read_count_pon         - passed to --count-panel-of-normals
        #   number_of_eigensamples - optional; the flag is built by string concatenation inside the
        #                            placeholder, so it is meant to disappear when no value is given
        #   gatk4_jar_override     - only referenced on the '#'-prefixed export line of the script,
        #                            so it does not select the jar that is executed
564     String entity_id
565     File read_counts
566     File read_count_pon
567     Int? number_of_eigensamples #use all eigensamples in panel by default
568     File? gatk4_jar_override
570     # Runtime parameters
        # gatk_docker is declared but unused as written: the docker runtime attribute is commented out.
571     String gatk_docker
572     Int? mem_gb
        # NOTE(review): disk_space_gb has no default and is concatenated straight into the `disks`
        # string below; presumably the calling workflow always supplies it - confirm.
573     Int? disk_space_gb
574     Boolean use_ssd = false
575     Int? cpu
576     Int? preemptible_attempts
        # Machine memory is mem_gb (default 13) * 1000 MB; the JVM heap (-Xmx) is set 1000 MB lower.
578     Int machine_mem_mb = select_first([mem_gb, 13]) * 1000
579     Int command_mem_mb = machine_mem_mb - 1000
581     command <<<
582         set -e
583         #export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override}
585         gatk --java-options "-Xmx${command_mem_mb}m" DenoiseReadCounts \
586             --input ${read_counts} \
587             --count-panel-of-normals ${read_count_pon} \
588             ${"--number-of-eigensamples " + number_of_eigensamples} \
589             --standardized-copy-ratios ${entity_id}.standardizedCR.tsv \
590             --denoised-copy-ratios ${entity_id}.denoisedCR.tsv
591     >>>
        # cpu defaults to 1 and preemptible to 5 when the corresponding optional inputs are unset.
593     runtime {
594         #docker: "${gatk_docker}"
595         memory: machine_mem_mb + " MB"
596         disks: "local-disk " + disk_space_gb + if use_ssd then " SSD" else " HDD"
597         cpu: select_first([cpu, 1])
598         preemptible: select_first([preemptible_attempts, 5])
599     }
        # Output paths mirror the file names given on the command line above.
601     output {
602         File standardized_copy_ratios = "${entity_id}.standardizedCR.tsv"
603         File denoised_copy_ratios = "${entity_id}.denoisedCR.tsv"
604     }
607 task ModelSegments {
        # Runs GATK ModelSegments on denoised copy ratios plus allelic counts (optionally with
        # matched-normal allelic counts) and collects the segment and parameter files that the
        # tool writes under output_dir_ with the prefix entity_id.
        #
        # Inputs:
        #   entity_id             - output file prefix (--output-prefix)
        #   denoised_copy_ratios  - passed to --denoised-copy-ratios
        #   allelic_counts        - passed to --allelic-counts
        #   normal_allelic_counts - optional; when supplied it is passed to --normal-allelic-counts
        #                           and also switches the case-sample allele-count default (see below)
        #   output_dir            - optional output directory; "out" is used when unset
        #   gatk4_jar_override    - only referenced on the '#'-prefixed export line of the script
        # Every other optional input maps onto one command-line flag; its fallback value is the
        # default="..." literal in the matching placeholder of the command.
608     String entity_id
609     File denoised_copy_ratios
610     File allelic_counts
611     File? normal_allelic_counts
612     Int? max_num_segments_per_chromosome
613     Int? min_total_allele_count
614     Int? min_total_allele_count_normal
615     Float? genotyping_homozygous_log_ratio_threshold
616     Float? genotyping_base_error_rate
617     Float? kernel_variance_copy_ratio
618     Float? kernel_variance_allele_fraction
619     Float? kernel_scaling_allele_fraction
620     Int? kernel_approximation_dimension
        # Each element becomes its own --window-size flag via the sep= placeholder in the command.
621     Array[Int]+? window_sizes = [8, 16, 32, 64, 128, 256]
622     Float? num_changepoints_penalty_factor
623     Float? minor_allele_fraction_prior_alpha
624     Int? num_samples_copy_ratio
625     Int? num_burn_in_copy_ratio
626     Int? num_samples_allele_fraction
627     Int? num_burn_in_allele_fraction
628     Float? smoothing_threshold_copy_ratio
629     Float? smoothing_threshold_allele_fraction
630     Int? max_num_smoothing_iterations
631     Int? num_smoothing_iterations_per_fit
632     String? output_dir
633     File? gatk4_jar_override
635     # Runtime parameters
        # gatk_docker is declared but unused as written: the docker runtime attribute is commented out.
636     String gatk_docker
637     Int? mem_gb
        # NOTE(review): disk_space_gb has no default and is concatenated straight into the `disks`
        # string below; presumably the calling workflow always supplies it - confirm.
638     Int? disk_space_gb
639     Boolean use_ssd = false
640     Int? cpu
641     Int? preemptible_attempts
        # Machine memory is mem_gb (default 13) * 1000 MB.
643     Int machine_mem_mb = select_first([mem_gb, 13]) * 1000
644     # ModelSegments seems to need at least 3GB of overhead to run
        # NOTE(review): with mem_gb <= 3 this subtraction yields a non-positive -Xmx value.
645     Int command_mem_mb = machine_mem_mb - 3000
647     # If optional output_dir not specified, use "out"
648     String output_dir_ = select_first([output_dir, "out"])
650     # default values are min_total_allele_count_ = 0 in matched-normal mode
651     #                                            = 30 in case-only mode
        # An explicitly supplied min_total_allele_count always wins over the mode-dependent default.
652     Int default_min_total_allele_count = if defined(normal_allelic_counts) then 0 else 30
653     Int min_total_allele_count_ = select_first([min_total_allele_count, default_min_total_allele_count])
655     command <<<
656         set -e
657         #export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override}
659         gatk --java-options "-Xmx${command_mem_mb}m" ModelSegments \
660             --denoised-copy-ratios ${denoised_copy_ratios} \
661             --allelic-counts ${allelic_counts} \
662             ${"--normal-allelic-counts " + normal_allelic_counts} \
663             --minimum-total-allele-count-case ${min_total_allele_count_} \
664             --minimum-total-allele-count-normal ${default="30" min_total_allele_count_normal} \
665             --genotyping-homozygous-log-ratio-threshold ${default="-10.0" genotyping_homozygous_log_ratio_threshold} \
666             --genotyping-base-error-rate ${default="0.05" genotyping_base_error_rate} \
667             --maximum-number-of-segments-per-chromosome ${default="1000" max_num_segments_per_chromosome} \
668             --kernel-variance-copy-ratio ${default="0.0" kernel_variance_copy_ratio} \
669             --kernel-variance-allele-fraction ${default="0.025" kernel_variance_allele_fraction} \
670             --kernel-scaling-allele-fraction ${default="1.0" kernel_scaling_allele_fraction} \
671             --kernel-approximation-dimension ${default="100" kernel_approximation_dimension} \
672             --window-size ${sep=" --window-size " window_sizes} \
673             --number-of-changepoints-penalty-factor ${default="1.0" num_changepoints_penalty_factor} \
674             --minor-allele-fraction-prior-alpha ${default="25.0" minor_allele_fraction_prior_alpha} \
675             --number-of-samples-copy-ratio ${default="100" num_samples_copy_ratio} \
676             --number-of-burn-in-samples-copy-ratio ${default="50" num_burn_in_copy_ratio} \
677             --number-of-samples-allele-fraction ${default="100" num_samples_allele_fraction} \
678             --number-of-burn-in-samples-allele-fraction ${default="50" num_burn_in_allele_fraction} \
679             --smoothing-credible-interval-threshold-copy-ratio ${default="2.0" smoothing_threshold_copy_ratio} \
680             --smoothing-credible-interval-threshold-allele-fraction ${default="2.0" smoothing_threshold_allele_fraction} \
681             --maximum-number-of-smoothing-iterations ${default="10" max_num_smoothing_iterations} \
682             --number-of-smoothing-iterations-per-fit ${default="0" num_smoothing_iterations_per_fit} \
683             --output ${output_dir_} \
684             --output-prefix ${entity_id}
686         # We need to create the file even if the above command doesn't so we have something to delocalize
687         # If no file is created by the above task then it will copy out an empty file
688         touch ${output_dir_}/${entity_id}.hets.normal.tsv
689     >>>
        # cpu defaults to 1 and preemptible to 5 when the corresponding optional inputs are unset.
691     runtime {
692         #docker: "${gatk_docker}"
693         memory: machine_mem_mb + " MB"
694         disks: "local-disk " + disk_space_gb + if use_ssd then " SSD" else " HDD"
695         cpu: select_first([cpu, 1])
696         preemptible: select_first([preemptible_attempts, 5])
697     }
        # All outputs live under output_dir_ and start with entity_id. normal_het_allelic_counts is
        # the file the script touches, so it may be empty when the tool itself did not write it.
699     output {
700         File het_allelic_counts = "${output_dir_}/${entity_id}.hets.tsv"
701         File normal_het_allelic_counts = "${output_dir_}/${entity_id}.hets.normal.tsv"
702         File copy_ratio_only_segments = "${output_dir_}/${entity_id}.cr.seg"
703         File copy_ratio_legacy_segments = "${output_dir_}/${entity_id}.cr.igv.seg"
704         File allele_fraction_legacy_segments = "${output_dir_}/${entity_id}.af.igv.seg"
705         File modeled_segments_begin = "${output_dir_}/${entity_id}.modelBegin.seg"
706         File copy_ratio_parameters_begin = "${output_dir_}/${entity_id}.modelBegin.cr.param"
707         File allele_fraction_parameters_begin = "${output_dir_}/${entity_id}.modelBegin.af.param"
708         File modeled_segments = "${output_dir_}/${entity_id}.modelFinal.seg"
709         File copy_ratio_parameters = "${output_dir_}/${entity_id}.modelFinal.cr.param"
710         File allele_fraction_parameters = "${output_dir_}/${entity_id}.modelFinal.af.param"
711     }
714 task CallCopyRatioSegments {
        # Runs GATK CallCopyRatioSegments on a copy-ratio segments file and exposes the called
        # segments file plus its ".igv.seg" companion.
        #
        # Inputs:
        #   entity_id           - prefix for the output file names
        #   copy_ratio_segments - passed to --input
        #   the four Float?     - neutral-segment bounds and z-score thresholds; when unset, the
        #     thresholds          default="..." literals in the command (0.9, 1.1, 2.0, 2.0) are used
        #   gatk4_jar_override  - only referenced on the '#'-prefixed export line of the script
715     String entity_id
716     File copy_ratio_segments
717     Float? neutral_segment_copy_ratio_lower_bound
718     Float? neutral_segment_copy_ratio_upper_bound
719     Float? outlier_neutral_segment_copy_ratio_z_score_threshold
720     Float? calling_copy_ratio_z_score_threshold
721     File? gatk4_jar_override
723     # Runtime parameters
        # gatk_docker is declared but unused as written: the docker runtime attribute is commented out.
724     String gatk_docker
725     Int? mem_gb
        # NOTE(review): disk_space_gb has no default and is concatenated straight into the `disks`
        # string below; presumably the calling workflow always supplies it - confirm.
726     Int? disk_space_gb
727     Boolean use_ssd = false
728     Int? cpu
729     Int? preemptible_attempts
        # Machine memory is mem_gb (default 7) * 1000 MB; the JVM heap (-Xmx) is set 1000 MB lower.
731     Int machine_mem_mb = select_first([mem_gb, 7]) * 1000
732     Int command_mem_mb = machine_mem_mb - 1000
734     command <<<
735         set -e
736         #export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override}
738         gatk --java-options "-Xmx${command_mem_mb}m" CallCopyRatioSegments \
739             --input ${copy_ratio_segments} \
740             --neutral-segment-copy-ratio-lower-bound ${default="0.9" neutral_segment_copy_ratio_lower_bound} \
741             --neutral-segment-copy-ratio-upper-bound ${default="1.1" neutral_segment_copy_ratio_upper_bound} \
742             --outlier-neutral-segment-copy-ratio-z-score-threshold ${default="2.0" outlier_neutral_segment_copy_ratio_z_score_threshold} \
743             --calling-copy-ratio-z-score-threshold ${default="2.0" calling_copy_ratio_z_score_threshold} \
744             --output ${entity_id}.called.seg
745     >>>
        # cpu defaults to 1 and preemptible to 5 when the corresponding optional inputs are unset.
747     runtime {
748         #docker: "${gatk_docker}"
749         memory: machine_mem_mb + " MB"
750         disks: "local-disk " + disk_space_gb + if use_ssd then " SSD" else " HDD"
751         cpu: select_first([cpu, 1])
752         preemptible: select_first([preemptible_attempts, 5])
753     }
        # called_copy_ratio_segments is the --output path from the command. The ".called.igv.seg"
        # file is not named on the command line - presumably the tool writes it next to --output;
        # verify against the GATK version in use.
755     output {
756         File called_copy_ratio_segments = "${entity_id}.called.seg"
757         File called_copy_ratio_legacy_segments = "${entity_id}.called.igv.seg"
758     }
761 task PlotDenoisedCopyRatios {
        # Runs GATK PlotDenoisedCopyRatios for one sample and exposes the two plots plus the four
        # MAD text files, each also parsed into a Float value.
        #
        # Inputs:
        #   entity_id                - output file prefix (--output-prefix)
        #   standardized_copy_ratios - passed to --standardized-copy-ratios
        #   denoised_copy_ratios     - passed to --denoised-copy-ratios
        #   ref_fasta_dict           - passed to --sequence-dictionary
        #   minimum_contig_length    - optional; "1000000" is used when unset
        #   output_dir               - optional output directory; "out" is used when unset
        #   gatk4_jar_override       - only referenced on the '#'-prefixed export line of the script
762     String entity_id
763     File standardized_copy_ratios
764     File denoised_copy_ratios
765     File ref_fasta_dict
766     Int? minimum_contig_length
767     String? output_dir
768     File? gatk4_jar_override
770     # Runtime parameters
        # gatk_docker is declared but unused as written: the docker runtime attribute is commented out.
771     String gatk_docker
772     Int? mem_gb
        # NOTE(review): disk_space_gb has no default and is concatenated straight into the `disks`
        # string below; presumably the calling workflow always supplies it - confirm.
773     Int? disk_space_gb
774     Boolean use_ssd = false
775     Int? cpu
776     Int? preemptible_attempts
        # Machine memory is mem_gb (default 7) * 1000 MB; the JVM heap (-Xmx) is set 1000 MB lower.
778     Int machine_mem_mb = select_first([mem_gb, 7]) * 1000
779     Int command_mem_mb = machine_mem_mb - 1000
781     # If optional output_dir not specified, use "out"
782     String output_dir_ = select_first([output_dir, "out"])
784     command <<<
785         set -e
786         #export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override}
788         gatk --java-options "-Xmx${command_mem_mb}m" PlotDenoisedCopyRatios \
789             --standardized-copy-ratios ${standardized_copy_ratios} \
790             --denoised-copy-ratios ${denoised_copy_ratios} \
791             --sequence-dictionary ${ref_fasta_dict} \
792             --minimum-contig-length ${default="1000000" minimum_contig_length} \
793             --output ${output_dir_} \
794             --output-prefix ${entity_id}
795     >>>
        # cpu defaults to 1 and preemptible to 5 when the corresponding optional inputs are unset.
797     runtime {
798         #docker: "${gatk_docker}"
799         memory: machine_mem_mb + " MB"
800         disks: "local-disk " + disk_space_gb + if use_ssd then " SSD" else " HDD"
801         cpu: select_first([cpu, 1])
802         preemptible: select_first([preemptible_attempts, 5])
803     }
        # All outputs live under output_dir_ and start with entity_id. Each *_value output is the
        # numeric content of the text file declared just before it.
805     output {
806         File denoised_copy_ratios_plot = "${output_dir_}/${entity_id}.denoised.png"
807         File denoised_copy_ratios_lim_4_plot = "${output_dir_}/${entity_id}.denoisedLimit4.png"
808         File standardized_MAD = "${output_dir_}/${entity_id}.standardizedMAD.txt"
809         Float standardized_MAD_value = read_float(standardized_MAD)
810         File denoised_MAD = "${output_dir_}/${entity_id}.denoisedMAD.txt"
811         Float denoised_MAD_value = read_float(denoised_MAD)
812         File delta_MAD = "${output_dir_}/${entity_id}.deltaMAD.txt"
813         Float delta_MAD_value = read_float(delta_MAD)
814         File scaled_delta_MAD = "${output_dir_}/${entity_id}.scaledDeltaMAD.txt"
            # The scaled delta MAD file is read as text first because it may contain the literal "NA",
            # which read_float cannot parse. "NA" is reported as 0.0, so downstream consumers cannot
            # tell it apart from a genuine zero. Both branches of the conditional are Float literals/
            # expressions so the expression has a single, unambiguous type.
815         String scaled_delta_MAD_str = read_string(scaled_delta_MAD)
816         Float scaled_delta_MAD_value = if scaled_delta_MAD_str == "NA" then 0.0 else read_float(scaled_delta_MAD)
            # Previous direct parse, kept for reference (fails when the file holds "NA"):
817         #Float scaled_delta_MAD_value = read_float(scaled_delta_MAD)
818     }
821 task PlotModeledSegments {
        # Runs GATK PlotModeledSegments for one sample and exposes the resulting PNG.
        #
        # Inputs:
        #   entity_id             - output file prefix (--output-prefix)
        #   denoised_copy_ratios  - passed to --denoised-copy-ratios
        #   het_allelic_counts    - passed to --allelic-counts
        #   modeled_segments      - passed to --segments
        #   ref_fasta_dict        - passed to --sequence-dictionary
        #   minimum_contig_length - optional; "1000000" is used when unset
        #   output_dir            - optional output directory; "out" is used when unset
        #   gatk4_jar_override    - only referenced on the '#'-prefixed export line of the script
822     String entity_id
823     File denoised_copy_ratios
824     File het_allelic_counts
825     File modeled_segments
826     File ref_fasta_dict
827     Int? minimum_contig_length
828     String? output_dir
829     File? gatk4_jar_override
831     # Runtime parameters
        # gatk_docker is declared but unused as written: the docker runtime attribute is commented out.
832     String gatk_docker
833     Int? mem_gb
        # NOTE(review): disk_space_gb has no default and is concatenated straight into the `disks`
        # string below; presumably the calling workflow always supplies it - confirm.
834     Int? disk_space_gb
835     Boolean use_ssd = false
836     Int? cpu
837     Int? preemptible_attempts
        # Machine memory is mem_gb (default 7) * 1000 MB; the JVM heap (-Xmx) is set 1000 MB lower.
839     Int machine_mem_mb = select_first([mem_gb, 7]) * 1000
840     Int command_mem_mb = machine_mem_mb - 1000
842     # If optional output_dir not specified, use "out"
843     String output_dir_ = select_first([output_dir, "out"])
845     command <<<
846         set -e
847         #export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override}
849         gatk --java-options "-Xmx${command_mem_mb}m" PlotModeledSegments \
850             --denoised-copy-ratios ${denoised_copy_ratios} \
851             --allelic-counts ${het_allelic_counts} \
852             --segments ${modeled_segments} \
853             --sequence-dictionary ${ref_fasta_dict} \
854             --minimum-contig-length ${default="1000000" minimum_contig_length} \
855             --output ${output_dir_} \
856             --output-prefix ${entity_id}
857     >>>
        # cpu defaults to 1 and preemptible to 5 when the corresponding optional inputs are unset.
859     runtime {
860         #docker: "${gatk_docker}"
861         memory: machine_mem_mb + " MB"
862         disks: "local-disk " + disk_space_gb + if use_ssd then " SSD" else " HDD"
863         cpu: select_first([cpu, 1])
864         preemptible: select_first([preemptible_attempts, 5])
865     }
        # The plot is expected under output_dir_ with the entity_id prefix.
867     output {
868         File modeled_segments_plot = "${output_dir_}/${entity_id}.modeled.png"
869     }