modified: openstmerge.py
[GalaxyCodeBases.git] / etc / gatk-wdl / fm2 / tasks / fgi.wdl
blobb531415cf60e9cf6baf9f96f111a602f33747fee
1 version 1.0
3 task FilterSamReads {
4         input {
5                 File inputBam
6                 File inputBamIndex
7                 String outputBamPath
8                 Int minMatchLen = 200
9                 Boolean? createMd5File = false
11                 Int? compressionLevel
13                 Int? javaXmxMb = 1024
14                 Int? memoryMb = javaXmxMb + 512
15                 # One minute per input gigabyte.
16                 Int? timeMinutes = 1 + ceil(size(inputBam, "G") * 1)
17                 String? dockerImage = "quay.io/biocontainers/picard:2.23.8--0"
18         }
19         Array[String] FilterScriptContents = [
20                 'function accept(rec) {',
21                 '  if (rec.getReadUnmappedFlag()) return false;',
22                 '  var cigar = rec.getCigar();',
23                 '  if (cigar == null) return false;',
24                 '  var readMatch = 0;',
25                 '  for (var i=0;i < cigar.numCigarElements();++i) {',
26                 '    var ce = cigar.getCigarElement(i);',
27                 '    if (ce.getOperator().name() == "M") readMatch += ce.length;',
28                 '  }',
29                 '  if (readMatch > '+ minMatchLen +') return true;',
30                 '}',
31                 'accept(record);'
32         ]
34         command {
35                 set -e
36                 mkdir -p "$(dirname ~{outputBamPath})"
37                 JAVA_OPTS="-Xmx~{javaXmxMb}M -XX:ParallelGCThreads=1" picard \
38                 FilterSamReads \
39                 INPUT=~{sep=' INPUT=' inputBam} \
40                 OUTPUT=~{outputBamPath} \
41                 ~{"COMPRESSION_LEVEL=" + compressionLevel} \
42                 JAVASCRIPT_FILE=${write_lines(FilterScriptContents)} \
43                 FILTER=includeJavascript \
44                 CREATE_INDEX=true \
45                 CREATE_MD5_FILE=~{true="true" false="false" createMd5File}
46         }
48         output {
49                 File outputBam = outputBamPath
50                 File outputBamIndex = sub(outputBamPath, "\.bam$", ".bai")
51                 File? outputBamMd5 = outputBamPath + ".md5"
52         }
54         runtime {
55                 memory: "~{memoryMb}M"
56                 time_minutes: timeMinutes
57                 #docker: dockerImage
58         }
61 task callSNP {
62         input {
63                 File inputBam
64                 File inputBamIndex
65                 GatkIndex GatkIndex
66                 String outputPath
67                 File helperPl
68         }
69         File referenceFasta = GatkIndex.fastaFile
70         File dbsnpVCF = GatkIndex.dbsnpVCF
71         String bcfFile = outputPath + "/mpileup.bcf"
72         String snp0File = outputPath + "/snp0.gz"
73         String snp1File = outputPath + "/snp1.gz"
74         String snpFile = outputPath + "/snp.gz"
75         File SNPosFile = "bin/snpos.lst"
76         File SNPannotsFile = "bin/annots.bed.gz"
77         command {
78                 set -e
79                 mkdir -p "~{outputPath}"
80                 bcftools mpileup --threads 6 ~{inputBam} -d 30000 -Q 30 -f ~{referenceFasta} -p -Ob \
81                 -a FORMAT/AD,FORMAT/SCR,FORMAT/ADF,FORMAT/ADR \
82                 --ff UNMAP,SECONDARY,QCFAIL -B \
83                 -o ~{bcfFile}
84                 bcftools call -Oz -A -m -P 5.1e-1 ~{bcfFile} -o ~{snp0File}
85                 bcftools index ~{snp0File}
86                 bcftools annotate -a ~{dbsnpVCF} ~{snp0File} -c ID --collapse all -R ~{SNPosFile} -Oz -o ~{snp1File}
87                 bcftools index ~{snp1File}
88                 tabix ~{SNPannotsFile}
89                 bcftools annotate -a ~{SNPannotsFile} ~{snp1File} -c CHROM,FROM,TO,ID --collapse all -Oz -o ~{snpFile}
90                 bcftools index ~{snpFile}
91                 bcftools query -f'%CHROM\t[%DP\t%QUAL\t%TGT\n]' -i 'POS==501' ~{snpFile} > ~{outputPath + "/snp0.txt"}
92                 bcftools query -f'%ID\t[%DP\t%QUAL\t%TGT]\t%CHROM:%POS\n' ~{snpFile} -o ~{outputPath + "/../snpG.txt"}
93                 perl ~{helperPl} ~{outputPath + "/snp0.txt"} > ~{outputPath + "/../snp.txt"}
94         }
95         # See <https://github.com/samtools/bcftools/issues/658> for `-c -p 0.9`. This fix low recalculated BaseQ next to INDEL.
96         # `bcftools call -m -P 0.5` is not working, so use `-m -P 5.1e-1`.
98         output {
99                 File outSNP0txt = outputPath + "/snp0.txt"
100                 File outSNP1txt = outputPath + "/../snpG.txt"
101                 File outSNPtxt = outputPath + "/../snp.txt"
102                 File outbcfFile = bcfFile
103                 File outsnpFile = snpFile
104                 File outsnpIndexFile = snpFile + ".csi"
105                 File outsnp0File = snp0File
106                 File outsnp0IndexFile = snp0File + ".csi"
107         }
110 task callSTR {
111         input {
112                 File inputBam
113                 File inputBamIndex
114                 String outputPath
115                 File helperBED
116                 File helperPl
117         }
118         String awkStr = "'\{print $1\"\\t\"$4\}'"
119         command {
120                 set -e
121                 mkdir -p "~{outputPath}"
122                 samtools mpileup -l ~{helperBED} ~{inputBam} |awk ~{awkStr} > ~{outputPath + "/str0.txt"}
123                 perl ~{helperPl} ~{outputPath + "/str0.txt"} > ~{outputPath + "/../str.txt"}
124         }
125         output {
126                 File outSTR0txt = outputPath + "/str0.txt"
127                 File outSTRtxt = outputPath + "/../str.txt"
128         }
131 struct GatkIndex {
132         File fastaFile
133         File dbsnpVCF
134         Array[File] indexFiles