1 from __future__
import division
6 from toolshed
import reader
9 from itertools
import cycle
13 return op
.basename(bam
).rsplit(".", 1)[0].split("-")[0]
16 fname
= fname
[0] if not isinstance(fname
, basestring
) else fname
17 print >>sys
.stderr
, fname
18 qual_count
= [0] * 256
19 for sam_line
in (x
.rstrip().split("\t") for x
in nopen(fname
) if not
21 qual
= int(sam_line
[4])
23 # each qual should be the sum of all quals with a lower qual than it
24 return np
.cumsum(qual_count
[::-1])[::-1]
26 from toolshed
import nopen
, pmap
28 def count_both(bam
, regions
, flags
):
29 #if not "last" in bam: flags += " -f0x2"
30 off
, on
= OFF
.format(**locals()), ON
.format(**locals())
31 return bam
, map(counter
, [off
, on
])
33 def count_bam(bam
, flags
):
34 return int(next(nopen("|samtools view -c {flags} {bam}".format(**locals()))))
36 ON
= "| samtools view {bam} -L {regions} {flags}"
37 OFF
= "| bedtools intersect -ubam -abam {bam} -b {regions} -wa -v \
38 | samtools view - {flags}"
40 def main(regions
, bams
, reads
=None, flags
="-F%i" % (0x100 |
0x4 |
0x200 |
0x400),
42 r2
= open(tempfile
.mktemp(), 'w')
43 for toks
in reader(regions
, header
=False):
44 if toks
[0][0] == "@" or not (toks
[1] + toks
[2]).isdigit(): continue
45 toks
[1] = str(max(0, int(toks
[1]) - pad
))
46 toks
[2] = str(int(toks
[2]) + pad
)
47 print >>r2
, "\t".join(toks
)
54 reads
= int(nopen("|bioawk -c fastx 'END { print NR }' %s" % reads
).next()) * 2.0
57 colors
= cycle('rgbkmy')
60 counts
= dict(pmap(count_both
, ((bam
, regions
, flags
)
64 nreads
= count_bam(bam
, flags
) if reads
== "bam" else reads
65 bam_reads
[bam
] = nreads
66 symbol
= 'o' if len(set(counts
[bam
][0])) < 3 else '.'
67 pl
.plot(counts
[bam
][0] / float(nreads
), counts
[bam
][1] / float(nreads
),
68 '%s%s' % (colors
.next(), symbol
), label
=name(bam
))
70 pl
.xlabel('off target')
71 pl
.ylabel('on target')
72 pl
.legend(loc
='lower right')
79 print >>out
, "qual\tmethod\toff\ton"
81 for qual
in range(0, 256):
83 print >>out
, "{qual}\t{bam}\t{off}\t{on}".format(
84 qual
=qual
, bam
=name(b
),
85 off
=counts
[b
][0][qual
] / bam_reads
[bam
],
86 on
=counts
[b
][1][qual
] / bam_reads
[bam
])
87 print >>sys
.stderr
, "wrote", out
.name
89 if __name__
== "__main__":
91 p
= argparse
.ArgumentParser()
92 p
.add_argument("--reads", help="reads file")
93 p
.add_argument("regions")
94 p
.add_argument("bams", nargs
="+")
98 main(a
.regions
, a
.bams
, reads
=a
.reads
)