4 from toolshed
import reader
, nopen
5 from collections
import defaultdict
8 from itertools
import cycle
12 colors
= cycle(seaborn
.color_palette('Set1', 8))
17 return op
.basename(bam
).rsplit(".", 1)[0].split("-")[0]
18 return op
.basename(bam
).rsplit(".", 1)[0].split("-")[0] + ("-trim" if "trim" in bam
else "")
19 return op
.dirname(bam
)
21 def count_bases(cigar
, patt
=re
.compile("\d+[IM]")):
22 return sum(int(p
[:-1]) for p
in patt
.findall(cigar
))
24 def count_on_off(bam
, flags
, pad
, bases
=BASES
):
26 #if not "last" in bam: flags = flags + " -f 0x2"
30 rcounts
= defaultdict(int)
32 posn
= re
.compile(".*?_(.+):(\d+)-(\d+)")
34 # chr1b:3001315:+__chr1b:3001467:- 99 chr1 3001316 60 100M
35 print >>sys
.stderr
, bam
36 for toks
in reader("|samtools view {flags} {bam}".format(**locals()),
38 if toks
[0][0] == "@": continue
39 if toks
[0].endswith(("_R1", "_R2")):
40 toks
[0] = toks
[0][:-3]
45 if toks
[0].endswith("_BAD"):
46 off_count
[qual
] += (count_bases(cigar
) if bases
else 1)
49 rname
, flag
= toks
[0], int(toks
[1])
51 # @2_chr4:72040172-72040686_R1
52 if rcounts
[rname
] > 2:
53 raise Exception("%s:%s", (bam
, rname
))
56 chrom
, start
, end
= re
.match(posn
, rname
).groups(0)
58 print >>sys
.stderr
, rname
61 pos
= int(start
if flag
& 0x40 else end
)
62 on
= chrom
== toks
[2] and abs(pos
- int(toks
[3])) <= pad
64 on_count
[qual
] += (count_bases(cigar
) if bases
else 1)
66 off_count
[qual
] += (count_bases(cigar
) if bases
else 1)
68 on_count
= np
.cumsum(on_count
[::-1])[::-1]
69 off_count
= np
.cumsum(off_count
[::-1])[::-1]
70 return off_count
, on_count
72 FLAGS
="-F%i" % (0x4 |
0x100 |
0x200)
73 def main(bams
, reads
=None, flags
=FLAGS
, pad
=12002):
74 if not reads
.isdigit():
75 reads
= 2 * float(nopen("|bioawk -c fastx 'END { print NR }' %s" % reads
).next())
77 reads
= 2.0 * int(reads
)
79 if any('trim' in b
for b
in bams
):
80 assert all('trim' in b
for b
in bams
), [b
for b
in bams
if not 'trim'
86 if BASES
: denom
*= 100.
88 counts
[bam
] = count_on_off(bam
, flags
, pad
)
92 print >>out
, "qual\tmethod\toff\ton"
94 for qual
in range(0, 256):
96 print >>out
, "{qual}\t{bam}\t{off}\t{on}".format(
97 qual
=qual
, bam
=name(b
),
98 off
=counts
[b
][0][qual
] / denom
,
99 on
=counts
[b
][1][qual
] / denom
)
100 print >>sys
.stderr
, "wrote", out
.name
104 # multiply by 100 bases per read.
107 symbol
= 'o' if len(set(counts
[bam
][0])) < 3 else '.'
108 nmax
= max(i
for i
, v
in enumerate(counts
[bam
][1]) if v
> 0)
109 p
, = pl
.plot(counts
[bam
][0][1:nmax
+ 1] / denom
,
110 counts
[bam
][1][1:nmax
+ 1] / denom
,
114 marker
=symbol
, label
=name(bam
))
116 names
.append(name(bam
))
117 pl
.xlabel('off target')
118 pl
.ylabel('on target')
119 pl
.legend(pts
, names
, loc
='lower right')
122 print >>sys
.stderr
, reads
126 if __name__
== "__main__":
128 p
= argparse
.ArgumentParser()
129 p
.add_argument("--reads", help="reads file", required
=True)
130 p
.add_argument("bams", nargs
="+")
134 main(a
.bams
, reads
=a
.reads
)