2 In order to avoid double counting of bases in overlapping paired end reads,
3 this script accept mapped reads in SAM format, use the position to determine
4 overlap, and set the lower base-quality of pair overlap to 0.
5 It is assumed that the reads are name-sorted as when they come from the
10 bwa mem reference.fa R1.fq R2.fq \
11 | bam-merge-pairs.py \
12 | samtools view -bS - > merged.bam
14 from __future__
import print_function
15 from __future__
import division
16 import itertools
as it
17 from operator
import itemgetter
18 from bwameth
import Bam
21 line_iter
= (x
.rstrip("\r\n").split("\t") for x
in sys
.stdin
)
24 for g
, pair
in it
.groupby(line_iter
, itemgetter(0)):
26 print("\n".join("\t".join(line
) for line
in pair
))
30 print "\t".join(pair
[0])
33 assert len(pair
) == 2, pair
34 left
, right
= [Bam(b
) for b
in pair
]
36 # TODO: use left_shift(), right_shift()
37 if left
.pos
+ left
.tlen
< right
.pos
:
43 # L ------------------>
44 # R <------------------
45 ovl_bases
= right
.pos
, left
.pos
+ left
.tlen