limit fstBC to 30bp in Python3 ver.
[GalaxyCodeBases.git] / python / etc / samflagfilter.py
blob7a920b3d1d404f29e27404f1e43cd6f83231694e
1 #!/usr/bin/env python3
2 import sys
4 def main():
5 if len(sys.argv) == 1 :
6 print('[x]No arguments given!',file=sys.stderr,flush=True)
8 print("Name of the script:",sys.argv[0])
9 print("Number of arguments:", len(sys.argv))
10 print("Arguments:" , str(sys.argv))
11 if len(sys.argv) < 3 :
12 print('Usage:',sys.argv[0],'<in.bam> <outprefix> [verbose=0]',file=sys.stderr,flush=True)
13 exit(0)
14 try:
15 verbose = int(sys.argv[3])
16 except: # `except IndexError:` and `except ValueError:`
17 verbose = 0
19 inBAMname = sys.argv[1]
20 outPrefix = sys.argv[2]
21 print('From:[{}], To:[{}.(Watson|Crick|Dropped).bam].\nVerbose: [{}].'.format(inBAMname,outPrefix,verbose),file=sys.stderr,flush=True)
22 splitBSbam(inBAMname,outPrefix,3,verbose)
24 class DebugWrite:
25 def __init__(self, name=None, verbose=1):
26 self._name = name
27 self.verbose = verbose
29 @property
30 def name(self):
31 return self._name
32 @name.setter
33 def name(self, name):
34 self._name = name
36 def write(self, read):
37 if self.verbose > 1:
38 print ("[{}]\t{}\t{}".format(self.name, read.query_name, read.flag))
39 def close(self):
40 return 0
42 def splitBSbam(inBAMname,outPrefix,n_threads=3,verbose=0):
43 import pysam
44 samfile = pysam.AlignmentFile(inBAMname, "rb")
45 if verbose == 0:
46 fileWatson = pysam.AlignmentFile('.'.join((outPrefix,'Watson','bam')), "wb", template=samfile, threads=n_threads)
47 fileCrick = pysam.AlignmentFile('.'.join((outPrefix,'Crick','bam')), "wb", template=samfile, threads=n_threads)
48 fileDropped = pysam.AlignmentFile('.'.join((outPrefix,'Dropped','bam')), "wb", template=samfile, threads=n_threads)
49 else:
50 fileWatson = DebugWrite('oWatson',verbose)
51 fileCrick = DebugWrite('oCrick',verbose)
52 fileDropped = DebugWrite('oDropped',verbose)
53 from collections import defaultdict
54 stats = defaultdict(int)
55 stats['Watson']=stats['Crick']=0 # make them be first in `str(stats)`.
56 for read in samfile:
57 if read.flag & 0xF00 :
58 fileDropped.write(read)
59 stats['Dropped_Supplementary'] += 1
60 continue
61 try:
62 tagYD = read.get_tag('YD')
63 stats['etc_YD'] += 1
64 if tagYD == 'f':
65 fileWatson.write(read)
66 stats['Watson'] += 1
67 elif tagYD == 'r':
68 fileCrick.write(read)
69 stats['Crick'] += 1
70 else:
71 print(tagYD,read.query_name,read.flag)
72 exit(3)
73 #continue
74 except KeyError:
75 #print('[!]',sys.exc_info()[1]) # (<class 'KeyError'>, KeyError("tag 'YD' not present"), <traceback object at 0x10f664ec8>)
76 #pass
77 if read.is_unmapped:
78 stats['etc_nonYD'] += 1
79 if read.mate_is_unmapped:
80 fileDropped.write(read)
81 stats['Dropped_PEUnmapped'] += 1
82 elif (read.is_read2 and (not read.mate_is_reverse)) or (read.is_read1 and read.mate_is_reverse) :
83 fileWatson.write(read)
84 stats['Watson'] += 1
85 elif (read.is_read2 and read.mate_is_reverse) or (read.is_read1 and (not read.mate_is_reverse)) :
86 fileCrick.write(read)
87 stats['Crick'] += 1
88 else:
89 print (str(read))
90 exit(4)
91 else: # not bwa-meth
92 stats['etc_nonBWA-meth'] += 1
93 if (read.is_read1 and (not read.is_reverse)) or (read.is_read2 and read.is_reverse) :
94 fileWatson.write(read)
95 stats['Watson'] += 1
96 elif (read.is_read1 and read.is_reverse) or (read.is_read2 and (not read.is_reverse)) :
97 fileCrick.write(read)
98 stats['Crick'] += 1
99 else:
100 print (str(read))
101 exit(2)
102 else:
103 print(str(stats),file=sys.stderr,flush=True)
104 #import pprint
105 #pprint.pprint(stats, indent=2, stream=sys.stderr)
106 fileDropped.close()
107 fileWatson.close()
108 fileCrick.close()
109 samfile.close()
111 if __name__ == "__main__":
112 main() # time ./samflagfilter.py t.bam tt2