4 SamplesList
= ('D3B_Crick', 'D3B_Watson', 'Normal_Crick', 'Normal_Watson', 'D3B_WGS', 'Normal_WGS')
6 #from collections import defaultdict
7 #DepthCnt = {key:defaultdict(int) for key in SamplesList}
8 #yDepthCnt = defaultdict(lambda: defaultdict(int))
13 if len(sys
.argv
) < 3 :
14 print('Usage:',sys
.argv
[0],'<samtools.depth.gz> <out.tsv> [verbose=0]',file=sys
.stderr
,flush
=True)
17 verbose
= int(sys
.argv
[3])
18 except: # `except IndexError:` and `except ValueError:`
21 inDepthFile
= sys
.argv
[1]
23 print('From:[{}], To:[{}].\nVerbose: [{}].'.format(inDepthFile
,outFile
,verbose
),file=sys
.stderr
,flush
=True)
24 RecordCnt
,MaxDepth
,cDepthCnt
,cDepthStat
= inStat(inDepthFile
,verbose
)
26 cDepthStat
[k
][2] = cDepthStat
[k
][0] / RecordCnt
# E(X)
27 cDepthStat
[k
][3] = cDepthStat
[k
][1] / RecordCnt
# E(X^2)
28 cDepthStat
[k
][4] = math
.sqrt(cDepthStat
[k
][3] - cDepthStat
[k
][2]*cDepthStat
[k
][2]) # E(X^2)-E(X)^2
29 tsvout
= open(outFile
, 'wt')
30 print('#{}\t{}'.format('Depth','\t'.join(SamplesList
)),file=tsvout
)
31 #RecordCntLength = len(str(RecordCnt))
32 print( '#N={},SD:\t{}'.format(RecordCnt
,'\t'.join(str(round(cDepthStat
[col
][4],1)) for col
in SamplesList
)),file=tsvout
)
33 for depth
in range(0,MaxDepth
+1):
34 #print( '{}\t{}'.format(depth,'\t'.join(str(DepthCnt[col][depth]) for col in SamplesList)) )
35 #print( '{}\t{}'.format(depth,'\t'.join(str(yDepthCnt[depth][col]) for col in SamplesList)) )
36 print( '{}\t{}'.format(depth
,'\t'.join(str(cDepthCnt
[col
][depth
]) for col
in SamplesList
)),file=tsvout
)
38 #print('#MaxDepth={}'.format(MaxDepth),file=tsvout)
42 def inStat(inDepthFile
,verbose
):
45 from collections
import Counter
46 # Looking up things in global scope takes longer then looking up stuff in local scope. <https://stackoverflow.com/a/54645851/159695>
47 cDepthCnt
= {key
:Counter() for key
in SamplesList
}
48 cDepthStat
= {key
:[0,0,0,0,0] for key
in SamplesList
} # x and x^2
51 with gzip
.open(inDepthFile
, 'rt') as tsvin
:
52 tsvin
= csv
.DictReader(tsvin
, delimiter
='\t', fieldnames
=('ChrID','Pos')+SamplesList
)
53 #headers = tsvin.fieldnames
57 #print(', '.join(row[col] for col in SamplesList))
60 theValue
= int(row
[k
])
61 if theValue
> MaxDepth
:
63 #DepthCnt[k][theValue] += 1 # PyPy3:30.54 ns, Python3:22.23 ns
64 #yDepthCnt[theValue][k] += 1 # PyPy3:30.47 ns, Python3:21.50 ns
65 cDepthCnt
[k
][theValue
] += 1 # PyPy3:29.82 ns, Python3:30.61 ns
66 cDepthStat
[k
][0] += theValue
67 cDepthStat
[k
][1] += theValue
* theValue
68 #print(MaxDepth,DepthCnt)
69 except KeyboardInterrupt:
70 print('\n[!]Ctrl+C pressed.',file=sys
.stderr
,flush
=True)
72 print('[!]Lines Read:[{}], MaxDepth is [{}].'.format(RecordCnt
,MaxDepth
),file=sys
.stderr
,flush
=True)
73 return RecordCnt
,MaxDepth
,cDepthCnt
,cDepthStat
75 if __name__
== "__main__":
76 main() # time python3 ./samdepthplot.py t.tsv.gz 1
79 get_implementation_name
= platform
.python_implementation()
80 if get_implementation_name
== 'CPython':
82 # get_size only works under Python, not PyPy.
83 mylibfile
= "../lib/pysize/pysize.py"
84 with
open(mylibfile
) as f
:
85 code
= compile(f
.read(), mylibfile
, 'exec')
86 exec(code
, globals(), locals())
87 print('[!]cDepthCnt:',get_size(cDepthCnt
),file=sys
.stderr
,flush
=True)
88 print('[!]DepthCnt: ',get_size(DepthCnt
),file=sys
.stderr
,flush
=True) # smaller
89 print('[!]yDepthCnt:',get_size(yDepthCnt
),file=sys
.stderr
,flush
=True)
90 except FileNotFoundError
:
100 time pypy3 -m cProfile -o t2.cprof ./samdepthstat.py t.tsv.gz t.out
101 pyprof2calltree -i t2.cprof -k
103 time ./samdepthstat.py t.tsv.gz t.out