2 import site
,os
; site
.addsitedir('/'.join((os
.path
.expanduser("~"),'git/toGit/python/Gsite')))
3 #import sys; print('\n'.join(sys.path))
7 from collections
import OrderedDict
8 from collections
import defaultdict
14 from xopen
import xopen
17 def __missing__(self
, key
):
18 value
= self
[key
] = type(self
)()
25 with
xopen('meth.cg.rate.bz2','rt') as f
:
26 #with open('meth.cg.rate','rt') as f:
27 tsvin
= csv
.reader(f
, delimiter
='\t')
29 if row
[0].startswith('#'):
32 #Rate = (int(row[3])+int(row[6]))/(int(row[4])+int(row[7]))
33 MethRate
[row
[0]][int(row
[1])] = float(row
[9])
35 #pprint.pprint(MethRate)
36 print(Galaxy
.asizeof
.asizeof(MethRate
))
38 #CDSdat = OrderedDict()
40 CDSdat
= defaultdict(set)
42 with
xopen('CDS.bed.gz','rt') as f
:
43 #with open('CDS.bed','rt') as f:
44 tsvin
= csv
.reader(f
, delimiter
='\t')
46 ZoneLength
= 1 + int(row
[2]) - int(row
[1])
47 #print((ZoneLength,row))
48 theKey
= '\t'.join(row
[0:3])
49 #CDSdat[theKey]=','.join(filter(None,(CDSdat.setdefault(theKey),row[3])))
50 #CDSdat[theKey] |= {row[3]}
51 CDSdat
[theKey
].add(row
[3])
53 #pprint.pprint(CDSdat)
54 print(Galaxy
.asizeof
.asizeof(CDSdat
))
55 print((MethRateCnt
,CDSdatCnt
,len(MethRate
),len(CDSdat
)))
57 with
open('meth.zones','wt') as outf
:
58 for k
, v
in CDSdat
.items():
59 v
= ','.join(sorted(v
))
60 (Chrid
,pLeft
,pRight
) = k
.split('\t')
63 #print((k,v,CDSdat[k],Chrid,pLeft,pRight))
64 ZoneLen
= 1 + pRight
- pLeft
65 ZoneStep
= math
.ceil(float(ZoneLen
)/ZoneNum
)
66 ZoneCnt
= math
.ceil(ZoneLen
/ZoneStep
)
68 ZoneValueCntV
= ZoneCnt
70 while startPos
<= pRight
:
71 endPos
= startPos
+ ZoneStep
72 if endPos
> pRight
: endPos
= pRight
75 for p
in range(startPos
,endPos
):
76 if p
in MethRate
[Chrid
]:
77 ZoneSum
+= MethRate
[Chrid
][p
]
80 ZoneValues
.append(naStr
)
83 ZoneValue1
= float(ZoneSum
/ZoneHits
)
84 ZoneValues
.append("%.8f" % ZoneValue1
)
86 ZoneValues
= [v
,k
,','.join(map(str,(ZoneStep
,ZoneCnt
,ZoneValueCntV
)))] + ZoneValues
88 print('\t'.join(ZoneValues
),file=outf
,flush
=True)
90 #for theKey in CDSdat.keys():
91 # print(theKey,CDSdat[theKey])
93 scp t.py blc4:/ldfssz1/FGI/users/huxuesong/t/lc/
95 f=magic.from_file('CDS.bed.gz')
97 f=magic.from_file('meth.cg.rate.bz2')
99 f=magic.from_file('adapter.fa')
104 (164859, 202393, 25, 198788)