11 "DS362249":"DS362249.1", "DS362248":"DS362248.1", "DS362247":"DS362247.1", "DS362246":"DS362246.1", "DS362245":"DS362245.1", "DS362244":"DS362244.1", "DS362243":"DS362243.1", "DS362242":"DS362242.1", "DS362241":"DS362241.1", "DS362240":"DS362240.1", "DS362239":"DS362239.1", "DS362238":"DS362238.1", "DS362237":"DS362237.1", "DS362236":"DS362236.1", "DS362235":"DS362235.1", "DS362234":"DS362234.1", "DS362233":"DS362233.1", "DS362232":"DS362232.1", "DS362231":"DS362231.1", "DS362230":"DS362230.1", "DS362229":"DS362229.1", "DS362228":"DS362228.1", "DS362227":"DS362227.1", "DS362226":"DS362226.1", "DS362225":"DS362225.1", "DS362224":"DS362224.1", "DS362223":"DS362223.1", "DS362222":"DS362222.1", "DS362221":"DS362221.1", "DS362220":"DS362220.1", "DS362219":"DS362219.1", "DS362218":"DS362218.1", "DS362217":"DS362217.1", "DS499581":"DS499581.1", "DS499580":"DS499580.1", "DS499579":"DS499579.1", "DS499578":"DS499578.1", "DS499577":"DS499577.1", "DS499576":"DS499576.1", "DS499575":"DS499575.1", "DS499574":"DS499574.1", "DS499573":"DS499573.1", "DS499572":"DS499572.1", "DS499571":"DS499571.1", "DS499570":"DS499570.1", "CP001107":"CP001107.1", "CP001104":"CP001104.1", "CP001105":"CP001105.1", "CP001106":"CP001106.1"
15 f
= gzip
.open('snpcandidatforpcr.out.gz', 'rt')
16 tsvin
= csv
.reader(f
, delimiter
='\t')
18 if len(row
)==0: continue
20 ChrID
= row
[0].split('.')[1]
23 Alleles
= row
[4].split(',')
24 Allele1
= Alleles
[0].split('|')
25 Allele2
= Alleles
[1].split('|')
26 AlleleP
= { Allele1
[0]:sum(map(int,Allele1
[2:])), Allele2
[0]:sum(map(int,Allele2
[2:])) }
28 theKey
= '\t'.join((ChrIDs
[ChrID
],thePos
))
29 SNPdata
[theKey
] = AlleleP
30 SNPchrID
.add(ChrIDs
[ChrID
])
31 #print((ChrID,thePos,theRef,theKey,AlleleP))
36 #pprint.pprint(SNPchrID)
37 print('[!]SNP read:%d.' % (SNPcnt
))
42 vcf_reader
= vcf
.Reader(filename
='microb.vcf.gz',compressed
=True)
43 for theChrid
in sorted(SNPchrID
):
44 for record
in vcf_reader
.fetch(theChrid
):
45 theKey
= '\t'.join((record
.CHROM
,str(record
.POS
)))
50 theBases
= [record
.REF
]
51 for abp
in record
.ALT
:
52 theBases
.append(str(abp
))
54 for abp
in SNPdata
[theKey
]:
55 SNPdatastr
.append( ''.join([abp
,':',str(SNPdata
[theKey
][abp
]) ]) )
56 print('%4d %s\t%s\t%s'%(SNPcnt
, theKey
.replace('\t',':'), ','.join(SNPdatastr
),','.join(theBases
)))
57 #print(SNPdata[theKey])
58 #print(record.genotype('ERR589860'))
61 for sample
in record
.samples
:
63 theADstr
= ','.join(map(str, sample
['AD'] if isinstance(sample
['AD'], list) else list(str(sample
['AD']))))
65 if type(sample['AD']) is list:
66 theADstr = ','.join(map(str,sample['AD']))
67 elif type(sample['AD']) is int:
68 theADstr = str(sample['AD'])
72 print('%s:%s\t%4d %s=%s'%(sample
.sample
,sample
.gt_bases
,sample
['DP'],sample
['GT'],theADstr
))
73 Depths
.append(sample
['DP'])
74 if sample
.gt_bases
!= None:
75 SampleGTs
.add(sample
.gt_bases
)
77 AvgDepth
= sum(Depths
)/len(Depths
)
78 theDepth
= int(round(AvgDepth
,-1))
79 DepthStat
[theDepth
] = 1 + DepthStat
.setdefault(theDepth
,0)
80 GTStat
[len(SampleGTs
)] = 1 + GTStat
.setdefault(len(SampleGTs
),0)
81 print((len(SampleGTs
),AvgDepth
,theDepth
))
82 print((GTStat
,DepthStat
))
85 gzcat snpcandidatforpcr.out.gz|awk 'NF > 0 {split($0,a,".");print a[1]}'|uniq