4 import primer3
# https://github.com/libnano/primer3-py
6 # https://brcaexchange.org/variants and click "Show All Public Data", then click "Download" to get `variants.tsv`.
7 # gzcat ~/Downloads/variants.tsv.gz|head -n30|awk -F'\t' '{if (length($124)+length($125)==2 || NR==1) print $43,$108,$122,$123,$124,$125}'|column -t
8 # DBID_LOVD Genomic_Coordinate_hg38 Chr Pos Ref Alt
9 # BRCA1_001574 chr17:g.43093299:A>G 17 43093299 A G
10 # BRCA1_003516 chr17:g.43078507:T>G 17 43078507 T G
11 # BRCA1_004379 chr17:g.43103085:A>G 17 43103085 A G
13 from os
.path
import expanduser
14 InFile
: str = r
'~/tmp/variants.tsv.0.gz'
15 InFile
= expanduser(InFile
)
17 InRef
: str = expanduser(r
'~/tmp/GRCh38.fa')
18 from pyfaidx
import Fasta
19 RefSeqs
= Fasta(InRef
)
20 #print(RefSeqs['chr1'])
21 # Random access of BGZip is not supported now, see https://github.com/mdshw5/pyfaidx/issues/126
23 #InColNames = ['DBID_LOVD','Chr','Pos','Ref','Alt','Genomic_Coordinate_hg38']
24 InColNames
= ['Chr','Pos','Ref','Alt']
27 #pd.read_table(InFile,compression='gzip',sep='\t')
33 from typing
import Dict
, List
, Tuple
34 InData
: Dict
[str,Dict
[int,Tuple
[str,List
[str]]]] = {}
37 第一种引物,上游引物3‘端设一个,下游距离300bp-400bp设置
38 第二种,目标点上游100bp设置上游引物,不要覆盖目标点,下游,200-300,
43 thePara
: Dict
[str,int] = dict(MaxAmpLen
=400, MinAmpLen
=300, P5Up1
=0, P5Up2
=100,
44 TmMax
=63, TmMin
=55, TmDeltra
=5,
45 PrimerLenMin
=25, PrimerLenMax
=36, Mode2LeftMax
=100
48 with gzip
.open(InFile
, 'rt') as tsvin
:
49 tsvin
= csv
.DictReader(tsvin
, delimiter
='\t')
50 #headers = tsvin.fieldnames
53 #print(', '.join(row[col] for col in InColNames))
55 if len(row
['Ref']) > 1 or len(row
['Alt']) > 1 :
56 #print(', '.join(row[col] for col in ['Chr','Pos','Ref','Alt']))
59 print(', '.join(row
[col
] for col
in InColNames
))
60 row
['Pos'] = int(row
['Pos'])
61 if row
['Chr'] in InData
:
62 if row
['Pos'] in InData
[row
['Chr']] :
63 InData
[row
['Chr']][row
['Pos']][1].append(row
['Alt'])
64 #print(InData[row['Chr']][row['Pos']])
66 InData
[row
['Chr']][row
['Pos']] = (row
['Ref'],[row
['Alt']])
68 InData
[row
['Chr']] = { row
['Pos'] : (row
['Ref'],[row
['Alt']]) }
70 Primer3GlobalArgs
: Dict
= {
71 'PRIMER_OPT_SIZE': 2+thePara
['PrimerLenMin'],
72 'PRIMER_PICK_INTERNAL_OLIGO': 1,
73 'PRIMER_INTERNAL_MAX_SELF_END': 8,
74 'PRIMER_MIN_SIZE': thePara
['PrimerLenMin'],
75 'PRIMER_MAX_SIZE': thePara
['PrimerLenMax'],
76 'PRIMER_OPT_TM': 60.0,
77 'PRIMER_MIN_TM': thePara
['TmMin'],
78 'PRIMER_MAX_TM': thePara
['TmMax'],
79 'PRIMER_MIN_GC': 20.0,
80 'PRIMER_MAX_GC': 80.0,
81 'PRIMER_MAX_POLY_X': 10,
82 'PRIMER_INTERNAL_MAX_POLY_X': 10,
83 'PRIMER_SALT_MONOVALENT': 50.0,
84 'PRIMER_DNA_CONC': 50.0,
85 'PRIMER_MAX_NS_ACCEPTED': 0,
86 'PRIMER_MAX_SELF_ANY': 12,
87 'PRIMER_MAX_SELF_END': 8,
88 'PRIMER_PAIR_MAX_COMPL_ANY': 12,
89 'PRIMER_PAIR_MAX_COMPL_END': 8,
90 'PRIMER_PRODUCT_SIZE_RANGE': [[thePara
['MinAmpLen']-thePara
['PrimerLenMax'],thePara
['MaxAmpLen']+thePara
['PrimerLenMax']]],
91 'PRIMER_TASK': 'generic',
92 'PRIMER_PICK_LEFT_PRIMER': 1,
93 'PRIMER_PICK_INTERNAL_OLIGO': 0,
94 'PRIMER_PICK_RIGHT_PRIMER': 1,
95 'PRIMER_PAIR_MAX_DIFF_TM': thePara
['TmDeltra'],
97 primer3
.bindings
.setP3Globals(Primer3GlobalArgs
)
99 for ChrID
in InData
.keys() :
100 for thePos
in InData
[ChrID
].keys() :
101 FulChrID
: str = ''.join(['chr',ChrID
])
102 # Start attributes are 1-based
103 Left
: int = thePos
- thePara
['Mode2LeftMax'] - thePara
['PrimerLenMax'] -1
104 if Left
< 0 : Left
= 0
106 # End attributes are 0-based
107 Right
: int = thePos
+ thePara
['MaxAmpLen'] + thePara
['PrimerLenMax']
108 if Right
> len(RefSeqs
[FulChrID
]) : Right
= len(RefSeqs
[FulChrID
])
109 theSeq
: str = RefSeqs
[FulChrID
][Left
:Right
]
110 print(':'.join([ChrID
,str(thePos
),FulChrID
,str(theSeq
),str(InData
[ChrID
][thePos
]) ]))
111 Primer3Ret
: Dict
= primer3
.bindings
.designPrimers({
112 'SEQUENCE_ID': theSeq
.fancy_name
,
113 'SEQUENCE_TEMPLATE': str(theSeq
),
114 'SEQUENCE_INCLUDED_REGION': [ thePara
['PrimerLenMax'],thePara
['MaxAmpLen'] ],
119 print(b
'[!] %(skipped)d InDels skipped in %(Total)d items.' % {b
'skipped': Skipped
, b
'Total': Total
})