modified: openstmerge.py
[GalaxyCodeBases.git] / python / etc / primer3d.py
blobe4712fca4b2656dfe05717a7fff112e19d3cfd1f
1 #!/usr/bin/env python3
2 import argparse
3 import logging
4 import primer3 # https://github.com/libnano/primer3-py
6 # https://brcaexchange.org/variants and click "Show All Public Data", then click "Download" to get `variants.tsv`.
7 # gzcat ~/Downloads/variants.tsv.gz|head -n30|awk -F'\t' '{if (length($124)+length($125)==2 || NR==1) print $43,$108,$122,$123,$124,$125}'|column -t
8 # DBID_LOVD Genomic_Coordinate_hg38 Chr Pos Ref Alt
9 # BRCA1_001574 chr17:g.43093299:A>G 17 43093299 A G
10 # BRCA1_003516 chr17:g.43078507:T>G 17 43078507 T G
11 # BRCA1_004379 chr17:g.43103085:A>G 17 43103085 A G
13 from os.path import expanduser
14 InFile: str = r'~/tmp/variants.tsv.0.gz'
15 InFile = expanduser(InFile)
17 InRef: str = expanduser(r'~/tmp/GRCh38.fa')
18 from pyfaidx import Fasta
19 RefSeqs = Fasta(InRef)
20 #print(RefSeqs['chr1'])
21 # Random access of BGZip is not supported now, see https://github.com/mdshw5/pyfaidx/issues/126
23 #InColNames = ['DBID_LOVD','Chr','Pos','Ref','Alt','Genomic_Coordinate_hg38']
24 InColNames = ['Chr','Pos','Ref','Alt']
25 #import numpy
26 #import pandas as pd
27 #pd.read_table(InFile,compression='gzip',sep='\t')
29 import gzip
30 import csv
31 Total: int = 0
32 Skipped: int = 0
33 from typing import Dict, List, Tuple
34 InData: Dict[str,Dict[int,Tuple[str,List[str]]]] = {}
36 '''
37 第一种引物,上游引物3‘端设一个,下游距离300bp-400bp设置
38 第二种,目标点上游100bp设置上游引物,不要覆盖目标点,下游,200-300,
39 只考虑一对引物中间的部分,引物本身不考虑。
40 Tm 参考范围55-62
41 '''
43 thePara: Dict[str,int] = dict(MaxAmpLen=400, MinAmpLen=300, P5Up1=0, P5Up2=100,
44 TmMax=63, TmMin=55, TmDeltra=5,
45 PrimerLenMin=25, PrimerLenMax=36, Mode2LeftMax=100
48 with gzip.open(InFile, 'rt') as tsvin:
49 tsvin = csv.DictReader(tsvin, delimiter='\t')
50 #headers = tsvin.fieldnames
51 #print(headers)
52 for row in tsvin:
53 #print(', '.join(row[col] for col in InColNames))
54 Total += 1
55 if len(row['Ref']) > 1 or len(row['Alt']) > 1 :
56 #print(', '.join(row[col] for col in ['Chr','Pos','Ref','Alt']))
57 Skipped += 1
58 else :
59 print(', '.join(row[col] for col in InColNames))
60 row['Pos'] = int(row['Pos'])
61 if row['Chr'] in InData :
62 if row['Pos'] in InData[row['Chr']] :
63 InData[row['Chr']][row['Pos']][1].append(row['Alt'])
64 #print(InData[row['Chr']][row['Pos']])
65 else :
66 InData[row['Chr']][row['Pos']] = (row['Ref'],[row['Alt']])
67 else :
68 InData[row['Chr']] = { row['Pos'] : (row['Ref'],[row['Alt']]) }
70 Primer3GlobalArgs: Dict = {
71 'PRIMER_OPT_SIZE': 2+thePara['PrimerLenMin'],
72 'PRIMER_PICK_INTERNAL_OLIGO': 1,
73 'PRIMER_INTERNAL_MAX_SELF_END': 8,
74 'PRIMER_MIN_SIZE': thePara['PrimerLenMin'],
75 'PRIMER_MAX_SIZE': thePara['PrimerLenMax'],
76 'PRIMER_OPT_TM': 60.0,
77 'PRIMER_MIN_TM': thePara['TmMin'],
78 'PRIMER_MAX_TM': thePara['TmMax'],
79 'PRIMER_MIN_GC': 20.0,
80 'PRIMER_MAX_GC': 80.0,
81 'PRIMER_MAX_POLY_X': 10,
82 'PRIMER_INTERNAL_MAX_POLY_X': 10,
83 'PRIMER_SALT_MONOVALENT': 50.0,
84 'PRIMER_DNA_CONC': 50.0,
85 'PRIMER_MAX_NS_ACCEPTED': 0,
86 'PRIMER_MAX_SELF_ANY': 12,
87 'PRIMER_MAX_SELF_END': 8,
88 'PRIMER_PAIR_MAX_COMPL_ANY': 12,
89 'PRIMER_PAIR_MAX_COMPL_END': 8,
90 'PRIMER_PRODUCT_SIZE_RANGE': [[thePara['MinAmpLen']-thePara['PrimerLenMax'],thePara['MaxAmpLen']+thePara['PrimerLenMax']]],
91 'PRIMER_TASK': 'generic',
92 'PRIMER_PICK_LEFT_PRIMER': 1,
93 'PRIMER_PICK_INTERNAL_OLIGO': 0,
94 'PRIMER_PICK_RIGHT_PRIMER': 1,
95 'PRIMER_PAIR_MAX_DIFF_TM': thePara['TmDeltra'],
97 primer3.bindings.setP3Globals(Primer3GlobalArgs)
99 for ChrID in InData.keys() :
100 for thePos in InData[ChrID].keys() :
101 FulChrID: str = ''.join(['chr',ChrID])
102 # Start attributes are 1-based
103 Left: int = thePos - thePara['Mode2LeftMax'] - thePara['PrimerLenMax'] -1
104 if Left < 0 : Left = 0
105 #Left = thePos-1
106 # End attributes are 0-based
107 Right: int = thePos + thePara['MaxAmpLen'] + thePara['PrimerLenMax']
108 if Right > len(RefSeqs[FulChrID]) : Right = len(RefSeqs[FulChrID])
109 theSeq: str = RefSeqs[FulChrID][Left:Right]
110 print(':'.join([ChrID,str(thePos),FulChrID,str(theSeq),str(InData[ChrID][thePos]) ]))
111 Primer3Ret: Dict = primer3.bindings.designPrimers({
112 'SEQUENCE_ID': theSeq.fancy_name,
113 'SEQUENCE_TEMPLATE': str(theSeq),
114 'SEQUENCE_INCLUDED_REGION': [ thePara['PrimerLenMax'],thePara['MaxAmpLen'] ],
116 print(Primer3Ret)
119 print(b'[!] %(skipped)d InDels skipped in %(Total)d items.' % {b'skipped': Skipped, b'Total': Total})