2 ###!/scratch/src/mambaforge/envs/py311/bin/python3
5 #sys.path.append('/scratch/src/mambaforge/envs/py311/lib/python3.11/site-packages')
15 pp
= pprint
.PrettyPrinter(indent
=4)
17 def eprint(*args
, **kwargs
) -> None:
18 print(*args
, **kwargs
, file=sys
.stderr
, flush
=True)
20 def init_argparse() -> argparse
.ArgumentParser
:
21 parser
= argparse
.ArgumentParser(
22 description
='Align Salus PE barcode with 1-100 as forward and 101-150 as reverse.',
23 epilog
='Contact: <huxs@salus-bio.com>')
24 parser
.add_argument('outfile', type=argparse
.FileType('w', encoding
='UTF-8'), help='Alignment Results', nargs
='?', default
='-')
25 parser
.add_argument('-i', '--read1', type=pathlib
.Path
, required
=True, dest
='infile', help='For Lane01_fastq_R1.fq[.gz]')
26 #parser.add_argument('-n', '--dryrun', '--dry-run', action='store_true', dest='dryrun')
28 "-v", "--version", action
="version",
29 version
=f
"{parser.prog} version 1.0.0"
34 parser
= init_argparse()
35 if len(sys
.argv
) == 1:
38 args
= parser
.parse_args()
40 matchNumbers
= list('1234567890'*15)
41 for name
,seq
,qual
in mappy
.fastx_read(args
.infile
.as_posix()):
44 AlnRef
= mappy
.Aligner(seq
=seqF
,preset
='sr',k
=11)
45 for hit
in AlnRef
.map(seqR
, cs
=False, MD
=False): # traverse alignments
47 if hit
.NM
or hit
.cigar_str
!= '30M':
48 print( ', '.join(map(str,[hit
.ctg
, hit
.r_st
, hit
.r_en
, hit
.strand
, hit
.q_st
, hit
.q_en
, hit
.cigar_str
, hit
.NM
, name
])), file=args
.outfile
)
49 seqFstr
= seqF
[:30] + seqF
[30:].lower()
50 seqRstr
= seqR
[:30] + seqR
[30:].lower()
53 revR
= mappy
.revcomp(seqRstr
)
54 queryST
= len(seqR
) - hit
.q_st
- hit
.q_en
58 (firstPadding
, secondPadding
) = (0,0)
59 if hit
.r_st
> queryST
:
60 secondPadding
= hit
.r_st
- queryST
61 elif hit
.r_st
< queryST
:
62 firstPadding
= queryST
- hit
.r_st
63 rStr
= ' '*firstPadding
+ seqFstr
64 qStr
= ' '*secondPadding
+ queryStr
65 pairs
= zip( list(rStr
),list(qStr
) )
69 if item
[0].lower() == item
[1].lower():
71 matchStr
+= matchNumbers
[matchNumberIndex
]
80 print( rStr
,matchStr
,qStr
,sep
='\n', file=args
.outfile
)
84 if __name__
== "__main__":
85 main() # time ./alignpe.py -i pe150.fq.gz