modified: Makefile
[GalaxyCodeBases.git] / python / salus / alignpe.py
blobefad3cba2043823da6d86d57d0be9593814e4dec
1 #!/usr/bin/env python3
2 ###!/scratch/src/mambaforge/envs/py311/bin/python3
4 import sys
5 #sys.path.append('/scratch/src/mambaforge/envs/py311/lib/python3.11/site-packages')
6 import io
7 import argparse
8 import pathlib
9 import gzip
10 #import pyfastx
11 import mappy
12 import tqdm
14 import pprint
15 pp = pprint.PrettyPrinter(indent=4)
17 def eprint(*args, **kwargs) -> None:
18 print(*args, **kwargs, file=sys.stderr, flush=True)
20 def init_argparse() -> argparse.ArgumentParser:
21 parser = argparse.ArgumentParser(
22 description='Align Salus PE barcode with 1-100 as forward and 101-150 as reverse.',
23 epilog='Contact: <huxs@salus-bio.com>')
24 parser.add_argument('outfile', type=argparse.FileType('w', encoding='UTF-8'), help='Alignment Results', nargs='?', default='-')
25 parser.add_argument('-i', '--read1', type=pathlib.Path, required=True, dest='infile', help='For Lane01_fastq_R1.fq[.gz]')
26 #parser.add_argument('-n', '--dryrun', '--dry-run', action='store_true', dest='dryrun')
27 parser.add_argument(
28 "-v", "--version", action="version",
29 version=f"{parser.prog} version 1.0.0"
31 return parser
33 def main() -> None:
34 parser = init_argparse()
35 if len(sys.argv) == 1:
36 parser.print_help()
37 exit(0);
38 args = parser.parse_args()
39 pp.pprint(args)
40 matchNumbers = list('1234567890'*15)
41 for name,seq,qual in mappy.fastx_read(args.infile.as_posix()):
42 seqF = seq[:100]
43 seqR = seq[100:]
44 AlnRef = mappy.Aligner(seq=seqF,preset='sr',k=11)
45 for hit in AlnRef.map(seqR, cs=False, MD=False): # traverse alignments
46 if hit.is_primary:
47 if hit.NM or hit.cigar_str != '30M':
48 print( ', '.join(map(str,[hit.ctg, hit.r_st, hit.r_en, hit.strand, hit.q_st, hit.q_en, hit.cigar_str, hit.NM, name])), file=args.outfile )
49 seqFstr = seqF[:30] + seqF[30:].lower()
50 seqRstr = seqR[:30] + seqR[30:].lower()
51 queryST = hit.q_st
52 if hit.strand == -1:
53 revR = mappy.revcomp(seqRstr)
54 queryST = len(seqR) - hit.q_st - hit.q_en
55 queryStr = revR
56 elif hit.strand == 1:
57 queryStr = seqRstr
58 (firstPadding, secondPadding) = (0,0)
59 if hit.r_st > queryST:
60 secondPadding = hit.r_st - queryST
61 elif hit.r_st < queryST:
62 firstPadding = queryST - hit.r_st
63 rStr = ' '*firstPadding + seqFstr
64 qStr = ' '*secondPadding + queryStr
65 pairs = zip( list(rStr),list(qStr) )
66 matchStr = ''
67 matchNumberIndex = 0
68 for item in pairs:
69 if item[0].lower() == item[1].lower():
70 #matchStr += '|'
71 matchStr += matchNumbers[matchNumberIndex]
72 matchNumberIndex += 1
73 else:
74 if item[0] == ' ':
75 matchStr += '_'
76 elif item[1] == ' ':
77 matchStr += '‾'
78 else:
79 matchStr += '●'
80 print( rStr,matchStr,qStr,sep='\n', file=args.outfile)
81 args.outfile.flush()
82 exit(0);
84 if __name__ == "__main__":
85 main() # time ./alignpe.py -i pe150.fq.gz