5 def eprint(*args
, **kwargs
):
6 print(*args
, file=sys
.stderr
, **kwargs
)
10 from skbio
.alignment
import StripedSmithWaterman
12 'adapter1': 'CAAGCAGAAGACGGCATACGAGATTCTTTCCCTACACGACGCTCTTCCGATCT',
13 'adapter2': 'CCCGTTCGCAACATGTCTGGCGTCATATCTTGTGACTACAGCACCCTCGACTCTCGCAGACTTTCACCAGTCCATGAT',
14 'adapter3': 'AGATCGGAAGAGCACACGTCTGAACTCCAGTCACGTGTAGATCTCGGTGGTCGCCGTATCATT'
17 for qid
in Adapters
.keys():
19 rcSeq
= str(DNA(Seq
).reverse_complement())
20 Query
= StripedSmithWaterman(Seq
)
21 rcQuery
= StripedSmithWaterman(rcSeq
)
22 Querys
[qid
] = (Query
,rcQuery
)
25 def aliOneSeq(QuerySSWs
,targetSeq
):
28 for qid
in QuerySSWs
.keys():
29 (Query
,rcQuery
) = QuerySSWs
[qid
]
30 Alignment
= Query(targetSeq
)
31 rcAlignment
= rcQuery(targetSeq
)
33 if rcAlignment
== None or Alignment
.optimal_alignment_score
> rcAlignment
.optimal_alignment_score
:
39 Alignments
[qid
] = (resAli
,strand
)
40 Scores
[qid
] = resAli
.optimal_alignment_score
41 maxScores
= [key
for key
, value
in Scores
.items() if value
== max(Scores
.values())]
42 maxAlignments
= {k
: Alignments
[k
] for k
in maxScores
}
45 def printAli(maxAlignments
,kmer
,kCount
):
46 #targetSeq = 'ACATAGTCTGGCGTCATTCTTGTGTACA'
47 #maxAlignments = aliOneSeq(QuerySSWs,targetSeq)
48 for tid
in maxAlignments
.keys():
49 ali
= maxAlignments
[tid
]
51 ':'.join((kmer
,str(kCount
))),
52 tid
, ali
[1], ali
[0].cigar
,
53 str(ali
[0].optimal_alignment_score
),
54 str(ali
[0].suboptimal_alignment_score
)
58 ali
[0].aligned_target_sequence
,
59 ali
[0].aligned_query_sequence
66 if len(sys
.argv
) < 3 :
67 print('Usage:',sys
.argv
[0],'<kmc_dump file> <min count> >alignments.out',file=sys
.stderr
,flush
=True);
69 kmcDumpFile
= sys
.argv
[1]
70 minCount
= int(sys
.argv
[2])
72 QuerySSWs
= buildQuery()
74 with
open(kmcDumpFile
) as kmcfile
:
76 kmer
,kCount
= line
.split() # rstrip('\n') only needed when split("\t"), since whitespace include "\n".
77 #print(':'.join(('[',kmer,kCount,']')))
78 if int(kCount
) < minCount
:
79 eprint(':'.join(('[!]Stop reading before',kmer
,kCount
)))
81 maxAlignments
= aliOneSeq(QuerySSWs
,kmer
)
82 printAli(maxAlignments
,kmer
,kCount
)
84 if __name__
== "__main__":
85 main() # time ./alignkmer.py deumi31-9.out.rnk2 1000000 |head