modified: src1/input.c
[GalaxyCodeBases.git] / c_cpp / lib / klib / ksw.c
blob742fec90b55fcfd2675f3d4348d629e17c46ff42
1 /* The MIT License
3 Copyright (c) 2011 by Attractive Chaos <attractor@live.co.uk>
5 Permission is hereby granted, free of charge, to any person obtaining
6 a copy of this software and associated documentation files (the
7 "Software"), to deal in the Software without restriction, including
8 without limitation the rights to use, copy, modify, merge, publish,
9 distribute, sublicense, and/or sell copies of the Software, and to
10 permit persons to whom the Software is furnished to do so, subject to
11 the following conditions:
13 The above copyright notice and this permission notice shall be
14 included in all copies or substantial portions of the Software.
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 SOFTWARE.
26 #include <stdlib.h>
27 #include <stdint.h>
28 #include <emmintrin.h>
29 #include "ksw.h"
31 #ifdef __GNUC__
32 #define LIKELY(x) __builtin_expect((x),1)
33 #define UNLIKELY(x) __builtin_expect((x),0)
34 #else
35 #define LIKELY(x) (x)
36 #define UNLIKELY(x) (x)
37 #endif
39 const kswr_t g_defr = { 0, -1, -1, -1, -1, -1, -1 };
41 struct _kswq_t {
42 int qlen, slen;
43 uint8_t shift, mdiff, max, size;
44 __m128i *qp, *H0, *H1, *E, *Hmax;
47 /**
48 * Initialize the query data structure
50 * @param size Number of bytes used to store a score; valid valures are 1 or 2
51 * @param qlen Length of the query sequence
52 * @param query Query sequence
53 * @param m Size of the alphabet
54 * @param mat Scoring matrix in a one-dimension array
56 * @return Query data structure
58 kswq_t *ksw_qinit(int size, int qlen, const uint8_t *query, int m, const int8_t *mat)
60 kswq_t *q;
61 int slen, a, tmp, p;
63 size = size > 1? 2 : 1;
64 p = 8 * (3 - size); // # values per __m128i
65 slen = (qlen + p - 1) / p; // segmented length
66 q = (kswq_t*)malloc(sizeof(kswq_t) + 256 + 16 * slen * (m + 4)); // a single block of memory
67 q->qp = (__m128i*)(((size_t)q + sizeof(kswq_t) + 15) >> 4 << 4); // align memory
68 q->H0 = q->qp + slen * m;
69 q->H1 = q->H0 + slen;
70 q->E = q->H1 + slen;
71 q->Hmax = q->E + slen;
72 q->slen = slen; q->qlen = qlen; q->size = size;
73 // compute shift
74 tmp = m * m;
75 for (a = 0, q->shift = 127, q->mdiff = 0; a < tmp; ++a) { // find the minimum and maximum score
76 if (mat[a] < (int8_t)q->shift) q->shift = mat[a];
77 if (mat[a] > (int8_t)q->mdiff) q->mdiff = mat[a];
79 q->max = q->mdiff;
80 q->shift = 256 - q->shift; // NB: q->shift is uint8_t
81 q->mdiff += q->shift; // this is the difference between the min and max scores
82 // An example: p=8, qlen=19, slen=3 and segmentation:
83 // {{0,3,6,9,12,15,18,-1},{1,4,7,10,13,16,-1,-1},{2,5,8,11,14,17,-1,-1}}
84 if (size == 1) {
85 int8_t *t = (int8_t*)q->qp;
86 for (a = 0; a < m; ++a) {
87 int i, k, nlen = slen * p;
88 const int8_t *ma = mat + a * m;
89 for (i = 0; i < slen; ++i)
90 for (k = i; k < nlen; k += slen) // p iterations
91 *t++ = (k >= qlen? 0 : ma[query[k]]) + q->shift;
93 } else {
94 int16_t *t = (int16_t*)q->qp;
95 for (a = 0; a < m; ++a) {
96 int i, k, nlen = slen * p;
97 const int8_t *ma = mat + a * m;
98 for (i = 0; i < slen; ++i)
99 for (k = i; k < nlen; k += slen) // p iterations
100 *t++ = (k >= qlen? 0 : ma[query[k]]);
103 return q;
106 kswr_t ksw_u8(kswq_t *q, int tlen, const uint8_t *target, int _gapo, int _gape, int xtra) // the first gap costs -(_o+_e)
108 int slen, i, m_b, n_b, te = -1, gmax = 0, minsc, endsc;
109 uint64_t *b;
110 __m128i zero, gapoe, gape, shift, *H0, *H1, *E, *Hmax;
111 kswr_t r;
113 #define __max_16(ret, xx) do { \
114 (xx) = _mm_max_epu8((xx), _mm_srli_si128((xx), 8)); \
115 (xx) = _mm_max_epu8((xx), _mm_srli_si128((xx), 4)); \
116 (xx) = _mm_max_epu8((xx), _mm_srli_si128((xx), 2)); \
117 (xx) = _mm_max_epu8((xx), _mm_srli_si128((xx), 1)); \
118 (ret) = _mm_extract_epi16((xx), 0) & 0x00ff; \
119 } while (0)
121 // initialization
122 r = g_defr;
123 minsc = (xtra&KSW_XSUBO)? xtra&0xffff : 0x10000;
124 endsc = (xtra&KSW_XSTOP)? xtra&0xffff : 0x10000;
125 m_b = n_b = 0; b = 0;
126 zero = _mm_set1_epi32(0);
127 gapoe = _mm_set1_epi8(_gapo + _gape);
128 gape = _mm_set1_epi8(_gape);
129 shift = _mm_set1_epi8(q->shift);
130 H0 = q->H0; H1 = q->H1; E = q->E; Hmax = q->Hmax;
131 slen = q->slen;
132 for (i = 0; i < slen; ++i) {
133 _mm_store_si128(E + i, zero);
134 _mm_store_si128(H0 + i, zero);
135 _mm_store_si128(Hmax + i, zero);
137 // the core loop
138 for (i = 0; i < tlen; ++i) {
139 int j, k, cmp, imax;
140 __m128i e, h, f = zero, max = zero, *S = q->qp + target[i] * slen; // s is the 1st score vector
141 h = _mm_load_si128(H0 + slen - 1); // h={2,5,8,11,14,17,-1,-1} in the above example
142 h = _mm_slli_si128(h, 1); // h=H(i-1,-1); << instead of >> because x64 is little-endian
143 for (j = 0; LIKELY(j < slen); ++j) {
144 /* SW cells are computed in the following order:
145 * H(i,j) = max{H(i-1,j-1)+S(i,j), E(i,j), F(i,j)}
146 * E(i+1,j) = max{H(i,j)-q, E(i,j)-r}
147 * F(i,j+1) = max{H(i,j)-q, F(i,j)-r}
149 // compute H'(i,j); note that at the beginning, h=H'(i-1,j-1)
150 h = _mm_adds_epu8(h, _mm_load_si128(S + j));
151 h = _mm_subs_epu8(h, shift); // h=H'(i-1,j-1)+S(i,j)
152 e = _mm_load_si128(E + j); // e=E'(i,j)
153 h = _mm_max_epu8(h, e);
154 h = _mm_max_epu8(h, f); // h=H'(i,j)
155 max = _mm_max_epu8(max, h); // set max
156 _mm_store_si128(H1 + j, h); // save to H'(i,j)
157 // now compute E'(i+1,j)
158 h = _mm_subs_epu8(h, gapoe); // h=H'(i,j)-gapo
159 e = _mm_subs_epu8(e, gape); // e=E'(i,j)-gape
160 e = _mm_max_epu8(e, h); // e=E'(i+1,j)
161 _mm_store_si128(E + j, e); // save to E'(i+1,j)
162 // now compute F'(i,j+1)
163 f = _mm_subs_epu8(f, gape);
164 f = _mm_max_epu8(f, h);
165 // get H'(i-1,j) and prepare for the next j
166 h = _mm_load_si128(H0 + j); // h=H'(i-1,j)
168 // NB: we do not need to set E(i,j) as we disallow adjecent insertion and then deletion
169 for (k = 0; LIKELY(k < 16); ++k) { // this block mimics SWPS3; NB: H(i,j) updated in the lazy-F loop cannot exceed max
170 f = _mm_slli_si128(f, 1);
171 for (j = 0; LIKELY(j < slen); ++j) {
172 h = _mm_load_si128(H1 + j);
173 h = _mm_max_epu8(h, f); // h=H'(i,j)
174 _mm_store_si128(H1 + j, h);
175 h = _mm_subs_epu8(h, gapoe);
176 f = _mm_subs_epu8(f, gape);
177 cmp = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_subs_epu8(f, h), zero));
178 if (UNLIKELY(cmp == 0xffff)) goto end_loop16;
181 end_loop16:
182 //int k;for (k=0;k<16;++k)printf("%d ", ((uint8_t*)&max)[k]);printf("\n");
183 __max_16(imax, max); // imax is the maximum number in max
184 if (imax >= minsc) { // write the b array; this condition adds branching unfornately
185 if (n_b == 0 || (int32_t)b[n_b-1] + 1 != i) { // then append
186 if (n_b == m_b) {
187 m_b = m_b? m_b<<1 : 8;
188 b = (uint64_t*)realloc(b, 8 * m_b);
190 b[n_b++] = (uint64_t)imax<<32 | i;
191 } else if ((int)(b[n_b-1]>>32) < imax) b[n_b-1] = (uint64_t)imax<<32 | i; // modify the last
193 if (imax > gmax) {
194 gmax = imax; te = i; // te is the end position on the target
195 for (j = 0; LIKELY(j < slen); ++j) // keep the H1 vector
196 _mm_store_si128(Hmax + j, _mm_load_si128(H1 + j));
197 if (gmax + q->shift >= 255 || gmax >= endsc) break;
199 S = H1; H1 = H0; H0 = S; // swap H0 and H1
201 r.score = gmax + q->shift < 255? gmax : 255;
202 r.te = te;
203 if (r.score != 255) { // get a->qe, the end of query match; find the 2nd best score
204 int max = -1, low, high, qlen = slen * 16;
205 uint8_t *t = (uint8_t*)Hmax;
206 for (i = 0; i < qlen; ++i, ++t)
207 if ((int)*t > max) max = *t, r.qe = i / 16 + i % 16 * slen;
208 //printf("%d,%d\n", max, gmax);
209 if (b) {
210 i = (r.score + q->max - 1) / q->max;
211 low = te - i; high = te + i;
212 for (i = 0; i < n_b; ++i) {
213 int e = (int32_t)b[i];
214 if ((e < low || e > high) && (int)(b[i]>>32) > r.score2)
215 r.score2 = b[i]>>32, r.te2 = e;
219 free(b);
220 return r;
223 kswr_t ksw_i16(kswq_t *q, int tlen, const uint8_t *target, int _gapo, int _gape, int xtra) // the first gap costs -(_o+_e)
225 int slen, i, m_b, n_b, te = -1, gmax = 0, minsc, endsc;
226 uint64_t *b;
227 __m128i zero, gapoe, gape, *H0, *H1, *E, *Hmax;
228 kswr_t r;
230 #define __max_8(ret, xx) do { \
231 (xx) = _mm_max_epi16((xx), _mm_srli_si128((xx), 8)); \
232 (xx) = _mm_max_epi16((xx), _mm_srli_si128((xx), 4)); \
233 (xx) = _mm_max_epi16((xx), _mm_srli_si128((xx), 2)); \
234 (ret) = _mm_extract_epi16((xx), 0); \
235 } while (0)
237 // initialization
238 r = g_defr;
239 minsc = (xtra&KSW_XSUBO)? xtra&0xffff : 0x10000;
240 endsc = (xtra&KSW_XSTOP)? xtra&0xffff : 0x10000;
241 m_b = n_b = 0; b = 0;
242 zero = _mm_set1_epi32(0);
243 gapoe = _mm_set1_epi16(_gapo + _gape);
244 gape = _mm_set1_epi16(_gape);
245 H0 = q->H0; H1 = q->H1; E = q->E; Hmax = q->Hmax;
246 slen = q->slen;
247 for (i = 0; i < slen; ++i) {
248 _mm_store_si128(E + i, zero);
249 _mm_store_si128(H0 + i, zero);
250 _mm_store_si128(Hmax + i, zero);
252 // the core loop
253 for (i = 0; i < tlen; ++i) {
254 int j, k, imax;
255 __m128i e, h, f = zero, max = zero, *S = q->qp + target[i] * slen; // s is the 1st score vector
256 h = _mm_load_si128(H0 + slen - 1); // h={2,5,8,11,14,17,-1,-1} in the above example
257 h = _mm_slli_si128(h, 2);
258 for (j = 0; LIKELY(j < slen); ++j) {
259 h = _mm_adds_epi16(h, *S++);
260 e = _mm_load_si128(E + j);
261 h = _mm_max_epi16(h, e);
262 h = _mm_max_epi16(h, f);
263 max = _mm_max_epi16(max, h);
264 _mm_store_si128(H1 + j, h);
265 h = _mm_subs_epu16(h, gapoe);
266 e = _mm_subs_epu16(e, gape);
267 e = _mm_max_epi16(e, h);
268 _mm_store_si128(E + j, e);
269 f = _mm_subs_epu16(f, gape);
270 f = _mm_max_epi16(f, h);
271 h = _mm_load_si128(H0 + j);
273 for (k = 0; LIKELY(k < 16); ++k) {
274 f = _mm_slli_si128(f, 2);
275 for (j = 0; LIKELY(j < slen); ++j) {
276 h = _mm_load_si128(H1 + j);
277 h = _mm_max_epi16(h, f);
278 _mm_store_si128(H1 + j, h);
279 h = _mm_subs_epu16(h, gapoe);
280 f = _mm_subs_epu16(f, gape);
281 if(UNLIKELY(!_mm_movemask_epi8(_mm_cmpgt_epi16(f, h)))) goto end_loop8;
284 end_loop8:
285 __max_8(imax, max);
286 if (imax >= minsc) {
287 if (n_b == 0 || (int32_t)b[n_b-1] + 1 != i) {
288 if (n_b == m_b) {
289 m_b = m_b? m_b<<1 : 8;
290 b = (uint64_t*)realloc(b, 8 * m_b);
292 b[n_b++] = (uint64_t)imax<<32 | i;
293 } else if ((int)(b[n_b-1]>>32) < imax) b[n_b-1] = (uint64_t)imax<<32 | i; // modify the last
295 if (imax > gmax) {
296 gmax = imax; te = i;
297 for (j = 0; LIKELY(j < slen); ++j)
298 _mm_store_si128(Hmax + j, _mm_load_si128(H1 + j));
299 if (gmax >= endsc) break;
301 S = H1; H1 = H0; H0 = S;
303 r.score = gmax; r.te = te;
305 int max = -1, low, high, qlen = slen * 8;
306 uint16_t *t = (uint16_t*)Hmax;
307 for (i = 0, r.qe = -1; i < qlen; ++i, ++t)
308 if ((int)*t > max) max = *t, r.qe = i / 8 + i % 8 * slen;
309 if (b) {
310 i = (r.score + q->max - 1) / q->max;
311 low = te - i; high = te + i;
312 for (i = 0; i < n_b; ++i) {
313 int e = (int32_t)b[i];
314 if ((e < low || e > high) && (int)(b[i]>>32) > r.score2)
315 r.score2 = b[i]>>32, r.te2 = e;
319 free(b);
320 return r;
323 static void revseq(int l, uint8_t *s)
325 int i, t;
326 for (i = 0; i < l>>1; ++i)
327 t = s[i], s[i] = s[l - 1 - i], s[l - 1 - i] = t;
330 kswr_t ksw_align(int qlen, uint8_t *query, int tlen, uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int xtra, kswq_t **qry)
332 int size;
333 kswq_t *q;
334 kswr_t r, rr;
335 kswr_t (*func)(kswq_t*, int, const uint8_t*, int, int, int);
337 q = (qry && *qry)? *qry : ksw_qinit((xtra&KSW_XBYTE)? 1 : 2, qlen, query, m, mat);
338 if (qry && *qry == 0) *qry = q;
339 func = q->size == 2? ksw_i16 : ksw_u8;
340 size = q->size;
341 r = func(q, tlen, target, gapo, gape, xtra);
342 if (qry == 0) free(q);
343 if ((xtra&KSW_XSTART) == 0 || ((xtra&KSW_XSUBO) && r.score < (xtra&0xffff))) return r;
344 revseq(r.qe + 1, query); revseq(r.te + 1, target); // +1 because qe/te points to the exact end, not the position after the end
345 q = ksw_qinit(size, r.qe + 1, query, m, mat);
346 rr = func(q, tlen, target, gapo, gape, KSW_XSTOP | r.score);
347 revseq(r.qe + 1, query); revseq(r.te + 1, target);
348 free(q);
349 if (r.score == rr.score)
350 r.tb = r.te - rr.te, r.qb = r.qe - rr.qe;
351 return r;
354 /********************
355 *** SW extension ***
356 ********************/
358 typedef struct {
359 int32_t h, e;
360 } eh_t;
362 int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int h0, int *_qle, int *_tle)
364 eh_t *eh; // score array
365 int8_t *qp; // query profile
366 int i, j, k, gapoe = gapo + gape, beg, end, max, max_i, max_j, max_gap;
367 if (h0 < 0) h0 = 0;
368 // allocate memory
369 qp = malloc(qlen * m);
370 eh = calloc(qlen + 1, 8);
371 // generate the query profile
372 for (k = i = 0; k < m; ++k) {
373 const int8_t *p = &mat[k * m];
374 for (j = 0; j < qlen; ++j) qp[i++] = p[query[j]];
376 // fill the first row
377 eh[0].h = h0; eh[1].h = h0 > gapoe? h0 - gapoe : 0;
378 for (j = 2; j <= qlen && eh[j-1].h > gape; ++j)
379 eh[j].h = eh[j-1].h - gape;
380 // adjust $w if it is too large
381 k = m * m;
382 for (i = 0, max = 0; i < k; ++i) // get the max score
383 max = max > mat[i]? max : mat[i];
384 max_gap = (int)((double)(qlen * max - gapo) / gape + 1.);
385 max_gap = max_gap > 1? max_gap : 1;
386 w = w < max_gap? w : max_gap;
387 // DP loop
388 max = h0, max_i = max_j = -1;
389 beg = 0, end = qlen;
390 for (i = 0; LIKELY(i < tlen); ++i) {
391 int f = 0, h1, m = 0, mj = -1;
392 int8_t *q = &qp[target[i] * qlen];
393 // compute the first column
394 h1 = h0 - (gapo + gape * (i + 1));
395 if (h1 < 0) h1 = 0;
396 // apply the band and the constraint (if provided)
397 if (beg < i - w) beg = i - w;
398 if (end > i + w + 1) end = i + w + 1;
399 if (end > qlen) end = qlen;
400 for (j = beg; LIKELY(j < end); ++j) {
401 // At the beginning of the loop: eh[j] = { H(i-1,j-1), E(i,j) }, f = F(i,j) and h1 = H(i,j-1)
402 // Similar to SSE2-SW, cells are computed in the following order:
403 // H(i,j) = max{H(i-1,j-1)+S(i,j), E(i,j), F(i,j)}
404 // E(i+1,j) = max{H(i,j)-gapo, E(i,j)} - gape
405 // F(i,j+1) = max{H(i,j)-gapo, F(i,j)} - gape
406 eh_t *p = &eh[j];
407 int h = p->h, e = p->e; // get H(i-1,j-1) and E(i-1,j)
408 p->h = h1; // set H(i,j-1) for the next row
409 h += q[j];
410 h = h > e? h : e;
411 h = h > f? h : f;
412 h1 = h; // save H(i,j) to h1 for the next column
413 mj = m > h? mj : j;
414 m = m > h? m : h; // m is stored at eh[mj+1]
415 h -= gapoe;
416 h = h > 0? h : 0;
417 e -= gape;
418 e = e > h? e : h; // computed E(i+1,j)
419 p->e = e; // save E(i+1,j) for the next row
420 f -= gape;
421 f = f > h? f : h; // computed F(i,j+1)
423 eh[end].h = h1; eh[end].e = 0;
424 if (m == 0) break;
425 if (m > max) max = m, max_i = i, max_j = mj;
426 // update beg and end for the next round
427 for (j = mj; j >= beg && eh[j].h; --j);
428 beg = j + 1;
429 for (j = mj + 2; j <= end && eh[j].h; ++j);
430 end = j;
431 //beg = 0; end = qlen; // uncomment this line for debugging
433 free(eh); free(qp);
434 if (_qle) *_qle = max_j + 1;
435 if (_tle) *_tle = max_i + 1;
436 return max;
439 /********************
440 * Global alignment *
441 ********************/
443 #define MINUS_INF -0x40000000
445 static inline uint32_t *push_cigar(int *n_cigar, int *m_cigar, uint32_t *cigar, int op, int len)
447 if (*n_cigar == 0 || op != (cigar[(*n_cigar) - 1]&0xf)) {
448 if (*n_cigar == *m_cigar) {
449 *m_cigar = *m_cigar? (*m_cigar)<<1 : 4;
450 cigar = realloc(cigar, (*m_cigar) << 2);
452 cigar[(*n_cigar)++] = len<<4 | op;
453 } else cigar[(*n_cigar)-1] += len<<4;
454 return cigar;
457 int ksw_global(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int *n_cigar_, uint32_t **cigar_)
459 eh_t *eh;
460 int8_t *qp; // query profile
461 int i, j, k, gapoe = gapo + gape, score, n_col;
462 uint8_t *z; // backtrack matrix; in each cell: f<<4|e<<2|h; in principle, we can halve the memory, but backtrack will be a little more complex
463 if (n_cigar_) *n_cigar_ = 0;
464 // allocate memory
465 n_col = qlen < 2*w+1? qlen : 2*w+1; // maximum #columns of the backtrack matrix
466 z = malloc(n_col * tlen);
467 qp = malloc(qlen * m);
468 eh = calloc(qlen + 1, 8);
469 // generate the query profile
470 for (k = i = 0; k < m; ++k) {
471 const int8_t *p = &mat[k * m];
472 for (j = 0; j < qlen; ++j) qp[i++] = p[query[j]];
474 // fill the first row
475 eh[0].h = 0; eh[0].e = MINUS_INF;
476 for (j = 1; j <= qlen && j <= w; ++j)
477 eh[j].h = -(gapo + gape * j), eh[j].e = MINUS_INF;
478 for (; j <= qlen; ++j) eh[j].h = eh[j].e = MINUS_INF; // everything is -inf outside the band
479 // DP loop
480 for (i = 0; LIKELY(i < tlen); ++i) { // target sequence is in the outer loop
481 int32_t f = MINUS_INF, h1, beg, end;
482 int8_t *q = &qp[target[i] * qlen];
483 uint8_t *zi = &z[i * n_col];
484 beg = i > w? i - w : 0;
485 end = i + w + 1 < qlen? i + w + 1 : qlen; // only loop through [beg,end) of the query sequence
486 h1 = beg == 0? -(gapo + gape * (i + 1)) : MINUS_INF;
487 for (j = beg; LIKELY(j < end); ++j) {
488 // This loop is organized in a similar way to ksw_extend() and ksw_sse2(), except:
489 // 1) not checking h>0; 2) recording direction for backtracking
490 eh_t *p = &eh[j];
491 int32_t h = p->h, e = p->e;
492 uint8_t d; // direction
493 p->h = h1;
494 h += q[j];
495 d = h > e? 0 : 1;
496 h = h > e? h : e;
497 d = h > f? d : 2;
498 h = h > f? h : f;
499 h1 = h;
500 h -= gapoe;
501 e -= gape;
502 d |= e > h? 1<<2 : 0;
503 e = e > h? e : h;
504 p->e = e;
505 f -= gape;
506 d |= f > h? 2<<4 : 0; // if we want to halve the memory, use one bit only, instead of two
507 f = f > h? f : h;
508 zi[j - beg] = d; // z[i,j] keeps h for the current cell and e/f for the next cell
510 eh[end].h = h1; eh[end].e = MINUS_INF;
512 score = eh[qlen].h;
513 if (n_cigar_ && cigar_) { // backtrack
514 int n_cigar = 0, m_cigar = 0, which = 0;
515 uint32_t *cigar = 0, tmp;
516 i = tlen - 1; k = (i + w + 1 < qlen? i + w + 1 : qlen) - 1; // (i,k) points to the last cell
517 while (i >= 0 && k >= 0) {
518 which = z[i * n_col + (k - (i > w? i - w : 0))] >> (which<<1) & 3;
519 if (which == 0) cigar = push_cigar(&n_cigar, &m_cigar, cigar, 0, 1), --i, --k;
520 else if (which == 1) cigar = push_cigar(&n_cigar, &m_cigar, cigar, 2, 1), --i;
521 else cigar = push_cigar(&n_cigar, &m_cigar, cigar, 1, 1), --k;
523 if (i >= 0) cigar = push_cigar(&n_cigar, &m_cigar, cigar, 2, i + 1);
524 if (k >= 0) cigar = push_cigar(&n_cigar, &m_cigar, cigar, 1, k + 1);
525 for (i = 0; i < n_cigar>>1; ++i) // reverse CIGAR
526 tmp = cigar[i], cigar[i] = cigar[n_cigar-1-i], cigar[n_cigar-1-i] = tmp;
527 *n_cigar_ = n_cigar, *cigar_ = cigar;
529 free(eh); free(qp); free(z);
530 return score;
533 /*******************************************
534 * Main function (not compiled by default) *
535 *******************************************/
537 #ifdef _KSW_MAIN
539 #include <unistd.h>
540 #include <stdio.h>
541 #include <zlib.h>
542 #include "kseq.h"
543 KSEQ_INIT(gzFile, gzread)
545 unsigned char seq_nt4_table[256] = {
546 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
547 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
548 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
549 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
550 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4,
551 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
552 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4,
553 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
554 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
555 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
556 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
557 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
558 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
559 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
560 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
561 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
564 int main(int argc, char *argv[])
566 int c, sa = 1, sb = 3, i, j, k, forward_only = 0, max_rseq = 0;
567 int8_t mat[25];
568 int gapo = 5, gape = 2, minsc = 0, xtra = KSW_XSTART;
569 uint8_t *rseq = 0;
570 gzFile fpt, fpq;
571 kseq_t *kst, *ksq;
573 // parse command line
574 while ((c = getopt(argc, argv, "a:b:q:r:ft:1")) >= 0) {
575 switch (c) {
576 case 'a': sa = atoi(optarg); break;
577 case 'b': sb = atoi(optarg); break;
578 case 'q': gapo = atoi(optarg); break;
579 case 'r': gape = atoi(optarg); break;
580 case 't': minsc = atoi(optarg); break;
581 case 'f': forward_only = 1; break;
582 case '1': xtra |= KSW_XBYTE; break;
585 if (optind + 2 > argc) {
586 fprintf(stderr, "Usage: ksw [-1] [-f] [-a%d] [-b%d] [-q%d] [-r%d] [-t%d] <target.fa> <query.fa>\n", sa, sb, gapo, gape, minsc);
587 return 1;
589 if (minsc > 0xffff) minsc = 0xffff;
590 xtra |= KSW_XSUBO | minsc;
591 // initialize scoring matrix
592 for (i = k = 0; i < 4; ++i) {
593 for (j = 0; j < 4; ++j)
594 mat[k++] = i == j? sa : -sb;
595 mat[k++] = 0; // ambiguous base
597 for (j = 0; j < 5; ++j) mat[k++] = 0;
598 // open file
599 fpt = gzopen(argv[optind], "r"); kst = kseq_init(fpt);
600 fpq = gzopen(argv[optind+1], "r"); ksq = kseq_init(fpq);
601 // all-pair alignment
602 while (kseq_read(ksq) > 0) {
603 kswq_t *q[2] = {0, 0};
604 kswr_t r;
605 for (i = 0; i < (int)ksq->seq.l; ++i) ksq->seq.s[i] = seq_nt4_table[(int)ksq->seq.s[i]];
606 if (!forward_only) { // reverse
607 if ((int)ksq->seq.m > max_rseq) {
608 max_rseq = ksq->seq.m;
609 rseq = (uint8_t*)realloc(rseq, max_rseq);
611 for (i = 0, j = ksq->seq.l - 1; i < (int)ksq->seq.l; ++i, --j)
612 rseq[j] = ksq->seq.s[i] == 4? 4 : 3 - ksq->seq.s[i];
614 gzrewind(fpt); kseq_rewind(kst);
615 while (kseq_read(kst) > 0) {
616 for (i = 0; i < (int)kst->seq.l; ++i) kst->seq.s[i] = seq_nt4_table[(int)kst->seq.s[i]];
617 r = ksw_align(ksq->seq.l, (uint8_t*)ksq->seq.s, kst->seq.l, (uint8_t*)kst->seq.s, 5, mat, gapo, gape, xtra, &q[0]);
618 if (r.score >= minsc)
619 printf("%s\t%d\t%d\t%s\t%d\t%d\t%d\t%d\t%d\n", kst->name.s, r.tb, r.te+1, ksq->name.s, r.qb, r.qe+1, r.score, r.score2, r.te2);
620 if (rseq) {
621 r = ksw_align(ksq->seq.l, rseq, kst->seq.l, (uint8_t*)kst->seq.s, 5, mat, gapo, gape, xtra, &q[1]);
622 if (r.score >= minsc)
623 printf("%s\t%d\t%d\t%s\t%d\t%d\t%d\t%d\t%d\n", kst->name.s, r.tb, r.te+1, ksq->name.s, (int)ksq->seq.l - r.qb, (int)ksq->seq.l - 1 - r.qe, r.score, r.score2, r.te2);
626 free(q[0]); free(q[1]);
628 free(rseq);
629 kseq_destroy(kst); gzclose(fpt);
630 kseq_destroy(ksq); gzclose(fpq);
631 return 0;
633 #endif