modified: nfig1.py
[GalaxyCodeBases.git] / BGI / BASE / src / 2bwt / DNACount.c
blob407c175af27d333ae3af3b0a1fd5e661990bc0ae
1 /*
3 DNACount.c DNA Count
5 This module contains DNA occurrence counting functions. The DNA must be
6 in word-packed format.
8 Copyright (C) 2004, Wong Chi Kwong.
10 This program is free software; you can redistribute it and/or
11 modify it under the terms of the GNU General Public License
12 as published by the Free Software Foundation; either version 2
13 of the License, or (at your option) any later version.
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include "DNACount.h"
29 #include "MiscUtilities.h"
32 void GenerateDNAOccCountTable(unsigned long long *dnaDecodeTable) {
34 unsigned long long i, j, c, t;
36 for (i=0; i<DNA_OCC_CNT_TABLE_SIZE_IN_WORD; i++) {
37 dnaDecodeTable[i] = 0;
38 c = i;
39 for (j=0; j<4; j++) {
40 t = c & 0x0000000F;
41 dnaDecodeTable[i] += (unsigned long long)1 << (t * 4); //Each alphabet gets 4 bits
42 c >>= 4;
48 unsigned long long ForwardDNAOccCount(const unsigned int* dna, const unsigned long long index, const unsigned int character, const unsigned long long* dnaDecodeTable) {
50 static const unsigned int truncateRightMask[16] = { 0x00000000, 0xC0000000, 0xF0000000, 0xFC000000,
51 0xFF000000, 0xFFC00000, 0xFFF00000, 0xFFFC0000,
52 0xFFFF0000, 0xFFFFC000, 0xFFFFF000, 0xFFFFFC00,
53 0xFFFFFF00, 0xFFFFFFC0, 0xFFFFFFF0, 0xFFFFFFFC };
55 unsigned long long wordToCount, charToCount;
56 unsigned long long i, c;
57 unsigned long long sum = 0;
59 #ifdef DEBUG
60 if (index >= 256) {
61 fprintf(stderr, "ForwardDNAOccCount() : index >= 256!\n");
62 exit(1);
64 #endif
66 wordToCount = index / 16;
67 charToCount = index - wordToCount * 16;
69 for (i=0; i<wordToCount; i++) {
70 sum += dnaDecodeTable[dna[i] >> 16];
71 sum += dnaDecodeTable[dna[i] & 0x0000FFFF];
74 if (charToCount > 0) {
75 c = dna[i] & truncateRightMask[charToCount]; // increase count of 'a' by 16 - c;
76 sum += dnaDecodeTable[c >> 16];
77 sum += dnaDecodeTable[c & 0xFFFF];
78 sum += charToCount - 16; // decrease count of 'a' by 16 - positionToProcess
81 return (sum >> (character * 8)) & 0x000000FF;
85 unsigned long long BackwardDNAOccCount(const unsigned int* dna, const unsigned long long index, const unsigned int character, const unsigned long long* dnaDecodeTable) {
87 static const unsigned int truncateLeftMask[16] = { 0x00000000, 0x00000003, 0x0000000F, 0x0000003F,
88 0x000000FF, 0x000003FF, 0x00000FFF, 0x00003FFF,
89 0x0000FFFF, 0x0003FFFF, 0x000FFFFF, 0x003FFFFF,
90 0x00FFFFFF, 0x03FFFFFF, 0x0FFFFFFF, 0x3FFFFFFF };
92 unsigned long long wordToCount, charToCount;
93 unsigned long long i, c;
94 unsigned long long sum = 0;
96 #ifdef DEBUG
97 if (index >= 256) {
98 fprintf(stderr, "ForwardDNAOccCount() : index >= 256!\n");
99 exit(1);
101 #endif
103 wordToCount = index / 16;
104 charToCount = index - wordToCount * 16;
106 dna -= wordToCount + 1;
108 if (charToCount > 0) {
109 c = *dna & truncateLeftMask[charToCount]; // increase count of 'a' by 16 - c;
110 sum += dnaDecodeTable[c >> 16];
111 sum += dnaDecodeTable[c & 0xFFFF];
112 sum += charToCount - 16; // decrease count of 'a' by 16 - positionToProcess
115 for (i=0; i<wordToCount; i++) {
116 dna++;
117 sum += dnaDecodeTable[*dna >> 16];
118 sum += dnaDecodeTable[*dna & 0x0000FFFF];
121 return (sum >> (character * 8)) & 0x000000FF;
125 void ForwardDNAAllOccCount(const unsigned int* dna, const unsigned long long index, unsigned long long* __restrict occCount, const unsigned long long* dnaDecodeTable) {
127 static const unsigned int truncateRightMask[16] = { 0x00000000, 0xC0000000, 0xF0000000, 0xFC000000,
128 0xFF000000, 0xFFC00000, 0xFFF00000, 0xFFFC0000,
129 0xFFFF0000, 0xFFFFC000, 0xFFFFF000, 0xFFFFFC00,
130 0xFFFFFF00, 0xFFFFFFC0, 0xFFFFFFF0, 0xFFFFFFFC };
132 unsigned long long wordToCount, charToCount;
133 unsigned long long i, c;
134 unsigned long long sum = 0;
136 #ifdef DEBUG
137 if (index >= 256) {
138 fprintf(stderr, "ForwardDNAOccCount() : index >= 256!\n");
139 exit(1);
141 #endif
143 wordToCount = index / 16;
144 charToCount = index - wordToCount * 16;
146 for (i=0; i<wordToCount; i++) {
147 sum += dnaDecodeTable[dna[i] >> 16];
148 sum += dnaDecodeTable[dna[i] & 0x0000FFFF];
151 if (charToCount > 0) {
152 c = dna[i] & truncateRightMask[charToCount]; // increase count of 'a' by 16 - c;
153 sum += dnaDecodeTable[c >> 16];
154 sum += dnaDecodeTable[c & 0xFFFF];
155 sum += charToCount - 16; // decrease count of 'a' by 16 - positionToProcess
158 occCount[0] = sum & 0x000000FF; sum >>= 8;
159 occCount[1] = sum & 0x000000FF; sum >>= 8;
160 occCount[2] = sum & 0x000000FF; sum >>= 8;
161 occCount[3] = sum;
165 void BackwardDNAAllOccCount(const unsigned int* dna, const unsigned long long index, unsigned long long* __restrict occCount, const unsigned long long* dnaDecodeTable) {
167 static const unsigned int truncateLeftMask[16] = { 0x00000000, 0x00000003, 0x0000000F, 0x0000003F,
168 0x000000FF, 0x000003FF, 0x00000FFF, 0x00003FFF,
169 0x0000FFFF, 0x0003FFFF, 0x000FFFFF, 0x003FFFFF,
170 0x00FFFFFF, 0x03FFFFFF, 0x0FFFFFFF, 0x3FFFFFFF };
172 unsigned long long wordToCount, charToCount;
173 unsigned long long i, c;
174 unsigned long long sum = 0;
176 #ifdef DEBUG
177 if (index >= 256) {
178 fprintf(stderr, "ForwardDNAOccCount() : index >= 256!\n");
179 exit(1);
181 #endif
183 wordToCount = index / 16;
184 charToCount = index - wordToCount * 16;
186 dna -= wordToCount + 1;
188 if (charToCount > 0) {
189 c = *dna & truncateLeftMask[charToCount]; // increase count of 'a' by 16 - c;
190 sum += dnaDecodeTable[c >> 16];
191 sum += dnaDecodeTable[c & 0xFFFF];
192 sum += charToCount - 16; // decrease count of 'a' by 16 - positionToProcess
195 for (i=0; i<wordToCount; i++) {
196 dna++;
197 sum += dnaDecodeTable[*dna >> 16];
198 sum += dnaDecodeTable[*dna & 0x0000FFFF];
201 occCount[0] = sum & 0x000000FF; sum >>= 8;
202 occCount[1] = sum & 0x000000FF; sum >>= 8;
203 occCount[2] = sum & 0x000000FF; sum >>= 8;
204 occCount[3] = sum;
208 unsigned long long Forward1OccCount(const unsigned int* bitVector, const unsigned long long index, const unsigned long long* dnaDecodeTable) {
210 static const unsigned int truncateRightMask[32] = { 0x00000000, 0x80000000, 0xC0000000, 0xE0000000, 0xF0000000, 0xF8000000, 0xFC000000, 0xFE000000,
211 0xFF000000, 0xFF800000, 0xFFC00000, 0xFFE00000, 0xFFF00000, 0xFFF80000, 0xFFFC0000, 0xFFFE0000,
212 0xFFFF0000, 0xFFFF8000, 0xFFFFC000, 0xFFFFE000, 0xFFFFF000, 0xFFFFF800, 0xFFFFFC00, 0xFFFFFE00,
213 0xFFFFFF00, 0xFFFFFF80, 0xFFFFFFC0, 0xFFFFFFE0, 0xFFFFFFF0, 0xFFFFFFF8, 0xFFFFFFFC, 0xFFFFFFFE};
215 unsigned long long wordToCount, bitToCount;
216 unsigned long long i, c;
217 unsigned long long sum = 0;
218 unsigned long long numberOf1;
220 #ifdef DEBUG
221 if (index >= 256) {
222 fprintf(stderr, "Forward1OccCount() : index >= 256!\n");
223 exit(1);
225 #endif
227 wordToCount = index / 32;
228 bitToCount = index - wordToCount * 32;
230 for (i=0; i<wordToCount; i++) {
231 sum += dnaDecodeTable[bitVector[i] >> 16];
232 sum += dnaDecodeTable[bitVector[i] & 0x0000FFFF];
235 if (bitToCount > 0) {
236 c = bitVector[i] & truncateRightMask[bitToCount];
237 sum += dnaDecodeTable[c >> 16];
238 sum += dnaDecodeTable[c & 0x0000FFFF];
241 sum >>= 8;
242 numberOf1 = sum & 0x000000FF;
243 sum >>= 8;
244 numberOf1 = sum & 0x000000FF;
245 sum >>= 8;
246 numberOf1 = sum * 2;
248 return numberOf1;
252 unsigned long long Backward1OccCount(const unsigned int* bitVector, const unsigned long long index, const unsigned long long* dnaDecodeTable) {
254 static const unsigned int truncateLeftMask[32] = { 0x00000000, 0x00000001, 0x00000003, 0x00000007, 0x0000000F, 0x0000001F, 0x0000003F, 0x0000007F,
255 0x000000FF, 0x000001FF, 0x000003FF, 0x000007FF, 0x00000FFF, 0x00001FFF, 0x00003FFF, 0x00007FFF,
256 0x0000FFFF, 0x0001FFFF, 0x0003FFFF, 0x0007FFFF, 0x000FFFFF, 0x001FFFFF, 0x003FFFFF, 0x007FFFFF,
257 0x00FFFFFF, 0x01FFFFFF, 0x03FFFFFF, 0x07FFFFFF, 0x0FFFFFFF, 0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF};
259 unsigned long long wordToCount, bitToCount;
260 unsigned long long i, c;
261 unsigned long long sum = 0;
262 unsigned long long numberOf1;
264 #ifdef DEBUG
265 if (index >= 256) {
266 fprintf(stderr, "ForwardDNAOccCount() : index >= 256!\n");
267 exit(1);
269 #endif
271 wordToCount = index / 32;
272 bitToCount = index - wordToCount * 32;
274 bitVector -= wordToCount + 1;
276 if (bitToCount > 0) {
277 c = *bitVector & truncateLeftMask[bitToCount];
278 sum += dnaDecodeTable[c >> 16];
279 sum += dnaDecodeTable[c & 0xFFFF];
282 for (i=0; i<wordToCount; i++) {
283 bitVector++;
284 sum += dnaDecodeTable[*bitVector >> 16];
285 sum += dnaDecodeTable[*bitVector & 0x0000FFFF];
288 sum >>= 8;
289 numberOf1 = sum & 0x000000FF;
290 sum >>= 8;
291 numberOf1 = sum & 0x000000FF;
292 sum >>= 8;
293 numberOf1 = sum * 2;
295 return numberOf1;
299 unsigned long long ForwardDNAOccCountNoLimit(const unsigned int* dna, const unsigned long long index, const unsigned int character, const unsigned long long* dnaDecodeTable) {
301 static const unsigned int truncateRightMask[16] = { 0x00000000, 0xC0000000, 0xF0000000, 0xFC000000,
302 0xFF000000, 0xFFC00000, 0xFFF00000, 0xFFFC0000,
303 0xFFFF0000, 0xFFFFC000, 0xFFFFF000, 0xFFFFFC00,
304 0xFFFFFF00, 0xFFFFFFC0, 0xFFFFFFF0, 0xFFFFFFFC };
306 unsigned long long iteration, wordToCount, charToCount;
307 unsigned long long i, j, c;
308 unsigned long long sum;
309 unsigned long long occCount = 0;
311 iteration = index / 256;
312 wordToCount = (index - iteration * 256) / 16;
313 charToCount = index - iteration * 256 - wordToCount * 16;
315 for (i=0; i<iteration; i++) {
317 sum = 0;
318 for (j=0; j<16; j++) {
319 sum += dnaDecodeTable[*dna >> 16];
320 sum += dnaDecodeTable[*dna & 0x0000FFFF];
321 dna++;
323 if (!DNA_OCC_SUM_EXCEPTION(sum)) {
324 occCount += (sum >> (character * 8)) & 0x000000FF;
325 } else {
326 // only some or all of the 3 bits are on
327 // in reality, only one of the four cases are possible
328 if (sum == 0x00000100) {
329 if (character == 0) {
330 occCount += 256;
332 } else if (sum == 0x00010000) {
333 if (character == 1) {
334 occCount += 256;
336 } else if (sum == 0x01000000) {
337 if (character == 2) {
338 occCount += 256;
340 } else if (sum == 0x00000000) {
341 if (character == 3) {
342 occCount += 256;
344 } else {
345 fprintf(stderr, "ForwardDNAOccCountNoLimit(): DNA occ sum exception!\n");
346 exit(1);
352 sum = 0;
353 for (j=0; j<wordToCount; j++) {
354 sum += dnaDecodeTable[*dna >> 16];
355 sum += dnaDecodeTable[*dna & 0x0000FFFF];
356 dna++;
359 if (charToCount > 0) {
360 c = *dna & truncateRightMask[charToCount]; // increase count of 'a' by 16 - c;
361 sum += dnaDecodeTable[c >> 16];
362 sum += dnaDecodeTable[c & 0xFFFF];
363 sum += charToCount - 16; // decrease count of 'a' by 16 - positionToProcess
366 occCount += (sum >> (character * 8)) & 0x000000FF;
368 return occCount;
372 unsigned long long BackwardDNAOccCountNoLimit(const unsigned int* dna, const unsigned long long index, const unsigned int character, const unsigned long long* dnaDecodeTable) {
374 static const unsigned int truncateLeftMask[16] = { 0x00000000, 0x00000003, 0x0000000F, 0x0000003F,
375 0x000000FF, 0x000003FF, 0x00000FFF, 0x00003FFF,
376 0x0000FFFF, 0x0003FFFF, 0x000FFFFF, 0x003FFFFF,
377 0x00FFFFFF, 0x03FFFFFF, 0x0FFFFFFF, 0x3FFFFFFF };
379 unsigned long long iteration, wordToCount, charToCount;
380 unsigned long long i, j, c;
381 unsigned long long sum = 0;
382 unsigned long long occCount;
384 dna -= index / 16 + 1;
386 iteration = index / 256;
387 wordToCount = (index - iteration * 256) / 16;
388 charToCount = index - iteration * 256 - wordToCount * 16;
390 if (charToCount > 0) {
391 c = *dna & truncateLeftMask[charToCount]; // increase count of 'a' by 16 - c;
392 sum += dnaDecodeTable[c >> 16];
393 sum += dnaDecodeTable[c & 0xFFFF];
394 sum += charToCount - 16; // decrease count of 'a' by 16 - positionToProcess
397 for (j=0; j<wordToCount; j++) {
398 dna++;
399 sum += dnaDecodeTable[*dna >> 16];
400 sum += dnaDecodeTable[*dna & 0x0000FFFF];
403 occCount = (sum >> (character * 8)) & 0x000000FF;
406 for (i=0; i<iteration; i++) {
408 sum = 0;
409 for (j=0; j<16; j++) {
410 dna++;
411 sum += dnaDecodeTable[*dna >> 16];
412 sum += dnaDecodeTable[*dna & 0x0000FFFF];
414 if (!DNA_OCC_SUM_EXCEPTION(sum)) {
415 occCount += (sum >> (character * 8)) & 0x000000FF;
416 } else {
417 // only some or all of the 3 bits are on
418 // in reality, only one of the four cases are possible
419 if (sum == 0x00000100) {
420 if (character == 0) {
421 occCount += 256;
423 } else if (sum == 0x00010000) {
424 if (character == 1) {
425 occCount += 256;
427 } else if (sum == 0x01000000) {
428 if (character == 2) {
429 occCount += 256;
431 } else if (sum == 0x00000000) {
432 if (character == 3) {
433 occCount += 256;
435 } else {
436 fprintf(stderr, "BackwardDNAOccCountNoLimit(): DNA occ sum exception!\n");
437 exit(1);
443 return occCount;
447 void ForwardDNAAllOccCountNoLimit(const unsigned int* dna, const unsigned long long index, unsigned long long* __restrict occCount, const unsigned long long* dnaDecodeTable) {
449 static const unsigned int truncateRightMask[8] = { 0x00000000, 0xF0000000,
450 0xFF000000, 0xFFF00000,
451 0xFFFF0000, 0xFFFFF000,
452 0xFFFFFF00, 0xFFFFFFF0};
454 unsigned long long iteration, wordToCount, charToCount;
455 unsigned long long i, j, k, c;
456 unsigned long long sum,check;
458 for (j=0;j<DNA_ALPHABET_SIZE;j++) {
459 occCount[j] = 0;
462 iteration = index / 16;
463 wordToCount = (index - iteration * 16) / 8;
464 charToCount = index - iteration * 16 - wordToCount * 8;
466 for (i=0; i<iteration; i++) {
467 sum = dnaDecodeTable[*dna >> 16];
468 sum += dnaDecodeTable[*dna & 0x0000FFFF];
469 dna++;
470 sum += dnaDecodeTable[*dna >> 16];
471 sum += dnaDecodeTable[*dna & 0x0000FFFF];
472 dna++;
474 if (!DNA_OCC_SUM_EXCEPTION(sum)) {
475 for (j=0;j<DNA_ALPHABET_SIZE;j++) {
476 occCount[j] += sum & 0x0000000F;
477 sum >>= 4;
480 } else {
481 // only some or all of the 3 bits are on
482 // in reality, only one of the four cases are possible
483 check=0x0000000000000010;
484 for (j=0;j<DNA_ALPHABET_SIZE;j++) {
485 if (sum == check) {
486 occCount[j] += 16;
488 check<<=4;
494 sum = 0;
495 for (j=0; j<wordToCount; j++) {
496 sum += dnaDecodeTable[*dna >> 16];
497 sum += dnaDecodeTable[*dna & 0x0000FFFF];
498 dna++;
501 if (charToCount > 0) {
502 c = *dna & truncateRightMask[charToCount]; // increase count of 'a' by 16 - c;
503 sum += dnaDecodeTable[c >> 16];
504 sum += dnaDecodeTable[c & 0xFFFF];
505 occCount[0] += charToCount - 16; // decrease count of 'a' by 16 - positionToProcess
508 for (j=0;j<DNA_ALPHABET_SIZE;j++) {
509 occCount[j] += sum & 0x0000000F; sum >>= 4;
513 void BackwardDNAAllOccCountNoLimit(const unsigned int* dna, const unsigned long long index, unsigned long long* __restrict occCount, const unsigned long long* dnaDecodeTable) {
515 static const unsigned int truncateLeftMask[16] = { 0x00000000, 0x00000003, 0x0000000F, 0x0000003F,
516 0x000000FF, 0x000003FF, 0x00000FFF, 0x00003FFF,
517 0x0000FFFF, 0x0003FFFF, 0x000FFFFF, 0x003FFFFF,
518 0x00FFFFFF, 0x03FFFFFF, 0x0FFFFFFF, 0x3FFFFFFF };
520 unsigned long long iteration, wordToCount, charToCount;
521 unsigned long long i, j, c;
522 unsigned long long sum;
524 dna -= index / 16 + 1;
526 iteration = index / 256;
527 wordToCount = (index - iteration * 256) / 16;
528 charToCount = index - iteration * 256 - wordToCount * 16;
530 sum = 0;
531 if (charToCount > 0) {
532 c = *dna & truncateLeftMask[charToCount]; // increase count of 'a' by 16 - c;
533 sum += dnaDecodeTable[c >> 16];
534 sum += dnaDecodeTable[c & 0xFFFF];
535 sum += charToCount - 16; // decrease count of 'a' by 16 - positionToProcess
538 for (j=0; j<wordToCount; j++) {
539 dna++;
540 sum += dnaDecodeTable[*dna >> 16];
541 sum += dnaDecodeTable[*dna & 0x0000FFFF];
544 occCount[0] = sum & 0x000000FF; sum >>= 8;
545 occCount[1] = sum & 0x000000FF; sum >>= 8;
546 occCount[2] = sum & 0x000000FF; sum >>= 8;
547 occCount[3] = sum;
549 for (i=0; i<iteration; i++) {
551 sum = 0;
552 for (j=0; j<16; j++) {
553 dna++;
554 sum += dnaDecodeTable[*dna >> 16];
555 sum += dnaDecodeTable[*dna & 0x0000FFFF];
557 if (!DNA_OCC_SUM_EXCEPTION(sum)) {
558 occCount[0] += sum & 0x000000FF; sum >>= 8;
559 occCount[1] += sum & 0x000000FF; sum >>= 8;
560 occCount[2] += sum & 0x000000FF; sum >>= 8;
561 occCount[3] += sum;
562 } else {
563 // only some or all of the 3 bits are on
564 // in reality, only one of the four cases are possible
565 if (sum == 0x00000100) {
566 occCount[0] += 256;
567 } else if (sum == 0x00010000) {
568 occCount[1] += 256;
569 } else if (sum == 0x01000000) {
570 occCount[2] += 256;
571 } else if (sum == 0x00000000) {
572 occCount[3] += 256;
573 } else {
574 fprintf(stderr, "BackwardDNAAllOccCountNoLimit(): DNA occ sum exception!\n");
575 exit(1);
584 void GenerateDNA_NOccCountTable(unsigned int *dnaDecodeTable) {
586 unsigned long long i, j, c, t;
588 for (i=0; i<DNA_N_OCC_CNT_TABLE_SIZE_IN_WORD; i++) {
589 dnaDecodeTable[i] = 0;
590 c = i;
591 for (j=0; j<5; j++) {
592 t = c & 0x00000007;
593 if (t != 4) {
594 // Count of 'n' is to be derived from acgt
595 dnaDecodeTable[i] += 1 << (t * 8);
597 c >>= 3;
603 unsigned long long ForwardDNA_NOccCount(const unsigned int* dna, const unsigned long long index, const unsigned int character, const unsigned long long* dnaDecodeTable) {
605 static const unsigned int truncateRightMask[10] = { 0x00000000, 0xE0000000, 0xFC000000, 0xFF800000,
606 0xFFF00000, 0xFFFE0000, 0xFFFFC000, 0xFFFFF800,
607 0xFFFFFF00, 0xFFFFFFE0};
609 unsigned long long wordToCount, charToCount;
610 unsigned long long i, c;
611 unsigned long long sum = 0;
612 unsigned long long occCount;
614 #ifdef DEBUG
615 if (index > 250) {
616 fprintf(stderr, "ForwardDNA_NOccCount() : index > 250!\n");
617 exit(1);
619 #endif
621 wordToCount = index / 10;
622 charToCount = index - wordToCount * 10;
624 for (i=0; i<wordToCount; i++) {
625 sum += dnaDecodeTable[dna[i] >> 17];
626 sum += dnaDecodeTable[(dna[i] >> 2) & 0x00007FFF];
629 if (charToCount > 0) {
630 c = dna[i] & truncateRightMask[charToCount]; // increase count of 'a' by 10 - charToCount;
631 sum += dnaDecodeTable[c >> 17];
632 sum += dnaDecodeTable[(c >> 2) & 0x00007FFF];
633 sum += charToCount - 10; // decrease count of 'a' by 10 - charToCount
636 if (character != 4) {
637 occCount = (sum >> (character * 8)) & 0x000000FF;
638 } else {
639 occCount = index;
640 occCount -= sum & 0x000000FF; sum >>= 8;
641 occCount -= sum & 0x000000FF; sum >>= 8;
642 occCount -= sum & 0x000000FF; sum >>= 8;
643 occCount -= sum;
646 return occCount;
650 unsigned long long BackwardDNA_NOccCount(const unsigned int* dna, const unsigned long long index, const unsigned int character, const unsigned long long* dnaDecodeTable) {
652 static const unsigned int truncateLeftMask[10] = { 0x00000000, 0x0000001C, 0x000000FC, 0x000007FC,
653 0x00003FFC, 0x0001FFFC, 0x000FFFFC, 0x007FFFFC,
654 0x03FFFFFC, 0x1FFFFFFC};
656 unsigned long long wordToCount, charToCount;
657 unsigned long long j, c;
658 unsigned long long sum = 0;
659 unsigned long long occCount;
661 #ifdef DEBUG
662 if (index > 250) {
663 fprintf(stderr, "BackwardDNA_NOccCount() : index >= 250!\n");
664 exit(1);
666 #endif
668 wordToCount = index / 10;
669 charToCount = index - wordToCount * 10;
671 dna -= wordToCount + 1;
673 if (charToCount > 0) {
674 c = *dna & truncateLeftMask[charToCount]; // increase count of 'a' by 10 - charToCount;
675 sum += dnaDecodeTable[c >> 17];
676 sum += dnaDecodeTable[(c >> 2) & 0x00007FFF];
677 sum += charToCount - 10; // decrease count of 'a' by 10 - charToCount
680 for (j=0; j<wordToCount; j++) {
681 dna++;
682 sum += dnaDecodeTable[*dna >> 17];
683 sum += dnaDecodeTable[(*dna >> 2) & 0x00007FFF];
686 if (character != 4) {
687 occCount = (sum >> (character * 8)) & 0x000000FF;
688 } else {
689 occCount = index;
690 occCount -= sum & 0x000000FF; sum >>= 8;
691 occCount -= sum & 0x000000FF; sum >>= 8;
692 occCount -= sum & 0x000000FF; sum >>= 8;
693 occCount -= sum;
696 #ifdef DEBUG
697 if (occCount > index + 1) {
698 fprintf(stderr, "BackwardDNA_NOccCount() : occCount > index + 1!\n");
699 exit(1);
701 #endif
703 return occCount;
707 void ForwardDNA_NAllOccCount(const unsigned int* dna, const unsigned long long index, unsigned long long* __restrict occCount, const unsigned long long* dnaDecodeTable) {
709 static const unsigned int truncateRightMask[10] = { 0x00000000, 0xE0000000, 0xFC000000, 0xFF800000,
710 0xFFF00000, 0xFFFE0000, 0xFFFFC000, 0xFFFFF800,
711 0xFFFFFF00, 0xFFFFFFE0};
713 unsigned long long wordToCount, charToCount;
714 unsigned long long i, c;
715 unsigned long long sum = 0;
717 #ifdef DEBUG
718 if (index > 250) {
719 fprintf(stderr, "ForwardDNA_NAllOccCount() : index >= 250!\n");
720 exit(1);
722 #endif
724 wordToCount = index / 10;
725 charToCount = index - wordToCount * 10;
727 for (i=0; i<wordToCount; i++) {
728 sum += dnaDecodeTable[dna[i] >> 17];
729 sum += dnaDecodeTable[(dna[i] >> 2) & 0x00007FFF];
732 if (charToCount > 0) {
733 c = dna[i] & truncateRightMask[charToCount]; // increase count of 'a' by 10 - charToCount;
734 sum += dnaDecodeTable[c >> 17];
735 sum += dnaDecodeTable[(c >> 2) & 0x00007FFF];
736 sum += charToCount - 10; // decrease count of 'a' by 10 - charToCount
739 occCount[0] = sum & 0x000000FF; sum >>= 8;
740 occCount[1] = sum & 0x000000FF; sum >>= 8;
741 occCount[2] = sum & 0x000000FF; sum >>= 8;
742 occCount[3] = sum;
746 void BackwardDNA_NAllOccCount(const unsigned int* dna, const unsigned long long index, unsigned long long* __restrict occCount, const unsigned long long* dnaDecodeTable) {
748 static const unsigned int truncateLeftMask[10] = { 0x00000000, 0x0000001C, 0x000000FC, 0x000007FC,
749 0x00003FFC, 0x0001FFFC, 0x000FFFFC, 0x007FFFFC,
750 0x03FFFFFC, 0x1FFFFFFC};
752 unsigned long long wordToCount, charToCount;
753 unsigned long long j, c;
754 unsigned long long sum = 0;
756 #ifdef DEBUG
757 if (index > 250) {
758 fprintf(stderr, "BackwardDNA_NAllOccCount() : index >= 250!\n");
759 exit(1);
761 #endif
763 wordToCount = index / 10;
764 charToCount = index - wordToCount * 10;
766 dna -= wordToCount + 1;
768 if (charToCount > 0) {
769 c = *dna & truncateLeftMask[charToCount]; // increase count of 'a' by 10 - charToCount;
770 sum += dnaDecodeTable[c >> 17];
771 sum += dnaDecodeTable[(c >> 2) & 0x00007FFF];
772 sum += charToCount - 10; // decrease count of 'a' by 16 - charToCount
775 for (j=0; j<wordToCount; j++) {
776 dna++;
777 sum += dnaDecodeTable[*dna >> 17];
778 sum += dnaDecodeTable[(*dna >> 2) & 0x00007FFF];
781 occCount[0] = sum & 0x000000FF; sum >>= 8;
782 occCount[1] = sum & 0x000000FF; sum >>= 8;
783 occCount[2] = sum & 0x000000FF; sum >>= 8;
784 occCount[3] = sum;
788 unsigned long long ForwardDNA_NOccCountNoLimit(const unsigned int* dna, const unsigned long long index, const unsigned int character, const unsigned long long* dnaDecodeTable) {
790 static const unsigned int truncateRightMask[10] = { 0x00000000, 0xE0000000, 0xFC000000, 0xFF800000,
791 0xFFF00000, 0xFFFE0000, 0xFFFFC000, 0xFFFFF800,
792 0xFFFFFF00, 0xFFFFFFE0};
794 unsigned long long iteration, wordToCount, charToCount;
795 unsigned long long i, j, c;
796 unsigned long long sum;
797 unsigned long long occCount = 0;
799 iteration = index / 250;
800 wordToCount = (index - iteration * 250) / 10;
801 charToCount = index - iteration * 250 - wordToCount * 10;
803 for (i=0; i<iteration; i++) {
805 sum = 0;
806 for (j=0; j<25; j++) {
807 sum += dnaDecodeTable[*dna >> 17];
808 sum += dnaDecodeTable[(*dna >> 2) & 0x00007FFF];
809 dna++;
811 if (character != 4) {
812 occCount += (sum >> (character * 8)) & 0x000000FF;
813 } else {
814 occCount -= sum & 0x000000FF; sum >>= 8;
815 occCount -= sum & 0x000000FF; sum >>= 8;
816 occCount -= sum & 0x000000FF; sum >>= 8;
817 occCount -= sum;
821 sum = 0;
822 for (j=0; j<wordToCount; j++) {
823 sum += dnaDecodeTable[*dna >> 17];
824 sum += dnaDecodeTable[(*dna >> 2) & 0x00007FFF];
825 dna++;
828 if (charToCount > 0) {
829 c = *dna & truncateRightMask[charToCount]; // increase count of 'a' by 10 - charToCount;
830 sum += dnaDecodeTable[c >> 17];
831 sum += dnaDecodeTable[(c >> 2) & 0x00007FFF];
832 sum += charToCount - 10; // decrease count of 'a' by 10 - charToCount
835 if (character != 4) {
836 occCount += (sum >> (character * 8)) & 0x000000FF;
837 } else {
838 occCount += index;
839 occCount -= sum & 0x000000FF; sum >>= 8;
840 occCount -= sum & 0x000000FF; sum >>= 8;
841 occCount -= sum & 0x000000FF; sum >>= 8;
842 occCount -= sum;
845 return occCount;
849 unsigned long long BackwardDNA_NOccCountNoLimit(const unsigned int* dna, const unsigned long long index, const unsigned int character, const unsigned long long* dnaDecodeTable) {
851 static const unsigned int truncateLeftMask[10] = { 0x00000000, 0x0000001C, 0x000000FC, 0x000007FC,
852 0x00003FFC, 0x0001FFFC, 0x000FFFFC, 0x007FFFFC,
853 0x03FFFFFC, 0x1FFFFFFC};
855 unsigned long long iteration, wordToCount, charToCount;
856 unsigned long long i, j, c;
857 unsigned long long sum = 0;
858 unsigned long long occCount = 0;
860 dna -= index / 10 + 1;
862 iteration = index / 250;
863 wordToCount = (index - iteration * 250) / 10;
864 charToCount = index - iteration * 250 - wordToCount * 10;
866 if (charToCount > 0) {
867 c = *dna & truncateLeftMask[charToCount]; // increase count of 'a' by 10 - charToCount;
868 sum += dnaDecodeTable[c >> 17];
869 sum += dnaDecodeTable[(c >> 2) & 0x00007FFF];
870 sum += charToCount - 10; // decrease count of 'a' by 10 - charToCount
873 for (j=0; j<wordToCount; j++) {
874 dna++;
875 sum += dnaDecodeTable[*dna >> 17];
876 sum += dnaDecodeTable[(*dna >> 2) & 0x00007FFF];
879 if (character != 4) {
880 occCount = (sum >> (character * 8)) & 0x000000FF;
881 } else {
882 occCount = index;
883 occCount -= sum & 0x000000FF; sum >>= 8;
884 occCount -= sum & 0x000000FF; sum >>= 8;
885 occCount -= sum & 0x000000FF; sum >>= 8;
886 occCount -= sum;
889 for (i=0; i<iteration; i++) {
891 sum = 0;
892 for (j=0; j<25; j++) {
893 dna++;
894 sum += dnaDecodeTable[*dna >> 17];
895 sum += dnaDecodeTable[(*dna >> 2) & 0x00007FFF];
897 if (character != 4) {
898 occCount += (sum >> (character * 8)) & 0x000000FF;
899 } else {
900 occCount -= sum & 0x000000FF; sum >>= 8;
901 occCount -= sum & 0x000000FF; sum >>= 8;
902 occCount -= sum & 0x000000FF; sum >>= 8;
903 occCount -= sum;
907 return occCount;
911 void ForwardDNA_NAllOccCountNoLimit(const unsigned int* dna, const unsigned long long index, unsigned long long* __restrict occCount, const unsigned long long* dnaDecodeTable) {
913 static const unsigned int truncateRightMask[10] = { 0x00000000, 0xE0000000, 0xFC000000, 0xFF800000,
914 0xFFF00000, 0xFFFE0000, 0xFFFFC000, 0xFFFFF800,
915 0xFFFFFF00, 0xFFFFFFE0};
917 unsigned long long iteration, wordToCount, charToCount;
918 unsigned long long i, j, c;
919 unsigned long long sum;
921 occCount[0] = 0;
922 occCount[1] = 0;
923 occCount[2] = 0;
924 occCount[3] = 0;
926 iteration = index / 250;
927 wordToCount = (index - iteration * 250) / 10;
928 charToCount = index - iteration * 250 - wordToCount * 10;
930 for (i=0; i<iteration; i++) {
932 sum = 0;
933 for (j=0; j<25; j++) {
934 sum += dnaDecodeTable[*dna >> 17];
935 sum += dnaDecodeTable[(*dna >> 2) & 0x00007FFF];
936 dna++;
938 occCount[0] += sum & 0x000000FF; sum >>= 8;
939 occCount[1] += sum & 0x000000FF; sum >>= 8;
940 occCount[2] += sum & 0x000000FF; sum >>= 8;
941 occCount[3] += sum;
944 sum = 0;
945 for (j=0; j<wordToCount; j++) {
946 sum += dnaDecodeTable[*dna >> 17];
947 sum += dnaDecodeTable[(*dna >> 2) & 0x00007FFF];
948 dna++;
951 if (charToCount > 0) {
952 c = *dna & truncateRightMask[charToCount]; // increase count of 'a' by 10 - charToCount;
953 sum += dnaDecodeTable[c >> 17];
954 sum += dnaDecodeTable[(c >> 2) & 0x00007FFF];
955 sum += charToCount - 10; // decrease count of 'a' by 10 - charToCount
958 occCount[0] += sum & 0x000000FF; sum >>= 8;
959 occCount[1] += sum & 0x000000FF; sum >>= 8;
960 occCount[2] += sum & 0x000000FF; sum >>= 8;
961 occCount[3] += sum;
965 void BackwardDNA_NAllOccCountNoLimit(const unsigned int* dna, const unsigned long long index, unsigned long long* __restrict occCount, const unsigned long long* dnaDecodeTable) {
967 static const unsigned int truncateLeftMask[10] = { 0x00000000, 0x0000001C, 0x000000FC, 0x000007FC,
968 0x00003FFC, 0x0001FFFC, 0x000FFFFC, 0x007FFFFC,
969 0x03FFFFFC, 0x1FFFFFFC};
971 unsigned long long iteration, wordToCount, charToCount;
972 unsigned long long i, j, c;
973 unsigned long long sum = 0;
975 dna -= index / 10 + 1;
977 iteration = index / 250;
978 wordToCount = (index - iteration * 250) / 10;
979 charToCount = index - iteration * 250 - wordToCount * 10;
981 if (charToCount > 0) {
982 c = *dna & truncateLeftMask[charToCount]; // increase count of 'a' by 10 - charToCount;
983 sum += dnaDecodeTable[c >> 17];
984 sum += dnaDecodeTable[(c >> 2) & 0x00007FFF];
985 sum += charToCount - 10; // decrease count of 'a' by 16 - charToCount
988 for (j=0; j<wordToCount; j++) {
989 dna++;
990 sum += dnaDecodeTable[*dna >> 17];
991 sum += dnaDecodeTable[(*dna >> 2) & 0x00007FFF];
994 occCount[0] = sum & 0x000000FF; sum >>= 8;
995 occCount[1] = sum & 0x000000FF; sum >>= 8;
996 occCount[2] = sum & 0x000000FF; sum >>= 8;
997 occCount[3] = sum;
999 for (i=0; i<iteration; i++) {
1001 sum = 0;
1002 for (j=0; j<25; j++) {
1003 dna++;
1004 sum += dnaDecodeTable[*dna >> 17];
1005 sum += dnaDecodeTable[(*dna >> 2) & 0x00007FFF];
1007 occCount[0] += sum & 0x000000FF; sum >>= 8;
1008 occCount[1] += sum & 0x000000FF; sum >>= 8;
1009 occCount[2] += sum & 0x000000FF; sum >>= 8;
1010 occCount[3] += sum;
1015 unsigned long long ForwardOccCount(const unsigned int* packed, const unsigned long long index, const unsigned int character, const unsigned int alphabetSize) {
1017 unsigned long long wordToCount, charToCount;
1018 unsigned long long bitPerChar, charPerWord;
1019 unsigned long long i, j, c;
1020 unsigned long long occCount = 0;
1022 bitPerChar = ceilLog2(alphabetSize);
1023 charPerWord = BITS_IN_WORD / bitPerChar;
1025 wordToCount = index / charPerWord;
1026 charToCount = index - wordToCount * charPerWord;
1028 for (i=0; i<wordToCount; i++) {
1029 c = packed[i];
1030 for (j=0; j<charPerWord; j++) {
1031 if (c >> (BITS_IN_WORD - bitPerChar) == character) {
1032 occCount++;
1034 c <<= bitPerChar;
1037 if (charToCount > 0) {
1038 c = packed[i];
1039 for (j=0; j<charToCount; j++) {
1040 if (c >> (BITS_IN_WORD - bitPerChar) == character) {
1041 occCount++;
1043 c <<= bitPerChar;
1047 return occCount;
1052 unsigned long long BackwardOccCount(const unsigned int* packed, const unsigned long long index, const unsigned int character, const unsigned int alphabetSize) {
1054 unsigned long long wordToCount, charToCount;
1055 unsigned long long bitPerChar, charPerWord;
1056 unsigned long long i, j, c;
1057 unsigned long long occCount = 0;
1059 bitPerChar = ceilLog2(alphabetSize);
1060 charPerWord = BITS_IN_WORD / bitPerChar;
1062 wordToCount = index / charPerWord;
1063 charToCount = index - wordToCount * charPerWord;
1065 packed -= wordToCount + 1;
1067 if (charToCount > 0) {
1068 c = *packed << (bitPerChar * (charPerWord - charToCount));
1069 for (j=0; j<charToCount; j++) {
1070 if (c >> (BITS_IN_WORD - bitPerChar) == character) {
1071 occCount++;
1073 c <<= bitPerChar;
1077 for (i=1; i<=wordToCount; i++) {
1078 packed++;
1079 c = *packed;
1080 for (j=0; j<charPerWord; j++) {
1081 if (c >> (BITS_IN_WORD - bitPerChar) == character) {
1082 occCount++;
1084 c <<= bitPerChar;
1088 return occCount;
1091 void ForwardAllOccCount(const unsigned int* packed, const unsigned long long index, const unsigned int alphabetSize, unsigned int* occCount) {
1093 unsigned long long wordToCount, charToCount;
1094 unsigned long long bitPerChar, charPerWord;
1095 unsigned long long i, j, c;
1097 bitPerChar = ceilLog2(alphabetSize);
1098 charPerWord = BITS_IN_WORD / bitPerChar;
1100 wordToCount = index / charPerWord;
1101 charToCount = index - wordToCount * charPerWord;
1103 for (i=0; i<wordToCount; i++) {
1104 c = packed[i];
1105 for (j=0; j<charPerWord; j++) {
1106 occCount[c >> (BITS_IN_WORD - bitPerChar)]++;
1107 c <<= bitPerChar;
1110 if (charToCount > 0) {
1111 c = packed[i];
1112 for (j=0; j<charToCount; j++) {
1113 occCount[c >> (BITS_IN_WORD - bitPerChar)]++;
1114 c <<= bitPerChar;
1120 void BackwardAllOccCount(const unsigned int* packed, const unsigned long long index, const unsigned int alphabetSize, unsigned int* occCount) {
1122 unsigned long long wordToCount, charToCount;
1123 unsigned long long bitPerChar, charPerWord;
1124 unsigned long long i, j, c;
1126 bitPerChar = ceilLog2(alphabetSize);
1127 charPerWord = BITS_IN_WORD / bitPerChar;
1129 wordToCount = index / charPerWord;
1130 charToCount = index - wordToCount * charPerWord;
1132 packed -= wordToCount + 1;
1134 if (charToCount > 0) {
1135 c = *packed << (bitPerChar * (charPerWord - charToCount));
1136 for (j=0; j<charToCount; j++) {
1137 occCount[c >> (BITS_IN_WORD - bitPerChar)]++;
1138 c <<= bitPerChar;
1142 for (i=1; i<=wordToCount; i++) {
1143 packed++;
1144 c = *packed;
1145 for (j=0; j<charPerWord; j++) {
1146 occCount[c >> (BITS_IN_WORD - bitPerChar)]++;
1147 c <<= bitPerChar;