5 This module contains DNA occurrence counting functions. The DNA must be
8 Copyright (C) 2004, Wong Chi Kwong.
10 This program is free software; you can redistribute it and/or
11 modify it under the terms of the GNU General Public License
12 as published by the Free Software Foundation; either version 2
13 of the License, or (at your option) any later version.
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
29 #include "MiscUtilities.h"
32 void GenerateDNAOccCountTable(unsigned long long *dnaDecodeTable
) {
34 unsigned long long i
, j
, c
, t
;
36 for (i
=0; i
<DNA_OCC_CNT_TABLE_SIZE_IN_WORD
; i
++) {
37 dnaDecodeTable
[i
] = 0;
41 dnaDecodeTable
[i
] += (unsigned long long)1 << (t
* 4); //Each alphabet gets 4 bits
48 unsigned long long ForwardDNAOccCount(const unsigned int* dna
, const unsigned long long index
, const unsigned int character
, const unsigned long long* dnaDecodeTable
) {
50 static const unsigned int truncateRightMask
[16] = { 0x00000000, 0xC0000000, 0xF0000000, 0xFC000000,
51 0xFF000000, 0xFFC00000, 0xFFF00000, 0xFFFC0000,
52 0xFFFF0000, 0xFFFFC000, 0xFFFFF000, 0xFFFFFC00,
53 0xFFFFFF00, 0xFFFFFFC0, 0xFFFFFFF0, 0xFFFFFFFC };
55 unsigned long long wordToCount
, charToCount
;
56 unsigned long long i
, c
;
57 unsigned long long sum
= 0;
61 fprintf(stderr
, "ForwardDNAOccCount() : index >= 256!\n");
66 wordToCount
= index
/ 16;
67 charToCount
= index
- wordToCount
* 16;
69 for (i
=0; i
<wordToCount
; i
++) {
70 sum
+= dnaDecodeTable
[dna
[i
] >> 16];
71 sum
+= dnaDecodeTable
[dna
[i
] & 0x0000FFFF];
74 if (charToCount
> 0) {
75 c
= dna
[i
] & truncateRightMask
[charToCount
]; // increase count of 'a' by 16 - c;
76 sum
+= dnaDecodeTable
[c
>> 16];
77 sum
+= dnaDecodeTable
[c
& 0xFFFF];
78 sum
+= charToCount
- 16; // decrease count of 'a' by 16 - positionToProcess
81 return (sum
>> (character
* 8)) & 0x000000FF;
85 unsigned long long BackwardDNAOccCount(const unsigned int* dna
, const unsigned long long index
, const unsigned int character
, const unsigned long long* dnaDecodeTable
) {
87 static const unsigned int truncateLeftMask
[16] = { 0x00000000, 0x00000003, 0x0000000F, 0x0000003F,
88 0x000000FF, 0x000003FF, 0x00000FFF, 0x00003FFF,
89 0x0000FFFF, 0x0003FFFF, 0x000FFFFF, 0x003FFFFF,
90 0x00FFFFFF, 0x03FFFFFF, 0x0FFFFFFF, 0x3FFFFFFF };
92 unsigned long long wordToCount
, charToCount
;
93 unsigned long long i
, c
;
94 unsigned long long sum
= 0;
98 fprintf(stderr
, "ForwardDNAOccCount() : index >= 256!\n");
103 wordToCount
= index
/ 16;
104 charToCount
= index
- wordToCount
* 16;
106 dna
-= wordToCount
+ 1;
108 if (charToCount
> 0) {
109 c
= *dna
& truncateLeftMask
[charToCount
]; // increase count of 'a' by 16 - c;
110 sum
+= dnaDecodeTable
[c
>> 16];
111 sum
+= dnaDecodeTable
[c
& 0xFFFF];
112 sum
+= charToCount
- 16; // decrease count of 'a' by 16 - positionToProcess
115 for (i
=0; i
<wordToCount
; i
++) {
117 sum
+= dnaDecodeTable
[*dna
>> 16];
118 sum
+= dnaDecodeTable
[*dna
& 0x0000FFFF];
121 return (sum
>> (character
* 8)) & 0x000000FF;
125 void ForwardDNAAllOccCount(const unsigned int* dna
, const unsigned long long index
, unsigned long long* __restrict occCount
, const unsigned long long* dnaDecodeTable
) {
127 static const unsigned int truncateRightMask
[16] = { 0x00000000, 0xC0000000, 0xF0000000, 0xFC000000,
128 0xFF000000, 0xFFC00000, 0xFFF00000, 0xFFFC0000,
129 0xFFFF0000, 0xFFFFC000, 0xFFFFF000, 0xFFFFFC00,
130 0xFFFFFF00, 0xFFFFFFC0, 0xFFFFFFF0, 0xFFFFFFFC };
132 unsigned long long wordToCount
, charToCount
;
133 unsigned long long i
, c
;
134 unsigned long long sum
= 0;
138 fprintf(stderr
, "ForwardDNAOccCount() : index >= 256!\n");
143 wordToCount
= index
/ 16;
144 charToCount
= index
- wordToCount
* 16;
146 for (i
=0; i
<wordToCount
; i
++) {
147 sum
+= dnaDecodeTable
[dna
[i
] >> 16];
148 sum
+= dnaDecodeTable
[dna
[i
] & 0x0000FFFF];
151 if (charToCount
> 0) {
152 c
= dna
[i
] & truncateRightMask
[charToCount
]; // increase count of 'a' by 16 - c;
153 sum
+= dnaDecodeTable
[c
>> 16];
154 sum
+= dnaDecodeTable
[c
& 0xFFFF];
155 sum
+= charToCount
- 16; // decrease count of 'a' by 16 - positionToProcess
158 occCount
[0] = sum
& 0x000000FF; sum
>>= 8;
159 occCount
[1] = sum
& 0x000000FF; sum
>>= 8;
160 occCount
[2] = sum
& 0x000000FF; sum
>>= 8;
165 void BackwardDNAAllOccCount(const unsigned int* dna
, const unsigned long long index
, unsigned long long* __restrict occCount
, const unsigned long long* dnaDecodeTable
) {
167 static const unsigned int truncateLeftMask
[16] = { 0x00000000, 0x00000003, 0x0000000F, 0x0000003F,
168 0x000000FF, 0x000003FF, 0x00000FFF, 0x00003FFF,
169 0x0000FFFF, 0x0003FFFF, 0x000FFFFF, 0x003FFFFF,
170 0x00FFFFFF, 0x03FFFFFF, 0x0FFFFFFF, 0x3FFFFFFF };
172 unsigned long long wordToCount
, charToCount
;
173 unsigned long long i
, c
;
174 unsigned long long sum
= 0;
178 fprintf(stderr
, "ForwardDNAOccCount() : index >= 256!\n");
183 wordToCount
= index
/ 16;
184 charToCount
= index
- wordToCount
* 16;
186 dna
-= wordToCount
+ 1;
188 if (charToCount
> 0) {
189 c
= *dna
& truncateLeftMask
[charToCount
]; // increase count of 'a' by 16 - c;
190 sum
+= dnaDecodeTable
[c
>> 16];
191 sum
+= dnaDecodeTable
[c
& 0xFFFF];
192 sum
+= charToCount
- 16; // decrease count of 'a' by 16 - positionToProcess
195 for (i
=0; i
<wordToCount
; i
++) {
197 sum
+= dnaDecodeTable
[*dna
>> 16];
198 sum
+= dnaDecodeTable
[*dna
& 0x0000FFFF];
201 occCount
[0] = sum
& 0x000000FF; sum
>>= 8;
202 occCount
[1] = sum
& 0x000000FF; sum
>>= 8;
203 occCount
[2] = sum
& 0x000000FF; sum
>>= 8;
208 unsigned long long Forward1OccCount(const unsigned int* bitVector
, const unsigned long long index
, const unsigned long long* dnaDecodeTable
) {
210 static const unsigned int truncateRightMask
[32] = { 0x00000000, 0x80000000, 0xC0000000, 0xE0000000, 0xF0000000, 0xF8000000, 0xFC000000, 0xFE000000,
211 0xFF000000, 0xFF800000, 0xFFC00000, 0xFFE00000, 0xFFF00000, 0xFFF80000, 0xFFFC0000, 0xFFFE0000,
212 0xFFFF0000, 0xFFFF8000, 0xFFFFC000, 0xFFFFE000, 0xFFFFF000, 0xFFFFF800, 0xFFFFFC00, 0xFFFFFE00,
213 0xFFFFFF00, 0xFFFFFF80, 0xFFFFFFC0, 0xFFFFFFE0, 0xFFFFFFF0, 0xFFFFFFF8, 0xFFFFFFFC, 0xFFFFFFFE};
215 unsigned long long wordToCount
, bitToCount
;
216 unsigned long long i
, c
;
217 unsigned long long sum
= 0;
218 unsigned long long numberOf1
;
222 fprintf(stderr
, "Forward1OccCount() : index >= 256!\n");
227 wordToCount
= index
/ 32;
228 bitToCount
= index
- wordToCount
* 32;
230 for (i
=0; i
<wordToCount
; i
++) {
231 sum
+= dnaDecodeTable
[bitVector
[i
] >> 16];
232 sum
+= dnaDecodeTable
[bitVector
[i
] & 0x0000FFFF];
235 if (bitToCount
> 0) {
236 c
= bitVector
[i
] & truncateRightMask
[bitToCount
];
237 sum
+= dnaDecodeTable
[c
>> 16];
238 sum
+= dnaDecodeTable
[c
& 0x0000FFFF];
242 numberOf1
= sum
& 0x000000FF;
244 numberOf1
= sum
& 0x000000FF;
252 unsigned long long Backward1OccCount(const unsigned int* bitVector
, const unsigned long long index
, const unsigned long long* dnaDecodeTable
) {
254 static const unsigned int truncateLeftMask
[32] = { 0x00000000, 0x00000001, 0x00000003, 0x00000007, 0x0000000F, 0x0000001F, 0x0000003F, 0x0000007F,
255 0x000000FF, 0x000001FF, 0x000003FF, 0x000007FF, 0x00000FFF, 0x00001FFF, 0x00003FFF, 0x00007FFF,
256 0x0000FFFF, 0x0001FFFF, 0x0003FFFF, 0x0007FFFF, 0x000FFFFF, 0x001FFFFF, 0x003FFFFF, 0x007FFFFF,
257 0x00FFFFFF, 0x01FFFFFF, 0x03FFFFFF, 0x07FFFFFF, 0x0FFFFFFF, 0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF};
259 unsigned long long wordToCount
, bitToCount
;
260 unsigned long long i
, c
;
261 unsigned long long sum
= 0;
262 unsigned long long numberOf1
;
266 fprintf(stderr
, "ForwardDNAOccCount() : index >= 256!\n");
271 wordToCount
= index
/ 32;
272 bitToCount
= index
- wordToCount
* 32;
274 bitVector
-= wordToCount
+ 1;
276 if (bitToCount
> 0) {
277 c
= *bitVector
& truncateLeftMask
[bitToCount
];
278 sum
+= dnaDecodeTable
[c
>> 16];
279 sum
+= dnaDecodeTable
[c
& 0xFFFF];
282 for (i
=0; i
<wordToCount
; i
++) {
284 sum
+= dnaDecodeTable
[*bitVector
>> 16];
285 sum
+= dnaDecodeTable
[*bitVector
& 0x0000FFFF];
289 numberOf1
= sum
& 0x000000FF;
291 numberOf1
= sum
& 0x000000FF;
299 unsigned long long ForwardDNAOccCountNoLimit(const unsigned int* dna
, const unsigned long long index
, const unsigned int character
, const unsigned long long* dnaDecodeTable
) {
301 static const unsigned int truncateRightMask
[16] = { 0x00000000, 0xC0000000, 0xF0000000, 0xFC000000,
302 0xFF000000, 0xFFC00000, 0xFFF00000, 0xFFFC0000,
303 0xFFFF0000, 0xFFFFC000, 0xFFFFF000, 0xFFFFFC00,
304 0xFFFFFF00, 0xFFFFFFC0, 0xFFFFFFF0, 0xFFFFFFFC };
306 unsigned long long iteration
, wordToCount
, charToCount
;
307 unsigned long long i
, j
, c
;
308 unsigned long long sum
;
309 unsigned long long occCount
= 0;
311 iteration
= index
/ 256;
312 wordToCount
= (index
- iteration
* 256) / 16;
313 charToCount
= index
- iteration
* 256 - wordToCount
* 16;
315 for (i
=0; i
<iteration
; i
++) {
318 for (j
=0; j
<16; j
++) {
319 sum
+= dnaDecodeTable
[*dna
>> 16];
320 sum
+= dnaDecodeTable
[*dna
& 0x0000FFFF];
323 if (!DNA_OCC_SUM_EXCEPTION(sum
)) {
324 occCount
+= (sum
>> (character
* 8)) & 0x000000FF;
326 // only some or all of the 3 bits are on
327 // in reality, only one of the four cases are possible
328 if (sum
== 0x00000100) {
329 if (character
== 0) {
332 } else if (sum
== 0x00010000) {
333 if (character
== 1) {
336 } else if (sum
== 0x01000000) {
337 if (character
== 2) {
340 } else if (sum
== 0x00000000) {
341 if (character
== 3) {
345 fprintf(stderr
, "ForwardDNAOccCountNoLimit(): DNA occ sum exception!\n");
353 for (j
=0; j
<wordToCount
; j
++) {
354 sum
+= dnaDecodeTable
[*dna
>> 16];
355 sum
+= dnaDecodeTable
[*dna
& 0x0000FFFF];
359 if (charToCount
> 0) {
360 c
= *dna
& truncateRightMask
[charToCount
]; // increase count of 'a' by 16 - c;
361 sum
+= dnaDecodeTable
[c
>> 16];
362 sum
+= dnaDecodeTable
[c
& 0xFFFF];
363 sum
+= charToCount
- 16; // decrease count of 'a' by 16 - positionToProcess
366 occCount
+= (sum
>> (character
* 8)) & 0x000000FF;
372 unsigned long long BackwardDNAOccCountNoLimit(const unsigned int* dna
, const unsigned long long index
, const unsigned int character
, const unsigned long long* dnaDecodeTable
) {
374 static const unsigned int truncateLeftMask
[16] = { 0x00000000, 0x00000003, 0x0000000F, 0x0000003F,
375 0x000000FF, 0x000003FF, 0x00000FFF, 0x00003FFF,
376 0x0000FFFF, 0x0003FFFF, 0x000FFFFF, 0x003FFFFF,
377 0x00FFFFFF, 0x03FFFFFF, 0x0FFFFFFF, 0x3FFFFFFF };
379 unsigned long long iteration
, wordToCount
, charToCount
;
380 unsigned long long i
, j
, c
;
381 unsigned long long sum
= 0;
382 unsigned long long occCount
;
384 dna
-= index
/ 16 + 1;
386 iteration
= index
/ 256;
387 wordToCount
= (index
- iteration
* 256) / 16;
388 charToCount
= index
- iteration
* 256 - wordToCount
* 16;
390 if (charToCount
> 0) {
391 c
= *dna
& truncateLeftMask
[charToCount
]; // increase count of 'a' by 16 - c;
392 sum
+= dnaDecodeTable
[c
>> 16];
393 sum
+= dnaDecodeTable
[c
& 0xFFFF];
394 sum
+= charToCount
- 16; // decrease count of 'a' by 16 - positionToProcess
397 for (j
=0; j
<wordToCount
; j
++) {
399 sum
+= dnaDecodeTable
[*dna
>> 16];
400 sum
+= dnaDecodeTable
[*dna
& 0x0000FFFF];
403 occCount
= (sum
>> (character
* 8)) & 0x000000FF;
406 for (i
=0; i
<iteration
; i
++) {
409 for (j
=0; j
<16; j
++) {
411 sum
+= dnaDecodeTable
[*dna
>> 16];
412 sum
+= dnaDecodeTable
[*dna
& 0x0000FFFF];
414 if (!DNA_OCC_SUM_EXCEPTION(sum
)) {
415 occCount
+= (sum
>> (character
* 8)) & 0x000000FF;
417 // only some or all of the 3 bits are on
418 // in reality, only one of the four cases are possible
419 if (sum
== 0x00000100) {
420 if (character
== 0) {
423 } else if (sum
== 0x00010000) {
424 if (character
== 1) {
427 } else if (sum
== 0x01000000) {
428 if (character
== 2) {
431 } else if (sum
== 0x00000000) {
432 if (character
== 3) {
436 fprintf(stderr
, "BackwardDNAOccCountNoLimit(): DNA occ sum exception!\n");
447 void ForwardDNAAllOccCountNoLimit(const unsigned int* dna
, const unsigned long long index
, unsigned long long* __restrict occCount
, const unsigned long long* dnaDecodeTable
) {
449 static const unsigned int truncateRightMask
[8] = { 0x00000000, 0xF0000000,
450 0xFF000000, 0xFFF00000,
451 0xFFFF0000, 0xFFFFF000,
452 0xFFFFFF00, 0xFFFFFFF0};
454 unsigned long long iteration
, wordToCount
, charToCount
;
455 unsigned long long i
, j
, k
, c
;
456 unsigned long long sum
,check
;
458 for (j
=0;j
<DNA_ALPHABET_SIZE
;j
++) {
462 iteration
= index
/ 16;
463 wordToCount
= (index
- iteration
* 16) / 8;
464 charToCount
= index
- iteration
* 16 - wordToCount
* 8;
466 for (i
=0; i
<iteration
; i
++) {
467 sum
= dnaDecodeTable
[*dna
>> 16];
468 sum
+= dnaDecodeTable
[*dna
& 0x0000FFFF];
470 sum
+= dnaDecodeTable
[*dna
>> 16];
471 sum
+= dnaDecodeTable
[*dna
& 0x0000FFFF];
474 if (!DNA_OCC_SUM_EXCEPTION(sum
)) {
475 for (j
=0;j
<DNA_ALPHABET_SIZE
;j
++) {
476 occCount
[j
] += sum
& 0x0000000F;
481 // only some or all of the 3 bits are on
482 // in reality, only one of the four cases are possible
483 check
=0x0000000000000010;
484 for (j
=0;j
<DNA_ALPHABET_SIZE
;j
++) {
495 for (j
=0; j
<wordToCount
; j
++) {
496 sum
+= dnaDecodeTable
[*dna
>> 16];
497 sum
+= dnaDecodeTable
[*dna
& 0x0000FFFF];
501 if (charToCount
> 0) {
502 c
= *dna
& truncateRightMask
[charToCount
]; // increase count of 'a' by 16 - c;
503 sum
+= dnaDecodeTable
[c
>> 16];
504 sum
+= dnaDecodeTable
[c
& 0xFFFF];
505 occCount
[0] += charToCount
- 16; // decrease count of 'a' by 16 - positionToProcess
508 for (j
=0;j
<DNA_ALPHABET_SIZE
;j
++) {
509 occCount
[j
] += sum
& 0x0000000F; sum
>>= 4;
513 void BackwardDNAAllOccCountNoLimit(const unsigned int* dna
, const unsigned long long index
, unsigned long long* __restrict occCount
, const unsigned long long* dnaDecodeTable
) {
515 static const unsigned int truncateLeftMask
[16] = { 0x00000000, 0x00000003, 0x0000000F, 0x0000003F,
516 0x000000FF, 0x000003FF, 0x00000FFF, 0x00003FFF,
517 0x0000FFFF, 0x0003FFFF, 0x000FFFFF, 0x003FFFFF,
518 0x00FFFFFF, 0x03FFFFFF, 0x0FFFFFFF, 0x3FFFFFFF };
520 unsigned long long iteration
, wordToCount
, charToCount
;
521 unsigned long long i
, j
, c
;
522 unsigned long long sum
;
524 dna
-= index
/ 16 + 1;
526 iteration
= index
/ 256;
527 wordToCount
= (index
- iteration
* 256) / 16;
528 charToCount
= index
- iteration
* 256 - wordToCount
* 16;
531 if (charToCount
> 0) {
532 c
= *dna
& truncateLeftMask
[charToCount
]; // increase count of 'a' by 16 - c;
533 sum
+= dnaDecodeTable
[c
>> 16];
534 sum
+= dnaDecodeTable
[c
& 0xFFFF];
535 sum
+= charToCount
- 16; // decrease count of 'a' by 16 - positionToProcess
538 for (j
=0; j
<wordToCount
; j
++) {
540 sum
+= dnaDecodeTable
[*dna
>> 16];
541 sum
+= dnaDecodeTable
[*dna
& 0x0000FFFF];
544 occCount
[0] = sum
& 0x000000FF; sum
>>= 8;
545 occCount
[1] = sum
& 0x000000FF; sum
>>= 8;
546 occCount
[2] = sum
& 0x000000FF; sum
>>= 8;
549 for (i
=0; i
<iteration
; i
++) {
552 for (j
=0; j
<16; j
++) {
554 sum
+= dnaDecodeTable
[*dna
>> 16];
555 sum
+= dnaDecodeTable
[*dna
& 0x0000FFFF];
557 if (!DNA_OCC_SUM_EXCEPTION(sum
)) {
558 occCount
[0] += sum
& 0x000000FF; sum
>>= 8;
559 occCount
[1] += sum
& 0x000000FF; sum
>>= 8;
560 occCount
[2] += sum
& 0x000000FF; sum
>>= 8;
563 // only some or all of the 3 bits are on
564 // in reality, only one of the four cases are possible
565 if (sum
== 0x00000100) {
567 } else if (sum
== 0x00010000) {
569 } else if (sum
== 0x01000000) {
571 } else if (sum
== 0x00000000) {
574 fprintf(stderr
, "BackwardDNAAllOccCountNoLimit(): DNA occ sum exception!\n");
584 void GenerateDNA_NOccCountTable(unsigned int *dnaDecodeTable
) {
586 unsigned long long i
, j
, c
, t
;
588 for (i
=0; i
<DNA_N_OCC_CNT_TABLE_SIZE_IN_WORD
; i
++) {
589 dnaDecodeTable
[i
] = 0;
591 for (j
=0; j
<5; j
++) {
594 // Count of 'n' is to be derived from acgt
595 dnaDecodeTable
[i
] += 1 << (t
* 8);
603 unsigned long long ForwardDNA_NOccCount(const unsigned int* dna
, const unsigned long long index
, const unsigned int character
, const unsigned long long* dnaDecodeTable
) {
605 static const unsigned int truncateRightMask
[10] = { 0x00000000, 0xE0000000, 0xFC000000, 0xFF800000,
606 0xFFF00000, 0xFFFE0000, 0xFFFFC000, 0xFFFFF800,
607 0xFFFFFF00, 0xFFFFFFE0};
609 unsigned long long wordToCount
, charToCount
;
610 unsigned long long i
, c
;
611 unsigned long long sum
= 0;
612 unsigned long long occCount
;
616 fprintf(stderr
, "ForwardDNA_NOccCount() : index > 250!\n");
621 wordToCount
= index
/ 10;
622 charToCount
= index
- wordToCount
* 10;
624 for (i
=0; i
<wordToCount
; i
++) {
625 sum
+= dnaDecodeTable
[dna
[i
] >> 17];
626 sum
+= dnaDecodeTable
[(dna
[i
] >> 2) & 0x00007FFF];
629 if (charToCount
> 0) {
630 c
= dna
[i
] & truncateRightMask
[charToCount
]; // increase count of 'a' by 10 - charToCount;
631 sum
+= dnaDecodeTable
[c
>> 17];
632 sum
+= dnaDecodeTable
[(c
>> 2) & 0x00007FFF];
633 sum
+= charToCount
- 10; // decrease count of 'a' by 10 - charToCount
636 if (character
!= 4) {
637 occCount
= (sum
>> (character
* 8)) & 0x000000FF;
640 occCount
-= sum
& 0x000000FF; sum
>>= 8;
641 occCount
-= sum
& 0x000000FF; sum
>>= 8;
642 occCount
-= sum
& 0x000000FF; sum
>>= 8;
650 unsigned long long BackwardDNA_NOccCount(const unsigned int* dna
, const unsigned long long index
, const unsigned int character
, const unsigned long long* dnaDecodeTable
) {
652 static const unsigned int truncateLeftMask
[10] = { 0x00000000, 0x0000001C, 0x000000FC, 0x000007FC,
653 0x00003FFC, 0x0001FFFC, 0x000FFFFC, 0x007FFFFC,
654 0x03FFFFFC, 0x1FFFFFFC};
656 unsigned long long wordToCount
, charToCount
;
657 unsigned long long j
, c
;
658 unsigned long long sum
= 0;
659 unsigned long long occCount
;
663 fprintf(stderr
, "BackwardDNA_NOccCount() : index >= 250!\n");
668 wordToCount
= index
/ 10;
669 charToCount
= index
- wordToCount
* 10;
671 dna
-= wordToCount
+ 1;
673 if (charToCount
> 0) {
674 c
= *dna
& truncateLeftMask
[charToCount
]; // increase count of 'a' by 10 - charToCount;
675 sum
+= dnaDecodeTable
[c
>> 17];
676 sum
+= dnaDecodeTable
[(c
>> 2) & 0x00007FFF];
677 sum
+= charToCount
- 10; // decrease count of 'a' by 10 - charToCount
680 for (j
=0; j
<wordToCount
; j
++) {
682 sum
+= dnaDecodeTable
[*dna
>> 17];
683 sum
+= dnaDecodeTable
[(*dna
>> 2) & 0x00007FFF];
686 if (character
!= 4) {
687 occCount
= (sum
>> (character
* 8)) & 0x000000FF;
690 occCount
-= sum
& 0x000000FF; sum
>>= 8;
691 occCount
-= sum
& 0x000000FF; sum
>>= 8;
692 occCount
-= sum
& 0x000000FF; sum
>>= 8;
697 if (occCount
> index
+ 1) {
698 fprintf(stderr
, "BackwardDNA_NOccCount() : occCount > index + 1!\n");
707 void ForwardDNA_NAllOccCount(const unsigned int* dna
, const unsigned long long index
, unsigned long long* __restrict occCount
, const unsigned long long* dnaDecodeTable
) {
709 static const unsigned int truncateRightMask
[10] = { 0x00000000, 0xE0000000, 0xFC000000, 0xFF800000,
710 0xFFF00000, 0xFFFE0000, 0xFFFFC000, 0xFFFFF800,
711 0xFFFFFF00, 0xFFFFFFE0};
713 unsigned long long wordToCount
, charToCount
;
714 unsigned long long i
, c
;
715 unsigned long long sum
= 0;
719 fprintf(stderr
, "ForwardDNA_NAllOccCount() : index >= 250!\n");
724 wordToCount
= index
/ 10;
725 charToCount
= index
- wordToCount
* 10;
727 for (i
=0; i
<wordToCount
; i
++) {
728 sum
+= dnaDecodeTable
[dna
[i
] >> 17];
729 sum
+= dnaDecodeTable
[(dna
[i
] >> 2) & 0x00007FFF];
732 if (charToCount
> 0) {
733 c
= dna
[i
] & truncateRightMask
[charToCount
]; // increase count of 'a' by 10 - charToCount;
734 sum
+= dnaDecodeTable
[c
>> 17];
735 sum
+= dnaDecodeTable
[(c
>> 2) & 0x00007FFF];
736 sum
+= charToCount
- 10; // decrease count of 'a' by 10 - charToCount
739 occCount
[0] = sum
& 0x000000FF; sum
>>= 8;
740 occCount
[1] = sum
& 0x000000FF; sum
>>= 8;
741 occCount
[2] = sum
& 0x000000FF; sum
>>= 8;
746 void BackwardDNA_NAllOccCount(const unsigned int* dna
, const unsigned long long index
, unsigned long long* __restrict occCount
, const unsigned long long* dnaDecodeTable
) {
748 static const unsigned int truncateLeftMask
[10] = { 0x00000000, 0x0000001C, 0x000000FC, 0x000007FC,
749 0x00003FFC, 0x0001FFFC, 0x000FFFFC, 0x007FFFFC,
750 0x03FFFFFC, 0x1FFFFFFC};
752 unsigned long long wordToCount
, charToCount
;
753 unsigned long long j
, c
;
754 unsigned long long sum
= 0;
758 fprintf(stderr
, "BackwardDNA_NAllOccCount() : index >= 250!\n");
763 wordToCount
= index
/ 10;
764 charToCount
= index
- wordToCount
* 10;
766 dna
-= wordToCount
+ 1;
768 if (charToCount
> 0) {
769 c
= *dna
& truncateLeftMask
[charToCount
]; // increase count of 'a' by 10 - charToCount;
770 sum
+= dnaDecodeTable
[c
>> 17];
771 sum
+= dnaDecodeTable
[(c
>> 2) & 0x00007FFF];
772 sum
+= charToCount
- 10; // decrease count of 'a' by 16 - charToCount
775 for (j
=0; j
<wordToCount
; j
++) {
777 sum
+= dnaDecodeTable
[*dna
>> 17];
778 sum
+= dnaDecodeTable
[(*dna
>> 2) & 0x00007FFF];
781 occCount
[0] = sum
& 0x000000FF; sum
>>= 8;
782 occCount
[1] = sum
& 0x000000FF; sum
>>= 8;
783 occCount
[2] = sum
& 0x000000FF; sum
>>= 8;
788 unsigned long long ForwardDNA_NOccCountNoLimit(const unsigned int* dna
, const unsigned long long index
, const unsigned int character
, const unsigned long long* dnaDecodeTable
) {
790 static const unsigned int truncateRightMask
[10] = { 0x00000000, 0xE0000000, 0xFC000000, 0xFF800000,
791 0xFFF00000, 0xFFFE0000, 0xFFFFC000, 0xFFFFF800,
792 0xFFFFFF00, 0xFFFFFFE0};
794 unsigned long long iteration
, wordToCount
, charToCount
;
795 unsigned long long i
, j
, c
;
796 unsigned long long sum
;
797 unsigned long long occCount
= 0;
799 iteration
= index
/ 250;
800 wordToCount
= (index
- iteration
* 250) / 10;
801 charToCount
= index
- iteration
* 250 - wordToCount
* 10;
803 for (i
=0; i
<iteration
; i
++) {
806 for (j
=0; j
<25; j
++) {
807 sum
+= dnaDecodeTable
[*dna
>> 17];
808 sum
+= dnaDecodeTable
[(*dna
>> 2) & 0x00007FFF];
811 if (character
!= 4) {
812 occCount
+= (sum
>> (character
* 8)) & 0x000000FF;
814 occCount
-= sum
& 0x000000FF; sum
>>= 8;
815 occCount
-= sum
& 0x000000FF; sum
>>= 8;
816 occCount
-= sum
& 0x000000FF; sum
>>= 8;
822 for (j
=0; j
<wordToCount
; j
++) {
823 sum
+= dnaDecodeTable
[*dna
>> 17];
824 sum
+= dnaDecodeTable
[(*dna
>> 2) & 0x00007FFF];
828 if (charToCount
> 0) {
829 c
= *dna
& truncateRightMask
[charToCount
]; // increase count of 'a' by 10 - charToCount;
830 sum
+= dnaDecodeTable
[c
>> 17];
831 sum
+= dnaDecodeTable
[(c
>> 2) & 0x00007FFF];
832 sum
+= charToCount
- 10; // decrease count of 'a' by 10 - charToCount
835 if (character
!= 4) {
836 occCount
+= (sum
>> (character
* 8)) & 0x000000FF;
839 occCount
-= sum
& 0x000000FF; sum
>>= 8;
840 occCount
-= sum
& 0x000000FF; sum
>>= 8;
841 occCount
-= sum
& 0x000000FF; sum
>>= 8;
849 unsigned long long BackwardDNA_NOccCountNoLimit(const unsigned int* dna
, const unsigned long long index
, const unsigned int character
, const unsigned long long* dnaDecodeTable
) {
851 static const unsigned int truncateLeftMask
[10] = { 0x00000000, 0x0000001C, 0x000000FC, 0x000007FC,
852 0x00003FFC, 0x0001FFFC, 0x000FFFFC, 0x007FFFFC,
853 0x03FFFFFC, 0x1FFFFFFC};
855 unsigned long long iteration
, wordToCount
, charToCount
;
856 unsigned long long i
, j
, c
;
857 unsigned long long sum
= 0;
858 unsigned long long occCount
= 0;
860 dna
-= index
/ 10 + 1;
862 iteration
= index
/ 250;
863 wordToCount
= (index
- iteration
* 250) / 10;
864 charToCount
= index
- iteration
* 250 - wordToCount
* 10;
866 if (charToCount
> 0) {
867 c
= *dna
& truncateLeftMask
[charToCount
]; // increase count of 'a' by 10 - charToCount;
868 sum
+= dnaDecodeTable
[c
>> 17];
869 sum
+= dnaDecodeTable
[(c
>> 2) & 0x00007FFF];
870 sum
+= charToCount
- 10; // decrease count of 'a' by 10 - charToCount
873 for (j
=0; j
<wordToCount
; j
++) {
875 sum
+= dnaDecodeTable
[*dna
>> 17];
876 sum
+= dnaDecodeTable
[(*dna
>> 2) & 0x00007FFF];
879 if (character
!= 4) {
880 occCount
= (sum
>> (character
* 8)) & 0x000000FF;
883 occCount
-= sum
& 0x000000FF; sum
>>= 8;
884 occCount
-= sum
& 0x000000FF; sum
>>= 8;
885 occCount
-= sum
& 0x000000FF; sum
>>= 8;
889 for (i
=0; i
<iteration
; i
++) {
892 for (j
=0; j
<25; j
++) {
894 sum
+= dnaDecodeTable
[*dna
>> 17];
895 sum
+= dnaDecodeTable
[(*dna
>> 2) & 0x00007FFF];
897 if (character
!= 4) {
898 occCount
+= (sum
>> (character
* 8)) & 0x000000FF;
900 occCount
-= sum
& 0x000000FF; sum
>>= 8;
901 occCount
-= sum
& 0x000000FF; sum
>>= 8;
902 occCount
-= sum
& 0x000000FF; sum
>>= 8;
911 void ForwardDNA_NAllOccCountNoLimit(const unsigned int* dna
, const unsigned long long index
, unsigned long long* __restrict occCount
, const unsigned long long* dnaDecodeTable
) {
913 static const unsigned int truncateRightMask
[10] = { 0x00000000, 0xE0000000, 0xFC000000, 0xFF800000,
914 0xFFF00000, 0xFFFE0000, 0xFFFFC000, 0xFFFFF800,
915 0xFFFFFF00, 0xFFFFFFE0};
917 unsigned long long iteration
, wordToCount
, charToCount
;
918 unsigned long long i
, j
, c
;
919 unsigned long long sum
;
926 iteration
= index
/ 250;
927 wordToCount
= (index
- iteration
* 250) / 10;
928 charToCount
= index
- iteration
* 250 - wordToCount
* 10;
930 for (i
=0; i
<iteration
; i
++) {
933 for (j
=0; j
<25; j
++) {
934 sum
+= dnaDecodeTable
[*dna
>> 17];
935 sum
+= dnaDecodeTable
[(*dna
>> 2) & 0x00007FFF];
938 occCount
[0] += sum
& 0x000000FF; sum
>>= 8;
939 occCount
[1] += sum
& 0x000000FF; sum
>>= 8;
940 occCount
[2] += sum
& 0x000000FF; sum
>>= 8;
945 for (j
=0; j
<wordToCount
; j
++) {
946 sum
+= dnaDecodeTable
[*dna
>> 17];
947 sum
+= dnaDecodeTable
[(*dna
>> 2) & 0x00007FFF];
951 if (charToCount
> 0) {
952 c
= *dna
& truncateRightMask
[charToCount
]; // increase count of 'a' by 10 - charToCount;
953 sum
+= dnaDecodeTable
[c
>> 17];
954 sum
+= dnaDecodeTable
[(c
>> 2) & 0x00007FFF];
955 sum
+= charToCount
- 10; // decrease count of 'a' by 10 - charToCount
958 occCount
[0] += sum
& 0x000000FF; sum
>>= 8;
959 occCount
[1] += sum
& 0x000000FF; sum
>>= 8;
960 occCount
[2] += sum
& 0x000000FF; sum
>>= 8;
965 void BackwardDNA_NAllOccCountNoLimit(const unsigned int* dna
, const unsigned long long index
, unsigned long long* __restrict occCount
, const unsigned long long* dnaDecodeTable
) {
967 static const unsigned int truncateLeftMask
[10] = { 0x00000000, 0x0000001C, 0x000000FC, 0x000007FC,
968 0x00003FFC, 0x0001FFFC, 0x000FFFFC, 0x007FFFFC,
969 0x03FFFFFC, 0x1FFFFFFC};
971 unsigned long long iteration
, wordToCount
, charToCount
;
972 unsigned long long i
, j
, c
;
973 unsigned long long sum
= 0;
975 dna
-= index
/ 10 + 1;
977 iteration
= index
/ 250;
978 wordToCount
= (index
- iteration
* 250) / 10;
979 charToCount
= index
- iteration
* 250 - wordToCount
* 10;
981 if (charToCount
> 0) {
982 c
= *dna
& truncateLeftMask
[charToCount
]; // increase count of 'a' by 10 - charToCount;
983 sum
+= dnaDecodeTable
[c
>> 17];
984 sum
+= dnaDecodeTable
[(c
>> 2) & 0x00007FFF];
985 sum
+= charToCount
- 10; // decrease count of 'a' by 16 - charToCount
988 for (j
=0; j
<wordToCount
; j
++) {
990 sum
+= dnaDecodeTable
[*dna
>> 17];
991 sum
+= dnaDecodeTable
[(*dna
>> 2) & 0x00007FFF];
994 occCount
[0] = sum
& 0x000000FF; sum
>>= 8;
995 occCount
[1] = sum
& 0x000000FF; sum
>>= 8;
996 occCount
[2] = sum
& 0x000000FF; sum
>>= 8;
999 for (i
=0; i
<iteration
; i
++) {
1002 for (j
=0; j
<25; j
++) {
1004 sum
+= dnaDecodeTable
[*dna
>> 17];
1005 sum
+= dnaDecodeTable
[(*dna
>> 2) & 0x00007FFF];
1007 occCount
[0] += sum
& 0x000000FF; sum
>>= 8;
1008 occCount
[1] += sum
& 0x000000FF; sum
>>= 8;
1009 occCount
[2] += sum
& 0x000000FF; sum
>>= 8;
1015 unsigned long long ForwardOccCount(const unsigned int* packed
, const unsigned long long index
, const unsigned int character
, const unsigned int alphabetSize
) {
1017 unsigned long long wordToCount
, charToCount
;
1018 unsigned long long bitPerChar
, charPerWord
;
1019 unsigned long long i
, j
, c
;
1020 unsigned long long occCount
= 0;
1022 bitPerChar
= ceilLog2(alphabetSize
);
1023 charPerWord
= BITS_IN_WORD
/ bitPerChar
;
1025 wordToCount
= index
/ charPerWord
;
1026 charToCount
= index
- wordToCount
* charPerWord
;
1028 for (i
=0; i
<wordToCount
; i
++) {
1030 for (j
=0; j
<charPerWord
; j
++) {
1031 if (c
>> (BITS_IN_WORD
- bitPerChar
) == character
) {
1037 if (charToCount
> 0) {
1039 for (j
=0; j
<charToCount
; j
++) {
1040 if (c
>> (BITS_IN_WORD
- bitPerChar
) == character
) {
1052 unsigned long long BackwardOccCount(const unsigned int* packed
, const unsigned long long index
, const unsigned int character
, const unsigned int alphabetSize
) {
1054 unsigned long long wordToCount
, charToCount
;
1055 unsigned long long bitPerChar
, charPerWord
;
1056 unsigned long long i
, j
, c
;
1057 unsigned long long occCount
= 0;
1059 bitPerChar
= ceilLog2(alphabetSize
);
1060 charPerWord
= BITS_IN_WORD
/ bitPerChar
;
1062 wordToCount
= index
/ charPerWord
;
1063 charToCount
= index
- wordToCount
* charPerWord
;
1065 packed
-= wordToCount
+ 1;
1067 if (charToCount
> 0) {
1068 c
= *packed
<< (bitPerChar
* (charPerWord
- charToCount
));
1069 for (j
=0; j
<charToCount
; j
++) {
1070 if (c
>> (BITS_IN_WORD
- bitPerChar
) == character
) {
1077 for (i
=1; i
<=wordToCount
; i
++) {
1080 for (j
=0; j
<charPerWord
; j
++) {
1081 if (c
>> (BITS_IN_WORD
- bitPerChar
) == character
) {
1091 void ForwardAllOccCount(const unsigned int* packed
, const unsigned long long index
, const unsigned int alphabetSize
, unsigned int* occCount
) {
1093 unsigned long long wordToCount
, charToCount
;
1094 unsigned long long bitPerChar
, charPerWord
;
1095 unsigned long long i
, j
, c
;
1097 bitPerChar
= ceilLog2(alphabetSize
);
1098 charPerWord
= BITS_IN_WORD
/ bitPerChar
;
1100 wordToCount
= index
/ charPerWord
;
1101 charToCount
= index
- wordToCount
* charPerWord
;
1103 for (i
=0; i
<wordToCount
; i
++) {
1105 for (j
=0; j
<charPerWord
; j
++) {
1106 occCount
[c
>> (BITS_IN_WORD
- bitPerChar
)]++;
1110 if (charToCount
> 0) {
1112 for (j
=0; j
<charToCount
; j
++) {
1113 occCount
[c
>> (BITS_IN_WORD
- bitPerChar
)]++;
1120 void BackwardAllOccCount(const unsigned int* packed
, const unsigned long long index
, const unsigned int alphabetSize
, unsigned int* occCount
) {
1122 unsigned long long wordToCount
, charToCount
;
1123 unsigned long long bitPerChar
, charPerWord
;
1124 unsigned long long i
, j
, c
;
1126 bitPerChar
= ceilLog2(alphabetSize
);
1127 charPerWord
= BITS_IN_WORD
/ bitPerChar
;
1129 wordToCount
= index
/ charPerWord
;
1130 charToCount
= index
- wordToCount
* charPerWord
;
1132 packed
-= wordToCount
+ 1;
1134 if (charToCount
> 0) {
1135 c
= *packed
<< (bitPerChar
* (charPerWord
- charToCount
));
1136 for (j
=0; j
<charToCount
; j
++) {
1137 occCount
[c
>> (BITS_IN_WORD
- bitPerChar
)]++;
1142 for (i
=1; i
<=wordToCount
; i
++) {
1145 for (j
=0; j
<charPerWord
; j
++) {
1146 occCount
[c
>> (BITS_IN_WORD
- bitPerChar
)]++;