modified: myjupyterlab.sh
[GalaxyCodeBases.git] / c_cpp / lib / klib / test / kbit_test.c
blob3ae3bd309c6ed79b10ee238437ae6f90b0453ef5
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <time.h>
4 #include <emmintrin.h>
5 #include "kbit.h"
7 // from bowtie-0.9.8.1
8 inline static int bt1_pop64(uint64_t x) // the kbi_popcount64() equivalence; similar to popcount_2() in wiki
10 x -= ((x >> 1) & 0x5555555555555555llu);
11 x = (x & 0x3333333333333333llu) + ((x >> 2) & 0x3333333333333333llu);
12 x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0Fllu;
13 x = x + (x >> 8);
14 x = x + (x >> 16);
15 x = x + (x >> 32);
16 return x & 0x3F;
19 inline static int bt1_countInU64(uint64_t dw, int c) // the kbi_DNAcount64() equivalence
21 uint64_t dwA = dw & 0xAAAAAAAAAAAAAAAAllu;
22 uint64_t dwNA = dw & ~0xAAAAAAAAAAAAAAAAllu;
23 uint64_t tmp;
24 switch (c) {
25 case 0: tmp = (dwA >> 1) | dwNA; break;
26 case 1: tmp = ~(dwA >> 1) & dwNA; break;
27 case 2: tmp = (dwA >> 1) & ~dwNA; break;
28 default: tmp = (dwA >> 1) & dwNA;
30 tmp = bt1_pop64(tmp);
31 if (c == 0) tmp = 32 - tmp;
32 return (int)tmp;
35 // from bigmagic
36 static uint32_t sse2_bit_count32(const __m128i* block, const __m128i* block_end)
38 const unsigned mu1 = 0x55555555;
39 const unsigned mu2 = 0x33333333;
40 const unsigned mu3 = 0x0F0F0F0F;
41 const unsigned mu4 = 0x0000003F;
43 uint32_t tcnt[4];
45 // Loading masks
46 __m128i m1 = _mm_set_epi32 (mu1, mu1, mu1, mu1);
47 __m128i m2 = _mm_set_epi32 (mu2, mu2, mu2, mu2);
48 __m128i m3 = _mm_set_epi32 (mu3, mu3, mu3, mu3);
49 __m128i m4 = _mm_set_epi32 (mu4, mu4, mu4, mu4);
50 __m128i mcnt;
51 mcnt = _mm_xor_si128(m1, m1); // cnt = 0
53 __m128i tmp1, tmp2;
56 __m128i b = _mm_load_si128(block);
57 ++block;
59 // b = (b & 0x55555555) + (b >> 1 & 0x55555555);
60 tmp1 = _mm_srli_epi32(b, 1); // tmp1 = (b >> 1 & 0x55555555)
61 tmp1 = _mm_and_si128(tmp1, m1);
62 tmp2 = _mm_and_si128(b, m1); // tmp2 = (b & 0x55555555)
63 b = _mm_add_epi32(tmp1, tmp2); // b = tmp1 + tmp2
65 // b = (b & 0x33333333) + (b >> 2 & 0x33333333);
66 tmp1 = _mm_srli_epi32(b, 2); // (b >> 2 & 0x33333333)
67 tmp1 = _mm_and_si128(tmp1, m2);
68 tmp2 = _mm_and_si128(b, m2); // (b & 0x33333333)
69 b = _mm_add_epi32(tmp1, tmp2); // b = tmp1 + tmp2
71 // b = (b + (b >> 4)) & 0x0F0F0F0F;
72 tmp1 = _mm_srli_epi32(b, 4); // tmp1 = b >> 4
73 b = _mm_add_epi32(b, tmp1); // b = b + (b >> 4)
74 b = _mm_and_si128(b, m3); // & 0x0F0F0F0F
76 // b = b + (b >> 8);
77 tmp1 = _mm_srli_epi32 (b, 8); // tmp1 = b >> 8
78 b = _mm_add_epi32(b, tmp1); // b = b + (b >> 8)
80 // b = (b + (b >> 16)) & 0x0000003F;
81 tmp1 = _mm_srli_epi32 (b, 16); // b >> 16
82 b = _mm_add_epi32(b, tmp1); // b + (b >> 16)
83 b = _mm_and_si128(b, m4); // (b >> 16) & 0x0000003F;
85 mcnt = _mm_add_epi32(mcnt, b); // mcnt += b
87 } while (block < block_end);
89 _mm_store_si128((__m128i*)tcnt, mcnt);
91 return tcnt[0] + tcnt[1] + tcnt[2] + tcnt[3];
94 int main(void)
96 int i, N = 100000000;
97 uint64_t *x, cnt;
98 clock_t t;
99 int c = 1;
101 x = (uint64_t*)calloc(N, 8);
102 srand48(11);
103 for (i = 0; i < N; ++i)
104 x[i] = (uint64_t)lrand48() << 32 ^ lrand48();
106 fprintf(stderr, "\n===> Calculate # of 1 in an integer (popcount) <===\n");
108 t = clock(); cnt = 0;
109 for (i = 0; i < N; ++i) cnt += kbi_popcount64(x[i]);
110 fprintf(stderr, "%20s\t%20ld\t%10.6f\n", "kbit", (long)cnt, (double)(clock() - t) / CLOCKS_PER_SEC);
112 t = clock(); cnt = 0;
113 for (i = 0; i < N; ++i) cnt += bt1_pop64(x[i]);
114 fprintf(stderr, "%20s\t%20ld\t%10.6f\n", "wiki-popcount_2", (long)cnt, (double)(clock() - t) / CLOCKS_PER_SEC);
116 t = clock(); cnt = 0;
117 for (i = 0; i < N; ++i) cnt += __builtin_popcountl(x[i]);
118 fprintf(stderr, "%20s\t%20ld\t%10.6f\n", "__builtin_popcountl", (long)cnt, (double)(clock() - t) / CLOCKS_PER_SEC);
120 t = clock(); cnt = 0;
121 cnt += sse2_bit_count32((__m128i*)x, (__m128i*)(x+N));
122 fprintf(stderr, "%20s\t%20ld\t%10.6f\n", "SSE2-32bit", (long)cnt, (double)(clock() - t) / CLOCKS_PER_SEC);
124 fprintf(stderr, "\n===> Count '%c' in 2-bit encoded integers <===\n", "ACGT"[c]);
126 t = clock(); cnt = 0;
127 for (i = 0; i < N; ++i) cnt += kbi_DNAcount64(x[i], c);
128 fprintf(stderr, "%20s\t%20ld\t%10.6f\n", "kbit", (long)cnt, (double)(clock() - t) / CLOCKS_PER_SEC);
130 t = clock(); cnt = 0;
131 for (i = 0; i < N; ++i) cnt += bt1_countInU64(x[i], c);
132 fprintf(stderr, "%20s\t%20ld\t%10.6f\n", "bowtie1", (long)cnt, (double)(clock() - t) / CLOCKS_PER_SEC);
134 fprintf(stderr, "\n");
135 free(x);
136 return 0;