1 /*-------------------------------------------------------------------------
4 * Miscellaneous functions for bit-wise operations.
6 * Copyright (c) 2019-2024, PostgreSQL Global Development Group
9 * src/port/pg_bitutils.c
11 *-------------------------------------------------------------------------
15 #ifdef HAVE__GET_CPUID
22 #include "port/pg_bitutils.h"
26 * Array giving the position of the left-most set bit for each possible
27 * byte value. We count the right-most position as the 0th bit, and the
28 * left-most the 7th bit. The 0th entry of the array should not be used.
30 * Note: this is not used by the functions in pg_bitutils.h when
31 * HAVE__BUILTIN_CLZ is defined, but we provide it anyway, so that
32 * extensions possibly compiled with a different compiler can use it.
34 const uint8 pg_leftmost_one_pos
[256] = {
35 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
36 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
37 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
38 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
39 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
40 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
41 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
42 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
43 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
44 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
45 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
46 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
47 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
48 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
49 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
50 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
54 * Array giving the position of the right-most set bit for each possible
55 * byte value. We count the right-most position as the 0th bit, and the
56 * left-most the 7th bit. The 0th entry of the array should not be used.
58 * Note: this is not used by the functions in pg_bitutils.h when
59 * HAVE__BUILTIN_CTZ is defined, but we provide it anyway, so that
60 * extensions possibly compiled with a different compiler can use it.
62 const uint8 pg_rightmost_one_pos
[256] = {
63 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
64 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
65 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
66 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
67 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
68 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
69 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
70 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
71 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
72 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
73 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
74 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
75 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
76 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
77 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
78 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
82 * Array giving the number of 1-bits in each possible byte value.
84 * Note: we export this for use by functions in which explicit use
85 * of the popcount functions seems unlikely to be a win.
87 const uint8 pg_number_of_ones
[256] = {
88 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
89 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
90 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
91 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
92 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
93 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
94 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
95 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
96 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
97 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
98 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
99 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
100 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
101 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
102 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
103 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
106 static inline int pg_popcount32_slow(uint32 word
);
107 static inline int pg_popcount64_slow(uint64 word
);
108 static uint64
pg_popcount_slow(const char *buf
, int bytes
);
109 static uint64
pg_popcount_masked_slow(const char *buf
, int bytes
, bits8 mask
);
111 #ifdef TRY_POPCNT_FAST
112 static bool pg_popcount_available(void);
113 static int pg_popcount32_choose(uint32 word
);
114 static int pg_popcount64_choose(uint64 word
);
115 static uint64
pg_popcount_choose(const char *buf
, int bytes
);
116 static uint64
pg_popcount_masked_choose(const char *buf
, int bytes
, bits8 mask
);
117 static inline int pg_popcount32_fast(uint32 word
);
118 static inline int pg_popcount64_fast(uint64 word
);
119 static uint64
pg_popcount_fast(const char *buf
, int bytes
);
120 static uint64
pg_popcount_masked_fast(const char *buf
, int bytes
, bits8 mask
);
122 int (*pg_popcount32
) (uint32 word
) = pg_popcount32_choose
;
123 int (*pg_popcount64
) (uint64 word
) = pg_popcount64_choose
;
124 uint64 (*pg_popcount_optimized
) (const char *buf
, int bytes
) = pg_popcount_choose
;
125 uint64 (*pg_popcount_masked_optimized
) (const char *buf
, int bytes
, bits8 mask
) = pg_popcount_masked_choose
;
126 #endif /* TRY_POPCNT_FAST */
128 #ifdef TRY_POPCNT_FAST
131 * Return true if CPUID indicates that the POPCNT instruction is available.
134 pg_popcount_available(void)
136 unsigned int exx
[4] = {0, 0, 0, 0};
138 #if defined(HAVE__GET_CPUID)
139 __get_cpuid(1, &exx
[0], &exx
[1], &exx
[2], &exx
[3]);
140 #elif defined(HAVE__CPUID)
143 #error cpuid instruction not available
146 return (exx
[2] & (1 << 23)) != 0; /* POPCNT */
150 * These functions get called on the first call to pg_popcount32 etc.
151 * They detect whether we can use the asm implementations, and replace
152 * the function pointers so that subsequent calls are routed directly to
153 * the chosen implementation.
156 choose_popcount_functions(void)
158 if (pg_popcount_available())
160 pg_popcount32
= pg_popcount32_fast
;
161 pg_popcount64
= pg_popcount64_fast
;
162 pg_popcount_optimized
= pg_popcount_fast
;
163 pg_popcount_masked_optimized
= pg_popcount_masked_fast
;
167 pg_popcount32
= pg_popcount32_slow
;
168 pg_popcount64
= pg_popcount64_slow
;
169 pg_popcount_optimized
= pg_popcount_slow
;
170 pg_popcount_masked_optimized
= pg_popcount_masked_slow
;
173 #ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
174 if (pg_popcount_avx512_available())
176 pg_popcount_optimized
= pg_popcount_avx512
;
177 pg_popcount_masked_optimized
= pg_popcount_masked_avx512
;
183 pg_popcount32_choose(uint32 word
)
185 choose_popcount_functions();
186 return pg_popcount32(word
);
190 pg_popcount64_choose(uint64 word
)
192 choose_popcount_functions();
193 return pg_popcount64(word
);
197 pg_popcount_choose(const char *buf
, int bytes
)
199 choose_popcount_functions();
200 return pg_popcount_optimized(buf
, bytes
);
204 pg_popcount_masked_choose(const char *buf
, int bytes
, bits8 mask
)
206 choose_popcount_functions();
207 return pg_popcount_masked(buf
, bytes
, mask
);
212 * Return the number of 1 bits set in word
215 pg_popcount32_fast(uint32 word
)
218 return __popcnt(word
);
222 __asm__
__volatile__(" popcntl %1,%0\n":"=q"(res
):"rm"(word
):"cc");
229 * Return the number of 1 bits set in word
232 pg_popcount64_fast(uint64 word
)
235 return __popcnt64(word
);
239 __asm__
__volatile__(" popcntq %1,%0\n":"=q"(res
):"rm"(word
):"cc");
246 * Returns the number of 1-bits in buf
249 pg_popcount_fast(const char *buf
, int bytes
)
253 #if SIZEOF_VOID_P >= 8
254 /* Process in 64-bit chunks if the buffer is aligned. */
255 if (buf
== (const char *) TYPEALIGN(8, buf
))
257 const uint64
*words
= (const uint64
*) buf
;
261 popcnt
+= pg_popcount64_fast(*words
++);
265 buf
= (const char *) words
;
268 /* Process in 32-bit chunks if the buffer is aligned. */
269 if (buf
== (const char *) TYPEALIGN(4, buf
))
271 const uint32
*words
= (const uint32
*) buf
;
275 popcnt
+= pg_popcount32_fast(*words
++);
279 buf
= (const char *) words
;
283 /* Process any remaining bytes */
285 popcnt
+= pg_number_of_ones
[(unsigned char) *buf
++];
291 * pg_popcount_masked_fast
292 * Returns the number of 1-bits in buf after applying the mask to each byte
295 pg_popcount_masked_fast(const char *buf
, int bytes
, bits8 mask
)
299 #if SIZEOF_VOID_P >= 8
300 /* Process in 64-bit chunks if the buffer is aligned */
301 uint64 maskv
= ~UINT64CONST(0) / 0xFF * mask
;
303 if (buf
== (const char *) TYPEALIGN(8, buf
))
305 const uint64
*words
= (const uint64
*) buf
;
309 popcnt
+= pg_popcount64_fast(*words
++ & maskv
);
313 buf
= (const char *) words
;
316 /* Process in 32-bit chunks if the buffer is aligned. */
317 uint32 maskv
= ~((uint32
) 0) / 0xFF * mask
;
319 if (buf
== (const char *) TYPEALIGN(4, buf
))
321 const uint32
*words
= (const uint32
*) buf
;
325 popcnt
+= pg_popcount32_fast(*words
++ & maskv
);
329 buf
= (const char *) words
;
333 /* Process any remaining bytes */
335 popcnt
+= pg_number_of_ones
[(unsigned char) *buf
++ & mask
];
340 #endif /* TRY_POPCNT_FAST */
345 * Return the number of 1 bits set in word
348 pg_popcount32_slow(uint32 word
)
350 #ifdef HAVE__BUILTIN_POPCOUNT
351 return __builtin_popcount(word
);
352 #else /* !HAVE__BUILTIN_POPCOUNT */
357 result
+= pg_number_of_ones
[word
& 255];
362 #endif /* HAVE__BUILTIN_POPCOUNT */
367 * Return the number of 1 bits set in word
370 pg_popcount64_slow(uint64 word
)
372 #ifdef HAVE__BUILTIN_POPCOUNT
374 return __builtin_popcountl(word
);
375 #elif SIZEOF_LONG_LONG == 8
376 return __builtin_popcountll(word
);
378 #error "cannot find integer of the same size as uint64_t"
380 #else /* !HAVE__BUILTIN_POPCOUNT */
385 result
+= pg_number_of_ones
[word
& 255];
390 #endif /* HAVE__BUILTIN_POPCOUNT */
395 * Returns the number of 1-bits in buf
398 pg_popcount_slow(const char *buf
, int bytes
)
402 #if SIZEOF_VOID_P >= 8
403 /* Process in 64-bit chunks if the buffer is aligned. */
404 if (buf
== (const char *) TYPEALIGN(8, buf
))
406 const uint64
*words
= (const uint64
*) buf
;
410 popcnt
+= pg_popcount64_slow(*words
++);
414 buf
= (const char *) words
;
417 /* Process in 32-bit chunks if the buffer is aligned. */
418 if (buf
== (const char *) TYPEALIGN(4, buf
))
420 const uint32
*words
= (const uint32
*) buf
;
424 popcnt
+= pg_popcount32_slow(*words
++);
428 buf
= (const char *) words
;
432 /* Process any remaining bytes */
434 popcnt
+= pg_number_of_ones
[(unsigned char) *buf
++];
440 * pg_popcount_masked_slow
441 * Returns the number of 1-bits in buf after applying the mask to each byte
444 pg_popcount_masked_slow(const char *buf
, int bytes
, bits8 mask
)
448 #if SIZEOF_VOID_P >= 8
449 /* Process in 64-bit chunks if the buffer is aligned */
450 uint64 maskv
= ~UINT64CONST(0) / 0xFF * mask
;
452 if (buf
== (const char *) TYPEALIGN(8, buf
))
454 const uint64
*words
= (const uint64
*) buf
;
458 popcnt
+= pg_popcount64_slow(*words
++ & maskv
);
462 buf
= (const char *) words
;
465 /* Process in 32-bit chunks if the buffer is aligned. */
466 uint32 maskv
= ~((uint32
) 0) / 0xFF * mask
;
468 if (buf
== (const char *) TYPEALIGN(4, buf
))
470 const uint32
*words
= (const uint32
*) buf
;
474 popcnt
+= pg_popcount32_slow(*words
++ & maskv
);
478 buf
= (const char *) words
;
482 /* Process any remaining bytes */
484 popcnt
+= pg_number_of_ones
[(unsigned char) *buf
++ & mask
];
489 #ifndef TRY_POPCNT_FAST
492 * When the POPCNT instruction is not available, there's no point in using
493 * function pointers to vary the implementation between the fast and slow
494 * method. We instead just make these actual external functions when
495 * TRY_POPCNT_FAST is not defined. The compiler should be able to inline
496 * the slow versions here.
499 pg_popcount32(uint32 word
)
501 return pg_popcount32_slow(word
);
505 pg_popcount64(uint64 word
)
507 return pg_popcount64_slow(word
);
511 * pg_popcount_optimized
512 * Returns the number of 1-bits in buf
515 pg_popcount_optimized(const char *buf
, int bytes
)
517 return pg_popcount_slow(buf
, bytes
);
521 * pg_popcount_masked_optimized
522 * Returns the number of 1-bits in buf after applying the mask to each byte
525 pg_popcount_masked_optimized(const char *buf
, int bytes
, bits8 mask
)
527 return pg_popcount_masked_slow(buf
, bytes
, mask
);
530 #endif /* !TRY_POPCNT_FAST */