Provide a better error message for misplaced dispatch options.
[pgsql.git] / src / port / pg_bitutils.c
blobc8399981ee0e22b3f4a3e3e1c96dd5225c09bf8a
1 /*-------------------------------------------------------------------------
3 * pg_bitutils.c
4 * Miscellaneous functions for bit-wise operations.
6 * Copyright (c) 2019-2024, PostgreSQL Global Development Group
8 * IDENTIFICATION
9 * src/port/pg_bitutils.c
11 *-------------------------------------------------------------------------
13 #include "c.h"
15 #ifdef HAVE__GET_CPUID
16 #include <cpuid.h>
17 #endif
18 #ifdef HAVE__CPUID
19 #include <intrin.h>
20 #endif
22 #include "port/pg_bitutils.h"
26 * Array giving the position of the left-most set bit for each possible
27 * byte value. We count the right-most position as the 0th bit, and the
28 * left-most the 7th bit. The 0th entry of the array should not be used.
30 * Note: this is not used by the functions in pg_bitutils.h when
31 * HAVE__BUILTIN_CLZ is defined, but we provide it anyway, so that
32 * extensions possibly compiled with a different compiler can use it.
34 const uint8 pg_leftmost_one_pos[256] = {
35 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
36 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
37 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
38 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
39 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
40 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
41 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
42 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
43 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
44 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
45 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
46 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
47 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
48 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
49 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
50 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
54 * Array giving the position of the right-most set bit for each possible
55 * byte value. We count the right-most position as the 0th bit, and the
56 * left-most the 7th bit. The 0th entry of the array should not be used.
58 * Note: this is not used by the functions in pg_bitutils.h when
59 * HAVE__BUILTIN_CTZ is defined, but we provide it anyway, so that
60 * extensions possibly compiled with a different compiler can use it.
62 const uint8 pg_rightmost_one_pos[256] = {
63 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
64 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
65 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
66 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
67 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
68 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
69 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
70 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
71 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
72 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
73 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
74 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
75 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
76 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
77 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
78 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
82 * Array giving the number of 1-bits in each possible byte value.
84 * Note: we export this for use by functions in which explicit use
85 * of the popcount functions seems unlikely to be a win.
87 const uint8 pg_number_of_ones[256] = {
88 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
89 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
90 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
91 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
92 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
93 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
94 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
95 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
96 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
97 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
98 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
99 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
100 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
101 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
102 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
103 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
106 static inline int pg_popcount32_slow(uint32 word);
107 static inline int pg_popcount64_slow(uint64 word);
108 static uint64 pg_popcount_slow(const char *buf, int bytes);
109 static uint64 pg_popcount_masked_slow(const char *buf, int bytes, bits8 mask);
111 #ifdef TRY_POPCNT_FAST
112 static bool pg_popcount_available(void);
113 static int pg_popcount32_choose(uint32 word);
114 static int pg_popcount64_choose(uint64 word);
115 static uint64 pg_popcount_choose(const char *buf, int bytes);
116 static uint64 pg_popcount_masked_choose(const char *buf, int bytes, bits8 mask);
117 static inline int pg_popcount32_fast(uint32 word);
118 static inline int pg_popcount64_fast(uint64 word);
119 static uint64 pg_popcount_fast(const char *buf, int bytes);
120 static uint64 pg_popcount_masked_fast(const char *buf, int bytes, bits8 mask);
122 int (*pg_popcount32) (uint32 word) = pg_popcount32_choose;
123 int (*pg_popcount64) (uint64 word) = pg_popcount64_choose;
124 uint64 (*pg_popcount_optimized) (const char *buf, int bytes) = pg_popcount_choose;
125 uint64 (*pg_popcount_masked_optimized) (const char *buf, int bytes, bits8 mask) = pg_popcount_masked_choose;
126 #endif /* TRY_POPCNT_FAST */
128 #ifdef TRY_POPCNT_FAST
131 * Return true if CPUID indicates that the POPCNT instruction is available.
133 static bool
134 pg_popcount_available(void)
136 unsigned int exx[4] = {0, 0, 0, 0};
138 #if defined(HAVE__GET_CPUID)
139 __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
140 #elif defined(HAVE__CPUID)
141 __cpuid(exx, 1);
142 #else
143 #error cpuid instruction not available
144 #endif
146 return (exx[2] & (1 << 23)) != 0; /* POPCNT */
150 * These functions get called on the first call to pg_popcount32 etc.
151 * They detect whether we can use the asm implementations, and replace
152 * the function pointers so that subsequent calls are routed directly to
153 * the chosen implementation.
155 static inline void
156 choose_popcount_functions(void)
158 if (pg_popcount_available())
160 pg_popcount32 = pg_popcount32_fast;
161 pg_popcount64 = pg_popcount64_fast;
162 pg_popcount_optimized = pg_popcount_fast;
163 pg_popcount_masked_optimized = pg_popcount_masked_fast;
165 else
167 pg_popcount32 = pg_popcount32_slow;
168 pg_popcount64 = pg_popcount64_slow;
169 pg_popcount_optimized = pg_popcount_slow;
170 pg_popcount_masked_optimized = pg_popcount_masked_slow;
173 #ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
174 if (pg_popcount_avx512_available())
176 pg_popcount_optimized = pg_popcount_avx512;
177 pg_popcount_masked_optimized = pg_popcount_masked_avx512;
179 #endif
182 static int
183 pg_popcount32_choose(uint32 word)
185 choose_popcount_functions();
186 return pg_popcount32(word);
189 static int
190 pg_popcount64_choose(uint64 word)
192 choose_popcount_functions();
193 return pg_popcount64(word);
196 static uint64
197 pg_popcount_choose(const char *buf, int bytes)
199 choose_popcount_functions();
200 return pg_popcount_optimized(buf, bytes);
203 static uint64
204 pg_popcount_masked_choose(const char *buf, int bytes, bits8 mask)
206 choose_popcount_functions();
207 return pg_popcount_masked(buf, bytes, mask);
211 * pg_popcount32_fast
212 * Return the number of 1 bits set in word
214 static inline int
215 pg_popcount32_fast(uint32 word)
217 #ifdef _MSC_VER
218 return __popcnt(word);
219 #else
220 uint32 res;
222 __asm__ __volatile__(" popcntl %1,%0\n":"=q"(res):"rm"(word):"cc");
223 return (int) res;
224 #endif
228 * pg_popcount64_fast
229 * Return the number of 1 bits set in word
231 static inline int
232 pg_popcount64_fast(uint64 word)
234 #ifdef _MSC_VER
235 return __popcnt64(word);
236 #else
237 uint64 res;
239 __asm__ __volatile__(" popcntq %1,%0\n":"=q"(res):"rm"(word):"cc");
240 return (int) res;
241 #endif
245 * pg_popcount_fast
246 * Returns the number of 1-bits in buf
248 static uint64
249 pg_popcount_fast(const char *buf, int bytes)
251 uint64 popcnt = 0;
253 #if SIZEOF_VOID_P >= 8
254 /* Process in 64-bit chunks if the buffer is aligned. */
255 if (buf == (const char *) TYPEALIGN(8, buf))
257 const uint64 *words = (const uint64 *) buf;
259 while (bytes >= 8)
261 popcnt += pg_popcount64_fast(*words++);
262 bytes -= 8;
265 buf = (const char *) words;
267 #else
268 /* Process in 32-bit chunks if the buffer is aligned. */
269 if (buf == (const char *) TYPEALIGN(4, buf))
271 const uint32 *words = (const uint32 *) buf;
273 while (bytes >= 4)
275 popcnt += pg_popcount32_fast(*words++);
276 bytes -= 4;
279 buf = (const char *) words;
281 #endif
283 /* Process any remaining bytes */
284 while (bytes--)
285 popcnt += pg_number_of_ones[(unsigned char) *buf++];
287 return popcnt;
291 * pg_popcount_masked_fast
292 * Returns the number of 1-bits in buf after applying the mask to each byte
294 static uint64
295 pg_popcount_masked_fast(const char *buf, int bytes, bits8 mask)
297 uint64 popcnt = 0;
299 #if SIZEOF_VOID_P >= 8
300 /* Process in 64-bit chunks if the buffer is aligned */
301 uint64 maskv = ~UINT64CONST(0) / 0xFF * mask;
303 if (buf == (const char *) TYPEALIGN(8, buf))
305 const uint64 *words = (const uint64 *) buf;
307 while (bytes >= 8)
309 popcnt += pg_popcount64_fast(*words++ & maskv);
310 bytes -= 8;
313 buf = (const char *) words;
315 #else
316 /* Process in 32-bit chunks if the buffer is aligned. */
317 uint32 maskv = ~((uint32) 0) / 0xFF * mask;
319 if (buf == (const char *) TYPEALIGN(4, buf))
321 const uint32 *words = (const uint32 *) buf;
323 while (bytes >= 4)
325 popcnt += pg_popcount32_fast(*words++ & maskv);
326 bytes -= 4;
329 buf = (const char *) words;
331 #endif
333 /* Process any remaining bytes */
334 while (bytes--)
335 popcnt += pg_number_of_ones[(unsigned char) *buf++ & mask];
337 return popcnt;
340 #endif /* TRY_POPCNT_FAST */
344 * pg_popcount32_slow
345 * Return the number of 1 bits set in word
347 static inline int
348 pg_popcount32_slow(uint32 word)
350 #ifdef HAVE__BUILTIN_POPCOUNT
351 return __builtin_popcount(word);
352 #else /* !HAVE__BUILTIN_POPCOUNT */
353 int result = 0;
355 while (word != 0)
357 result += pg_number_of_ones[word & 255];
358 word >>= 8;
361 return result;
362 #endif /* HAVE__BUILTIN_POPCOUNT */
366 * pg_popcount64_slow
367 * Return the number of 1 bits set in word
369 static inline int
370 pg_popcount64_slow(uint64 word)
372 #ifdef HAVE__BUILTIN_POPCOUNT
373 #if SIZEOF_LONG == 8
374 return __builtin_popcountl(word);
375 #elif SIZEOF_LONG_LONG == 8
376 return __builtin_popcountll(word);
377 #else
378 #error "cannot find integer of the same size as uint64_t"
379 #endif
380 #else /* !HAVE__BUILTIN_POPCOUNT */
381 int result = 0;
383 while (word != 0)
385 result += pg_number_of_ones[word & 255];
386 word >>= 8;
389 return result;
390 #endif /* HAVE__BUILTIN_POPCOUNT */
394 * pg_popcount_slow
395 * Returns the number of 1-bits in buf
397 static uint64
398 pg_popcount_slow(const char *buf, int bytes)
400 uint64 popcnt = 0;
402 #if SIZEOF_VOID_P >= 8
403 /* Process in 64-bit chunks if the buffer is aligned. */
404 if (buf == (const char *) TYPEALIGN(8, buf))
406 const uint64 *words = (const uint64 *) buf;
408 while (bytes >= 8)
410 popcnt += pg_popcount64_slow(*words++);
411 bytes -= 8;
414 buf = (const char *) words;
416 #else
417 /* Process in 32-bit chunks if the buffer is aligned. */
418 if (buf == (const char *) TYPEALIGN(4, buf))
420 const uint32 *words = (const uint32 *) buf;
422 while (bytes >= 4)
424 popcnt += pg_popcount32_slow(*words++);
425 bytes -= 4;
428 buf = (const char *) words;
430 #endif
432 /* Process any remaining bytes */
433 while (bytes--)
434 popcnt += pg_number_of_ones[(unsigned char) *buf++];
436 return popcnt;
440 * pg_popcount_masked_slow
441 * Returns the number of 1-bits in buf after applying the mask to each byte
443 static uint64
444 pg_popcount_masked_slow(const char *buf, int bytes, bits8 mask)
446 uint64 popcnt = 0;
448 #if SIZEOF_VOID_P >= 8
449 /* Process in 64-bit chunks if the buffer is aligned */
450 uint64 maskv = ~UINT64CONST(0) / 0xFF * mask;
452 if (buf == (const char *) TYPEALIGN(8, buf))
454 const uint64 *words = (const uint64 *) buf;
456 while (bytes >= 8)
458 popcnt += pg_popcount64_slow(*words++ & maskv);
459 bytes -= 8;
462 buf = (const char *) words;
464 #else
465 /* Process in 32-bit chunks if the buffer is aligned. */
466 uint32 maskv = ~((uint32) 0) / 0xFF * mask;
468 if (buf == (const char *) TYPEALIGN(4, buf))
470 const uint32 *words = (const uint32 *) buf;
472 while (bytes >= 4)
474 popcnt += pg_popcount32_slow(*words++ & maskv);
475 bytes -= 4;
478 buf = (const char *) words;
480 #endif
482 /* Process any remaining bytes */
483 while (bytes--)
484 popcnt += pg_number_of_ones[(unsigned char) *buf++ & mask];
486 return popcnt;
489 #ifndef TRY_POPCNT_FAST
492 * When the POPCNT instruction is not available, there's no point in using
493 * function pointers to vary the implementation between the fast and slow
494 * method. We instead just make these actual external functions when
495 * TRY_POPCNT_FAST is not defined. The compiler should be able to inline
496 * the slow versions here.
499 pg_popcount32(uint32 word)
501 return pg_popcount32_slow(word);
505 pg_popcount64(uint64 word)
507 return pg_popcount64_slow(word);
511 * pg_popcount_optimized
512 * Returns the number of 1-bits in buf
514 uint64
515 pg_popcount_optimized(const char *buf, int bytes)
517 return pg_popcount_slow(buf, bytes);
521 * pg_popcount_masked_optimized
522 * Returns the number of 1-bits in buf after applying the mask to each byte
524 uint64
525 pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mask)
527 return pg_popcount_masked_slow(buf, bytes, mask);
530 #endif /* !TRY_POPCNT_FAST */