Remove deprecated, performance, and error event types
[openal-soft.git] / common / alnumeric.h
blobaf8e7b2ef4931c0c2ebe4e25d298205d367bcc99
1 #ifndef AL_NUMERIC_H
2 #define AL_NUMERIC_H
4 #include <cstddef>
5 #include <cstdint>
6 #ifdef HAVE_INTRIN_H
7 #include <intrin.h>
8 #endif
9 #ifdef HAVE_SSE_INTRINSICS
10 #include <xmmintrin.h>
11 #endif
13 #include "opthelpers.h"
/* User-defined literal suffixes for writing exact-width 64-bit integer
 * constants (e.g. 0x5555555555555555_u64), regardless of whether the
 * platform's long is 32 or 64 bits.
 */
inline constexpr int64_t operator""_i64(unsigned long long int n) noexcept
{
    return static_cast<int64_t>(n);
}
inline constexpr uint64_t operator""_u64(unsigned long long int n) noexcept
{
    return static_cast<uint64_t>(n);
}
/** Returns the lesser of two floats (returns a when the comparison is false, e.g. with NaN). */
constexpr inline float minf(float a, float b) noexcept
{
    if(a > b)
        return b;
    return a;
}
/** Returns the greater of two floats. */
constexpr inline float maxf(float a, float b) noexcept
{
    if(a > b)
        return a;
    return b;
}
/** Clamps val to the inclusive range [min, max]. */
constexpr inline float clampf(float val, float min, float max) noexcept
{
    const float lower{maxf(min, val)};
    return minf(max, lower);
}
/** Returns the lesser of two doubles (returns a when the comparison is false, e.g. with NaN). */
constexpr inline double mind(double a, double b) noexcept
{
    if(a > b)
        return b;
    return a;
}
/** Returns the greater of two doubles. */
constexpr inline double maxd(double a, double b) noexcept
{
    if(a > b)
        return a;
    return b;
}
/** Clamps val to the inclusive range [min, max]. */
constexpr inline double clampd(double val, double min, double max) noexcept
{
    const double lower{maxd(min, val)};
    return mind(max, lower);
}
/** Returns the lesser of two unsigned ints. */
constexpr inline unsigned int minu(unsigned int a, unsigned int b) noexcept
{
    if(a > b)
        return b;
    return a;
}
/** Returns the greater of two unsigned ints. */
constexpr inline unsigned int maxu(unsigned int a, unsigned int b) noexcept
{
    if(a > b)
        return a;
    return b;
}
/** Clamps val to the inclusive range [min, max]. */
constexpr inline unsigned int clampu(unsigned int val, unsigned int min, unsigned int max) noexcept
{
    const unsigned int lower{maxu(min, val)};
    return minu(max, lower);
}
/** Returns the lesser of two ints. */
constexpr inline int mini(int a, int b) noexcept
{
    if(a > b)
        return b;
    return a;
}
/** Returns the greater of two ints. */
constexpr inline int maxi(int a, int b) noexcept
{
    if(a > b)
        return a;
    return b;
}
/** Clamps val to the inclusive range [min, max]. */
constexpr inline int clampi(int val, int min, int max) noexcept
{
    const int lower{maxi(min, val)};
    return mini(max, lower);
}
/** Returns the lesser of two int64_ts. */
constexpr inline int64_t mini64(int64_t a, int64_t b) noexcept
{
    if(a > b)
        return b;
    return a;
}
/** Returns the greater of two int64_ts. */
constexpr inline int64_t maxi64(int64_t a, int64_t b) noexcept
{
    if(a > b)
        return a;
    return b;
}
/** Clamps val to the inclusive range [min, max]. */
constexpr inline int64_t clampi64(int64_t val, int64_t min, int64_t max) noexcept
{
    const int64_t lower{maxi64(min, val)};
    return mini64(max, lower);
}
/** Returns the lesser of two uint64_ts. */
constexpr inline uint64_t minu64(uint64_t a, uint64_t b) noexcept
{
    if(a > b)
        return b;
    return a;
}
/** Returns the greater of two uint64_ts. */
constexpr inline uint64_t maxu64(uint64_t a, uint64_t b) noexcept
{
    if(a > b)
        return a;
    return b;
}
/** Clamps val to the inclusive range [min, max]. */
constexpr inline uint64_t clampu64(uint64_t val, uint64_t min, uint64_t max) noexcept
{
    const uint64_t lower{maxu64(min, val)};
    return minu64(max, lower);
}
/** Returns the lesser of two size_ts. */
constexpr inline size_t minz(size_t a, size_t b) noexcept
{
    if(a > b)
        return b;
    return a;
}
/** Returns the greater of two size_ts. */
constexpr inline size_t maxz(size_t a, size_t b) noexcept
{
    if(a > b)
        return a;
    return b;
}
/** Clamps val to the inclusive range [min, max]. */
constexpr inline size_t clampz(size_t val, size_t min, size_t max) noexcept
{
    const size_t lower{maxz(min, val)};
    return minz(max, lower);
}
/** Find the next power-of-2 for non-power-of-2 numbers. */
inline uint32_t NextPowerOf2(uint32_t value) noexcept
{
    if(value > 0)
    {
        value--;
        /* Smear the highest set bit into every bit below it, so adding 1
         * afterward carries up to the next power of two. Shifts double each
         * pass to cover all 32 bits in five steps.
         */
        for(uint32_t shift{1u}; shift < 32u; shift <<= 1)
            value |= value >> shift;
    }
    return value+1;
}
/** Round up a value to the next multiple of r. A value already on a multiple is unchanged. */
inline size_t RoundUp(size_t value, size_t r) noexcept
{
    /* Bump past the next boundary, then truncate back down to it. */
    const size_t bumped{value + r - 1};
    return bumped - (bumped % r);
}
/* Define CTZ macros (count trailing zeros), and POPCNT macros (population
 * count/count 1 bits), for 32- and 64-bit integers. The CTZ macros' results
 * are *UNDEFINED* if the value is 0.
 */
#ifdef __GNUC__

namespace detail_ {

/* The 64-bit builtins take unsigned long or unsigned long long depending on
 * the data model. Deleting the primary template and specializing on both
 * types maps uint64_t onto whichever one it actually is, with a hard error
 * for anything else.
 */
template<typename T>
constexpr inline auto popcnt64(T val) = delete;
template<>
constexpr inline auto popcnt64(unsigned long long val) { return __builtin_popcountll(val); }
template<>
constexpr inline auto popcnt64(unsigned long val) { return __builtin_popcountl(val); }

template<typename T>
constexpr inline auto ctz64(T val) = delete;
template<>
constexpr inline auto ctz64(unsigned long long val) { return __builtin_ctzll(val); }
template<>
constexpr inline auto ctz64(unsigned long val) { return __builtin_ctzl(val); }

} // namespace detail_

#define POPCNT32 __builtin_popcount
#define CTZ32 __builtin_ctz
#define POPCNT64 detail_::popcnt64<uint64_t>
#define CTZ64 detail_::ctz64<uint64_t>

#else

/* There be black magics here. The popcnt method is derived from
 * https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
 * while the ctz-utilizing-popcnt algorithm is shown here
 * http://www.hackersdelight.org/hdcodetxt/ntz.c.txt
 * as the ntz2 variant. These likely aren't the most efficient methods, but
 * they're good enough if the GCC built-ins aren't available.
 */
inline int fallback_popcnt32(uint32_t v)
{
    v = v - ((v >> 1) & 0x55555555u);
    v = (v & 0x33333333u) + ((v >> 2) & 0x33333333u);
    v = (v + (v >> 4)) & 0x0f0f0f0fu;
    return static_cast<int>((v * 0x01010101u) >> 24);
}
#define POPCNT32 fallback_popcnt32

inline int fallback_popcnt64(uint64_t v)
{
    v = v - ((v >> 1) & 0x5555555555555555_u64);
    v = (v & 0x3333333333333333_u64) + ((v >> 2) & 0x3333333333333333_u64);
    v = (v + (v >> 4)) & 0x0f0f0f0f0f0f0f0f_u64;
    return static_cast<int>((v * 0x0101010101010101_u64) >> 56);
}
#define POPCNT64 fallback_popcnt64

#if defined(_WIN64)

inline int msvc64_ctz32(uint32_t v)
{
    /* idx stays 32 only if v is 0, which the CTZ contract leaves undefined. */
    unsigned long idx = 32;
    _BitScanForward(&idx, v);
    return static_cast<int>(idx);
}
#define CTZ32 msvc64_ctz32

inline int msvc64_ctz64(uint64_t v)
{
    unsigned long idx = 64;
    _BitScanForward64(&idx, v);
    return static_cast<int>(idx);
}
#define CTZ64 msvc64_ctz64

#elif defined(_WIN32)

inline int msvc_ctz32(uint32_t v)
{
    unsigned long idx = 32;
    _BitScanForward(&idx, v);
    return static_cast<int>(idx);
}
#define CTZ32 msvc_ctz32

inline int msvc_ctz64(uint64_t v)
{
    /* 32-bit Windows has no 64-bit scan, so scan the low half first and only
     * fall back to the high half (offset by 32) when the low half is zero.
     */
    unsigned long idx = 64;
    if(!_BitScanForward(&idx, static_cast<uint32_t>(v&0xffffffff)))
    {
        if(_BitScanForward(&idx, static_cast<uint32_t>(v>>32)))
            idx += 32;
    }
    return static_cast<int>(idx);
}
#define CTZ64 msvc_ctz64

#else

inline int fallback_ctz32(uint32_t value)
{ return POPCNT32(~value & (value - 1)); }
#define CTZ32 fallback_ctz32

inline int fallback_ctz64(uint64_t value)
{ return POPCNT64(~value & (value - 1)); }
#define CTZ64 fallback_ctz64

#endif
#endif
/**
 * Fast float-to-int conversion. No particular rounding mode is assumed; the
 * IEEE-754 default is round-to-nearest with ties-to-even, though an app could
 * change it on its own threads. On some systems, a truncating conversion may
 * always be the fastest method.
 */
inline int fastf2i(float f) noexcept
{
#if defined(HAVE_SSE_INTRINSICS)
    return _mm_cvt_ss2si(_mm_set_ss(f));

#elif defined(_MSC_VER) && defined(_M_IX86_FP)

    /* 32-bit MSVC: convert via the x87 FPU using its current rounding mode. */
    int result;
    __asm fld f
    __asm fistp result
    return result;

#elif (defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__))

    int result;
#ifdef __SSE_MATH__
    /* SSE math is in use; cvtss2si honors the MXCSR rounding mode. */
    __asm__("cvtss2si %1, %0" : "=r"(result) : "x"(f));
#else
    /* x87 math; fistpl uses the FPU control-word rounding mode. */
    __asm__ __volatile__("fistpl %0" : "=m"(result) : "t"(f) : "st");
#endif
    return result;

#else

    /* Generic fallback: plain truncating conversion. */
    return static_cast<int>(f);
#endif
}

/** As fastf2i, with the result reinterpreted as unsigned. */
inline unsigned int fastf2u(float f) noexcept
{ return static_cast<unsigned int>(fastf2i(f)); }
/** Converts float-to-int using standard behavior (truncation). */
inline int float2int(float f) noexcept
{
#if defined(HAVE_SSE_INTRINSICS)
    return _mm_cvtt_ss2si(_mm_set_ss(f));

#elif (defined(_MSC_VER) && defined(_M_IX86_FP) && _M_IX86_FP == 0) \
    || ((defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__)) \
        && !defined(__SSE_MATH__))
    /* No truncating conversion instruction available; decompose the IEEE-754
     * single into sign, exponent, and mantissa and shift the mantissa into
     * integer position manually.
     */
    int signmul, expshift, mantissa;
    union {
        float f;
        int i;
    } bits;

    bits.f = f;
    signmul = (bits.i>>31) | 1;                  /* +1 or -1 */
    expshift = ((bits.i>>23)&0xff) - (127+23);   /* exponent, biased past the mantissa width */

    /* Over/underflow */
    if UNLIKELY(expshift >= 31 || expshift < -23)
        return 0;

    mantissa = (bits.i&0x7fffff) | 0x800000;     /* restore the implicit leading 1 */
    if LIKELY(expshift < 0)
        return (mantissa >> -expshift) * signmul;
    return (mantissa << expshift) * signmul;

#else

    /* static_cast truncates toward zero by definition. */
    return static_cast<int>(f);
#endif
}

/** As float2int, with the result reinterpreted as unsigned. */
inline unsigned int float2uint(float f) noexcept
{ return static_cast<unsigned int>(float2int(f)); }
/** Converts double-to-int using standard behavior (truncation). */
inline int double2int(double d) noexcept
{
#if defined(HAVE_SSE_INTRINSICS)
    return _mm_cvttsd_si32(_mm_set_sd(d));

#elif (defined(_MSC_VER) && defined(_M_IX86_FP) && _M_IX86_FP < 2) \
    || ((defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__)) \
        && !defined(__SSE2_MATH__))
    /* No SSE2 truncating conversion; decompose the IEEE-754 double into sign,
     * exponent, and mantissa and shift the mantissa into integer position.
     */
    int signmul, expshift;
    int64_t mantissa;
    union {
        double d;
        int64_t i64;
    } bits;

    bits.d = d;
    signmul = (bits.i64 >> 63) | 1;                    /* +1 or -1 */
    expshift = ((bits.i64 >> 52) & 0x7ff) - (1023 + 52);

    /* Over/underflow */
    if UNLIKELY(expshift >= 63 || expshift < -52)
        return 0;

    mantissa = (bits.i64 & 0xfffffffffffff_i64) | 0x10000000000000_i64;
    if LIKELY(expshift < 0)
        return static_cast<int>(mantissa >> -expshift) * signmul;
    return static_cast<int>(mantissa << expshift) * signmul;

#else

    /* static_cast truncates toward zero by definition. */
    return static_cast<int>(d);
#endif
}
307 * Rounds a float to the nearest integral value, according to the current
308 * rounding mode. This is essentially an inlined version of rintf, although
309 * makes fewer promises (e.g. -0 or -0.25 rounded to 0 may result in +0).
311 inline float fast_roundf(float f) noexcept
313 #if (defined(__GNUC__) || defined(__clang__)) && (defined(__i386__) || defined(__x86_64__)) \
314 && !defined(__SSE_MATH__)
316 float out;
317 __asm__ __volatile__("frndint" : "=t"(out) : "0"(f));
318 return out;
320 #else
322 /* Integral limit, where sub-integral precision is not available for
323 * floats.
325 static const float ilim[2]{
326 8388608.0f /* 0x1.0p+23 */,
327 -8388608.0f /* -0x1.0p+23 */
329 unsigned int sign, expo;
330 union {
331 float f;
332 unsigned int i;
333 } conv;
335 conv.f = f;
336 sign = (conv.i>>31)&0x01;
337 expo = (conv.i>>23)&0xff;
339 if UNLIKELY(expo >= 150/*+23*/)
341 /* An exponent (base-2) of 23 or higher is incapable of sub-integral
342 * precision, so it's already an integral value. We don't need to worry
343 * about infinity or NaN here.
345 return f;
347 /* Adding the integral limit to the value (with a matching sign) forces a
348 * result that has no sub-integral precision, and is consequently forced to
349 * round to an integral value. Removing the integral limit then restores
350 * the initial value rounded to the integral. The compiler should not
351 * optimize this out because of non-associative rules on floating-point
352 * math (as long as you don't use -fassociative-math,
353 * -funsafe-math-optimizations, -ffast-math, or -Ofast, in which case this
354 * may break).
356 f += ilim[sign];
357 return f - ilim[sign];
358 #endif
361 #endif /* AL_NUMERIC_H */