src/math.c

   1 #include "a/math.h"
   2
   3 #if defined(__MINGW32__) && A_PREREQ_GNUC(3, 0)
   4 #pragma GCC diagnostic ignored "-Wfloat-conversion"
   5 #endif /* __MINGW32__ */
   6 #if A_PREREQ_GNUC(3, 0) || __has_warning("-Wfloat-equal")
   7 #pragma GCC diagnostic ignored "-Wfloat-equal"
   8 #endif /* -Wfloat-equal */
   9 #if A_PREREQ_MSVC(19, 30)
  10 #pragma warning(disable : 5250)
  11 #endif /* msvc 17.0+ */
  12
  13 static A_INLINE a_float polevl(a_float const *p, a_size n, a_float x)
  14 {
  15     a_float y = *p++;
  16     do {
  17         y = y * x + *p++;
  18     } while (--n);
  19     return y;
  20 }
  21
  22 a_f32 a_f32_rsqrt(a_f32 x)
  23 {
  24 #if 1
  25     return 1 / a_f32_sqrt(x);
  26 #else
  27     union
  28     {
  29         a_f32 x;
  30         a_u32 u;
  31     } u;
  32     u.x = x;
  33     if (a_likely(x > 0))
  34     {
  35         a_f32 xh = A_F32_C(0.5) * x;
  36         u.u = A_U32_C(0x5F375A86) - (u.u >> 1);
  37         u.x *= A_F32_C(1.5) - u.x * u.x * xh;
  38         u.x *= A_F32_C(1.5) - u.x * u.x * xh;
  39     }
  40     else if (x < 0)
  41     {
  42         u.u = A_F32_NNAN;
  43     }
  44     else
  45     {
  46         u.u |= A_F32_PINF;
  47     }
  48     return u.x;
  49 #endif
  50 }
  51
  52 a_f64 a_f64_rsqrt(a_f64 x)
  53 {
  54 #if 1
  55     return 1 / a_f64_sqrt(x);
  56 #else
  57     union
  58     {
  59         a_f64 x;
  60         a_u64 u;
  61     } u;
  62     u.x = x;
  63     if (a_likely(x > 0))
  64     {
  65         a_f64 xh = A_F64_C(0.5) * x;
  66         u.u = A_U64_C(0x5FE6EC85E7DE30DA) - (u.u >> 1);
  67         u.x *= A_F64_C(1.5) - u.x * u.x * xh;
  68         u.x *= A_F64_C(1.5) - u.x * u.x * xh;
  69     }
  70     else if (x < 0)
  71     {
  72         u.u = A_F64_NNAN;
  73     }
  74     else
  75     {
  76         u.u |= A_F64_PINF;
  77     }
  78     return u.x;
  79 #endif
  80 }
  81
  82 #if (__has_builtin(__builtin_clz) || A_PREREQ_GNUC(3, 4)) && (ULONG_MAX == A_U32_MAX)
  83 #define A_U32_BSR(x) (31 - __builtin_clzl(x))
  84 #elif __has_builtin(__builtin_clz) || A_PREREQ_GNUC(3, 4)
  85 #define A_U32_BSR(x) (31 - __builtin_clz(x))
  86 #elif defined(_MSC_VER)
  87 #pragma intrinsic(_BitScanReverse)
  88 #define A_U32_BSR(x) BitScanReverse(x)
  89 static A_INLINE int A_U32_BSR(a_u32 x)
  90 {
  91     unsigned long i = 0;
  92     _BitScanReverse(&i, x);
  93     return (int)i;
  94 }
  95 #endif /* A_U32_BSR */
  96
  97 a_u16 a_u32_sqrt(a_u32 x)
  98 {
  99 #if defined(A_U32_BSR) /* Newton's method */
 100     a_u32 x0, x1 = 1;
 101     if (x <= 1) { return (a_u16)x; }
 102     x1 <<= (A_U32_BSR(x) + 1) >> 1;
 103     do {
 104         x0 = x1;
 105         x1 = (x0 + x / x0) >> 1;
 106     } while (x0 > x1);
 107     return (a_u16)x0;
 108 #else /* Digit-by-digit */
 109     a_u32 a, b = 1, y = 0;
 110     for (b <<= sizeof(b) * 8 - 2; b > x; b >>= 2) {}
 111     for (; b; b >>= 2)
 112     {
 113         a = y + b;
 114         y >>= 1;
 115         if (x >= a)
 116         {
 117             x -= a;
 118             y |= b;
 119         }
 120     }
 121     return (a_u16)y;
 122 #endif
 123 }
 124
 125 #if (__has_builtin(__builtin_clz) || A_PREREQ_GNUC(3, 4)) && (ULONG_MAX == A_U32_MAX)
 126 #define A_U64_BSR(x) (63 - __builtin_clzll(x))
 127 #elif __has_builtin(__builtin_clz) || A_PREREQ_GNUC(3, 4)
 128 #define A_U64_BSR(x) (63 - __builtin_clzl(x))
 129 #elif defined(_MSC_VER) && defined(_WIN64)
 130 #pragma intrinsic(_BitScanReverse64)
 131 #define A_U64_BSR(x) BitScanReverse64(x)
 132 static A_INLINE int A_U64_BSR(a_u64 x)
 133 {
 134     unsigned long i = 0;
 135     _BitScanReverse64(&i, x);
 136     return (int)i;
 137 }
 138 #elif defined(_MSC_VER)
 139 #define A_U64_BSR(x) BitScanReverse64(x)
 140 static A_INLINE int A_U64_BSR(a_u64 x)
 141 {
 142     unsigned long i = 0, hi;
 143     if (_BitScanReverse(&hi, x >> 32))
 144     {
 145         return (int)hi + 32;
 146     }
 147     _BitScanReverse(&i, (a_u32)x);
 148     return (int)i;
 149 }
 150 #endif /* A_U64_BSR */
 151
 152 a_u32 a_u64_sqrt(a_u64 x)
 153 {
 154 #if defined(A_U64_BSR) /* Newton's method */
 155     a_u64 x0, x1 = 1;
 156     if (x <= 1) { return (a_u32)x; }
 157     x1 <<= (A_U64_BSR(x) + 1) >> 1;
 158     do {
 159         x0 = x1;
 160         x1 = (x0 + x / x0) >> 1;
 161     } while (x0 > x1);
 162     return (a_u32)x0;
 163 #else /* Digit-by-digit */
 164     a_u64 a, b = 1, y = 0;
 165     for (b <<= sizeof(b) * 8 - 2; b > x; b >>= 2) {}
 166     for (; b; b >>= 2)
 167     {
 168         a = y + b;
 169         y >>= 1;
 170         if (x >= a)
 171         {
 172             x -= a;
 173             y |= b;
 174         }
 175     }
 176     return (a_u32)y;
 177 #endif
 178 }
 179
 180 #undef a_float_log1p
 181 a_float a_float_log1p(a_float x)
 182 {
 183     a_float volatile a = x + 1;
 184     a_float y = a_float_log(a);
 185     if (x < A_FLOAT_EPSILON && a > 0)
 186     {
 187         a_float volatile b = a - 1;
 188         y -= (b - x) / a;
 189     }
 190     return y;
 191 }
 192
 193 #undef a_float_expm1
 194 a_float a_float_expm1(a_float x)
 195 {
 196     static a_float const P[] = {
 197         A_FLOAT_C(1.2617719307481059087798E-4),
 198         A_FLOAT_C(3.0299440770744196129956E-2),
 199         A_FLOAT_C(9.9999999999999999991025E-1),
 200     };
 201     static a_float const Q[] = {
 202         A_FLOAT_C(3.0019850513866445504159E-6),
 203         A_FLOAT_C(2.5244834034968410419224E-3),
 204         A_FLOAT_C(2.2726554820815502876593E-1),
 205         A_FLOAT_C(2.0000000000000000000897E-0),
 206     };
 207     a_float xx, y;
 208     if (!isfinite(x))
 209     {
 210         if (isnan(x) || x > 0) { return x; }
 211         else { return -1; }
 212     }
 213     if (x < -A_FLOAT_C(0.5) || x > A_FLOAT_C(0.5))
 214     {
 215         return a_float_exp(x) - 1;
 216     }
 217     xx = x * x;
 218     y = polevl(P, A_LEN(P) - 1, xx) * x;
 219     y /= polevl(Q, A_LEN(Q) - 1, xx) - y;
 220     return y + y;
 221 }
 222
 223 #undef a_float_atan2
 224 a_float a_float_atan2(a_float x, a_float y)
 225 {
 226     if (x > 0) { return a_float_atan(y / x); }
 227     if (x < 0)
 228     {
 229         if (y >= 0) { return a_float_atan(y / x) + A_FLOAT_PI; }
 230         return a_float_atan(y / x) - A_FLOAT_PI;
 231     }
 232     if (y > 0) { return +A_FLOAT_PI; }
 233     if (y < 0) { return -A_FLOAT_PI; }
 234     return 0;
 235 }
 236
 237 #undef a_float_hypot
 238 a_float a_float_hypot(a_float x, a_float y)
 239 {
 240     a_float const a = a_float_abs(x);
 241     a_float const b = a_float_abs(y);
 242     if (isinf(x) || isinf(y))
 243     {
 244         return A_FLOAT_INF;
 245     }
 246     if (a <= b)
 247     {
 248         x = a;
 249         y = b;
 250     }
 251     else
 252     {
 253         x = b;
 254         y = a;
 255     }
 256     x /= y;
 257     return a_float_sqrt(x * x + 1) * y;
 258 }
 259
 260 a_float a_float_hypot3(a_float x, a_float y, a_float z)
 261 {
 262     a_float const a = a_float_abs(x);
 263     a_float const b = a_float_abs(y);
 264     a_float const c = a_float_abs(z);
 265     a_float w = a;
 266     if (isinf(x) || isinf(y) || isinf(z))
 267     {
 268         return A_FLOAT_INF;
 269     }
 270     if (w < b) { w = b; }
 271     if (w < c) { w = c; }
 272     if (w == 0) { return 0; }
 273     x /= w;
 274     y /= w;
 275     z /= w;
 276     return a_float_sqrt(x * x + y * y + z * z) * w;
 277 }