src/math.c

   1 #include "a/math.h"
   2
   3 #if defined(__MINGW32__) && A_PREREQ_GNUC(3, 0)
   4 #pragma GCC diagnostic ignored "-Wfloat-conversion"
   5 #endif /* __MINGW32__ */
   6 #if A_PREREQ_GNUC(3, 0) || __has_warning("-Wfloat-equal")
   7 #pragma GCC diagnostic ignored "-Wfloat-equal"
   8 #endif /* -Wfloat-equal */
   9
  10 static A_INLINE a_float polevl(a_float const *p, a_size n, a_float x)
  11 {
  12     a_float y = *p++;
  13     do {
  14         y = y * x + *p++;
  15     } while (--n);
  16     return y;
  17 }
  18
  19 a_f32 a_f32_rsqrt(a_f32 x)
  20 {
  21 #if 1
  22     return 1 / a_f32_sqrt(x);
  23 #else
  24     union
  25     {
  26         a_f32 x;
  27         a_u32 u;
  28     } u;
  29     u.x = x;
  30     if (a_likely(x > 0))
  31     {
  32         a_f32 xh = A_F32_C(0.5) * x;
  33         u.u = A_U32_C(0x5F375A86) - (u.u >> 1);
  34         u.x *= A_F32_C(1.5) - u.x * u.x * xh;
  35         u.x *= A_F32_C(1.5) - u.x * u.x * xh;
  36     }
  37     else if (x < 0)
  38     {
  39         u.u = A_F32_NNAN;
  40     }
  41     else
  42     {
  43         u.u |= A_F32_PINF;
  44     }
  45     return u.x;
  46 #endif
  47 }
  48
  49 a_f64 a_f64_rsqrt(a_f64 x)
  50 {
  51 #if 1
  52     return 1 / a_f64_sqrt(x);
  53 #else
  54     union
  55     {
  56         a_f64 x;
  57         a_u64 u;
  58     } u;
  59     u.x = x;
  60     if (a_likely(x > 0))
  61     {
  62         a_f64 xh = A_F64_C(0.5) * x;
  63         u.u = A_U64_C(0x5FE6EC85E7DE30DA) - (u.u >> 1);
  64         u.x *= A_F64_C(1.5) - u.x * u.x * xh;
  65         u.x *= A_F64_C(1.5) - u.x * u.x * xh;
  66     }
  67     else if (x < 0)
  68     {
  69         u.u = A_F64_NNAN;
  70     }
  71     else
  72     {
  73         u.u |= A_F64_PINF;
  74     }
  75     return u.x;
  76 #endif
  77 }
  78
  79 #if (__has_builtin(__builtin_clz) || A_PREREQ_GNUC(3, 4)) && (ULONG_MAX == A_U32_MAX)
  80 #define A_U32_BSR(x) (31 - __builtin_clzl(x))
  81 #elif __has_builtin(__builtin_clz) || A_PREREQ_GNUC(3, 4)
  82 #define A_U32_BSR(x) (31 - __builtin_clz(x))
  83 #elif defined(_MSC_VER)
  84 #pragma intrinsic(_BitScanReverse)
  85 #define A_U32_BSR(x) BitScanReverse(x)
  86 static A_INLINE int A_U32_BSR(a_u32 x)
  87 {
  88     unsigned long i = 0;
  89     _BitScanReverse(&i, x);
  90     return (int)i;
  91 }
  92 #endif /* A_U32_BSR */
  93
  94 a_u16 a_u32_sqrt(a_u32 x)
  95 {
  96 #if defined(A_U32_BSR) /* Newton's method */
  97     a_u32 x0, x1 = 1;
  98     if (x <= 1) { return (a_u16)x; }
  99     x1 <<= (A_U32_BSR(x) + 1) >> 1;
 100     do {
 101         x0 = x1;
 102         x1 = (x0 + x / x0) >> 1;
 103     } while (x0 > x1);
 104     return (a_u16)x0;
 105 #else /* Digit-by-digit */
 106     a_u32 a, b = 1, y = 0;
 107     for (b <<= sizeof(b) * 8 - 2; b > x; b >>= 2) {}
 108     for (; b; b >>= 2)
 109     {
 110         a = y + b;
 111         y >>= 1;
 112         if (x >= a)
 113         {
 114             x -= a;
 115             y |= b;
 116         }
 117     }
 118     return (a_u16)y;
 119 #endif
 120 }
 121
 122 #if (__has_builtin(__builtin_clz) || A_PREREQ_GNUC(3, 4)) && (ULONG_MAX == A_U32_MAX)
 123 #define A_U64_BSR(x) (63 - __builtin_clzll(x))
 124 #elif __has_builtin(__builtin_clz) || A_PREREQ_GNUC(3, 4)
 125 #define A_U64_BSR(x) (63 - __builtin_clzl(x))
 126 #elif defined(_MSC_VER) && defined(_WIN64)
 127 #pragma intrinsic(_BitScanReverse64)
 128 #define A_U64_BSR(x) BitScanReverse64(x)
 129 static A_INLINE int A_U64_BSR(a_u64 x)
 130 {
 131     unsigned long i = 0;
 132     _BitScanReverse64(&i, x);
 133     return (int)i;
 134 }
 135 #elif defined(_MSC_VER)
 136 #define A_U64_BSR(x) BitScanReverse64(x)
 137 static A_INLINE int A_U64_BSR(a_u64 x)
 138 {
 139     unsigned long i = 0, hi = 0;
 140     _BitScanReverse(&i, (a_u32)x);
 141     if (_BitScanReverse(&hi, x >> 32))
 142     {
 143         i += hi + 1;
 144     }
 145     return (int)i;
 146 }
 147 #endif /* A_U64_BSR */
 148
 149 a_u32 a_u64_sqrt(a_u64 x)
 150 {
 151 #if defined(A_U64_BSR) /* Newton's method */
 152     a_u64 x0, x1 = 1;
 153     if (x <= 1) { return (a_u32)x; }
 154     x1 <<= (A_U64_BSR(x) + 1) >> 1;
 155     do {
 156         x0 = x1;
 157         x1 = (x0 + x / x0) >> 1;
 158     } while (x0 > x1);
 159     return (a_u32)x0;
 160 #else /* Digit-by-digit */
 161     a_u64 a, b = 1, y = 0;
 162     for (b <<= sizeof(b) * 8 - 2; b > x; b >>= 2) {}
 163     for (; b; b >>= 2)
 164     {
 165         a = y + b;
 166         y >>= 1;
 167         if (x >= a)
 168         {
 169             x -= a;
 170             y |= b;
 171         }
 172     }
 173     return (a_u32)y;
 174 #endif
 175 }
 176
 177 #undef a_float_log1p
 178 a_float a_float_log1p(a_float x)
 179 {
 180     a_float volatile a = x + 1;
 181     a_float y = a_float_log(a);
 182     if (x < A_FLOAT_EPSILON && a > 0)
 183     {
 184         a_float volatile b = a - 1;
 185         y -= (b - x) / a;
 186     }
 187     return y;
 188 }
 189
 190 #undef a_float_expm1
 191 a_float a_float_expm1(a_float x)
 192 {
 193     static a_float const P[] = {
 194         A_FLOAT_C(1.2617719307481059087798E-4),
 195         A_FLOAT_C(3.0299440770744196129956E-2),
 196         A_FLOAT_C(9.9999999999999999991025E-1),
 197     };
 198     static a_float const Q[] = {
 199         A_FLOAT_C(3.0019850513866445504159E-6),
 200         A_FLOAT_C(2.5244834034968410419224E-3),
 201         A_FLOAT_C(2.2726554820815502876593E-1),
 202         A_FLOAT_C(2.0000000000000000000897E-0),
 203     };
 204     a_float xx, y;
 205     if (!isfinite(x))
 206     {
 207         if (isnan(x) || x > 0) { return x; }
 208         else { return -1; }
 209     }
 210     if (x < -A_FLOAT_C(0.5) || x > A_FLOAT_C(0.5))
 211     {
 212         return a_float_exp(x) - 1;
 213     }
 214     xx = x * x;
 215     y = polevl(P, A_LEN(P) - 1, xx) * x;
 216     y /= polevl(Q, A_LEN(Q) - 1, xx) - y;
 217     return y + y;
 218 }
 219
 220 #undef a_float_atan2
 221 a_float a_float_atan2(a_float x, a_float y)
 222 {
 223     if (x > 0) { return a_float_atan(y / x); }
 224     if (x < 0)
 225     {
 226         if (y >= 0) { return a_float_atan(y / x) + A_FLOAT_PI; }
 227         return a_float_atan(y / x) - A_FLOAT_PI;
 228     }
 229     if (y > 0) { return +A_FLOAT_PI; }
 230     if (y < 0) { return -A_FLOAT_PI; }
 231     return 0;
 232 }
 233
 234 #undef a_float_hypot
 235 a_float a_float_hypot(a_float x, a_float y)
 236 {
 237     a_float const a = a_float_abs(x);
 238     a_float const b = a_float_abs(y);
 239     if (isinf(x) || isinf(y))
 240     {
 241         return A_FLOAT_INF;
 242     }
 243     if (a <= b)
 244     {
 245         x = a;
 246         y = b;
 247     }
 248     else
 249     {
 250         x = b;
 251         y = a;
 252     }
 253     x /= y;
 254     return a_float_sqrt(x * x + 1) * y;
 255 }
 256
 257 a_float a_float_hypot3(a_float x, a_float y, a_float z)
 258 {
 259     a_float const a = a_float_abs(x);
 260     a_float const b = a_float_abs(y);
 261     a_float const c = a_float_abs(z);
 262     a_float w = a;
 263     if (isinf(x) || isinf(y) || isinf(z))
 264     {
 265         return A_FLOAT_INF;
 266     }
 267     if (w < b) { w = b; }
 268     if (w < c) { w = c; }
 269     if (w == 0) { return 0; }
 270     x /= w;
 271     y /= w;
 272     z /= w;
 273     return a_float_sqrt(x * x + y * y + z * z) * w;
 274 }