libc/src/math/generic/explogxf.h

   1 //===-- Single-precision general exp/log functions ------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 #ifndef LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H
  10 #define LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H
  11
  12 #include "common_constants.h"
  13 #include "math_utils.h"
  14 #include "src/__support/CPP/bit.h"
  15 #include "src/__support/CPP/optional.h"
  16 #include "src/__support/FPUtil/FEnvImpl.h"
  17 #include "src/__support/FPUtil/FPBits.h"
  18 #include "src/__support/FPUtil/PolyEval.h"
  19 #include "src/__support/FPUtil/nearest_integer.h"
  20 #include "src/__support/common.h"
  21 #include "src/__support/macros/properties/cpu_features.h"
  22
  23 #include <errno.h>
  24
  25 namespace LIBC_NAMESPACE {
  26
  27 struct ExpBase {
  28   // Base = e
  29   static constexpr int MID_BITS = 5;
  30   static constexpr int MID_MASK = (1 << MID_BITS) - 1;
  31   // log2(e) * 2^5
  32   static constexpr double LOG2_B = 0x1.71547652b82fep+0 * (1 << MID_BITS);
  33   // High and low parts of -log(2) * 2^(-5)
  34   static constexpr double M_LOGB_2_HI = -0x1.62e42fefa0000p-1 / (1 << MID_BITS);
  35   static constexpr double M_LOGB_2_LO =
  36       -0x1.cf79abc9e3b3ap-40 / (1 << MID_BITS);
  37   // Look up table for bit fields of 2^(i/32) for i = 0..31, generated by Sollya
  38   // with:
  39   // > for i from 0 to 31 do printdouble(round(2^(i/32), D, RN));
  40   static constexpr int64_t EXP_2_MID[1 << MID_BITS] = {
  41       0x3ff0000000000000, 0x3ff059b0d3158574, 0x3ff0b5586cf9890f,
  42       0x3ff11301d0125b51, 0x3ff172b83c7d517b, 0x3ff1d4873168b9aa,
  43       0x3ff2387a6e756238, 0x3ff29e9df51fdee1, 0x3ff306fe0a31b715,
  44       0x3ff371a7373aa9cb, 0x3ff3dea64c123422, 0x3ff44e086061892d,
  45       0x3ff4bfdad5362a27, 0x3ff5342b569d4f82, 0x3ff5ab07dd485429,
  46       0x3ff6247eb03a5585, 0x3ff6a09e667f3bcd, 0x3ff71f75e8ec5f74,
  47       0x3ff7a11473eb0187, 0x3ff82589994cce13, 0x3ff8ace5422aa0db,
  48       0x3ff93737b0cdc5e5, 0x3ff9c49182a3f090, 0x3ffa5503b23e255d,
  49       0x3ffae89f995ad3ad, 0x3ffb7f76f2fb5e47, 0x3ffc199bdd85529c,
  50       0x3ffcb720dcef9069, 0x3ffd5818dcfba487, 0x3ffdfc97337b9b5f,
  51       0x3ffea4afa2a490da, 0x3fff50765b6e4540,
  52   };
  53
  54   // Approximating e^dx with degree-5 minimax polynomial generated by Sollya:
  55   // > Q = fpminimax(expm1(x)/x, 4, [|1, D...|], [-log(2)/64, log(2)/64]);
  56   // Then:
  57   //   e^dx ~ P(dx) = 1 + dx + COEFFS[0] * dx^2 + ... + COEFFS[3] * dx^5.
  58   static constexpr double COEFFS[4] = {
  59       0x1.ffffffffe5bc8p-2, 0x1.555555555cd67p-3, 0x1.5555c2a9b48b4p-5,
  60       0x1.11112a0e34bdbp-7};
  61
  62   LIBC_INLINE static double powb_lo(double dx) {
  63     using fputil::multiply_add;
  64     double dx2 = dx * dx;
  65     double c0 = 1.0 + dx;
  66     // c1 = COEFFS[0] + COEFFS[1] * dx
  67     double c1 = multiply_add(dx, ExpBase::COEFFS[1], ExpBase::COEFFS[0]);
  68     // c2 = COEFFS[2] + COEFFS[3] * dx
  69     double c2 = multiply_add(dx, ExpBase::COEFFS[3], ExpBase::COEFFS[2]);
  70     // r = c4 + c5 * dx^4
  71     //   = 1 + dx + COEFFS[0] * dx^2 + ... + COEFFS[5] * dx^7
  72     return fputil::polyeval(dx2, c0, c1, c2);
  73   }
  74 };
  75
  76 struct Exp10Base : public ExpBase {
  77   // log2(10) * 2^5
  78   static constexpr double LOG2_B = 0x1.a934f0979a371p1 * (1 << MID_BITS);
  79   // High and low parts of -log10(2) * 2^(-5).
  80   // Notice that since |x * log2(10)| < 150:
  81   //   |k| = |round(x * log2(10) * 2^5)| < 2^8 * 2^5 = 2^13
  82   // So when the FMA instructions are not available, in order for the product
  83   //   k * M_LOGB_2_HI
  84   // to be exact, we only store the high part of log10(2) up to 38 bits
  85   // (= 53 - 15) of precision.
  86   // It is generated by Sollya with:
  87   // > round(log10(2), 44, RN);
  88   static constexpr double M_LOGB_2_HI = -0x1.34413509f8p-2 / (1 << MID_BITS);
  89   // > round(log10(2) - 0x1.34413509f8p-2, D, RN);
  90   static constexpr double M_LOGB_2_LO = 0x1.80433b83b532ap-44 / (1 << MID_BITS);
  91
  92   // Approximating 10^dx with degree-5 minimax polynomial generated by Sollya:
  93   // > Q = fpminimax((10^x - 1)/x, 4, [|D...|], [-log10(2)/2^6, log10(2)/2^6]);
  94   // Then:
  95   //   10^dx ~ P(dx) = 1 + COEFFS[0] * dx + ... + COEFFS[4] * dx^5.
  96   static constexpr double COEFFS[5] = {0x1.26bb1bbb55515p1, 0x1.53524c73bd3eap1,
  97                                        0x1.0470591dff149p1, 0x1.2bd7c0a9fbc4dp0,
  98                                        0x1.1429e74a98f43p-1};
  99
 100   static double powb_lo(double dx) {
 101     using fputil::multiply_add;
 102     double dx2 = dx * dx;
 103     // c0 = 1 + COEFFS[0] * dx
 104     double c0 = multiply_add(dx, Exp10Base::COEFFS[0], 1.0);
 105     // c1 = COEFFS[1] + COEFFS[2] * dx
 106     double c1 = multiply_add(dx, Exp10Base::COEFFS[2], Exp10Base::COEFFS[1]);
 107     // c2 = COEFFS[3] + COEFFS[4] * dx
 108     double c2 = multiply_add(dx, Exp10Base::COEFFS[4], Exp10Base::COEFFS[3]);
 109     // r = c0 + dx^2 * (c1 + c2 * dx^2)
 110     //   = c0 + c1 * dx^2 + c2 * dx^4
 111     //   = 1 + COEFFS[0] * dx + ... + COEFFS[4] * dx^5.
 112     return fputil::polyeval(dx2, c0, c1, c2);
 113   }
 114 };
 115
// Number of leading mantissa bits used by log2_eval to index the LOG_P1_*
// tables, and the resulting table size (2^LOG_P1_BITS entries).
constexpr int LOG_P1_BITS = 6;
constexpr int LOG_P1_SIZE = 1 << LOG_P1_BITS;

// log2(1 + i/64) for i = 0..63:
// N[Table[Log[2, 1 + x], {x, 0/64, 63/64, 1/64}], 40]
extern const double LOG_P1_LOG2[LOG_P1_SIZE];

// 1 / (1 + i/64) for i = 0..63:
// N[Table[1/(1 + x), {x, 0/64, 63/64, 1/64}], 40]
extern const double LOG_P1_1_OVER[LOG_P1_SIZE];

// Taylor series expansion for Log[2, 1 + x], split into EVEN and ODD powers.
// K_LOG2_ODD starts from x^3
extern const double K_LOG2_ODD[4];
extern const double K_LOG2_EVEN[4];
 129
// Output of range reduction for exp_b: (2^(mid + hi), lo)
// where:
//   b^x = 2^(mid + hi) * b^lo
// Returned by exp_b_range_reduc; callers are expected to multiply mh by an
// approximation of b^lo (e.g. Base::powb_lo(lo)).
struct exp_b_reduc_t {
  double mh; // 2^(mid + hi)
  double lo; // reduced argument: x - (hi + mid) * log_b(2)
};
 137
 138 // The function correctly calculates b^x value with at least float precision
 139 // in a limited range.
 140 // Range reduction:
 141 //   b^x = 2^(hi + mid) * b^lo
 142 // where:
 143 //   x = (hi + mid) * log_b(2) + lo
 144 //   hi is an integer,
 145 //   0 <= mid * 2^MID_BITS < 2^MID_BITS is an integer
 146 //   -2^(-MID_BITS - 1) <= lo * log2(b) <= 2^(-MID_BITS - 1)
 147 // Base class needs to provide the following constants:
 148 //   - MID_BITS    : number of bits after decimal points used for mid
 149 //   - MID_MASK    : 2^MID_BITS - 1, mask to extract mid bits
 150 //   - LOG2_B      : log2(b) * 2^MID_BITS for scaling
 151 //   - M_LOGB_2_HI : high part of -log_b(2) * 2^(-MID_BITS)
 152 //   - M_LOGB_2_LO : low part of -log_b(2) * 2^(-MID_BITS)
 153 //   - EXP_2_MID   : look up table for bit fields of 2^mid
 154 // Return:
 155 //   { 2^(hi + mid), lo }
 156 template <class Base> LIBC_INLINE exp_b_reduc_t exp_b_range_reduc(float x) {
 157   double xd = static_cast<double>(x);
 158   // kd = round((hi + mid) * log2(b) * 2^MID_BITS)
 159   double kd = fputil::nearest_integer(Base::LOG2_B * xd);
 160   // k = round((hi + mid) * log2(b) * 2^MID_BITS)
 161   int k = static_cast<int>(kd);
 162   // hi = floor(kd * 2^(-MID_BITS))
 163   // exp_hi = shift hi to the exponent field of double precision.
 164   int64_t exp_hi = static_cast<int64_t>((k >> Base::MID_BITS))
 165                    << fputil::FloatProperties<double>::MANTISSA_WIDTH;
 166   // mh = 2^hi * 2^mid
 167   // mh_bits = bit field of mh
 168   int64_t mh_bits = Base::EXP_2_MID[k & Base::MID_MASK] + exp_hi;
 169   double mh = fputil::FPBits<double>(uint64_t(mh_bits)).get_val();
 170   // dx = lo = x - (hi + mid) * log(2)
 171   double dx = fputil::multiply_add(
 172       kd, Base::M_LOGB_2_LO, fputil::multiply_add(kd, Base::M_LOGB_2_HI, xd));
 173   return {mh, dx};
 174 }
 175
 176 // The function correctly calculates sinh(x) and cosh(x) by calculating exp(x)
 177 // and exp(-x) simultaneously.
 178 // To compute e^x, we perform the following range
 179 // reduction: find hi, mid, lo such that:
 180 //   x = (hi + mid) * log(2) + lo, in which
 181 //     hi is an integer,
 182 //     0 <= mid * 2^5 < 32 is an integer
 183 //     -2^(-6) <= lo * log2(e) <= 2^-6.
 184 // In particular,
 185 //   hi + mid = round(x * log2(e) * 2^5) * 2^(-5).
 186 // Then,
 187 //   e^x = 2^(hi + mid) * e^lo = 2^hi * 2^mid * e^lo.
 188 // 2^mid is stored in the lookup table of 32 elements.
 189 // e^lo is computed using a degree-5 minimax polynomial
 190 // generated by Sollya:
 191 //   e^lo ~ P(lo) = 1 + lo + c2 * lo^2 + ... + c5 * lo^5
 192 //        = (1 + c2*lo^2 + c4*lo^4) + lo * (1 + c3*lo^2 + c5*lo^4)
 193 //        = P_even + lo * P_odd
// We perform 2^hi * 2^mid by simply adding hi to the exponent field
// of 2^mid.
 196 // To compute e^(-x), notice that:
 197 //   e^(-x) = 2^(-(hi + mid)) * e^(-lo)
 198 //          ~ 2^(-(hi + mid)) * P(-lo)
 199 //          = 2^(-(hi + mid)) * (P_even - lo * P_odd)
 200 // So:
 201 //   sinh(x) = (e^x - e^(-x)) / 2
 202 //           ~ 0.5 * (2^(hi + mid) * (P_even + lo * P_odd) -
 203 //                    2^(-(hi + mid)) * (P_even - lo * P_odd))
 204 //           = 0.5 * (P_even * (2^(hi + mid) - 2^(-(hi + mid))) +
 205 //                    lo * P_odd * (2^(hi + mid) + 2^(-(hi + mid))))
 206 // And similarly:
 207 //   cosh(x) = (e^x + e^(-x)) / 2
 208 //           ~ 0.5 * (P_even * (2^(hi + mid) + 2^(-(hi + mid))) +
 209 //                    lo * P_odd * (2^(hi + mid) - 2^(-(hi + mid))))
 210 // The main point of these formulas is that the expensive part of calculating
 211 // the polynomials approximating lower parts of e^(x) and e^(-x) are shared
 212 // and only done once.
 213 template <bool is_sinh> LIBC_INLINE double exp_pm_eval(float x) {
 214   double xd = static_cast<double>(x);
 215
 216   // kd = round(x * log2(e) * 2^5)
 217   // k_p = round(x * log2(e) * 2^5)
 218   // k_m = round(-x * log2(e) * 2^5)
 219   double kd;
 220   int k_p, k_m;
 221
 222 #ifdef LIBC_TARGET_CPU_HAS_NEAREST_INT
 223   kd = fputil::nearest_integer(ExpBase::LOG2_B * xd);
 224   k_p = static_cast<int>(kd);
 225   k_m = -k_p;
 226 #else
 227   constexpr double HALF_WAY[2] = {0.5, -0.5};
 228
 229   k_p = static_cast<int>(
 230       fputil::multiply_add(xd, ExpBase::LOG2_B, HALF_WAY[x < 0.0f]));
 231   k_m = -k_p;
 232   kd = static_cast<double>(k_p);
 233 #endif // LIBC_TARGET_CPU_HAS_NEAREST_INT
 234
 235   // hi = floor(kf * 2^(-5))
 236   // exp_hi = shift hi to the exponent field of double precision.
 237   int64_t exp_hi_p = static_cast<int64_t>((k_p >> ExpBase::MID_BITS))
 238                      << fputil::FloatProperties<double>::MANTISSA_WIDTH;
 239   int64_t exp_hi_m = static_cast<int64_t>((k_m >> ExpBase::MID_BITS))
 240                      << fputil::FloatProperties<double>::MANTISSA_WIDTH;
 241   // mh_p = 2^(hi + mid)
 242   // mh_m = 2^(-(hi + mid))
 243   // mh_bits_* = bit field of mh_*
 244   int64_t mh_bits_p = ExpBase::EXP_2_MID[k_p & ExpBase::MID_MASK] + exp_hi_p;
 245   int64_t mh_bits_m = ExpBase::EXP_2_MID[k_m & ExpBase::MID_MASK] + exp_hi_m;
 246   double mh_p = fputil::FPBits<double>(uint64_t(mh_bits_p)).get_val();
 247   double mh_m = fputil::FPBits<double>(uint64_t(mh_bits_m)).get_val();
 248   // mh_sum = 2^(hi + mid) + 2^(-(hi + mid))
 249   double mh_sum = mh_p + mh_m;
 250   // mh_diff = 2^(hi + mid) - 2^(-(hi + mid))
 251   double mh_diff = mh_p - mh_m;
 252
 253   // dx = lo = x - (hi + mid) * log(2)
 254   double dx =
 255       fputil::multiply_add(kd, ExpBase::M_LOGB_2_LO,
 256                            fputil::multiply_add(kd, ExpBase::M_LOGB_2_HI, xd));
 257   double dx2 = dx * dx;
 258
 259   // c0 = 1 + COEFFS[0] * lo^2
 260   // P_even = (1 + COEFFS[0] * lo^2 + COEFFS[2] * lo^4) / 2
 261   double p_even = fputil::polyeval(dx2, 0.5, ExpBase::COEFFS[0] * 0.5,
 262                                    ExpBase::COEFFS[2] * 0.5);
 263   // P_odd = (1 + COEFFS[1] * lo^2 + COEFFS[3] * lo^4) / 2
 264   double p_odd = fputil::polyeval(dx2, 0.5, ExpBase::COEFFS[1] * 0.5,
 265                                   ExpBase::COEFFS[3] * 0.5);
 266
 267   double r;
 268   if constexpr (is_sinh)
 269     r = fputil::multiply_add(dx * mh_sum, p_odd, p_even * mh_diff);
 270   else
 271     r = fputil::multiply_add(dx * mh_diff, p_odd, p_even * mh_sum);
 272   return r;
 273 }
 274
 275 // x should be positive, normal finite value
 276 LIBC_INLINE static double log2_eval(double x) {
 277   using FPB = fputil::FPBits<double>;
 278   FPB bs(x);
 279
 280   double result = 0;
 281   result += bs.get_exponent();
 282
 283   int p1 =
 284       (bs.get_mantissa() >> (FPB::FloatProp::MANTISSA_WIDTH - LOG_P1_BITS)) &
 285       (LOG_P1_SIZE - 1);
 286
 287   bs.bits &= FPB::FloatProp::MANTISSA_MASK >> LOG_P1_BITS;
 288   bs.set_unbiased_exponent(FPB::FloatProp::EXPONENT_BIAS);
 289   double dx = (bs.get_val() - 1.0) * LOG_P1_1_OVER[p1];
 290
 291   // Taylor series for log(2,1+x)
 292   double c1 = fputil::multiply_add(dx, K_LOG2_ODD[0], K_LOG2_EVEN[0]);
 293   double c2 = fputil::multiply_add(dx, K_LOG2_ODD[1], K_LOG2_EVEN[1]);
 294   double c3 = fputil::multiply_add(dx, K_LOG2_ODD[2], K_LOG2_EVEN[2]);
 295   double c4 = fputil::multiply_add(dx, K_LOG2_ODD[3], K_LOG2_EVEN[3]);
 296
 297   // c0 = dx * (1.0 / ln(2)) + LOG_P1_LOG2[p1]
 298   double c0 = fputil::multiply_add(dx, 0x1.71547652b82fep+0, LOG_P1_LOG2[p1]);
 299   result += LIBC_NAMESPACE::fputil::polyeval(dx * dx, c0, c1, c2, c3, c4);
 300   return result;
 301 }
 302
 303 // x should be positive, normal finite value
 304 LIBC_INLINE static double log_eval(double x) {
 305   // For x = 2^ex * (1 + mx)
 306   //   log(x) = ex * log(2) + log(1 + mx)
 307   using FPB = fputil::FPBits<double>;
 308   FPB bs(x);
 309
 310   double ex = static_cast<double>(bs.get_exponent());
 311
 312   // p1 is the leading 7 bits of mx, i.e.
 313   // p1 * 2^(-7) <= m_x < (p1 + 1) * 2^(-7).
 314   int p1 = (bs.get_mantissa() >> (FPB::FloatProp::MANTISSA_WIDTH - 7));
 315
 316   // Set bs to (1 + (mx - p1*2^(-7))
 317   bs.bits &= FPB::FloatProp::MANTISSA_MASK >> 7;
 318   bs.set_unbiased_exponent(FPB::FloatProp::EXPONENT_BIAS);
 319   // dx = (mx - p1*2^(-7)) / (1 + p1*2^(-7)).
 320   double dx = (bs.get_val() - 1.0) * ONE_OVER_F[p1];
 321
 322   // Minimax polynomial of log(1 + dx) generated by Sollya with:
 323   // > P = fpminimax(log(1 + x)/x, 6, [|D...|], [0, 2^-7]);
 324   const double COEFFS[6] = {-0x1.ffffffffffffcp-2, 0x1.5555555552ddep-2,
 325                             -0x1.ffffffefe562dp-3, 0x1.9999817d3a50fp-3,
 326                             -0x1.554317b3f67a5p-3, 0x1.1dc5c45e09c18p-3};
 327   double dx2 = dx * dx;
 328   double c1 = fputil::multiply_add(dx, COEFFS[1], COEFFS[0]);
 329   double c2 = fputil::multiply_add(dx, COEFFS[3], COEFFS[2]);
 330   double c3 = fputil::multiply_add(dx, COEFFS[5], COEFFS[4]);
 331
 332   double p = fputil::polyeval(dx2, dx, c1, c2, c3);
 333   double result =
 334       fputil::multiply_add(ex, /*log(2)*/ 0x1.62e42fefa39efp-1, LOG_F[p1] + p);
 335   return result;
 336 }
 337
 338 // Rounding tests for 2^hi * (mid + lo) when the output might be denormal. We
 339 // assume further that 1 <= mid < 2, mid + lo < 2, and |lo| << mid.
 340 // Notice that, if 0 < x < 2^-1022,
 341 //   double(2^-1022 + x) - 2^-1022 = double(x).
 342 // So if we scale x up by 2^1022, we can use
 343 //   double(1.0 + 2^1022 * x) - 1.0 to test how x is rounded in denormal range.
 344 LIBC_INLINE cpp::optional<double> ziv_test_denorm(int hi, double mid, double lo,
 345                                                   double err) {
 346   using FloatProp = typename fputil::FloatProperties<double>;
 347
 348   // Scaling factor = 1/(min normal number) = 2^1022
 349   int64_t exp_hi = static_cast<int64_t>(hi + 1022) << FloatProp::MANTISSA_WIDTH;
 350   double mid_hi = cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(mid));
 351   double lo_scaled =
 352       (lo != 0.0) ? cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(lo))
 353                   : 0.0;
 354
 355   double extra_factor = 0.0;
 356   uint64_t scale_down = 0x3FE0'0000'0000'0000; // 1022 in the exponent field.
 357
 358   // Result is denormal if (mid_hi + lo_scale < 1.0).
 359   if ((1.0 - mid_hi) > lo_scaled) {
 360     // Extra rounding step is needed, which adds more rounding errors.
 361     err += 0x1.0p-52;
 362     extra_factor = 1.0;
 363     scale_down = 0x3FF0'0000'0000'0000; // 1023 in the exponent field.
 364   }
 365
 366   double err_scaled =
 367       cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(err));
 368
 369   double lo_u = lo_scaled + err_scaled;
 370   double lo_l = lo_scaled - err_scaled;
 371
 372   // By adding 1.0, the results will have similar rounding points as denormal
 373   // outputs.
 374   double upper = extra_factor + (mid_hi + lo_u);
 375   double lower = extra_factor + (mid_hi + lo_l);
 376
 377   if (LIBC_LIKELY(upper == lower)) {
 378     return cpp::bit_cast<double>(cpp::bit_cast<uint64_t>(upper) - scale_down);
 379   }
 380
 381   return cpp::nullopt;
 382 }
 383
 384 } // namespace LIBC_NAMESPACE
 385
 386 #endif // LLVM_LIBC_SRC_MATH_GENERIC_EXPLOGXF_H