libc/src/__support/FPUtil/generic/FMod.h

   1 //===-- Common header for fmod implementations ------------------*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 #ifndef LLVM_LIBC_SRC_SUPPORT_FPUTIL_GENERIC_FMOD_H
  10 #define LLVM_LIBC_SRC_SUPPORT_FPUTIL_GENERIC_FMOD_H
  11
  12 #include "src/__support/CPP/limits.h"
  13 #include "src/__support/CPP/type_traits.h"
  14 #include "src/__support/FPUtil/FEnvImpl.h"
  15 #include "src/__support/FPUtil/FPBits.h"
  16 #include "src/__support/builtin_wrappers.h"
  17 #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
  18 #include "src/math/generic/math_utils.h"
  19
  20 namespace __llvm_libc {
  21 namespace fputil {
  22 namespace generic {
  23
  24 //  Objective:
  25 //    The  algorithm uses  integer arithmetic  (max uint64_t)  for general case.
  26 //    Some common  cases, like  abs(x) < abs(y)  or  abs(x) < 1000 *  abs(y) are
  27 //    treated specially to increase  performance.  The part of checking  special
  28 //    cases, numbers NaN, INF etc. treated separately.
  29 //
  30 //  Objective:
  31 //    1) FMod definition (https://cplusplus.com/reference/cmath/fmod/):
  32 //       fmod = numer - tquot * denom, where tquot is the truncated
  33 //       (i.e., rounded towards zero) result of: numer/denom.
  34 //    2) FMod with negative x and/or y can be trivially converted to fmod for
  35 //       positive x and y. Therefore the algorithm below works only with
  36 //       positive numbers.
  37 //    3) All positive floating point numbers can be represented as m * 2^e,
  38 //       where "m" is positive integer and "e" is signed.
  39 //    4) FMod function can be calculated in integer numbers (x > y):
  40 //         fmod = m_x * 2^e_x - tquot * m_y * 2^e_y
  41 //              = 2^e_y * (m_x * 2^(e_x - e^y) - tquot * m_y).
  42 //       All variables in parentheses are unsigned integers.
  43 //
  44 //  Mathematical background:
  45 //    Input x,y in the algorithm is represented (mathematically) like m_x*2^e_x
  46 //    and m_y*2^e_y. This is an ambiguous number representation. For example:
  47 //      m * 2^e = (2 * m) * 2^(e-1)
  48 //    The algorithm uses the facts that
  49 //      r = a % b = (a % (N * b)) % b,
  50 //      (a * c) % (b * c) = (a % b) * c
  51 //    where N is positive  integer number. a, b and c - positive. Let's  adopt
  52 //    the formula for representation above.
  53 //      a = m_x * 2^e_x, b = m_y * 2^e_y, N = 2^k
  54 //      r(k) = a % b = (m_x * 2^e_x) % (2^k * m_y * 2^e_y)
  55 //           = 2^(e_y + k) * (m_x * 2^(e_x - e_y - k) % m_y)
  56 //      r(k) = m_r * 2^e_r = (m_x % m_y) * 2^(m_y + k)
  57 //           = (2^p * (m_x % m_y) * 2^(e_y + k - p))
  58 //        m_r = 2^p * (m_x % m_y), e_r = m_y + k - p
  59 //
  60 //  Algorithm description:
  61 //  First, let write x = m_x * 2^e_x and y = m_y * 2^e_y with m_x, m_y, e_x, e_y
  62 //  are integers (m_x amd m_y positive).
  63 //  Then the naive  implementation of the fmod function with a simple
  64 //  for/while loop:
  65 //      while (e_x > e_y) {
  66 //        m_x *= 2; --e_x; //  m_x * 2^e_x == 2 * m_x * 2^(e_x - 1)
  67 //        m_x %= m_y;
  68 //      }
  69 //  On the other hand, the algorithm exploits the fact that m_x, m_y are the
  70 //  mantissas of floating point numbers, which use less bits than the storage
  71 //  integers: 24 / 32 for floats and 53 / 64 for doubles, so if in each step of
  72 //  the iteration, we can left shift m_x as many bits as the storage integer
  73 //  type can hold, the exponent reduction per step will be at least 32 - 24 = 8
  74 //  for floats and 64 - 53 = 11 for doubles (double example below):
  75 //      while (e_x > e_y) {
  76 //        m_x <<= 11; e_x -= 11; //  m_x * 2^e_x == 2^11 * m_x * 2^(e_x - 11)
  77 //        m_x %= m_y;
  78 //      }
  79 //  Some extra improvements are done:
  80 //    1) Shift m_y maximum to the right, which can significantly improve
  81 //       performance for small integer numbers (y = 3 for example).
  82 //       The m_x shift in the loop can be 62 instead of 11 for double.
  83 //    2) For some architectures with very slow division, it can be better to
  84 //       calculate inverse value ones, and after do multiplication in the loop.
  85 //    3) "likely" special cases are treated specially to improve performance.
  86 //
  87 //  Simple example:
  88 //  The examples below use byte for simplicity.
  89 //    1) Shift hy maximum to right without losing bits and increase iy value
  90 //       m_y = 0b00101100 e_y = 20 after shift m_y = 0b00001011 e_y = 22.
  91 //    2) m_x = m_x % m_y.
  92 //    3) Move m_x maximum to left. Note that after (m_x = m_x % m_y) CLZ in m_x
  93 //    is not lower than CLZ in m_y. m_x=0b00001001 e_x = 100, m_x=0b10010000,
  94 //       e_x = 100-4 = 96.
  95 //    4) Repeat (2) until e_x == e_y.
  96 //
  97 //  Complexity analysis (double):
  98 //    Converting x,y to (m_x,e_x),(m_y, e_y): CTZ/shift/AND/OR/if. Loop  count:
  99 //      (m_x - m_y) / (64 -  "length of m_y").
 100 //      max("length of m_y")  = 53,
 101 //      max(e_x - e_y)  = 2048
 102 //    Maximum operation is  186. For rare "unrealistic" cases.
 103 //
 104 //  Special cases (double):
 105 //    Supposing  that  case  where |y| > 1e-292 and |x/y|<2000  is  very  common
 106 //    special processing is implemented. No m_y alignment, no loop:
 107 //      result = (m_x * 2^(e_x - e_y)) % m_y.
 108 //    When x and y are both subnormal (rare case but...) the
 109 //      result = m_x % m_y.
 110 //    Simplified conversion back to double.
 111
 112 // Exceptional cases handler according to cppreference.com
 113 //    https://en.cppreference.com/w/cpp/numeric/math/fmod
 114 // and POSIX standard described in Linux man
 115 //   https://man7.org/linux/man-pages/man3/fmod.3p.html
 116 // C standard for the function is not full, so not by default (although it can
 117 // be implemented in another handler.
 118 // Signaling NaN converted to quiet NaN with FE_INVALID exception.
 119 //    https://www.open-std.org/JTC1/SC22/WG14/www/docs/n1011.htm
 120 template <typename T> struct FModExceptionalInputHandler {
 121
 122   static_assert(cpp::is_floating_point_v<T>,
 123                 "FModCStandardWrapper instantiated with invalid type.");
 124
 125   LIBC_INLINE static bool pre_check(T x, T y, T &out) {
 126     using FPB = fputil::FPBits<T>;
 127     const T quiet_nan = FPB::build_quiet_nan(0);
 128     FPB sx(x), sy(y);
 129     if (LIBC_LIKELY(!sy.is_zero() && !sy.is_inf_or_nan() &&
 130                     !sx.is_inf_or_nan())) {
 131       return false;
 132     }
 133
 134     if (sx.is_nan() || sy.is_nan()) {
 135       if ((sx.is_nan() && !sx.is_quiet_nan()) ||
 136           (sy.is_nan() && !sy.is_quiet_nan()))
 137         fputil::raise_except_if_required(FE_INVALID);
 138       out = quiet_nan;
 139       return true;
 140     }
 141
 142     if (sx.is_inf() || sy.is_zero()) {
 143       fputil::raise_except_if_required(FE_INVALID);
 144       fputil::set_errno_if_required(EDOM);
 145       out = quiet_nan;
 146       return true;
 147     }
 148
 149     if (sy.is_inf()) {
 150       out = x;
 151       return true;
 152     }
 153
 154     // case where x == 0
 155     out = x;
 156     return true;
 157   }
 158 };
 159
 160 template <typename T> struct FModFastMathWrapper {
 161
 162   static_assert(cpp::is_floating_point_v<T>,
 163                 "FModFastMathWrapper instantiated with invalid type.");
 164
 165   static bool pre_check(T, T, T &) { return false; }
 166 };
 167
 168 template <typename T> class FModDivisionSimpleHelper {
 169 private:
 170   using intU_t = typename FPBits<T>::UIntType;
 171
 172 public:
 173   LIBC_INLINE constexpr static intU_t
 174   execute(int exp_diff, int sides_zeroes_count, intU_t m_x, intU_t m_y) {
 175     while (exp_diff > sides_zeroes_count) {
 176       exp_diff -= sides_zeroes_count;
 177       m_x <<= sides_zeroes_count;
 178       m_x %= m_y;
 179     }
 180     m_x <<= exp_diff;
 181     m_x %= m_y;
 182     return m_x;
 183   }
 184 };
 185
 186 template <typename T> class FModDivisionInvMultHelper {
 187 private:
 188   using FPB = FPBits<T>;
 189   using intU_t = typename FPB::UIntType;
 190
 191 public:
 192   LIBC_INLINE constexpr static intU_t
 193   execute(int exp_diff, int sides_zeroes_count, intU_t m_x, intU_t m_y) {
 194     if (exp_diff > sides_zeroes_count) {
 195       intU_t inv_hy = (cpp::numeric_limits<intU_t>::max() / m_y);
 196       while (exp_diff > sides_zeroes_count) {
 197         exp_diff -= sides_zeroes_count;
 198         intU_t hd =
 199             (m_x * inv_hy) >> (FPB::FloatProp::BIT_WIDTH - sides_zeroes_count);
 200         m_x <<= sides_zeroes_count;
 201         m_x -= hd * m_y;
 202         while (LIBC_UNLIKELY(m_x > m_y))
 203           m_x -= m_y;
 204       }
 205       intU_t hd = (m_x * inv_hy) >> (FPB::FloatProp::BIT_WIDTH - exp_diff);
 206       m_x <<= exp_diff;
 207       m_x -= hd * m_y;
 208       while (LIBC_UNLIKELY(m_x > m_y))
 209         m_x -= m_y;
 210     } else {
 211       m_x <<= exp_diff;
 212       m_x %= m_y;
 213     }
 214     return m_x;
 215   }
 216 };
 217
 218 template <typename T, class Wrapper = FModExceptionalInputHandler<T>,
 219           class DivisionHelper = FModDivisionSimpleHelper<T>>
 220 class FMod {
 221   static_assert(cpp::is_floating_point_v<T>,
 222                 "FMod instantiated with invalid type.");
 223
 224 private:
 225   using FPB = FPBits<T>;
 226   using intU_t = typename FPB::UIntType;
 227
 228   LIBC_INLINE static constexpr FPB eval_internal(FPB sx, FPB sy) {
 229
 230     if (LIBC_LIKELY(sx.uintval() <= sy.uintval())) {
 231       if (sx.uintval() < sy.uintval())
 232         return sx;        // |x|<|y| return x
 233       return FPB::zero(); // |x|=|y| return 0.0
 234     }
 235
 236     int e_x = sx.get_unbiased_exponent();
 237     int e_y = sy.get_unbiased_exponent();
 238
 239     // Most common case where |y| is "very normal" and |x/y| < 2^EXPONENT_WIDTH
 240     if (LIBC_LIKELY(e_y > int(FPB::FloatProp::MANTISSA_WIDTH) &&
 241                     e_x - e_y <= int(FPB::FloatProp::EXPONENT_WIDTH))) {
 242       intU_t m_x = sx.get_explicit_mantissa();
 243       intU_t m_y = sy.get_explicit_mantissa();
 244       intU_t d = (e_x == e_y) ? (m_x - m_y) : (m_x << (e_x - e_y)) % m_y;
 245       if (d == 0)
 246         return FPB::zero();
 247       // iy - 1 because of "zero power" for number with power 1
 248       return FPB::make_value(d, e_y - 1);
 249     }
 250     /* Both subnormal special case. */
 251     if (LIBC_UNLIKELY(e_x == 0 && e_y == 0)) {
 252       FPB d;
 253       d.set_mantissa(sx.uintval() % sy.uintval());
 254       return d;
 255     }
 256
 257     // Note that hx is not subnormal by conditions above.
 258     intU_t m_x = sx.get_explicit_mantissa();
 259     e_x--;
 260
 261     intU_t m_y = sy.get_explicit_mantissa();
 262     int lead_zeros_m_y = FPB::FloatProp::EXPONENT_WIDTH;
 263     if (LIBC_LIKELY(e_y > 0)) {
 264       e_y--;
 265     } else {
 266       m_y = sy.get_mantissa();
 267       lead_zeros_m_y = unsafe_clz(m_y);
 268     }
 269
 270     // Assume hy != 0
 271     int tail_zeros_m_y = unsafe_ctz(m_y);
 272     int sides_zeroes_count = lead_zeros_m_y + tail_zeros_m_y;
 273     // n > 0 by conditions above
 274     int exp_diff = e_x - e_y;
 275     {
 276       // Shift hy right until the end or n = 0
 277       int right_shift = exp_diff < tail_zeros_m_y ? exp_diff : tail_zeros_m_y;
 278       m_y >>= right_shift;
 279       exp_diff -= right_shift;
 280       e_y += right_shift;
 281     }
 282
 283     {
 284       // Shift hx left until the end or n = 0
 285       int left_shift = exp_diff < int(FPB::FloatProp::EXPONENT_WIDTH)
 286                            ? exp_diff
 287                            : FPB::FloatProp::EXPONENT_WIDTH;
 288       m_x <<= left_shift;
 289       exp_diff -= left_shift;
 290     }
 291
 292     m_x %= m_y;
 293     if (LIBC_UNLIKELY(m_x == 0))
 294       return FPB::zero();
 295
 296     if (exp_diff == 0)
 297       return FPB::make_value(m_x, e_y);
 298
 299     /* hx next can't be 0, because hx < hy, hy % 2 == 1 hx * 2^i % hy != 0 */
 300     m_x = DivisionHelper::execute(exp_diff, sides_zeroes_count, m_x, m_y);
 301     return FPB::make_value(m_x, e_y);
 302   }
 303
 304 public:
 305   LIBC_INLINE static T eval(T x, T y) {
 306     if (T out; Wrapper::pre_check(x, y, out))
 307       return out;
 308     FPB sx(x), sy(y);
 309     bool sign = sx.get_sign();
 310     sx.set_sign(false);
 311     sy.set_sign(false);
 312     FPB result = eval_internal(sx, sy);
 313     result.set_sign(sign);
 314     return result.get_val();
 315   }
 316 };
 317
 318 } // namespace generic
 319 } // namespace fputil
 320 } // namespace __llvm_libc
 321
 322 #endif // LLVM_LIBC_SRC_SUPPORT_FPUTIL_GENERIC_FMOD_H