1 //===-- Common header for fmod implementations ------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_LIBC_SRC_SUPPORT_FPUTIL_GENERIC_FMOD_H
10 #define LLVM_LIBC_SRC_SUPPORT_FPUTIL_GENERIC_FMOD_H
12 #include "src/__support/CPP/limits.h"
13 #include "src/__support/CPP/type_traits.h"
14 #include "src/__support/FPUtil/FEnvImpl.h"
15 #include "src/__support/FPUtil/FPBits.h"
16 #include "src/__support/builtin_wrappers.h"
17 #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
18 #include "src/math/generic/math_utils.h"
20 namespace __llvm_libc
{
25 // The algorithm uses integer arithmetic (max uint64_t) for general case.
26 // Some common cases, like abs(x) < abs(y) or abs(x) < 1000 * abs(y) are
27 // treated specially to increase performance. The part of checking special
28 // cases, numbers NaN, INF etc. treated separately.
31 // 1) FMod definition (https://cplusplus.com/reference/cmath/fmod/):
32 // fmod = numer - tquot * denom, where tquot is the truncated
33 // (i.e., rounded towards zero) result of: numer/denom.
34 // 2) FMod with negative x and/or y can be trivially converted to fmod for
35 // positive x and y. Therefore the algorithm below works only with
37 // 3) All positive floating point numbers can be represented as m * 2^e,
38 // where "m" is positive integer and "e" is signed.
39 // 4) FMod function can be calculated in integer numbers (x > y):
40 // fmod = m_x * 2^e_x - tquot * m_y * 2^e_y
41 // = 2^e_y * (m_x * 2^(e_x - e^y) - tquot * m_y).
42 // All variables in parentheses are unsigned integers.
44 // Mathematical background:
45 // Input x,y in the algorithm is represented (mathematically) like m_x*2^e_x
46 // and m_y*2^e_y. This is an ambiguous number representation. For example:
47 // m * 2^e = (2 * m) * 2^(e-1)
48 // The algorithm uses the facts that
49 // r = a % b = (a % (N * b)) % b,
50 // (a * c) % (b * c) = (a % b) * c
51 // where N is positive integer number. a, b and c - positive. Let's adopt
52 // the formula for representation above.
53 // a = m_x * 2^e_x, b = m_y * 2^e_y, N = 2^k
54 // r(k) = a % b = (m_x * 2^e_x) % (2^k * m_y * 2^e_y)
55 // = 2^(e_y + k) * (m_x * 2^(e_x - e_y - k) % m_y)
56 // r(k) = m_r * 2^e_r = (m_x % m_y) * 2^(m_y + k)
57 // = (2^p * (m_x % m_y) * 2^(e_y + k - p))
58 // m_r = 2^p * (m_x % m_y), e_r = m_y + k - p
60 // Algorithm description:
61 // First, let write x = m_x * 2^e_x and y = m_y * 2^e_y with m_x, m_y, e_x, e_y
62 // are integers (m_x amd m_y positive).
63 // Then the naive implementation of the fmod function with a simple
65 // while (e_x > e_y) {
66 // m_x *= 2; --e_x; // m_x * 2^e_x == 2 * m_x * 2^(e_x - 1)
69 // On the other hand, the algorithm exploits the fact that m_x, m_y are the
70 // mantissas of floating point numbers, which use less bits than the storage
71 // integers: 24 / 32 for floats and 53 / 64 for doubles, so if in each step of
72 // the iteration, we can left shift m_x as many bits as the storage integer
73 // type can hold, the exponent reduction per step will be at least 32 - 24 = 8
74 // for floats and 64 - 53 = 11 for doubles (double example below):
75 // while (e_x > e_y) {
76 // m_x <<= 11; e_x -= 11; // m_x * 2^e_x == 2^11 * m_x * 2^(e_x - 11)
79 // Some extra improvements are done:
80 // 1) Shift m_y maximum to the right, which can significantly improve
81 // performance for small integer numbers (y = 3 for example).
82 // The m_x shift in the loop can be 62 instead of 11 for double.
83 // 2) For some architectures with very slow division, it can be better to
84 // calculate inverse value ones, and after do multiplication in the loop.
85 // 3) "likely" special cases are treated specially to improve performance.
88 // The examples below use byte for simplicity.
89 // 1) Shift hy maximum to right without losing bits and increase iy value
90 // m_y = 0b00101100 e_y = 20 after shift m_y = 0b00001011 e_y = 22.
91 // 2) m_x = m_x % m_y.
92 // 3) Move m_x maximum to left. Note that after (m_x = m_x % m_y) CLZ in m_x
93 // is not lower than CLZ in m_y. m_x=0b00001001 e_x = 100, m_x=0b10010000,
95 // 4) Repeat (2) until e_x == e_y.
97 // Complexity analysis (double):
98 // Converting x,y to (m_x,e_x),(m_y, e_y): CTZ/shift/AND/OR/if. Loop count:
99 // (m_x - m_y) / (64 - "length of m_y").
100 // max("length of m_y") = 53,
101 // max(e_x - e_y) = 2048
102 // Maximum operation is 186. For rare "unrealistic" cases.
104 // Special cases (double):
105 // Supposing that case where |y| > 1e-292 and |x/y|<2000 is very common
106 // special processing is implemented. No m_y alignment, no loop:
107 // result = (m_x * 2^(e_x - e_y)) % m_y.
108 // When x and y are both subnormal (rare case but...) the
109 // result = m_x % m_y.
110 // Simplified conversion back to double.
112 // Exceptional cases handler according to cppreference.com
113 // https://en.cppreference.com/w/cpp/numeric/math/fmod
114 // and POSIX standard described in Linux man
115 // https://man7.org/linux/man-pages/man3/fmod.3p.html
116 // C standard for the function is not full, so not by default (although it can
117 // be implemented in another handler.
118 // Signaling NaN converted to quiet NaN with FE_INVALID exception.
119 // https://www.open-std.org/JTC1/SC22/WG14/www/docs/n1011.htm
120 template <typename T
> struct FModExceptionalInputHandler
{
122 static_assert(cpp::is_floating_point_v
<T
>,
123 "FModCStandardWrapper instantiated with invalid type.");
125 LIBC_INLINE
static bool pre_check(T x
, T y
, T
&out
) {
126 using FPB
= fputil::FPBits
<T
>;
127 const T quiet_nan
= FPB::build_quiet_nan(0);
129 if (LIBC_LIKELY(!sy
.is_zero() && !sy
.is_inf_or_nan() &&
130 !sx
.is_inf_or_nan())) {
134 if (sx
.is_nan() || sy
.is_nan()) {
135 if ((sx
.is_nan() && !sx
.is_quiet_nan()) ||
136 (sy
.is_nan() && !sy
.is_quiet_nan()))
137 fputil::raise_except_if_required(FE_INVALID
);
142 if (sx
.is_inf() || sy
.is_zero()) {
143 fputil::raise_except_if_required(FE_INVALID
);
144 fputil::set_errno_if_required(EDOM
);
160 template <typename T
> struct FModFastMathWrapper
{
162 static_assert(cpp::is_floating_point_v
<T
>,
163 "FModFastMathWrapper instantiated with invalid type.");
165 static bool pre_check(T
, T
, T
&) { return false; }
168 template <typename T
> class FModDivisionSimpleHelper
{
170 using intU_t
= typename FPBits
<T
>::UIntType
;
173 LIBC_INLINE
constexpr static intU_t
174 execute(int exp_diff
, int sides_zeroes_count
, intU_t m_x
, intU_t m_y
) {
175 while (exp_diff
> sides_zeroes_count
) {
176 exp_diff
-= sides_zeroes_count
;
177 m_x
<<= sides_zeroes_count
;
186 template <typename T
> class FModDivisionInvMultHelper
{
188 using FPB
= FPBits
<T
>;
189 using intU_t
= typename
FPB::UIntType
;
192 LIBC_INLINE
constexpr static intU_t
193 execute(int exp_diff
, int sides_zeroes_count
, intU_t m_x
, intU_t m_y
) {
194 if (exp_diff
> sides_zeroes_count
) {
195 intU_t inv_hy
= (cpp::numeric_limits
<intU_t
>::max() / m_y
);
196 while (exp_diff
> sides_zeroes_count
) {
197 exp_diff
-= sides_zeroes_count
;
199 (m_x
* inv_hy
) >> (FPB::FloatProp::BIT_WIDTH
- sides_zeroes_count
);
200 m_x
<<= sides_zeroes_count
;
202 while (LIBC_UNLIKELY(m_x
> m_y
))
205 intU_t hd
= (m_x
* inv_hy
) >> (FPB::FloatProp::BIT_WIDTH
- exp_diff
);
208 while (LIBC_UNLIKELY(m_x
> m_y
))
218 template <typename T
, class Wrapper
= FModExceptionalInputHandler
<T
>,
219 class DivisionHelper
= FModDivisionSimpleHelper
<T
>>
221 static_assert(cpp::is_floating_point_v
<T
>,
222 "FMod instantiated with invalid type.");
225 using FPB
= FPBits
<T
>;
226 using intU_t
= typename
FPB::UIntType
;
228 LIBC_INLINE
static constexpr FPB
eval_internal(FPB sx
, FPB sy
) {
230 if (LIBC_LIKELY(sx
.uintval() <= sy
.uintval())) {
231 if (sx
.uintval() < sy
.uintval())
232 return sx
; // |x|<|y| return x
233 return FPB::zero(); // |x|=|y| return 0.0
236 int e_x
= sx
.get_unbiased_exponent();
237 int e_y
= sy
.get_unbiased_exponent();
239 // Most common case where |y| is "very normal" and |x/y| < 2^EXPONENT_WIDTH
240 if (LIBC_LIKELY(e_y
> int(FPB::FloatProp::MANTISSA_WIDTH
) &&
241 e_x
- e_y
<= int(FPB::FloatProp::EXPONENT_WIDTH
))) {
242 intU_t m_x
= sx
.get_explicit_mantissa();
243 intU_t m_y
= sy
.get_explicit_mantissa();
244 intU_t d
= (e_x
== e_y
) ? (m_x
- m_y
) : (m_x
<< (e_x
- e_y
)) % m_y
;
247 // iy - 1 because of "zero power" for number with power 1
248 return FPB::make_value(d
, e_y
- 1);
250 /* Both subnormal special case. */
251 if (LIBC_UNLIKELY(e_x
== 0 && e_y
== 0)) {
253 d
.set_mantissa(sx
.uintval() % sy
.uintval());
257 // Note that hx is not subnormal by conditions above.
258 intU_t m_x
= sx
.get_explicit_mantissa();
261 intU_t m_y
= sy
.get_explicit_mantissa();
262 int lead_zeros_m_y
= FPB::FloatProp::EXPONENT_WIDTH
;
263 if (LIBC_LIKELY(e_y
> 0)) {
266 m_y
= sy
.get_mantissa();
267 lead_zeros_m_y
= unsafe_clz(m_y
);
271 int tail_zeros_m_y
= unsafe_ctz(m_y
);
272 int sides_zeroes_count
= lead_zeros_m_y
+ tail_zeros_m_y
;
273 // n > 0 by conditions above
274 int exp_diff
= e_x
- e_y
;
276 // Shift hy right until the end or n = 0
277 int right_shift
= exp_diff
< tail_zeros_m_y
? exp_diff
: tail_zeros_m_y
;
279 exp_diff
-= right_shift
;
284 // Shift hx left until the end or n = 0
285 int left_shift
= exp_diff
< int(FPB::FloatProp::EXPONENT_WIDTH
)
287 : FPB::FloatProp::EXPONENT_WIDTH
;
289 exp_diff
-= left_shift
;
293 if (LIBC_UNLIKELY(m_x
== 0))
297 return FPB::make_value(m_x
, e_y
);
299 /* hx next can't be 0, because hx < hy, hy % 2 == 1 hx * 2^i % hy != 0 */
300 m_x
= DivisionHelper::execute(exp_diff
, sides_zeroes_count
, m_x
, m_y
);
301 return FPB::make_value(m_x
, e_y
);
305 LIBC_INLINE
static T
eval(T x
, T y
) {
306 if (T out
; Wrapper::pre_check(x
, y
, out
))
309 bool sign
= sx
.get_sign();
312 FPB result
= eval_internal(sx
, sy
);
313 result
.set_sign(sign
);
314 return result
.get_val();
318 } // namespace generic
319 } // namespace fputil
320 } // namespace __llvm_libc
322 #endif // LLVM_LIBC_SRC_SUPPORT_FPUTIL_GENERIC_FMOD_H