1 //===-- Single-precision tanh function ------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "src/math/tanhf.h"
10 #include "src/__support/FPUtil/FPBits.h"
11 #include "src/__support/FPUtil/PolyEval.h"
12 #include "src/__support/FPUtil/multiply_add.h"
13 #include "src/__support/FPUtil/nearest_integer.h"
14 #include "src/__support/macros/config.h"
15 #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
16 #include "src/__support/macros/properties/cpu_features.h"
17 #include "src/math/generic/explogxf.h"
19 namespace LIBC_NAMESPACE_DECL
{
22 constexpr double LOG2_E_EXP2_6
= ExpBase::LOG2_B
* 2.0;
24 LLVM_LIBC_FUNCTION(float, tanhf
, (float x
)) {
25 using FPBits
= typename
fputil::FPBits
<float>;
27 uint32_t x_abs
= xbits
.abs().uintval();
29 const int sign_index
= xbits
.is_neg() ? 1 : 0;
31 // When |x| >= 15, or x is inf or nan, or |x| <= 0.078125
32 if (LIBC_UNLIKELY((x_abs
>= 0x4170'0000U
) || (x_abs
<= 0x3da0'0000U
))) {
33 if (x_abs
<= 0x3da0'0000U
) {
35 if (LIBC_UNLIKELY(x_abs
<= 0x3280'0000U
)) {
38 ? static_cast<float>(x
- 0x1.5555555555555p
-2 * x
* x
* x
)
42 const double TAYLOR
[] = {-0x1.5555555555555p
-2, 0x1.1111111111111p
-3,
43 -0x1.ba1ba1ba1ba1cp
-5, 0x1.664f4882c10fap
-6,
44 -0x1.226e355e6c23dp
-7};
46 double x2
= xdbl
* xdbl
;
49 double c0
= x2
* TAYLOR
[0];
50 double c1
= fputil::multiply_add(x2
, TAYLOR
[2], TAYLOR
[1]);
51 double c2
= fputil::multiply_add(x2
, TAYLOR
[4], TAYLOR
[3]);
52 double pe
= fputil::polyeval(x4
, c0
, c1
, c2
);
54 return static_cast<float>(fputil::multiply_add(xdbl
, pe
, xdbl
));
58 if (LIBC_UNLIKELY(xbits
.is_nan()))
59 return x
+ 1.0f
; // sNaN to qNaN + signal
61 constexpr float SIGNS
[2][2] = {{1.0f
, -0x1.0p
-25f
}, {-1.0f
, 0x1.0p
-25f
}};
63 if (LIBC_UNLIKELY(xbits
.is_inf()))
64 return SIGNS
[sign_index
][0];
66 return SIGNS
[sign_index
][0] + SIGNS
[sign_index
][1];
69 // Range reduction: e^(2x) = 2^(hi + mid) * e^lo
70 // Let k = round( x * 2^6 * log2(e)),
71 // So k = (hi + mid) * 2^5
72 // Then lo = 2x - (hi + mid) * log(2) = 2x - k * 2^-5 * log(2).
74 double xd
= static_cast<double>(x
);
75 // k = round( x* 2^6 * log2(e) )
79 #ifdef LIBC_TARGET_CPU_HAS_NEAREST_INT
80 k
= fputil::nearest_integer(xd
* LOG2_E_EXP2_6
);
81 mk
= -static_cast<int>(k
);
83 constexpr double HALF_WAY
[2] = {-0.5, 0.5};
85 mk
= static_cast<int>(
86 fputil::multiply_add(xd
, -LOG2_E_EXP2_6
, HALF_WAY
[sign_index
]));
87 k
= static_cast<double>(-mk
);
88 #endif // LIBC_TARGET_CPU_HAS_NEAREST_INT
89 // -hi = floor(-k * 2^(-MID_BITS))
90 // exp_mhi = shift -hi to the exponent field of double precision.
91 int64_t exp_mhi
= static_cast<int64_t>(mk
>> ExpBase::MID_BITS
)
92 << fputil::FPBits
<double>::FRACTION_LEN
;
94 int64_t mh_bits
= ExpBase::EXP_2_MID
[mk
& ExpBase::MID_MASK
] + exp_mhi
;
95 double mh
= fputil::FPBits
<double>(uint64_t(mh_bits
)).get_val();
96 // dx = lo/2 = x - (hi + mid) * log(2)/2 = x - k * 2^-6 * log(2)
97 double dx
= fputil::multiply_add(
98 k
, ExpBase::M_LOGB_2_LO
* 0.5,
99 fputil::multiply_add(k
, ExpBase::M_LOGB_2_HI
* 0.5, xd
));
101 // > P = fpminimax(expm1(2*x)/x, 4, [|D...|], [-log(2)/128, log(2)/128]);
102 constexpr double COEFFS
[] = {0x1.ffffffffe5bc8p0
, 0x1.555555555cd67p0
,
103 0x1.5555c2a9b48b4p
-1, 0x1.11112a0e34bdbp
-2};
105 double dx2
= dx
* dx
;
106 double c0
= fputil::multiply_add(dx
, 2.0, 1.0);
107 double c1
= fputil::multiply_add(dx
, COEFFS
[1], COEFFS
[0]);
108 double c2
= fputil::multiply_add(dx
, COEFFS
[3], COEFFS
[2]);
109 double r
= fputil::polyeval(dx2
, c0
, c1
, c2
);
111 // tanh(x) = sinh(x) / cosh(x)
112 // = (e^x - e^(-x)) / (e^x + e^(-x))
113 // = (e^(2x) - 1) / (e^(2x) + 1)
114 // = (2^(hi + mid) * e^lo - 1) / (2^(hi + mid) * e^lo + 1)
115 // = (e^lo - 2^(-hi - mid)) / (e^lo + 2^(-hi - mid))
116 // = (r - mh) / (r + mh)
117 return static_cast<float>((r
- mh
) / (r
+ mh
));
120 } // namespace LIBC_NAMESPACE_DECL