1 //===-- Extra range reduction steps for accurate pass of logarithms -------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_LIBC_SRC_MATH_GENERIC_LOG_RANGE_REDUCTION_H
10 #define LLVM_LIBC_SRC_MATH_GENERIC_LOG_RANGE_REDUCTION_H
12 #include "common_constants.h"
13 #include "src/__support/FPUtil/dyadic_float.h"
14 #include "src/__support/macros/config.h"
15 #include "src/__support/uint128.h"
17 namespace LIBC_NAMESPACE_DECL
{
19 // Struct to store -log*(r) for 4 range reduction steps.
21 fputil::DyadicFloat
<128> step_1
[128];
22 fputil::DyadicFloat
<128> step_2
[193];
23 fputil::DyadicFloat
<128> step_3
[161];
24 fputil::DyadicFloat
<128> step_4
[130];
27 // Perform logarithm range reduction steps 2-4.
28 // Inputs from the first step of range reduction:
29 // m_x : the reduced argument after the first step of range reduction
30 // satisfying -2^-8 <= m_x < 2^-7 and ulp(m_x) >= 2^-60.
31 // idx1: index of the -log(r1) table from the first step.
32 // Outputs of the extra range reduction steps:
33 // sum: adding -log(r1) - log(r2) - log(r3) - log(r4) to the resulted sum.
34 // return value: the reduced argument v satisfying:
35 // -0x1.0002143p-29 <= v < 0x1p-29, and ulp(v) >= 2^(-125).
36 LIBC_INLINE
fputil::DyadicFloat
<128>
37 log_range_reduction(double m_x
, const LogRR
&log_table
,
38 fputil::DyadicFloat
<128> &sum
) {
39 using Float128
= typename
fputil::DyadicFloat
<128>;
40 using MType
= typename
Float128::MantissaType
;
42 int64_t v
= static_cast<int64_t>(m_x
* 0x1.0p60
); // ulp = 2^-60
44 // Range reduction - Step 2
45 // Output range: vv2 in [-0x1.3ffcp-15, 0x1.3e3dp-15].
46 // idx2 = trunc(2^14 * (v + 2^-8 + 2^-15))
47 size_t idx2
= static_cast<size_t>((v
+ 0x10'2000'0000'0000) >> 46);
48 sum
= fputil::quick_add(sum
, log_table
.step_2
[idx2
]);
50 int64_t s2
= static_cast<int64_t>(S2
[idx2
]); // |s| <= 2^-7, ulp = 2^-16
51 int64_t sv2
= s2
* v
; // |s*v| < 2^-14, ulp = 2^(-60-16) = 2^-76
52 int64_t spv2
= (s2
<< 44) + v
; // |s + v| < 2^-14, ulp = 2^-60
53 int64_t vv2
= (spv2
<< 16) + sv2
; // |vv2| < 2^-14, ulp = 2^-76
55 // Range reduction - Step 3
56 // Output range: vv3 in [-0x1.01928p-22 , 0x1p-22]
57 // idx3 = trunc(2^21 * (v + 80*2^-21 + 2^-22))
58 size_t idx3
= static_cast<size_t>((vv2
+ 0x2840'0000'0000'0000) >> 55);
59 sum
= fputil::quick_add(sum
, log_table
.step_3
[idx3
]);
61 int64_t s3
= static_cast<int64_t>(S3
[idx3
]); // |s| < 2^-13, ulp = 2^-21
62 int64_t spv3
= (s3
<< 55) + vv2
; // |s + v| < 2^-21, ulp = 2^-76
63 // |s*v| < 2^-27, ulp = 2^(-76-21) = 2^-97
64 Int128 sv3
= static_cast<Int128
>(s3
) * static_cast<Int128
>(vv2
);
65 // |vv3| < 2^-21, ulp = 2^-97
66 Int128 vv3
= (static_cast<Int128
>(spv3
) << 21) + sv3
;
68 // Range reduction - Step 4
69 // Output range: vv4 in [-0x1.0002143p-29 , 0x1p-29]
70 // idx4 = trunc(2^21 * (v + 65*2^-28 + 2^-29))
71 size_t idx4
= static_cast<size_t>((static_cast<int>(vv3
>> 68) + 131) >> 1);
73 sum
= fputil::quick_add(sum
, log_table
.step_4
[idx4
]);
75 Int128 s4
= static_cast<Int128
>(S4
[idx4
]); // |s| < 2^-21, ulp = 2^-28
76 // |s + v| < 2^-28, ulp = 2^-97
77 Int128 spv4
= (s4
<< 69) + vv3
;
78 // |s*v| < 2^-42, ulp = 2^(-97-28) = 2^-125
79 Int128 sv4
= s4
* vv3
;
80 // |vv4| < 2^-28, ulp = 2^-125
81 Int128 vv4
= (spv4
<< 28) + sv4
;
83 return (vv4
< 0) ? Float128(Sign::NEG
, -125,
84 MType({static_cast<uint64_t>(-vv4
),
85 static_cast<uint64_t>((-vv4
) >> 64)}))
86 : Float128(Sign::POS
, -125,
87 MType({static_cast<uint64_t>(vv4
),
88 static_cast<uint64_t>(vv4
>> 64)}));
91 } // namespace LIBC_NAMESPACE_DECL
93 #endif // LLVM_LIBC_SRC_MATH_GENERIC_LOG_RANGE_REDUCTION_H