1 //===-- Utilities for double-double data type. ------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_DOUBLE_DOUBLE_H
10 #define LLVM_LIBC_SRC___SUPPORT_FPUTIL_DOUBLE_DOUBLE_H
12 #include "multiply_add.h"
13 #include "src/__support/common.h"
14 #include "src/__support/macros/config.h"
15 #include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
16 #include "src/__support/number_pair.h"
18 namespace LIBC_NAMESPACE_DECL
{
21 #define DEFAULT_DOUBLE_SPLIT 27
23 using DoubleDouble
= LIBC_NAMESPACE::NumberPair
<double>;
25 // The output of Dekker's FastTwoSum algorithm is correct, i.e.:
26 // r.hi + r.lo = a + b exactly
27 // and |r.lo| < eps(r.lo)
28 // Assumption: |a| >= |b|, or a = 0.
29 template <bool FAST2SUM
= true>
30 LIBC_INLINE
constexpr DoubleDouble
exact_add(double a
, double b
) {
31 DoubleDouble r
{0.0, 0.0};
32 if constexpr (FAST2SUM
) {
39 double t2
= r
.hi
- t1
;
47 // Assumption: |a.hi| >= |b.hi|
48 LIBC_INLINE
constexpr DoubleDouble
add(const DoubleDouble
&a
,
49 const DoubleDouble
&b
) {
50 DoubleDouble r
= exact_add(a
.hi
, b
.hi
);
51 double lo
= a
.lo
+ b
.lo
;
52 return exact_add(r
.hi
, r
.lo
+ lo
);
55 // Assumption: |a.hi| >= |b|
56 LIBC_INLINE
constexpr DoubleDouble
add(const DoubleDouble
&a
, double b
) {
57 DoubleDouble r
= exact_add
<false>(a
.hi
, b
);
58 return exact_add(r
.hi
, r
.lo
+ a
.lo
);
61 // Veltkamp's Splitting for double precision.
62 // Note: This is proved to be correct for all rounding modes:
63 // Zimmermann, P., "Note on the Veltkamp/Dekker Algorithms with Directed
64 // Roundings," https://inria.hal.science/hal-04480440.
65 // Default splitting constant = 2^ceil(prec(double)/2) + 1 = 2^27 + 1.
66 template <size_t N
= DEFAULT_DOUBLE_SPLIT
>
67 LIBC_INLINE
constexpr DoubleDouble
split(double a
) {
68 DoubleDouble r
{0.0, 0.0};
70 constexpr double CN
= static_cast<double>(1 << N
);
71 constexpr double C
= CN
+ 1.0;
79 // Helper for non-fma exact mult where the first number is already split.
80 template <size_t SPLIT_B
= DEFAULT_DOUBLE_SPLIT
>
81 LIBC_INLINE DoubleDouble
exact_mult(const DoubleDouble
&as
, double a
,
83 DoubleDouble bs
= split
<SPLIT_B
>(b
);
84 DoubleDouble r
{0.0, 0.0};
87 double t1
= as
.hi
* bs
.hi
- r
.hi
;
88 double t2
= as
.hi
* bs
.lo
+ t1
;
89 double t3
= as
.lo
* bs
.hi
+ t2
;
90 r
.lo
= as
.lo
* bs
.lo
+ t3
;
95 // Note: When FMA instruction is not available, the `exact_mult` function is
96 // only correct for round-to-nearest mode. See:
97 // Zimmermann, P., "Note on the Veltkamp/Dekker Algorithms with Directed
98 // Roundings," https://inria.hal.science/hal-04480440.
99 // Using Theorem 1 in the paper above, without FMA instruction, if we restrict
100 // the generated constants to precision <= 51, and splitting it by 2^28 + 1,
101 // then a * b = r.hi + r.lo is exact for all rounding modes.
102 template <size_t SPLIT_B
= 27>
103 LIBC_INLINE DoubleDouble
exact_mult(double a
, double b
) {
104 DoubleDouble r
{0.0, 0.0};
106 #ifdef LIBC_TARGET_CPU_HAS_FMA
108 r
.lo
= fputil::multiply_add(a
, b
, -r
.hi
);
111 DoubleDouble as
= split(a
);
113 r
= exact_mult
<SPLIT_B
>(as
, a
, b
);
114 #endif // LIBC_TARGET_CPU_HAS_FMA
119 LIBC_INLINE DoubleDouble
quick_mult(double a
, const DoubleDouble
&b
) {
120 DoubleDouble r
= exact_mult(a
, b
.hi
);
121 r
.lo
= multiply_add(a
, b
.lo
, r
.lo
);
125 template <size_t SPLIT_B
= 27>
126 LIBC_INLINE DoubleDouble
quick_mult(const DoubleDouble
&a
,
127 const DoubleDouble
&b
) {
128 DoubleDouble r
= exact_mult
<SPLIT_B
>(a
.hi
, b
.hi
);
129 double t1
= multiply_add(a
.hi
, b
.lo
, r
.lo
);
130 double t2
= multiply_add(a
.lo
, b
.hi
, t1
);
135 // Assuming |c| >= |a * b|.
137 LIBC_INLINE DoubleDouble multiply_add
<DoubleDouble
>(const DoubleDouble
&a
,
138 const DoubleDouble
&b
,
139 const DoubleDouble
&c
) {
140 return add(c
, quick_mult(a
, b
));
143 // Accurate double-double division, following Karp-Markstein's trick for
144 // division, implemented in the CORE-MATH project at:
145 // https://gitlab.inria.fr/core-math/core-math/-/blob/master/src/binary64/tan/tan.c#L1855
148 // Let a = ah + al, b = bh + bl.
149 // Let r = rh + rl be the approximation of (ah + al) / (bh + bl).
151 // (ah + al) / (bh + bl) - rh =
152 // = ((ah - bh * rh) + (al - bl * rh)) / (bh + bl)
153 // = (1 + O(bl/bh)) * ((ah - bh * rh) + (al - bl * rh)) / bh
154 // Let q = round(1/bh), then the above expressions are approximately:
155 // = (1 + O(bl / bh)) * (1 + O(2^-52)) * q * ((ah - bh * rh) + (al - bl * rh))
156 // So we can compute:
157 // rl = q * (ah - bh * rh) + q * (al - bl * rh)
158 // as accurate as possible, then the error is bounded by:
159 // |(ah + al) / (bh + bl) - (rh + rl)| < O(bl/bh) * (2^-52 + al/ah + bl/bh)
160 LIBC_INLINE DoubleDouble
div(const DoubleDouble
&a
, const DoubleDouble
&b
) {
162 double q
= 1.0 / b
.hi
;
165 #ifdef LIBC_TARGET_CPU_HAS_FMA
166 double e_hi
= fputil::multiply_add(b
.hi
, -r
.hi
, a
.hi
);
167 double e_lo
= fputil::multiply_add(b
.lo
, -r
.hi
, a
.lo
);
169 DoubleDouble b_hi_r_hi
= fputil::exact_mult(b
.hi
, -r
.hi
);
170 DoubleDouble b_lo_r_hi
= fputil::exact_mult(b
.lo
, -r
.hi
);
171 double e_hi
= (a
.hi
+ b_hi_r_hi
.hi
) + b_hi_r_hi
.lo
;
172 double e_lo
= (a
.lo
+ b_lo_r_hi
.hi
) + b_lo_r_hi
.lo
;
173 #endif // LIBC_TARGET_CPU_HAS_FMA
175 r
.lo
= q
* (e_hi
+ e_lo
);
179 } // namespace fputil
180 } // namespace LIBC_NAMESPACE_DECL
182 #endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_DOUBLE_DOUBLE_H