libc/src/__support/FPUtil/double_double.h

   1 //===-- Utilities for double-double data type. ------------------*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 #ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_DOUBLE_DOUBLE_H
  10 #define LLVM_LIBC_SRC___SUPPORT_FPUTIL_DOUBLE_DOUBLE_H
  11
  12 #include "multiply_add.h"
  13 #include "src/__support/common.h"
  14 #include "src/__support/macros/config.h"
  15 #include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
  16 #include "src/__support/number_pair.h"
  17
  18 namespace LIBC_NAMESPACE_DECL {
  19 namespace fputil {
  20
  21 #define DEFAULT_DOUBLE_SPLIT 27
  22
  23 using DoubleDouble = LIBC_NAMESPACE::NumberPair<double>;
  24
  25 // The output of Dekker's FastTwoSum algorithm is correct, i.e.:
  26 //   r.hi + r.lo = a + b exactly
  27 //   and |r.lo| < eps(r.lo)
  28 // Assumption: |a| >= |b|, or a = 0.
  29 template <bool FAST2SUM = true>
  30 LIBC_INLINE constexpr DoubleDouble exact_add(double a, double b) {
  31   DoubleDouble r{0.0, 0.0};
  32   if constexpr (FAST2SUM) {
  33     r.hi = a + b;
  34     double t = r.hi - a;
  35     r.lo = b - t;
  36   } else {
  37     r.hi = a + b;
  38     double t1 = r.hi - a;
  39     double t2 = r.hi - t1;
  40     double t3 = b - t1;
  41     double t4 = a - t2;
  42     r.lo = t3 + t4;
  43   }
  44   return r;
  45 }
  46
  47 // Assumption: |a.hi| >= |b.hi|
  48 LIBC_INLINE constexpr DoubleDouble add(const DoubleDouble &a,
  49                                        const DoubleDouble &b) {
  50   DoubleDouble r = exact_add(a.hi, b.hi);
  51   double lo = a.lo + b.lo;
  52   return exact_add(r.hi, r.lo + lo);
  53 }
  54
  55 // Assumption: |a.hi| >= |b|
  56 LIBC_INLINE constexpr DoubleDouble add(const DoubleDouble &a, double b) {
  57   DoubleDouble r = exact_add<false>(a.hi, b);
  58   return exact_add(r.hi, r.lo + a.lo);
  59 }
  60
  61 // Veltkamp's Splitting for double precision.
  62 // Note: This is proved to be correct for all rounding modes:
  63 //   Zimmermann, P., "Note on the Veltkamp/Dekker Algorithms with Directed
  64 //   Roundings," https://inria.hal.science/hal-04480440.
  65 // Default splitting constant = 2^ceil(prec(double)/2) + 1 = 2^27 + 1.
  66 template <size_t N = DEFAULT_DOUBLE_SPLIT>
  67 LIBC_INLINE constexpr DoubleDouble split(double a) {
  68   DoubleDouble r{0.0, 0.0};
  69   // CN = 2^N.
  70   constexpr double CN = static_cast<double>(1 << N);
  71   constexpr double C = CN + 1.0;
  72   double t1 = C * a;
  73   double t2 = a - t1;
  74   r.hi = t1 + t2;
  75   r.lo = a - r.hi;
  76   return r;
  77 }
  78
  79 // Helper for non-fma exact mult where the first number is already split.
  80 template <size_t SPLIT_B = DEFAULT_DOUBLE_SPLIT>
  81 LIBC_INLINE DoubleDouble exact_mult(const DoubleDouble &as, double a,
  82                                     double b) {
  83   DoubleDouble bs = split<SPLIT_B>(b);
  84   DoubleDouble r{0.0, 0.0};
  85
  86   r.hi = a * b;
  87   double t1 = as.hi * bs.hi - r.hi;
  88   double t2 = as.hi * bs.lo + t1;
  89   double t3 = as.lo * bs.hi + t2;
  90   r.lo = as.lo * bs.lo + t3;
  91
  92   return r;
  93 }
  94
  95 // Note: When FMA instruction is not available, the `exact_mult` function is
  96 // only correct for round-to-nearest mode.  See:
  97 //   Zimmermann, P., "Note on the Veltkamp/Dekker Algorithms with Directed
  98 //   Roundings," https://inria.hal.science/hal-04480440.
  99 // Using Theorem 1 in the paper above, without FMA instruction, if we restrict
 100 // the generated constants to precision <= 51, and splitting it by 2^28 + 1,
 101 // then a * b = r.hi + r.lo is exact for all rounding modes.
 102 template <size_t SPLIT_B = 27>
 103 LIBC_INLINE DoubleDouble exact_mult(double a, double b) {
 104   DoubleDouble r{0.0, 0.0};
 105
 106 #ifdef LIBC_TARGET_CPU_HAS_FMA
 107   r.hi = a * b;
 108   r.lo = fputil::multiply_add(a, b, -r.hi);
 109 #else
 110   // Dekker's Product.
 111   DoubleDouble as = split(a);
 112
 113   r = exact_mult<SPLIT_B>(as, a, b);
 114 #endif // LIBC_TARGET_CPU_HAS_FMA
 115
 116   return r;
 117 }
 118
 119 LIBC_INLINE DoubleDouble quick_mult(double a, const DoubleDouble &b) {
 120   DoubleDouble r = exact_mult(a, b.hi);
 121   r.lo = multiply_add(a, b.lo, r.lo);
 122   return r;
 123 }
 124
 125 template <size_t SPLIT_B = 27>
 126 LIBC_INLINE DoubleDouble quick_mult(const DoubleDouble &a,
 127                                     const DoubleDouble &b) {
 128   DoubleDouble r = exact_mult<SPLIT_B>(a.hi, b.hi);
 129   double t1 = multiply_add(a.hi, b.lo, r.lo);
 130   double t2 = multiply_add(a.lo, b.hi, t1);
 131   r.lo = t2;
 132   return r;
 133 }
 134
 135 // Assuming |c| >= |a * b|.
 136 template <>
 137 LIBC_INLINE DoubleDouble multiply_add<DoubleDouble>(const DoubleDouble &a,
 138                                                     const DoubleDouble &b,
 139                                                     const DoubleDouble &c) {
 140   return add(c, quick_mult(a, b));
 141 }
 142
 143 // Accurate double-double division, following Karp-Markstein's trick for
 144 // division, implemented in the CORE-MATH project at:
 145 // https://gitlab.inria.fr/core-math/core-math/-/blob/master/src/binary64/tan/tan.c#L1855
 146 //
 147 // Error bounds:
 148 // Let a = ah + al, b = bh + bl.
 149 // Let r = rh + rl be the approximation of (ah + al) / (bh + bl).
 150 // Then:
 151 //   (ah + al) / (bh + bl) - rh =
 152 // = ((ah - bh * rh) + (al - bl * rh)) / (bh + bl)
 153 // = (1 + O(bl/bh)) * ((ah - bh * rh) + (al - bl * rh)) / bh
 154 // Let q = round(1/bh), then the above expressions are approximately:
 155 // = (1 + O(bl / bh)) * (1 + O(2^-52)) * q * ((ah - bh * rh) + (al - bl * rh))
 156 // So we can compute:
 157 //   rl = q * (ah - bh * rh) + q * (al - bl * rh)
 158 // as accurate as possible, then the error is bounded by:
 159 //   |(ah + al) / (bh + bl) - (rh + rl)| < O(bl/bh) * (2^-52 + al/ah + bl/bh)
 160 LIBC_INLINE DoubleDouble div(const DoubleDouble &a, const DoubleDouble &b) {
 161   DoubleDouble r;
 162   double q = 1.0 / b.hi;
 163   r.hi = a.hi * q;
 164
 165 #ifdef LIBC_TARGET_CPU_HAS_FMA
 166   double e_hi = fputil::multiply_add(b.hi, -r.hi, a.hi);
 167   double e_lo = fputil::multiply_add(b.lo, -r.hi, a.lo);
 168 #else
 169   DoubleDouble b_hi_r_hi = fputil::exact_mult(b.hi, -r.hi);
 170   DoubleDouble b_lo_r_hi = fputil::exact_mult(b.lo, -r.hi);
 171   double e_hi = (a.hi + b_hi_r_hi.hi) + b_hi_r_hi.lo;
 172   double e_lo = (a.lo + b_lo_r_hi.hi) + b_lo_r_hi.lo;
 173 #endif // LIBC_TARGET_CPU_HAS_FMA
 174
 175   r.lo = q * (e_hi + e_lo);
 176   return r;
 177 }
 178
 179 } // namespace fputil
 180 } // namespace LIBC_NAMESPACE_DECL
 181
 182 #endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_DOUBLE_DOUBLE_H