libc/src/__support/FPUtil/XFloat.h

   1 //===-- Utility class to manipulate wide floats. ----------------*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 #include "FPBits.h"
  10 #include "NormalFloat.h"
  11 #include "src/__support/UInt.h"
  12
  13 #include <stdint.h>
  14
  15 namespace __llvm_libc {
  16 namespace fputil {
  17
  18 // Store and manipulate positive double precision numbers at |Precision| bits.
  19 template <size_t Precision> class XFloat {
  20   static constexpr uint64_t OneMask = (uint64_t(1) << 63);
  21   UInt<Precision> man;
  22   static constexpr uint64_t WORDCOUNT = Precision / 64;
  23   int exp;
  24
  25   size_t bit_width(uint64_t x) {
  26     if (x == 0)
  27       return 0;
  28     size_t shift = 0;
  29     while ((OneMask & x) == 0) {
  30       ++shift;
  31       x <<= 1;
  32     }
  33     return 64 - shift;
  34   }
  35
  36 public:
  37   XFloat() : exp(0) {
  38     for (int i = 0; i < WORDCOUNT; ++i)
  39       man[i] = 0;
  40   }
  41
  42   XFloat(const XFloat &other) : exp(other.exp) {
  43     for (int i = 0; i < WORDCOUNT; ++i)
  44       man[i] = other.man[i];
  45   }
  46
  47   explicit XFloat(double x) {
  48     auto xn = NormalFloat<double>(x);
  49     exp = xn.exponent;
  50     man[WORDCOUNT - 1] = xn.mantissa << 11;
  51     for (int i = 0; i < WORDCOUNT - 1; ++i)
  52       man[i] = 0;
  53   }
  54
  55   XFloat(int e, const UInt<Precision> &bits) : exp(e) {
  56     for (size_t i = 0; i < WORDCOUNT; ++i)
  57       man[i] = bits[i];
  58   }
  59
  60   // Multiply this number with x and store the result in this number.
  61   void mul(double x) {
  62     auto xn = NormalFloat<double>(x);
  63     exp += xn.exponent;
  64     uint64_t carry = man.mul(xn.mantissa << 11);
  65     size_t carry_width = bit_width(carry);
  66     carry_width = (carry_width == 64 ? 64 : 63);
  67     man.shift_right(carry_width);
  68     man[WORDCOUNT - 1] = man[WORDCOUNT - 1] + (carry << (64 - carry_width));
  69     exp += carry_width == 64 ? 1 : 0;
  70     normalize();
  71   }
  72
  73   void drop_int() {
  74     if (exp < 0)
  75       return;
  76     if (exp > int(Precision - 1)) {
  77       for (size_t i = 0; i < WORDCOUNT; ++i)
  78         man[i] = 0;
  79       return;
  80     }
  81
  82     man.shift_left(exp + 1);
  83     man.shift_right(exp + 1);
  84
  85     normalize();
  86   }
  87
  88   double mul(const XFloat<Precision> &other) {
  89     constexpr size_t row_words = 2 * WORDCOUNT + 1;
  90     constexpr size_t row_precision = row_words * 64;
  91     constexpr size_t result_bits = 2 * Precision;
  92     UInt<row_precision> rows[WORDCOUNT];
  93
  94     for (size_t r = 0; r < WORDCOUNT; ++r) {
  95       for (size_t i = 0; i < row_words; ++i) {
  96         if (i < WORDCOUNT)
  97           rows[r][i] = man[i];
  98         else
  99           rows[r][i] = 0;
 100       }
 101       rows[r].mul(other.man[r]);
 102       rows[r].shift_left(r * 64);
 103     }
 104
 105     for (size_t r = 1; r < WORDCOUNT; ++r) {
 106       rows[0].add(rows[r]);
 107     }
 108     int result_exp = exp + other.exp;
 109     uint64_t carry = rows[0][row_words - 1];
 110     if (carry) {
 111       size_t carry_width = bit_width(carry);
 112       rows[0].shift_right(carry_width);
 113       rows[0][row_words - 2] =
 114           rows[0][row_words - 2] + (carry << (64 - carry_width));
 115       result_exp += carry_width;
 116     }
 117
 118     if (rows[0][row_words - 2] & OneMask) {
 119       ++result_exp;
 120     } else {
 121       rows[0].shift_left(1);
 122     }
 123
 124     UInt<result_bits> result_man;
 125     for (size_t i = 0; i < result_bits / 64; ++i)
 126       result_man[i] = rows[0][i];
 127     XFloat<result_bits> result(result_exp, result_man);
 128     result.normalize();
 129     return double(result);
 130   }
 131
 132   explicit operator double() {
 133     normalize();
 134
 135     constexpr uint64_t one = uint64_t(1) << 10;
 136     constexpr uint64_t excess_mask = (one << 1) - 1;
 137     uint64_t excess = man[WORDCOUNT - 1] & excess_mask;
 138     uint64_t new_man = man[WORDCOUNT - 1] >> 11;
 139     if (excess > one) {
 140       // We have to round up.
 141       ++new_man;
 142     } else if (excess == one) {
 143       bool greater_than_one = false;
 144       for (size_t i = 0; i < WORDCOUNT - 1; ++i) {
 145         greater_than_one = (man[i] != 0);
 146         if (greater_than_one)
 147           break;
 148       }
 149       if (greater_than_one || (new_man & 1) != 0) {
 150         ++new_man;
 151       }
 152     }
 153
 154     if (new_man == (uint64_t(1) << 53))
 155       ++exp;
 156
 157     // We use NormalFloat as it can produce subnormal numbers or underflow to 0
 158     // if necessary.
 159     NormalFloat<double> d(exp, new_man, 0);
 160     return double(d);
 161   }
 162
 163   // Normalizes this number.
 164   void normalize() {
 165     uint64_t man_bits = 0;
 166     for (size_t i = 0; i < WORDCOUNT; ++i)
 167       man_bits |= man[i];
 168
 169     if (man_bits == 0)
 170       return;
 171
 172     while ((man[WORDCOUNT - 1] & OneMask) == 0) {
 173       man.shift_left(1);
 174       --exp;
 175     }
 176   }
 177 };
 178
 179 } // namespace fputil
 180 } // namespace __llvm_libc