libc/src/__support/FPUtil/NormalFloat.h

   1 //===-- A class to store a normalized floating point number -----*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 #ifndef LLVM_LIBC_SRC_SUPPORT_FPUTIL_NORMAL_FLOAT_H
  10 #define LLVM_LIBC_SRC_SUPPORT_FPUTIL_NORMAL_FLOAT_H
  11
  12 #include "FPBits.h"
  13
  14 #include "src/__support/CPP/type_traits.h"
  15 #include "src/__support/common.h"
  16
  17 #include <stdint.h>
  18
  19 namespace __llvm_libc {
  20 namespace fputil {
  21
// A class which stores the normalized form of a floating point value.
// The special IEEE-754 bit patterns of zero, infinity and NaNs are
// not handled by this class.
//
// A normalized floating point number is of this form:
//    (-1)^sign * 2^exponent * <mantissa>
// where <mantissa> is of the form 1.<...>.
  29 template <typename T> struct NormalFloat {
  30   static_assert(
  31       cpp::is_floating_point_v<T>,
  32       "NormalFloat template parameter has to be a floating point type.");
  33
  34   using UIntType = typename FPBits<T>::UIntType;
  35   static constexpr UIntType ONE = (UIntType(1) << MantissaWidth<T>::VALUE);
  36
  37   // Unbiased exponent value.
  38   int32_t exponent;
  39
  40   UIntType mantissa;
  41   // We want |UIntType| to have atleast one bit more than the actual mantissa
  42   // bit width to accommodate the implicit 1 value.
  43   static_assert(sizeof(UIntType) * 8 >= MantissaWidth<T>::VALUE + 1,
  44                 "Bad type for mantissa in NormalFloat.");
  45
  46   bool sign;
  47
  48   LIBC_INLINE NormalFloat(int32_t e, UIntType m, bool s)
  49       : exponent(e), mantissa(m), sign(s) {
  50     if (mantissa >= ONE)
  51       return;
  52
  53     unsigned normalization_shift = evaluate_normalization_shift(mantissa);
  54     mantissa = mantissa << normalization_shift;
  55     exponent -= normalization_shift;
  56   }
  57
  58   LIBC_INLINE explicit NormalFloat(T x) { init_from_bits(FPBits<T>(x)); }
  59
  60   LIBC_INLINE explicit NormalFloat(FPBits<T> bits) { init_from_bits(bits); }
  61
  62   // Compares this normalized number with another normalized number.
  63   // Returns -1 is this number is less than |other|, 0 if this number is equal
  64   // to |other|, and 1 if this number is greater than |other|.
  65   LIBC_INLINE int cmp(const NormalFloat<T> &other) const {
  66     if (sign != other.sign)
  67       return sign ? -1 : 1;
  68
  69     if (exponent > other.exponent) {
  70       return sign ? -1 : 1;
  71     } else if (exponent == other.exponent) {
  72       if (mantissa > other.mantissa)
  73         return sign ? -1 : 1;
  74       else if (mantissa == other.mantissa)
  75         return 0;
  76       else
  77         return sign ? 1 : -1;
  78     } else {
  79       return sign ? 1 : -1;
  80     }
  81   }
  82
  83   // Returns a new normalized floating point number which is equal in value
  84   // to this number multiplied by 2^e. That is:
  85   //     new = this *  2^e
  86   LIBC_INLINE NormalFloat<T> mul2(int e) const {
  87     NormalFloat<T> result = *this;
  88     result.exponent += e;
  89     return result;
  90   }
  91
  92   LIBC_INLINE operator T() const {
  93     int biased_exponent = exponent + FPBits<T>::EXPONENT_BIAS;
  94     // Max exponent is of the form 0xFF...E. That is why -2 and not -1.
  95     constexpr int MAX_EXPONENT_VALUE = (1 << ExponentWidth<T>::VALUE) - 2;
  96     if (biased_exponent > MAX_EXPONENT_VALUE) {
  97       return sign ? T(FPBits<T>::neg_inf()) : T(FPBits<T>::inf());
  98     }
  99
 100     FPBits<T> result(T(0.0));
 101     result.set_sign(sign);
 102
 103     constexpr int SUBNORMAL_EXPONENT = -FPBits<T>::EXPONENT_BIAS + 1;
 104     if (exponent < SUBNORMAL_EXPONENT) {
 105       unsigned shift = SUBNORMAL_EXPONENT - exponent;
 106       // Since exponent > subnormalExponent, shift is strictly greater than
 107       // zero.
 108       if (shift <= MantissaWidth<T>::VALUE + 1) {
 109         // Generate a subnormal number. Might lead to loss of precision.
 110         // We round to nearest and round halfway cases to even.
 111         const UIntType shift_out_mask = (UIntType(1) << shift) - 1;
 112         const UIntType shift_out_value = mantissa & shift_out_mask;
 113         const UIntType halfway_value = UIntType(1) << (shift - 1);
 114         result.set_unbiased_exponent(0);
 115         result.set_mantissa(mantissa >> shift);
 116         UIntType new_mantissa = result.get_mantissa();
 117         if (shift_out_value > halfway_value) {
 118           new_mantissa += 1;
 119         } else if (shift_out_value == halfway_value) {
 120           // Round to even.
 121           if (result.get_mantissa() & 0x1)
 122             new_mantissa += 1;
 123         }
 124         result.set_mantissa(new_mantissa);
 125         // Adding 1 to mantissa can lead to overflow. This can only happen if
 126         // mantissa was all ones (0b111..11). For such a case, we will carry
 127         // the overflow into the exponent.
 128         if (new_mantissa == ONE)
 129           result.set_unbiased_exponent(1);
 130         return T(result);
 131       } else {
 132         return T(result);
 133       }
 134     }
 135
 136     result.set_unbiased_exponent(exponent + FPBits<T>::EXPONENT_BIAS);
 137     result.set_mantissa(mantissa);
 138     return T(result);
 139   }
 140
 141 private:
 142   LIBC_INLINE void init_from_bits(FPBits<T> bits) {
 143     sign = bits.get_sign();
 144
 145     if (bits.is_inf_or_nan() || bits.is_zero()) {
 146       // Ignore special bit patterns. Implementations deal with them separately
 147       // anyway so this should not be a problem.
 148       exponent = 0;
 149       mantissa = 0;
 150       return;
 151     }
 152
 153     // Normalize subnormal numbers.
 154     if (bits.get_unbiased_exponent() == 0) {
 155       unsigned shift = evaluate_normalization_shift(bits.get_mantissa());
 156       mantissa = UIntType(bits.get_mantissa()) << shift;
 157       exponent = 1 - FPBits<T>::EXPONENT_BIAS - shift;
 158     } else {
 159       exponent = bits.get_unbiased_exponent() - FPBits<T>::EXPONENT_BIAS;
 160       mantissa = ONE | bits.get_mantissa();
 161     }
 162   }
 163
 164   LIBC_INLINE unsigned evaluate_normalization_shift(UIntType m) {
 165     unsigned shift = 0;
 166     for (; (ONE & m) == 0 && (shift < MantissaWidth<T>::VALUE);
 167          m <<= 1, ++shift)
 168       ;
 169     return shift;
 170   }
 171 };
 172
 173 #ifdef SPECIAL_X86_LONG_DOUBLE
 174 template <>
 175 LIBC_INLINE void
 176 NormalFloat<long double>::init_from_bits(FPBits<long double> bits) {
 177   sign = bits.get_sign();
 178
 179   if (bits.is_inf_or_nan() || bits.is_zero()) {
 180     // Ignore special bit patterns. Implementations deal with them separately
 181     // anyway so this should not be a problem.
 182     exponent = 0;
 183     mantissa = 0;
 184     return;
 185   }
 186
 187   if (bits.get_unbiased_exponent() == 0) {
 188     if (bits.get_implicit_bit() == 0) {
 189       // Since we ignore zero value, the mantissa in this case is non-zero.
 190       int normalization_shift =
 191           evaluate_normalization_shift(bits.get_mantissa());
 192       exponent = -16382 - normalization_shift;
 193       mantissa = (bits.get_mantissa() << normalization_shift);
 194     } else {
 195       exponent = -16382;
 196       mantissa = ONE | bits.get_mantissa();
 197     }
 198   } else {
 199     if (bits.get_implicit_bit() == 0) {
 200       // Invalid number so just store 0 similar to a NaN.
 201       exponent = 0;
 202       mantissa = 0;
 203     } else {
 204       exponent = bits.get_unbiased_exponent() - 16383;
 205       mantissa = ONE | bits.get_mantissa();
 206     }
 207   }
 208 }
 209
 210 template <> LIBC_INLINE NormalFloat<long double>::operator long double() const {
 211   int biased_exponent = exponent + FPBits<long double>::EXPONENT_BIAS;
 212   // Max exponent is of the form 0xFF...E. That is why -2 and not -1.
 213   constexpr int MAX_EXPONENT_VALUE =
 214       (1 << ExponentWidth<long double>::VALUE) - 2;
 215   if (biased_exponent > MAX_EXPONENT_VALUE) {
 216     return sign ? FPBits<long double>::neg_inf() : FPBits<long double>::inf();
 217   }
 218
 219   FPBits<long double> result(0.0l);
 220   result.set_sign(sign);
 221
 222   constexpr int SUBNORMAL_EXPONENT = -FPBits<long double>::EXPONENT_BIAS + 1;
 223   if (exponent < SUBNORMAL_EXPONENT) {
 224     unsigned shift = SUBNORMAL_EXPONENT - exponent;
 225     if (shift <= MantissaWidth<long double>::VALUE + 1) {
 226       // Generate a subnormal number. Might lead to loss of precision.
 227       // We round to nearest and round halfway cases to even.
 228       const UIntType shift_out_mask = (UIntType(1) << shift) - 1;
 229       const UIntType shift_out_value = mantissa & shift_out_mask;
 230       const UIntType halfway_value = UIntType(1) << (shift - 1);
 231       result.set_unbiased_exponent(0);
 232       result.set_mantissa(mantissa >> shift);
 233       UIntType new_mantissa = result.get_mantissa();
 234       if (shift_out_value > halfway_value) {
 235         new_mantissa += 1;
 236       } else if (shift_out_value == halfway_value) {
 237         // Round to even.
 238         if (result.get_mantissa() & 0x1)
 239           new_mantissa += 1;
 240       }
 241       result.set_mantissa(new_mantissa);
 242       // Adding 1 to mantissa can lead to overflow. This can only happen if
 243       // mantissa was all ones (0b111..11). For such a case, we will carry
 244       // the overflow into the exponent and set the implicit bit to 1.
 245       if (new_mantissa == ONE) {
 246         result.set_unbiased_exponent(1);
 247         result.set_implicit_bit(1);
 248       } else {
 249         result.set_implicit_bit(0);
 250       }
 251       return static_cast<long double>(result);
 252     } else {
 253       return static_cast<long double>(result);
 254     }
 255   }
 256
 257   result.set_unbiased_exponent(biased_exponent);
 258   result.set_mantissa(mantissa);
 259   result.set_implicit_bit(1);
 260   return static_cast<long double>(result);
 261 }
 262 #endif // SPECIAL_X86_LONG_DOUBLE
 263
 264 } // namespace fputil
 265 } // namespace __llvm_libc
 266
 267 #endif // LLVM_LIBC_SRC_SUPPORT_FPUTIL_NORMAL_FLOAT_H