libc/src/__support/FPUtil/NormalFloat.h

   1 //===-- A class to store a normalized floating point number -----*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 #ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_NORMALFLOAT_H
  10 #define LLVM_LIBC_SRC___SUPPORT_FPUTIL_NORMALFLOAT_H
  11
  12 #include "FPBits.h"
  13
  14 #include "src/__support/CPP/type_traits.h"
  15 #include "src/__support/common.h"
  16 #include "src/__support/macros/config.h"
  17
  18 #include <stdint.h>
  19
  20 namespace LIBC_NAMESPACE_DECL {
  21 namespace fputil {
  22
  23 // A class which stores the normalized form of a floating point value.
  24 // The special IEEE-754 bits patterns of Zero, infinity and NaNs are
  25 // are not handled by this class.
  26 //
  27 // A normalized floating point number is of this form:
  28 //    (-1)*sign * 2^exponent * <mantissa>
  29 // where <mantissa> is of the form 1.<...>.
  30 template <typename T> struct NormalFloat {
  31   static_assert(
  32       cpp::is_floating_point_v<T>,
  33       "NormalFloat template parameter has to be a floating point type.");
  34
  35   using StorageType = typename FPBits<T>::StorageType;
  36   static constexpr StorageType ONE =
  37       (StorageType(1) << FPBits<T>::FRACTION_LEN);
  38
  39   // Unbiased exponent value.
  40   int32_t exponent;
  41
  42   StorageType mantissa;
  43   // We want |StorageType| to have atleast one bit more than the actual mantissa
  44   // bit width to accommodate the implicit 1 value.
  45   static_assert(sizeof(StorageType) * 8 >= FPBits<T>::FRACTION_LEN + 1,
  46                 "Bad type for mantissa in NormalFloat.");
  47
  48   Sign sign = Sign::POS;
  49
  50   LIBC_INLINE NormalFloat(Sign s, int32_t e, StorageType m)
  51       : exponent(e), mantissa(m), sign(s) {
  52     if (mantissa >= ONE)
  53       return;
  54
  55     unsigned normalization_shift = evaluate_normalization_shift(mantissa);
  56     mantissa <<= normalization_shift;
  57     exponent -= normalization_shift;
  58   }
  59
  60   LIBC_INLINE explicit NormalFloat(T x) { init_from_bits(FPBits<T>(x)); }
  61
  62   LIBC_INLINE explicit NormalFloat(FPBits<T> bits) { init_from_bits(bits); }
  63
  64   // Compares this normalized number with another normalized number.
  65   // Returns -1 is this number is less than |other|, 0 if this number is equal
  66   // to |other|, and 1 if this number is greater than |other|.
  67   LIBC_INLINE int cmp(const NormalFloat<T> &other) const {
  68     const int result = sign.is_neg() ? -1 : 1;
  69     if (sign != other.sign)
  70       return result;
  71
  72     if (exponent > other.exponent) {
  73       return result;
  74     } else if (exponent == other.exponent) {
  75       if (mantissa > other.mantissa)
  76         return result;
  77       else if (mantissa == other.mantissa)
  78         return 0;
  79       else
  80         return -result;
  81     } else {
  82       return -result;
  83     }
  84   }
  85
  86   // Returns a new normalized floating point number which is equal in value
  87   // to this number multiplied by 2^e. That is:
  88   //     new = this *  2^e
  89   LIBC_INLINE NormalFloat<T> mul2(int e) const {
  90     NormalFloat<T> result = *this;
  91     result.exponent += e;
  92     return result;
  93   }
  94
  95   LIBC_INLINE operator T() const {
  96     int biased_exponent = exponent + FPBits<T>::EXP_BIAS;
  97     // Max exponent is of the form 0xFF...E. That is why -2 and not -1.
  98     constexpr int MAX_EXPONENT_VALUE = (1 << FPBits<T>::EXP_LEN) - 2;
  99     if (biased_exponent > MAX_EXPONENT_VALUE) {
 100       return FPBits<T>::inf(sign).get_val();
 101     }
 102
 103     FPBits<T> result(T(0.0));
 104     result.set_sign(sign);
 105
 106     constexpr int SUBNORMAL_EXPONENT = -FPBits<T>::EXP_BIAS + 1;
 107     if (exponent < SUBNORMAL_EXPONENT) {
 108       unsigned shift = SUBNORMAL_EXPONENT - exponent;
 109       // Since exponent > subnormalExponent, shift is strictly greater than
 110       // zero.
 111       if (shift <= FPBits<T>::FRACTION_LEN + 1) {
 112         // Generate a subnormal number. Might lead to loss of precision.
 113         // We round to nearest and round halfway cases to even.
 114         const StorageType shift_out_mask =
 115             static_cast<StorageType>(StorageType(1) << shift) - 1;
 116         const StorageType shift_out_value = mantissa & shift_out_mask;
 117         const StorageType halfway_value =
 118             static_cast<StorageType>(StorageType(1) << (shift - 1));
 119         result.set_biased_exponent(0);
 120         result.set_mantissa(mantissa >> shift);
 121         StorageType new_mantissa = result.get_mantissa();
 122         if (shift_out_value > halfway_value) {
 123           new_mantissa += 1;
 124         } else if (shift_out_value == halfway_value) {
 125           // Round to even.
 126           if (result.get_mantissa() & 0x1)
 127             new_mantissa += 1;
 128         }
 129         result.set_mantissa(new_mantissa);
 130         // Adding 1 to mantissa can lead to overflow. This can only happen if
 131         // mantissa was all ones (0b111..11). For such a case, we will carry
 132         // the overflow into the exponent.
 133         if (new_mantissa == ONE)
 134           result.set_biased_exponent(1);
 135         return result.get_val();
 136       } else {
 137         return result.get_val();
 138       }
 139     }
 140
 141     result.set_biased_exponent(
 142         static_cast<StorageType>(exponent + FPBits<T>::EXP_BIAS));
 143     result.set_mantissa(mantissa);
 144     return result.get_val();
 145   }
 146
 147 private:
 148   LIBC_INLINE void init_from_bits(FPBits<T> bits) {
 149     sign = bits.sign();
 150
 151     if (bits.is_inf_or_nan() || bits.is_zero()) {
 152       // Ignore special bit patterns. Implementations deal with them separately
 153       // anyway so this should not be a problem.
 154       exponent = 0;
 155       mantissa = 0;
 156       return;
 157     }
 158
 159     // Normalize subnormal numbers.
 160     if (bits.is_subnormal()) {
 161       unsigned shift = evaluate_normalization_shift(bits.get_mantissa());
 162       mantissa = static_cast<StorageType>(bits.get_mantissa() << shift);
 163       exponent = 1 - FPBits<T>::EXP_BIAS - shift;
 164     } else {
 165       exponent = bits.get_biased_exponent() - FPBits<T>::EXP_BIAS;
 166       mantissa = ONE | bits.get_mantissa();
 167     }
 168   }
 169
 170   LIBC_INLINE unsigned evaluate_normalization_shift(StorageType m) {
 171     unsigned shift = 0;
 172     for (; (ONE & m) == 0 && (shift < FPBits<T>::FRACTION_LEN);
 173          m <<= 1, ++shift)
 174       ;
 175     return shift;
 176   }
 177 };
 178
 179 #ifdef LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80
 180 template <>
 181 LIBC_INLINE void
 182 NormalFloat<long double>::init_from_bits(FPBits<long double> bits) {
 183   sign = bits.sign();
 184
 185   if (bits.is_inf_or_nan() || bits.is_zero()) {
 186     // Ignore special bit patterns. Implementations deal with them separately
 187     // anyway so this should not be a problem.
 188     exponent = 0;
 189     mantissa = 0;
 190     return;
 191   }
 192
 193   if (bits.is_subnormal()) {
 194     if (bits.get_implicit_bit() == 0) {
 195       // Since we ignore zero value, the mantissa in this case is non-zero.
 196       int normalization_shift =
 197           evaluate_normalization_shift(bits.get_mantissa());
 198       exponent = -16382 - normalization_shift;
 199       mantissa = (bits.get_mantissa() << normalization_shift);
 200     } else {
 201       exponent = -16382;
 202       mantissa = ONE | bits.get_mantissa();
 203     }
 204   } else {
 205     if (bits.get_implicit_bit() == 0) {
 206       // Invalid number so just store 0 similar to a NaN.
 207       exponent = 0;
 208       mantissa = 0;
 209     } else {
 210       exponent = bits.get_biased_exponent() - 16383;
 211       mantissa = ONE | bits.get_mantissa();
 212     }
 213   }
 214 }
 215
 216 template <> LIBC_INLINE NormalFloat<long double>::operator long double() const {
 217   using LDBits = FPBits<long double>;
 218   int biased_exponent = exponent + LDBits::EXP_BIAS;
 219   // Max exponent is of the form 0xFF...E. That is why -2 and not -1.
 220   constexpr int MAX_EXPONENT_VALUE = (1 << LDBits::EXP_LEN) - 2;
 221   if (biased_exponent > MAX_EXPONENT_VALUE) {
 222     return LDBits::inf(sign).get_val();
 223   }
 224
 225   FPBits<long double> result(0.0l);
 226   result.set_sign(sign);
 227
 228   constexpr int SUBNORMAL_EXPONENT = -LDBits::EXP_BIAS + 1;
 229   if (exponent < SUBNORMAL_EXPONENT) {
 230     unsigned shift = SUBNORMAL_EXPONENT - exponent;
 231     if (shift <= LDBits::FRACTION_LEN + 1) {
 232       // Generate a subnormal number. Might lead to loss of precision.
 233       // We round to nearest and round halfway cases to even.
 234       const StorageType shift_out_mask = (StorageType(1) << shift) - 1;
 235       const StorageType shift_out_value = mantissa & shift_out_mask;
 236       const StorageType halfway_value = StorageType(1) << (shift - 1);
 237       result.set_biased_exponent(0);
 238       result.set_mantissa(mantissa >> shift);
 239       StorageType new_mantissa = result.get_mantissa();
 240       if (shift_out_value > halfway_value) {
 241         new_mantissa += 1;
 242       } else if (shift_out_value == halfway_value) {
 243         // Round to even.
 244         if (result.get_mantissa() & 0x1)
 245           new_mantissa += 1;
 246       }
 247       result.set_mantissa(new_mantissa);
 248       // Adding 1 to mantissa can lead to overflow. This can only happen if
 249       // mantissa was all ones (0b111..11). For such a case, we will carry
 250       // the overflow into the exponent and set the implicit bit to 1.
 251       if (new_mantissa == ONE) {
 252         result.set_biased_exponent(1);
 253         result.set_implicit_bit(1);
 254       } else {
 255         result.set_implicit_bit(0);
 256       }
 257       return result.get_val();
 258     } else {
 259       return result.get_val();
 260     }
 261   }
 262
 263   result.set_biased_exponent(biased_exponent);
 264   result.set_mantissa(mantissa);
 265   result.set_implicit_bit(1);
 266   return result.get_val();
 267 }
 268 #endif // LIBC_TYPES_LONG_DOUBLE_IS_X86_FLOAT80
 269
 270 } // namespace fputil
 271 } // namespace LIBC_NAMESPACE_DECL
 272
 273 #endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_NORMALFLOAT_H