libc/src/__support/FPUtil/FPBits.h

   1 //===-- Abstract class for bit manipulation of float numbers. ---*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 // -----------------------------------------------------------------------------
  10 //                               **** WARNING ****
  11 // This file is shared with libc++. You should also be careful when adding
  12 // dependencies to this file, since it needs to build for all libc++ targets.
  13 // -----------------------------------------------------------------------------
  14
  15 #ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_FPBITS_H
  16 #define LLVM_LIBC_SRC___SUPPORT_FPUTIL_FPBITS_H
  17
  18 #include "src/__support/CPP/bit.h"
  19 #include "src/__support/CPP/type_traits.h"
  20 #include "src/__support/common.h"
  21 #include "src/__support/libc_assert.h"       // LIBC_ASSERT
  22 #include "src/__support/macros/attributes.h" // LIBC_INLINE, LIBC_INLINE_VAR
  23 #include "src/__support/macros/config.h"
  24 #include "src/__support/macros/properties/types.h" // LIBC_TYPES_HAS_FLOAT128
  25 #include "src/__support/math_extras.h"             // mask_trailing_ones
  26 #include "src/__support/sign.h"                    // Sign
  27 #include "src/__support/uint128.h"
  28
  29 #include <stdint.h>
  30
  31 namespace LIBC_NAMESPACE_DECL {
  32 namespace fputil {
  33
// The supported floating point types.
// Each enumerator has a matching 'internal::FPLayout' specialization below
// that spells out its storage type and field widths.
enum class FPType {
  // IEEE 754 binary16: 1 sign bit, 5 exponent bits, 10 significand bits.
  IEEE754_Binary16,
  // IEEE 754 binary32: 1 sign bit, 8 exponent bits, 23 significand bits.
  IEEE754_Binary32,
  // IEEE 754 binary64: 1 sign bit, 11 exponent bits, 52 significand bits.
  IEEE754_Binary64,
  // IEEE 754 binary128: 1 sign bit, 15 exponent bits, 112 significand bits.
  IEEE754_Binary128,
  // x86 extended precision: 1 sign bit, 15 exponent bits and a 64-bit
  // significand whose leading bit is stored explicitly.
  X86_Binary80,
};
  42
  43 // The classes hierarchy is as follows:
  44 //
  45 //             ┌───────────────────┐
  46 //             │ FPLayout<FPType>  │
  47 //             └─────────▲─────────┘
  48 //                       │
  49 //             ┌─────────┴─────────┐
  50 //             │ FPStorage<FPType> │
  51 //             └─────────▲─────────┘
  52 //                       │
  53 //          ┌────────────┴─────────────┐
  54 //          │                          │
  55 // ┌────────┴─────────┐ ┌──────────────┴──────────────────┐
// │ FPRepSem<FPType> │ │ FPRepSem<FPType::X86_Binary80>  │
  57 // └────────▲─────────┘ └──────────────▲──────────────────┘
  58 //          │                          │
  59 //          └────────────┬─────────────┘
  60 //                       │
  61 //               ┌───────┴───────┐
  62 //               │  FPRepImpl<T> │
  63 //               └───────▲───────┘
  64 //                       │
  65 //              ┌────────┴────────┐
  66 //        ┌─────┴─────┐     ┌─────┴─────┐
  67 //        │  FPRep<T> │     │ FPBits<T> │
  68 //        └───────────┘     └───────────┘
  69 //
  70 // - 'FPLayout' defines only a few constants, namely the 'StorageType' and
  71 //   length of the sign, the exponent, fraction and significand parts.
  72 // - 'FPStorage' builds more constants on top of those from 'FPLayout' like
  73 //   exponent bias and masks. It also holds the bit representation of the
  74 //   floating point as a 'StorageType' type and defines tools to assemble or
  75 //   test these parts.
  76 // - 'FPRepSem' defines functions to interact semantically with the floating
  77 //   point representation. The default implementation is the one for 'IEEE754',
  78 //   a specialization is provided for X86 Extended Precision.
  79 // - 'FPRepImpl' derives from 'FPRepSem' and adds functions that are common to
  80 //   all implementations or build on the ones in 'FPRepSem'.
  81 // - 'FPRep' exposes all functions from 'FPRepImpl' and returns 'FPRep'
  82 //   instances when using Builders (static functions to create values).
  83 // - 'FPBits' exposes all the functions from 'FPRepImpl' but operates on the
  84 //   native C++ floating point type instead of 'FPType'. An additional 'get_val'
  85 //   function allows getting the C++ floating point type value back. Builders
  86 //   called from 'FPBits' return 'FPBits' instances.
  87
  88 namespace internal {
  89
// Defines the layout (sign, exponent, significand) of a floating point type in
// memory. It also defines its associated StorageType, i.e., the unsigned
// integer type used to manipulate its representation.
// Additionally we provide the fractional part length, i.e., the number of bits
// after the binary point when the number is in normal form.
// The primary template is empty; every supported 'FPType' provides an explicit
// specialization below.
template <FPType> struct FPLayout {};
  96
  97 template <> struct FPLayout<FPType::IEEE754_Binary16> {
  98   using StorageType = uint16_t;
  99   LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1;
 100   LIBC_INLINE_VAR static constexpr int EXP_LEN = 5;
 101   LIBC_INLINE_VAR static constexpr int SIG_LEN = 10;
 102   LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN;
 103 };
 104
 105 template <> struct FPLayout<FPType::IEEE754_Binary32> {
 106   using StorageType = uint32_t;
 107   LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1;
 108   LIBC_INLINE_VAR static constexpr int EXP_LEN = 8;
 109   LIBC_INLINE_VAR static constexpr int SIG_LEN = 23;
 110   LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN;
 111 };
 112
 113 template <> struct FPLayout<FPType::IEEE754_Binary64> {
 114   using StorageType = uint64_t;
 115   LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1;
 116   LIBC_INLINE_VAR static constexpr int EXP_LEN = 11;
 117   LIBC_INLINE_VAR static constexpr int SIG_LEN = 52;
 118   LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN;
 119 };
 120
 121 template <> struct FPLayout<FPType::IEEE754_Binary128> {
 122   using StorageType = UInt128;
 123   LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1;
 124   LIBC_INLINE_VAR static constexpr int EXP_LEN = 15;
 125   LIBC_INLINE_VAR static constexpr int SIG_LEN = 112;
 126   LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN;
 127 };
 128
 129 template <> struct FPLayout<FPType::X86_Binary80> {
 130 #if __SIZEOF_LONG_DOUBLE__ == 12
 131   using StorageType = UInt<__SIZEOF_LONG_DOUBLE__ * CHAR_BIT>;
 132 #else
 133   using StorageType = UInt128;
 134 #endif
 135   LIBC_INLINE_VAR static constexpr int SIGN_LEN = 1;
 136   LIBC_INLINE_VAR static constexpr int EXP_LEN = 15;
 137   LIBC_INLINE_VAR static constexpr int SIG_LEN = 64;
 138   LIBC_INLINE_VAR static constexpr int FRACTION_LEN = SIG_LEN - 1;
 139 };
 140
 141 // FPStorage derives useful constants from the FPLayout above.
 142 template <FPType fp_type> struct FPStorage : public FPLayout<fp_type> {
 143   using UP = FPLayout<fp_type>;
 144
 145   using UP::EXP_LEN;  // The number of bits for the *exponent* part
 146   using UP::SIG_LEN;  // The number of bits for the *significand* part
 147   using UP::SIGN_LEN; // The number of bits for the *sign* part
 148   // For convenience, the sum of `SIG_LEN`, `EXP_LEN`, and `SIGN_LEN`.
 149   LIBC_INLINE_VAR static constexpr int TOTAL_LEN = SIGN_LEN + EXP_LEN + SIG_LEN;
 150
 151   // The number of bits after the decimal dot when the number is in normal form.
 152   using UP::FRACTION_LEN;
 153
 154   // An unsigned integer that is wide enough to contain all of the floating
 155   // point bits.
 156   using StorageType = typename UP::StorageType;
 157
 158   // The number of bits in StorageType.
 159   LIBC_INLINE_VAR static constexpr int STORAGE_LEN =
 160       sizeof(StorageType) * CHAR_BIT;
 161   static_assert(STORAGE_LEN >= TOTAL_LEN);
 162
 163   // The exponent bias. Always positive.
 164   LIBC_INLINE_VAR static constexpr int32_t EXP_BIAS =
 165       (1U << (EXP_LEN - 1U)) - 1U;
 166   static_assert(EXP_BIAS > 0);
 167
 168   // The bit pattern that keeps only the *significand* part.
 169   LIBC_INLINE_VAR static constexpr StorageType SIG_MASK =
 170       mask_trailing_ones<StorageType, SIG_LEN>();
 171   // The bit pattern that keeps only the *exponent* part.
 172   LIBC_INLINE_VAR static constexpr StorageType EXP_MASK =
 173       mask_trailing_ones<StorageType, EXP_LEN>() << SIG_LEN;
 174   // The bit pattern that keeps only the *sign* part.
 175   LIBC_INLINE_VAR static constexpr StorageType SIGN_MASK =
 176       mask_trailing_ones<StorageType, SIGN_LEN>() << (EXP_LEN + SIG_LEN);
 177   // The bit pattern that keeps only the *exponent + significand* part.
 178   LIBC_INLINE_VAR static constexpr StorageType EXP_SIG_MASK =
 179       mask_trailing_ones<StorageType, EXP_LEN + SIG_LEN>();
 180   // The bit pattern that keeps only the *sign + exponent + significand* part.
 181   LIBC_INLINE_VAR static constexpr StorageType FP_MASK =
 182       mask_trailing_ones<StorageType, TOTAL_LEN>();
 183   // The bit pattern that keeps only the *fraction* part.
 184   // i.e., the *significand* without the leading one.
 185   LIBC_INLINE_VAR static constexpr StorageType FRACTION_MASK =
 186       mask_trailing_ones<StorageType, FRACTION_LEN>();
 187
 188   static_assert((SIG_MASK & EXP_MASK & SIGN_MASK) == 0, "masks disjoint");
 189   static_assert((SIG_MASK | EXP_MASK | SIGN_MASK) == FP_MASK, "masks cover");
 190
 191 protected:
 192   // Merge bits from 'a' and 'b' values according to 'mask'.
 193   // Use 'a' bits when corresponding 'mask' bits are zeroes and 'b' bits when
 194   // corresponding bits are ones.
 195   LIBC_INLINE static constexpr StorageType merge(StorageType a, StorageType b,
 196                                                  StorageType mask) {
 197     // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
 198     return a ^ ((a ^ b) & mask);
 199   }
 200
 201   // A stongly typed integer that prevents mixing and matching integers with
 202   // different semantics.
 203   template <typename T> struct TypedInt {
 204     using value_type = T;
 205     LIBC_INLINE constexpr explicit TypedInt(T value) : value(value) {}
 206     LIBC_INLINE constexpr TypedInt(const TypedInt &value) = default;
 207     LIBC_INLINE constexpr TypedInt &operator=(const TypedInt &value) = default;
 208
 209     LIBC_INLINE constexpr explicit operator T() const { return value; }
 210
 211     LIBC_INLINE constexpr StorageType to_storage_type() const {
 212       return StorageType(value);
 213     }
 214
 215     LIBC_INLINE friend constexpr bool operator==(TypedInt a, TypedInt b) {
 216       return a.value == b.value;
 217     }
 218     LIBC_INLINE friend constexpr bool operator!=(TypedInt a, TypedInt b) {
 219       return a.value != b.value;
 220     }
 221
 222   protected:
 223     T value;
 224   };
 225
 226   // An opaque type to store a floating point exponent.
 227   // We define special values but it is valid to create arbitrary values as long
 228   // as they are in the range [min, max].
 229   struct Exponent : public TypedInt<int32_t> {
 230     using UP = TypedInt<int32_t>;
 231     using UP::UP;
 232     LIBC_INLINE static constexpr auto subnormal() {
 233       return Exponent(-EXP_BIAS);
 234     }
 235     LIBC_INLINE static constexpr auto min() { return Exponent(1 - EXP_BIAS); }
 236     LIBC_INLINE static constexpr auto zero() { return Exponent(0); }
 237     LIBC_INLINE static constexpr auto max() { return Exponent(EXP_BIAS); }
 238     LIBC_INLINE static constexpr auto inf() { return Exponent(EXP_BIAS + 1); }
 239   };
 240
 241   // An opaque type to store a floating point biased exponent.
 242   // We define special values but it is valid to create arbitrary values as long
 243   // as they are in the range [zero, bits_all_ones].
 244   // Values greater than bits_all_ones are truncated.
 245   struct BiasedExponent : public TypedInt<uint32_t> {
 246     using UP = TypedInt<uint32_t>;
 247     using UP::UP;
 248
 249     LIBC_INLINE constexpr BiasedExponent(Exponent exp)
 250         : UP(static_cast<int32_t>(exp) + EXP_BIAS) {}
 251
 252     // Cast operator to get convert from BiasedExponent to Exponent.
 253     LIBC_INLINE constexpr operator Exponent() const {
 254       return Exponent(UP::value - EXP_BIAS);
 255     }
 256
 257     LIBC_INLINE constexpr BiasedExponent &operator++() {
 258       LIBC_ASSERT(*this != BiasedExponent(Exponent::inf()));
 259       ++UP::value;
 260       return *this;
 261     }
 262
 263     LIBC_INLINE constexpr BiasedExponent &operator--() {
 264       LIBC_ASSERT(*this != BiasedExponent(Exponent::subnormal()));
 265       --UP::value;
 266       return *this;
 267     }
 268   };
 269
 270   // An opaque type to store a floating point significand.
 271   // We define special values but it is valid to create arbitrary values as long
 272   // as they are in the range [zero, bits_all_ones].
 273   // Note that the semantics of the Significand are implementation dependent.
 274   // Values greater than bits_all_ones are truncated.
 275   struct Significand : public TypedInt<StorageType> {
 276     using UP = TypedInt<StorageType>;
 277     using UP::UP;
 278
 279     LIBC_INLINE friend constexpr Significand operator|(const Significand a,
 280                                                        const Significand b) {
 281       return Significand(
 282           StorageType(a.to_storage_type() | b.to_storage_type()));
 283     }
 284     LIBC_INLINE friend constexpr Significand operator^(const Significand a,
 285                                                        const Significand b) {
 286       return Significand(
 287           StorageType(a.to_storage_type() ^ b.to_storage_type()));
 288     }
 289     LIBC_INLINE friend constexpr Significand operator>>(const Significand a,
 290                                                         int shift) {
 291       return Significand(StorageType(a.to_storage_type() >> shift));
 292     }
 293
 294     LIBC_INLINE static constexpr auto zero() {
 295       return Significand(StorageType(0));
 296     }
 297     LIBC_INLINE static constexpr auto lsb() {
 298       return Significand(StorageType(1));
 299     }
 300     LIBC_INLINE static constexpr auto msb() {
 301       return Significand(StorageType(1) << (SIG_LEN - 1));
 302     }
 303     LIBC_INLINE static constexpr auto bits_all_ones() {
 304       return Significand(SIG_MASK);
 305     }
 306   };
 307
 308   LIBC_INLINE static constexpr StorageType encode(BiasedExponent exp) {
 309     return (exp.to_storage_type() << SIG_LEN) & EXP_MASK;
 310   }
 311
 312   LIBC_INLINE static constexpr StorageType encode(Significand value) {
 313     return value.to_storage_type() & SIG_MASK;
 314   }
 315
 316   LIBC_INLINE static constexpr StorageType encode(BiasedExponent exp,
 317                                                   Significand sig) {
 318     return encode(exp) | encode(sig);
 319   }
 320
 321   LIBC_INLINE static constexpr StorageType encode(Sign sign, BiasedExponent exp,
 322                                                   Significand sig) {
 323     if (sign.is_neg())
 324       return SIGN_MASK | encode(exp, sig);
 325     return encode(exp, sig);
 326   }
 327
 328   // The floating point number representation as an unsigned integer.
 329   StorageType bits{};
 330
 331   LIBC_INLINE constexpr FPStorage() : bits(0) {}
 332   LIBC_INLINE constexpr FPStorage(StorageType value) : bits(value) {}
 333
 334   // Observers
 335   LIBC_INLINE constexpr StorageType exp_bits() const { return bits & EXP_MASK; }
 336   LIBC_INLINE constexpr StorageType sig_bits() const { return bits & SIG_MASK; }
 337   LIBC_INLINE constexpr StorageType exp_sig_bits() const {
 338     return bits & EXP_SIG_MASK;
 339   }
 340
 341   // Parts
 342   LIBC_INLINE constexpr BiasedExponent biased_exponent() const {
 343     return BiasedExponent(static_cast<uint32_t>(exp_bits() >> SIG_LEN));
 344   }
 345   LIBC_INLINE constexpr void set_biased_exponent(BiasedExponent biased) {
 346     bits = merge(bits, encode(biased), EXP_MASK);
 347   }
 348
 349 public:
 350   LIBC_INLINE constexpr Sign sign() const {
 351     return (bits & SIGN_MASK) ? Sign::NEG : Sign::POS;
 352   }
 353   LIBC_INLINE constexpr void set_sign(Sign signVal) {
 354     if (sign() != signVal)
 355       bits ^= SIGN_MASK;
 356   }
 357 };
 358
 359 // This layer defines all functions that are specific to how the the floating
 360 // point type is encoded. It enables constructions, modification and observation
 361 // of values manipulated as 'StorageType'.
 362 template <FPType fp_type, typename RetT>
 363 struct FPRepSem : public FPStorage<fp_type> {
 364   using UP = FPStorage<fp_type>;
 365   using typename UP::StorageType;
 366   using UP::FRACTION_LEN;
 367   using UP::FRACTION_MASK;
 368
 369 protected:
 370   using typename UP::Exponent;
 371   using typename UP::Significand;
 372   using UP::bits;
 373   using UP::encode;
 374   using UP::exp_bits;
 375   using UP::exp_sig_bits;
 376   using UP::sig_bits;
 377   using UP::UP;
 378
 379 public:
 380   // Builders
 381   LIBC_INLINE static constexpr RetT zero(Sign sign = Sign::POS) {
 382     return RetT(encode(sign, Exponent::subnormal(), Significand::zero()));
 383   }
 384   LIBC_INLINE static constexpr RetT one(Sign sign = Sign::POS) {
 385     return RetT(encode(sign, Exponent::zero(), Significand::zero()));
 386   }
 387   LIBC_INLINE static constexpr RetT min_subnormal(Sign sign = Sign::POS) {
 388     return RetT(encode(sign, Exponent::subnormal(), Significand::lsb()));
 389   }
 390   LIBC_INLINE static constexpr RetT max_subnormal(Sign sign = Sign::POS) {
 391     return RetT(
 392         encode(sign, Exponent::subnormal(), Significand::bits_all_ones()));
 393   }
 394   LIBC_INLINE static constexpr RetT min_normal(Sign sign = Sign::POS) {
 395     return RetT(encode(sign, Exponent::min(), Significand::zero()));
 396   }
 397   LIBC_INLINE static constexpr RetT max_normal(Sign sign = Sign::POS) {
 398     return RetT(encode(sign, Exponent::max(), Significand::bits_all_ones()));
 399   }
 400   LIBC_INLINE static constexpr RetT inf(Sign sign = Sign::POS) {
 401     return RetT(encode(sign, Exponent::inf(), Significand::zero()));
 402   }
 403   LIBC_INLINE static constexpr RetT signaling_nan(Sign sign = Sign::POS,
 404                                                   StorageType v = 0) {
 405     return RetT(encode(sign, Exponent::inf(),
 406                        (v ? Significand(v) : (Significand::msb() >> 1))));
 407   }
 408   LIBC_INLINE static constexpr RetT quiet_nan(Sign sign = Sign::POS,
 409                                               StorageType v = 0) {
 410     return RetT(
 411         encode(sign, Exponent::inf(), Significand::msb() | Significand(v)));
 412   }
 413
 414   // Observers
 415   LIBC_INLINE constexpr bool is_zero() const { return exp_sig_bits() == 0; }
 416   LIBC_INLINE constexpr bool is_nan() const {
 417     return exp_sig_bits() > encode(Exponent::inf(), Significand::zero());
 418   }
 419   LIBC_INLINE constexpr bool is_quiet_nan() const {
 420     return exp_sig_bits() >= encode(Exponent::inf(), Significand::msb());
 421   }
 422   LIBC_INLINE constexpr bool is_signaling_nan() const {
 423     return is_nan() && !is_quiet_nan();
 424   }
 425   LIBC_INLINE constexpr bool is_inf() const {
 426     return exp_sig_bits() == encode(Exponent::inf(), Significand::zero());
 427   }
 428   LIBC_INLINE constexpr bool is_finite() const {
 429     return exp_bits() != encode(Exponent::inf());
 430   }
 431   LIBC_INLINE
 432   constexpr bool is_subnormal() const {
 433     return exp_bits() == encode(Exponent::subnormal());
 434   }
 435   LIBC_INLINE constexpr bool is_normal() const {
 436     return is_finite() && !is_subnormal();
 437   }
 438   LIBC_INLINE constexpr RetT next_toward_inf() const {
 439     if (is_finite())
 440       return RetT(bits + StorageType(1));
 441     return RetT(bits);
 442   }
 443
 444   // Returns the mantissa with the implicit bit set iff the current
 445   // value is a valid normal number.
 446   LIBC_INLINE constexpr StorageType get_explicit_mantissa() const {
 447     if (is_subnormal())
 448       return sig_bits();
 449     return (StorageType(1) << UP::SIG_LEN) | sig_bits();
 450   }
 451 };
 452
 453 // Specialization for the X86 Extended Precision type.
 454 template <typename RetT>
 455 struct FPRepSem<FPType::X86_Binary80, RetT>
 456     : public FPStorage<FPType::X86_Binary80> {
 457   using UP = FPStorage<FPType::X86_Binary80>;
 458   using typename UP::StorageType;
 459   using UP::FRACTION_LEN;
 460   using UP::FRACTION_MASK;
 461
 462   // The x86 80 bit float represents the leading digit of the mantissa
 463   // explicitly. This is the mask for that bit.
 464   static constexpr StorageType EXPLICIT_BIT_MASK = StorageType(1)
 465                                                    << FRACTION_LEN;
 466   // The X80 significand is made of an explicit bit and the fractional part.
 467   static_assert((EXPLICIT_BIT_MASK & FRACTION_MASK) == 0,
 468                 "the explicit bit and the fractional part should not overlap");
 469   static_assert((EXPLICIT_BIT_MASK | FRACTION_MASK) == SIG_MASK,
 470                 "the explicit bit and the fractional part should cover the "
 471                 "whole significand");
 472
 473 protected:
 474   using typename UP::Exponent;
 475   using typename UP::Significand;
 476   using UP::encode;
 477   using UP::UP;
 478
 479 public:
 480   // Builders
 481   LIBC_INLINE static constexpr RetT zero(Sign sign = Sign::POS) {
 482     return RetT(encode(sign, Exponent::subnormal(), Significand::zero()));
 483   }
 484   LIBC_INLINE static constexpr RetT one(Sign sign = Sign::POS) {
 485     return RetT(encode(sign, Exponent::zero(), Significand::msb()));
 486   }
 487   LIBC_INLINE static constexpr RetT min_subnormal(Sign sign = Sign::POS) {
 488     return RetT(encode(sign, Exponent::subnormal(), Significand::lsb()));
 489   }
 490   LIBC_INLINE static constexpr RetT max_subnormal(Sign sign = Sign::POS) {
 491     return RetT(encode(sign, Exponent::subnormal(),
 492                        Significand::bits_all_ones() ^ Significand::msb()));
 493   }
 494   LIBC_INLINE static constexpr RetT min_normal(Sign sign = Sign::POS) {
 495     return RetT(encode(sign, Exponent::min(), Significand::msb()));
 496   }
 497   LIBC_INLINE static constexpr RetT max_normal(Sign sign = Sign::POS) {
 498     return RetT(encode(sign, Exponent::max(), Significand::bits_all_ones()));
 499   }
 500   LIBC_INLINE static constexpr RetT inf(Sign sign = Sign::POS) {
 501     return RetT(encode(sign, Exponent::inf(), Significand::msb()));
 502   }
 503   LIBC_INLINE static constexpr RetT signaling_nan(Sign sign = Sign::POS,
 504                                                   StorageType v = 0) {
 505     return RetT(encode(sign, Exponent::inf(),
 506                        Significand::msb() |
 507                            (v ? Significand(v) : (Significand::msb() >> 2))));
 508   }
 509   LIBC_INLINE static constexpr RetT quiet_nan(Sign sign = Sign::POS,
 510                                               StorageType v = 0) {
 511     return RetT(encode(sign, Exponent::inf(),
 512                        Significand::msb() | (Significand::msb() >> 1) |
 513                            Significand(v)));
 514   }
 515
 516   // Observers
 517   LIBC_INLINE constexpr bool is_zero() const { return exp_sig_bits() == 0; }
 518   LIBC_INLINE constexpr bool is_nan() const {
 519     // Most encoding forms from the table found in
 520     // https://en.wikipedia.org/wiki/Extended_precision#x86_extended_precision_format
 521     // are interpreted as NaN.
 522     // More precisely :
 523     // - Pseudo-Infinity
 524     // - Pseudo Not a Number
 525     // - Signalling Not a Number
 526     // - Floating-point Indefinite
 527     // - Quiet Not a Number
 528     // - Unnormal
 529     // This can be reduced to the following logic:
 530     if (exp_bits() == encode(Exponent::inf()))
 531       return !is_inf();
 532     if (exp_bits() != encode(Exponent::subnormal()))
 533       return (sig_bits() & encode(Significand::msb())) == 0;
 534     return false;
 535   }
 536   LIBC_INLINE constexpr bool is_quiet_nan() const {
 537     return exp_sig_bits() >=
 538            encode(Exponent::inf(),
 539                   Significand::msb() | (Significand::msb() >> 1));
 540   }
 541   LIBC_INLINE constexpr bool is_signaling_nan() const {
 542     return is_nan() && !is_quiet_nan();
 543   }
 544   LIBC_INLINE constexpr bool is_inf() const {
 545     return exp_sig_bits() == encode(Exponent::inf(), Significand::msb());
 546   }
 547   LIBC_INLINE constexpr bool is_finite() const {
 548     return !is_inf() && !is_nan();
 549   }
 550   LIBC_INLINE
 551   constexpr bool is_subnormal() const {
 552     return exp_bits() == encode(Exponent::subnormal());
 553   }
 554   LIBC_INLINE constexpr bool is_normal() const {
 555     const auto exp = exp_bits();
 556     if (exp == encode(Exponent::subnormal()) || exp == encode(Exponent::inf()))
 557       return false;
 558     return get_implicit_bit();
 559   }
 560   LIBC_INLINE constexpr RetT next_toward_inf() const {
 561     if (is_finite()) {
 562       if (exp_sig_bits() == max_normal().uintval()) {
 563         return inf(sign());
 564       } else if (exp_sig_bits() == max_subnormal().uintval()) {
 565         return min_normal(sign());
 566       } else if (sig_bits() == SIG_MASK) {
 567         return RetT(encode(sign(), ++biased_exponent(), Significand::zero()));
 568       } else {
 569         return RetT(bits + StorageType(1));
 570       }
 571     }
 572     return RetT(bits);
 573   }
 574
 575   LIBC_INLINE constexpr StorageType get_explicit_mantissa() const {
 576     return sig_bits();
 577   }
 578
 579   // This functions is specific to FPRepSem<FPType::X86_Binary80>.
 580   // TODO: Remove if possible.
 581   LIBC_INLINE constexpr bool get_implicit_bit() const {
 582     return static_cast<bool>(bits & EXPLICIT_BIT_MASK);
 583   }
 584
 585   // This functions is specific to FPRepSem<FPType::X86_Binary80>.
 586   // TODO: Remove if possible.
 587   LIBC_INLINE constexpr void set_implicit_bit(bool implicitVal) {
 588     if (get_implicit_bit() != implicitVal)
 589       bits ^= EXPLICIT_BIT_MASK;
 590   }
 591 };
 592
 593 // 'FPRepImpl' is the bottom of the class hierarchy that only deals with
 594 // 'FPType'. The operations dealing with specific float semantics are
 595 // implemented by 'FPRepSem' above and specialized when needed.
 596 //
 597 // The 'RetT' type is being propagated up to 'FPRepSem' so that the functions
 598 // creating new values (Builders) can return the appropriate type. That is, when
 599 // creating a value through 'FPBits' below the builder will return an 'FPBits'
 600 // value.
 601 // FPBits<float>::zero(); // returns an FPBits<>
 602 //
 603 // When we don't care about specific C++ floating point type we can use
 604 // 'FPRep' and specify the 'FPType' directly.
 605 // FPRep<FPType::IEEE754_Binary32:>::zero() // returns an FPRep<>
 606 template <FPType fp_type, typename RetT>
 607 struct FPRepImpl : public FPRepSem<fp_type, RetT> {
 608   using UP = FPRepSem<fp_type, RetT>;
 609   using StorageType = typename UP::StorageType;
 610
 611 protected:
 612   using UP::bits;
 613   using UP::encode;
 614   using UP::exp_bits;
 615   using UP::exp_sig_bits;
 616
 617   using typename UP::BiasedExponent;
 618   using typename UP::Exponent;
 619   using typename UP::Significand;
 620
 621   using UP::FP_MASK;
 622
 623 public:
 624   // Constants.
 625   using UP::EXP_BIAS;
 626   using UP::EXP_MASK;
 627   using UP::FRACTION_MASK;
 628   using UP::SIG_LEN;
 629   using UP::SIG_MASK;
 630   using UP::SIGN_MASK;
 631   LIBC_INLINE_VAR static constexpr int MAX_BIASED_EXPONENT =
 632       (1 << UP::EXP_LEN) - 1;
 633
 634   // CTors
 635   LIBC_INLINE constexpr FPRepImpl() = default;
 636   LIBC_INLINE constexpr explicit FPRepImpl(StorageType x) : UP(x) {}
 637
 638   // Comparison
 639   LIBC_INLINE constexpr friend bool operator==(FPRepImpl a, FPRepImpl b) {
 640     return a.uintval() == b.uintval();
 641   }
 642   LIBC_INLINE constexpr friend bool operator!=(FPRepImpl a, FPRepImpl b) {
 643     return a.uintval() != b.uintval();
 644   }
 645
 646   // Representation
 647   LIBC_INLINE constexpr StorageType uintval() const { return bits & FP_MASK; }
 648   LIBC_INLINE constexpr void set_uintval(StorageType value) {
 649     bits = (value & FP_MASK);
 650   }
 651
 652   // Builders
 653   using UP::inf;
 654   using UP::max_normal;
 655   using UP::max_subnormal;
 656   using UP::min_normal;
 657   using UP::min_subnormal;
 658   using UP::one;
 659   using UP::quiet_nan;
 660   using UP::signaling_nan;
 661   using UP::zero;
 662
 663   // Modifiers
 664   LIBC_INLINE constexpr RetT abs() const {
 665     return RetT(static_cast<StorageType>(bits & UP::EXP_SIG_MASK));
 666   }
 667
 668   // Observers
 669   using UP::get_explicit_mantissa;
 670   using UP::is_finite;
 671   using UP::is_inf;
 672   using UP::is_nan;
 673   using UP::is_normal;
 674   using UP::is_quiet_nan;
 675   using UP::is_signaling_nan;
 676   using UP::is_subnormal;
 677   using UP::is_zero;
 678   using UP::next_toward_inf;
 679   using UP::sign;
 680   LIBC_INLINE constexpr bool is_inf_or_nan() const { return !is_finite(); }
 681   LIBC_INLINE constexpr bool is_neg() const { return sign().is_neg(); }
 682   LIBC_INLINE constexpr bool is_pos() const { return sign().is_pos(); }
 683
 684   LIBC_INLINE constexpr uint16_t get_biased_exponent() const {
 685     return static_cast<uint16_t>(static_cast<uint32_t>(UP::biased_exponent()));
 686   }
 687
 688   LIBC_INLINE constexpr void set_biased_exponent(StorageType biased) {
 689     UP::set_biased_exponent(BiasedExponent((int32_t)biased));
 690   }
 691
 692   LIBC_INLINE constexpr int get_exponent() const {
 693     return static_cast<int32_t>(Exponent(UP::biased_exponent()));
 694   }
 695
 696   // If the number is subnormal, the exponent is treated as if it were the
 697   // minimum exponent for a normal number. This is to keep continuity between
 698   // the normal and subnormal ranges, but it causes problems for functions where
 699   // values are calculated from the exponent, since just subtracting the bias
 700   // will give a slightly incorrect result. Additionally, zero has an exponent
 701   // of zero, and that should actually be treated as zero.
 702   LIBC_INLINE constexpr int get_explicit_exponent() const {
 703     Exponent exponent(UP::biased_exponent());
 704     if (is_zero())
 705       exponent = Exponent::zero();
 706     if (exponent == Exponent::subnormal())
 707       exponent = Exponent::min();
 708     return static_cast<int32_t>(exponent);
 709   }
 710
 711   LIBC_INLINE constexpr StorageType get_mantissa() const {
 712     return bits & FRACTION_MASK;
 713   }
 714
 715   LIBC_INLINE constexpr void set_mantissa(StorageType mantVal) {
 716     bits = UP::merge(bits, mantVal, FRACTION_MASK);
 717   }
 718
 719   LIBC_INLINE constexpr void set_significand(StorageType sigVal) {
 720     bits = UP::merge(bits, sigVal, SIG_MASK);
 721   }
 722   // Unsafe function to create a floating point representation.
 723   // It simply packs the sign, biased exponent and mantissa values without
 724   // checking bound nor normalization.
 725   //
 726   // WARNING: For X86 Extended Precision, implicit bit needs to be set correctly
 727   // in the 'mantissa' by the caller.  This function will not check for its
 728   // validity.
 729   //
 730   // FIXME: Use an uint32_t for 'biased_exp'.
 731   LIBC_INLINE static constexpr RetT
 732   create_value(Sign sign, StorageType biased_exp, StorageType mantissa) {
 733     return RetT(encode(sign, BiasedExponent(static_cast<uint32_t>(biased_exp)),
 734                        Significand(mantissa)));
 735   }
 736
  // The function converts an integer number and a raw (biased) exponent to the
  // proper float T type:
 739   //   Result = number * 2^(ep+1 - exponent_bias)
 740   // Be careful!
 741   //   1) "ep" is the raw exponent value.
 742   //   2) The function adds +1 to ep for seamless normalized to denormalized
 743   //      transition.
 744   //   3) The function does not check exponent high limit.
 745   //   4) "number" zero value is not processed correctly.
 746   //   5) Number is unsigned, so the result can be only positive.
 747   LIBC_INLINE static constexpr RetT make_value(StorageType number, int ep) {
 748     FPRepImpl result(0);
 749     int lz =
 750         UP::FRACTION_LEN + 1 - (UP::STORAGE_LEN - cpp::countl_zero(number));
 751
 752     number <<= lz;
 753     ep -= lz;
 754
 755     if (LIBC_LIKELY(ep >= 0)) {
 756       // Implicit number bit will be removed by mask
 757       result.set_significand(number);
 758       result.set_biased_exponent(static_cast<StorageType>(ep + 1));
 759     } else {
 760       result.set_significand(number >> -ep);
 761     }
 762     return RetT(result.uintval());
 763   }
 764 };
 765
 766 // A generic class to manipulate floating point formats.
// It derives its functionality from FPRepImpl above.
 768 template <FPType fp_type>
 769 struct FPRep : public FPRepImpl<fp_type, FPRep<fp_type>> {
 770   using UP = FPRepImpl<fp_type, FPRep<fp_type>>;
 771   using StorageType = typename UP::StorageType;
 772   using UP::UP;
 773
 774   LIBC_INLINE constexpr explicit operator StorageType() const {
 775     return UP::uintval();
 776   }
 777 };
 778
 779 } // namespace internal
 780
 781 // Returns the FPType corresponding to C++ type T on the host.
 782 template <typename T> LIBC_INLINE static constexpr FPType get_fp_type() {
 783   using UnqualT = cpp::remove_cv_t<T>;
 784   if constexpr (cpp::is_same_v<UnqualT, float> && __FLT_MANT_DIG__ == 24)
 785     return FPType::IEEE754_Binary32;
 786   else if constexpr (cpp::is_same_v<UnqualT, double> && __DBL_MANT_DIG__ == 53)
 787     return FPType::IEEE754_Binary64;
 788   else if constexpr (cpp::is_same_v<UnqualT, long double>) {
 789     if constexpr (__LDBL_MANT_DIG__ == 53)
 790       return FPType::IEEE754_Binary64;
 791     else if constexpr (__LDBL_MANT_DIG__ == 64)
 792       return FPType::X86_Binary80;
 793     else if constexpr (__LDBL_MANT_DIG__ == 113)
 794       return FPType::IEEE754_Binary128;
 795   }
 796 #if defined(LIBC_TYPES_HAS_FLOAT16)
 797   else if constexpr (cpp::is_same_v<UnqualT, float16>)
 798     return FPType::IEEE754_Binary16;
 799 #endif
 800 #if defined(LIBC_TYPES_HAS_FLOAT128)
 801   else if constexpr (cpp::is_same_v<UnqualT, float128>)
 802     return FPType::IEEE754_Binary128;
 803 #endif
 804   else
 805     static_assert(cpp::always_false<UnqualT>, "Unsupported type");
 806 }
 807
 808 // -----------------------------------------------------------------------------
 809 //                               **** WARNING ****
 810 // This interface is shared with libc++, if you change this interface you need
 811 // to update it in both libc and libc++. You should also be careful when adding
 812 // dependencies to this file, since it needs to build for all libc++ targets.
 813 // -----------------------------------------------------------------------------
 814 // A generic class to manipulate C++ floating point formats.
// It derives its functionality from FPRepImpl above.
 816 template <typename T>
 817 struct FPBits final : public internal::FPRepImpl<get_fp_type<T>(), FPBits<T>> {
 818   static_assert(cpp::is_floating_point_v<T>,
 819                 "FPBits instantiated with invalid type.");
 820   using UP = internal::FPRepImpl<get_fp_type<T>(), FPBits<T>>;
 821   using StorageType = typename UP::StorageType;
 822
 823   // Constructors.
 824   LIBC_INLINE constexpr FPBits() = default;
 825
 826   template <typename XType> LIBC_INLINE constexpr explicit FPBits(XType x) {
 827     using Unqual = typename cpp::remove_cv_t<XType>;
 828     if constexpr (cpp::is_same_v<Unqual, T>) {
 829       UP::bits = cpp::bit_cast<StorageType>(x);
 830     } else if constexpr (cpp::is_same_v<Unqual, StorageType>) {
 831       UP::bits = x;
 832     } else {
 833       // We don't want accidental type promotions/conversions, so we require
 834       // exact type match.
 835       static_assert(cpp::always_false<XType>);
 836     }
 837   }
 838
 839   // Floating-point conversions.
 840   LIBC_INLINE constexpr T get_val() const { return cpp::bit_cast<T>(UP::bits); }
 841 };
 842
 843 } // namespace fputil
 844 } // namespace LIBC_NAMESPACE_DECL
 845
 846 #endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_FPBITS_H