llvm/lib/Support/APFloat.cpp

   1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file implements a class to represent arbitrary precision floating
  10 // point values and provide a variety of arithmetic operations on them.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "llvm/ADT/APFloat.h"
  15 #include "llvm/ADT/APSInt.h"
  16 #include "llvm/ADT/ArrayRef.h"
  17 #include "llvm/ADT/FloatingPointMode.h"
  18 #include "llvm/ADT/FoldingSet.h"
  19 #include "llvm/ADT/Hashing.h"
  20 #include "llvm/ADT/STLExtras.h"
  21 #include "llvm/ADT/StringExtras.h"
  22 #include "llvm/ADT/StringRef.h"
  23 #include "llvm/Config/llvm-config.h"
  24 #include "llvm/Support/Debug.h"
  25 #include "llvm/Support/Error.h"
  26 #include "llvm/Support/MathExtras.h"
  27 #include "llvm/Support/raw_ostream.h"
  28 #include <cstring>
  29 #include <limits.h>
  30
  31 #define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)                             \
  32   do {                                                                         \
  33     if (usesLayout<IEEEFloat>(getSemantics()))                                 \
  34       return U.IEEE.METHOD_CALL;                                               \
  35     if (usesLayout<DoubleAPFloat>(getSemantics()))                             \
  36       return U.Double.METHOD_CALL;                                             \
  37     llvm_unreachable("Unexpected semantics");                                  \
  38   } while (false)
  39
  40 using namespace llvm;
  41
  42 /// A macro used to combine two fcCategory enums into one key which can be used
  43 /// in a switch statement to classify how the interaction of two APFloat's
  44 /// categories affects an operation.
  45 ///
  46 /// TODO: If clang source code is ever allowed to use constexpr in its own
  47 /// codebase, change this into a static inline function.
  48 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
  49
  50 /* Assumed in hexadecimal significand parsing, and conversion to
  51    hexadecimal strings.  */
  52 static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
  53
  54 namespace llvm {
  55
  56 // How the nonfinite values Inf and NaN are represented.
  57 enum class fltNonfiniteBehavior {
  58   // Represents standard IEEE 754 behavior. A value is nonfinite if the
  59   // exponent field is all 1s. In such cases, a value is Inf if the
  60   // significand bits are all zero, and NaN otherwise
  61   IEEE754,
  62
  63   // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN,
  64   // Float8E5M2FNUZ, Float8E4M3FNUZ, and Float8E4M3B11FNUZ). There is no
  65   // representation for Inf, and operations that would ordinarily produce Inf
  66   // produce NaN instead.
  67   // The details of the NaN representation(s) in this form are determined by the
  68   // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available
  69   // encodings do not distinguish between signalling and quiet NaN.
  70   NanOnly,
  71
  72   // This behavior is present in Float6E3M2FN, Float6E2M3FN, and
  73   // Float4E2M1FN types, which do not support Inf or NaN values.
  74   FiniteOnly,
  75 };
  76
  77 // How NaN values are represented. This is curently only used in combination
  78 // with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE
  79 // while having IEEE non-finite behavior is liable to lead to unexpected
  80 // results.
  81 enum class fltNanEncoding {
  82   // Represents the standard IEEE behavior where a value is NaN if its
  83   // exponent is all 1s and the significand is non-zero.
  84   IEEE,
  85
  86   // Represents the behavior in the Float8E4M3FN floating point type where NaN
  87   // is represented by having the exponent and mantissa set to all 1s.
  88   // This behavior matches the FP8 E4M3 type described in
  89   // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
  90   // as non-signalling, although the paper does not state whether the NaN
  91   // values are signalling or not.
  92   AllOnes,
  93
  94   // Represents the behavior in Float8E{5,4}E{2,3}FNUZ floating point types
  95   // where NaN is represented by a sign bit of 1 and all 0s in the exponent
  96   // and mantissa (i.e. the negative zero encoding in a IEEE float). Since
  97   // there is only one NaN value, it is treated as quiet NaN. This matches the
  98   // behavior described in https://arxiv.org/abs/2206.02915 .
  99   NegativeZero,
 100 };
 101
 102 /* Represents floating point arithmetic semantics.  */
 103 struct fltSemantics {
 104   /* The largest E such that 2^E is representable; this matches the
 105      definition of IEEE 754.  */
 106   APFloatBase::ExponentType maxExponent;
 107
 108   /* The smallest E such that 2^E is a normalized number; this
 109      matches the definition of IEEE 754.  */
 110   APFloatBase::ExponentType minExponent;
 111
 112   /* Number of bits in the significand.  This includes the integer
 113      bit.  */
 114   unsigned int precision;
 115
 116   /* Number of bits actually used in the semantics. */
 117   unsigned int sizeInBits;
 118
 119   fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754;
 120
 121   fltNanEncoding nanEncoding = fltNanEncoding::IEEE;
 122
 123   /* Whether this semantics has an encoding for Zero */
 124   bool hasZero = true;
 125
 126   /* Whether this semantics can represent signed values */
 127   bool hasSignedRepr = true;
 128
 129   // Returns true if any number described by this semantics can be precisely
 130   // represented by the specified semantics. Does not take into account
 131   // the value of fltNonfiniteBehavior.
 132   bool isRepresentableBy(const fltSemantics &S) const {
 133     return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&
 134            precision <= S.precision;
 135   }
 136 };
 137
 138 static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16};
 139 static constexpr fltSemantics semBFloat = {127, -126, 8, 16};
 140 static constexpr fltSemantics semIEEEsingle = {127, -126, 24, 32};
 141 static constexpr fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
 142 static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128};
 143 static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8};
 144 static constexpr fltSemantics semFloat8E5M2FNUZ = {
 145     15, -15, 3, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
 146 static constexpr fltSemantics semFloat8E4M3 = {7, -6, 4, 8};
 147 static constexpr fltSemantics semFloat8E4M3FN = {
 148     8, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes};
 149 static constexpr fltSemantics semFloat8E4M3FNUZ = {
 150     7, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
 151 static constexpr fltSemantics semFloat8E4M3B11FNUZ = {
 152     4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
 153 static constexpr fltSemantics semFloat8E3M4 = {3, -2, 5, 8};
 154 static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19};
 155 static constexpr fltSemantics semFloat8E8M0FNU = {
 156     127,   -127, 1, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes,
 157     false, false};
 158
 159 static constexpr fltSemantics semFloat6E3M2FN = {
 160     4, -2, 3, 6, fltNonfiniteBehavior::FiniteOnly};
 161 static constexpr fltSemantics semFloat6E2M3FN = {
 162     2, 0, 4, 6, fltNonfiniteBehavior::FiniteOnly};
 163 static constexpr fltSemantics semFloat4E2M1FN = {
 164     2, 0, 2, 4, fltNonfiniteBehavior::FiniteOnly};
 165 static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
 166 static constexpr fltSemantics semBogus = {0, 0, 0, 0};
 167 static constexpr fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};
 168 static constexpr fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
 169                                                           53 + 53, 128};
 170
 171 const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
 172   switch (S) {
 173   case S_IEEEhalf:
 174     return IEEEhalf();
 175   case S_BFloat:
 176     return BFloat();
 177   case S_IEEEsingle:
 178     return IEEEsingle();
 179   case S_IEEEdouble:
 180     return IEEEdouble();
 181   case S_IEEEquad:
 182     return IEEEquad();
 183   case S_PPCDoubleDouble:
 184     return PPCDoubleDouble();
 185   case S_PPCDoubleDoubleLegacy:
 186     return PPCDoubleDoubleLegacy();
 187   case S_Float8E5M2:
 188     return Float8E5M2();
 189   case S_Float8E5M2FNUZ:
 190     return Float8E5M2FNUZ();
 191   case S_Float8E4M3:
 192     return Float8E4M3();
 193   case S_Float8E4M3FN:
 194     return Float8E4M3FN();
 195   case S_Float8E4M3FNUZ:
 196     return Float8E4M3FNUZ();
 197   case S_Float8E4M3B11FNUZ:
 198     return Float8E4M3B11FNUZ();
 199   case S_Float8E3M4:
 200     return Float8E3M4();
 201   case S_FloatTF32:
 202     return FloatTF32();
 203   case S_Float8E8M0FNU:
 204     return Float8E8M0FNU();
 205   case S_Float6E3M2FN:
 206     return Float6E3M2FN();
 207   case S_Float6E2M3FN:
 208     return Float6E2M3FN();
 209   case S_Float4E2M1FN:
 210     return Float4E2M1FN();
 211   case S_x87DoubleExtended:
 212     return x87DoubleExtended();
 213   }
 214   llvm_unreachable("Unrecognised floating semantics");
 215 }
 216
 217 APFloatBase::Semantics
 218 APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
 219   if (&Sem == &llvm::APFloat::IEEEhalf())
 220     return S_IEEEhalf;
 221   else if (&Sem == &llvm::APFloat::BFloat())
 222     return S_BFloat;
 223   else if (&Sem == &llvm::APFloat::IEEEsingle())
 224     return S_IEEEsingle;
 225   else if (&Sem == &llvm::APFloat::IEEEdouble())
 226     return S_IEEEdouble;
 227   else if (&Sem == &llvm::APFloat::IEEEquad())
 228     return S_IEEEquad;
 229   else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
 230     return S_PPCDoubleDouble;
 231   else if (&Sem == &llvm::APFloat::PPCDoubleDoubleLegacy())
 232     return S_PPCDoubleDoubleLegacy;
 233   else if (&Sem == &llvm::APFloat::Float8E5M2())
 234     return S_Float8E5M2;
 235   else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ())
 236     return S_Float8E5M2FNUZ;
 237   else if (&Sem == &llvm::APFloat::Float8E4M3())
 238     return S_Float8E4M3;
 239   else if (&Sem == &llvm::APFloat::Float8E4M3FN())
 240     return S_Float8E4M3FN;
 241   else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ())
 242     return S_Float8E4M3FNUZ;
 243   else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ())
 244     return S_Float8E4M3B11FNUZ;
 245   else if (&Sem == &llvm::APFloat::Float8E3M4())
 246     return S_Float8E3M4;
 247   else if (&Sem == &llvm::APFloat::FloatTF32())
 248     return S_FloatTF32;
 249   else if (&Sem == &llvm::APFloat::Float8E8M0FNU())
 250     return S_Float8E8M0FNU;
 251   else if (&Sem == &llvm::APFloat::Float6E3M2FN())
 252     return S_Float6E3M2FN;
 253   else if (&Sem == &llvm::APFloat::Float6E2M3FN())
 254     return S_Float6E2M3FN;
 255   else if (&Sem == &llvm::APFloat::Float4E2M1FN())
 256     return S_Float4E2M1FN;
 257   else if (&Sem == &llvm::APFloat::x87DoubleExtended())
 258     return S_x87DoubleExtended;
 259   else
 260     llvm_unreachable("Unknown floating semantics");
 261 }
 262
 263 const fltSemantics &APFloatBase::IEEEhalf() { return semIEEEhalf; }
 264 const fltSemantics &APFloatBase::BFloat() { return semBFloat; }
 265 const fltSemantics &APFloatBase::IEEEsingle() { return semIEEEsingle; }
 266 const fltSemantics &APFloatBase::IEEEdouble() { return semIEEEdouble; }
 267 const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; }
 268 const fltSemantics &APFloatBase::PPCDoubleDouble() {
 269   return semPPCDoubleDouble;
 270 }
 271 const fltSemantics &APFloatBase::PPCDoubleDoubleLegacy() {
 272   return semPPCDoubleDoubleLegacy;
 273 }
 274 const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; }
 275 const fltSemantics &APFloatBase::Float8E5M2FNUZ() { return semFloat8E5M2FNUZ; }
 276 const fltSemantics &APFloatBase::Float8E4M3() { return semFloat8E4M3; }
 277 const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; }
 278 const fltSemantics &APFloatBase::Float8E4M3FNUZ() { return semFloat8E4M3FNUZ; }
 279 const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() {
 280   return semFloat8E4M3B11FNUZ;
 281 }
 282 const fltSemantics &APFloatBase::Float8E3M4() { return semFloat8E3M4; }
 283 const fltSemantics &APFloatBase::FloatTF32() { return semFloatTF32; }
 284 const fltSemantics &APFloatBase::Float8E8M0FNU() { return semFloat8E8M0FNU; }
 285 const fltSemantics &APFloatBase::Float6E3M2FN() { return semFloat6E3M2FN; }
 286 const fltSemantics &APFloatBase::Float6E2M3FN() { return semFloat6E2M3FN; }
 287 const fltSemantics &APFloatBase::Float4E2M1FN() { return semFloat4E2M1FN; }
 288 const fltSemantics &APFloatBase::x87DoubleExtended() {
 289   return semX87DoubleExtended;
 290 }
 291 const fltSemantics &APFloatBase::Bogus() { return semBogus; }
 292
 293 constexpr RoundingMode APFloatBase::rmNearestTiesToEven;
 294 constexpr RoundingMode APFloatBase::rmTowardPositive;
 295 constexpr RoundingMode APFloatBase::rmTowardNegative;
 296 constexpr RoundingMode APFloatBase::rmTowardZero;
 297 constexpr RoundingMode APFloatBase::rmNearestTiesToAway;
 298
 299 /* A tight upper bound on number of parts required to hold the value
 300    pow(5, power) is
 301
 302      power * 815 / (351 * integerPartWidth) + 1
 303
 304    However, whilst the result may require only this many parts,
 305    because we are multiplying two values to get it, the
 306    multiplication may require an extra part with the excess part
 307    being zero (consider the trivial case of 1 * 1, tcFullMultiply
 308    requires two parts to hold the single-part result).  So we add an
 309    extra one to guarantee enough space whilst multiplying.  */
 310 const unsigned int maxExponent = 16383;
 311 const unsigned int maxPrecision = 113;
 312 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
 313 const unsigned int maxPowerOfFiveParts =
 314     2 +
 315     ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth));
 316
 317 unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
 318   return semantics.precision;
 319 }
 320 APFloatBase::ExponentType
 321 APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {
 322   return semantics.maxExponent;
 323 }
 324 APFloatBase::ExponentType
 325 APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {
 326   return semantics.minExponent;
 327 }
 328 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
 329   return semantics.sizeInBits;
 330 }
 331 unsigned int APFloatBase::semanticsIntSizeInBits(const fltSemantics &semantics,
 332                                                  bool isSigned) {
 333   // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
 334   // at least one more bit than the MaxExponent to hold the max FP value.
 335   unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1;
 336   // Extra sign bit needed.
 337   if (isSigned)
 338     ++MinBitWidth;
 339   return MinBitWidth;
 340 }
 341
 342 bool APFloatBase::semanticsHasZero(const fltSemantics &semantics) {
 343   return semantics.hasZero;
 344 }
 345
 346 bool APFloatBase::semanticsHasSignedRepr(const fltSemantics &semantics) {
 347   return semantics.hasSignedRepr;
 348 }
 349
 350 bool APFloatBase::semanticsHasInf(const fltSemantics &semantics) {
 351   return semantics.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754;
 352 }
 353
 354 bool APFloatBase::semanticsHasNaN(const fltSemantics &semantics) {
 355   return semantics.nonFiniteBehavior != fltNonfiniteBehavior::FiniteOnly;
 356 }
 357
 358 bool APFloatBase::isRepresentableAsNormalIn(const fltSemantics &Src,
 359                                             const fltSemantics &Dst) {
 360   // Exponent range must be larger.
 361   if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent)
 362     return false;
 363
 364   // If the mantissa is long enough, the result value could still be denormal
 365   // with a larger exponent range.
 366   //
 367   // FIXME: This condition is probably not accurate but also shouldn't be a
 368   // practical concern with existing types.
 369   return Dst.precision >= Src.precision;
 370 }
 371
 372 unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
 373   return Sem.sizeInBits;
 374 }
 375
 376 static constexpr APFloatBase::ExponentType
 377 exponentZero(const fltSemantics &semantics) {
 378   return semantics.minExponent - 1;
 379 }
 380
 381 static constexpr APFloatBase::ExponentType
 382 exponentInf(const fltSemantics &semantics) {
 383   return semantics.maxExponent + 1;
 384 }
 385
 386 static constexpr APFloatBase::ExponentType
 387 exponentNaN(const fltSemantics &semantics) {
 388   if (semantics.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
 389     if (semantics.nanEncoding == fltNanEncoding::NegativeZero)
 390       return exponentZero(semantics);
 391     if (semantics.hasSignedRepr)
 392       return semantics.maxExponent;
 393   }
 394   return semantics.maxExponent + 1;
 395 }
 396
 397 /* A bunch of private, handy routines.  */
 398
 399 static inline Error createError(const Twine &Err) {
 400   return make_error<StringError>(Err, inconvertibleErrorCode());
 401 }
 402
 403 static constexpr inline unsigned int partCountForBits(unsigned int bits) {
 404   return std::max(1u, (bits + APFloatBase::integerPartWidth - 1) /
 405                           APFloatBase::integerPartWidth);
 406 }
 407
 408 /* Returns 0U-9U.  Return values >= 10U are not digits.  */
 409 static inline unsigned int
 410 decDigitValue(unsigned int c)
 411 {
 412   return c - '0';
 413 }
 414
 415 /* Return the value of a decimal exponent of the form
 416    [+-]ddddddd.
 417
 418    If the exponent overflows, returns a large exponent with the
 419    appropriate sign.  */
 420 static Expected<int> readExponent(StringRef::iterator begin,
 421                                   StringRef::iterator end) {
 422   bool isNegative;
 423   unsigned int absExponent;
 424   const unsigned int overlargeExponent = 24000;  /* FIXME.  */
 425   StringRef::iterator p = begin;
 426
 427   // Treat no exponent as 0 to match binutils
 428   if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) {
 429     return 0;
 430   }
 431
 432   isNegative = (*p == '-');
 433   if (*p == '-' || *p == '+') {
 434     p++;
 435     if (p == end)
 436       return createError("Exponent has no digits");
 437   }
 438
 439   absExponent = decDigitValue(*p++);
 440   if (absExponent >= 10U)
 441     return createError("Invalid character in exponent");
 442
 443   for (; p != end; ++p) {
 444     unsigned int value;
 445
 446     value = decDigitValue(*p);
 447     if (value >= 10U)
 448       return createError("Invalid character in exponent");
 449
 450     absExponent = absExponent * 10U + value;
 451     if (absExponent >= overlargeExponent) {
 452       absExponent = overlargeExponent;
 453       break;
 454     }
 455   }
 456
 457   if (isNegative)
 458     return -(int) absExponent;
 459   else
 460     return (int) absExponent;
 461 }
 462
 463 /* This is ugly and needs cleaning up, but I don't immediately see
 464    how whilst remaining safe.  */
 465 static Expected<int> totalExponent(StringRef::iterator p,
 466                                    StringRef::iterator end,
 467                                    int exponentAdjustment) {
 468   int unsignedExponent;
 469   bool negative, overflow;
 470   int exponent = 0;
 471
 472   if (p == end)
 473     return createError("Exponent has no digits");
 474
 475   negative = *p == '-';
 476   if (*p == '-' || *p == '+') {
 477     p++;
 478     if (p == end)
 479       return createError("Exponent has no digits");
 480   }
 481
 482   unsignedExponent = 0;
 483   overflow = false;
 484   for (; p != end; ++p) {
 485     unsigned int value;
 486
 487     value = decDigitValue(*p);
 488     if (value >= 10U)
 489       return createError("Invalid character in exponent");
 490
 491     unsignedExponent = unsignedExponent * 10 + value;
 492     if (unsignedExponent > 32767) {
 493       overflow = true;
 494       break;
 495     }
 496   }
 497
 498   if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
 499     overflow = true;
 500
 501   if (!overflow) {
 502     exponent = unsignedExponent;
 503     if (negative)
 504       exponent = -exponent;
 505     exponent += exponentAdjustment;
 506     if (exponent > 32767 || exponent < -32768)
 507       overflow = true;
 508   }
 509
 510   if (overflow)
 511     exponent = negative ? -32768: 32767;
 512
 513   return exponent;
 514 }
 515
 516 static Expected<StringRef::iterator>
 517 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
 518                            StringRef::iterator *dot) {
 519   StringRef::iterator p = begin;
 520   *dot = end;
 521   while (p != end && *p == '0')
 522     p++;
 523
 524   if (p != end && *p == '.') {
 525     *dot = p++;
 526
 527     if (end - begin == 1)
 528       return createError("Significand has no digits");
 529
 530     while (p != end && *p == '0')
 531       p++;
 532   }
 533
 534   return p;
 535 }
 536
 537 /* Given a normal decimal floating point number of the form
 538
 539      dddd.dddd[eE][+-]ddd
 540
 541    where the decimal point and exponent are optional, fill out the
 542    structure D.  Exponent is appropriate if the significand is
 543    treated as an integer, and normalizedExponent if the significand
 544    is taken to have the decimal point after a single leading
 545    non-zero digit.
 546
 547    If the value is zero, V->firstSigDigit points to a non-digit, and
 548    the return exponent is zero.
 549 */
 550 struct decimalInfo {
 551   const char *firstSigDigit;
 552   const char *lastSigDigit;
 553   int exponent;
 554   int normalizedExponent;
 555 };
 556
 557 static Error interpretDecimal(StringRef::iterator begin,
 558                               StringRef::iterator end, decimalInfo *D) {
 559   StringRef::iterator dot = end;
 560
 561   auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
 562   if (!PtrOrErr)
 563     return PtrOrErr.takeError();
 564   StringRef::iterator p = *PtrOrErr;
 565
 566   D->firstSigDigit = p;
 567   D->exponent = 0;
 568   D->normalizedExponent = 0;
 569
 570   for (; p != end; ++p) {
 571     if (*p == '.') {
 572       if (dot != end)
 573         return createError("String contains multiple dots");
 574       dot = p++;
 575       if (p == end)
 576         break;
 577     }
 578     if (decDigitValue(*p) >= 10U)
 579       break;
 580   }
 581
 582   if (p != end) {
 583     if (*p != 'e' && *p != 'E')
 584       return createError("Invalid character in significand");
 585     if (p == begin)
 586       return createError("Significand has no digits");
 587     if (dot != end && p - begin == 1)
 588       return createError("Significand has no digits");
 589
 590     /* p points to the first non-digit in the string */
 591     auto ExpOrErr = readExponent(p + 1, end);
 592     if (!ExpOrErr)
 593       return ExpOrErr.takeError();
 594     D->exponent = *ExpOrErr;
 595
 596     /* Implied decimal point?  */
 597     if (dot == end)
 598       dot = p;
 599   }
 600
 601   /* If number is all zeroes accept any exponent.  */
 602   if (p != D->firstSigDigit) {
 603     /* Drop insignificant trailing zeroes.  */
 604     if (p != begin) {
 605       do
 606         do
 607           p--;
 608         while (p != begin && *p == '0');
 609       while (p != begin && *p == '.');
 610     }
 611
 612     /* Adjust the exponents for any decimal point.  */
 613     D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
 614     D->normalizedExponent = (D->exponent +
 615               static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
 616                                       - (dot > D->firstSigDigit && dot < p)));
 617   }
 618
 619   D->lastSigDigit = p;
 620   return Error::success();
 621 }
 622
 623 /* Return the trailing fraction of a hexadecimal number.
 624    DIGITVALUE is the first hex digit of the fraction, P points to
 625    the next digit.  */
 626 static Expected<lostFraction>
 627 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
 628                             unsigned int digitValue) {
 629   unsigned int hexDigit;
 630
 631   /* If the first trailing digit isn't 0 or 8 we can work out the
 632      fraction immediately.  */
 633   if (digitValue > 8)
 634     return lfMoreThanHalf;
 635   else if (digitValue < 8 && digitValue > 0)
 636     return lfLessThanHalf;
 637
 638   // Otherwise we need to find the first non-zero digit.
 639   while (p != end && (*p == '0' || *p == '.'))
 640     p++;
 641
 642   if (p == end)
 643     return createError("Invalid trailing hexadecimal fraction!");
 644
 645   hexDigit = hexDigitValue(*p);
 646
 647   /* If we ran off the end it is exactly zero or one-half, otherwise
 648      a little more.  */
 649   if (hexDigit == UINT_MAX)
 650     return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
 651   else
 652     return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
 653 }
 654
 655 /* Return the fraction lost were a bignum truncated losing the least
 656    significant BITS bits.  */
 657 static lostFraction
 658 lostFractionThroughTruncation(const APFloatBase::integerPart *parts,
 659                               unsigned int partCount,
 660                               unsigned int bits)
 661 {
 662   unsigned int lsb;
 663
 664   lsb = APInt::tcLSB(parts, partCount);
 665
 666   /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX.  */
 667   if (bits <= lsb)
 668     return lfExactlyZero;
 669   if (bits == lsb + 1)
 670     return lfExactlyHalf;
 671   if (bits <= partCount * APFloatBase::integerPartWidth &&
 672       APInt::tcExtractBit(parts, bits - 1))
 673     return lfMoreThanHalf;
 674
 675   return lfLessThanHalf;
 676 }
 677
 678 /* Shift DST right BITS bits noting lost fraction.  */
 679 static lostFraction
 680 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
 681 {
 682   lostFraction lost_fraction;
 683
 684   lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
 685
 686   APInt::tcShiftRight(dst, parts, bits);
 687
 688   return lost_fraction;
 689 }
 690
 691 /* Combine the effect of two lost fractions.  */
 692 static lostFraction
 693 combineLostFractions(lostFraction moreSignificant,
 694                      lostFraction lessSignificant)
 695 {
 696   if (lessSignificant != lfExactlyZero) {
 697     if (moreSignificant == lfExactlyZero)
 698       moreSignificant = lfLessThanHalf;
 699     else if (moreSignificant == lfExactlyHalf)
 700       moreSignificant = lfMoreThanHalf;
 701   }
 702
 703   return moreSignificant;
 704 }
 705
 706 /* The error from the true value, in half-ulps, on multiplying two
 707    floating point numbers, which differ from the value they
 708    approximate by at most HUE1 and HUE2 half-ulps, is strictly less
 709    than the returned value.
 710
 711    See "How to Read Floating Point Numbers Accurately" by William D
 712    Clinger.  */
 713 static unsigned int
 714 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
 715 {
 716   assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
 717
 718   if (HUerr1 + HUerr2 == 0)
 719     return inexactMultiply * 2;  /* <= inexactMultiply half-ulps.  */
 720   else
 721     return inexactMultiply + 2 * (HUerr1 + HUerr2);
 722 }
 723
 724 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
 725    when the least significant BITS are truncated.  BITS cannot be
 726    zero.  */
 727 static APFloatBase::integerPart
 728 ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
 729                  bool isNearest) {
 730   unsigned int count, partBits;
 731   APFloatBase::integerPart part, boundary;
 732
 733   assert(bits != 0);
 734
 735   bits--;
 736   count = bits / APFloatBase::integerPartWidth;
 737   partBits = bits % APFloatBase::integerPartWidth + 1;
 738
 739   part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));
 740
 741   if (isNearest)
 742     boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
 743   else
 744     boundary = 0;
 745
 746   if (count == 0) {
 747     if (part - boundary <= boundary - part)
 748       return part - boundary;
 749     else
 750       return boundary - part;
 751   }
 752
 753   if (part == boundary) {
 754     while (--count)
 755       if (parts[count])
 756         return ~(APFloatBase::integerPart) 0; /* A lot.  */
 757
 758     return parts[0];
 759   } else if (part == boundary - 1) {
 760     while (--count)
 761       if (~parts[count])
 762         return ~(APFloatBase::integerPart) 0; /* A lot.  */
 763
 764     return -parts[0];
 765   }
 766
 767   return ~(APFloatBase::integerPart) 0; /* A lot.  */
 768 }
 769
 770 /* Place pow(5, power) in DST, and return the number of parts used.
 771    DST must be at least one part larger than size of the answer.  */
 772 static unsigned int
 773 powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
 774   static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
 775   APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
 776   pow5s[0] = 78125 * 5;
 777
 778   unsigned int partsCount = 1;
 779   APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
 780   unsigned int result;
 781   assert(power <= maxExponent);
 782
 783   p1 = dst;
 784   p2 = scratch;
 785
 786   *p1 = firstEightPowers[power & 7];
 787   power >>= 3;
 788
 789   result = 1;
 790   pow5 = pow5s;
 791
 792   for (unsigned int n = 0; power; power >>= 1, n++) {
 793     /* Calculate pow(5,pow(2,n+3)) if we haven't yet.  */
 794     if (n != 0) {
 795       APInt::tcFullMultiply(pow5, pow5 - partsCount, pow5 - partsCount,
 796                             partsCount, partsCount);
 797       partsCount *= 2;
 798       if (pow5[partsCount - 1] == 0)
 799         partsCount--;
 800     }
 801
 802     if (power & 1) {
 803       APFloatBase::integerPart *tmp;
 804
 805       APInt::tcFullMultiply(p2, p1, pow5, result, partsCount);
 806       result += partsCount;
 807       if (p2[result - 1] == 0)
 808         result--;
 809
 810       /* Now result is in p1 with partsCount parts and p2 is scratch
 811          space.  */
 812       tmp = p1;
 813       p1 = p2;
 814       p2 = tmp;
 815     }
 816
 817     pow5 += partsCount;
 818   }
 819
 820   if (p1 != dst)
 821     APInt::tcAssign(dst, p1, result);
 822
 823   return result;
 824 }
 825
 826 /* Zero at the end to avoid modular arithmetic when adding one; used
 827    when rounding up during hexadecimal output.  */
 828 static const char hexDigitsLower[] = "0123456789abcdef0";
 829 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
 830 static const char infinityL[] = "infinity";
 831 static const char infinityU[] = "INFINITY";
 832 static const char NaNL[] = "nan";
 833 static const char NaNU[] = "NAN";
 834
 835 /* Write out an integerPart in hexadecimal, starting with the most
 836    significant nibble.  Write out exactly COUNT hexdigits, return
 837    COUNT.  */
 838 static unsigned int
 839 partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
 840            const char *hexDigitChars)
 841 {
 842   unsigned int result = count;
 843
 844   assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);
 845
 846   part >>= (APFloatBase::integerPartWidth - 4 * count);
 847   while (count--) {
 848     dst[count] = hexDigitChars[part & 0xf];
 849     part >>= 4;
 850   }
 851
 852   return result;
 853 }
 854
 855 /* Write out an unsigned decimal integer.  */
 856 static char *
 857 writeUnsignedDecimal (char *dst, unsigned int n)
 858 {
 859   char buff[40], *p;
 860
 861   p = buff;
 862   do
 863     *p++ = '0' + n % 10;
 864   while (n /= 10);
 865
 866   do
 867     *dst++ = *--p;
 868   while (p != buff);
 869
 870   return dst;
 871 }
 872
 873 /* Write out a signed decimal integer.  */
 874 static char *
 875 writeSignedDecimal (char *dst, int value)
 876 {
 877   if (value < 0) {
 878     *dst++ = '-';
 879     dst = writeUnsignedDecimal(dst, -(unsigned) value);
 880   } else
 881     dst = writeUnsignedDecimal(dst, value);
 882
 883   return dst;
 884 }
 885
 886 namespace detail {
 887 /* Constructors.  */
 888 void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
 889   unsigned int count;
 890
 891   semantics = ourSemantics;
 892   count = partCount();
 893   if (count > 1)
 894     significand.parts = new integerPart[count];
 895 }
 896
 897 void IEEEFloat::freeSignificand() {
 898   if (needsCleanup())
 899     delete [] significand.parts;
 900 }
 901
 902 void IEEEFloat::assign(const IEEEFloat &rhs) {
 903   assert(semantics == rhs.semantics);
 904
 905   sign = rhs.sign;
 906   category = rhs.category;
 907   exponent = rhs.exponent;
 908   if (isFiniteNonZero() || category == fcNaN)
 909     copySignificand(rhs);
 910 }
 911
 912 void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
 913   assert(isFiniteNonZero() || category == fcNaN);
 914   assert(rhs.partCount() >= partCount());
 915
 916   APInt::tcAssign(significandParts(), rhs.significandParts(),
 917                   partCount());
 918 }
 919
/* Make this number a NaN.  SNaN requests a signalling rather than a quiet
   NaN, Negative supplies the sign, and fill, when non-null, supplies the
   payload bits that are copied into the significand before the
   quiet/signalling bit is adjusted.  Formats whose nonFiniteBehavior is
   NanOnly override both SNaN and fill (see below).  Must not be called for
   FiniteOnly formats, nor with Negative set for formats without a signed
   representation.  */
void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
    llvm_unreachable("This floating point format does not support NaN");

  if (Negative && !semantics->hasSignedRepr)
    llvm_unreachable(
        "This floating point format does not support signed values");

  category = fcNaN;
  sign = Negative;
  exponent = exponentNaN();

  integerPart *significand = significandParts();
  unsigned numParts = partCount();

  APInt fill_storage;
  if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
    // NaN-only formats do not distinguish signalling and quiet NaN, so a
    // request for a signalling NaN is dropped, and the payload is dictated by
    // the NaN encoding rather than by the caller's fill.
    SNaN = false;
    if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
      // The NaN is encoded with the sign set and an all-zero payload.
      sign = true;
      fill_storage = APInt::getZero(semantics->precision - 1);
    } else {
      // Otherwise the payload is all ones.
      fill_storage = APInt::getAllOnes(semantics->precision - 1);
    }
    fill = &fill_storage;
  }

  // Set the significand bits to the fill.  The significand is cleared first
  // when there is no fill, or when the fill has fewer words than the
  // significand and so would leave some parts unwritten.
  if (!fill || fill->getNumWords() < numParts)
    APInt::tcSet(significand, 0, numParts);
  if (fill) {
    APInt::tcAssign(significand, fill->getRawData(),
                    std::min(fill->getNumWords(), numParts));

    // Zero out the excess bits of the significand: only the low
    // (precision - 1) bits of the fill are kept.
    unsigned bitsToPreserve = semantics->precision - 1;
    unsigned part = bitsToPreserve / 64;
    bitsToPreserve %= 64;
    significand[part] &= ((1ULL << bitsToPreserve) - 1);
    for (part++; part != numParts; ++part)
      significand[part] = 0;
  }

  // The quiet bit is bit (precision - 2), clamped to bit 0 for formats with
  // a precision below 2.
  unsigned QNaNBit =
      (semantics->precision >= 2) ? (semantics->precision - 2) : 0;

  if (SNaN) {
    // We always have to clear the QNaN bit to make it an SNaN.
    APInt::tcClearBit(significand, QNaNBit);

    // If there are no bits set in the payload, we have to set
    // *something* to make it a NaN instead of an infinity;
    // conventionally, this is the next bit down from the QNaN bit.
    // NOTE(review): QNaNBit - 1 wraps around when QNaNBit is 0; presumably no
    // format that reaches this branch has a precision below 3 -- confirm.
    if (APInt::tcIsZero(significand, numParts))
      APInt::tcSetBit(significand, QNaNBit - 1);
  } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
    // The only NaN is a quiet NaN, and it has no bits set in the significand.
    // Do nothing.
  } else {
    // We always have to set the QNaN bit to make it a QNaN.
    APInt::tcSetBit(significand, QNaNBit);
  }

  // For x87 extended precision, we want to make a NaN, not a
  // pseudo-NaN.  Maybe we should expose the ability to make
  // pseudo-NaNs?
  if (semantics == &semX87DoubleExtended)
    APInt::tcSetBit(significand, QNaNBit + 1);
}
 994
 995 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) {
 996   if (this != &rhs) {
 997     if (semantics != rhs.semantics) {
 998       freeSignificand();
 999       initialize(rhs.semantics);
1000     }
1001     assign(rhs);
1002   }
1003
1004   return *this;
1005 }
1006
1007 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
1008   freeSignificand();
1009
1010   semantics = rhs.semantics;
1011   significand = rhs.significand;
1012   exponent = rhs.exponent;
1013   category = rhs.category;
1014   sign = rhs.sign;
1015
1016   rhs.semantics = &semBogus;
1017   return *this;
1018 }
1019
1020 bool IEEEFloat::isDenormal() const {
1021   return isFiniteNonZero() && (exponent == semantics->minExponent) &&
1022          (APInt::tcExtractBit(significandParts(),
1023                               semantics->precision - 1) == 0);
1024 }
1025
1026 bool IEEEFloat::isSmallest() const {
1027   // The smallest number by magnitude in our format will be the smallest
1028   // denormal, i.e. the floating point number with exponent being minimum
1029   // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
1030   return isFiniteNonZero() && exponent == semantics->minExponent &&
1031     significandMSB() == 0;
1032 }
1033
1034 bool IEEEFloat::isSmallestNormalized() const {
1035   return getCategory() == fcNormal && exponent == semantics->minExponent &&
1036          isSignificandAllZerosExceptMSB();
1037 }
1038
1039 unsigned int IEEEFloat::getNumHighBits() const {
1040   const unsigned int PartCount = partCountForBits(semantics->precision);
1041   const unsigned int Bits = PartCount * integerPartWidth;
1042
1043   // Compute how many bits are used in the final word.
1044   // When precision is just 1, it represents the 'Pth'
1045   // Precision bit and not the actual significand bit.
1046   const unsigned int NumHighBits = (semantics->precision > 1)
1047                                        ? (Bits - semantics->precision + 1)
1048                                        : (Bits - semantics->precision);
1049   return NumHighBits;
1050 }
1051
1052 bool IEEEFloat::isSignificandAllOnes() const {
1053   // Test if the significand excluding the integral bit is all ones. This allows
1054   // us to test for binade boundaries.
1055   const integerPart *Parts = significandParts();
1056   const unsigned PartCount = partCountForBits(semantics->precision);
1057   for (unsigned i = 0; i < PartCount - 1; i++)
1058     if (~Parts[i])
1059       return false;
1060
1061   // Set the unused high bits to all ones when we compare.
1062   const unsigned NumHighBits = getNumHighBits();
1063   assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1064          "Can not have more high bits to fill than integerPartWidth");
1065   const integerPart HighBitFill =
1066     ~integerPart(0) << (integerPartWidth - NumHighBits);
1067   if ((semantics->precision <= 1) || (~(Parts[PartCount - 1] | HighBitFill)))
1068     return false;
1069
1070   return true;
1071 }
1072
1073 bool IEEEFloat::isSignificandAllOnesExceptLSB() const {
1074   // Test if the significand excluding the integral bit is all ones except for
1075   // the least significant bit.
1076   const integerPart *Parts = significandParts();
1077
1078   if (Parts[0] & 1)
1079     return false;
1080
1081   const unsigned PartCount = partCountForBits(semantics->precision);
1082   for (unsigned i = 0; i < PartCount - 1; i++) {
1083     if (~Parts[i] & ~unsigned{!i})
1084       return false;
1085   }
1086
1087   // Set the unused high bits to all ones when we compare.
1088   const unsigned NumHighBits = getNumHighBits();
1089   assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1090          "Can not have more high bits to fill than integerPartWidth");
1091   const integerPart HighBitFill = ~integerPart(0)
1092                                   << (integerPartWidth - NumHighBits);
1093   if (~(Parts[PartCount - 1] | HighBitFill | 0x1))
1094     return false;
1095
1096   return true;
1097 }
1098
1099 bool IEEEFloat::isSignificandAllZeros() const {
1100   // Test if the significand excluding the integral bit is all zeros. This
1101   // allows us to test for binade boundaries.
1102   const integerPart *Parts = significandParts();
1103   const unsigned PartCount = partCountForBits(semantics->precision);
1104
1105   for (unsigned i = 0; i < PartCount - 1; i++)
1106     if (Parts[i])
1107       return false;
1108
1109   // Compute how many bits are used in the final word.
1110   const unsigned NumHighBits = getNumHighBits();
1111   assert(NumHighBits < integerPartWidth && "Can not have more high bits to "
1112          "clear than integerPartWidth");
1113   const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
1114
1115   if ((semantics->precision > 1) && (Parts[PartCount - 1] & HighBitMask))
1116     return false;
1117
1118   return true;
1119 }
1120
1121 bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
1122   const integerPart *Parts = significandParts();
1123   const unsigned PartCount = partCountForBits(semantics->precision);
1124
1125   for (unsigned i = 0; i < PartCount - 1; i++) {
1126     if (Parts[i])
1127       return false;
1128   }
1129
1130   const unsigned NumHighBits = getNumHighBits();
1131   const integerPart MSBMask = integerPart(1)
1132                               << (integerPartWidth - NumHighBits);
1133   return ((semantics->precision <= 1) || (Parts[PartCount - 1] == MSBMask));
1134 }
1135
1136 bool IEEEFloat::isLargest() const {
1137   bool IsMaxExp = isFiniteNonZero() && exponent == semantics->maxExponent;
1138   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1139       semantics->nanEncoding == fltNanEncoding::AllOnes) {
1140     // The largest number by magnitude in our format will be the floating point
1141     // number with maximum exponent and with significand that is all ones except
1142     // the LSB.
1143     return (IsMaxExp && APFloat::hasSignificand(*semantics))
1144                ? isSignificandAllOnesExceptLSB()
1145                : IsMaxExp;
1146   } else {
1147     // The largest number by magnitude in our format will be the floating point
1148     // number with maximum exponent and with significand that is all ones.
1149     return IsMaxExp && isSignificandAllOnes();
1150   }
1151 }
1152
1153 bool IEEEFloat::isInteger() const {
1154   // This could be made more efficient; I'm going for obviously correct.
1155   if (!isFinite()) return false;
1156   IEEEFloat truncated = *this;
1157   truncated.roundToIntegral(rmTowardZero);
1158   return compare(truncated) == cmpEqual;
1159 }
1160
1161 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
1162   if (this == &rhs)
1163     return true;
1164   if (semantics != rhs.semantics ||
1165       category != rhs.category ||
1166       sign != rhs.sign)
1167     return false;
1168   if (category==fcZero || category==fcInfinity)
1169     return true;
1170
1171   if (isFiniteNonZero() && exponent != rhs.exponent)
1172     return false;
1173
1174   return std::equal(significandParts(), significandParts() + partCount(),
1175                     rhs.significandParts());
1176 }
1177
1178 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) {
1179   initialize(&ourSemantics);
1180   sign = 0;
1181   category = fcNormal;
1182   zeroSignificand();
1183   exponent = ourSemantics.precision - 1;
1184   significandParts()[0] = value;
1185   normalize(rmNearestTiesToEven, lfExactlyZero);
1186 }
1187
1188 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) {
1189   initialize(&ourSemantics);
1190   // The Float8E8MOFNU format does not have a representation
1191   // for zero. So, use the closest representation instead.
1192   // Moreover, the all-zero encoding represents a valid
1193   // normal value (which is the smallestNormalized here).
1194   // Hence, we call makeSmallestNormalized (where category is
1195   // 'fcNormal') instead of makeZero (where category is 'fcZero').
1196   ourSemantics.hasZero ? makeZero(false) : makeSmallestNormalized(false);
1197 }
1198
// Despite the "uninitialized" tag, delegate to the semantics-only constructor
// so that the object is fully initialized: a later copy construction may
// actually inspect the category, which therefore can't be garbage.
IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
    : IEEEFloat(ourSemantics) {}
1203
1204 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) {
1205   initialize(rhs.semantics);
1206   assign(rhs);
1207 }
1208
1209 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) {
1210   *this = std::move(rhs);
1211 }
1212
1213 IEEEFloat::~IEEEFloat() { freeSignificand(); }
1214
1215 unsigned int IEEEFloat::partCount() const {
1216   return partCountForBits(semantics->precision + 1);
1217 }
1218
1219 const APFloat::integerPart *IEEEFloat::significandParts() const {
1220   return const_cast<IEEEFloat *>(this)->significandParts();
1221 }
1222
1223 APFloat::integerPart *IEEEFloat::significandParts() {
1224   if (partCount() > 1)
1225     return significand.parts;
1226   else
1227     return &significand.part;
1228 }
1229
1230 void IEEEFloat::zeroSignificand() {
1231   APInt::tcSet(significandParts(), 0, partCount());
1232 }
1233
1234 /* Increment an fcNormal floating point number's significand.  */
1235 void IEEEFloat::incrementSignificand() {
1236   integerPart carry;
1237
1238   carry = APInt::tcIncrement(significandParts(), partCount());
1239
1240   /* Our callers should never cause us to overflow.  */
1241   assert(carry == 0);
1242   (void)carry;
1243 }
1244
1245 /* Add the significand of the RHS.  Returns the carry flag.  */
1246 APFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
1247   integerPart *parts;
1248
1249   parts = significandParts();
1250
1251   assert(semantics == rhs.semantics);
1252   assert(exponent == rhs.exponent);
1253
1254   return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
1255 }
1256
1257 /* Subtract the significand of the RHS with a borrow flag.  Returns
1258    the borrow flag.  */
1259 APFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
1260                                                     integerPart borrow) {
1261   integerPart *parts;
1262
1263   parts = significandParts();
1264
1265   assert(semantics == rhs.semantics);
1266   assert(exponent == rhs.exponent);
1267
1268   return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
1269                            partCount());
1270 }
1271
/* Multiply our significand by the significand of RHS.  Unless IGNOREADDEND
   is set or ADDEND is zero, add ADDEND on to the full-precision result of
   the multiplication.  The exponent is updated to match, but the result is
   not necessarily normalized.  Returns the lost fraction.  */
lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
                                            IEEEFloat addend,
                                            bool ignoreAddend) {
  unsigned int omsb;        // One, not zero, based MSB.
  unsigned int partsCount, newPartsCount, precision;
  integerPart *lhsSignificand;
  integerPart scratch[4];
  integerPart *fullSignificand;
  lostFraction lost_fraction;
  bool ignored;

  assert(semantics == rhs.semantics);

  precision = semantics->precision;

  // Allocate space for twice as many bits as the original significand, plus one
  // extra bit for the addition to overflow into.
  newPartsCount = partCountForBits(precision * 2 + 1);

  // Use the stack scratch buffer when it is large enough; otherwise allocate,
  // and release the allocation at the end of the function.
  if (newPartsCount > 4)
    fullSignificand = new integerPart[newPartsCount];
  else
    fullSignificand = scratch;

  lhsSignificand = significandParts();
  partsCount = partCount();

  APInt::tcFullMultiply(fullSignificand, lhsSignificand,
                        rhs.significandParts(), partsCount, partsCount);

  lost_fraction = lfExactlyZero;
  omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
  exponent += rhs.exponent;

  // Assume the operands involved in the multiplication are single-precision
  // FP, and the two multiplicands are:
  //   *this = a23 . a22 ... a0 * 2^e1
  //     rhs = b23 . b22 ... b0 * 2^e2
  // the result of multiplication is:
  //   *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
  // Note that there are three significant bits at the left-hand side of the
  // radix point: two for the multiplication, and an overflow bit for the
  // addition (that will always be zero at this point). Move the radix point
  // toward left by two bits, and adjust exponent accordingly.
  exponent += 2;

  if (!ignoreAddend && addend.isNonZero()) {
    // The intermediate result of the multiplication has "2 * precision"
    // significant bits; adjust the addend to be consistent with mul result.
    //
    // While the addition runs, this object temporarily takes on the extended
    // semantics and uses fullSignificand as its significand; the original
    // significand and semantics are saved here and restored afterwards.
    Significand savedSignificand = significand;
    const fltSemantics *savedSemantics = semantics;
    fltSemantics extendedSemantics;
    opStatus status;
    unsigned int extendedPrecision;

    // Normalize our MSB to one below the top bit to allow for overflow.
    extendedPrecision = 2 * precision + 1;
    if (omsb != extendedPrecision - 1) {
      assert(extendedPrecision > omsb);
      APInt::tcShiftLeft(fullSignificand, newPartsCount,
                         (extendedPrecision - 1) - omsb);
      exponent -= (extendedPrecision - 1) - omsb;
    }

    /* Create new semantics.  */
    extendedSemantics = *semantics;
    extendedSemantics.precision = extendedPrecision;

    if (newPartsCount == 1)
      significand.part = fullSignificand[0];
    else
      significand.parts = fullSignificand;
    semantics = &extendedSemantics;

    // Make a copy so we can convert it to the extended semantics.
    // Note that we cannot convert the addend directly, as the extendedSemantics
    // is a local variable (which we take a reference to).
    IEEEFloat extendedAddend(addend);
    status = extendedAddend.convert(extendedSemantics, APFloat::rmTowardZero,
                                    &ignored);
    assert(status == APFloat::opOK);
    (void)status;

    // Shift the significand of the addend right by one bit. This guarantees
    // that the high bit of the significand is zero (same as fullSignificand),
    // so the addition will overflow (if it does overflow at all) into the top bit.
    lost_fraction = extendedAddend.shiftSignificandRight(1);
    assert(lost_fraction == lfExactlyZero &&
           "Lost precision while shifting addend for fused-multiply-add.");

    lost_fraction = addOrSubtractSignificand(extendedAddend, false);

    /* Restore our state.  In the single-part case the sum lives in
       significand.part, so copy it back into fullSignificand first.  */
    if (newPartsCount == 1)
      fullSignificand[0] = significand.part;
    significand = savedSignificand;
    semantics = savedSemantics;

    omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
  }

  // Convert the result having "2 * precision" significant-bits back to the one
  // having "precision" significant-bits. First, move the radix point from
  // position "2*precision - 1" to "precision - 1". The exponent needs to be
  // adjusted by "2*precision - 1" - "precision - 1" = "precision".
  // NOTE(review): the statement below subtracts precision + 1, one more than
  // this derivation gives; presumably the extra one pairs with the
  // "exponent += 2" adjustment above -- confirm.
  exponent -= precision + 1;

  // In case MSB resides at the left-hand side of radix point, shift the
  // mantissa right by some amount to make sure the MSB reside right before
  // the radix point (i.e. "MSB . rest-significant-bits").
  //
  // Note that the result is not normalized when "omsb < precision". So, the
  // caller needs to call IEEEFloat::normalize() if normalized value is
  // expected.
  if (omsb > precision) {
    unsigned int bits, significantParts;
    lostFraction lf;

    bits = omsb - precision;
    significantParts = partCountForBits(omsb);
    lf = shiftRight(fullSignificand, significantParts, bits);
    lost_fraction = combineLostFractions(lf, lost_fraction);
    exponent += bits;
  }

  // Copy the low partsCount parts of the product back into our significand.
  APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);

  if (newPartsCount > 4)
    delete [] fullSignificand;

  return lost_fraction;
}
1408
1409 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
1410   // When the given semantics has zero, the addend here is a zero.
1411   // i.e . it belongs to the 'fcZero' category.
1412   // But when the semantics does not support zero, we need to
1413   // explicitly convey that this addend should be ignored
1414   // for multiplication.
1415   return multiplySignificand(rhs, IEEEFloat(*semantics), !semantics->hasZero);
1416 }
1417
/* Divide our significand by the significand of RHS using bit-by-bit long
   division.  The quotient replaces our significand and the exponent is
   adjusted to match.  Returns the lost fraction.  */
lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
  unsigned int bit, i, partsCount;
  const integerPart *rhsSignificand;
  integerPart *lhsSignificand, *dividend, *divisor;
  integerPart scratch[4];
  lostFraction lost_fraction;

  assert(semantics == rhs.semantics);

  lhsSignificand = significandParts();
  rhsSignificand = rhs.significandParts();
  partsCount = partCount();

  /* Working copies of the dividend and divisor sit side by side in one
     buffer: the stack scratch space when it is large enough, otherwise an
     allocation that is released before returning.  */
  if (partsCount > 2)
    dividend = new integerPart[partsCount * 2];
  else
    dividend = scratch;

  divisor = dividend + partsCount;

  /* Copy the dividend and divisor as they will be modified in-place, and
     clear our significand so the quotient bits can be set into it.  */
  for (i = 0; i < partsCount; i++) {
    dividend[i] = lhsSignificand[i];
    divisor[i] = rhsSignificand[i];
    lhsSignificand[i] = 0;
  }

  exponent -= rhs.exponent;

  unsigned int precision = semantics->precision;

  /* Normalize the divisor: shift its MSB up to bit (precision - 1) and
     compensate in the exponent.  */
  bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
  if (bit) {
    exponent += bit;
    APInt::tcShiftLeft(divisor, partsCount, bit);
  }

  /* Normalize the dividend in the same way.  */
  bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
  if (bit) {
    exponent -= bit;
    APInt::tcShiftLeft(dividend, partsCount, bit);
  }

  /* Ensure the dividend >= divisor initially for the loop below.
     Incidentally, this means that the division loop below is
     guaranteed to set the integer bit to one.  */
  if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
    exponent--;
    APInt::tcShiftLeft(dividend, partsCount, 1);
    assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
  }

  /* Long division: produce one quotient bit per iteration, from the most
     significant bit down.  */
  for (bit = precision; bit; bit -= 1) {
    if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
      APInt::tcSubtract(dividend, divisor, 0, partsCount);
      APInt::tcSetBit(lhsSignificand, bit - 1);
    }

    APInt::tcShiftLeft(dividend, partsCount, 1);
  }

  /* Figure out the lost fraction by comparing the remainder, already shifted
     left by one, against the divisor.  */
  int cmp = APInt::tcCompare(dividend, divisor, partsCount);

  if (cmp > 0)
    lost_fraction = lfMoreThanHalf;
  else if (cmp == 0)
    lost_fraction = lfExactlyHalf;
  else if (APInt::tcIsZero(dividend, partsCount))
    lost_fraction = lfExactlyZero;
  else
    lost_fraction = lfLessThanHalf;

  if (partsCount > 2)
    delete [] dividend;

  return lost_fraction;
}
1500
1501 unsigned int IEEEFloat::significandMSB() const {
1502   return APInt::tcMSB(significandParts(), partCount());
1503 }
1504
1505 unsigned int IEEEFloat::significandLSB() const {
1506   return APInt::tcLSB(significandParts(), partCount());
1507 }
1508
1509 /* Note that a zero result is NOT normalized to fcZero.  */
1510 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1511   /* Our exponent should not overflow.  */
1512   assert((ExponentType) (exponent + bits) >= exponent);
1513
1514   exponent += bits;
1515
1516   return shiftRight(significandParts(), partCount(), bits);
1517 }
1518
1519 /* Shift the significand left BITS bits, subtract BITS from its exponent.  */
1520 void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1521   assert(bits < semantics->precision ||
1522          (semantics->precision == 1 && bits <= 1));
1523
1524   if (bits) {
1525     unsigned int partsCount = partCount();
1526
1527     APInt::tcShiftLeft(significandParts(), partsCount, bits);
1528     exponent -= bits;
1529
1530     assert(!APInt::tcIsZero(significandParts(), partsCount));
1531   }
1532 }
1533
1534 APFloat::cmpResult IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
1535   int compare;
1536
1537   assert(semantics == rhs.semantics);
1538   assert(isFiniteNonZero());
1539   assert(rhs.isFiniteNonZero());
1540
1541   compare = exponent - rhs.exponent;
1542
1543   /* If exponents are equal, do an unsigned bignum comparison of the
1544      significands.  */
1545   if (compare == 0)
1546     compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1547                                partCount());
1548
1549   if (compare > 0)
1550     return cmpGreaterThan;
1551   else if (compare < 0)
1552     return cmpLessThan;
1553   else
1554     return cmpEqual;
1555 }
1556
1557 /* Set the least significant BITS bits of a bignum, clear the
1558    rest.  */
1559 static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
1560                                       unsigned bits) {
1561   unsigned i = 0;
1562   while (bits > APInt::APINT_BITS_PER_WORD) {
1563     dst[i++] = ~(APInt::WordType)0;
1564     bits -= APInt::APINT_BITS_PER_WORD;
1565   }
1566
1567   if (bits)
1568     dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);
1569
1570   while (i < parts)
1571     dst[i++] = 0;
1572 }
1573
1574 /* Handle overflow.  Sign is preserved.  We either become infinity or
1575    the largest finite number.  */
1576 APFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1577   if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::FiniteOnly) {
1578     /* Infinity?  */
1579     if (rounding_mode == rmNearestTiesToEven ||
1580         rounding_mode == rmNearestTiesToAway ||
1581         (rounding_mode == rmTowardPositive && !sign) ||
1582         (rounding_mode == rmTowardNegative && sign)) {
1583       if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1584         makeNaN(false, sign);
1585       else
1586         category = fcInfinity;
1587       return static_cast<opStatus>(opOverflow | opInexact);
1588     }
1589   }
1590
1591   /* Otherwise we become the largest finite number.  */
1592   category = fcNormal;
1593   exponent = semantics->maxExponent;
1594   tcSetLeastSignificantBits(significandParts(), partCount(),
1595                             semantics->precision);
1596   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1597       semantics->nanEncoding == fltNanEncoding::AllOnes)
1598     APInt::tcClearBit(significandParts(), 0);
1599
1600   return opInexact;
1601 }
1602
1603 /* Returns TRUE if, when truncating the current number, with BIT the
1604    new LSB, with the given lost fraction and rounding mode, the result
1605    would need to be rounded away from zero (i.e., by increasing the
1606    signficand).  This routine must work for fcZero of both signs, and
1607    fcNormal numbers.  */
1608 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1609                                   lostFraction lost_fraction,
1610                                   unsigned int bit) const {
1611   /* NaNs and infinities should not have lost fractions.  */
1612   assert(isFiniteNonZero() || category == fcZero);
1613
1614   /* Current callers never pass this so we don't handle it.  */
1615   assert(lost_fraction != lfExactlyZero);
1616
1617   switch (rounding_mode) {
1618   case rmNearestTiesToAway:
1619     return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1620
1621   case rmNearestTiesToEven:
1622     if (lost_fraction == lfMoreThanHalf)
1623       return true;
1624
1625     /* Our zeroes don't have a significand to test.  */
1626     if (lost_fraction == lfExactlyHalf && category != fcZero)
1627       return APInt::tcExtractBit(significandParts(), bit);
1628
1629     return false;
1630
1631   case rmTowardZero:
1632     return false;
1633
1634   case rmTowardPositive:
1635     return !sign;
1636
1637   case rmTowardNegative:
1638     return sign;
1639
1640   default:
1641     break;
1642   }
1643   llvm_unreachable("Invalid rounding mode found");
1644 }
1645
1646 APFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1647                                        lostFraction lost_fraction) {
1648   unsigned int omsb;                /* One, not zero, based MSB.  */
1649   int exponentChange;
1650
1651   if (!isFiniteNonZero())
1652     return opOK;
1653
1654   /* Before rounding normalize the exponent of fcNormal numbers.  */
1655   omsb = significandMSB() + 1;
1656
1657   if (omsb) {
1658     /* OMSB is numbered from 1.  We want to place it in the integer
1659        bit numbered PRECISION if possible, with a compensating change in
1660        the exponent.  */
1661     exponentChange = omsb - semantics->precision;
1662
1663     /* If the resulting exponent is too high, overflow according to
1664        the rounding mode.  */
1665     if (exponent + exponentChange > semantics->maxExponent)
1666       return handleOverflow(rounding_mode);
1667
1668     /* Subnormal numbers have exponent minExponent, and their MSB
1669        is forced based on that.  */
1670     if (exponent + exponentChange < semantics->minExponent)
1671       exponentChange = semantics->minExponent - exponent;
1672
1673     /* Shifting left is easy as we don't lose precision.  */
1674     if (exponentChange < 0) {
1675       assert(lost_fraction == lfExactlyZero);
1676
1677       shiftSignificandLeft(-exponentChange);
1678
1679       return opOK;
1680     }
1681
1682     if (exponentChange > 0) {
1683       lostFraction lf;
1684
1685       /* Shift right and capture any new lost fraction.  */
1686       lf = shiftSignificandRight(exponentChange);
1687
1688       lost_fraction = combineLostFractions(lf, lost_fraction);
1689
1690       /* Keep OMSB up-to-date.  */
1691       if (omsb > (unsigned) exponentChange)
1692         omsb -= exponentChange;
1693       else
1694         omsb = 0;
1695     }
1696   }
1697
1698   // The all-ones values is an overflow if NaN is all ones. If NaN is
1699   // represented by negative zero, then it is a valid finite value.
1700   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1701       semantics->nanEncoding == fltNanEncoding::AllOnes &&
1702       exponent == semantics->maxExponent && isSignificandAllOnes())
1703     return handleOverflow(rounding_mode);
1704
1705   /* Now round the number according to rounding_mode given the lost
1706      fraction.  */
1707
1708   /* As specified in IEEE 754, since we do not trap we do not report
1709      underflow for exact results.  */
1710   if (lost_fraction == lfExactlyZero) {
1711     /* Canonicalize zeroes.  */
1712     if (omsb == 0) {
1713       category = fcZero;
1714       if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1715         sign = false;
1716       if (!semantics->hasZero)
1717         makeSmallestNormalized(false);
1718     }
1719
1720     return opOK;
1721   }
1722
1723   /* Increment the significand if we're rounding away from zero.  */
1724   if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1725     if (omsb == 0)
1726       exponent = semantics->minExponent;
1727
1728     incrementSignificand();
1729     omsb = significandMSB() + 1;
1730
1731     /* Did the significand increment overflow?  */
1732     if (omsb == (unsigned) semantics->precision + 1) {
1733       /* Renormalize by incrementing the exponent and shifting our
1734          significand right one.  However if we already have the
1735          maximum exponent we overflow to infinity.  */
1736       if (exponent == semantics->maxExponent)
1737         // Invoke overflow handling with a rounding mode that will guarantee
1738         // that the result gets turned into the correct infinity representation.
1739         // This is needed instead of just setting the category to infinity to
1740         // account for 8-bit floating point types that have no inf, only NaN.
1741         return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);
1742
1743       shiftSignificandRight(1);
1744
1745       return opInexact;
1746     }
1747
1748     // The all-ones values is an overflow if NaN is all ones. If NaN is
1749     // represented by negative zero, then it is a valid finite value.
1750     if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1751         semantics->nanEncoding == fltNanEncoding::AllOnes &&
1752         exponent == semantics->maxExponent && isSignificandAllOnes())
1753       return handleOverflow(rounding_mode);
1754   }
1755
1756   /* The normal case - we were and are not denormal, and any
1757      significand increment above didn't overflow.  */
1758   if (omsb == semantics->precision)
1759     return opInexact;
1760
1761   /* We have a non-zero denormal.  */
1762   assert(omsb < semantics->precision);
1763
1764   /* Canonicalize zeroes.  */
1765   if (omsb == 0) {
1766     category = fcZero;
1767     if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1768       sign = false;
1769     // This condition handles the case where the semantics
1770     // does not have zero but uses the all-zero encoding
1771     // to represent the smallest normal value.
1772     if (!semantics->hasZero)
1773       makeSmallestNormalized(false);
1774   }
1775
1776   /* The fcZero case is a denormal that underflowed to zero.  */
1777   return (opStatus) (opUnderflow | opInexact);
1778 }
1779
1780 APFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1781                                                    bool subtract) {
1782   switch (PackCategoriesIntoKey(category, rhs.category)) {
1783   default:
1784     llvm_unreachable(nullptr);
1785
1786   case PackCategoriesIntoKey(fcZero, fcNaN):
1787   case PackCategoriesIntoKey(fcNormal, fcNaN):
1788   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1789     assign(rhs);
1790     [[fallthrough]];
1791   case PackCategoriesIntoKey(fcNaN, fcZero):
1792   case PackCategoriesIntoKey(fcNaN, fcNormal):
1793   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1794   case PackCategoriesIntoKey(fcNaN, fcNaN):
1795     if (isSignaling()) {
1796       makeQuiet();
1797       return opInvalidOp;
1798     }
1799     return rhs.isSignaling() ? opInvalidOp : opOK;
1800
1801   case PackCategoriesIntoKey(fcNormal, fcZero):
1802   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1803   case PackCategoriesIntoKey(fcInfinity, fcZero):
1804     return opOK;
1805
1806   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1807   case PackCategoriesIntoKey(fcZero, fcInfinity):
1808     category = fcInfinity;
1809     sign = rhs.sign ^ subtract;
1810     return opOK;
1811
1812   case PackCategoriesIntoKey(fcZero, fcNormal):
1813     assign(rhs);
1814     sign = rhs.sign ^ subtract;
1815     return opOK;
1816
1817   case PackCategoriesIntoKey(fcZero, fcZero):
1818     /* Sign depends on rounding mode; handled by caller.  */
1819     return opOK;
1820
1821   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1822     /* Differently signed infinities can only be validly
1823        subtracted.  */
1824     if (((sign ^ rhs.sign)!=0) != subtract) {
1825       makeNaN();
1826       return opInvalidOp;
1827     }
1828
1829     return opOK;
1830
1831   case PackCategoriesIntoKey(fcNormal, fcNormal):
1832     return opDivByZero;
1833   }
1834 }
1835
1836 /* Add or subtract two normal numbers.  */
1837 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1838                                                  bool subtract) {
1839   integerPart carry;
1840   lostFraction lost_fraction;
1841   int bits;
1842
1843   /* Determine if the operation on the absolute values is effectively
1844      an addition or subtraction.  */
1845   subtract ^= static_cast<bool>(sign ^ rhs.sign);
1846
1847   /* Are we bigger exponent-wise than the RHS?  */
1848   bits = exponent - rhs.exponent;
1849
1850   /* Subtraction is more subtle than one might naively expect.  */
1851   if (subtract) {
1852     if ((bits < 0) && !semantics->hasSignedRepr)
1853       llvm_unreachable(
1854           "This floating point format does not support signed values");
1855
1856     IEEEFloat temp_rhs(rhs);
1857
1858     if (bits == 0)
1859       lost_fraction = lfExactlyZero;
1860     else if (bits > 0) {
1861       lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1862       shiftSignificandLeft(1);
1863     } else {
1864       lost_fraction = shiftSignificandRight(-bits - 1);
1865       temp_rhs.shiftSignificandLeft(1);
1866     }
1867
1868     // Should we reverse the subtraction.
1869     if (compareAbsoluteValue(temp_rhs) == cmpLessThan) {
1870       carry = temp_rhs.subtractSignificand
1871         (*this, lost_fraction != lfExactlyZero);
1872       copySignificand(temp_rhs);
1873       sign = !sign;
1874     } else {
1875       carry = subtractSignificand
1876         (temp_rhs, lost_fraction != lfExactlyZero);
1877     }
1878
1879     /* Invert the lost fraction - it was on the RHS and
1880        subtracted.  */
1881     if (lost_fraction == lfLessThanHalf)
1882       lost_fraction = lfMoreThanHalf;
1883     else if (lost_fraction == lfMoreThanHalf)
1884       lost_fraction = lfLessThanHalf;
1885
1886     /* The code above is intended to ensure that no borrow is
1887        necessary.  */
1888     assert(!carry);
1889     (void)carry;
1890   } else {
1891     if (bits > 0) {
1892       IEEEFloat temp_rhs(rhs);
1893
1894       lost_fraction = temp_rhs.shiftSignificandRight(bits);
1895       carry = addSignificand(temp_rhs);
1896     } else {
1897       lost_fraction = shiftSignificandRight(-bits);
1898       carry = addSignificand(rhs);
1899     }
1900
1901     /* We have a guard bit; generating a carry cannot happen.  */
1902     assert(!carry);
1903     (void)carry;
1904   }
1905
1906   return lost_fraction;
1907 }
1908
1909 APFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1910   switch (PackCategoriesIntoKey(category, rhs.category)) {
1911   default:
1912     llvm_unreachable(nullptr);
1913
1914   case PackCategoriesIntoKey(fcZero, fcNaN):
1915   case PackCategoriesIntoKey(fcNormal, fcNaN):
1916   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1917     assign(rhs);
1918     sign = false;
1919     [[fallthrough]];
1920   case PackCategoriesIntoKey(fcNaN, fcZero):
1921   case PackCategoriesIntoKey(fcNaN, fcNormal):
1922   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1923   case PackCategoriesIntoKey(fcNaN, fcNaN):
1924     sign ^= rhs.sign; // restore the original sign
1925     if (isSignaling()) {
1926       makeQuiet();
1927       return opInvalidOp;
1928     }
1929     return rhs.isSignaling() ? opInvalidOp : opOK;
1930
1931   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1932   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1933   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1934     category = fcInfinity;
1935     return opOK;
1936
1937   case PackCategoriesIntoKey(fcZero, fcNormal):
1938   case PackCategoriesIntoKey(fcNormal, fcZero):
1939   case PackCategoriesIntoKey(fcZero, fcZero):
1940     category = fcZero;
1941     return opOK;
1942
1943   case PackCategoriesIntoKey(fcZero, fcInfinity):
1944   case PackCategoriesIntoKey(fcInfinity, fcZero):
1945     makeNaN();
1946     return opInvalidOp;
1947
1948   case PackCategoriesIntoKey(fcNormal, fcNormal):
1949     return opOK;
1950   }
1951 }
1952
1953 APFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1954   switch (PackCategoriesIntoKey(category, rhs.category)) {
1955   default:
1956     llvm_unreachable(nullptr);
1957
1958   case PackCategoriesIntoKey(fcZero, fcNaN):
1959   case PackCategoriesIntoKey(fcNormal, fcNaN):
1960   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1961     assign(rhs);
1962     sign = false;
1963     [[fallthrough]];
1964   case PackCategoriesIntoKey(fcNaN, fcZero):
1965   case PackCategoriesIntoKey(fcNaN, fcNormal):
1966   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1967   case PackCategoriesIntoKey(fcNaN, fcNaN):
1968     sign ^= rhs.sign; // restore the original sign
1969     if (isSignaling()) {
1970       makeQuiet();
1971       return opInvalidOp;
1972     }
1973     return rhs.isSignaling() ? opInvalidOp : opOK;
1974
1975   case PackCategoriesIntoKey(fcInfinity, fcZero):
1976   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1977   case PackCategoriesIntoKey(fcZero, fcInfinity):
1978   case PackCategoriesIntoKey(fcZero, fcNormal):
1979     return opOK;
1980
1981   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1982     category = fcZero;
1983     return opOK;
1984
1985   case PackCategoriesIntoKey(fcNormal, fcZero):
1986     if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1987       makeNaN(false, sign);
1988     else
1989       category = fcInfinity;
1990     return opDivByZero;
1991
1992   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1993   case PackCategoriesIntoKey(fcZero, fcZero):
1994     makeNaN();
1995     return opInvalidOp;
1996
1997   case PackCategoriesIntoKey(fcNormal, fcNormal):
1998     return opOK;
1999   }
2000 }
2001
2002 APFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
2003   switch (PackCategoriesIntoKey(category, rhs.category)) {
2004   default:
2005     llvm_unreachable(nullptr);
2006
2007   case PackCategoriesIntoKey(fcZero, fcNaN):
2008   case PackCategoriesIntoKey(fcNormal, fcNaN):
2009   case PackCategoriesIntoKey(fcInfinity, fcNaN):
2010     assign(rhs);
2011     [[fallthrough]];
2012   case PackCategoriesIntoKey(fcNaN, fcZero):
2013   case PackCategoriesIntoKey(fcNaN, fcNormal):
2014   case PackCategoriesIntoKey(fcNaN, fcInfinity):
2015   case PackCategoriesIntoKey(fcNaN, fcNaN):
2016     if (isSignaling()) {
2017       makeQuiet();
2018       return opInvalidOp;
2019     }
2020     return rhs.isSignaling() ? opInvalidOp : opOK;
2021
2022   case PackCategoriesIntoKey(fcZero, fcInfinity):
2023   case PackCategoriesIntoKey(fcZero, fcNormal):
2024   case PackCategoriesIntoKey(fcNormal, fcInfinity):
2025     return opOK;
2026
2027   case PackCategoriesIntoKey(fcNormal, fcZero):
2028   case PackCategoriesIntoKey(fcInfinity, fcZero):
2029   case PackCategoriesIntoKey(fcInfinity, fcNormal):
2030   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
2031   case PackCategoriesIntoKey(fcZero, fcZero):
2032     makeNaN();
2033     return opInvalidOp;
2034
2035   case PackCategoriesIntoKey(fcNormal, fcNormal):
2036     return opOK;
2037   }
2038 }
2039
2040 APFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
2041   switch (PackCategoriesIntoKey(category, rhs.category)) {
2042   default:
2043     llvm_unreachable(nullptr);
2044
2045   case PackCategoriesIntoKey(fcZero, fcNaN):
2046   case PackCategoriesIntoKey(fcNormal, fcNaN):
2047   case PackCategoriesIntoKey(fcInfinity, fcNaN):
2048     assign(rhs);
2049     [[fallthrough]];
2050   case PackCategoriesIntoKey(fcNaN, fcZero):
2051   case PackCategoriesIntoKey(fcNaN, fcNormal):
2052   case PackCategoriesIntoKey(fcNaN, fcInfinity):
2053   case PackCategoriesIntoKey(fcNaN, fcNaN):
2054     if (isSignaling()) {
2055       makeQuiet();
2056       return opInvalidOp;
2057     }
2058     return rhs.isSignaling() ? opInvalidOp : opOK;
2059
2060   case PackCategoriesIntoKey(fcZero, fcInfinity):
2061   case PackCategoriesIntoKey(fcZero, fcNormal):
2062   case PackCategoriesIntoKey(fcNormal, fcInfinity):
2063     return opOK;
2064
2065   case PackCategoriesIntoKey(fcNormal, fcZero):
2066   case PackCategoriesIntoKey(fcInfinity, fcZero):
2067   case PackCategoriesIntoKey(fcInfinity, fcNormal):
2068   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
2069   case PackCategoriesIntoKey(fcZero, fcZero):
2070     makeNaN();
2071     return opInvalidOp;
2072
2073   case PackCategoriesIntoKey(fcNormal, fcNormal):
2074     return opDivByZero; // fake status, indicating this is not a special case
2075   }
2076 }
2077
2078 /* Change sign.  */
2079 void IEEEFloat::changeSign() {
2080   // With NaN-as-negative-zero, neither NaN or negative zero can change
2081   // their signs.
2082   if (semantics->nanEncoding == fltNanEncoding::NegativeZero &&
2083       (isZero() || isNaN()))
2084     return;
2085   /* Look mummy, this one's easy.  */
2086   sign = !sign;
2087 }
2088
2089 /* Normalized addition or subtraction.  */
2090 APFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
2091                                            roundingMode rounding_mode,
2092                                            bool subtract) {
2093   opStatus fs;
2094
2095   fs = addOrSubtractSpecials(rhs, subtract);
2096
2097   /* This return code means it was not a simple case.  */
2098   if (fs == opDivByZero) {
2099     lostFraction lost_fraction;
2100
2101     lost_fraction = addOrSubtractSignificand(rhs, subtract);
2102     fs = normalize(rounding_mode, lost_fraction);
2103
2104     /* Can only be zero if we lost no fraction.  */
2105     assert(category != fcZero || lost_fraction == lfExactlyZero);
2106   }
2107
2108   /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2109      positive zero unless rounding to minus infinity, except that
2110      adding two like-signed zeroes gives that zero.  */
2111   if (category == fcZero) {
2112     if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
2113       sign = (rounding_mode == rmTowardNegative);
2114     // NaN-in-negative-zero means zeros need to be normalized to +0.
2115     if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2116       sign = false;
2117   }
2118
2119   return fs;
2120 }
2121
2122 /* Normalized addition.  */
2123 APFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs,
2124                                  roundingMode rounding_mode) {
2125   return addOrSubtract(rhs, rounding_mode, false);
2126 }
2127
2128 /* Normalized subtraction.  */
2129 APFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs,
2130                                       roundingMode rounding_mode) {
2131   return addOrSubtract(rhs, rounding_mode, true);
2132 }
2133
2134 /* Normalized multiply.  */
2135 APFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
2136                                       roundingMode rounding_mode) {
2137   opStatus fs;
2138
2139   sign ^= rhs.sign;
2140   fs = multiplySpecials(rhs);
2141
2142   if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2143     sign = false;
2144   if (isFiniteNonZero()) {
2145     lostFraction lost_fraction = multiplySignificand(rhs);
2146     fs = normalize(rounding_mode, lost_fraction);
2147     if (lost_fraction != lfExactlyZero)
2148       fs = (opStatus) (fs | opInexact);
2149   }
2150
2151   return fs;
2152 }
2153
2154 /* Normalized divide.  */
2155 APFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
2156                                     roundingMode rounding_mode) {
2157   opStatus fs;
2158
2159   sign ^= rhs.sign;
2160   fs = divideSpecials(rhs);
2161
2162   if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2163     sign = false;
2164   if (isFiniteNonZero()) {
2165     lostFraction lost_fraction = divideSignificand(rhs);
2166     fs = normalize(rounding_mode, lost_fraction);
2167     if (lost_fraction != lfExactlyZero)
2168       fs = (opStatus) (fs | opInexact);
2169   }
2170
2171   return fs;
2172 }
2173
2174 /* Normalized remainder.  */
2175 APFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
2176   opStatus fs;
2177   unsigned int origSign = sign;
2178
2179   // First handle the special cases.
2180   fs = remainderSpecials(rhs);
2181   if (fs != opDivByZero)
2182     return fs;
2183
2184   fs = opOK;
2185
2186   // Make sure the current value is less than twice the denom. If the addition
2187   // did not succeed (an overflow has happened), which means that the finite
2188   // value we currently posses must be less than twice the denom (as we are
2189   // using the same semantics).
2190   IEEEFloat P2 = rhs;
2191   if (P2.add(rhs, rmNearestTiesToEven) == opOK) {
2192     fs = mod(P2);
2193     assert(fs == opOK);
2194   }
2195
2196   // Lets work with absolute numbers.
2197   IEEEFloat P = rhs;
2198   P.sign = false;
2199   sign = false;
2200
2201   //
2202   // To calculate the remainder we use the following scheme.
2203   //
2204   // The remainder is defained as follows:
2205   //
2206   // remainder = numer - rquot * denom = x - r * p
2207   //
2208   // Where r is the result of: x/p, rounded toward the nearest integral value
2209   // (with halfway cases rounded toward the even number).
2210   //
2211   // Currently, (after x mod 2p):
2212   // r is the number of 2p's present inside x, which is inherently, an even
2213   // number of p's.
2214   //
2215   // We may split the remaining calculation into 4 options:
2216   // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2217   // - if x == 0.5p then we round to the nearest even number which is 0, and we
2218   //   are done as well.
2219   // - if 0.5p < x < p then we round to nearest number which is 1, and we have
2220   //   to subtract 1p at least once.
2221   // - if x >= p then we must subtract p at least once, as x must be a
2222   //   remainder.
2223   //
2224   // By now, we were done, or we added 1 to r, which in turn, now an odd number.
2225   //
2226   // We can now split the remaining calculation to the following 3 options:
2227   // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2228   // - if x == 0.5p then we round to the nearest even number. As r is odd, we
2229   //   must round up to the next even number. so we must subtract p once more.
2230   // - if x > 0.5p (and inherently x < p) then we must round r up to the next
2231   //   integral, and subtract p once more.
2232   //
2233
2234   // Extend the semantics to prevent an overflow/underflow or inexact result.
2235   bool losesInfo;
2236   fltSemantics extendedSemantics = *semantics;
2237   extendedSemantics.maxExponent++;
2238   extendedSemantics.minExponent--;
2239   extendedSemantics.precision += 2;
2240
2241   IEEEFloat VEx = *this;
2242   fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2243   assert(fs == opOK && !losesInfo);
2244   IEEEFloat PEx = P;
2245   fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2246   assert(fs == opOK && !losesInfo);
2247
2248   // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
2249   // any fraction.
2250   fs = VEx.add(VEx, rmNearestTiesToEven);
2251   assert(fs == opOK);
2252
2253   if (VEx.compare(PEx) == cmpGreaterThan) {
2254     fs = subtract(P, rmNearestTiesToEven);
2255     assert(fs == opOK);
2256
2257     // Make VEx = this.add(this), but because we have different semantics, we do
2258     // not want to `convert` again, so we just subtract PEx twice (which equals
2259     // to the desired value).
2260     fs = VEx.subtract(PEx, rmNearestTiesToEven);
2261     assert(fs == opOK);
2262     fs = VEx.subtract(PEx, rmNearestTiesToEven);
2263     assert(fs == opOK);
2264
2265     cmpResult result = VEx.compare(PEx);
2266     if (result == cmpGreaterThan || result == cmpEqual) {
2267       fs = subtract(P, rmNearestTiesToEven);
2268       assert(fs == opOK);
2269     }
2270   }
2271
2272   if (isZero()) {
2273     sign = origSign;    // IEEE754 requires this
2274     if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2275       // But some 8-bit floats only have positive 0.
2276       sign = false;
2277   }
2278
2279   else
2280     sign ^= origSign;
2281   return fs;
2282 }
2283
2284 /* Normalized llvm frem (C fmod). */
2285 APFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
2286   opStatus fs;
2287   fs = modSpecials(rhs);
2288   unsigned int origSign = sign;
2289
2290   while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
2291          compareAbsoluteValue(rhs) != cmpLessThan) {
2292     int Exp = ilogb(*this) - ilogb(rhs);
2293     IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven);
2294     // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly
2295     // check for it.
2296     if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan)
2297       V = scalbn(rhs, Exp - 1, rmNearestTiesToEven);
2298     V.sign = sign;
2299
2300     fs = subtract(V, rmNearestTiesToEven);
2301
2302     // When the semantics supports zero, this loop's
2303     // exit-condition is handled by the 'isFiniteNonZero'
2304     // category check above. However, when the semantics
2305     // does not have 'fcZero' and we have reached the
2306     // minimum possible value, (and any further subtract
2307     // will underflow to the same value) explicitly
2308     // provide an exit-path here.
2309     if (!semantics->hasZero && this->isSmallest())
2310       break;
2311
2312     assert(fs==opOK);
2313   }
2314   if (isZero()) {
2315     sign = origSign; // fmod requires this
2316     if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2317       sign = false;
2318   }
2319   return fs;
2320 }
2321
2322 /* Normalized fused-multiply-add.  */
2323 APFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
2324                                               const IEEEFloat &addend,
2325                                               roundingMode rounding_mode) {
2326   opStatus fs;
2327
2328   /* Post-multiplication sign, before addition.  */
2329   sign ^= multiplicand.sign;
2330
2331   /* If and only if all arguments are normal do we need to do an
2332      extended-precision calculation.  */
2333   if (isFiniteNonZero() &&
2334       multiplicand.isFiniteNonZero() &&
2335       addend.isFinite()) {
2336     lostFraction lost_fraction;
2337
2338     lost_fraction = multiplySignificand(multiplicand, addend);
2339     fs = normalize(rounding_mode, lost_fraction);
2340     if (lost_fraction != lfExactlyZero)
2341       fs = (opStatus) (fs | opInexact);
2342
2343     /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2344        positive zero unless rounding to minus infinity, except that
2345        adding two like-signed zeroes gives that zero.  */
2346     if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) {
2347       sign = (rounding_mode == rmTowardNegative);
2348       if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2349         sign = false;
2350     }
2351   } else {
2352     fs = multiplySpecials(multiplicand);
2353
2354     /* FS can only be opOK or opInvalidOp.  There is no more work
2355        to do in the latter case.  The IEEE-754R standard says it is
2356        implementation-defined in this case whether, if ADDEND is a
2357        quiet NaN, we raise invalid op; this implementation does so.
2358
2359        If we need to do the addition we can do so with normal
2360        precision.  */
2361     if (fs == opOK)
2362       fs = addOrSubtract(addend, rounding_mode, false);
2363   }
2364
2365   return fs;
2366 }
2367
2368 /* Rounding-mode correct round to integral value.  */
2369 APFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
2370   opStatus fs;
2371
2372   if (isInfinity())
2373     // [IEEE Std 754-2008 6.1]:
2374     // The behavior of infinity in floating-point arithmetic is derived from the
2375     // limiting cases of real arithmetic with operands of arbitrarily
2376     // large magnitude, when such a limit exists.
2377     // ...
2378     // Operations on infinite operands are usually exact and therefore signal no
2379     // exceptions ...
2380     return opOK;
2381
2382   if (isNaN()) {
2383     if (isSignaling()) {
2384       // [IEEE Std 754-2008 6.2]:
2385       // Under default exception handling, any operation signaling an invalid
2386       // operation exception and for which a floating-point result is to be
2387       // delivered shall deliver a quiet NaN.
2388       makeQuiet();
2389       // [IEEE Std 754-2008 6.2]:
2390       // Signaling NaNs shall be reserved operands that, under default exception
2391       // handling, signal the invalid operation exception(see 7.2) for every
2392       // general-computational and signaling-computational operation except for
2393       // the conversions described in 5.12.
2394       return opInvalidOp;
2395     } else {
2396       // [IEEE Std 754-2008 6.2]:
2397       // For an operation with quiet NaN inputs, other than maximum and minimum
2398       // operations, if a floating-point result is to be delivered the result
2399       // shall be a quiet NaN which should be one of the input NaNs.
2400       // ...
2401       // Every general-computational and quiet-computational operation involving
2402       // one or more input NaNs, none of them signaling, shall signal no
2403       // exception, except fusedMultiplyAdd might signal the invalid operation
2404       // exception(see 7.2).
2405       return opOK;
2406     }
2407   }
2408
2409   if (isZero()) {
2410     // [IEEE Std 754-2008 6.3]:
2411     // ... the sign of the result of conversions, the quantize operation, the
2412     // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
2413     // the sign of the first or only operand.
2414     return opOK;
2415   }
2416
2417   // If the exponent is large enough, we know that this value is already
2418   // integral, and the arithmetic below would potentially cause it to saturate
2419   // to +/-Inf.  Bail out early instead.
2420   if (exponent + 1 >= (int)APFloat::semanticsPrecision(*semantics))
2421     return opOK;
2422
2423   // The algorithm here is quite simple: we add 2^(p-1), where p is the
2424   // precision of our format, and then subtract it back off again.  The choice
2425   // of rounding modes for the addition/subtraction determines the rounding mode
2426   // for our integral rounding as well.
2427   // NOTE: When the input value is negative, we do subtraction followed by
2428   // addition instead.
2429   APInt IntegerConstant(NextPowerOf2(APFloat::semanticsPrecision(*semantics)),
2430                         1);
2431   IntegerConstant <<= APFloat::semanticsPrecision(*semantics) - 1;
2432   IEEEFloat MagicConstant(*semantics);
2433   fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
2434                                       rmNearestTiesToEven);
2435   assert(fs == opOK);
2436   MagicConstant.sign = sign;
2437
2438   // Preserve the input sign so that we can handle the case of zero result
2439   // correctly.
2440   bool inputSign = isNegative();
2441
2442   fs = add(MagicConstant, rounding_mode);
2443
2444   // Current value and 'MagicConstant' are both integers, so the result of the
2445   // subtraction is always exact according to Sterbenz' lemma.
2446   subtract(MagicConstant, rounding_mode);
2447
2448   // Restore the input sign.
2449   if (inputSign != isNegative())
2450     changeSign();
2451
2452   return fs;
2453 }
2454
2455 /* Comparison requires normalized numbers.  */
2456 APFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
2457   cmpResult result;
2458
2459   assert(semantics == rhs.semantics);
2460
2461   switch (PackCategoriesIntoKey(category, rhs.category)) {
2462   default:
2463     llvm_unreachable(nullptr);
2464
2465   case PackCategoriesIntoKey(fcNaN, fcZero):
2466   case PackCategoriesIntoKey(fcNaN, fcNormal):
2467   case PackCategoriesIntoKey(fcNaN, fcInfinity):
2468   case PackCategoriesIntoKey(fcNaN, fcNaN):
2469   case PackCategoriesIntoKey(fcZero, fcNaN):
2470   case PackCategoriesIntoKey(fcNormal, fcNaN):
2471   case PackCategoriesIntoKey(fcInfinity, fcNaN):
2472     return cmpUnordered;
2473
2474   case PackCategoriesIntoKey(fcInfinity, fcNormal):
2475   case PackCategoriesIntoKey(fcInfinity, fcZero):
2476   case PackCategoriesIntoKey(fcNormal, fcZero):
2477     if (sign)
2478       return cmpLessThan;
2479     else
2480       return cmpGreaterThan;
2481
2482   case PackCategoriesIntoKey(fcNormal, fcInfinity):
2483   case PackCategoriesIntoKey(fcZero, fcInfinity):
2484   case PackCategoriesIntoKey(fcZero, fcNormal):
2485     if (rhs.sign)
2486       return cmpGreaterThan;
2487     else
2488       return cmpLessThan;
2489
2490   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
2491     if (sign == rhs.sign)
2492       return cmpEqual;
2493     else if (sign)
2494       return cmpLessThan;
2495     else
2496       return cmpGreaterThan;
2497
2498   case PackCategoriesIntoKey(fcZero, fcZero):
2499     return cmpEqual;
2500
2501   case PackCategoriesIntoKey(fcNormal, fcNormal):
2502     break;
2503   }
2504
2505   /* Two normal numbers.  Do they have the same sign?  */
2506   if (sign != rhs.sign) {
2507     if (sign)
2508       result = cmpLessThan;
2509     else
2510       result = cmpGreaterThan;
2511   } else {
2512     /* Compare absolute values; invert result if negative.  */
2513     result = compareAbsoluteValue(rhs);
2514
2515     if (sign) {
2516       if (result == cmpLessThan)
2517         result = cmpGreaterThan;
2518       else if (result == cmpGreaterThan)
2519         result = cmpLessThan;
2520     }
2521   }
2522
2523   return result;
2524 }
2525
/// IEEEFloat::convert - convert a value of one floating point type to another.
/// The return value corresponds to the IEEE754 exceptions.  *losesInfo
/// records whether the transformation lost information, i.e. whether
/// converting the result back to the original type would fail to reproduce
/// the original value (this is almost the same as return value != opOK, but
/// there are edge cases where this is not so).
2532
2533 APFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
2534                                      roundingMode rounding_mode,
2535                                      bool *losesInfo) {
2536   lostFraction lostFraction;
2537   unsigned int newPartCount, oldPartCount;
2538   opStatus fs;
2539   int shift;
2540   const fltSemantics &fromSemantics = *semantics;
2541   bool is_signaling = isSignaling();
2542
2543   lostFraction = lfExactlyZero;
2544   newPartCount = partCountForBits(toSemantics.precision + 1);
2545   oldPartCount = partCount();
2546   shift = toSemantics.precision - fromSemantics.precision;
2547
2548   bool X86SpecialNan = false;
2549   if (&fromSemantics == &semX87DoubleExtended &&
2550       &toSemantics != &semX87DoubleExtended && category == fcNaN &&
2551       (!(*significandParts() & 0x8000000000000000ULL) ||
2552        !(*significandParts() & 0x4000000000000000ULL))) {
2553     // x86 has some unusual NaNs which cannot be represented in any other
2554     // format; note them here.
2555     X86SpecialNan = true;
2556   }
2557
2558   // If this is a truncation of a denormal number, and the target semantics
2559   // has larger exponent range than the source semantics (this can happen
2560   // when truncating from PowerPC double-double to double format), the
2561   // right shift could lose result mantissa bits.  Adjust exponent instead
2562   // of performing excessive shift.
2563   // Also do a similar trick in case shifting denormal would produce zero
2564   // significand as this case isn't handled correctly by normalize.
2565   if (shift < 0 && isFiniteNonZero()) {
2566     int omsb = significandMSB() + 1;
2567     int exponentChange = omsb - fromSemantics.precision;
2568     if (exponent + exponentChange < toSemantics.minExponent)
2569       exponentChange = toSemantics.minExponent - exponent;
2570     if (exponentChange < shift)
2571       exponentChange = shift;
2572     if (exponentChange < 0) {
2573       shift -= exponentChange;
2574       exponent += exponentChange;
2575     } else if (omsb <= -shift) {
2576       exponentChange = omsb + shift - 1; // leave at least one bit set
2577       shift -= exponentChange;
2578       exponent += exponentChange;
2579     }
2580   }
2581
2582   // If this is a truncation, perform the shift before we narrow the storage.
2583   if (shift < 0 && (isFiniteNonZero() ||
2584                     (category == fcNaN && semantics->nonFiniteBehavior !=
2585                                               fltNonfiniteBehavior::NanOnly)))
2586     lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2587
2588   // Fix the storage so it can hold to new value.
2589   if (newPartCount > oldPartCount) {
2590     // The new type requires more storage; make it available.
2591     integerPart *newParts;
2592     newParts = new integerPart[newPartCount];
2593     APInt::tcSet(newParts, 0, newPartCount);
2594     if (isFiniteNonZero() || category==fcNaN)
2595       APInt::tcAssign(newParts, significandParts(), oldPartCount);
2596     freeSignificand();
2597     significand.parts = newParts;
2598   } else if (newPartCount == 1 && oldPartCount != 1) {
2599     // Switch to built-in storage for a single part.
2600     integerPart newPart = 0;
2601     if (isFiniteNonZero() || category==fcNaN)
2602       newPart = significandParts()[0];
2603     freeSignificand();
2604     significand.part = newPart;
2605   }
2606
2607   // Now that we have the right storage, switch the semantics.
2608   semantics = &toSemantics;
2609
2610   // If this is an extension, perform the shift now that the storage is
2611   // available.
2612   if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2613     APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2614
2615   if (isFiniteNonZero()) {
2616     fs = normalize(rounding_mode, lostFraction);
2617     *losesInfo = (fs != opOK);
2618   } else if (category == fcNaN) {
2619     if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2620       *losesInfo =
2621           fromSemantics.nonFiniteBehavior != fltNonfiniteBehavior::NanOnly;
2622       makeNaN(false, sign);
2623       return is_signaling ? opInvalidOp : opOK;
2624     }
2625
2626     // If NaN is negative zero, we need to create a new NaN to avoid converting
2627     // NaN to -Inf.
2628     if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&
2629         semantics->nanEncoding != fltNanEncoding::NegativeZero)
2630       makeNaN(false, false);
2631
2632     *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2633
2634     // For x87 extended precision, we want to make a NaN, not a special NaN if
2635     // the input wasn't special either.
2636     if (!X86SpecialNan && semantics == &semX87DoubleExtended)
2637       APInt::tcSetBit(significandParts(), semantics->precision - 1);
2638
2639     // Convert of sNaN creates qNaN and raises an exception (invalid op).
2640     // This also guarantees that a sNaN does not become Inf on a truncation
2641     // that loses all payload bits.
2642     if (is_signaling) {
2643       makeQuiet();
2644       fs = opInvalidOp;
2645     } else {
2646       fs = opOK;
2647     }
2648   } else if (category == fcInfinity &&
2649              semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2650     makeNaN(false, sign);
2651     *losesInfo = true;
2652     fs = opInexact;
2653   } else if (category == fcZero &&
2654              semantics->nanEncoding == fltNanEncoding::NegativeZero) {
2655     // Negative zero loses info, but positive zero doesn't.
2656     *losesInfo =
2657         fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;
2658     fs = *losesInfo ? opInexact : opOK;
2659     // NaN is negative zero means -0 -> +0, which can lose information
2660     sign = false;
2661   } else {
2662     *losesInfo = false;
2663     fs = opOK;
2664   }
2665
2666   if (category == fcZero && !semantics->hasZero)
2667     makeSmallestNormalized(false);
2668   return fs;
2669 }
2670
/* Convert a floating point number to an integer according to the
   rounding mode.  If the rounded integer value is out of range this
   returns an invalid operation exception and the contents of the
   destination parts are unspecified.  If the rounded value is in
   range but the floating point number is not the exact integer, the C
   standard doesn't require an inexact exception to be raised.  IEEE
   854 does require it so we do that.

   Note that for conversions to integer type the C standard requires
   round-to-zero to always be used.

   PARTS receives the result and must hold at least
   partCountForBits(WIDTH) parts.  WIDTH is the bit width of the
   destination integer and ISSIGNED says whether it is signed.
   *ISEXACT is cleared on entry and set to true only when no fraction
   was lost; for a zero input it is set to true only for positive
   zero.  Returns opInvalidOp for infinities, NaNs and out-of-range
   values, opInexact when a nonzero fraction was discarded, and opOK
   otherwise.  */
APFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
    MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
    roundingMode rounding_mode, bool *isExact) const {
  lostFraction lost_fraction;
  const integerPart *src;
  unsigned int dstPartsCount, truncatedBits;

  *isExact = false;

  /* Handle the three special cases first.  */
  if (category == fcInfinity || category == fcNaN)
    return opInvalidOp;

  dstPartsCount = partCountForBits(width);
  assert(dstPartsCount <= parts.size() && "Integer too big");

  if (category == fcZero) {
    APInt::tcSet(parts.data(), 0, dstPartsCount);
    // Negative zero can't be represented as an int.
    *isExact = !sign;
    return opOK;
  }

  src = significandParts();

  /* Step 1: place our absolute value, with any fraction truncated, in
     the destination.  */
  if (exponent < 0) {
    /* Our absolute value is less than one; truncate everything.  */
    APInt::tcSet(parts.data(), 0, dstPartsCount);
    /* For exponent -1 the integer bit represents .5, look at that.
       For smaller exponents leftmost truncated bit is 0. */
    truncatedBits = semantics->precision -1U - exponent;
  } else {
    /* We want the most significant (exponent + 1) bits; the rest are
       truncated.  */
    unsigned int bits = exponent + 1U;

    /* Hopelessly large in magnitude?  */
    if (bits > width)
      return opInvalidOp;

    if (bits < semantics->precision) {
      /* We truncate (semantics->precision - bits) bits.  */
      truncatedBits = semantics->precision - bits;
      APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
    } else {
      /* We want at least as many bits as are available.  */
      APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
                       0);
      APInt::tcShiftLeft(parts.data(), dstPartsCount,
                         bits - semantics->precision);
      truncatedBits = 0;
    }
  }

  /* Step 2: work out any lost fraction, and increment the absolute
     value if we would round away from zero.  */
  if (truncatedBits) {
    lost_fraction = lostFractionThroughTruncation(src, partCount(),
                                                  truncatedBits);
    if (lost_fraction != lfExactlyZero &&
        roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
      if (APInt::tcIncrement(parts.data(), dstPartsCount))
        return opInvalidOp;     /* Overflow.  */
    }
  } else {
    lost_fraction = lfExactlyZero;
  }

  /* Step 3: check if we fit in the destination.  */
  /* omsb is the number of bits needed for the magnitude now held in
     PARTS.  */
  unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;

  if (sign) {
    if (!isSigned) {
      /* Negative numbers cannot be represented as unsigned.  */
      if (omsb != 0)
        return opInvalidOp;
    } else {
      /* It takes omsb bits to represent the unsigned integer value.
         We lose a bit for the sign, but care is needed as the
         maximally negative integer is a special case.  */
      if (omsb == width &&
          APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
        return opInvalidOp;

      /* This case can happen because of rounding.  */
      if (omsb > width)
        return opInvalidOp;
    }

    /* Turn the magnitude into its two's complement negative.  */
    APInt::tcNegate (parts.data(), dstPartsCount);
  } else {
    /* A non-negative value may use WIDTH bits when unsigned but only
       WIDTH - 1 bits when signed.  */
    if (omsb >= width + !isSigned)
      return opInvalidOp;
  }

  if (lost_fraction == lfExactlyZero) {
    *isExact = true;
    return opOK;
  } else
    return opInexact;
}
2784
2785 /* Same as convertToSignExtendedInteger, except we provide
2786    deterministic values in case of an invalid operation exception,
2787    namely zero for NaNs and the minimal or maximal value respectively
2788    for underflow or overflow.
2789    The *isExact output tells whether the result is exact, in the sense
2790    that converting it back to the original floating point type produces
2791    the original value.  This is almost equivalent to result==opOK,
2792    except for negative zeroes.
2793 */
2794 APFloat::opStatus
2795 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
2796                             unsigned int width, bool isSigned,
2797                             roundingMode rounding_mode, bool *isExact) const {
2798   opStatus fs;
2799
2800   fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2801                                     isExact);
2802
2803   if (fs == opInvalidOp) {
2804     unsigned int bits, dstPartsCount;
2805
2806     dstPartsCount = partCountForBits(width);
2807     assert(dstPartsCount <= parts.size() && "Integer too big");
2808
2809     if (category == fcNaN)
2810       bits = 0;
2811     else if (sign)
2812       bits = isSigned;
2813     else
2814       bits = width - isSigned;
2815
2816     tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
2817     if (sign && isSigned)
2818       APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2819   }
2820
2821   return fs;
2822 }
2823
2824 /* Convert an unsigned integer SRC to a floating point number,
2825    rounding according to ROUNDING_MODE.  The sign of the floating
2826    point number is not modified.  */
2827 APFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2828     const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2829   unsigned int omsb, precision, dstCount;
2830   integerPart *dst;
2831   lostFraction lost_fraction;
2832
2833   category = fcNormal;
2834   omsb = APInt::tcMSB(src, srcCount) + 1;
2835   dst = significandParts();
2836   dstCount = partCount();
2837   precision = semantics->precision;
2838
2839   /* We want the most significant PRECISION bits of SRC.  There may not
2840      be that many; extract what we can.  */
2841   if (precision <= omsb) {
2842     exponent = omsb - 1;
2843     lost_fraction = lostFractionThroughTruncation(src, srcCount,
2844                                                   omsb - precision);
2845     APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2846   } else {
2847     exponent = precision - 1;
2848     lost_fraction = lfExactlyZero;
2849     APInt::tcExtract(dst, dstCount, src, omsb, 0);
2850   }
2851
2852   return normalize(rounding_mode, lost_fraction);
2853 }
2854
2855 APFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned,
2856                                               roundingMode rounding_mode) {
2857   unsigned int partCount = Val.getNumWords();
2858   APInt api = Val;
2859
2860   sign = false;
2861   if (isSigned && api.isNegative()) {
2862     sign = true;
2863     api = -api;
2864   }
2865
2866   return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2867 }
2868
2869 /* Convert a two's complement integer SRC to a floating point number,
2870    rounding according to ROUNDING_MODE.  ISSIGNED is true if the
2871    integer is signed, in which case it must be sign-extended.  */
2872 APFloat::opStatus
2873 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src,
2874                                           unsigned int srcCount, bool isSigned,
2875                                           roundingMode rounding_mode) {
2876   opStatus status;
2877
2878   if (isSigned &&
2879       APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2880     integerPart *copy;
2881
2882     /* If we're signed and negative negate a copy.  */
2883     sign = true;
2884     copy = new integerPart[srcCount];
2885     APInt::tcAssign(copy, src, srcCount);
2886     APInt::tcNegate(copy, srcCount);
2887     status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2888     delete [] copy;
2889   } else {
2890     sign = false;
2891     status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2892   }
2893
2894   return status;
2895 }
2896
2897 /* FIXME: should this just take a const APInt reference?  */
2898 APFloat::opStatus
2899 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2900                                           unsigned int width, bool isSigned,
2901                                           roundingMode rounding_mode) {
2902   unsigned int partCount = partCountForBits(width);
2903   APInt api = APInt(width, ArrayRef(parts, partCount));
2904
2905   sign = false;
2906   if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2907     sign = true;
2908     api = -api;
2909   }
2910
2911   return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2912 }
2913
/// Parse a hexadecimal floating point literal body and set this value to the
/// result, rounded according to \p rounding_mode.
///
/// \p s is the text following the "0x"/"0X" prefix (convertFromString strips
/// the prefix and any sign before calling this).  It consists of hexadecimal
/// digits with at most one '.', followed by a mandatory 'p' or 'P' exponent.
///
/// \returns the status from normalize(), or an error if the significand
/// contains several dots, has no digits, contains an invalid character, or
/// lacks an exponent.  Errors from the helper parsers are propagated.
Expected<APFloat::opStatus>
IEEEFloat::convertFromHexadecimalString(StringRef s,
                                        roundingMode rounding_mode) {
  lostFraction lost_fraction = lfExactlyZero;

  category = fcNormal;
  zeroSignificand();
  exponent = 0;

  integerPart *significand = significandParts();
  unsigned partsCount = partCount();
  // Digits are stored from the most significant nibble of the significand
  // storage downwards; bitPos is the position of the last nibble written.
  unsigned bitPos = partsCount * integerPartWidth;
  bool computedTrailingFraction = false;

  // Skip leading zeroes and any (hexa)decimal point.
  StringRef::iterator begin = s.begin();
  StringRef::iterator end = s.end();
  StringRef::iterator dot;
  auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
  if (!PtrOrErr)
    return PtrOrErr.takeError();
  StringRef::iterator p = *PtrOrErr;
  StringRef::iterator firstSignificantDigit = p;

  while (p != end) {
    integerPart hex_value;

    if (*p == '.') {
      // dot == end means no dot has been seen so far.
      if (dot != end)
        return createError("String contains multiple dots");
      dot = p++;
      continue;
    }

    // Stop at the first character that is not a hexadecimal digit; it is
    // validated as the exponent marker after the loop.
    hex_value = hexDigitValue(*p);
    if (hex_value == UINT_MAX)
      break;

    p++;

    // Store the number while we have space.
    if (bitPos) {
      bitPos -= 4;
      hex_value <<= bitPos % integerPartWidth;
      significand[bitPos / integerPartWidth] |= hex_value;
    } else if (!computedTrailingFraction) {
      // The storage is full: classify the remaining digits once as the lost
      // fraction.  Later digits are consumed without further effect.
      auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
      if (!FractOrErr)
        return FractOrErr.takeError();
      lost_fraction = *FractOrErr;
      computedTrailingFraction = true;
    }
  }

  /* Hex floats require an exponent but not a hexadecimal point.  */
  if (p == end)
    return createError("Hex strings require an exponent");
  if (*p != 'p' && *p != 'P')
    return createError("Invalid character in significand");
  if (p == begin)
    return createError("Significand has no digits");
  // A lone "." before the exponent is not a significand either.
  if (dot != end && p - begin == 1)
    return createError("Significand has no digits");

  /* Ignore the exponent if we are zero.  */
  if (p != firstSignificantDigit) {
    int expAdjustment;

    /* Implicit hexadecimal point?  */
    if (dot == end)
      dot = p;

    /* Calculate the exponent adjustment implicit in the number of
       significant digits.  */
    expAdjustment = static_cast<int>(dot - firstSignificantDigit);
    if (expAdjustment < 0)
      expAdjustment++;
    expAdjustment = expAdjustment * 4 - 1;

    /* Adjust for writing the significand starting at the most
       significant nibble.  */
    expAdjustment += semantics->precision;
    expAdjustment -= partsCount * integerPartWidth;

    /* Adjust for the given exponent.  */
    auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
    if (!ExpOrErr)
      return ExpOrErr.takeError();
    exponent = *ExpOrErr;
  }

  return normalize(rounding_mode, lost_fraction);
}
3007
/* Set this value to DECSIGPARTS * 10^EXP, rounded according to
   ROUNDING_MODE.  DECSIGPARTS is an unsigned integer of SIGPARTCOUNT
   parts.

   The product is computed in a temporary format as the significand
   times 5^|EXP| (or divided by it when EXP is negative), with EXP
   added to the binary exponent.  The working precision starts at
   enough parts for the target precision plus 11 bits and doubles
   until the accumulated error bound shows that truncating to the
   target precision rounds correctly.  Returns the status from the
   final normalize().  */
APFloat::opStatus
IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
                                        unsigned sigPartCount, int exp,
                                        roundingMode rounding_mode) {
  unsigned int parts, pow5PartCount;
  /* NOTE(review): initializer presumably gives the exponent range and
     a zero precision / size; the precision is filled in on every
     iteration below.  Confirm against the fltSemantics definition.  */
  fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
  integerPart pow5Parts[maxPowerOfFiveParts];
  bool isNearest;

  isNearest = (rounding_mode == rmNearestTiesToEven ||
               rounding_mode == rmNearestTiesToAway);

  parts = partCountForBits(semantics->precision + 11);

  /* Calculate pow(5, abs(exp)).  */
  pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);

  /* Retry with twice the working precision until the early return
     below is taken.  */
  for (;; parts *= 2) {
    opStatus sigStatus, powStatus;
    unsigned int excessPrecision, truncatedBits;

    calcSemantics.precision = parts * integerPartWidth - 1;
    excessPrecision = calcSemantics.precision - semantics->precision;
    truncatedBits = excessPrecision;

    IEEEFloat decSig(calcSemantics, uninitialized);
    decSig.makeZero(sign);
    IEEEFloat pow5(calcSemantics);

    sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
                                                rmNearestTiesToEven);
    powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
                                              rmNearestTiesToEven);
    /* Add exp, as 10^n = 5^n * 2^n.  */
    decSig.exponent += exp;

    lostFraction calcLostFraction;
    integerPart HUerr, HUdistance;
    unsigned int powHUerr;

    if (exp >= 0) {
      /* multiplySignificand leaves the precision-th bit set to 1.  */
      calcLostFraction = decSig.multiplySignificand(pow5);
      powHUerr = powStatus != opOK;
    } else {
      calcLostFraction = decSig.divideSignificand(pow5);
      /* Denormal numbers have less precision.  */
      if (decSig.exponent < semantics->minExponent) {
        excessPrecision += (semantics->minExponent - decSig.exponent);
        truncatedBits = excessPrecision;
        if (excessPrecision > calcSemantics.precision)
          excessPrecision = calcSemantics.precision;
      }
      /* Extra half-ulp lost in reciprocal of exponent.  */
      powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
    }

    /* Both multiplySignificand and divideSignificand return the
       result with the integer bit set.  */
    assert(APInt::tcExtractBit
           (decSig.significandParts(), calcSemantics.precision - 1) == 1);

    /* HUerr bounds the accumulated error and HUdistance is twice the
       distance to the nearest rounding boundary, both in half-ulps
       going by the names -- NOTE(review): confirm units against
       HUerrBound and ulpsFromBoundary.  */
    HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
                       powHUerr);
    HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
                                      excessPrecision, isNearest);

    /* Are we guaranteed to round correctly if we truncate?  */
    if (HUdistance >= HUerr) {
      APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
                       calcSemantics.precision - excessPrecision,
                       excessPrecision);
      /* Take the exponent of decSig.  If we tcExtract-ed less bits
         above we must adjust our exponent to compensate for the
         implicit right shift.  */
      exponent = (decSig.exponent + semantics->precision
                  - (calcSemantics.precision - excessPrecision));
      calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
                                                       decSig.partCount(),
                                                       truncatedBits);
      return normalize(rounding_mode, calcLostFraction);
    }
  }
}
3092
/// Parse the decimal floating point literal \p str and set this value to the
/// result, rounded according to \p rounding_mode.
///
/// The caller (convertFromString) strips any leading sign and sets the sign
/// member before calling this.  Values that are zero, or whose exponent is
/// obviously out of range, are handled by quick paths; everything else is
/// converted to a binary integer significand and handed to
/// roundSignificandWithExponent.
///
/// \returns the resulting status, or an error if interpretDecimal rejects the
/// text or the significand contains a character that is not a decimal digit.
Expected<APFloat::opStatus>
IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
  decimalInfo D;
  opStatus fs;

  /* Scan the text.  */
  StringRef::iterator p = str.begin();
  if (Error Err = interpretDecimal(p, str.end(), &D))
    return std::move(Err);

  /* Handle the quick cases.  First the case of no significant digits,
     i.e. zero, and then exponents that are obviously too large or too
     small.  Writing L for log 10 / log 2, a number d.ddddd*10^exp
     definitely overflows if

           (exp - 1) * L >= maxExponent

     and definitely underflows to zero where

           (exp + 1) * L <= minExponent - precision

     With integer arithmetic the tightest bounds for L are

           93/28 < L < 196/59            [ numerator <= 256 ]
           42039/12655 < L < 28738/8651  [ numerator <= 65536 ]
  */

  // Test if we have a zero number allowing for strings with no null terminators
  // and zero decimals with non-zero exponents.
  //
  // We computed firstSigDigit by ignoring all zeros and dots. Thus if
  // D.firstSigDigit equals str.end(), every digit must be a zero and there can
  // be at most one dot. On the other hand, if we have a zero with a non-zero
  // exponent, then we know that D.firstSigDigit will be non-numeric.
  if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
    category = fcZero;
    fs = opOK;
    // Formats that encode NaN as negative zero only have a positive zero.
    if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
      sign = false;
    // Formats without a zero get their smallest normalized value instead.
    if (!semantics->hasZero)
      makeSmallestNormalized(false);

    /* Check whether the normalized exponent is high enough to overflow
       max during the log-rebasing in the max-exponent check below. */
  } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
    fs = handleOverflow(rounding_mode);

  /* If it wasn't, then it also wasn't high enough to overflow max
     during the log-rebasing in the min-exponent check.  Check that it
     won't overflow min in either check, then perform the min-exponent
     check. */
  } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
             (D.normalizedExponent + 1) * 28738 <=
               8651 * (semantics->minExponent - (int) semantics->precision)) {
    /* Underflow to zero and round.  */
    category = fcNormal;
    zeroSignificand();
    fs = normalize(rounding_mode, lfLessThanHalf);

  /* We can finally safely perform the max-exponent check. */
  } else if ((D.normalizedExponent - 1) * 42039
             >= 12655 * semantics->maxExponent) {
    /* Overflow and round.  */
    fs = handleOverflow(rounding_mode);
  } else {
    integerPart *decSignificand;
    unsigned int partCount;

    /* A tight upper bound on number of bits required to hold an
       N-digit decimal integer is N * 196 / 59.  Allocate enough space
       to hold the full significand, and an extra part required by
       tcMultiplyPart.  */
    partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
    partCount = partCountForBits(1 + 196 * partCount / 59);
    decSignificand = new integerPart[partCount + 1];
    /* From here on partCount counts the parts of decSignificand that
       are in use.  */
    partCount = 0;

    /* Convert to binary efficiently - we do almost all multiplication
       in an integerPart.  When this would overflow do we do a single
       bignum multiplication, and then revert again to multiplication
       in an integerPart.  */
    do {
      integerPart decValue, val, multiplier;

      val = 0;
      multiplier = 1;

      do {
        /* Step over the decimal point; stop if it was the last
           character.  */
        if (*p == '.') {
          p++;
          if (p == str.end()) {
            break;
          }
        }
        decValue = decDigitValue(*p++);
        if (decValue >= 10U) {
          /* Release the scratch buffer before reporting the error.  */
          delete[] decSignificand;
          return createError("Invalid character in significand");
        }
        multiplier *= 10;
        val = val * 10 + decValue;
        /* The maximum number that can be multiplied by ten with any
           digit added without overflowing an integerPart.  */
      } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);

      /* Multiply out the current part.  */
      APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
                            partCount, partCount + 1, false);

      /* If we used another part (likely but not guaranteed), increase
         the count.  */
      if (decSignificand[partCount])
        partCount++;
    } while (p <= D.lastSigDigit);

    category = fcNormal;
    fs = roundSignificandWithExponent(decSignificand, partCount,
                                      D.exponent, rounding_mode);

    delete [] decSignificand;
  }

  return fs;
}
3217
3218 bool IEEEFloat::convertFromStringSpecials(StringRef str) {
3219   const size_t MIN_NAME_SIZE = 3;
3220
3221   if (str.size() < MIN_NAME_SIZE)
3222     return false;
3223
3224   if (str == "inf" || str == "INFINITY" || str == "+Inf") {
3225     makeInf(false);
3226     return true;
3227   }
3228
3229   bool IsNegative = str.front() == '-';
3230   if (IsNegative) {
3231     str = str.drop_front();
3232     if (str.size() < MIN_NAME_SIZE)
3233       return false;
3234
3235     if (str == "inf" || str == "INFINITY" || str == "Inf") {
3236       makeInf(true);
3237       return true;
3238     }
3239   }
3240
3241   // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
3242   bool IsSignaling = str.front() == 's' || str.front() == 'S';
3243   if (IsSignaling) {
3244     str = str.drop_front();
3245     if (str.size() < MIN_NAME_SIZE)
3246       return false;
3247   }
3248
3249   if (str.starts_with("nan") || str.starts_with("NaN")) {
3250     str = str.drop_front(3);
3251
3252     // A NaN without payload.
3253     if (str.empty()) {
3254       makeNaN(IsSignaling, IsNegative);
3255       return true;
3256     }
3257
3258     // Allow the payload to be inside parentheses.
3259     if (str.front() == '(') {
3260       // Parentheses should be balanced (and not empty).
3261       if (str.size() <= 2 || str.back() != ')')
3262         return false;
3263
3264       str = str.slice(1, str.size() - 1);
3265     }
3266
3267     // Determine the payload number's radix.
3268     unsigned Radix = 10;
3269     if (str[0] == '0') {
3270       if (str.size() > 1 && tolower(str[1]) == 'x') {
3271         str = str.drop_front(2);
3272         Radix = 16;
3273       } else
3274         Radix = 8;
3275     }
3276
3277     // Parse the payload and make the NaN.
3278     APInt Payload;
3279     if (!str.getAsInteger(Radix, Payload)) {
3280       makeNaN(IsSignaling, IsNegative, &Payload);
3281       return true;
3282     }
3283   }
3284
3285   return false;
3286 }
3287
3288 Expected<APFloat::opStatus>
3289 IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) {
3290   if (str.empty())
3291     return createError("Invalid string length");
3292
3293   // Handle special cases.
3294   if (convertFromStringSpecials(str))
3295     return opOK;
3296
3297   /* Handle a leading minus sign.  */
3298   StringRef::iterator p = str.begin();
3299   size_t slen = str.size();
3300   sign = *p == '-' ? 1 : 0;
3301   if (sign && !semantics->hasSignedRepr)
3302     llvm_unreachable(
3303         "This floating point format does not support signed values");
3304
3305   if (*p == '-' || *p == '+') {
3306     p++;
3307     slen--;
3308     if (!slen)
3309       return createError("String has no digits");
3310   }
3311
3312   if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
3313     if (slen == 2)
3314       return createError("Invalid string");
3315     return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
3316                                         rounding_mode);
3317   }
3318
3319   return convertFromDecimalString(StringRef(p, slen), rounding_mode);
3320 }
3321
3322 /* Write out a hexadecimal representation of the floating point value
3323    to DST, which must be of sufficient size, in the C99 form
3324    [-]0xh.hhhhp[+-]d.  Return the number of characters written,
3325    excluding the terminating NUL.
3326
3327    If UPPERCASE, the output is in upper case, otherwise in lower case.
3328
3329    HEXDIGITS digits appear altogether, rounding the value if
3330    necessary.  If HEXDIGITS is 0, the minimal precision to display the
3331    number precisely is used instead.  If nothing would appear after
3332    the decimal point it is suppressed.
3333
3334    The decimal exponent is always printed and has at least one digit.
3335    Zero values display an exponent of zero.  Infinities and NaNs
3336    appear as "infinity" or "nan" respectively.
3337
3338    The above rules are as specified by C99.  There is ambiguity about
3339    what the leading hexadecimal digit should be.  This implementation
3340    uses whatever is necessary so that the exponent is displayed as
3341    stored.  This implies the exponent will fall within the IEEE format
3342    range, and the leading hexadecimal digit will be 0 (for denormals),
3343    1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
3344    any other digits zero).
3345 */
3346 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
3347                                            bool upperCase,
3348                                            roundingMode rounding_mode) const {
3349   char *p;
3350
3351   p = dst;
3352   if (sign)
3353     *dst++ = '-';
3354
3355   switch (category) {
3356   case fcInfinity:
3357     memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
3358     dst += sizeof infinityL - 1;
3359     break;
3360
3361   case fcNaN:
3362     memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
3363     dst += sizeof NaNU - 1;
3364     break;
3365
3366   case fcZero:
3367     *dst++ = '0';
3368     *dst++ = upperCase ? 'X': 'x';
3369     *dst++ = '0';
3370     if (hexDigits > 1) {
3371       *dst++ = '.';
3372       memset (dst, '0', hexDigits - 1);
3373       dst += hexDigits - 1;
3374     }
3375     *dst++ = upperCase ? 'P': 'p';
3376     *dst++ = '0';
3377     break;
3378
3379   case fcNormal:
3380     dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
3381     break;
3382   }
3383
3384   *dst = 0;
3385
3386   return static_cast<unsigned int>(dst - p);
3387 }
3388
3389 /* Does the hard work of outputting the correctly rounded hexadecimal
3390    form of a normal floating point number with the specified number of
3391    hexadecimal digits.  If HEXDIGITS is zero the minimum number of
3392    digits necessary to print the value precisely is output.  */
3393 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
3394                                           bool upperCase,
3395                                           roundingMode rounding_mode) const {
3396   unsigned int count, valueBits, shift, partsCount, outputDigits;
3397   const char *hexDigitChars;
3398   const integerPart *significand;
3399   char *p;
3400   bool roundUp;
3401
3402   *dst++ = '0';
3403   *dst++ = upperCase ? 'X': 'x';
3404
3405   roundUp = false;
3406   hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
3407
3408   significand = significandParts();
3409   partsCount = partCount();
3410
3411   /* +3 because the first digit only uses the single integer bit, so
3412      we have 3 virtual zero most-significant-bits.  */
3413   valueBits = semantics->precision + 3;
3414   shift = integerPartWidth - valueBits % integerPartWidth;
3415
3416   /* The natural number of digits required ignoring trailing
3417      insignificant zeroes.  */
3418   outputDigits = (valueBits - significandLSB () + 3) / 4;
3419
3420   /* hexDigits of zero means use the required number for the
3421      precision.  Otherwise, see if we are truncating.  If we are,
3422      find out if we need to round away from zero.  */
3423   if (hexDigits) {
3424     if (hexDigits < outputDigits) {
3425       /* We are dropping non-zero bits, so need to check how to round.
3426          "bits" is the number of dropped bits.  */
3427       unsigned int bits;
3428       lostFraction fraction;
3429
3430       bits = valueBits - hexDigits * 4;
3431       fraction = lostFractionThroughTruncation (significand, partsCount, bits);
3432       roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
3433     }
3434     outputDigits = hexDigits;
3435   }
3436
3437   /* Write the digits consecutively, and start writing in the location
3438      of the hexadecimal point.  We move the most significant digit
3439      left and add the hexadecimal point later.  */
3440   p = ++dst;
3441
3442   count = (valueBits + integerPartWidth - 1) / integerPartWidth;
3443
3444   while (outputDigits && count) {
3445     integerPart part;
3446
3447     /* Put the most significant integerPartWidth bits in "part".  */
3448     if (--count == partsCount)
3449       part = 0;  /* An imaginary higher zero part.  */
3450     else
3451       part = significand[count] << shift;
3452
3453     if (count && shift)
3454       part |= significand[count - 1] >> (integerPartWidth - shift);
3455
3456     /* Convert as much of "part" to hexdigits as we can.  */
3457     unsigned int curDigits = integerPartWidth / 4;
3458
3459     if (curDigits > outputDigits)
3460       curDigits = outputDigits;
3461     dst += partAsHex (dst, part, curDigits, hexDigitChars);
3462     outputDigits -= curDigits;
3463   }
3464
3465   if (roundUp) {
3466     char *q = dst;
3467
3468     /* Note that hexDigitChars has a trailing '0'.  */
3469     do {
3470       q--;
3471       *q = hexDigitChars[hexDigitValue (*q) + 1];
3472     } while (*q == '0');
3473     assert(q >= p);
3474   } else {
3475     /* Add trailing zeroes.  */
3476     memset (dst, '0', outputDigits);
3477     dst += outputDigits;
3478   }
3479
3480   /* Move the most significant digit to before the point, and if there
3481      is something after the decimal point add it.  This must come
3482      after rounding above.  */
3483   p[-1] = p[0];
3484   if (dst -1 == p)
3485     dst--;
3486   else
3487     p[0] = '.';
3488
3489   /* Finally output the exponent.  */
3490   *dst++ = upperCase ? 'P': 'p';
3491
3492   return writeSignedDecimal (dst, exponent);
3493 }
3494
3495 hash_code hash_value(const IEEEFloat &Arg) {
3496   if (!Arg.isFiniteNonZero())
3497     return hash_combine((uint8_t)Arg.category,
3498                         // NaN has no sign, fix it at zero.
3499                         Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
3500                         Arg.semantics->precision);
3501
3502   // Normal floats need their exponent and significand hashed.
3503   return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
3504                       Arg.semantics->precision, Arg.exponent,
3505                       hash_combine_range(
3506                         Arg.significandParts(),
3507                         Arg.significandParts() + Arg.partCount()));
3508 }
3509
3510 // Conversion from APFloat to/from host float/double.  It may eventually be
3511 // possible to eliminate these and have everybody deal with APFloats, but that
3512 // will take a while.  This approach will not easily extend to long double.
3513 // Current implementation requires integerPartWidth==64, which is correct at
3514 // the moment but could be made more general.
3515
// Denormals have exponent minExponent in APFloat, but minExponent-1 in
// the actual IEEE representations.  We compensate for that here.
3518
3519 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
3520   assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
3521   assert(partCount()==2);
3522
3523   uint64_t myexponent, mysignificand;
3524
3525   if (isFiniteNonZero()) {
3526     myexponent = exponent+16383; //bias
3527     mysignificand = significandParts()[0];
3528     if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
3529       myexponent = 0;   // denormal
3530   } else if (category==fcZero) {
3531     myexponent = 0;
3532     mysignificand = 0;
3533   } else if (category==fcInfinity) {
3534     myexponent = 0x7fff;
3535     mysignificand = 0x8000000000000000ULL;
3536   } else {
3537     assert(category == fcNaN && "Unknown category");
3538     myexponent = 0x7fff;
3539     mysignificand = significandParts()[0];
3540   }
3541
3542   uint64_t words[2];
3543   words[0] = mysignificand;
3544   words[1] =  ((uint64_t)(sign & 1) << 15) |
3545               (myexponent & 0x7fffLL);
3546   return APInt(80, words);
3547 }
3548
3549 APInt IEEEFloat::convertPPCDoubleDoubleLegacyAPFloatToAPInt() const {
3550   assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
3551   assert(partCount()==2);
3552
3553   uint64_t words[2];
3554   opStatus fs;
3555   bool losesInfo;
3556
3557   // Convert number to double.  To avoid spurious underflows, we re-
3558   // normalize against the "double" minExponent first, and only *then*
3559   // truncate the mantissa.  The result of that second conversion
3560   // may be inexact, but should never underflow.
3561   // Declare fltSemantics before APFloat that uses it (and
3562   // saves pointer to it) to ensure correct destruction order.
3563   fltSemantics extendedSemantics = *semantics;
3564   extendedSemantics.minExponent = semIEEEdouble.minExponent;
3565   IEEEFloat extended(*this);
3566   fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3567   assert(fs == opOK && !losesInfo);
3568   (void)fs;
3569
3570   IEEEFloat u(extended);
3571   fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3572   assert(fs == opOK || fs == opInexact);
3573   (void)fs;
3574   words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
3575
3576   // If conversion was exact or resulted in a special case, we're done;
3577   // just set the second double to zero.  Otherwise, re-convert back to
3578   // the extended format and compute the difference.  This now should
3579   // convert exactly to double.
3580   if (u.isFiniteNonZero() && losesInfo) {
3581     fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3582     assert(fs == opOK && !losesInfo);
3583     (void)fs;
3584
3585     IEEEFloat v(extended);
3586     v.subtract(u, rmNearestTiesToEven);
3587     fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3588     assert(fs == opOK && !losesInfo);
3589     (void)fs;
3590     words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
3591   } else {
3592     words[1] = 0;
3593   }
3594
3595   return APInt(128, words);
3596 }
3597
3598 template <const fltSemantics &S>
3599 APInt IEEEFloat::convertIEEEFloatToAPInt() const {
3600   assert(semantics == &S);
3601   const int bias =
3602       (semantics == &semFloat8E8M0FNU) ? -S.minExponent : -(S.minExponent - 1);
3603   constexpr unsigned int trailing_significand_bits = S.precision - 1;
3604   constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;
3605   constexpr integerPart integer_bit =
3606       integerPart{1} << (trailing_significand_bits % integerPartWidth);
3607   constexpr uint64_t significand_mask = integer_bit - 1;
3608   constexpr unsigned int exponent_bits =
3609       trailing_significand_bits ? (S.sizeInBits - 1 - trailing_significand_bits)
3610                                 : S.sizeInBits;
3611   static_assert(exponent_bits < 64);
3612   constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3613
3614   uint64_t myexponent;
3615   std::array<integerPart, partCountForBits(trailing_significand_bits)>
3616       mysignificand;
3617
3618   if (isFiniteNonZero()) {
3619     myexponent = exponent + bias;
3620     std::copy_n(significandParts(), mysignificand.size(),
3621                 mysignificand.begin());
3622     if (myexponent == 1 &&
3623         !(significandParts()[integer_bit_part] & integer_bit))
3624       myexponent = 0; // denormal
3625   } else if (category == fcZero) {
3626     if (!S.hasZero)
3627       llvm_unreachable("semantics does not support zero!");
3628     myexponent = ::exponentZero(S) + bias;
3629     mysignificand.fill(0);
3630   } else if (category == fcInfinity) {
3631     if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
3632         S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3633       llvm_unreachable("semantics don't support inf!");
3634     myexponent = ::exponentInf(S) + bias;
3635     mysignificand.fill(0);
3636   } else {
3637     assert(category == fcNaN && "Unknown category!");
3638     if (S.nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
3639       llvm_unreachable("semantics don't support NaN!");
3640     myexponent = ::exponentNaN(S) + bias;
3641     std::copy_n(significandParts(), mysignificand.size(),
3642                 mysignificand.begin());
3643   }
3644   std::array<uint64_t, (S.sizeInBits + 63) / 64> words;
3645   auto words_iter =
3646       std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin());
3647   if constexpr (significand_mask != 0 || trailing_significand_bits == 0) {
3648     // Clear the integer bit.
3649     words[mysignificand.size() - 1] &= significand_mask;
3650   }
3651   std::fill(words_iter, words.end(), uint64_t{0});
3652   constexpr size_t last_word = words.size() - 1;
3653   uint64_t shifted_sign = static_cast<uint64_t>(sign & 1)
3654                           << ((S.sizeInBits - 1) % 64);
3655   words[last_word] |= shifted_sign;
3656   uint64_t shifted_exponent = (myexponent & exponent_mask)
3657                               << (trailing_significand_bits % 64);
3658   words[last_word] |= shifted_exponent;
3659   if constexpr (last_word == 0) {
3660     return APInt(S.sizeInBits, words[0]);
3661   }
3662   return APInt(S.sizeInBits, words);
3663 }
3664
3665 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
3666   assert(partCount() == 2);
3667   return convertIEEEFloatToAPInt<semIEEEquad>();
3668 }
3669
3670 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
3671   assert(partCount()==1);
3672   return convertIEEEFloatToAPInt<semIEEEdouble>();
3673 }
3674
3675 APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
3676   assert(partCount()==1);
3677   return convertIEEEFloatToAPInt<semIEEEsingle>();
3678 }
3679
3680 APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
3681   assert(partCount() == 1);
3682   return convertIEEEFloatToAPInt<semBFloat>();
3683 }
3684
3685 APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
3686   assert(partCount()==1);
3687   return convertIEEEFloatToAPInt<semIEEEhalf>();
3688 }
3689
3690 APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
3691   assert(partCount() == 1);
3692   return convertIEEEFloatToAPInt<semFloat8E5M2>();
3693 }
3694
3695 APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
3696   assert(partCount() == 1);
3697   return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>();
3698 }
3699
3700 APInt IEEEFloat::convertFloat8E4M3APFloatToAPInt() const {
3701   assert(partCount() == 1);
3702   return convertIEEEFloatToAPInt<semFloat8E4M3>();
3703 }
3704
3705 APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
3706   assert(partCount() == 1);
3707   return convertIEEEFloatToAPInt<semFloat8E4M3FN>();
3708 }
3709
3710 APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
3711   assert(partCount() == 1);
3712   return convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>();
3713 }
3714
3715 APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
3716   assert(partCount() == 1);
3717   return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>();
3718 }
3719
3720 APInt IEEEFloat::convertFloat8E3M4APFloatToAPInt() const {
3721   assert(partCount() == 1);
3722   return convertIEEEFloatToAPInt<semFloat8E3M4>();
3723 }
3724
3725 APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
3726   assert(partCount() == 1);
3727   return convertIEEEFloatToAPInt<semFloatTF32>();
3728 }
3729
3730 APInt IEEEFloat::convertFloat8E8M0FNUAPFloatToAPInt() const {
3731   assert(partCount() == 1);
3732   return convertIEEEFloatToAPInt<semFloat8E8M0FNU>();
3733 }
3734
3735 APInt IEEEFloat::convertFloat6E3M2FNAPFloatToAPInt() const {
3736   assert(partCount() == 1);
3737   return convertIEEEFloatToAPInt<semFloat6E3M2FN>();
3738 }
3739
3740 APInt IEEEFloat::convertFloat6E2M3FNAPFloatToAPInt() const {
3741   assert(partCount() == 1);
3742   return convertIEEEFloatToAPInt<semFloat6E2M3FN>();
3743 }
3744
3745 APInt IEEEFloat::convertFloat4E2M1FNAPFloatToAPInt() const {
3746   assert(partCount() == 1);
3747   return convertIEEEFloatToAPInt<semFloat4E2M1FN>();
3748 }
3749
3750 // This function creates an APInt that is just a bit map of the floating
3751 // point constant as it would appear in memory.  It is not a conversion,
3752 // and treating the result as a normal integer is unlikely to be useful.
3753
3754 APInt IEEEFloat::bitcastToAPInt() const {
3755   if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
3756     return convertHalfAPFloatToAPInt();
3757
3758   if (semantics == (const llvm::fltSemantics *)&semBFloat)
3759     return convertBFloatAPFloatToAPInt();
3760
3761   if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
3762     return convertFloatAPFloatToAPInt();
3763
3764   if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
3765     return convertDoubleAPFloatToAPInt();
3766
3767   if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
3768     return convertQuadrupleAPFloatToAPInt();
3769
3770   if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
3771     return convertPPCDoubleDoubleLegacyAPFloatToAPInt();
3772
3773   if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2)
3774     return convertFloat8E5M2APFloatToAPInt();
3775
3776   if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ)
3777     return convertFloat8E5M2FNUZAPFloatToAPInt();
3778
3779   if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3)
3780     return convertFloat8E4M3APFloatToAPInt();
3781
3782   if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN)
3783     return convertFloat8E4M3FNAPFloatToAPInt();
3784
3785   if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ)
3786     return convertFloat8E4M3FNUZAPFloatToAPInt();
3787
3788   if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ)
3789     return convertFloat8E4M3B11FNUZAPFloatToAPInt();
3790
3791   if (semantics == (const llvm::fltSemantics *)&semFloat8E3M4)
3792     return convertFloat8E3M4APFloatToAPInt();
3793
3794   if (semantics == (const llvm::fltSemantics *)&semFloatTF32)
3795     return convertFloatTF32APFloatToAPInt();
3796
3797   if (semantics == (const llvm::fltSemantics *)&semFloat8E8M0FNU)
3798     return convertFloat8E8M0FNUAPFloatToAPInt();
3799
3800   if (semantics == (const llvm::fltSemantics *)&semFloat6E3M2FN)
3801     return convertFloat6E3M2FNAPFloatToAPInt();
3802
3803   if (semantics == (const llvm::fltSemantics *)&semFloat6E2M3FN)
3804     return convertFloat6E2M3FNAPFloatToAPInt();
3805
3806   if (semantics == (const llvm::fltSemantics *)&semFloat4E2M1FN)
3807     return convertFloat4E2M1FNAPFloatToAPInt();
3808
3809   assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
3810          "unknown format!");
3811   return convertF80LongDoubleAPFloatToAPInt();
3812 }
3813
3814 float IEEEFloat::convertToFloat() const {
3815   assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
3816          "Float semantics are not IEEEsingle");
3817   APInt api = bitcastToAPInt();
3818   return api.bitsToFloat();
3819 }
3820
3821 double IEEEFloat::convertToDouble() const {
3822   assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
3823          "Float semantics are not IEEEdouble");
3824   APInt api = bitcastToAPInt();
3825   return api.bitsToDouble();
3826 }
3827
#ifdef HAS_IEE754_FLOAT128
/// Return this value as a host float128 by reinterpreting its bit pattern.
/// The semantics must be semIEEEquad.  Only compiled when
/// HAS_IEE754_FLOAT128 is defined.
float128 IEEEFloat::convertToQuad() const {
  assert(semantics == &semIEEEquad &&
         "Float semantics are not IEEEquads");
  return bitcastToAPInt().bitsToQuad();
}
#endif
3836
3837 /// Integer bit is explicit in this format.  Intel hardware (387 and later)
3838 /// does not support these bit patterns:
3839 ///  exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3840 ///  exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3841 ///  exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3842 ///  exponent = 0, integer bit 1 ("pseudodenormal")
3843 /// At the moment, the first three are treated as NaNs, the last one as Normal.
3844 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3845   uint64_t i1 = api.getRawData()[0];
3846   uint64_t i2 = api.getRawData()[1];
3847   uint64_t myexponent = (i2 & 0x7fff);
3848   uint64_t mysignificand = i1;
3849   uint8_t myintegerbit = mysignificand >> 63;
3850
3851   initialize(&semX87DoubleExtended);
3852   assert(partCount()==2);
3853
3854   sign = static_cast<unsigned int>(i2>>15);
3855   if (myexponent == 0 && mysignificand == 0) {
3856     makeZero(sign);
3857   } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3858     makeInf(sign);
3859   } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3860              (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3861     category = fcNaN;
3862     exponent = exponentNaN();
3863     significandParts()[0] = mysignificand;
3864     significandParts()[1] = 0;
3865   } else {
3866     category = fcNormal;
3867     exponent = myexponent - 16383;
3868     significandParts()[0] = mysignificand;
3869     significandParts()[1] = 0;
3870     if (myexponent==0)          // denormal
3871       exponent = -16382;
3872   }
3873 }
3874
3875 void IEEEFloat::initFromPPCDoubleDoubleLegacyAPInt(const APInt &api) {
3876   uint64_t i1 = api.getRawData()[0];
3877   uint64_t i2 = api.getRawData()[1];
3878   opStatus fs;
3879   bool losesInfo;
3880
3881   // Get the first double and convert to our format.
3882   initFromDoubleAPInt(APInt(64, i1));
3883   fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3884   assert(fs == opOK && !losesInfo);
3885   (void)fs;
3886
3887   // Unless we have a special case, add in second double.
3888   if (isFiniteNonZero()) {
3889     IEEEFloat v(semIEEEdouble, APInt(64, i2));
3890     fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3891     assert(fs == opOK && !losesInfo);
3892     (void)fs;
3893
3894     add(v, rmNearestTiesToEven);
3895   }
3896 }
3897
3898 // The E8M0 format has the following characteristics:
3899 // It is an 8-bit unsigned format with only exponents (no actual significand).
3900 // No encodings for {zero, infinities or denorms}.
3901 // NaN is represented by all 1's.
3902 // Bias is 127.
3903 void IEEEFloat::initFromFloat8E8M0FNUAPInt(const APInt &api) {
3904   const uint64_t exponent_mask = 0xff;
3905   uint64_t val = api.getRawData()[0];
3906   uint64_t myexponent = (val & exponent_mask);
3907
3908   initialize(&semFloat8E8M0FNU);
3909   assert(partCount() == 1);
3910
3911   // This format has unsigned representation only
3912   sign = 0;
3913
3914   // Set the significand
3915   // This format does not have any significand but the 'Pth' precision bit is
3916   // always set to 1 for consistency in APFloat's internal representation.
3917   uint64_t mysignificand = 1;
3918   significandParts()[0] = mysignificand;
3919
3920   // This format can either have a NaN or fcNormal
3921   // All 1's i.e. 255 is a NaN
3922   if (val == exponent_mask) {
3923     category = fcNaN;
3924     exponent = exponentNaN();
3925     return;
3926   }
3927   // Handle fcNormal...
3928   category = fcNormal;
3929   exponent = myexponent - 127; // 127 is bias
3930 }
3931 template <const fltSemantics &S>
3932 void IEEEFloat::initFromIEEEAPInt(const APInt &api) {
3933   assert(api.getBitWidth() == S.sizeInBits);
3934   constexpr integerPart integer_bit = integerPart{1}
3935                                       << ((S.precision - 1) % integerPartWidth);
3936   constexpr uint64_t significand_mask = integer_bit - 1;
3937   constexpr unsigned int trailing_significand_bits = S.precision - 1;
3938   constexpr unsigned int stored_significand_parts =
3939       partCountForBits(trailing_significand_bits);
3940   constexpr unsigned int exponent_bits =
3941       S.sizeInBits - 1 - trailing_significand_bits;
3942   static_assert(exponent_bits < 64);
3943   constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3944   constexpr int bias = -(S.minExponent - 1);
3945
3946   // Copy the bits of the significand. We need to clear out the exponent and
3947   // sign bit in the last word.
3948   std::array<integerPart, stored_significand_parts> mysignificand;
3949   std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin());
3950   if constexpr (significand_mask != 0) {
3951     mysignificand[mysignificand.size() - 1] &= significand_mask;
3952   }
3953
3954   // We assume the last word holds the sign bit, the exponent, and potentially
3955   // some of the trailing significand field.
3956   uint64_t last_word = api.getRawData()[api.getNumWords() - 1];
3957   uint64_t myexponent =
3958       (last_word >> (trailing_significand_bits % 64)) & exponent_mask;
3959
3960   initialize(&S);
3961   assert(partCount() == mysignificand.size());
3962
3963   sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64));
3964
3965   bool all_zero_significand =
3966       llvm::all_of(mysignificand, [](integerPart bits) { return bits == 0; });
3967
3968   bool is_zero = myexponent == 0 && all_zero_significand;
3969
3970   if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) {
3971     if (myexponent - bias == ::exponentInf(S) && all_zero_significand) {
3972       makeInf(sign);
3973       return;
3974     }
3975   }
3976
3977   bool is_nan = false;
3978
3979   if constexpr (S.nanEncoding == fltNanEncoding::IEEE) {
3980     is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand;
3981   } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) {
3982     bool all_ones_significand =
3983         std::all_of(mysignificand.begin(), mysignificand.end() - 1,
3984                     [](integerPart bits) { return bits == ~integerPart{0}; }) &&
3985         (!significand_mask ||
3986          mysignificand[mysignificand.size() - 1] == significand_mask);
3987     is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand;
3988   } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) {
3989     is_nan = is_zero && sign;
3990   }
3991
3992   if (is_nan) {
3993     category = fcNaN;
3994     exponent = ::exponentNaN(S);
3995     std::copy_n(mysignificand.begin(), mysignificand.size(),
3996                 significandParts());
3997     return;
3998   }
3999
4000   if (is_zero) {
4001     makeZero(sign);
4002     return;
4003   }
4004
4005   category = fcNormal;
4006   exponent = myexponent - bias;
4007   std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts());
4008   if (myexponent == 0) // denormal
4009     exponent = S.minExponent;
4010   else
4011     significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit
4012 }
4013
4014 void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
4015   initFromIEEEAPInt<semIEEEquad>(api);
4016 }
4017
4018 void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
4019   initFromIEEEAPInt<semIEEEdouble>(api);
4020 }
4021
4022 void IEEEFloat::initFromFloatAPInt(const APInt &api) {
4023   initFromIEEEAPInt<semIEEEsingle>(api);
4024 }
4025
4026 void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
4027   initFromIEEEAPInt<semBFloat>(api);
4028 }
4029
4030 void IEEEFloat::initFromHalfAPInt(const APInt &api) {
4031   initFromIEEEAPInt<semIEEEhalf>(api);
4032 }
4033
4034 void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
4035   initFromIEEEAPInt<semFloat8E5M2>(api);
4036 }
4037
4038 void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
4039   initFromIEEEAPInt<semFloat8E5M2FNUZ>(api);
4040 }
4041
4042 void IEEEFloat::initFromFloat8E4M3APInt(const APInt &api) {
4043   initFromIEEEAPInt<semFloat8E4M3>(api);
4044 }
4045
4046 void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
4047   initFromIEEEAPInt<semFloat8E4M3FN>(api);
4048 }
4049
4050 void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
4051   initFromIEEEAPInt<semFloat8E4M3FNUZ>(api);
4052 }
4053
4054 void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
4055   initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api);
4056 }
4057
4058 void IEEEFloat::initFromFloat8E3M4APInt(const APInt &api) {
4059   initFromIEEEAPInt<semFloat8E3M4>(api);
4060 }
4061
4062 void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
4063   initFromIEEEAPInt<semFloatTF32>(api);
4064 }
4065
4066 void IEEEFloat::initFromFloat6E3M2FNAPInt(const APInt &api) {
4067   initFromIEEEAPInt<semFloat6E3M2FN>(api);
4068 }
4069
4070 void IEEEFloat::initFromFloat6E2M3FNAPInt(const APInt &api) {
4071   initFromIEEEAPInt<semFloat6E2M3FN>(api);
4072 }
4073
4074 void IEEEFloat::initFromFloat4E2M1FNAPInt(const APInt &api) {
4075   initFromIEEEAPInt<semFloat4E2M1FN>(api);
4076 }
4077
4078 /// Treat api as containing the bits of a floating point number.
4079 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
4080   assert(api.getBitWidth() == Sem->sizeInBits);
4081   if (Sem == &semIEEEhalf)
4082     return initFromHalfAPInt(api);
4083   if (Sem == &semBFloat)
4084     return initFromBFloatAPInt(api);
4085   if (Sem == &semIEEEsingle)
4086     return initFromFloatAPInt(api);
4087   if (Sem == &semIEEEdouble)
4088     return initFromDoubleAPInt(api);
4089   if (Sem == &semX87DoubleExtended)
4090     return initFromF80LongDoubleAPInt(api);
4091   if (Sem == &semIEEEquad)
4092     return initFromQuadrupleAPInt(api);
4093   if (Sem == &semPPCDoubleDoubleLegacy)
4094     return initFromPPCDoubleDoubleLegacyAPInt(api);
4095   if (Sem == &semFloat8E5M2)
4096     return initFromFloat8E5M2APInt(api);
4097   if (Sem == &semFloat8E5M2FNUZ)
4098     return initFromFloat8E5M2FNUZAPInt(api);
4099   if (Sem == &semFloat8E4M3)
4100     return initFromFloat8E4M3APInt(api);
4101   if (Sem == &semFloat8E4M3FN)
4102     return initFromFloat8E4M3FNAPInt(api);
4103   if (Sem == &semFloat8E4M3FNUZ)
4104     return initFromFloat8E4M3FNUZAPInt(api);
4105   if (Sem == &semFloat8E4M3B11FNUZ)
4106     return initFromFloat8E4M3B11FNUZAPInt(api);
4107   if (Sem == &semFloat8E3M4)
4108     return initFromFloat8E3M4APInt(api);
4109   if (Sem == &semFloatTF32)
4110     return initFromFloatTF32APInt(api);
4111   if (Sem == &semFloat8E8M0FNU)
4112     return initFromFloat8E8M0FNUAPInt(api);
4113   if (Sem == &semFloat6E3M2FN)
4114     return initFromFloat6E3M2FNAPInt(api);
4115   if (Sem == &semFloat6E2M3FN)
4116     return initFromFloat6E2M3FNAPInt(api);
4117   if (Sem == &semFloat4E2M1FN)
4118     return initFromFloat4E2M1FNAPInt(api);
4119
4120   llvm_unreachable("unsupported semantics");
4121 }
4122
4123 /// Make this number the largest magnitude normal number in the given
4124 /// semantics.
4125 void IEEEFloat::makeLargest(bool Negative) {
4126   if (Negative && !semantics->hasSignedRepr)
4127     llvm_unreachable(
4128         "This floating point format does not support signed values");
4129   // We want (in interchange format):
4130   //   sign = {Negative}
4131   //   exponent = 1..10
4132   //   significand = 1..1
4133   category = fcNormal;
4134   sign = Negative;
4135   exponent = semantics->maxExponent;
4136
4137   // Use memset to set all but the highest integerPart to all ones.
4138   integerPart *significand = significandParts();
4139   unsigned PartCount = partCount();
4140   memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
4141
4142   // Set the high integerPart especially setting all unused top bits for
4143   // internal consistency.
4144   const unsigned NumUnusedHighBits =
4145     PartCount*integerPartWidth - semantics->precision;
4146   significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
4147                                    ? (~integerPart(0) >> NumUnusedHighBits)
4148                                    : 0;
4149   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
4150       semantics->nanEncoding == fltNanEncoding::AllOnes &&
4151       (semantics->precision > 1))
4152     significand[0] &= ~integerPart(1);
4153 }
4154
4155 /// Make this number the smallest magnitude denormal number in the given
4156 /// semantics.
4157 void IEEEFloat::makeSmallest(bool Negative) {
4158   if (Negative && !semantics->hasSignedRepr)
4159     llvm_unreachable(
4160         "This floating point format does not support signed values");
4161   // We want (in interchange format):
4162   //   sign = {Negative}
4163   //   exponent = 0..0
4164   //   significand = 0..01
4165   category = fcNormal;
4166   sign = Negative;
4167   exponent = semantics->minExponent;
4168   APInt::tcSet(significandParts(), 1, partCount());
4169 }
4170
4171 void IEEEFloat::makeSmallestNormalized(bool Negative) {
4172   if (Negative && !semantics->hasSignedRepr)
4173     llvm_unreachable(
4174         "This floating point format does not support signed values");
4175   // We want (in interchange format):
4176   //   sign = {Negative}
4177   //   exponent = 0..0
4178   //   significand = 10..0
4179
4180   category = fcNormal;
4181   zeroSignificand();
4182   sign = Negative;
4183   exponent = semantics->minExponent;
4184   APInt::tcSetBit(significandParts(), semantics->precision - 1);
4185 }
4186
4187 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
4188   initFromAPInt(&Sem, API);
4189 }
4190
4191 IEEEFloat::IEEEFloat(float f) {
4192   initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
4193 }
4194
4195 IEEEFloat::IEEEFloat(double d) {
4196   initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
4197 }
4198
4199 namespace {
4200   void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
4201     Buffer.append(Str.begin(), Str.end());
4202   }
4203
4204   /// Removes data from the given significand until it is no more
4205   /// precise than is required for the desired precision.
4206   void AdjustToPrecision(APInt &significand,
4207                          int &exp, unsigned FormatPrecision) {
4208     unsigned bits = significand.getActiveBits();
4209
4210     // 196/59 is a very slight overestimate of lg_2(10).
4211     unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
4212
4213     if (bits <= bitsRequired) return;
4214
4215     unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
4216     if (!tensRemovable) return;
4217
4218     exp += tensRemovable;
4219
4220     APInt divisor(significand.getBitWidth(), 1);
4221     APInt powten(significand.getBitWidth(), 10);
4222     while (true) {
4223       if (tensRemovable & 1)
4224         divisor *= powten;
4225       tensRemovable >>= 1;
4226       if (!tensRemovable) break;
4227       powten *= powten;
4228     }
4229
4230     significand = significand.udiv(divisor);
4231
4232     // Truncate the significand down to its active bit count.
4233     significand = significand.trunc(significand.getActiveBits());
4234   }
4235
4236
4237   void AdjustToPrecision(SmallVectorImpl<char> &buffer,
4238                          int &exp, unsigned FormatPrecision) {
4239     unsigned N = buffer.size();
4240     if (N <= FormatPrecision) return;
4241
4242     // The most significant figures are the last ones in the buffer.
4243     unsigned FirstSignificant = N - FormatPrecision;
4244
4245     // Round.
4246     // FIXME: this probably shouldn't use 'round half up'.
4247
4248     // Rounding down is just a truncation, except we also want to drop
4249     // trailing zeros from the new result.
4250     if (buffer[FirstSignificant - 1] < '5') {
4251       while (FirstSignificant < N && buffer[FirstSignificant] == '0')
4252         FirstSignificant++;
4253
4254       exp += FirstSignificant;
4255       buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4256       return;
4257     }
4258
4259     // Rounding up requires a decimal add-with-carry.  If we continue
4260     // the carry, the newly-introduced zeros will just be truncated.
4261     for (unsigned I = FirstSignificant; I != N; ++I) {
4262       if (buffer[I] == '9') {
4263         FirstSignificant++;
4264       } else {
4265         buffer[I]++;
4266         break;
4267       }
4268     }
4269
4270     // If we carried through, we have exactly one digit of precision.
4271     if (FirstSignificant == N) {
4272       exp += FirstSignificant;
4273       buffer.clear();
4274       buffer.push_back('1');
4275       return;
4276     }
4277
4278     exp += FirstSignificant;
4279     buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4280   }
4281
4282   void toStringImpl(SmallVectorImpl<char> &Str, const bool isNeg, int exp,
4283                     APInt significand, unsigned FormatPrecision,
4284                     unsigned FormatMaxPadding, bool TruncateZero) {
4285     const int semanticsPrecision = significand.getBitWidth();
4286
4287     if (isNeg)
4288       Str.push_back('-');
4289
4290     // Set FormatPrecision if zero.  We want to do this before we
4291     // truncate trailing zeros, as those are part of the precision.
4292     if (!FormatPrecision) {
4293       // We use enough digits so the number can be round-tripped back to an
4294       // APFloat. The formula comes from "How to Print Floating-Point Numbers
4295       // Accurately" by Steele and White.
4296       // FIXME: Using a formula based purely on the precision is conservative;
4297       // we can print fewer digits depending on the actual value being printed.
4298
4299       // FormatPrecision = 2 + floor(significandBits / lg_2(10))
4300       FormatPrecision = 2 + semanticsPrecision * 59 / 196;
4301     }
4302
4303     // Ignore trailing binary zeros.
4304     int trailingZeros = significand.countr_zero();
4305     exp += trailingZeros;
4306     significand.lshrInPlace(trailingZeros);
4307
4308     // Change the exponent from 2^e to 10^e.
4309     if (exp == 0) {
4310       // Nothing to do.
4311     } else if (exp > 0) {
4312       // Just shift left.
4313       significand = significand.zext(semanticsPrecision + exp);
4314       significand <<= exp;
4315       exp = 0;
4316     } else { /* exp < 0 */
4317       int texp = -exp;
4318
4319       // We transform this using the identity:
4320       //   (N)(2^-e) == (N)(5^e)(10^-e)
4321       // This means we have to multiply N (the significand) by 5^e.
4322       // To avoid overflow, we have to operate on numbers large
4323       // enough to store N * 5^e:
4324       //   log2(N * 5^e) == log2(N) + e * log2(5)
4325       //                 <= semantics->precision + e * 137 / 59
4326       //   (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
4327
4328       unsigned precision = semanticsPrecision + (137 * texp + 136) / 59;
4329
4330       // Multiply significand by 5^e.
4331       //   N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
4332       significand = significand.zext(precision);
4333       APInt five_to_the_i(precision, 5);
4334       while (true) {
4335         if (texp & 1)
4336           significand *= five_to_the_i;
4337
4338         texp >>= 1;
4339         if (!texp)
4340           break;
4341         five_to_the_i *= five_to_the_i;
4342       }
4343     }
4344
4345     AdjustToPrecision(significand, exp, FormatPrecision);
4346
4347     SmallVector<char, 256> buffer;
4348
4349     // Fill the buffer.
4350     unsigned precision = significand.getBitWidth();
4351     if (precision < 4) {
4352       // We need enough precision to store the value 10.
4353       precision = 4;
4354       significand = significand.zext(precision);
4355     }
4356     APInt ten(precision, 10);
4357     APInt digit(precision, 0);
4358
4359     bool inTrail = true;
4360     while (significand != 0) {
4361       // digit <- significand % 10
4362       // significand <- significand / 10
4363       APInt::udivrem(significand, ten, significand, digit);
4364
4365       unsigned d = digit.getZExtValue();
4366
4367       // Drop trailing zeros.
4368       if (inTrail && !d)
4369         exp++;
4370       else {
4371         buffer.push_back((char) ('0' + d));
4372         inTrail = false;
4373       }
4374     }
4375
4376     assert(!buffer.empty() && "no characters in buffer!");
4377
4378     // Drop down to FormatPrecision.
4379     // TODO: don't do more precise calculations above than are required.
4380     AdjustToPrecision(buffer, exp, FormatPrecision);
4381
4382     unsigned NDigits = buffer.size();
4383
4384     // Check whether we should use scientific notation.
4385     bool FormatScientific;
4386     if (!FormatMaxPadding)
4387       FormatScientific = true;
4388     else {
4389       if (exp >= 0) {
4390         // 765e3 --> 765000
4391         //              ^^^
4392         // But we shouldn't make the number look more precise than it is.
4393         FormatScientific = ((unsigned) exp > FormatMaxPadding ||
4394                             NDigits + (unsigned) exp > FormatPrecision);
4395       } else {
4396         // Power of the most significant digit.
4397         int MSD = exp + (int) (NDigits - 1);
4398         if (MSD >= 0) {
4399           // 765e-2 == 7.65
4400           FormatScientific = false;
4401         } else {
4402           // 765e-5 == 0.00765
4403           //           ^ ^^
4404           FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
4405         }
4406       }
4407     }
4408
4409     // Scientific formatting is pretty straightforward.
4410     if (FormatScientific) {
4411       exp += (NDigits - 1);
4412
4413       Str.push_back(buffer[NDigits-1]);
4414       Str.push_back('.');
4415       if (NDigits == 1 && TruncateZero)
4416         Str.push_back('0');
4417       else
4418         for (unsigned I = 1; I != NDigits; ++I)
4419           Str.push_back(buffer[NDigits-1-I]);
4420       // Fill with zeros up to FormatPrecision.
4421       if (!TruncateZero && FormatPrecision > NDigits - 1)
4422         Str.append(FormatPrecision - NDigits + 1, '0');
4423       // For !TruncateZero we use lower 'e'.
4424       Str.push_back(TruncateZero ? 'E' : 'e');
4425
4426       Str.push_back(exp >= 0 ? '+' : '-');
4427       if (exp < 0)
4428         exp = -exp;
4429       SmallVector<char, 6> expbuf;
4430       do {
4431         expbuf.push_back((char) ('0' + (exp % 10)));
4432         exp /= 10;
4433       } while (exp);
4434       // Exponent always at least two digits if we do not truncate zeros.
4435       if (!TruncateZero && expbuf.size() < 2)
4436         expbuf.push_back('0');
4437       for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
4438         Str.push_back(expbuf[E-1-I]);
4439       return;
4440     }
4441
4442     // Non-scientific, positive exponents.
4443     if (exp >= 0) {
4444       for (unsigned I = 0; I != NDigits; ++I)
4445         Str.push_back(buffer[NDigits-1-I]);
4446       for (unsigned I = 0; I != (unsigned) exp; ++I)
4447         Str.push_back('0');
4448       return;
4449     }
4450
4451     // Non-scientific, negative exponents.
4452
4453     // The number of digits to the left of the decimal point.
4454     int NWholeDigits = exp + (int) NDigits;
4455
4456     unsigned I = 0;
4457     if (NWholeDigits > 0) {
4458       for (; I != (unsigned) NWholeDigits; ++I)
4459         Str.push_back(buffer[NDigits-I-1]);
4460       Str.push_back('.');
4461     } else {
4462       unsigned NZeros = 1 + (unsigned) -NWholeDigits;
4463
4464       Str.push_back('0');
4465       Str.push_back('.');
4466       for (unsigned Z = 1; Z != NZeros; ++Z)
4467         Str.push_back('0');
4468     }
4469
4470     for (; I != NDigits; ++I)
4471       Str.push_back(buffer[NDigits-I-1]);
4472
4473   }
4474 } // namespace
4475
4476 void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
4477                          unsigned FormatMaxPadding, bool TruncateZero) const {
4478   switch (category) {
4479   case fcInfinity:
4480     if (isNegative())
4481       return append(Str, "-Inf");
4482     else
4483       return append(Str, "+Inf");
4484
4485   case fcNaN: return append(Str, "NaN");
4486
4487   case fcZero:
4488     if (isNegative())
4489       Str.push_back('-');
4490
4491     if (!FormatMaxPadding) {
4492       if (TruncateZero)
4493         append(Str, "0.0E+0");
4494       else {
4495         append(Str, "0.0");
4496         if (FormatPrecision > 1)
4497           Str.append(FormatPrecision - 1, '0');
4498         append(Str, "e+00");
4499       }
4500     } else
4501       Str.push_back('0');
4502     return;
4503
4504   case fcNormal:
4505     break;
4506   }
4507
4508   // Decompose the number into an APInt and an exponent.
4509   int exp = exponent - ((int) semantics->precision - 1);
4510   APInt significand(
4511       semantics->precision,
4512       ArrayRef(significandParts(), partCountForBits(semantics->precision)));
4513
4514   toStringImpl(Str, isNegative(), exp, significand, FormatPrecision,
4515                FormatMaxPadding, TruncateZero);
4516
4517 }
4518
4519 bool IEEEFloat::getExactInverse(APFloat *inv) const {
4520   // Special floats and denormals have no exact inverse.
4521   if (!isFiniteNonZero())
4522     return false;
4523
4524   // Check that the number is a power of two by making sure that only the
4525   // integer bit is set in the significand.
4526   if (significandLSB() != semantics->precision - 1)
4527     return false;
4528
4529   // Get the inverse.
4530   IEEEFloat reciprocal(*semantics, 1ULL);
4531   if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
4532     return false;
4533
4534   // Avoid multiplication with a denormal, it is not safe on all platforms and
4535   // may be slower than a normal division.
4536   if (reciprocal.isDenormal())
4537     return false;
4538
4539   assert(reciprocal.isFiniteNonZero() &&
4540          reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
4541
4542   if (inv)
4543     *inv = APFloat(reciprocal, *semantics);
4544
4545   return true;
4546 }
4547
4548 int IEEEFloat::getExactLog2Abs() const {
4549   if (!isFinite() || isZero())
4550     return INT_MIN;
4551
4552   const integerPart *Parts = significandParts();
4553   const int PartCount = partCountForBits(semantics->precision);
4554
4555   int PopCount = 0;
4556   for (int i = 0; i < PartCount; ++i) {
4557     PopCount += llvm::popcount(Parts[i]);
4558     if (PopCount > 1)
4559       return INT_MIN;
4560   }
4561
4562   if (exponent != semantics->minExponent)
4563     return exponent;
4564
4565   int CountrParts = 0;
4566   for (int i = 0; i < PartCount;
4567        ++i, CountrParts += APInt::APINT_BITS_PER_WORD) {
4568     if (Parts[i] != 0) {
4569       return exponent - semantics->precision + CountrParts +
4570              llvm::countr_zero(Parts[i]) + 1;
4571     }
4572   }
4573
4574   llvm_unreachable("didn't find the set bit");
4575 }
4576
4577 bool IEEEFloat::isSignaling() const {
4578   if (!isNaN())
4579     return false;
4580   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly ||
4581       semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4582     return false;
4583
4584   // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
4585   // first bit of the trailing significand being 0.
4586   return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
4587 }
4588
4589 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
4590 ///
4591 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
4592 /// appropriate sign switching before/after the computation.
4593 APFloat::opStatus IEEEFloat::next(bool nextDown) {
4594   // If we are performing nextDown, swap sign so we have -x.
4595   if (nextDown)
4596     changeSign();
4597
4598   // Compute nextUp(x)
4599   opStatus result = opOK;
4600
4601   // Handle each float category separately.
4602   switch (category) {
4603   case fcInfinity:
4604     // nextUp(+inf) = +inf
4605     if (!isNegative())
4606       break;
4607     // nextUp(-inf) = -getLargest()
4608     makeLargest(true);
4609     break;
4610   case fcNaN:
4611     // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
4612     // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
4613     //                     change the payload.
4614     if (isSignaling()) {
4615       result = opInvalidOp;
4616       // For consistency, propagate the sign of the sNaN to the qNaN.
4617       makeNaN(false, isNegative(), nullptr);
4618     }
4619     break;
4620   case fcZero:
4621     // nextUp(pm 0) = +getSmallest()
4622     makeSmallest(false);
4623     break;
4624   case fcNormal:
4625     // nextUp(-getSmallest()) = -0
4626     if (isSmallest() && isNegative()) {
4627       APInt::tcSet(significandParts(), 0, partCount());
4628       category = fcZero;
4629       exponent = 0;
4630       if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
4631         sign = false;
4632       if (!semantics->hasZero)
4633         makeSmallestNormalized(false);
4634       break;
4635     }
4636
4637     if (isLargest() && !isNegative()) {
4638       if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4639         // nextUp(getLargest()) == NAN
4640         makeNaN();
4641         break;
4642       } else if (semantics->nonFiniteBehavior ==
4643                  fltNonfiniteBehavior::FiniteOnly) {
4644         // nextUp(getLargest()) == getLargest()
4645         break;
4646       } else {
4647         // nextUp(getLargest()) == INFINITY
4648         APInt::tcSet(significandParts(), 0, partCount());
4649         category = fcInfinity;
4650         exponent = semantics->maxExponent + 1;
4651         break;
4652       }
4653     }
4654
4655     // nextUp(normal) == normal + inc.
4656     if (isNegative()) {
4657       // If we are negative, we need to decrement the significand.
4658
4659       // We only cross a binade boundary that requires adjusting the exponent
4660       // if:
4661       //   1. exponent != semantics->minExponent. This implies we are not in the
4662       //   smallest binade or are dealing with denormals.
4663       //   2. Our significand excluding the integral bit is all zeros.
4664       bool WillCrossBinadeBoundary =
4665         exponent != semantics->minExponent && isSignificandAllZeros();
4666
4667       // Decrement the significand.
4668       //
4669       // We always do this since:
4670       //   1. If we are dealing with a non-binade decrement, by definition we
4671       //   just decrement the significand.
4672       //   2. If we are dealing with a normal -> normal binade decrement, since
4673       //   we have an explicit integral bit the fact that all bits but the
4674       //   integral bit are zero implies that subtracting one will yield a
4675       //   significand with 0 integral bit and 1 in all other spots. Thus we
4676       //   must just adjust the exponent and set the integral bit to 1.
4677       //   3. If we are dealing with a normal -> denormal binade decrement,
4678       //   since we set the integral bit to 0 when we represent denormals, we
4679       //   just decrement the significand.
4680       integerPart *Parts = significandParts();
4681       APInt::tcDecrement(Parts, partCount());
4682
4683       if (WillCrossBinadeBoundary) {
4684         // Our result is a normal number. Do the following:
4685         // 1. Set the integral bit to 1.
4686         // 2. Decrement the exponent.
4687         APInt::tcSetBit(Parts, semantics->precision - 1);
4688         exponent--;
4689       }
4690     } else {
4691       // If we are positive, we need to increment the significand.
4692
4693       // We only cross a binade boundary that requires adjusting the exponent if
4694       // the input is not a denormal and all of said input's significand bits
4695       // are set. If all of said conditions are true: clear the significand, set
4696       // the integral bit to 1, and increment the exponent. If we have a
4697       // denormal always increment since moving denormals and the numbers in the
4698       // smallest normal binade have the same exponent in our representation.
4699       // If there are only exponents, any increment always crosses the
4700       // BinadeBoundary.
4701       bool WillCrossBinadeBoundary = !APFloat::hasSignificand(*semantics) ||
4702                                      (!isDenormal() && isSignificandAllOnes());
4703
4704       if (WillCrossBinadeBoundary) {
4705         integerPart *Parts = significandParts();
4706         APInt::tcSet(Parts, 0, partCount());
4707         APInt::tcSetBit(Parts, semantics->precision - 1);
4708         assert(exponent != semantics->maxExponent &&
4709                "We can not increment an exponent beyond the maxExponent allowed"
4710                " by the given floating point semantics.");
4711         exponent++;
4712       } else {
4713         incrementSignificand();
4714       }
4715     }
4716     break;
4717   }
4718
4719   // If we are performing nextDown, swap sign so we have -nextUp(-x)
4720   if (nextDown)
4721     changeSign();
4722
4723   return result;
4724 }
4725
4726 APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
4727   return ::exponentNaN(*semantics);
4728 }
4729
4730 APFloatBase::ExponentType IEEEFloat::exponentInf() const {
4731   return ::exponentInf(*semantics);
4732 }
4733
4734 APFloatBase::ExponentType IEEEFloat::exponentZero() const {
4735   return ::exponentZero(*semantics);
4736 }
4737
4738 void IEEEFloat::makeInf(bool Negative) {
4739   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::FiniteOnly)
4740     llvm_unreachable("This floating point format does not support Inf");
4741
4742   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4743     // There is no Inf, so make NaN instead.
4744     makeNaN(false, Negative);
4745     return;
4746   }
4747   category = fcInfinity;
4748   sign = Negative;
4749   exponent = exponentInf();
4750   APInt::tcSet(significandParts(), 0, partCount());
4751 }
4752
4753 void IEEEFloat::makeZero(bool Negative) {
4754   if (!semantics->hasZero)
4755     llvm_unreachable("This floating point format does not support Zero");
4756
4757   category = fcZero;
4758   sign = Negative;
4759   if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
4760     // Merge negative zero to positive because 0b10000...000 is used for NaN
4761     sign = false;
4762   }
4763   exponent = exponentZero();
4764   APInt::tcSet(significandParts(), 0, partCount());
4765 }
4766
4767 void IEEEFloat::makeQuiet() {
4768   assert(isNaN());
4769   if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
4770     APInt::tcSetBit(significandParts(), semantics->precision - 2);
4771 }
4772
4773 int ilogb(const IEEEFloat &Arg) {
4774   if (Arg.isNaN())
4775     return APFloat::IEK_NaN;
4776   if (Arg.isZero())
4777     return APFloat::IEK_Zero;
4778   if (Arg.isInfinity())
4779     return APFloat::IEK_Inf;
4780   if (!Arg.isDenormal())
4781     return Arg.exponent;
4782
4783   IEEEFloat Normalized(Arg);
4784   int SignificandBits = Arg.getSemantics().precision - 1;
4785
4786   Normalized.exponent += SignificandBits;
4787   Normalized.normalize(APFloat::rmNearestTiesToEven, lfExactlyZero);
4788   return Normalized.exponent - SignificandBits;
4789 }
4790
4791 IEEEFloat scalbn(IEEEFloat X, int Exp, roundingMode RoundingMode) {
4792   auto MaxExp = X.getSemantics().maxExponent;
4793   auto MinExp = X.getSemantics().minExponent;
4794
4795   // If Exp is wildly out-of-scale, simply adding it to X.exponent will
4796   // overflow; clamp it to a safe range before adding, but ensure that the range
4797   // is large enough that the clamp does not change the result. The range we
4798   // need to support is the difference between the largest possible exponent and
4799   // the normalized exponent of half the smallest denormal.
4800
4801   int SignificandBits = X.getSemantics().precision - 1;
4802   int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
4803
4804   // Clamp to one past the range ends to let normalize handle overlflow.
4805   X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement);
4806   X.normalize(RoundingMode, lfExactlyZero);
4807   if (X.isNaN())
4808     X.makeQuiet();
4809   return X;
4810 }
4811
4812 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, roundingMode RM) {
4813   Exp = ilogb(Val);
4814
4815   // Quiet signalling nans.
4816   if (Exp == APFloat::IEK_NaN) {
4817     IEEEFloat Quiet(Val);
4818     Quiet.makeQuiet();
4819     return Quiet;
4820   }
4821
4822   if (Exp == APFloat::IEK_Inf)
4823     return Val;
4824
4825   // 1 is added because frexp is defined to return a normalized fraction in
4826   // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
4827   Exp = Exp == APFloat::IEK_Zero ? 0 : Exp + 1;
4828   return scalbn(Val, -Exp, RM);
4829 }
4830
4831 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
4832     : Semantics(&S),
4833       Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
4834   assert(Semantics == &semPPCDoubleDouble);
4835 }
4836
4837 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
4838     : Semantics(&S),
4839       Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
4840                             APFloat(semIEEEdouble, uninitialized)}) {
4841   assert(Semantics == &semPPCDoubleDouble);
4842 }
4843
4844 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
4845     : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
4846                                            APFloat(semIEEEdouble)}) {
4847   assert(Semantics == &semPPCDoubleDouble);
4848 }
4849
4850 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
4851     : Semantics(&S),
4852       Floats(new APFloat[2]{
4853           APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
4854           APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
4855   assert(Semantics == &semPPCDoubleDouble);
4856 }
4857
4858 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
4859                              APFloat &&Second)
4860     : Semantics(&S),
4861       Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
4862   assert(Semantics == &semPPCDoubleDouble);
4863   assert(&Floats[0].getSemantics() == &semIEEEdouble);
4864   assert(&Floats[1].getSemantics() == &semIEEEdouble);
4865 }
4866
4867 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
4868     : Semantics(RHS.Semantics),
4869       Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
4870                                          APFloat(RHS.Floats[1])}
4871                         : nullptr) {
4872   assert(Semantics == &semPPCDoubleDouble);
4873 }
4874
4875 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
4876     : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
4877   RHS.Semantics = &semBogus;
4878   assert(Semantics == &semPPCDoubleDouble);
4879 }
4880
4881 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
4882   if (Semantics == RHS.Semantics && RHS.Floats) {
4883     Floats[0] = RHS.Floats[0];
4884     Floats[1] = RHS.Floats[1];
4885   } else if (this != &RHS) {
4886     this->~DoubleAPFloat();
4887     new (this) DoubleAPFloat(RHS);
4888   }
4889   return *this;
4890 }
4891
4892 // Implement addition, subtraction, multiplication and division based on:
4893 // "Software for Doubled-Precision Floating-Point Computations",
4894 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
4895 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
4896                                          const APFloat &c, const APFloat &cc,
4897                                          roundingMode RM) {
4898   int Status = opOK;
4899   APFloat z = a;
4900   Status |= z.add(c, RM);
4901   if (!z.isFinite()) {
4902     if (!z.isInfinity()) {
4903       Floats[0] = std::move(z);
4904       Floats[1].makeZero(/* Neg = */ false);
4905       return (opStatus)Status;
4906     }
4907     Status = opOK;
4908     auto AComparedToC = a.compareAbsoluteValue(c);
4909     z = cc;
4910     Status |= z.add(aa, RM);
4911     if (AComparedToC == APFloat::cmpGreaterThan) {
4912       // z = cc + aa + c + a;
4913       Status |= z.add(c, RM);
4914       Status |= z.add(a, RM);
4915     } else {
4916       // z = cc + aa + a + c;
4917       Status |= z.add(a, RM);
4918       Status |= z.add(c, RM);
4919     }
4920     if (!z.isFinite()) {
4921       Floats[0] = std::move(z);
4922       Floats[1].makeZero(/* Neg = */ false);
4923       return (opStatus)Status;
4924     }
4925     Floats[0] = z;
4926     APFloat zz = aa;
4927     Status |= zz.add(cc, RM);
4928     if (AComparedToC == APFloat::cmpGreaterThan) {
4929       // Floats[1] = a - z + c + zz;
4930       Floats[1] = a;
4931       Status |= Floats[1].subtract(z, RM);
4932       Status |= Floats[1].add(c, RM);
4933       Status |= Floats[1].add(zz, RM);
4934     } else {
4935       // Floats[1] = c - z + a + zz;
4936       Floats[1] = c;
4937       Status |= Floats[1].subtract(z, RM);
4938       Status |= Floats[1].add(a, RM);
4939       Status |= Floats[1].add(zz, RM);
4940     }
4941   } else {
4942     // q = a - z;
4943     APFloat q = a;
4944     Status |= q.subtract(z, RM);
4945
4946     // zz = q + c + (a - (q + z)) + aa + cc;
4947     // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4948     auto zz = q;
4949     Status |= zz.add(c, RM);
4950     Status |= q.add(z, RM);
4951     Status |= q.subtract(a, RM);
4952     q.changeSign();
4953     Status |= zz.add(q, RM);
4954     Status |= zz.add(aa, RM);
4955     Status |= zz.add(cc, RM);
4956     if (zz.isZero() && !zz.isNegative()) {
4957       Floats[0] = std::move(z);
4958       Floats[1].makeZero(/* Neg = */ false);
4959       return opOK;
4960     }
4961     Floats[0] = z;
4962     Status |= Floats[0].add(zz, RM);
4963     if (!Floats[0].isFinite()) {
4964       Floats[1].makeZero(/* Neg = */ false);
4965       return (opStatus)Status;
4966     }
4967     Floats[1] = std::move(z);
4968     Status |= Floats[1].subtract(Floats[0], RM);
4969     Status |= Floats[1].add(zz, RM);
4970   }
4971   return (opStatus)Status;
4972 }
4973
4974 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4975                                                 const DoubleAPFloat &RHS,
4976                                                 DoubleAPFloat &Out,
4977                                                 roundingMode RM) {
4978   if (LHS.getCategory() == fcNaN) {
4979     Out = LHS;
4980     return opOK;
4981   }
4982   if (RHS.getCategory() == fcNaN) {
4983     Out = RHS;
4984     return opOK;
4985   }
4986   if (LHS.getCategory() == fcZero) {
4987     Out = RHS;
4988     return opOK;
4989   }
4990   if (RHS.getCategory() == fcZero) {
4991     Out = LHS;
4992     return opOK;
4993   }
4994   if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4995       LHS.isNegative() != RHS.isNegative()) {
4996     Out.makeNaN(false, Out.isNegative(), nullptr);
4997     return opInvalidOp;
4998   }
4999   if (LHS.getCategory() == fcInfinity) {
5000     Out = LHS;
5001     return opOK;
5002   }
5003   if (RHS.getCategory() == fcInfinity) {
5004     Out = RHS;
5005     return opOK;
5006   }
5007   assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
5008
5009   APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
5010       CC(RHS.Floats[1]);
5011   assert(&A.getSemantics() == &semIEEEdouble);
5012   assert(&AA.getSemantics() == &semIEEEdouble);
5013   assert(&C.getSemantics() == &semIEEEdouble);
5014   assert(&CC.getSemantics() == &semIEEEdouble);
5015   assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
5016   assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
5017   return Out.addImpl(A, AA, C, CC, RM);
5018 }
5019
5020 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS,
5021                                      roundingMode RM) {
5022   return addWithSpecial(*this, RHS, *this, RM);
5023 }
5024
5025 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS,
5026                                           roundingMode RM) {
5027   changeSign();
5028   auto Ret = add(RHS, RM);
5029   changeSign();
5030   return Ret;
5031 }
5032
5033 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS,
5034                                           APFloat::roundingMode RM) {
5035   const auto &LHS = *this;
5036   auto &Out = *this;
5037   /* Interesting observation: For special categories, finding the lowest
5038      common ancestor of the following layered graph gives the correct
5039      return category:
5040
5041         NaN
5042        /   \
5043      Zero  Inf
5044        \   /
5045        Normal
5046
5047      e.g. NaN * NaN = NaN
5048           Zero * Inf = NaN
5049           Normal * Zero = Zero
5050           Normal * Inf = Inf
5051   */
5052   if (LHS.getCategory() == fcNaN) {
5053     Out = LHS;
5054     return opOK;
5055   }
5056   if (RHS.getCategory() == fcNaN) {
5057     Out = RHS;
5058     return opOK;
5059   }
5060   if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
5061       (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
5062     Out.makeNaN(false, false, nullptr);
5063     return opOK;
5064   }
5065   if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
5066     Out = LHS;
5067     return opOK;
5068   }
5069   if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
5070     Out = RHS;
5071     return opOK;
5072   }
5073   assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
5074          "Special cases not handled exhaustively");
5075
5076   int Status = opOK;
5077   APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
5078   // t = a * c
5079   APFloat T = A;
5080   Status |= T.multiply(C, RM);
5081   if (!T.isFiniteNonZero()) {
5082     Floats[0] = T;
5083     Floats[1].makeZero(/* Neg = */ false);
5084     return (opStatus)Status;
5085   }
5086
5087   // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
5088   APFloat Tau = A;
5089   T.changeSign();
5090   Status |= Tau.fusedMultiplyAdd(C, T, RM);
5091   T.changeSign();
5092   {
5093     // v = a * d
5094     APFloat V = A;
5095     Status |= V.multiply(D, RM);
5096     // w = b * c
5097     APFloat W = B;
5098     Status |= W.multiply(C, RM);
5099     Status |= V.add(W, RM);
5100     // tau += v + w
5101     Status |= Tau.add(V, RM);
5102   }
5103   // u = t + tau
5104   APFloat U = T;
5105   Status |= U.add(Tau, RM);
5106
5107   Floats[0] = U;
5108   if (!U.isFinite()) {
5109     Floats[1].makeZero(/* Neg = */ false);
5110   } else {
5111     // Floats[1] = (t - u) + tau
5112     Status |= T.subtract(U, RM);
5113     Status |= T.add(Tau, RM);
5114     Floats[1] = T;
5115   }
5116   return (opStatus)Status;
5117 }
5118
5119 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS,
5120                                         APFloat::roundingMode RM) {
5121   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5122   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5123   auto Ret =
5124       Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
5125   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5126   return Ret;
5127 }
5128
5129 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) {
5130   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5131   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5132   auto Ret =
5133       Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
5134   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5135   return Ret;
5136 }
5137
5138 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) {
5139   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5140   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5141   auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
5142   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5143   return Ret;
5144 }
5145
5146 APFloat::opStatus
5147 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
5148                                 const DoubleAPFloat &Addend,
5149                                 APFloat::roundingMode RM) {
5150   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5151   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5152   auto Ret = Tmp.fusedMultiplyAdd(
5153       APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()),
5154       APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM);
5155   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5156   return Ret;
5157 }
5158
5159 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {
5160   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5161   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5162   auto Ret = Tmp.roundToIntegral(RM);
5163   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5164   return Ret;
5165 }
5166
5167 void DoubleAPFloat::changeSign() {
5168   Floats[0].changeSign();
5169   Floats[1].changeSign();
5170 }
5171
5172 APFloat::cmpResult
5173 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const {
5174   auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
5175   if (Result != cmpEqual)
5176     return Result;
5177   Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
5178   if (Result == cmpLessThan || Result == cmpGreaterThan) {
5179     auto Against = Floats[0].isNegative() ^ Floats[1].isNegative();
5180     auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative();
5181     if (Against && !RHSAgainst)
5182       return cmpLessThan;
5183     if (!Against && RHSAgainst)
5184       return cmpGreaterThan;
5185     if (!Against && !RHSAgainst)
5186       return Result;
5187     if (Against && RHSAgainst)
5188       return (cmpResult)(cmpLessThan + cmpGreaterThan - Result);
5189   }
5190   return Result;
5191 }
5192
5193 APFloat::fltCategory DoubleAPFloat::getCategory() const {
5194   return Floats[0].getCategory();
5195 }
5196
5197 bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
5198
5199 void DoubleAPFloat::makeInf(bool Neg) {
5200   Floats[0].makeInf(Neg);
5201   Floats[1].makeZero(/* Neg = */ false);
5202 }
5203
5204 void DoubleAPFloat::makeZero(bool Neg) {
5205   Floats[0].makeZero(Neg);
5206   Floats[1].makeZero(/* Neg = */ false);
5207 }
5208
5209 void DoubleAPFloat::makeLargest(bool Neg) {
5210   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5211   Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
5212   Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
5213   if (Neg)
5214     changeSign();
5215 }
5216
5217 void DoubleAPFloat::makeSmallest(bool Neg) {
5218   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5219   Floats[0].makeSmallest(Neg);
5220   Floats[1].makeZero(/* Neg = */ false);
5221 }
5222
5223 void DoubleAPFloat::makeSmallestNormalized(bool Neg) {
5224   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5225   Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
5226   if (Neg)
5227     Floats[0].changeSign();
5228   Floats[1].makeZero(/* Neg = */ false);
5229 }
5230
5231 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
5232   Floats[0].makeNaN(SNaN, Neg, fill);
5233   Floats[1].makeZero(/* Neg = */ false);
5234 }
5235
5236 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const {
5237   auto Result = Floats[0].compare(RHS.Floats[0]);
5238   // |Float[0]| > |Float[1]|
5239   if (Result == APFloat::cmpEqual)
5240     return Floats[1].compare(RHS.Floats[1]);
5241   return Result;
5242 }
5243
5244 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const {
5245   return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
5246          Floats[1].bitwiseIsEqual(RHS.Floats[1]);
5247 }
5248
5249 hash_code hash_value(const DoubleAPFloat &Arg) {
5250   if (Arg.Floats)
5251     return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
5252   return hash_combine(Arg.Semantics);
5253 }
5254
5255 APInt DoubleAPFloat::bitcastToAPInt() const {
5256   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5257   uint64_t Data[] = {
5258       Floats[0].bitcastToAPInt().getRawData()[0],
5259       Floats[1].bitcastToAPInt().getRawData()[0],
5260   };
5261   return APInt(128, 2, Data);
5262 }
5263
5264 Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S,
5265                                                              roundingMode RM) {
5266   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5267   APFloat Tmp(semPPCDoubleDoubleLegacy);
5268   auto Ret = Tmp.convertFromString(S, RM);
5269   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5270   return Ret;
5271 }
5272
5273 APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
5274   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5275   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5276   auto Ret = Tmp.next(nextDown);
5277   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5278   return Ret;
5279 }
5280
5281 APFloat::opStatus
5282 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input,
5283                                 unsigned int Width, bool IsSigned,
5284                                 roundingMode RM, bool *IsExact) const {
5285   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5286   return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5287       .convertToInteger(Input, Width, IsSigned, RM, IsExact);
5288 }
5289
5290 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,
5291                                                   bool IsSigned,
5292                                                   roundingMode RM) {
5293   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5294   APFloat Tmp(semPPCDoubleDoubleLegacy);
5295   auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM);
5296   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5297   return Ret;
5298 }
5299
5300 APFloat::opStatus
5301 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input,
5302                                               unsigned int InputSize,
5303                                               bool IsSigned, roundingMode RM) {
5304   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5305   APFloat Tmp(semPPCDoubleDoubleLegacy);
5306   auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);
5307   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5308   return Ret;
5309 }
5310
5311 APFloat::opStatus
5312 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input,
5313                                               unsigned int InputSize,
5314                                               bool IsSigned, roundingMode RM) {
5315   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5316   APFloat Tmp(semPPCDoubleDoubleLegacy);
5317   auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);
5318   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5319   return Ret;
5320 }
5321
5322 unsigned int DoubleAPFloat::convertToHexString(char *DST,
5323                                                unsigned int HexDigits,
5324                                                bool UpperCase,
5325                                                roundingMode RM) const {
5326   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5327   return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5328       .convertToHexString(DST, HexDigits, UpperCase, RM);
5329 }
5330
5331 bool DoubleAPFloat::isDenormal() const {
5332   return getCategory() == fcNormal &&
5333          (Floats[0].isDenormal() || Floats[1].isDenormal() ||
5334           // (double)(Hi + Lo) == Hi defines a normal number.
5335           Floats[0] != Floats[0] + Floats[1]);
5336 }
5337
5338 bool DoubleAPFloat::isSmallest() const {
5339   if (getCategory() != fcNormal)
5340     return false;
5341   DoubleAPFloat Tmp(*this);
5342   Tmp.makeSmallest(this->isNegative());
5343   return Tmp.compare(*this) == cmpEqual;
5344 }
5345
5346 bool DoubleAPFloat::isSmallestNormalized() const {
5347   if (getCategory() != fcNormal)
5348     return false;
5349
5350   DoubleAPFloat Tmp(*this);
5351   Tmp.makeSmallestNormalized(this->isNegative());
5352   return Tmp.compare(*this) == cmpEqual;
5353 }
5354
5355 bool DoubleAPFloat::isLargest() const {
5356   if (getCategory() != fcNormal)
5357     return false;
5358   DoubleAPFloat Tmp(*this);
5359   Tmp.makeLargest(this->isNegative());
5360   return Tmp.compare(*this) == cmpEqual;
5361 }
5362
5363 bool DoubleAPFloat::isInteger() const {
5364   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5365   return Floats[0].isInteger() && Floats[1].isInteger();
5366 }
5367
5368 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
5369                              unsigned FormatPrecision,
5370                              unsigned FormatMaxPadding,
5371                              bool TruncateZero) const {
5372   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5373   APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5374       .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
5375 }
5376
5377 bool DoubleAPFloat::getExactInverse(APFloat *inv) const {
5378   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5379   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5380   if (!inv)
5381     return Tmp.getExactInverse(nullptr);
5382   APFloat Inv(semPPCDoubleDoubleLegacy);
5383   auto Ret = Tmp.getExactInverse(&Inv);
5384   *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt());
5385   return Ret;
5386 }
5387
5388 int DoubleAPFloat::getExactLog2() const {
5389   // TODO: Implement me
5390   return INT_MIN;
5391 }
5392
5393 int DoubleAPFloat::getExactLog2Abs() const {
5394   // TODO: Implement me
5395   return INT_MIN;
5396 }
5397
5398 DoubleAPFloat scalbn(const DoubleAPFloat &Arg, int Exp,
5399                      APFloat::roundingMode RM) {
5400   assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5401   return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM),
5402                        scalbn(Arg.Floats[1], Exp, RM));
5403 }
5404
5405 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
5406                     APFloat::roundingMode RM) {
5407   assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5408   APFloat First = frexp(Arg.Floats[0], Exp, RM);
5409   APFloat Second = Arg.Floats[1];
5410   if (Arg.getCategory() == APFloat::fcNormal)
5411     Second = scalbn(Second, -Exp, RM);
5412   return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second));
5413 }
5414
5415 } // namespace detail
5416
5417 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
5418   if (usesLayout<IEEEFloat>(Semantics)) {
5419     new (&IEEE) IEEEFloat(std::move(F));
5420     return;
5421   }
5422   if (usesLayout<DoubleAPFloat>(Semantics)) {
5423     const fltSemantics& S = F.getSemantics();
5424     new (&Double)
5425         DoubleAPFloat(Semantics, APFloat(std::move(F), S),
5426                       APFloat(semIEEEdouble));
5427     return;
5428   }
5429   llvm_unreachable("Unexpected semantics");
5430 }
5431
5432 Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str,
5433                                                        roundingMode RM) {
5434   APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM));
5435 }
5436
5437 hash_code hash_value(const APFloat &Arg) {
5438   if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
5439     return hash_value(Arg.U.IEEE);
5440   if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
5441     return hash_value(Arg.U.Double);
5442   llvm_unreachable("Unexpected semantics");
5443 }
5444
5445 APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
5446     : APFloat(Semantics) {
5447   auto StatusOrErr = convertFromString(S, rmNearestTiesToEven);
5448   assert(StatusOrErr && "Invalid floating point representation");
5449   consumeError(StatusOrErr.takeError());
5450 }
5451
5452 FPClassTest APFloat::classify() const {
5453   if (isZero())
5454     return isNegative() ? fcNegZero : fcPosZero;
5455   if (isNormal())
5456     return isNegative() ? fcNegNormal : fcPosNormal;
5457   if (isDenormal())
5458     return isNegative() ? fcNegSubnormal : fcPosSubnormal;
5459   if (isInfinity())
5460     return isNegative() ? fcNegInf : fcPosInf;
5461   assert(isNaN() && "Other class of FP constant");
5462   return isSignaling() ? fcSNan : fcQNan;
5463 }
5464
5465 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
5466                                    roundingMode RM, bool *losesInfo) {
5467   if (&getSemantics() == &ToSemantics) {
5468     *losesInfo = false;
5469     return opOK;
5470   }
5471   if (usesLayout<IEEEFloat>(getSemantics()) &&
5472       usesLayout<IEEEFloat>(ToSemantics))
5473     return U.IEEE.convert(ToSemantics, RM, losesInfo);
5474   if (usesLayout<IEEEFloat>(getSemantics()) &&
5475       usesLayout<DoubleAPFloat>(ToSemantics)) {
5476     assert(&ToSemantics == &semPPCDoubleDouble);
5477     auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo);
5478     *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
5479     return Ret;
5480   }
5481   if (usesLayout<DoubleAPFloat>(getSemantics()) &&
5482       usesLayout<IEEEFloat>(ToSemantics)) {
5483     auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
5484     *this = APFloat(std::move(getIEEE()), ToSemantics);
5485     return Ret;
5486   }
5487   llvm_unreachable("Unexpected semantics");
5488 }
5489
5490 APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics) {
5491   return APFloat(Semantics, APInt::getAllOnes(Semantics.sizeInBits));
5492 }
5493
5494 void APFloat::print(raw_ostream &OS) const {
5495   SmallVector<char, 16> Buffer;
5496   toString(Buffer);
5497   OS << Buffer;
5498 }
5499
5500 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
5501 LLVM_DUMP_METHOD void APFloat::dump() const {
5502   print(dbgs());
5503   dbgs() << '\n';
5504 }
5505 #endif
5506
5507 void APFloat::Profile(FoldingSetNodeID &NID) const {
5508   NID.Add(bitcastToAPInt());
5509 }
5510
5511 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
5512    an APSInt, whose initial bit-width and signed-ness are used to determine the
5513    precision of the conversion.
5514  */
5515 APFloat::opStatus APFloat::convertToInteger(APSInt &result,
5516                                             roundingMode rounding_mode,
5517                                             bool *isExact) const {
5518   unsigned bitWidth = result.getBitWidth();
5519   SmallVector<uint64_t, 4> parts(result.getNumWords());
5520   opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
5521                                      rounding_mode, isExact);
5522   // Keeps the original signed-ness.
5523   result = APInt(bitWidth, parts);
5524   return status;
5525 }
5526
5527 double APFloat::convertToDouble() const {
5528   if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEdouble)
5529     return getIEEE().convertToDouble();
5530   assert(getSemantics().isRepresentableBy(semIEEEdouble) &&
5531          "Float semantics is not representable by IEEEdouble");
5532   APFloat Temp = *this;
5533   bool LosesInfo;
5534   opStatus St = Temp.convert(semIEEEdouble, rmNearestTiesToEven, &LosesInfo);
5535   assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5536   (void)St;
5537   return Temp.getIEEE().convertToDouble();
5538 }
5539
#ifdef HAS_IEE754_FLOAT128
/// Returns this value as a host float128.
///
/// A semIEEEquad value is converted directly. Any other value is first
/// converted on a copy to semIEEEquad, which is asserted to be exact.
float128 APFloat::convertToQuad() const {
  const fltSemantics *const QuadSem = &semIEEEquad;
  if (&getSemantics() == QuadSem)
    return getIEEE().convertToQuad();

  assert(getSemantics().isRepresentableBy(semIEEEquad) &&
         "Float semantics is not representable by IEEEquad");
  APFloat Widened(*this);
  bool LosesInfo;
  opStatus St = Widened.convert(*QuadSem, rmNearestTiesToEven, &LosesInfo);
  assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
  (void)St; // Only read by the assertion above.
  return Widened.getIEEE().convertToQuad();
}
#endif
5554
5555 float APFloat::convertToFloat() const {
5556   if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle)
5557     return getIEEE().convertToFloat();
5558   assert(getSemantics().isRepresentableBy(semIEEEsingle) &&
5559          "Float semantics is not representable by IEEEsingle");
5560   APFloat Temp = *this;
5561   bool LosesInfo;
5562   opStatus St = Temp.convert(semIEEEsingle, rmNearestTiesToEven, &LosesInfo);
5563   assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5564   (void)St;
5565   return Temp.getIEEE().convertToFloat();
5566 }
5567
5568 } // namespace llvm
5569
5570 #undef APFLOAT_DISPATCH_ON_SEMANTICS