llvm/lib/Support/APFloat.cpp

   1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file implements a class to represent arbitrary precision floating
  10 // point values and provide a variety of arithmetic operations on them.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "llvm/ADT/APFloat.h"
  15 #include "llvm/ADT/APSInt.h"
  16 #include "llvm/ADT/ArrayRef.h"
  17 #include "llvm/ADT/FloatingPointMode.h"
  18 #include "llvm/ADT/FoldingSet.h"
  19 #include "llvm/ADT/Hashing.h"
  20 #include "llvm/ADT/STLExtras.h"
  21 #include "llvm/ADT/StringExtras.h"
  22 #include "llvm/ADT/StringRef.h"
  23 #include "llvm/Config/llvm-config.h"
  24 #include "llvm/Support/Debug.h"
  25 #include "llvm/Support/Error.h"
  26 #include "llvm/Support/MathExtras.h"
  27 #include "llvm/Support/raw_ostream.h"
  28 #include <cstring>
  29 #include <limits.h>
  30
  31 #define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)                             \
  32   do {                                                                         \
  33     if (usesLayout<IEEEFloat>(getSemantics()))                                 \
  34       return U.IEEE.METHOD_CALL;                                               \
  35     if (usesLayout<DoubleAPFloat>(getSemantics()))                             \
  36       return U.Double.METHOD_CALL;                                             \
  37     llvm_unreachable("Unexpected semantics");                                  \
  38   } while (false)
  39
  40 using namespace llvm;
  41
  42 /// A macro used to combine two fcCategory enums into one key which can be used
  43 /// in a switch statement to classify how the interaction of two APFloat's
  44 /// categories affects an operation.
  45 ///
  46 /// TODO: If clang source code is ever allowed to use constexpr in its own
  47 /// codebase, change this into a static inline function.
  48 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
  49
  50 /* Assumed in hexadecimal significand parsing, and conversion to
  51    hexadecimal strings.  */
  52 static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
  53
  54 namespace llvm {
  55
  56 // How the nonfinite values Inf and NaN are represented.
  57 enum class fltNonfiniteBehavior {
  58   // Represents standard IEEE 754 behavior. A value is nonfinite if the
  59   // exponent field is all 1s. In such cases, a value is Inf if the
  60   // significand bits are all zero, and NaN otherwise
  61   IEEE754,
  62
  63   // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN,
  64   // Float8E5M2FNUZ, Float8E4M3FNUZ, and Float8E4M3B11FNUZ). There is no
  65   // representation for Inf, and operations that would ordinarily produce Inf
  66   // produce NaN instead.
  67   // The details of the NaN representation(s) in this form are determined by the
  68   // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available
  69   // encodings do not distinguish between signalling and quiet NaN.
  70   NanOnly,
  71 };
  72
  73 // How NaN values are represented. This is curently only used in combination
  74 // with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE
  75 // while having IEEE non-finite behavior is liable to lead to unexpected
  76 // results.
  77 enum class fltNanEncoding {
  78   // Represents the standard IEEE behavior where a value is NaN if its
  79   // exponent is all 1s and the significand is non-zero.
  80   IEEE,
  81
  82   // Represents the behavior in the Float8E4M3 floating point type where NaN is
  83   // represented by having the exponent and mantissa set to all 1s.
  84   // This behavior matches the FP8 E4M3 type described in
  85   // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
  86   // as non-signalling, although the paper does not state whether the NaN
  87   // values are signalling or not.
  88   AllOnes,
  89
  90   // Represents the behavior in Float8E{5,4}E{2,3}FNUZ floating point types
  91   // where NaN is represented by a sign bit of 1 and all 0s in the exponent
  92   // and mantissa (i.e. the negative zero encoding in a IEEE float). Since
  93   // there is only one NaN value, it is treated as quiet NaN. This matches the
  94   // behavior described in https://arxiv.org/abs/2206.02915 .
  95   NegativeZero,
  96 };
  97
  98 /* Represents floating point arithmetic semantics.  */
  99 struct fltSemantics {
 100   /* The largest E such that 2^E is representable; this matches the
 101      definition of IEEE 754.  */
 102   APFloatBase::ExponentType maxExponent;
 103
 104   /* The smallest E such that 2^E is a normalized number; this
 105      matches the definition of IEEE 754.  */
 106   APFloatBase::ExponentType minExponent;
 107
 108   /* Number of bits in the significand.  This includes the integer
 109      bit.  */
 110   unsigned int precision;
 111
 112   /* Number of bits actually used in the semantics. */
 113   unsigned int sizeInBits;
 114
 115   fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754;
 116
 117   fltNanEncoding nanEncoding = fltNanEncoding::IEEE;
 118   // Returns true if any number described by this semantics can be precisely
 119   // represented by the specified semantics. Does not take into account
 120   // the value of fltNonfiniteBehavior.
 121   bool isRepresentableBy(const fltSemantics &S) const {
 122     return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&
 123            precision <= S.precision;
 124   }
 125 };
 126
 127 static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16};
 128 static constexpr fltSemantics semBFloat = {127, -126, 8, 16};
 129 static constexpr fltSemantics semIEEEsingle = {127, -126, 24, 32};
 130 static constexpr fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
 131 static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128};
 132 static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8};
 133 static constexpr fltSemantics semFloat8E5M2FNUZ = {
 134     15, -15, 3, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
 135 static constexpr fltSemantics semFloat8E4M3FN = {
 136     8, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes};
 137 static constexpr fltSemantics semFloat8E4M3FNUZ = {
 138     7, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
 139 static constexpr fltSemantics semFloat8E4M3B11FNUZ = {
 140     4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
 141 static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19};
 142 static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
 143 static constexpr fltSemantics semBogus = {0, 0, 0, 0};
 144
 145 /* The IBM double-double semantics. Such a number consists of a pair of IEEE
 146    64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
 147    (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
 148    Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
 149    to each other, and two 11-bit exponents.
 150
 151    Note: we need to make the value different from semBogus as otherwise
 152    an unsafe optimization may collapse both values to a single address,
 153    and we heavily rely on them having distinct addresses.             */
 154 static constexpr fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};
 155
 156 /* These are legacy semantics for the fallback, inaccrurate implementation of
 157    IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
 158    operation. It's equivalent to having an IEEE number with consecutive 106
 159    bits of mantissa and 11 bits of exponent.
 160
 161    It's not equivalent to IBM double-double. For example, a legit IBM
 162    double-double, 1 + epsilon:
 163
 164      1 + epsilon = 1 + (1 >> 1076)
 165
 166    is not representable by a consecutive 106 bits of mantissa.
 167
 168    Currently, these semantics are used in the following way:
 169
 170      semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
 171      (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
 172      semPPCDoubleDoubleLegacy -> IEEE operations
 173
 174    We use bitcastToAPInt() to get the bit representation (in APInt) of the
 175    underlying IEEEdouble, then use the APInt constructor to construct the
 176    legacy IEEE float.
 177
 178    TODO: Implement all operations in semPPCDoubleDouble, and delete these
 179    semantics.  */
 180 static constexpr fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
 181                                                           53 + 53, 128};
 182
 183 const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
 184   switch (S) {
 185   case S_IEEEhalf:
 186     return IEEEhalf();
 187   case S_BFloat:
 188     return BFloat();
 189   case S_IEEEsingle:
 190     return IEEEsingle();
 191   case S_IEEEdouble:
 192     return IEEEdouble();
 193   case S_IEEEquad:
 194     return IEEEquad();
 195   case S_PPCDoubleDouble:
 196     return PPCDoubleDouble();
 197   case S_Float8E5M2:
 198     return Float8E5M2();
 199   case S_Float8E5M2FNUZ:
 200     return Float8E5M2FNUZ();
 201   case S_Float8E4M3FN:
 202     return Float8E4M3FN();
 203   case S_Float8E4M3FNUZ:
 204     return Float8E4M3FNUZ();
 205   case S_Float8E4M3B11FNUZ:
 206     return Float8E4M3B11FNUZ();
 207   case S_FloatTF32:
 208     return FloatTF32();
 209   case S_x87DoubleExtended:
 210     return x87DoubleExtended();
 211   }
 212   llvm_unreachable("Unrecognised floating semantics");
 213 }
 214
 215 APFloatBase::Semantics
 216 APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
 217   if (&Sem == &llvm::APFloat::IEEEhalf())
 218     return S_IEEEhalf;
 219   else if (&Sem == &llvm::APFloat::BFloat())
 220     return S_BFloat;
 221   else if (&Sem == &llvm::APFloat::IEEEsingle())
 222     return S_IEEEsingle;
 223   else if (&Sem == &llvm::APFloat::IEEEdouble())
 224     return S_IEEEdouble;
 225   else if (&Sem == &llvm::APFloat::IEEEquad())
 226     return S_IEEEquad;
 227   else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
 228     return S_PPCDoubleDouble;
 229   else if (&Sem == &llvm::APFloat::Float8E5M2())
 230     return S_Float8E5M2;
 231   else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ())
 232     return S_Float8E5M2FNUZ;
 233   else if (&Sem == &llvm::APFloat::Float8E4M3FN())
 234     return S_Float8E4M3FN;
 235   else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ())
 236     return S_Float8E4M3FNUZ;
 237   else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ())
 238     return S_Float8E4M3B11FNUZ;
 239   else if (&Sem == &llvm::APFloat::FloatTF32())
 240     return S_FloatTF32;
 241   else if (&Sem == &llvm::APFloat::x87DoubleExtended())
 242     return S_x87DoubleExtended;
 243   else
 244     llvm_unreachable("Unknown floating semantics");
 245 }
 246
 247 const fltSemantics &APFloatBase::IEEEhalf() { return semIEEEhalf; }
 248 const fltSemantics &APFloatBase::BFloat() { return semBFloat; }
 249 const fltSemantics &APFloatBase::IEEEsingle() { return semIEEEsingle; }
 250 const fltSemantics &APFloatBase::IEEEdouble() { return semIEEEdouble; }
 251 const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; }
 252 const fltSemantics &APFloatBase::PPCDoubleDouble() {
 253   return semPPCDoubleDouble;
 254 }
 255 const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; }
 256 const fltSemantics &APFloatBase::Float8E5M2FNUZ() { return semFloat8E5M2FNUZ; }
 257 const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; }
 258 const fltSemantics &APFloatBase::Float8E4M3FNUZ() { return semFloat8E4M3FNUZ; }
 259 const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() {
 260   return semFloat8E4M3B11FNUZ;
 261 }
 262 const fltSemantics &APFloatBase::FloatTF32() { return semFloatTF32; }
 263 const fltSemantics &APFloatBase::x87DoubleExtended() {
 264   return semX87DoubleExtended;
 265 }
 266 const fltSemantics &APFloatBase::Bogus() { return semBogus; }
 267
 268 constexpr RoundingMode APFloatBase::rmNearestTiesToEven;
 269 constexpr RoundingMode APFloatBase::rmTowardPositive;
 270 constexpr RoundingMode APFloatBase::rmTowardNegative;
 271 constexpr RoundingMode APFloatBase::rmTowardZero;
 272 constexpr RoundingMode APFloatBase::rmNearestTiesToAway;
 273
 274 /* A tight upper bound on number of parts required to hold the value
 275    pow(5, power) is
 276
 277      power * 815 / (351 * integerPartWidth) + 1
 278
 279    However, whilst the result may require only this many parts,
 280    because we are multiplying two values to get it, the
 281    multiplication may require an extra part with the excess part
 282    being zero (consider the trivial case of 1 * 1, tcFullMultiply
 283    requires two parts to hold the single-part result).  So we add an
 284    extra one to guarantee enough space whilst multiplying.  */
 285 const unsigned int maxExponent = 16383;
 286 const unsigned int maxPrecision = 113;
 287 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
 288 const unsigned int maxPowerOfFiveParts =
 289     2 +
 290     ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth));
 291
 292 unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
 293   return semantics.precision;
 294 }
 295 APFloatBase::ExponentType
 296 APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {
 297   return semantics.maxExponent;
 298 }
 299 APFloatBase::ExponentType
 300 APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {
 301   return semantics.minExponent;
 302 }
 303 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
 304   return semantics.sizeInBits;
 305 }
 306 unsigned int APFloatBase::semanticsIntSizeInBits(const fltSemantics &semantics,
 307                                                  bool isSigned) {
 308   // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
 309   // at least one more bit than the MaxExponent to hold the max FP value.
 310   unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1;
 311   // Extra sign bit needed.
 312   if (isSigned)
 313     ++MinBitWidth;
 314   return MinBitWidth;
 315 }
 316
 317 bool APFloatBase::isRepresentableAsNormalIn(const fltSemantics &Src,
 318                                             const fltSemantics &Dst) {
 319   // Exponent range must be larger.
 320   if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent)
 321     return false;
 322
 323   // If the mantissa is long enough, the result value could still be denormal
 324   // with a larger exponent range.
 325   //
 326   // FIXME: This condition is probably not accurate but also shouldn't be a
 327   // practical concern with existing types.
 328   return Dst.precision >= Src.precision;
 329 }
 330
 331 unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
 332   return Sem.sizeInBits;
 333 }
 334
 335 static constexpr APFloatBase::ExponentType
 336 exponentZero(const fltSemantics &semantics) {
 337   return semantics.minExponent - 1;
 338 }
 339
 340 static constexpr APFloatBase::ExponentType
 341 exponentInf(const fltSemantics &semantics) {
 342   return semantics.maxExponent + 1;
 343 }
 344
 345 static constexpr APFloatBase::ExponentType
 346 exponentNaN(const fltSemantics &semantics) {
 347   if (semantics.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
 348     if (semantics.nanEncoding == fltNanEncoding::NegativeZero)
 349       return exponentZero(semantics);
 350     return semantics.maxExponent;
 351   }
 352   return semantics.maxExponent + 1;
 353 }
 354
 355 /* A bunch of private, handy routines.  */
 356
 357 static inline Error createError(const Twine &Err) {
 358   return make_error<StringError>(Err, inconvertibleErrorCode());
 359 }
 360
 361 static constexpr inline unsigned int partCountForBits(unsigned int bits) {
 362   return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth;
 363 }
 364
 365 /* Returns 0U-9U.  Return values >= 10U are not digits.  */
 366 static inline unsigned int
 367 decDigitValue(unsigned int c)
 368 {
 369   return c - '0';
 370 }
 371
 372 /* Return the value of a decimal exponent of the form
 373    [+-]ddddddd.
 374
 375    If the exponent overflows, returns a large exponent with the
 376    appropriate sign.  */
 377 static Expected<int> readExponent(StringRef::iterator begin,
 378                                   StringRef::iterator end) {
 379   bool isNegative;
 380   unsigned int absExponent;
 381   const unsigned int overlargeExponent = 24000;  /* FIXME.  */
 382   StringRef::iterator p = begin;
 383
 384   // Treat no exponent as 0 to match binutils
 385   if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) {
 386     return 0;
 387   }
 388
 389   isNegative = (*p == '-');
 390   if (*p == '-' || *p == '+') {
 391     p++;
 392     if (p == end)
 393       return createError("Exponent has no digits");
 394   }
 395
 396   absExponent = decDigitValue(*p++);
 397   if (absExponent >= 10U)
 398     return createError("Invalid character in exponent");
 399
 400   for (; p != end; ++p) {
 401     unsigned int value;
 402
 403     value = decDigitValue(*p);
 404     if (value >= 10U)
 405       return createError("Invalid character in exponent");
 406
 407     absExponent = absExponent * 10U + value;
 408     if (absExponent >= overlargeExponent) {
 409       absExponent = overlargeExponent;
 410       break;
 411     }
 412   }
 413
 414   if (isNegative)
 415     return -(int) absExponent;
 416   else
 417     return (int) absExponent;
 418 }
 419
 420 /* This is ugly and needs cleaning up, but I don't immediately see
 421    how whilst remaining safe.  */
 422 static Expected<int> totalExponent(StringRef::iterator p,
 423                                    StringRef::iterator end,
 424                                    int exponentAdjustment) {
 425   int unsignedExponent;
 426   bool negative, overflow;
 427   int exponent = 0;
 428
 429   if (p == end)
 430     return createError("Exponent has no digits");
 431
 432   negative = *p == '-';
 433   if (*p == '-' || *p == '+') {
 434     p++;
 435     if (p == end)
 436       return createError("Exponent has no digits");
 437   }
 438
 439   unsignedExponent = 0;
 440   overflow = false;
 441   for (; p != end; ++p) {
 442     unsigned int value;
 443
 444     value = decDigitValue(*p);
 445     if (value >= 10U)
 446       return createError("Invalid character in exponent");
 447
 448     unsignedExponent = unsignedExponent * 10 + value;
 449     if (unsignedExponent > 32767) {
 450       overflow = true;
 451       break;
 452     }
 453   }
 454
 455   if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
 456     overflow = true;
 457
 458   if (!overflow) {
 459     exponent = unsignedExponent;
 460     if (negative)
 461       exponent = -exponent;
 462     exponent += exponentAdjustment;
 463     if (exponent > 32767 || exponent < -32768)
 464       overflow = true;
 465   }
 466
 467   if (overflow)
 468     exponent = negative ? -32768: 32767;
 469
 470   return exponent;
 471 }
 472
 473 static Expected<StringRef::iterator>
 474 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
 475                            StringRef::iterator *dot) {
 476   StringRef::iterator p = begin;
 477   *dot = end;
 478   while (p != end && *p == '0')
 479     p++;
 480
 481   if (p != end && *p == '.') {
 482     *dot = p++;
 483
 484     if (end - begin == 1)
 485       return createError("Significand has no digits");
 486
 487     while (p != end && *p == '0')
 488       p++;
 489   }
 490
 491   return p;
 492 }
 493
 494 /* Given a normal decimal floating point number of the form
 495
 496      dddd.dddd[eE][+-]ddd
 497
 498    where the decimal point and exponent are optional, fill out the
 499    structure D.  Exponent is appropriate if the significand is
 500    treated as an integer, and normalizedExponent if the significand
 501    is taken to have the decimal point after a single leading
 502    non-zero digit.
 503
 504    If the value is zero, V->firstSigDigit points to a non-digit, and
 505    the return exponent is zero.
 506 */
 507 struct decimalInfo {
 508   const char *firstSigDigit;
 509   const char *lastSigDigit;
 510   int exponent;
 511   int normalizedExponent;
 512 };
 513
 514 static Error interpretDecimal(StringRef::iterator begin,
 515                               StringRef::iterator end, decimalInfo *D) {
 516   StringRef::iterator dot = end;
 517
 518   auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
 519   if (!PtrOrErr)
 520     return PtrOrErr.takeError();
 521   StringRef::iterator p = *PtrOrErr;
 522
 523   D->firstSigDigit = p;
 524   D->exponent = 0;
 525   D->normalizedExponent = 0;
 526
 527   for (; p != end; ++p) {
 528     if (*p == '.') {
 529       if (dot != end)
 530         return createError("String contains multiple dots");
 531       dot = p++;
 532       if (p == end)
 533         break;
 534     }
 535     if (decDigitValue(*p) >= 10U)
 536       break;
 537   }
 538
 539   if (p != end) {
 540     if (*p != 'e' && *p != 'E')
 541       return createError("Invalid character in significand");
 542     if (p == begin)
 543       return createError("Significand has no digits");
 544     if (dot != end && p - begin == 1)
 545       return createError("Significand has no digits");
 546
 547     /* p points to the first non-digit in the string */
 548     auto ExpOrErr = readExponent(p + 1, end);
 549     if (!ExpOrErr)
 550       return ExpOrErr.takeError();
 551     D->exponent = *ExpOrErr;
 552
 553     /* Implied decimal point?  */
 554     if (dot == end)
 555       dot = p;
 556   }
 557
 558   /* If number is all zeroes accept any exponent.  */
 559   if (p != D->firstSigDigit) {
 560     /* Drop insignificant trailing zeroes.  */
 561     if (p != begin) {
 562       do
 563         do
 564           p--;
 565         while (p != begin && *p == '0');
 566       while (p != begin && *p == '.');
 567     }
 568
 569     /* Adjust the exponents for any decimal point.  */
 570     D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
 571     D->normalizedExponent = (D->exponent +
 572               static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
 573                                       - (dot > D->firstSigDigit && dot < p)));
 574   }
 575
 576   D->lastSigDigit = p;
 577   return Error::success();
 578 }
 579
 580 /* Return the trailing fraction of a hexadecimal number.
 581    DIGITVALUE is the first hex digit of the fraction, P points to
 582    the next digit.  */
 583 static Expected<lostFraction>
 584 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
 585                             unsigned int digitValue) {
 586   unsigned int hexDigit;
 587
 588   /* If the first trailing digit isn't 0 or 8 we can work out the
 589      fraction immediately.  */
 590   if (digitValue > 8)
 591     return lfMoreThanHalf;
 592   else if (digitValue < 8 && digitValue > 0)
 593     return lfLessThanHalf;
 594
 595   // Otherwise we need to find the first non-zero digit.
 596   while (p != end && (*p == '0' || *p == '.'))
 597     p++;
 598
 599   if (p == end)
 600     return createError("Invalid trailing hexadecimal fraction!");
 601
 602   hexDigit = hexDigitValue(*p);
 603
 604   /* If we ran off the end it is exactly zero or one-half, otherwise
 605      a little more.  */
 606   if (hexDigit == UINT_MAX)
 607     return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
 608   else
 609     return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
 610 }
 611
 612 /* Return the fraction lost were a bignum truncated losing the least
 613    significant BITS bits.  */
 614 static lostFraction
 615 lostFractionThroughTruncation(const APFloatBase::integerPart *parts,
 616                               unsigned int partCount,
 617                               unsigned int bits)
 618 {
 619   unsigned int lsb;
 620
 621   lsb = APInt::tcLSB(parts, partCount);
 622
 623   /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX.  */
 624   if (bits <= lsb)
 625     return lfExactlyZero;
 626   if (bits == lsb + 1)
 627     return lfExactlyHalf;
 628   if (bits <= partCount * APFloatBase::integerPartWidth &&
 629       APInt::tcExtractBit(parts, bits - 1))
 630     return lfMoreThanHalf;
 631
 632   return lfLessThanHalf;
 633 }
 634
 635 /* Shift DST right BITS bits noting lost fraction.  */
 636 static lostFraction
 637 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
 638 {
 639   lostFraction lost_fraction;
 640
 641   lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
 642
 643   APInt::tcShiftRight(dst, parts, bits);
 644
 645   return lost_fraction;
 646 }
 647
 648 /* Combine the effect of two lost fractions.  */
 649 static lostFraction
 650 combineLostFractions(lostFraction moreSignificant,
 651                      lostFraction lessSignificant)
 652 {
 653   if (lessSignificant != lfExactlyZero) {
 654     if (moreSignificant == lfExactlyZero)
 655       moreSignificant = lfLessThanHalf;
 656     else if (moreSignificant == lfExactlyHalf)
 657       moreSignificant = lfMoreThanHalf;
 658   }
 659
 660   return moreSignificant;
 661 }
 662
 663 /* The error from the true value, in half-ulps, on multiplying two
 664    floating point numbers, which differ from the value they
 665    approximate by at most HUE1 and HUE2 half-ulps, is strictly less
 666    than the returned value.
 667
 668    See "How to Read Floating Point Numbers Accurately" by William D
 669    Clinger.  */
 670 static unsigned int
 671 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
 672 {
 673   assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
 674
 675   if (HUerr1 + HUerr2 == 0)
 676     return inexactMultiply * 2;  /* <= inexactMultiply half-ulps.  */
 677   else
 678     return inexactMultiply + 2 * (HUerr1 + HUerr2);
 679 }
 680
 681 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
 682    when the least significant BITS are truncated.  BITS cannot be
 683    zero.  */
 684 static APFloatBase::integerPart
 685 ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
 686                  bool isNearest) {
 687   unsigned int count, partBits;
 688   APFloatBase::integerPart part, boundary;
 689
 690   assert(bits != 0);
 691
 692   bits--;
 693   count = bits / APFloatBase::integerPartWidth;
 694   partBits = bits % APFloatBase::integerPartWidth + 1;
 695
 696   part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));
 697
 698   if (isNearest)
 699     boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
 700   else
 701     boundary = 0;
 702
 703   if (count == 0) {
 704     if (part - boundary <= boundary - part)
 705       return part - boundary;
 706     else
 707       return boundary - part;
 708   }
 709
 710   if (part == boundary) {
 711     while (--count)
 712       if (parts[count])
 713         return ~(APFloatBase::integerPart) 0; /* A lot.  */
 714
 715     return parts[0];
 716   } else if (part == boundary - 1) {
 717     while (--count)
 718       if (~parts[count])
 719         return ~(APFloatBase::integerPart) 0; /* A lot.  */
 720
 721     return -parts[0];
 722   }
 723
 724   return ~(APFloatBase::integerPart) 0; /* A lot.  */
 725 }
 726
 727 /* Place pow(5, power) in DST, and return the number of parts used.
 728    DST must be at least one part larger than size of the answer.  */
 729 static unsigned int
 730 powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
 731   static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
 732   APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
 733   pow5s[0] = 78125 * 5;
 734
 735   unsigned int partsCount[16] = { 1 };
 736   APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
 737   unsigned int result;
 738   assert(power <= maxExponent);
 739
 740   p1 = dst;
 741   p2 = scratch;
 742
 743   *p1 = firstEightPowers[power & 7];
 744   power >>= 3;
 745
 746   result = 1;
 747   pow5 = pow5s;
 748
 749   for (unsigned int n = 0; power; power >>= 1, n++) {
 750     unsigned int pc;
 751
 752     pc = partsCount[n];
 753
 754     /* Calculate pow(5,pow(2,n+3)) if we haven't yet.  */
 755     if (pc == 0) {
 756       pc = partsCount[n - 1];
 757       APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
 758       pc *= 2;
 759       if (pow5[pc - 1] == 0)
 760         pc--;
 761       partsCount[n] = pc;
 762     }
 763
 764     if (power & 1) {
 765       APFloatBase::integerPart *tmp;
 766
 767       APInt::tcFullMultiply(p2, p1, pow5, result, pc);
 768       result += pc;
 769       if (p2[result - 1] == 0)
 770         result--;
 771
 772       /* Now result is in p1 with partsCount parts and p2 is scratch
 773          space.  */
 774       tmp = p1;
 775       p1 = p2;
 776       p2 = tmp;
 777     }
 778
 779     pow5 += pc;
 780   }
 781
 782   if (p1 != dst)
 783     APInt::tcAssign(dst, p1, result);
 784
 785   return result;
 786 }
 787
 788 /* Zero at the end to avoid modular arithmetic when adding one; used
 789    when rounding up during hexadecimal output.  */
 790 static const char hexDigitsLower[] = "0123456789abcdef0";
 791 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
 792 static const char infinityL[] = "infinity";
 793 static const char infinityU[] = "INFINITY";
 794 static const char NaNL[] = "nan";
 795 static const char NaNU[] = "NAN";
 796
 797 /* Write out an integerPart in hexadecimal, starting with the most
 798    significant nibble.  Write out exactly COUNT hexdigits, return
 799    COUNT.  */
 800 static unsigned int
 801 partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
 802            const char *hexDigitChars)
 803 {
 804   unsigned int result = count;
 805
 806   assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);
 807
 808   part >>= (APFloatBase::integerPartWidth - 4 * count);
 809   while (count--) {
 810     dst[count] = hexDigitChars[part & 0xf];
 811     part >>= 4;
 812   }
 813
 814   return result;
 815 }
 816
 817 /* Write out an unsigned decimal integer.  */
 818 static char *
 819 writeUnsignedDecimal (char *dst, unsigned int n)
 820 {
 821   char buff[40], *p;
 822
 823   p = buff;
 824   do
 825     *p++ = '0' + n % 10;
 826   while (n /= 10);
 827
 828   do
 829     *dst++ = *--p;
 830   while (p != buff);
 831
 832   return dst;
 833 }
 834
 835 /* Write out a signed decimal integer.  */
 836 static char *
 837 writeSignedDecimal (char *dst, int value)
 838 {
 839   if (value < 0) {
 840     *dst++ = '-';
 841     dst = writeUnsignedDecimal(dst, -(unsigned) value);
 842   } else
 843     dst = writeUnsignedDecimal(dst, value);
 844
 845   return dst;
 846 }
 847
 848 namespace detail {
 849 /* Constructors.  */
 850 void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
 851   unsigned int count;
 852
 853   semantics = ourSemantics;
 854   count = partCount();
 855   if (count > 1)
 856     significand.parts = new integerPart[count];
 857 }
 858
 859 void IEEEFloat::freeSignificand() {
 860   if (needsCleanup())
 861     delete [] significand.parts;
 862 }
 863
 864 void IEEEFloat::assign(const IEEEFloat &rhs) {
 865   assert(semantics == rhs.semantics);
 866
 867   sign = rhs.sign;
 868   category = rhs.category;
 869   exponent = rhs.exponent;
 870   if (isFiniteNonZero() || category == fcNaN)
 871     copySignificand(rhs);
 872 }
 873
 874 void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
 875   assert(isFiniteNonZero() || category == fcNaN);
 876   assert(rhs.partCount() >= partCount());
 877
 878   APInt::tcAssign(significandParts(), rhs.significandParts(),
 879                   partCount());
 880 }
 881
 882 /* Make this number a NaN, with an arbitrary but deterministic value
 883    for the significand.  If double or longer, this is a signalling NaN,
 884    which may not be ideal.  If float, this is QNaN(0).  */
 885 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
 886   category = fcNaN;
 887   sign = Negative;
 888   exponent = exponentNaN();
 889
 890   integerPart *significand = significandParts();
 891   unsigned numParts = partCount();
 892
 893   APInt fill_storage;
 894   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
 895     // Finite-only types do not distinguish signalling and quiet NaN, so
 896     // make them all signalling.
 897     SNaN = false;
 898     if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
 899       sign = true;
 900       fill_storage = APInt::getZero(semantics->precision - 1);
 901     } else {
 902       fill_storage = APInt::getAllOnes(semantics->precision - 1);
 903     }
 904     fill = &fill_storage;
 905   }
 906
 907   // Set the significand bits to the fill.
 908   if (!fill || fill->getNumWords() < numParts)
 909     APInt::tcSet(significand, 0, numParts);
 910   if (fill) {
 911     APInt::tcAssign(significand, fill->getRawData(),
 912                     std::min(fill->getNumWords(), numParts));
 913
 914     // Zero out the excess bits of the significand.
 915     unsigned bitsToPreserve = semantics->precision - 1;
 916     unsigned part = bitsToPreserve / 64;
 917     bitsToPreserve %= 64;
 918     significand[part] &= ((1ULL << bitsToPreserve) - 1);
 919     for (part++; part != numParts; ++part)
 920       significand[part] = 0;
 921   }
 922
 923   unsigned QNaNBit = semantics->precision - 2;
 924
 925   if (SNaN) {
 926     // We always have to clear the QNaN bit to make it an SNaN.
 927     APInt::tcClearBit(significand, QNaNBit);
 928
 929     // If there are no bits set in the payload, we have to set
 930     // *something* to make it a NaN instead of an infinity;
 931     // conventionally, this is the next bit down from the QNaN bit.
 932     if (APInt::tcIsZero(significand, numParts))
 933       APInt::tcSetBit(significand, QNaNBit - 1);
 934   } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
 935     // The only NaN is a quiet NaN, and it has no bits sets in the significand.
 936     // Do nothing.
 937   } else {
 938     // We always have to set the QNaN bit to make it a QNaN.
 939     APInt::tcSetBit(significand, QNaNBit);
 940   }
 941
 942   // For x87 extended precision, we want to make a NaN, not a
 943   // pseudo-NaN.  Maybe we should expose the ability to make
 944   // pseudo-NaNs?
 945   if (semantics == &semX87DoubleExtended)
 946     APInt::tcSetBit(significand, QNaNBit + 1);
 947 }
 948
 949 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) {
 950   if (this != &rhs) {
 951     if (semantics != rhs.semantics) {
 952       freeSignificand();
 953       initialize(rhs.semantics);
 954     }
 955     assign(rhs);
 956   }
 957
 958   return *this;
 959 }
 960
 961 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
 962   freeSignificand();
 963
 964   semantics = rhs.semantics;
 965   significand = rhs.significand;
 966   exponent = rhs.exponent;
 967   category = rhs.category;
 968   sign = rhs.sign;
 969
 970   rhs.semantics = &semBogus;
 971   return *this;
 972 }
 973
 974 bool IEEEFloat::isDenormal() const {
 975   return isFiniteNonZero() && (exponent == semantics->minExponent) &&
 976          (APInt::tcExtractBit(significandParts(),
 977                               semantics->precision - 1) == 0);
 978 }
 979
 980 bool IEEEFloat::isSmallest() const {
 981   // The smallest number by magnitude in our format will be the smallest
 982   // denormal, i.e. the floating point number with exponent being minimum
 983   // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
 984   return isFiniteNonZero() && exponent == semantics->minExponent &&
 985     significandMSB() == 0;
 986 }
 987
 988 bool IEEEFloat::isSmallestNormalized() const {
 989   return getCategory() == fcNormal && exponent == semantics->minExponent &&
 990          isSignificandAllZerosExceptMSB();
 991 }
 992
 993 bool IEEEFloat::isSignificandAllOnes() const {
 994   // Test if the significand excluding the integral bit is all ones. This allows
 995   // us to test for binade boundaries.
 996   const integerPart *Parts = significandParts();
 997   const unsigned PartCount = partCountForBits(semantics->precision);
 998   for (unsigned i = 0; i < PartCount - 1; i++)
 999     if (~Parts[i])
1000       return false;
1001
1002   // Set the unused high bits to all ones when we compare.
1003   const unsigned NumHighBits =
1004     PartCount*integerPartWidth - semantics->precision + 1;
1005   assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1006          "Can not have more high bits to fill than integerPartWidth");
1007   const integerPart HighBitFill =
1008     ~integerPart(0) << (integerPartWidth - NumHighBits);
1009   if (~(Parts[PartCount - 1] | HighBitFill))
1010     return false;
1011
1012   return true;
1013 }
1014
1015 bool IEEEFloat::isSignificandAllOnesExceptLSB() const {
1016   // Test if the significand excluding the integral bit is all ones except for
1017   // the least significant bit.
1018   const integerPart *Parts = significandParts();
1019
1020   if (Parts[0] & 1)
1021     return false;
1022
1023   const unsigned PartCount = partCountForBits(semantics->precision);
1024   for (unsigned i = 0; i < PartCount - 1; i++) {
1025     if (~Parts[i] & ~unsigned{!i})
1026       return false;
1027   }
1028
1029   // Set the unused high bits to all ones when we compare.
1030   const unsigned NumHighBits =
1031       PartCount * integerPartWidth - semantics->precision + 1;
1032   assert(NumHighBits <= integerPartWidth && NumHighBits > 0 &&
1033          "Can not have more high bits to fill than integerPartWidth");
1034   const integerPart HighBitFill = ~integerPart(0)
1035                                   << (integerPartWidth - NumHighBits);
1036   if (~(Parts[PartCount - 1] | HighBitFill | 0x1))
1037     return false;
1038
1039   return true;
1040 }
1041
1042 bool IEEEFloat::isSignificandAllZeros() const {
1043   // Test if the significand excluding the integral bit is all zeros. This
1044   // allows us to test for binade boundaries.
1045   const integerPart *Parts = significandParts();
1046   const unsigned PartCount = partCountForBits(semantics->precision);
1047
1048   for (unsigned i = 0; i < PartCount - 1; i++)
1049     if (Parts[i])
1050       return false;
1051
1052   // Compute how many bits are used in the final word.
1053   const unsigned NumHighBits =
1054     PartCount*integerPartWidth - semantics->precision + 1;
1055   assert(NumHighBits < integerPartWidth && "Can not have more high bits to "
1056          "clear than integerPartWidth");
1057   const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
1058
1059   if (Parts[PartCount - 1] & HighBitMask)
1060     return false;
1061
1062   return true;
1063 }
1064
1065 bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
1066   const integerPart *Parts = significandParts();
1067   const unsigned PartCount = partCountForBits(semantics->precision);
1068
1069   for (unsigned i = 0; i < PartCount - 1; i++) {
1070     if (Parts[i])
1071       return false;
1072   }
1073
1074   const unsigned NumHighBits =
1075       PartCount * integerPartWidth - semantics->precision + 1;
1076   return Parts[PartCount - 1] == integerPart(1)
1077                                      << (integerPartWidth - NumHighBits);
1078 }
1079
1080 bool IEEEFloat::isLargest() const {
1081   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1082       semantics->nanEncoding == fltNanEncoding::AllOnes) {
1083     // The largest number by magnitude in our format will be the floating point
1084     // number with maximum exponent and with significand that is all ones except
1085     // the LSB.
1086     return isFiniteNonZero() && exponent == semantics->maxExponent &&
1087            isSignificandAllOnesExceptLSB();
1088   } else {
1089     // The largest number by magnitude in our format will be the floating point
1090     // number with maximum exponent and with significand that is all ones.
1091     return isFiniteNonZero() && exponent == semantics->maxExponent &&
1092            isSignificandAllOnes();
1093   }
1094 }
1095
1096 bool IEEEFloat::isInteger() const {
1097   // This could be made more efficient; I'm going for obviously correct.
1098   if (!isFinite()) return false;
1099   IEEEFloat truncated = *this;
1100   truncated.roundToIntegral(rmTowardZero);
1101   return compare(truncated) == cmpEqual;
1102 }
1103
1104 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
1105   if (this == &rhs)
1106     return true;
1107   if (semantics != rhs.semantics ||
1108       category != rhs.category ||
1109       sign != rhs.sign)
1110     return false;
1111   if (category==fcZero || category==fcInfinity)
1112     return true;
1113
1114   if (isFiniteNonZero() && exponent != rhs.exponent)
1115     return false;
1116
1117   return std::equal(significandParts(), significandParts() + partCount(),
1118                     rhs.significandParts());
1119 }
1120
1121 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) {
1122   initialize(&ourSemantics);
1123   sign = 0;
1124   category = fcNormal;
1125   zeroSignificand();
1126   exponent = ourSemantics.precision - 1;
1127   significandParts()[0] = value;
1128   normalize(rmNearestTiesToEven, lfExactlyZero);
1129 }
1130
1131 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) {
1132   initialize(&ourSemantics);
1133   makeZero(false);
1134 }
1135
1136 // Delegate to the previous constructor, because later copy constructor may
1137 // actually inspects category, which can't be garbage.
1138 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
1139     : IEEEFloat(ourSemantics) {}
1140
1141 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) {
1142   initialize(rhs.semantics);
1143   assign(rhs);
1144 }
1145
1146 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) {
1147   *this = std::move(rhs);
1148 }
1149
1150 IEEEFloat::~IEEEFloat() { freeSignificand(); }
1151
1152 unsigned int IEEEFloat::partCount() const {
1153   return partCountForBits(semantics->precision + 1);
1154 }
1155
1156 const IEEEFloat::integerPart *IEEEFloat::significandParts() const {
1157   return const_cast<IEEEFloat *>(this)->significandParts();
1158 }
1159
1160 IEEEFloat::integerPart *IEEEFloat::significandParts() {
1161   if (partCount() > 1)
1162     return significand.parts;
1163   else
1164     return &significand.part;
1165 }
1166
1167 void IEEEFloat::zeroSignificand() {
1168   APInt::tcSet(significandParts(), 0, partCount());
1169 }
1170
1171 /* Increment an fcNormal floating point number's significand.  */
1172 void IEEEFloat::incrementSignificand() {
1173   integerPart carry;
1174
1175   carry = APInt::tcIncrement(significandParts(), partCount());
1176
1177   /* Our callers should never cause us to overflow.  */
1178   assert(carry == 0);
1179   (void)carry;
1180 }
1181
1182 /* Add the significand of the RHS.  Returns the carry flag.  */
1183 IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
1184   integerPart *parts;
1185
1186   parts = significandParts();
1187
1188   assert(semantics == rhs.semantics);
1189   assert(exponent == rhs.exponent);
1190
1191   return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
1192 }
1193
1194 /* Subtract the significand of the RHS with a borrow flag.  Returns
1195    the borrow flag.  */
1196 IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
1197                                                       integerPart borrow) {
1198   integerPart *parts;
1199
1200   parts = significandParts();
1201
1202   assert(semantics == rhs.semantics);
1203   assert(exponent == rhs.exponent);
1204
1205   return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
1206                            partCount());
1207 }
1208
1209 /* Multiply the significand of the RHS.  If ADDEND is non-NULL, add it
1210    on to the full-precision result of the multiplication.  Returns the
1211    lost fraction.  */
1212 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
1213                                             IEEEFloat addend) {
1214   unsigned int omsb;        // One, not zero, based MSB.
1215   unsigned int partsCount, newPartsCount, precision;
1216   integerPart *lhsSignificand;
1217   integerPart scratch[4];
1218   integerPart *fullSignificand;
1219   lostFraction lost_fraction;
1220   bool ignored;
1221
1222   assert(semantics == rhs.semantics);
1223
1224   precision = semantics->precision;
1225
1226   // Allocate space for twice as many bits as the original significand, plus one
1227   // extra bit for the addition to overflow into.
1228   newPartsCount = partCountForBits(precision * 2 + 1);
1229
1230   if (newPartsCount > 4)
1231     fullSignificand = new integerPart[newPartsCount];
1232   else
1233     fullSignificand = scratch;
1234
1235   lhsSignificand = significandParts();
1236   partsCount = partCount();
1237
1238   APInt::tcFullMultiply(fullSignificand, lhsSignificand,
1239                         rhs.significandParts(), partsCount, partsCount);
1240
1241   lost_fraction = lfExactlyZero;
1242   omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1243   exponent += rhs.exponent;
1244
1245   // Assume the operands involved in the multiplication are single-precision
1246   // FP, and the two multiplicants are:
1247   //   *this = a23 . a22 ... a0 * 2^e1
1248   //     rhs = b23 . b22 ... b0 * 2^e2
1249   // the result of multiplication is:
1250   //   *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
1251   // Note that there are three significant bits at the left-hand side of the
1252   // radix point: two for the multiplication, and an overflow bit for the
1253   // addition (that will always be zero at this point). Move the radix point
1254   // toward left by two bits, and adjust exponent accordingly.
1255   exponent += 2;
1256
1257   if (addend.isNonZero()) {
1258     // The intermediate result of the multiplication has "2 * precision"
1259     // signicant bit; adjust the addend to be consistent with mul result.
1260     //
1261     Significand savedSignificand = significand;
1262     const fltSemantics *savedSemantics = semantics;
1263     fltSemantics extendedSemantics;
1264     opStatus status;
1265     unsigned int extendedPrecision;
1266
1267     // Normalize our MSB to one below the top bit to allow for overflow.
1268     extendedPrecision = 2 * precision + 1;
1269     if (omsb != extendedPrecision - 1) {
1270       assert(extendedPrecision > omsb);
1271       APInt::tcShiftLeft(fullSignificand, newPartsCount,
1272                          (extendedPrecision - 1) - omsb);
1273       exponent -= (extendedPrecision - 1) - omsb;
1274     }
1275
1276     /* Create new semantics.  */
1277     extendedSemantics = *semantics;
1278     extendedSemantics.precision = extendedPrecision;
1279
1280     if (newPartsCount == 1)
1281       significand.part = fullSignificand[0];
1282     else
1283       significand.parts = fullSignificand;
1284     semantics = &extendedSemantics;
1285
1286     // Make a copy so we can convert it to the extended semantics.
1287     // Note that we cannot convert the addend directly, as the extendedSemantics
1288     // is a local variable (which we take a reference to).
1289     IEEEFloat extendedAddend(addend);
1290     status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
1291     assert(status == opOK);
1292     (void)status;
1293
1294     // Shift the significand of the addend right by one bit. This guarantees
1295     // that the high bit of the significand is zero (same as fullSignificand),
1296     // so the addition will overflow (if it does overflow at all) into the top bit.
1297     lost_fraction = extendedAddend.shiftSignificandRight(1);
1298     assert(lost_fraction == lfExactlyZero &&
1299            "Lost precision while shifting addend for fused-multiply-add.");
1300
1301     lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1302
1303     /* Restore our state.  */
1304     if (newPartsCount == 1)
1305       fullSignificand[0] = significand.part;
1306     significand = savedSignificand;
1307     semantics = savedSemantics;
1308
1309     omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1310   }
1311
1312   // Convert the result having "2 * precision" significant-bits back to the one
1313   // having "precision" significant-bits. First, move the radix point from
1314   // poision "2*precision - 1" to "precision - 1". The exponent need to be
1315   // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1316   exponent -= precision + 1;
1317
1318   // In case MSB resides at the left-hand side of radix point, shift the
1319   // mantissa right by some amount to make sure the MSB reside right before
1320   // the radix point (i.e. "MSB . rest-significant-bits").
1321   //
1322   // Note that the result is not normalized when "omsb < precision". So, the
1323   // caller needs to call IEEEFloat::normalize() if normalized value is
1324   // expected.
1325   if (omsb > precision) {
1326     unsigned int bits, significantParts;
1327     lostFraction lf;
1328
1329     bits = omsb - precision;
1330     significantParts = partCountForBits(omsb);
1331     lf = shiftRight(fullSignificand, significantParts, bits);
1332     lost_fraction = combineLostFractions(lf, lost_fraction);
1333     exponent += bits;
1334   }
1335
1336   APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1337
1338   if (newPartsCount > 4)
1339     delete [] fullSignificand;
1340
1341   return lost_fraction;
1342 }
1343
1344 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs) {
1345   return multiplySignificand(rhs, IEEEFloat(*semantics));
1346 }
1347
1348 /* Multiply the significands of LHS and RHS to DST.  */
1349 lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1350   unsigned int bit, i, partsCount;
1351   const integerPart *rhsSignificand;
1352   integerPart *lhsSignificand, *dividend, *divisor;
1353   integerPart scratch[4];
1354   lostFraction lost_fraction;
1355
1356   assert(semantics == rhs.semantics);
1357
1358   lhsSignificand = significandParts();
1359   rhsSignificand = rhs.significandParts();
1360   partsCount = partCount();
1361
1362   if (partsCount > 2)
1363     dividend = new integerPart[partsCount * 2];
1364   else
1365     dividend = scratch;
1366
1367   divisor = dividend + partsCount;
1368
1369   /* Copy the dividend and divisor as they will be modified in-place.  */
1370   for (i = 0; i < partsCount; i++) {
1371     dividend[i] = lhsSignificand[i];
1372     divisor[i] = rhsSignificand[i];
1373     lhsSignificand[i] = 0;
1374   }
1375
1376   exponent -= rhs.exponent;
1377
1378   unsigned int precision = semantics->precision;
1379
1380   /* Normalize the divisor.  */
1381   bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1382   if (bit) {
1383     exponent += bit;
1384     APInt::tcShiftLeft(divisor, partsCount, bit);
1385   }
1386
1387   /* Normalize the dividend.  */
1388   bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1389   if (bit) {
1390     exponent -= bit;
1391     APInt::tcShiftLeft(dividend, partsCount, bit);
1392   }
1393
1394   /* Ensure the dividend >= divisor initially for the loop below.
1395      Incidentally, this means that the division loop below is
1396      guaranteed to set the integer bit to one.  */
1397   if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1398     exponent--;
1399     APInt::tcShiftLeft(dividend, partsCount, 1);
1400     assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1401   }
1402
1403   /* Long division.  */
1404   for (bit = precision; bit; bit -= 1) {
1405     if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1406       APInt::tcSubtract(dividend, divisor, 0, partsCount);
1407       APInt::tcSetBit(lhsSignificand, bit - 1);
1408     }
1409
1410     APInt::tcShiftLeft(dividend, partsCount, 1);
1411   }
1412
1413   /* Figure out the lost fraction.  */
1414   int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1415
1416   if (cmp > 0)
1417     lost_fraction = lfMoreThanHalf;
1418   else if (cmp == 0)
1419     lost_fraction = lfExactlyHalf;
1420   else if (APInt::tcIsZero(dividend, partsCount))
1421     lost_fraction = lfExactlyZero;
1422   else
1423     lost_fraction = lfLessThanHalf;
1424
1425   if (partsCount > 2)
1426     delete [] dividend;
1427
1428   return lost_fraction;
1429 }
1430
1431 unsigned int IEEEFloat::significandMSB() const {
1432   return APInt::tcMSB(significandParts(), partCount());
1433 }
1434
1435 unsigned int IEEEFloat::significandLSB() const {
1436   return APInt::tcLSB(significandParts(), partCount());
1437 }
1438
1439 /* Note that a zero result is NOT normalized to fcZero.  */
1440 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1441   /* Our exponent should not overflow.  */
1442   assert((ExponentType) (exponent + bits) >= exponent);
1443
1444   exponent += bits;
1445
1446   return shiftRight(significandParts(), partCount(), bits);
1447 }
1448
1449 /* Shift the significand left BITS bits, subtract BITS from its exponent.  */
1450 void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1451   assert(bits < semantics->precision);
1452
1453   if (bits) {
1454     unsigned int partsCount = partCount();
1455
1456     APInt::tcShiftLeft(significandParts(), partsCount, bits);
1457     exponent -= bits;
1458
1459     assert(!APInt::tcIsZero(significandParts(), partsCount));
1460   }
1461 }
1462
1463 IEEEFloat::cmpResult
1464 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
1465   int compare;
1466
1467   assert(semantics == rhs.semantics);
1468   assert(isFiniteNonZero());
1469   assert(rhs.isFiniteNonZero());
1470
1471   compare = exponent - rhs.exponent;
1472
1473   /* If exponents are equal, do an unsigned bignum comparison of the
1474      significands.  */
1475   if (compare == 0)
1476     compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1477                                partCount());
1478
1479   if (compare > 0)
1480     return cmpGreaterThan;
1481   else if (compare < 0)
1482     return cmpLessThan;
1483   else
1484     return cmpEqual;
1485 }
1486
1487 /* Set the least significant BITS bits of a bignum, clear the
1488    rest.  */
1489 static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
1490                                       unsigned bits) {
1491   unsigned i = 0;
1492   while (bits > APInt::APINT_BITS_PER_WORD) {
1493     dst[i++] = ~(APInt::WordType)0;
1494     bits -= APInt::APINT_BITS_PER_WORD;
1495   }
1496
1497   if (bits)
1498     dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);
1499
1500   while (i < parts)
1501     dst[i++] = 0;
1502 }
1503
1504 /* Handle overflow.  Sign is preserved.  We either become infinity or
1505    the largest finite number.  */
1506 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1507   /* Infinity?  */
1508   if (rounding_mode == rmNearestTiesToEven ||
1509       rounding_mode == rmNearestTiesToAway ||
1510       (rounding_mode == rmTowardPositive && !sign) ||
1511       (rounding_mode == rmTowardNegative && sign)) {
1512     if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1513       makeNaN(false, sign);
1514     else
1515       category = fcInfinity;
1516     return (opStatus) (opOverflow | opInexact);
1517   }
1518
1519   /* Otherwise we become the largest finite number.  */
1520   category = fcNormal;
1521   exponent = semantics->maxExponent;
1522   tcSetLeastSignificantBits(significandParts(), partCount(),
1523                             semantics->precision);
1524   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1525       semantics->nanEncoding == fltNanEncoding::AllOnes)
1526     APInt::tcClearBit(significandParts(), 0);
1527
1528   return opInexact;
1529 }
1530
1531 /* Returns TRUE if, when truncating the current number, with BIT the
1532    new LSB, with the given lost fraction and rounding mode, the result
1533    would need to be rounded away from zero (i.e., by increasing the
1534    signficand).  This routine must work for fcZero of both signs, and
1535    fcNormal numbers.  */
1536 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1537                                   lostFraction lost_fraction,
1538                                   unsigned int bit) const {
1539   /* NaNs and infinities should not have lost fractions.  */
1540   assert(isFiniteNonZero() || category == fcZero);
1541
1542   /* Current callers never pass this so we don't handle it.  */
1543   assert(lost_fraction != lfExactlyZero);
1544
1545   switch (rounding_mode) {
1546   case rmNearestTiesToAway:
1547     return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1548
1549   case rmNearestTiesToEven:
1550     if (lost_fraction == lfMoreThanHalf)
1551       return true;
1552
1553     /* Our zeroes don't have a significand to test.  */
1554     if (lost_fraction == lfExactlyHalf && category != fcZero)
1555       return APInt::tcExtractBit(significandParts(), bit);
1556
1557     return false;
1558
1559   case rmTowardZero:
1560     return false;
1561
1562   case rmTowardPositive:
1563     return !sign;
1564
1565   case rmTowardNegative:
1566     return sign;
1567
1568   default:
1569     break;
1570   }
1571   llvm_unreachable("Invalid rounding mode found");
1572 }
1573
1574 IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1575                                          lostFraction lost_fraction) {
1576   unsigned int omsb;                /* One, not zero, based MSB.  */
1577   int exponentChange;
1578
1579   if (!isFiniteNonZero())
1580     return opOK;
1581
1582   /* Before rounding normalize the exponent of fcNormal numbers.  */
1583   omsb = significandMSB() + 1;
1584
1585   if (omsb) {
1586     /* OMSB is numbered from 1.  We want to place it in the integer
1587        bit numbered PRECISION if possible, with a compensating change in
1588        the exponent.  */
1589     exponentChange = omsb - semantics->precision;
1590
1591     /* If the resulting exponent is too high, overflow according to
1592        the rounding mode.  */
1593     if (exponent + exponentChange > semantics->maxExponent)
1594       return handleOverflow(rounding_mode);
1595
1596     /* Subnormal numbers have exponent minExponent, and their MSB
1597        is forced based on that.  */
1598     if (exponent + exponentChange < semantics->minExponent)
1599       exponentChange = semantics->minExponent - exponent;
1600
1601     /* Shifting left is easy as we don't lose precision.  */
1602     if (exponentChange < 0) {
1603       assert(lost_fraction == lfExactlyZero);
1604
1605       shiftSignificandLeft(-exponentChange);
1606
1607       return opOK;
1608     }
1609
1610     if (exponentChange > 0) {
1611       lostFraction lf;
1612
1613       /* Shift right and capture any new lost fraction.  */
1614       lf = shiftSignificandRight(exponentChange);
1615
1616       lost_fraction = combineLostFractions(lf, lost_fraction);
1617
1618       /* Keep OMSB up-to-date.  */
1619       if (omsb > (unsigned) exponentChange)
1620         omsb -= exponentChange;
1621       else
1622         omsb = 0;
1623     }
1624   }
1625
1626   // The all-ones values is an overflow if NaN is all ones. If NaN is
1627   // represented by negative zero, then it is a valid finite value.
1628   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1629       semantics->nanEncoding == fltNanEncoding::AllOnes &&
1630       exponent == semantics->maxExponent && isSignificandAllOnes())
1631     return handleOverflow(rounding_mode);
1632
1633   /* Now round the number according to rounding_mode given the lost
1634      fraction.  */
1635
1636   /* As specified in IEEE 754, since we do not trap we do not report
1637      underflow for exact results.  */
1638   if (lost_fraction == lfExactlyZero) {
1639     /* Canonicalize zeroes.  */
1640     if (omsb == 0) {
1641       category = fcZero;
1642       if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1643         sign = false;
1644     }
1645
1646     return opOK;
1647   }
1648
1649   /* Increment the significand if we're rounding away from zero.  */
1650   if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1651     if (omsb == 0)
1652       exponent = semantics->minExponent;
1653
1654     incrementSignificand();
1655     omsb = significandMSB() + 1;
1656
1657     /* Did the significand increment overflow?  */
1658     if (omsb == (unsigned) semantics->precision + 1) {
1659       /* Renormalize by incrementing the exponent and shifting our
1660          significand right one.  However if we already have the
1661          maximum exponent we overflow to infinity.  */
1662       if (exponent == semantics->maxExponent)
1663         // Invoke overflow handling with a rounding mode that will guarantee
1664         // that the result gets turned into the correct infinity representation.
1665         // This is needed instead of just setting the category to infinity to
1666         // account for 8-bit floating point types that have no inf, only NaN.
1667         return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);
1668
1669       shiftSignificandRight(1);
1670
1671       return opInexact;
1672     }
1673
1674     // The all-ones values is an overflow if NaN is all ones. If NaN is
1675     // represented by negative zero, then it is a valid finite value.
1676     if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
1677         semantics->nanEncoding == fltNanEncoding::AllOnes &&
1678         exponent == semantics->maxExponent && isSignificandAllOnes())
1679       return handleOverflow(rounding_mode);
1680   }
1681
1682   /* The normal case - we were and are not denormal, and any
1683      significand increment above didn't overflow.  */
1684   if (omsb == semantics->precision)
1685     return opInexact;
1686
1687   /* We have a non-zero denormal.  */
1688   assert(omsb < semantics->precision);
1689
1690   /* Canonicalize zeroes.  */
1691   if (omsb == 0) {
1692     category = fcZero;
1693     if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
1694       sign = false;
1695   }
1696
1697   /* The fcZero case is a denormal that underflowed to zero.  */
1698   return (opStatus) (opUnderflow | opInexact);
1699 }
1700
1701 IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1702                                                      bool subtract) {
1703   switch (PackCategoriesIntoKey(category, rhs.category)) {
1704   default:
1705     llvm_unreachable(nullptr);
1706
1707   case PackCategoriesIntoKey(fcZero, fcNaN):
1708   case PackCategoriesIntoKey(fcNormal, fcNaN):
1709   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1710     assign(rhs);
1711     [[fallthrough]];
1712   case PackCategoriesIntoKey(fcNaN, fcZero):
1713   case PackCategoriesIntoKey(fcNaN, fcNormal):
1714   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1715   case PackCategoriesIntoKey(fcNaN, fcNaN):
1716     if (isSignaling()) {
1717       makeQuiet();
1718       return opInvalidOp;
1719     }
1720     return rhs.isSignaling() ? opInvalidOp : opOK;
1721
1722   case PackCategoriesIntoKey(fcNormal, fcZero):
1723   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1724   case PackCategoriesIntoKey(fcInfinity, fcZero):
1725     return opOK;
1726
1727   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1728   case PackCategoriesIntoKey(fcZero, fcInfinity):
1729     category = fcInfinity;
1730     sign = rhs.sign ^ subtract;
1731     return opOK;
1732
1733   case PackCategoriesIntoKey(fcZero, fcNormal):
1734     assign(rhs);
1735     sign = rhs.sign ^ subtract;
1736     return opOK;
1737
1738   case PackCategoriesIntoKey(fcZero, fcZero):
1739     /* Sign depends on rounding mode; handled by caller.  */
1740     return opOK;
1741
1742   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1743     /* Differently signed infinities can only be validly
1744        subtracted.  */
1745     if (((sign ^ rhs.sign)!=0) != subtract) {
1746       makeNaN();
1747       return opInvalidOp;
1748     }
1749
1750     return opOK;
1751
1752   case PackCategoriesIntoKey(fcNormal, fcNormal):
1753     return opDivByZero;
1754   }
1755 }
1756
1757 /* Add or subtract two normal numbers.  */
1758 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1759                                                  bool subtract) {
1760   integerPart carry;
1761   lostFraction lost_fraction;
1762   int bits;
1763
1764   /* Determine if the operation on the absolute values is effectively
1765      an addition or subtraction.  */
1766   subtract ^= static_cast<bool>(sign ^ rhs.sign);
1767
1768   /* Are we bigger exponent-wise than the RHS?  */
1769   bits = exponent - rhs.exponent;
1770
1771   /* Subtraction is more subtle than one might naively expect.  */
1772   if (subtract) {
1773     IEEEFloat temp_rhs(rhs);
1774
1775     if (bits == 0)
1776       lost_fraction = lfExactlyZero;
1777     else if (bits > 0) {
1778       lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1779       shiftSignificandLeft(1);
1780     } else {
1781       lost_fraction = shiftSignificandRight(-bits - 1);
1782       temp_rhs.shiftSignificandLeft(1);
1783     }
1784
1785     // Should we reverse the subtraction.
1786     if (compareAbsoluteValue(temp_rhs) == cmpLessThan) {
1787       carry = temp_rhs.subtractSignificand
1788         (*this, lost_fraction != lfExactlyZero);
1789       copySignificand(temp_rhs);
1790       sign = !sign;
1791     } else {
1792       carry = subtractSignificand
1793         (temp_rhs, lost_fraction != lfExactlyZero);
1794     }
1795
1796     /* Invert the lost fraction - it was on the RHS and
1797        subtracted.  */
1798     if (lost_fraction == lfLessThanHalf)
1799       lost_fraction = lfMoreThanHalf;
1800     else if (lost_fraction == lfMoreThanHalf)
1801       lost_fraction = lfLessThanHalf;
1802
1803     /* The code above is intended to ensure that no borrow is
1804        necessary.  */
1805     assert(!carry);
1806     (void)carry;
1807   } else {
1808     if (bits > 0) {
1809       IEEEFloat temp_rhs(rhs);
1810
1811       lost_fraction = temp_rhs.shiftSignificandRight(bits);
1812       carry = addSignificand(temp_rhs);
1813     } else {
1814       lost_fraction = shiftSignificandRight(-bits);
1815       carry = addSignificand(rhs);
1816     }
1817
1818     /* We have a guard bit; generating a carry cannot happen.  */
1819     assert(!carry);
1820     (void)carry;
1821   }
1822
1823   return lost_fraction;
1824 }
1825
1826 IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1827   switch (PackCategoriesIntoKey(category, rhs.category)) {
1828   default:
1829     llvm_unreachable(nullptr);
1830
1831   case PackCategoriesIntoKey(fcZero, fcNaN):
1832   case PackCategoriesIntoKey(fcNormal, fcNaN):
1833   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1834     assign(rhs);
1835     sign = false;
1836     [[fallthrough]];
1837   case PackCategoriesIntoKey(fcNaN, fcZero):
1838   case PackCategoriesIntoKey(fcNaN, fcNormal):
1839   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1840   case PackCategoriesIntoKey(fcNaN, fcNaN):
1841     sign ^= rhs.sign; // restore the original sign
1842     if (isSignaling()) {
1843       makeQuiet();
1844       return opInvalidOp;
1845     }
1846     return rhs.isSignaling() ? opInvalidOp : opOK;
1847
1848   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1849   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1850   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1851     category = fcInfinity;
1852     return opOK;
1853
1854   case PackCategoriesIntoKey(fcZero, fcNormal):
1855   case PackCategoriesIntoKey(fcNormal, fcZero):
1856   case PackCategoriesIntoKey(fcZero, fcZero):
1857     category = fcZero;
1858     return opOK;
1859
1860   case PackCategoriesIntoKey(fcZero, fcInfinity):
1861   case PackCategoriesIntoKey(fcInfinity, fcZero):
1862     makeNaN();
1863     return opInvalidOp;
1864
1865   case PackCategoriesIntoKey(fcNormal, fcNormal):
1866     return opOK;
1867   }
1868 }
1869
1870 IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1871   switch (PackCategoriesIntoKey(category, rhs.category)) {
1872   default:
1873     llvm_unreachable(nullptr);
1874
1875   case PackCategoriesIntoKey(fcZero, fcNaN):
1876   case PackCategoriesIntoKey(fcNormal, fcNaN):
1877   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1878     assign(rhs);
1879     sign = false;
1880     [[fallthrough]];
1881   case PackCategoriesIntoKey(fcNaN, fcZero):
1882   case PackCategoriesIntoKey(fcNaN, fcNormal):
1883   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1884   case PackCategoriesIntoKey(fcNaN, fcNaN):
1885     sign ^= rhs.sign; // restore the original sign
1886     if (isSignaling()) {
1887       makeQuiet();
1888       return opInvalidOp;
1889     }
1890     return rhs.isSignaling() ? opInvalidOp : opOK;
1891
1892   case PackCategoriesIntoKey(fcInfinity, fcZero):
1893   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1894   case PackCategoriesIntoKey(fcZero, fcInfinity):
1895   case PackCategoriesIntoKey(fcZero, fcNormal):
1896     return opOK;
1897
1898   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1899     category = fcZero;
1900     return opOK;
1901
1902   case PackCategoriesIntoKey(fcNormal, fcZero):
1903     if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
1904       makeNaN(false, sign);
1905     else
1906       category = fcInfinity;
1907     return opDivByZero;
1908
1909   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1910   case PackCategoriesIntoKey(fcZero, fcZero):
1911     makeNaN();
1912     return opInvalidOp;
1913
1914   case PackCategoriesIntoKey(fcNormal, fcNormal):
1915     return opOK;
1916   }
1917 }
1918
1919 IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
1920   switch (PackCategoriesIntoKey(category, rhs.category)) {
1921   default:
1922     llvm_unreachable(nullptr);
1923
1924   case PackCategoriesIntoKey(fcZero, fcNaN):
1925   case PackCategoriesIntoKey(fcNormal, fcNaN):
1926   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1927     assign(rhs);
1928     [[fallthrough]];
1929   case PackCategoriesIntoKey(fcNaN, fcZero):
1930   case PackCategoriesIntoKey(fcNaN, fcNormal):
1931   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1932   case PackCategoriesIntoKey(fcNaN, fcNaN):
1933     if (isSignaling()) {
1934       makeQuiet();
1935       return opInvalidOp;
1936     }
1937     return rhs.isSignaling() ? opInvalidOp : opOK;
1938
1939   case PackCategoriesIntoKey(fcZero, fcInfinity):
1940   case PackCategoriesIntoKey(fcZero, fcNormal):
1941   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1942     return opOK;
1943
1944   case PackCategoriesIntoKey(fcNormal, fcZero):
1945   case PackCategoriesIntoKey(fcInfinity, fcZero):
1946   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1947   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1948   case PackCategoriesIntoKey(fcZero, fcZero):
1949     makeNaN();
1950     return opInvalidOp;
1951
1952   case PackCategoriesIntoKey(fcNormal, fcNormal):
1953     return opOK;
1954   }
1955 }
1956
1957 IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
1958   switch (PackCategoriesIntoKey(category, rhs.category)) {
1959   default:
1960     llvm_unreachable(nullptr);
1961
1962   case PackCategoriesIntoKey(fcZero, fcNaN):
1963   case PackCategoriesIntoKey(fcNormal, fcNaN):
1964   case PackCategoriesIntoKey(fcInfinity, fcNaN):
1965     assign(rhs);
1966     [[fallthrough]];
1967   case PackCategoriesIntoKey(fcNaN, fcZero):
1968   case PackCategoriesIntoKey(fcNaN, fcNormal):
1969   case PackCategoriesIntoKey(fcNaN, fcInfinity):
1970   case PackCategoriesIntoKey(fcNaN, fcNaN):
1971     if (isSignaling()) {
1972       makeQuiet();
1973       return opInvalidOp;
1974     }
1975     return rhs.isSignaling() ? opInvalidOp : opOK;
1976
1977   case PackCategoriesIntoKey(fcZero, fcInfinity):
1978   case PackCategoriesIntoKey(fcZero, fcNormal):
1979   case PackCategoriesIntoKey(fcNormal, fcInfinity):
1980     return opOK;
1981
1982   case PackCategoriesIntoKey(fcNormal, fcZero):
1983   case PackCategoriesIntoKey(fcInfinity, fcZero):
1984   case PackCategoriesIntoKey(fcInfinity, fcNormal):
1985   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1986   case PackCategoriesIntoKey(fcZero, fcZero):
1987     makeNaN();
1988     return opInvalidOp;
1989
1990   case PackCategoriesIntoKey(fcNormal, fcNormal):
1991     return opDivByZero; // fake status, indicating this is not a special case
1992   }
1993 }
1994
1995 /* Change sign.  */
1996 void IEEEFloat::changeSign() {
1997   // With NaN-as-negative-zero, neither NaN or negative zero can change
1998   // their signs.
1999   if (semantics->nanEncoding == fltNanEncoding::NegativeZero &&
2000       (isZero() || isNaN()))
2001     return;
2002   /* Look mummy, this one's easy.  */
2003   sign = !sign;
2004 }
2005
2006 /* Normalized addition or subtraction.  */
2007 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
2008                                              roundingMode rounding_mode,
2009                                              bool subtract) {
2010   opStatus fs;
2011
2012   fs = addOrSubtractSpecials(rhs, subtract);
2013
2014   /* This return code means it was not a simple case.  */
2015   if (fs == opDivByZero) {
2016     lostFraction lost_fraction;
2017
2018     lost_fraction = addOrSubtractSignificand(rhs, subtract);
2019     fs = normalize(rounding_mode, lost_fraction);
2020
2021     /* Can only be zero if we lost no fraction.  */
2022     assert(category != fcZero || lost_fraction == lfExactlyZero);
2023   }
2024
2025   /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2026      positive zero unless rounding to minus infinity, except that
2027      adding two like-signed zeroes gives that zero.  */
2028   if (category == fcZero) {
2029     if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
2030       sign = (rounding_mode == rmTowardNegative);
2031     // NaN-in-negative-zero means zeros need to be normalized to +0.
2032     if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2033       sign = false;
2034   }
2035
2036   return fs;
2037 }
2038
2039 /* Normalized addition.  */
2040 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs,
2041                                    roundingMode rounding_mode) {
2042   return addOrSubtract(rhs, rounding_mode, false);
2043 }
2044
2045 /* Normalized subtraction.  */
2046 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs,
2047                                         roundingMode rounding_mode) {
2048   return addOrSubtract(rhs, rounding_mode, true);
2049 }
2050
2051 /* Normalized multiply.  */
2052 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
2053                                         roundingMode rounding_mode) {
2054   opStatus fs;
2055
2056   sign ^= rhs.sign;
2057   fs = multiplySpecials(rhs);
2058
2059   if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2060     sign = false;
2061   if (isFiniteNonZero()) {
2062     lostFraction lost_fraction = multiplySignificand(rhs);
2063     fs = normalize(rounding_mode, lost_fraction);
2064     if (lost_fraction != lfExactlyZero)
2065       fs = (opStatus) (fs | opInexact);
2066   }
2067
2068   return fs;
2069 }
2070
2071 /* Normalized divide.  */
2072 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
2073                                       roundingMode rounding_mode) {
2074   opStatus fs;
2075
2076   sign ^= rhs.sign;
2077   fs = divideSpecials(rhs);
2078
2079   if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
2080     sign = false;
2081   if (isFiniteNonZero()) {
2082     lostFraction lost_fraction = divideSignificand(rhs);
2083     fs = normalize(rounding_mode, lost_fraction);
2084     if (lost_fraction != lfExactlyZero)
2085       fs = (opStatus) (fs | opInexact);
2086   }
2087
2088   return fs;
2089 }
2090
2091 /* Normalized remainder.  */
2092 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
2093   opStatus fs;
2094   unsigned int origSign = sign;
2095
2096   // First handle the special cases.
2097   fs = remainderSpecials(rhs);
2098   if (fs != opDivByZero)
2099     return fs;
2100
2101   fs = opOK;
2102
2103   // Make sure the current value is less than twice the denom. If the addition
2104   // did not succeed (an overflow has happened), which means that the finite
2105   // value we currently posses must be less than twice the denom (as we are
2106   // using the same semantics).
2107   IEEEFloat P2 = rhs;
2108   if (P2.add(rhs, rmNearestTiesToEven) == opOK) {
2109     fs = mod(P2);
2110     assert(fs == opOK);
2111   }
2112
2113   // Lets work with absolute numbers.
2114   IEEEFloat P = rhs;
2115   P.sign = false;
2116   sign = false;
2117
2118   //
2119   // To calculate the remainder we use the following scheme.
2120   //
2121   // The remainder is defained as follows:
2122   //
2123   // remainder = numer - rquot * denom = x - r * p
2124   //
2125   // Where r is the result of: x/p, rounded toward the nearest integral value
2126   // (with halfway cases rounded toward the even number).
2127   //
2128   // Currently, (after x mod 2p):
2129   // r is the number of 2p's present inside x, which is inherently, an even
2130   // number of p's.
2131   //
2132   // We may split the remaining calculation into 4 options:
2133   // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2134   // - if x == 0.5p then we round to the nearest even number which is 0, and we
2135   //   are done as well.
2136   // - if 0.5p < x < p then we round to nearest number which is 1, and we have
2137   //   to subtract 1p at least once.
2138   // - if x >= p then we must subtract p at least once, as x must be a
2139   //   remainder.
2140   //
2141   // By now, we were done, or we added 1 to r, which in turn, now an odd number.
2142   //
2143   // We can now split the remaining calculation to the following 3 options:
2144   // - if x < 0.5p then we round to the nearest number with is 0, and are done.
2145   // - if x == 0.5p then we round to the nearest even number. As r is odd, we
2146   //   must round up to the next even number. so we must subtract p once more.
2147   // - if x > 0.5p (and inherently x < p) then we must round r up to the next
2148   //   integral, and subtract p once more.
2149   //
2150
2151   // Extend the semantics to prevent an overflow/underflow or inexact result.
2152   bool losesInfo;
2153   fltSemantics extendedSemantics = *semantics;
2154   extendedSemantics.maxExponent++;
2155   extendedSemantics.minExponent--;
2156   extendedSemantics.precision += 2;
2157
2158   IEEEFloat VEx = *this;
2159   fs = VEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2160   assert(fs == opOK && !losesInfo);
2161   IEEEFloat PEx = P;
2162   fs = PEx.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2163   assert(fs == opOK && !losesInfo);
2164
2165   // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
2166   // any fraction.
2167   fs = VEx.add(VEx, rmNearestTiesToEven);
2168   assert(fs == opOK);
2169
2170   if (VEx.compare(PEx) == cmpGreaterThan) {
2171     fs = subtract(P, rmNearestTiesToEven);
2172     assert(fs == opOK);
2173
2174     // Make VEx = this.add(this), but because we have different semantics, we do
2175     // not want to `convert` again, so we just subtract PEx twice (which equals
2176     // to the desired value).
2177     fs = VEx.subtract(PEx, rmNearestTiesToEven);
2178     assert(fs == opOK);
2179     fs = VEx.subtract(PEx, rmNearestTiesToEven);
2180     assert(fs == opOK);
2181
2182     cmpResult result = VEx.compare(PEx);
2183     if (result == cmpGreaterThan || result == cmpEqual) {
2184       fs = subtract(P, rmNearestTiesToEven);
2185       assert(fs == opOK);
2186     }
2187   }
2188
2189   if (isZero()) {
2190     sign = origSign;    // IEEE754 requires this
2191     if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2192       // But some 8-bit floats only have positive 0.
2193       sign = false;
2194   }
2195
2196   else
2197     sign ^= origSign;
2198   return fs;
2199 }
2200
2201 /* Normalized llvm frem (C fmod). */
2202 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
2203   opStatus fs;
2204   fs = modSpecials(rhs);
2205   unsigned int origSign = sign;
2206
2207   while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
2208          compareAbsoluteValue(rhs) != cmpLessThan) {
2209     int Exp = ilogb(*this) - ilogb(rhs);
2210     IEEEFloat V = scalbn(rhs, Exp, rmNearestTiesToEven);
2211     // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly
2212     // check for it.
2213     if (V.isNaN() || compareAbsoluteValue(V) == cmpLessThan)
2214       V = scalbn(rhs, Exp - 1, rmNearestTiesToEven);
2215     V.sign = sign;
2216
2217     fs = subtract(V, rmNearestTiesToEven);
2218     assert(fs==opOK);
2219   }
2220   if (isZero()) {
2221     sign = origSign; // fmod requires this
2222     if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2223       sign = false;
2224   }
2225   return fs;
2226 }
2227
2228 /* Normalized fused-multiply-add.  */
2229 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
2230                                                 const IEEEFloat &addend,
2231                                                 roundingMode rounding_mode) {
2232   opStatus fs;
2233
2234   /* Post-multiplication sign, before addition.  */
2235   sign ^= multiplicand.sign;
2236
2237   /* If and only if all arguments are normal do we need to do an
2238      extended-precision calculation.  */
2239   if (isFiniteNonZero() &&
2240       multiplicand.isFiniteNonZero() &&
2241       addend.isFinite()) {
2242     lostFraction lost_fraction;
2243
2244     lost_fraction = multiplySignificand(multiplicand, addend);
2245     fs = normalize(rounding_mode, lost_fraction);
2246     if (lost_fraction != lfExactlyZero)
2247       fs = (opStatus) (fs | opInexact);
2248
2249     /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2250        positive zero unless rounding to minus infinity, except that
2251        adding two like-signed zeroes gives that zero.  */
2252     if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) {
2253       sign = (rounding_mode == rmTowardNegative);
2254       if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
2255         sign = false;
2256     }
2257   } else {
2258     fs = multiplySpecials(multiplicand);
2259
2260     /* FS can only be opOK or opInvalidOp.  There is no more work
2261        to do in the latter case.  The IEEE-754R standard says it is
2262        implementation-defined in this case whether, if ADDEND is a
2263        quiet NaN, we raise invalid op; this implementation does so.
2264
2265        If we need to do the addition we can do so with normal
2266        precision.  */
2267     if (fs == opOK)
2268       fs = addOrSubtract(addend, rounding_mode, false);
2269   }
2270
2271   return fs;
2272 }
2273
2274 /* Rounding-mode correct round to integral value.  */
2275 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
2276   opStatus fs;
2277
2278   if (isInfinity())
2279     // [IEEE Std 754-2008 6.1]:
2280     // The behavior of infinity in floating-point arithmetic is derived from the
2281     // limiting cases of real arithmetic with operands of arbitrarily
2282     // large magnitude, when such a limit exists.
2283     // ...
2284     // Operations on infinite operands are usually exact and therefore signal no
2285     // exceptions ...
2286     return opOK;
2287
2288   if (isNaN()) {
2289     if (isSignaling()) {
2290       // [IEEE Std 754-2008 6.2]:
2291       // Under default exception handling, any operation signaling an invalid
2292       // operation exception and for which a floating-point result is to be
2293       // delivered shall deliver a quiet NaN.
2294       makeQuiet();
2295       // [IEEE Std 754-2008 6.2]:
2296       // Signaling NaNs shall be reserved operands that, under default exception
2297       // handling, signal the invalid operation exception(see 7.2) for every
2298       // general-computational and signaling-computational operation except for
2299       // the conversions described in 5.12.
2300       return opInvalidOp;
2301     } else {
2302       // [IEEE Std 754-2008 6.2]:
2303       // For an operation with quiet NaN inputs, other than maximum and minimum
2304       // operations, if a floating-point result is to be delivered the result
2305       // shall be a quiet NaN which should be one of the input NaNs.
2306       // ...
2307       // Every general-computational and quiet-computational operation involving
2308       // one or more input NaNs, none of them signaling, shall signal no
2309       // exception, except fusedMultiplyAdd might signal the invalid operation
2310       // exception(see 7.2).
2311       return opOK;
2312     }
2313   }
2314
2315   if (isZero()) {
2316     // [IEEE Std 754-2008 6.3]:
2317     // ... the sign of the result of conversions, the quantize operation, the
2318     // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
2319     // the sign of the first or only operand.
2320     return opOK;
2321   }
2322
2323   // If the exponent is large enough, we know that this value is already
2324   // integral, and the arithmetic below would potentially cause it to saturate
2325   // to +/-Inf.  Bail out early instead.
2326   if (exponent+1 >= (int)semanticsPrecision(*semantics))
2327     return opOK;
2328
2329   // The algorithm here is quite simple: we add 2^(p-1), where p is the
2330   // precision of our format, and then subtract it back off again.  The choice
2331   // of rounding modes for the addition/subtraction determines the rounding mode
2332   // for our integral rounding as well.
2333   // NOTE: When the input value is negative, we do subtraction followed by
2334   // addition instead.
2335   APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
2336   IntegerConstant <<= semanticsPrecision(*semantics)-1;
2337   IEEEFloat MagicConstant(*semantics);
2338   fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
2339                                       rmNearestTiesToEven);
2340   assert(fs == opOK);
2341   MagicConstant.sign = sign;
2342
2343   // Preserve the input sign so that we can handle the case of zero result
2344   // correctly.
2345   bool inputSign = isNegative();
2346
2347   fs = add(MagicConstant, rounding_mode);
2348
2349   // Current value and 'MagicConstant' are both integers, so the result of the
2350   // subtraction is always exact according to Sterbenz' lemma.
2351   subtract(MagicConstant, rounding_mode);
2352
2353   // Restore the input sign.
2354   if (inputSign != isNegative())
2355     changeSign();
2356
2357   return fs;
2358 }
2359
2360
2361 /* Comparison requires normalized numbers.  */
2362 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
2363   cmpResult result;
2364
2365   assert(semantics == rhs.semantics);
2366
2367   switch (PackCategoriesIntoKey(category, rhs.category)) {
2368   default:
2369     llvm_unreachable(nullptr);
2370
2371   case PackCategoriesIntoKey(fcNaN, fcZero):
2372   case PackCategoriesIntoKey(fcNaN, fcNormal):
2373   case PackCategoriesIntoKey(fcNaN, fcInfinity):
2374   case PackCategoriesIntoKey(fcNaN, fcNaN):
2375   case PackCategoriesIntoKey(fcZero, fcNaN):
2376   case PackCategoriesIntoKey(fcNormal, fcNaN):
2377   case PackCategoriesIntoKey(fcInfinity, fcNaN):
2378     return cmpUnordered;
2379
2380   case PackCategoriesIntoKey(fcInfinity, fcNormal):
2381   case PackCategoriesIntoKey(fcInfinity, fcZero):
2382   case PackCategoriesIntoKey(fcNormal, fcZero):
2383     if (sign)
2384       return cmpLessThan;
2385     else
2386       return cmpGreaterThan;
2387
2388   case PackCategoriesIntoKey(fcNormal, fcInfinity):
2389   case PackCategoriesIntoKey(fcZero, fcInfinity):
2390   case PackCategoriesIntoKey(fcZero, fcNormal):
2391     if (rhs.sign)
2392       return cmpGreaterThan;
2393     else
2394       return cmpLessThan;
2395
2396   case PackCategoriesIntoKey(fcInfinity, fcInfinity):
2397     if (sign == rhs.sign)
2398       return cmpEqual;
2399     else if (sign)
2400       return cmpLessThan;
2401     else
2402       return cmpGreaterThan;
2403
2404   case PackCategoriesIntoKey(fcZero, fcZero):
2405     return cmpEqual;
2406
2407   case PackCategoriesIntoKey(fcNormal, fcNormal):
2408     break;
2409   }
2410
2411   /* Two normal numbers.  Do they have the same sign?  */
2412   if (sign != rhs.sign) {
2413     if (sign)
2414       result = cmpLessThan;
2415     else
2416       result = cmpGreaterThan;
2417   } else {
2418     /* Compare absolute values; invert result if negative.  */
2419     result = compareAbsoluteValue(rhs);
2420
2421     if (sign) {
2422       if (result == cmpLessThan)
2423         result = cmpGreaterThan;
2424       else if (result == cmpGreaterThan)
2425         result = cmpLessThan;
2426     }
2427   }
2428
2429   return result;
2430 }
2431
2432 /// IEEEFloat::convert - convert a value of one floating point type to another.
2433 /// The return value corresponds to the IEEE754 exceptions.  *losesInfo
2434 /// records whether the transformation lost information, i.e. whether
2435 /// converting the result back to the original type will produce the
2436 /// original value (this is almost the same as return value==fsOK, but there
2437 /// are edge cases where this is not so).
2438
2439 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
2440                                        roundingMode rounding_mode,
2441                                        bool *losesInfo) {
2442   lostFraction lostFraction;
2443   unsigned int newPartCount, oldPartCount;
2444   opStatus fs;
2445   int shift;
2446   const fltSemantics &fromSemantics = *semantics;
2447   bool is_signaling = isSignaling();
2448
2449   lostFraction = lfExactlyZero;
2450   newPartCount = partCountForBits(toSemantics.precision + 1);
2451   oldPartCount = partCount();
2452   shift = toSemantics.precision - fromSemantics.precision;
2453
2454   bool X86SpecialNan = false;
2455   if (&fromSemantics == &semX87DoubleExtended &&
2456       &toSemantics != &semX87DoubleExtended && category == fcNaN &&
2457       (!(*significandParts() & 0x8000000000000000ULL) ||
2458        !(*significandParts() & 0x4000000000000000ULL))) {
2459     // x86 has some unusual NaNs which cannot be represented in any other
2460     // format; note them here.
2461     X86SpecialNan = true;
2462   }
2463
2464   // If this is a truncation of a denormal number, and the target semantics
2465   // has larger exponent range than the source semantics (this can happen
2466   // when truncating from PowerPC double-double to double format), the
2467   // right shift could lose result mantissa bits.  Adjust exponent instead
2468   // of performing excessive shift.
2469   // Also do a similar trick in case shifting denormal would produce zero
2470   // significand as this case isn't handled correctly by normalize.
2471   if (shift < 0 && isFiniteNonZero()) {
2472     int omsb = significandMSB() + 1;
2473     int exponentChange = omsb - fromSemantics.precision;
2474     if (exponent + exponentChange < toSemantics.minExponent)
2475       exponentChange = toSemantics.minExponent - exponent;
2476     if (exponentChange < shift)
2477       exponentChange = shift;
2478     if (exponentChange < 0) {
2479       shift -= exponentChange;
2480       exponent += exponentChange;
2481     } else if (omsb <= -shift) {
2482       exponentChange = omsb + shift - 1; // leave at least one bit set
2483       shift -= exponentChange;
2484       exponent += exponentChange;
2485     }
2486   }
2487
2488   // If this is a truncation, perform the shift before we narrow the storage.
2489   if (shift < 0 && (isFiniteNonZero() ||
2490                     (category == fcNaN && semantics->nonFiniteBehavior !=
2491                                               fltNonfiniteBehavior::NanOnly)))
2492     lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2493
2494   // Fix the storage so it can hold to new value.
2495   if (newPartCount > oldPartCount) {
2496     // The new type requires more storage; make it available.
2497     integerPart *newParts;
2498     newParts = new integerPart[newPartCount];
2499     APInt::tcSet(newParts, 0, newPartCount);
2500     if (isFiniteNonZero() || category==fcNaN)
2501       APInt::tcAssign(newParts, significandParts(), oldPartCount);
2502     freeSignificand();
2503     significand.parts = newParts;
2504   } else if (newPartCount == 1 && oldPartCount != 1) {
2505     // Switch to built-in storage for a single part.
2506     integerPart newPart = 0;
2507     if (isFiniteNonZero() || category==fcNaN)
2508       newPart = significandParts()[0];
2509     freeSignificand();
2510     significand.part = newPart;
2511   }
2512
2513   // Now that we have the right storage, switch the semantics.
2514   semantics = &toSemantics;
2515
2516   // If this is an extension, perform the shift now that the storage is
2517   // available.
2518   if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2519     APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2520
2521   if (isFiniteNonZero()) {
2522     fs = normalize(rounding_mode, lostFraction);
2523     *losesInfo = (fs != opOK);
2524   } else if (category == fcNaN) {
2525     if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2526       *losesInfo =
2527           fromSemantics.nonFiniteBehavior != fltNonfiniteBehavior::NanOnly;
2528       makeNaN(false, sign);
2529       return is_signaling ? opInvalidOp : opOK;
2530     }
2531
2532     // If NaN is negative zero, we need to create a new NaN to avoid converting
2533     // NaN to -Inf.
2534     if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&
2535         semantics->nanEncoding != fltNanEncoding::NegativeZero)
2536       makeNaN(false, false);
2537
2538     *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2539
2540     // For x87 extended precision, we want to make a NaN, not a special NaN if
2541     // the input wasn't special either.
2542     if (!X86SpecialNan && semantics == &semX87DoubleExtended)
2543       APInt::tcSetBit(significandParts(), semantics->precision - 1);
2544
2545     // Convert of sNaN creates qNaN and raises an exception (invalid op).
2546     // This also guarantees that a sNaN does not become Inf on a truncation
2547     // that loses all payload bits.
2548     if (is_signaling) {
2549       makeQuiet();
2550       fs = opInvalidOp;
2551     } else {
2552       fs = opOK;
2553     }
2554   } else if (category == fcInfinity &&
2555              semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
2556     makeNaN(false, sign);
2557     *losesInfo = true;
2558     fs = opInexact;
2559   } else if (category == fcZero &&
2560              semantics->nanEncoding == fltNanEncoding::NegativeZero) {
2561     // Negative zero loses info, but positive zero doesn't.
2562     *losesInfo =
2563         fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;
2564     fs = *losesInfo ? opInexact : opOK;
2565     // NaN is negative zero means -0 -> +0, which can lose information
2566     sign = false;
2567   } else {
2568     *losesInfo = false;
2569     fs = opOK;
2570   }
2571
2572   return fs;
2573 }
2574
2575 /* Convert a floating point number to an integer according to the
2576    rounding mode.  If the rounded integer value is out of range this
2577    returns an invalid operation exception and the contents of the
2578    destination parts are unspecified.  If the rounded value is in
2579    range but the floating point number is not the exact integer, the C
2580    standard doesn't require an inexact exception to be raised.  IEEE
2581    854 does require it so we do that.
2582
2583    Note that for conversions to integer type the C standard requires
2584    round-to-zero to always be used.  */
2585 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2586     MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2587     roundingMode rounding_mode, bool *isExact) const {
2588   lostFraction lost_fraction;
2589   const integerPart *src;
2590   unsigned int dstPartsCount, truncatedBits;
2591
2592   *isExact = false;
2593
2594   /* Handle the three special cases first.  */
2595   if (category == fcInfinity || category == fcNaN)
2596     return opInvalidOp;
2597
2598   dstPartsCount = partCountForBits(width);
2599   assert(dstPartsCount <= parts.size() && "Integer too big");
2600
2601   if (category == fcZero) {
2602     APInt::tcSet(parts.data(), 0, dstPartsCount);
2603     // Negative zero can't be represented as an int.
2604     *isExact = !sign;
2605     return opOK;
2606   }
2607
2608   src = significandParts();
2609
2610   /* Step 1: place our absolute value, with any fraction truncated, in
2611      the destination.  */
2612   if (exponent < 0) {
2613     /* Our absolute value is less than one; truncate everything.  */
2614     APInt::tcSet(parts.data(), 0, dstPartsCount);
2615     /* For exponent -1 the integer bit represents .5, look at that.
2616        For smaller exponents leftmost truncated bit is 0. */
2617     truncatedBits = semantics->precision -1U - exponent;
2618   } else {
2619     /* We want the most significant (exponent + 1) bits; the rest are
2620        truncated.  */
2621     unsigned int bits = exponent + 1U;
2622
2623     /* Hopelessly large in magnitude?  */
2624     if (bits > width)
2625       return opInvalidOp;
2626
2627     if (bits < semantics->precision) {
2628       /* We truncate (semantics->precision - bits) bits.  */
2629       truncatedBits = semantics->precision - bits;
2630       APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2631     } else {
2632       /* We want at least as many bits as are available.  */
2633       APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2634                        0);
2635       APInt::tcShiftLeft(parts.data(), dstPartsCount,
2636                          bits - semantics->precision);
2637       truncatedBits = 0;
2638     }
2639   }
2640
2641   /* Step 2: work out any lost fraction, and increment the absolute
2642      value if we would round away from zero.  */
2643   if (truncatedBits) {
2644     lost_fraction = lostFractionThroughTruncation(src, partCount(),
2645                                                   truncatedBits);
2646     if (lost_fraction != lfExactlyZero &&
2647         roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2648       if (APInt::tcIncrement(parts.data(), dstPartsCount))
2649         return opInvalidOp;     /* Overflow.  */
2650     }
2651   } else {
2652     lost_fraction = lfExactlyZero;
2653   }
2654
2655   /* Step 3: check if we fit in the destination.  */
2656   unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2657
2658   if (sign) {
2659     if (!isSigned) {
2660       /* Negative numbers cannot be represented as unsigned.  */
2661       if (omsb != 0)
2662         return opInvalidOp;
2663     } else {
2664       /* It takes omsb bits to represent the unsigned integer value.
2665          We lose a bit for the sign, but care is needed as the
2666          maximally negative integer is a special case.  */
2667       if (omsb == width &&
2668           APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2669         return opInvalidOp;
2670
2671       /* This case can happen because of rounding.  */
2672       if (omsb > width)
2673         return opInvalidOp;
2674     }
2675
2676     APInt::tcNegate (parts.data(), dstPartsCount);
2677   } else {
2678     if (omsb >= width + !isSigned)
2679       return opInvalidOp;
2680   }
2681
2682   if (lost_fraction == lfExactlyZero) {
2683     *isExact = true;
2684     return opOK;
2685   } else
2686     return opInexact;
2687 }
2688
2689 /* Same as convertToSignExtendedInteger, except we provide
2690    deterministic values in case of an invalid operation exception,
2691    namely zero for NaNs and the minimal or maximal value respectively
2692    for underflow or overflow.
2693    The *isExact output tells whether the result is exact, in the sense
2694    that converting it back to the original floating point type produces
2695    the original value.  This is almost equivalent to result==opOK,
2696    except for negative zeroes.
2697 */
2698 IEEEFloat::opStatus
2699 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
2700                             unsigned int width, bool isSigned,
2701                             roundingMode rounding_mode, bool *isExact) const {
2702   opStatus fs;
2703
2704   fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2705                                     isExact);
2706
2707   if (fs == opInvalidOp) {
2708     unsigned int bits, dstPartsCount;
2709
2710     dstPartsCount = partCountForBits(width);
2711     assert(dstPartsCount <= parts.size() && "Integer too big");
2712
2713     if (category == fcNaN)
2714       bits = 0;
2715     else if (sign)
2716       bits = isSigned;
2717     else
2718       bits = width - isSigned;
2719
2720     tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
2721     if (sign && isSigned)
2722       APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2723   }
2724
2725   return fs;
2726 }
2727
2728 /* Convert an unsigned integer SRC to a floating point number,
2729    rounding according to ROUNDING_MODE.  The sign of the floating
2730    point number is not modified.  */
2731 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2732     const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2733   unsigned int omsb, precision, dstCount;
2734   integerPart *dst;
2735   lostFraction lost_fraction;
2736
2737   category = fcNormal;
2738   omsb = APInt::tcMSB(src, srcCount) + 1;
2739   dst = significandParts();
2740   dstCount = partCount();
2741   precision = semantics->precision;
2742
2743   /* We want the most significant PRECISION bits of SRC.  There may not
2744      be that many; extract what we can.  */
2745   if (precision <= omsb) {
2746     exponent = omsb - 1;
2747     lost_fraction = lostFractionThroughTruncation(src, srcCount,
2748                                                   omsb - precision);
2749     APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2750   } else {
2751     exponent = precision - 1;
2752     lost_fraction = lfExactlyZero;
2753     APInt::tcExtract(dst, dstCount, src, omsb, 0);
2754   }
2755
2756   return normalize(rounding_mode, lost_fraction);
2757 }
2758
2759 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned,
2760                                                 roundingMode rounding_mode) {
2761   unsigned int partCount = Val.getNumWords();
2762   APInt api = Val;
2763
2764   sign = false;
2765   if (isSigned && api.isNegative()) {
2766     sign = true;
2767     api = -api;
2768   }
2769
2770   return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2771 }
2772
2773 /* Convert a two's complement integer SRC to a floating point number,
2774    rounding according to ROUNDING_MODE.  ISSIGNED is true if the
2775    integer is signed, in which case it must be sign-extended.  */
2776 IEEEFloat::opStatus
2777 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src,
2778                                           unsigned int srcCount, bool isSigned,
2779                                           roundingMode rounding_mode) {
2780   opStatus status;
2781
2782   if (isSigned &&
2783       APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2784     integerPart *copy;
2785
2786     /* If we're signed and negative negate a copy.  */
2787     sign = true;
2788     copy = new integerPart[srcCount];
2789     APInt::tcAssign(copy, src, srcCount);
2790     APInt::tcNegate(copy, srcCount);
2791     status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2792     delete [] copy;
2793   } else {
2794     sign = false;
2795     status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2796   }
2797
2798   return status;
2799 }
2800
2801 /* FIXME: should this just take a const APInt reference?  */
2802 IEEEFloat::opStatus
2803 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2804                                           unsigned int width, bool isSigned,
2805                                           roundingMode rounding_mode) {
2806   unsigned int partCount = partCountForBits(width);
2807   APInt api = APInt(width, ArrayRef(parts, partCount));
2808
2809   sign = false;
2810   if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2811     sign = true;
2812     api = -api;
2813   }
2814
2815   return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2816 }
2817
2818 Expected<IEEEFloat::opStatus>
2819 IEEEFloat::convertFromHexadecimalString(StringRef s,
2820                                         roundingMode rounding_mode) {
2821   lostFraction lost_fraction = lfExactlyZero;
2822
2823   category = fcNormal;
2824   zeroSignificand();
2825   exponent = 0;
2826
2827   integerPart *significand = significandParts();
2828   unsigned partsCount = partCount();
2829   unsigned bitPos = partsCount * integerPartWidth;
2830   bool computedTrailingFraction = false;
2831
2832   // Skip leading zeroes and any (hexa)decimal point.
2833   StringRef::iterator begin = s.begin();
2834   StringRef::iterator end = s.end();
2835   StringRef::iterator dot;
2836   auto PtrOrErr = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2837   if (!PtrOrErr)
2838     return PtrOrErr.takeError();
2839   StringRef::iterator p = *PtrOrErr;
2840   StringRef::iterator firstSignificantDigit = p;
2841
2842   while (p != end) {
2843     integerPart hex_value;
2844
2845     if (*p == '.') {
2846       if (dot != end)
2847         return createError("String contains multiple dots");
2848       dot = p++;
2849       continue;
2850     }
2851
2852     hex_value = hexDigitValue(*p);
2853     if (hex_value == UINT_MAX)
2854       break;
2855
2856     p++;
2857
2858     // Store the number while we have space.
2859     if (bitPos) {
2860       bitPos -= 4;
2861       hex_value <<= bitPos % integerPartWidth;
2862       significand[bitPos / integerPartWidth] |= hex_value;
2863     } else if (!computedTrailingFraction) {
2864       auto FractOrErr = trailingHexadecimalFraction(p, end, hex_value);
2865       if (!FractOrErr)
2866         return FractOrErr.takeError();
2867       lost_fraction = *FractOrErr;
2868       computedTrailingFraction = true;
2869     }
2870   }
2871
2872   /* Hex floats require an exponent but not a hexadecimal point.  */
2873   if (p == end)
2874     return createError("Hex strings require an exponent");
2875   if (*p != 'p' && *p != 'P')
2876     return createError("Invalid character in significand");
2877   if (p == begin)
2878     return createError("Significand has no digits");
2879   if (dot != end && p - begin == 1)
2880     return createError("Significand has no digits");
2881
2882   /* Ignore the exponent if we are zero.  */
2883   if (p != firstSignificantDigit) {
2884     int expAdjustment;
2885
2886     /* Implicit hexadecimal point?  */
2887     if (dot == end)
2888       dot = p;
2889
2890     /* Calculate the exponent adjustment implicit in the number of
2891        significant digits.  */
2892     expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2893     if (expAdjustment < 0)
2894       expAdjustment++;
2895     expAdjustment = expAdjustment * 4 - 1;
2896
2897     /* Adjust for writing the significand starting at the most
2898        significant nibble.  */
2899     expAdjustment += semantics->precision;
2900     expAdjustment -= partsCount * integerPartWidth;
2901
2902     /* Adjust for the given exponent.  */
2903     auto ExpOrErr = totalExponent(p + 1, end, expAdjustment);
2904     if (!ExpOrErr)
2905       return ExpOrErr.takeError();
2906     exponent = *ExpOrErr;
2907   }
2908
2909   return normalize(rounding_mode, lost_fraction);
2910 }
2911
2912 IEEEFloat::opStatus
2913 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2914                                         unsigned sigPartCount, int exp,
2915                                         roundingMode rounding_mode) {
2916   unsigned int parts, pow5PartCount;
2917   fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2918   integerPart pow5Parts[maxPowerOfFiveParts];
2919   bool isNearest;
2920
2921   isNearest = (rounding_mode == rmNearestTiesToEven ||
2922                rounding_mode == rmNearestTiesToAway);
2923
2924   parts = partCountForBits(semantics->precision + 11);
2925
2926   /* Calculate pow(5, abs(exp)).  */
2927   pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2928
2929   for (;; parts *= 2) {
2930     opStatus sigStatus, powStatus;
2931     unsigned int excessPrecision, truncatedBits;
2932
2933     calcSemantics.precision = parts * integerPartWidth - 1;
2934     excessPrecision = calcSemantics.precision - semantics->precision;
2935     truncatedBits = excessPrecision;
2936
2937     IEEEFloat decSig(calcSemantics, uninitialized);
2938     decSig.makeZero(sign);
2939     IEEEFloat pow5(calcSemantics);
2940
2941     sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2942                                                 rmNearestTiesToEven);
2943     powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2944                                               rmNearestTiesToEven);
2945     /* Add exp, as 10^n = 5^n * 2^n.  */
2946     decSig.exponent += exp;
2947
2948     lostFraction calcLostFraction;
2949     integerPart HUerr, HUdistance;
2950     unsigned int powHUerr;
2951
2952     if (exp >= 0) {
2953       /* multiplySignificand leaves the precision-th bit set to 1.  */
2954       calcLostFraction = decSig.multiplySignificand(pow5);
2955       powHUerr = powStatus != opOK;
2956     } else {
2957       calcLostFraction = decSig.divideSignificand(pow5);
2958       /* Denormal numbers have less precision.  */
2959       if (decSig.exponent < semantics->minExponent) {
2960         excessPrecision += (semantics->minExponent - decSig.exponent);
2961         truncatedBits = excessPrecision;
2962         if (excessPrecision > calcSemantics.precision)
2963           excessPrecision = calcSemantics.precision;
2964       }
2965       /* Extra half-ulp lost in reciprocal of exponent.  */
2966       powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2967     }
2968
2969     /* Both multiplySignificand and divideSignificand return the
2970        result with the integer bit set.  */
2971     assert(APInt::tcExtractBit
2972            (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2973
2974     HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2975                        powHUerr);
2976     HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2977                                       excessPrecision, isNearest);
2978
2979     /* Are we guaranteed to round correctly if we truncate?  */
2980     if (HUdistance >= HUerr) {
2981       APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2982                        calcSemantics.precision - excessPrecision,
2983                        excessPrecision);
2984       /* Take the exponent of decSig.  If we tcExtract-ed less bits
2985          above we must adjust our exponent to compensate for the
2986          implicit right shift.  */
2987       exponent = (decSig.exponent + semantics->precision
2988                   - (calcSemantics.precision - excessPrecision));
2989       calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2990                                                        decSig.partCount(),
2991                                                        truncatedBits);
2992       return normalize(rounding_mode, calcLostFraction);
2993     }
2994   }
2995 }
2996
2997 Expected<IEEEFloat::opStatus>
2998 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
2999   decimalInfo D;
3000   opStatus fs;
3001
3002   /* Scan the text.  */
3003   StringRef::iterator p = str.begin();
3004   if (Error Err = interpretDecimal(p, str.end(), &D))
3005     return std::move(Err);
3006
3007   /* Handle the quick cases.  First the case of no significant digits,
3008      i.e. zero, and then exponents that are obviously too large or too
3009      small.  Writing L for log 10 / log 2, a number d.ddddd*10^exp
3010      definitely overflows if
3011
3012            (exp - 1) * L >= maxExponent
3013
3014      and definitely underflows to zero where
3015
3016            (exp + 1) * L <= minExponent - precision
3017
3018      With integer arithmetic the tightest bounds for L are
3019
3020            93/28 < L < 196/59            [ numerator <= 256 ]
3021            42039/12655 < L < 28738/8651  [ numerator <= 65536 ]
3022   */
3023
3024   // Test if we have a zero number allowing for strings with no null terminators
3025   // and zero decimals with non-zero exponents.
3026   //
3027   // We computed firstSigDigit by ignoring all zeros and dots. Thus if
3028   // D->firstSigDigit equals str.end(), every digit must be a zero and there can
3029   // be at most one dot. On the other hand, if we have a zero with a non-zero
3030   // exponent, then we know that D.firstSigDigit will be non-numeric.
3031   if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
3032     category = fcZero;
3033     fs = opOK;
3034     if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
3035       sign = false;
3036
3037     /* Check whether the normalized exponent is high enough to overflow
3038        max during the log-rebasing in the max-exponent check below. */
3039   } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
3040     fs = handleOverflow(rounding_mode);
3041
3042   /* If it wasn't, then it also wasn't high enough to overflow max
3043      during the log-rebasing in the min-exponent check.  Check that it
3044      won't overflow min in either check, then perform the min-exponent
3045      check. */
3046   } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
3047              (D.normalizedExponent + 1) * 28738 <=
3048                8651 * (semantics->minExponent - (int) semantics->precision)) {
3049     /* Underflow to zero and round.  */
3050     category = fcNormal;
3051     zeroSignificand();
3052     fs = normalize(rounding_mode, lfLessThanHalf);
3053
3054   /* We can finally safely perform the max-exponent check. */
3055   } else if ((D.normalizedExponent - 1) * 42039
3056              >= 12655 * semantics->maxExponent) {
3057     /* Overflow and round.  */
3058     fs = handleOverflow(rounding_mode);
3059   } else {
3060     integerPart *decSignificand;
3061     unsigned int partCount;
3062
3063     /* A tight upper bound on number of bits required to hold an
3064        N-digit decimal integer is N * 196 / 59.  Allocate enough space
3065        to hold the full significand, and an extra part required by
3066        tcMultiplyPart.  */
3067     partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
3068     partCount = partCountForBits(1 + 196 * partCount / 59);
3069     decSignificand = new integerPart[partCount + 1];
3070     partCount = 0;
3071
3072     /* Convert to binary efficiently - we do almost all multiplication
3073        in an integerPart.  When this would overflow do we do a single
3074        bignum multiplication, and then revert again to multiplication
3075        in an integerPart.  */
3076     do {
3077       integerPart decValue, val, multiplier;
3078
3079       val = 0;
3080       multiplier = 1;
3081
3082       do {
3083         if (*p == '.') {
3084           p++;
3085           if (p == str.end()) {
3086             break;
3087           }
3088         }
3089         decValue = decDigitValue(*p++);
3090         if (decValue >= 10U) {
3091           delete[] decSignificand;
3092           return createError("Invalid character in significand");
3093         }
3094         multiplier *= 10;
3095         val = val * 10 + decValue;
3096         /* The maximum number that can be multiplied by ten with any
3097            digit added without overflowing an integerPart.  */
3098       } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
3099
3100       /* Multiply out the current part.  */
3101       APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
3102                             partCount, partCount + 1, false);
3103
3104       /* If we used another part (likely but not guaranteed), increase
3105          the count.  */
3106       if (decSignificand[partCount])
3107         partCount++;
3108     } while (p <= D.lastSigDigit);
3109
3110     category = fcNormal;
3111     fs = roundSignificandWithExponent(decSignificand, partCount,
3112                                       D.exponent, rounding_mode);
3113
3114     delete [] decSignificand;
3115   }
3116
3117   return fs;
3118 }
3119
3120 bool IEEEFloat::convertFromStringSpecials(StringRef str) {
3121   const size_t MIN_NAME_SIZE = 3;
3122
3123   if (str.size() < MIN_NAME_SIZE)
3124     return false;
3125
3126   if (str.equals("inf") || str.equals("INFINITY") || str.equals("+Inf")) {
3127     makeInf(false);
3128     return true;
3129   }
3130
3131   bool IsNegative = str.front() == '-';
3132   if (IsNegative) {
3133     str = str.drop_front();
3134     if (str.size() < MIN_NAME_SIZE)
3135       return false;
3136
3137     if (str.equals("inf") || str.equals("INFINITY") || str.equals("Inf")) {
3138       makeInf(true);
3139       return true;
3140     }
3141   }
3142
3143   // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
3144   bool IsSignaling = str.front() == 's' || str.front() == 'S';
3145   if (IsSignaling) {
3146     str = str.drop_front();
3147     if (str.size() < MIN_NAME_SIZE)
3148       return false;
3149   }
3150
3151   if (str.starts_with("nan") || str.starts_with("NaN")) {
3152     str = str.drop_front(3);
3153
3154     // A NaN without payload.
3155     if (str.empty()) {
3156       makeNaN(IsSignaling, IsNegative);
3157       return true;
3158     }
3159
3160     // Allow the payload to be inside parentheses.
3161     if (str.front() == '(') {
3162       // Parentheses should be balanced (and not empty).
3163       if (str.size() <= 2 || str.back() != ')')
3164         return false;
3165
3166       str = str.slice(1, str.size() - 1);
3167     }
3168
3169     // Determine the payload number's radix.
3170     unsigned Radix = 10;
3171     if (str[0] == '0') {
3172       if (str.size() > 1 && tolower(str[1]) == 'x') {
3173         str = str.drop_front(2);
3174         Radix = 16;
3175       } else
3176         Radix = 8;
3177     }
3178
3179     // Parse the payload and make the NaN.
3180     APInt Payload;
3181     if (!str.getAsInteger(Radix, Payload)) {
3182       makeNaN(IsSignaling, IsNegative, &Payload);
3183       return true;
3184     }
3185   }
3186
3187   return false;
3188 }
3189
3190 Expected<IEEEFloat::opStatus>
3191 IEEEFloat::convertFromString(StringRef str, roundingMode rounding_mode) {
3192   if (str.empty())
3193     return createError("Invalid string length");
3194
3195   // Handle special cases.
3196   if (convertFromStringSpecials(str))
3197     return opOK;
3198
3199   /* Handle a leading minus sign.  */
3200   StringRef::iterator p = str.begin();
3201   size_t slen = str.size();
3202   sign = *p == '-' ? 1 : 0;
3203   if (*p == '-' || *p == '+') {
3204     p++;
3205     slen--;
3206     if (!slen)
3207       return createError("String has no digits");
3208   }
3209
3210   if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
3211     if (slen == 2)
3212       return createError("Invalid string");
3213     return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
3214                                         rounding_mode);
3215   }
3216
3217   return convertFromDecimalString(StringRef(p, slen), rounding_mode);
3218 }
3219
3220 /* Write out a hexadecimal representation of the floating point value
3221    to DST, which must be of sufficient size, in the C99 form
3222    [-]0xh.hhhhp[+-]d.  Return the number of characters written,
3223    excluding the terminating NUL.
3224
3225    If UPPERCASE, the output is in upper case, otherwise in lower case.
3226
3227    HEXDIGITS digits appear altogether, rounding the value if
3228    necessary.  If HEXDIGITS is 0, the minimal precision to display the
3229    number precisely is used instead.  If nothing would appear after
3230    the decimal point it is suppressed.
3231
3232    The decimal exponent is always printed and has at least one digit.
3233    Zero values display an exponent of zero.  Infinities and NaNs
3234    appear as "infinity" or "nan" respectively.
3235
3236    The above rules are as specified by C99.  There is ambiguity about
3237    what the leading hexadecimal digit should be.  This implementation
3238    uses whatever is necessary so that the exponent is displayed as
3239    stored.  This implies the exponent will fall within the IEEE format
3240    range, and the leading hexadecimal digit will be 0 (for denormals),
3241    1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
3242    any other digits zero).
3243 */
3244 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
3245                                            bool upperCase,
3246                                            roundingMode rounding_mode) const {
3247   char *p;
3248
3249   p = dst;
3250   if (sign)
3251     *dst++ = '-';
3252
3253   switch (category) {
3254   case fcInfinity:
3255     memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
3256     dst += sizeof infinityL - 1;
3257     break;
3258
3259   case fcNaN:
3260     memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
3261     dst += sizeof NaNU - 1;
3262     break;
3263
3264   case fcZero:
3265     *dst++ = '0';
3266     *dst++ = upperCase ? 'X': 'x';
3267     *dst++ = '0';
3268     if (hexDigits > 1) {
3269       *dst++ = '.';
3270       memset (dst, '0', hexDigits - 1);
3271       dst += hexDigits - 1;
3272     }
3273     *dst++ = upperCase ? 'P': 'p';
3274     *dst++ = '0';
3275     break;
3276
3277   case fcNormal:
3278     dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
3279     break;
3280   }
3281
3282   *dst = 0;
3283
3284   return static_cast<unsigned int>(dst - p);
3285 }
3286
3287 /* Does the hard work of outputting the correctly rounded hexadecimal
3288    form of a normal floating point number with the specified number of
3289    hexadecimal digits.  If HEXDIGITS is zero the minimum number of
3290    digits necessary to print the value precisely is output.  */
3291 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
3292                                           bool upperCase,
3293                                           roundingMode rounding_mode) const {
3294   unsigned int count, valueBits, shift, partsCount, outputDigits;
3295   const char *hexDigitChars;
3296   const integerPart *significand;
3297   char *p;
3298   bool roundUp;
3299
3300   *dst++ = '0';
3301   *dst++ = upperCase ? 'X': 'x';
3302
3303   roundUp = false;
3304   hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
3305
3306   significand = significandParts();
3307   partsCount = partCount();
3308
3309   /* +3 because the first digit only uses the single integer bit, so
3310      we have 3 virtual zero most-significant-bits.  */
3311   valueBits = semantics->precision + 3;
3312   shift = integerPartWidth - valueBits % integerPartWidth;
3313
3314   /* The natural number of digits required ignoring trailing
3315      insignificant zeroes.  */
3316   outputDigits = (valueBits - significandLSB () + 3) / 4;
3317
3318   /* hexDigits of zero means use the required number for the
3319      precision.  Otherwise, see if we are truncating.  If we are,
3320      find out if we need to round away from zero.  */
3321   if (hexDigits) {
3322     if (hexDigits < outputDigits) {
3323       /* We are dropping non-zero bits, so need to check how to round.
3324          "bits" is the number of dropped bits.  */
3325       unsigned int bits;
3326       lostFraction fraction;
3327
3328       bits = valueBits - hexDigits * 4;
3329       fraction = lostFractionThroughTruncation (significand, partsCount, bits);
3330       roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
3331     }
3332     outputDigits = hexDigits;
3333   }
3334
3335   /* Write the digits consecutively, and start writing in the location
3336      of the hexadecimal point.  We move the most significant digit
3337      left and add the hexadecimal point later.  */
3338   p = ++dst;
3339
3340   count = (valueBits + integerPartWidth - 1) / integerPartWidth;
3341
3342   while (outputDigits && count) {
3343     integerPart part;
3344
3345     /* Put the most significant integerPartWidth bits in "part".  */
3346     if (--count == partsCount)
3347       part = 0;  /* An imaginary higher zero part.  */
3348     else
3349       part = significand[count] << shift;
3350
3351     if (count && shift)
3352       part |= significand[count - 1] >> (integerPartWidth - shift);
3353
3354     /* Convert as much of "part" to hexdigits as we can.  */
3355     unsigned int curDigits = integerPartWidth / 4;
3356
3357     if (curDigits > outputDigits)
3358       curDigits = outputDigits;
3359     dst += partAsHex (dst, part, curDigits, hexDigitChars);
3360     outputDigits -= curDigits;
3361   }
3362
3363   if (roundUp) {
3364     char *q = dst;
3365
3366     /* Note that hexDigitChars has a trailing '0'.  */
3367     do {
3368       q--;
3369       *q = hexDigitChars[hexDigitValue (*q) + 1];
3370     } while (*q == '0');
3371     assert(q >= p);
3372   } else {
3373     /* Add trailing zeroes.  */
3374     memset (dst, '0', outputDigits);
3375     dst += outputDigits;
3376   }
3377
3378   /* Move the most significant digit to before the point, and if there
3379      is something after the decimal point add it.  This must come
3380      after rounding above.  */
3381   p[-1] = p[0];
3382   if (dst -1 == p)
3383     dst--;
3384   else
3385     p[0] = '.';
3386
3387   /* Finally output the exponent.  */
3388   *dst++ = upperCase ? 'P': 'p';
3389
3390   return writeSignedDecimal (dst, exponent);
3391 }
3392
3393 hash_code hash_value(const IEEEFloat &Arg) {
3394   if (!Arg.isFiniteNonZero())
3395     return hash_combine((uint8_t)Arg.category,
3396                         // NaN has no sign, fix it at zero.
3397                         Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
3398                         Arg.semantics->precision);
3399
3400   // Normal floats need their exponent and significand hashed.
3401   return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
3402                       Arg.semantics->precision, Arg.exponent,
3403                       hash_combine_range(
3404                         Arg.significandParts(),
3405                         Arg.significandParts() + Arg.partCount()));
3406 }
3407
3408 // Conversion from APFloat to/from host float/double.  It may eventually be
3409 // possible to eliminate these and have everybody deal with APFloats, but that
3410 // will take a while.  This approach will not easily extend to long double.
3411 // Current implementation requires integerPartWidth==64, which is correct at
3412 // the moment but could be made more general.
3413
3414 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
3415 // the actual IEEE respresentations.  We compensate for that here.
3416
3417 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
3418   assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
3419   assert(partCount()==2);
3420
3421   uint64_t myexponent, mysignificand;
3422
3423   if (isFiniteNonZero()) {
3424     myexponent = exponent+16383; //bias
3425     mysignificand = significandParts()[0];
3426     if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
3427       myexponent = 0;   // denormal
3428   } else if (category==fcZero) {
3429     myexponent = 0;
3430     mysignificand = 0;
3431   } else if (category==fcInfinity) {
3432     myexponent = 0x7fff;
3433     mysignificand = 0x8000000000000000ULL;
3434   } else {
3435     assert(category == fcNaN && "Unknown category");
3436     myexponent = 0x7fff;
3437     mysignificand = significandParts()[0];
3438   }
3439
3440   uint64_t words[2];
3441   words[0] = mysignificand;
3442   words[1] =  ((uint64_t)(sign & 1) << 15) |
3443               (myexponent & 0x7fffLL);
3444   return APInt(80, words);
3445 }
3446
3447 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
3448   assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
3449   assert(partCount()==2);
3450
3451   uint64_t words[2];
3452   opStatus fs;
3453   bool losesInfo;
3454
3455   // Convert number to double.  To avoid spurious underflows, we re-
3456   // normalize against the "double" minExponent first, and only *then*
3457   // truncate the mantissa.  The result of that second conversion
3458   // may be inexact, but should never underflow.
3459   // Declare fltSemantics before APFloat that uses it (and
3460   // saves pointer to it) to ensure correct destruction order.
3461   fltSemantics extendedSemantics = *semantics;
3462   extendedSemantics.minExponent = semIEEEdouble.minExponent;
3463   IEEEFloat extended(*this);
3464   fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3465   assert(fs == opOK && !losesInfo);
3466   (void)fs;
3467
3468   IEEEFloat u(extended);
3469   fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3470   assert(fs == opOK || fs == opInexact);
3471   (void)fs;
3472   words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
3473
3474   // If conversion was exact or resulted in a special case, we're done;
3475   // just set the second double to zero.  Otherwise, re-convert back to
3476   // the extended format and compute the difference.  This now should
3477   // convert exactly to double.
3478   if (u.isFiniteNonZero() && losesInfo) {
3479     fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
3480     assert(fs == opOK && !losesInfo);
3481     (void)fs;
3482
3483     IEEEFloat v(extended);
3484     v.subtract(u, rmNearestTiesToEven);
3485     fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
3486     assert(fs == opOK && !losesInfo);
3487     (void)fs;
3488     words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
3489   } else {
3490     words[1] = 0;
3491   }
3492
3493   return APInt(128, words);
3494 }
3495
3496 template <const fltSemantics &S>
3497 APInt IEEEFloat::convertIEEEFloatToAPInt() const {
3498   assert(semantics == &S);
3499
3500   constexpr int bias = -(S.minExponent - 1);
3501   constexpr unsigned int trailing_significand_bits = S.precision - 1;
3502   constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;
3503   constexpr integerPart integer_bit =
3504       integerPart{1} << (trailing_significand_bits % integerPartWidth);
3505   constexpr uint64_t significand_mask = integer_bit - 1;
3506   constexpr unsigned int exponent_bits =
3507       S.sizeInBits - 1 - trailing_significand_bits;
3508   static_assert(exponent_bits < 64);
3509   constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3510
3511   uint64_t myexponent;
3512   std::array<integerPart, partCountForBits(trailing_significand_bits)>
3513       mysignificand;
3514
3515   if (isFiniteNonZero()) {
3516     myexponent = exponent + bias;
3517     std::copy_n(significandParts(), mysignificand.size(),
3518                 mysignificand.begin());
3519     if (myexponent == 1 &&
3520         !(significandParts()[integer_bit_part] & integer_bit))
3521       myexponent = 0; // denormal
3522   } else if (category == fcZero) {
3523     myexponent = ::exponentZero(S) + bias;
3524     mysignificand.fill(0);
3525   } else if (category == fcInfinity) {
3526     if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
3527       llvm_unreachable("semantics don't support inf!");
3528     }
3529     myexponent = ::exponentInf(S) + bias;
3530     mysignificand.fill(0);
3531   } else {
3532     assert(category == fcNaN && "Unknown category!");
3533     myexponent = ::exponentNaN(S) + bias;
3534     std::copy_n(significandParts(), mysignificand.size(),
3535                 mysignificand.begin());
3536   }
3537   std::array<uint64_t, (S.sizeInBits + 63) / 64> words;
3538   auto words_iter =
3539       std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin());
3540   if constexpr (significand_mask != 0) {
3541     // Clear the integer bit.
3542     words[mysignificand.size() - 1] &= significand_mask;
3543   }
3544   std::fill(words_iter, words.end(), uint64_t{0});
3545   constexpr size_t last_word = words.size() - 1;
3546   uint64_t shifted_sign = static_cast<uint64_t>(sign & 1)
3547                           << ((S.sizeInBits - 1) % 64);
3548   words[last_word] |= shifted_sign;
3549   uint64_t shifted_exponent = (myexponent & exponent_mask)
3550                               << (trailing_significand_bits % 64);
3551   words[last_word] |= shifted_exponent;
3552   if constexpr (last_word == 0) {
3553     return APInt(S.sizeInBits, words[0]);
3554   }
3555   return APInt(S.sizeInBits, words);
3556 }
3557
3558 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
3559   assert(partCount() == 2);
3560   return convertIEEEFloatToAPInt<semIEEEquad>();
3561 }
3562
3563 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
3564   assert(partCount()==1);
3565   return convertIEEEFloatToAPInt<semIEEEdouble>();
3566 }
3567
3568 APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
3569   assert(partCount()==1);
3570   return convertIEEEFloatToAPInt<semIEEEsingle>();
3571 }
3572
3573 APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
3574   assert(partCount() == 1);
3575   return convertIEEEFloatToAPInt<semBFloat>();
3576 }
3577
3578 APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
3579   assert(partCount()==1);
3580   return convertIEEEFloatToAPInt<semIEEEhalf>();
3581 }
3582
3583 APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
3584   assert(partCount() == 1);
3585   return convertIEEEFloatToAPInt<semFloat8E5M2>();
3586 }
3587
3588 APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
3589   assert(partCount() == 1);
3590   return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>();
3591 }
3592
3593 APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
3594   assert(partCount() == 1);
3595   return convertIEEEFloatToAPInt<semFloat8E4M3FN>();
3596 }
3597
3598 APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
3599   assert(partCount() == 1);
3600   return convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>();
3601 }
3602
3603 APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
3604   assert(partCount() == 1);
3605   return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>();
3606 }
3607
3608 APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
3609   assert(partCount() == 1);
3610   return convertIEEEFloatToAPInt<semFloatTF32>();
3611 }
3612
3613 // This function creates an APInt that is just a bit map of the floating
3614 // point constant as it would appear in memory.  It is not a conversion,
3615 // and treating the result as a normal integer is unlikely to be useful.
3616
3617 APInt IEEEFloat::bitcastToAPInt() const {
3618   if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
3619     return convertHalfAPFloatToAPInt();
3620
3621   if (semantics == (const llvm::fltSemantics *)&semBFloat)
3622     return convertBFloatAPFloatToAPInt();
3623
3624   if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
3625     return convertFloatAPFloatToAPInt();
3626
3627   if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
3628     return convertDoubleAPFloatToAPInt();
3629
3630   if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
3631     return convertQuadrupleAPFloatToAPInt();
3632
3633   if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
3634     return convertPPCDoubleDoubleAPFloatToAPInt();
3635
3636   if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2)
3637     return convertFloat8E5M2APFloatToAPInt();
3638
3639   if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ)
3640     return convertFloat8E5M2FNUZAPFloatToAPInt();
3641
3642   if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN)
3643     return convertFloat8E4M3FNAPFloatToAPInt();
3644
3645   if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ)
3646     return convertFloat8E4M3FNUZAPFloatToAPInt();
3647
3648   if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ)
3649     return convertFloat8E4M3B11FNUZAPFloatToAPInt();
3650
3651   if (semantics == (const llvm::fltSemantics *)&semFloatTF32)
3652     return convertFloatTF32APFloatToAPInt();
3653
3654   assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
3655          "unknown format!");
3656   return convertF80LongDoubleAPFloatToAPInt();
3657 }
3658
3659 float IEEEFloat::convertToFloat() const {
3660   assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
3661          "Float semantics are not IEEEsingle");
3662   APInt api = bitcastToAPInt();
3663   return api.bitsToFloat();
3664 }
3665
3666 double IEEEFloat::convertToDouble() const {
3667   assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
3668          "Float semantics are not IEEEdouble");
3669   APInt api = bitcastToAPInt();
3670   return api.bitsToDouble();
3671 }
3672
3673 /// Integer bit is explicit in this format.  Intel hardware (387 and later)
3674 /// does not support these bit patterns:
3675 ///  exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3676 ///  exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3677 ///  exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3678 ///  exponent = 0, integer bit 1 ("pseudodenormal")
3679 /// At the moment, the first three are treated as NaNs, the last one as Normal.
3680 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3681   uint64_t i1 = api.getRawData()[0];
3682   uint64_t i2 = api.getRawData()[1];
3683   uint64_t myexponent = (i2 & 0x7fff);
3684   uint64_t mysignificand = i1;
3685   uint8_t myintegerbit = mysignificand >> 63;
3686
3687   initialize(&semX87DoubleExtended);
3688   assert(partCount()==2);
3689
3690   sign = static_cast<unsigned int>(i2>>15);
3691   if (myexponent == 0 && mysignificand == 0) {
3692     makeZero(sign);
3693   } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3694     makeInf(sign);
3695   } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3696              (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3697     category = fcNaN;
3698     exponent = exponentNaN();
3699     significandParts()[0] = mysignificand;
3700     significandParts()[1] = 0;
3701   } else {
3702     category = fcNormal;
3703     exponent = myexponent - 16383;
3704     significandParts()[0] = mysignificand;
3705     significandParts()[1] = 0;
3706     if (myexponent==0)          // denormal
3707       exponent = -16382;
3708   }
3709 }
3710
3711 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
3712   uint64_t i1 = api.getRawData()[0];
3713   uint64_t i2 = api.getRawData()[1];
3714   opStatus fs;
3715   bool losesInfo;
3716
3717   // Get the first double and convert to our format.
3718   initFromDoubleAPInt(APInt(64, i1));
3719   fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3720   assert(fs == opOK && !losesInfo);
3721   (void)fs;
3722
3723   // Unless we have a special case, add in second double.
3724   if (isFiniteNonZero()) {
3725     IEEEFloat v(semIEEEdouble, APInt(64, i2));
3726     fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3727     assert(fs == opOK && !losesInfo);
3728     (void)fs;
3729
3730     add(v, rmNearestTiesToEven);
3731   }
3732 }
3733
3734 template <const fltSemantics &S>
3735 void IEEEFloat::initFromIEEEAPInt(const APInt &api) {
3736   assert(api.getBitWidth() == S.sizeInBits);
3737   constexpr integerPart integer_bit = integerPart{1}
3738                                       << ((S.precision - 1) % integerPartWidth);
3739   constexpr uint64_t significand_mask = integer_bit - 1;
3740   constexpr unsigned int trailing_significand_bits = S.precision - 1;
3741   constexpr unsigned int stored_significand_parts =
3742       partCountForBits(trailing_significand_bits);
3743   constexpr unsigned int exponent_bits =
3744       S.sizeInBits - 1 - trailing_significand_bits;
3745   static_assert(exponent_bits < 64);
3746   constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
3747   constexpr int bias = -(S.minExponent - 1);
3748
3749   // Copy the bits of the significand. We need to clear out the exponent and
3750   // sign bit in the last word.
3751   std::array<integerPart, stored_significand_parts> mysignificand;
3752   std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin());
3753   if constexpr (significand_mask != 0) {
3754     mysignificand[mysignificand.size() - 1] &= significand_mask;
3755   }
3756
3757   // We assume the last word holds the sign bit, the exponent, and potentially
3758   // some of the trailing significand field.
3759   uint64_t last_word = api.getRawData()[api.getNumWords() - 1];
3760   uint64_t myexponent =
3761       (last_word >> (trailing_significand_bits % 64)) & exponent_mask;
3762
3763   initialize(&S);
3764   assert(partCount() == mysignificand.size());
3765
3766   sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64));
3767
3768   bool all_zero_significand =
3769       llvm::all_of(mysignificand, [](integerPart bits) { return bits == 0; });
3770
3771   bool is_zero = myexponent == 0 && all_zero_significand;
3772
3773   if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) {
3774     if (myexponent - bias == ::exponentInf(S) && all_zero_significand) {
3775       makeInf(sign);
3776       return;
3777     }
3778   }
3779
3780   bool is_nan = false;
3781
3782   if constexpr (S.nanEncoding == fltNanEncoding::IEEE) {
3783     is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand;
3784   } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) {
3785     bool all_ones_significand =
3786         std::all_of(mysignificand.begin(), mysignificand.end() - 1,
3787                     [](integerPart bits) { return bits == ~integerPart{0}; }) &&
3788         (!significand_mask ||
3789          mysignificand[mysignificand.size() - 1] == significand_mask);
3790     is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand;
3791   } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) {
3792     is_nan = is_zero && sign;
3793   }
3794
3795   if (is_nan) {
3796     category = fcNaN;
3797     exponent = ::exponentNaN(S);
3798     std::copy_n(mysignificand.begin(), mysignificand.size(),
3799                 significandParts());
3800     return;
3801   }
3802
3803   if (is_zero) {
3804     makeZero(sign);
3805     return;
3806   }
3807
3808   category = fcNormal;
3809   exponent = myexponent - bias;
3810   std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts());
3811   if (myexponent == 0) // denormal
3812     exponent = S.minExponent;
3813   else
3814     significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit
3815 }
3816
3817 void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3818   initFromIEEEAPInt<semIEEEquad>(api);
3819 }
3820
3821 void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
3822   initFromIEEEAPInt<semIEEEdouble>(api);
3823 }
3824
3825 void IEEEFloat::initFromFloatAPInt(const APInt &api) {
3826   initFromIEEEAPInt<semIEEEsingle>(api);
3827 }
3828
3829 void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
3830   initFromIEEEAPInt<semBFloat>(api);
3831 }
3832
3833 void IEEEFloat::initFromHalfAPInt(const APInt &api) {
3834   initFromIEEEAPInt<semIEEEhalf>(api);
3835 }
3836
3837 void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
3838   initFromIEEEAPInt<semFloat8E5M2>(api);
3839 }
3840
3841 void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
3842   initFromIEEEAPInt<semFloat8E5M2FNUZ>(api);
3843 }
3844
3845 void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
3846   initFromIEEEAPInt<semFloat8E4M3FN>(api);
3847 }
3848
3849 void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
3850   initFromIEEEAPInt<semFloat8E4M3FNUZ>(api);
3851 }
3852
3853 void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
3854   initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api);
3855 }
3856
3857 void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
3858   initFromIEEEAPInt<semFloatTF32>(api);
3859 }
3860
3861 /// Treat api as containing the bits of a floating point number.
3862 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3863   assert(api.getBitWidth() == Sem->sizeInBits);
3864   if (Sem == &semIEEEhalf)
3865     return initFromHalfAPInt(api);
3866   if (Sem == &semBFloat)
3867     return initFromBFloatAPInt(api);
3868   if (Sem == &semIEEEsingle)
3869     return initFromFloatAPInt(api);
3870   if (Sem == &semIEEEdouble)
3871     return initFromDoubleAPInt(api);
3872   if (Sem == &semX87DoubleExtended)
3873     return initFromF80LongDoubleAPInt(api);
3874   if (Sem == &semIEEEquad)
3875     return initFromQuadrupleAPInt(api);
3876   if (Sem == &semPPCDoubleDoubleLegacy)
3877     return initFromPPCDoubleDoubleAPInt(api);
3878   if (Sem == &semFloat8E5M2)
3879     return initFromFloat8E5M2APInt(api);
3880   if (Sem == &semFloat8E5M2FNUZ)
3881     return initFromFloat8E5M2FNUZAPInt(api);
3882   if (Sem == &semFloat8E4M3FN)
3883     return initFromFloat8E4M3FNAPInt(api);
3884   if (Sem == &semFloat8E4M3FNUZ)
3885     return initFromFloat8E4M3FNUZAPInt(api);
3886   if (Sem == &semFloat8E4M3B11FNUZ)
3887     return initFromFloat8E4M3B11FNUZAPInt(api);
3888   if (Sem == &semFloatTF32)
3889     return initFromFloatTF32APInt(api);
3890
3891   llvm_unreachable(nullptr);
3892 }
3893
3894 /// Make this number the largest magnitude normal number in the given
3895 /// semantics.
3896 void IEEEFloat::makeLargest(bool Negative) {
3897   // We want (in interchange format):
3898   //   sign = {Negative}
3899   //   exponent = 1..10
3900   //   significand = 1..1
3901   category = fcNormal;
3902   sign = Negative;
3903   exponent = semantics->maxExponent;
3904
3905   // Use memset to set all but the highest integerPart to all ones.
3906   integerPart *significand = significandParts();
3907   unsigned PartCount = partCount();
3908   memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3909
3910   // Set the high integerPart especially setting all unused top bits for
3911   // internal consistency.
3912   const unsigned NumUnusedHighBits =
3913     PartCount*integerPartWidth - semantics->precision;
3914   significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3915                                    ? (~integerPart(0) >> NumUnusedHighBits)
3916                                    : 0;
3917
3918   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
3919       semantics->nanEncoding == fltNanEncoding::AllOnes)
3920     significand[0] &= ~integerPart(1);
3921 }
3922
3923 /// Make this number the smallest magnitude denormal number in the given
3924 /// semantics.
3925 void IEEEFloat::makeSmallest(bool Negative) {
3926   // We want (in interchange format):
3927   //   sign = {Negative}
3928   //   exponent = 0..0
3929   //   significand = 0..01
3930   category = fcNormal;
3931   sign = Negative;
3932   exponent = semantics->minExponent;
3933   APInt::tcSet(significandParts(), 1, partCount());
3934 }
3935
3936 void IEEEFloat::makeSmallestNormalized(bool Negative) {
3937   // We want (in interchange format):
3938   //   sign = {Negative}
3939   //   exponent = 0..0
3940   //   significand = 10..0
3941
3942   category = fcNormal;
3943   zeroSignificand();
3944   sign = Negative;
3945   exponent = semantics->minExponent;
3946   APInt::tcSetBit(significandParts(), semantics->precision - 1);
3947 }
3948
3949 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
3950   initFromAPInt(&Sem, API);
3951 }
3952
3953 IEEEFloat::IEEEFloat(float f) {
3954   initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
3955 }
3956
3957 IEEEFloat::IEEEFloat(double d) {
3958   initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
3959 }
3960
3961 namespace {
3962   void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3963     Buffer.append(Str.begin(), Str.end());
3964   }
3965
3966   /// Removes data from the given significand until it is no more
3967   /// precise than is required for the desired precision.
3968   void AdjustToPrecision(APInt &significand,
3969                          int &exp, unsigned FormatPrecision) {
3970     unsigned bits = significand.getActiveBits();
3971
3972     // 196/59 is a very slight overestimate of lg_2(10).
3973     unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3974
3975     if (bits <= bitsRequired) return;
3976
3977     unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3978     if (!tensRemovable) return;
3979
3980     exp += tensRemovable;
3981
3982     APInt divisor(significand.getBitWidth(), 1);
3983     APInt powten(significand.getBitWidth(), 10);
3984     while (true) {
3985       if (tensRemovable & 1)
3986         divisor *= powten;
3987       tensRemovable >>= 1;
3988       if (!tensRemovable) break;
3989       powten *= powten;
3990     }
3991
3992     significand = significand.udiv(divisor);
3993
3994     // Truncate the significand down to its active bit count.
3995     significand = significand.trunc(significand.getActiveBits());
3996   }
3997
3998
3999   void AdjustToPrecision(SmallVectorImpl<char> &buffer,
4000                          int &exp, unsigned FormatPrecision) {
4001     unsigned N = buffer.size();
4002     if (N <= FormatPrecision) return;
4003
4004     // The most significant figures are the last ones in the buffer.
4005     unsigned FirstSignificant = N - FormatPrecision;
4006
4007     // Round.
4008     // FIXME: this probably shouldn't use 'round half up'.
4009
4010     // Rounding down is just a truncation, except we also want to drop
4011     // trailing zeros from the new result.
4012     if (buffer[FirstSignificant - 1] < '5') {
4013       while (FirstSignificant < N && buffer[FirstSignificant] == '0')
4014         FirstSignificant++;
4015
4016       exp += FirstSignificant;
4017       buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4018       return;
4019     }
4020
4021     // Rounding up requires a decimal add-with-carry.  If we continue
4022     // the carry, the newly-introduced zeros will just be truncated.
4023     for (unsigned I = FirstSignificant; I != N; ++I) {
4024       if (buffer[I] == '9') {
4025         FirstSignificant++;
4026       } else {
4027         buffer[I]++;
4028         break;
4029       }
4030     }
4031
4032     // If we carried through, we have exactly one digit of precision.
4033     if (FirstSignificant == N) {
4034       exp += FirstSignificant;
4035       buffer.clear();
4036       buffer.push_back('1');
4037       return;
4038     }
4039
4040     exp += FirstSignificant;
4041     buffer.erase(&buffer[0], &buffer[FirstSignificant]);
4042   }
4043 } // namespace
4044
4045 void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
4046                          unsigned FormatMaxPadding, bool TruncateZero) const {
4047   switch (category) {
4048   case fcInfinity:
4049     if (isNegative())
4050       return append(Str, "-Inf");
4051     else
4052       return append(Str, "+Inf");
4053
4054   case fcNaN: return append(Str, "NaN");
4055
4056   case fcZero:
4057     if (isNegative())
4058       Str.push_back('-');
4059
4060     if (!FormatMaxPadding) {
4061       if (TruncateZero)
4062         append(Str, "0.0E+0");
4063       else {
4064         append(Str, "0.0");
4065         if (FormatPrecision > 1)
4066           Str.append(FormatPrecision - 1, '0');
4067         append(Str, "e+00");
4068       }
4069     } else
4070       Str.push_back('0');
4071     return;
4072
4073   case fcNormal:
4074     break;
4075   }
4076
4077   if (isNegative())
4078     Str.push_back('-');
4079
4080   // Decompose the number into an APInt and an exponent.
4081   int exp = exponent - ((int) semantics->precision - 1);
4082   APInt significand(
4083       semantics->precision,
4084       ArrayRef(significandParts(), partCountForBits(semantics->precision)));
4085
4086   // Set FormatPrecision if zero.  We want to do this before we
4087   // truncate trailing zeros, as those are part of the precision.
4088   if (!FormatPrecision) {
4089     // We use enough digits so the number can be round-tripped back to an
4090     // APFloat. The formula comes from "How to Print Floating-Point Numbers
4091     // Accurately" by Steele and White.
4092     // FIXME: Using a formula based purely on the precision is conservative;
4093     // we can print fewer digits depending on the actual value being printed.
4094
4095     // FormatPrecision = 2 + floor(significandBits / lg_2(10))
4096     FormatPrecision = 2 + semantics->precision * 59 / 196;
4097   }
4098
4099   // Ignore trailing binary zeros.
4100   int trailingZeros = significand.countr_zero();
4101   exp += trailingZeros;
4102   significand.lshrInPlace(trailingZeros);
4103
4104   // Change the exponent from 2^e to 10^e.
4105   if (exp == 0) {
4106     // Nothing to do.
4107   } else if (exp > 0) {
4108     // Just shift left.
4109     significand = significand.zext(semantics->precision + exp);
4110     significand <<= exp;
4111     exp = 0;
4112   } else { /* exp < 0 */
4113     int texp = -exp;
4114
4115     // We transform this using the identity:
4116     //   (N)(2^-e) == (N)(5^e)(10^-e)
4117     // This means we have to multiply N (the significand) by 5^e.
4118     // To avoid overflow, we have to operate on numbers large
4119     // enough to store N * 5^e:
4120     //   log2(N * 5^e) == log2(N) + e * log2(5)
4121     //                 <= semantics->precision + e * 137 / 59
4122     //   (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
4123
4124     unsigned precision = semantics->precision + (137 * texp + 136) / 59;
4125
4126     // Multiply significand by 5^e.
4127     //   N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
4128     significand = significand.zext(precision);
4129     APInt five_to_the_i(precision, 5);
4130     while (true) {
4131       if (texp & 1) significand *= five_to_the_i;
4132
4133       texp >>= 1;
4134       if (!texp) break;
4135       five_to_the_i *= five_to_the_i;
4136     }
4137   }
4138
4139   AdjustToPrecision(significand, exp, FormatPrecision);
4140
4141   SmallVector<char, 256> buffer;
4142
4143   // Fill the buffer.
4144   unsigned precision = significand.getBitWidth();
4145   if (precision < 4) {
4146     // We need enough precision to store the value 10.
4147     precision = 4;
4148     significand = significand.zext(precision);
4149   }
4150   APInt ten(precision, 10);
4151   APInt digit(precision, 0);
4152
4153   bool inTrail = true;
4154   while (significand != 0) {
4155     // digit <- significand % 10
4156     // significand <- significand / 10
4157     APInt::udivrem(significand, ten, significand, digit);
4158
4159     unsigned d = digit.getZExtValue();
4160
4161     // Drop trailing zeros.
4162     if (inTrail && !d) exp++;
4163     else {
4164       buffer.push_back((char) ('0' + d));
4165       inTrail = false;
4166     }
4167   }
4168
4169   assert(!buffer.empty() && "no characters in buffer!");
4170
4171   // Drop down to FormatPrecision.
4172   // TODO: don't do more precise calculations above than are required.
4173   AdjustToPrecision(buffer, exp, FormatPrecision);
4174
4175   unsigned NDigits = buffer.size();
4176
4177   // Check whether we should use scientific notation.
4178   bool FormatScientific;
4179   if (!FormatMaxPadding)
4180     FormatScientific = true;
4181   else {
4182     if (exp >= 0) {
4183       // 765e3 --> 765000
4184       //              ^^^
4185       // But we shouldn't make the number look more precise than it is.
4186       FormatScientific = ((unsigned) exp > FormatMaxPadding ||
4187                           NDigits + (unsigned) exp > FormatPrecision);
4188     } else {
4189       // Power of the most significant digit.
4190       int MSD = exp + (int) (NDigits - 1);
4191       if (MSD >= 0) {
4192         // 765e-2 == 7.65
4193         FormatScientific = false;
4194       } else {
4195         // 765e-5 == 0.00765
4196         //           ^ ^^
4197         FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
4198       }
4199     }
4200   }
4201
4202   // Scientific formatting is pretty straightforward.
4203   if (FormatScientific) {
4204     exp += (NDigits - 1);
4205
4206     Str.push_back(buffer[NDigits-1]);
4207     Str.push_back('.');
4208     if (NDigits == 1 && TruncateZero)
4209       Str.push_back('0');
4210     else
4211       for (unsigned I = 1; I != NDigits; ++I)
4212         Str.push_back(buffer[NDigits-1-I]);
4213     // Fill with zeros up to FormatPrecision.
4214     if (!TruncateZero && FormatPrecision > NDigits - 1)
4215       Str.append(FormatPrecision - NDigits + 1, '0');
4216     // For !TruncateZero we use lower 'e'.
4217     Str.push_back(TruncateZero ? 'E' : 'e');
4218
4219     Str.push_back(exp >= 0 ? '+' : '-');
4220     if (exp < 0) exp = -exp;
4221     SmallVector<char, 6> expbuf;
4222     do {
4223       expbuf.push_back((char) ('0' + (exp % 10)));
4224       exp /= 10;
4225     } while (exp);
4226     // Exponent always at least two digits if we do not truncate zeros.
4227     if (!TruncateZero && expbuf.size() < 2)
4228       expbuf.push_back('0');
4229     for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
4230       Str.push_back(expbuf[E-1-I]);
4231     return;
4232   }
4233
4234   // Non-scientific, positive exponents.
4235   if (exp >= 0) {
4236     for (unsigned I = 0; I != NDigits; ++I)
4237       Str.push_back(buffer[NDigits-1-I]);
4238     for (unsigned I = 0; I != (unsigned) exp; ++I)
4239       Str.push_back('0');
4240     return;
4241   }
4242
4243   // Non-scientific, negative exponents.
4244
4245   // The number of digits to the left of the decimal point.
4246   int NWholeDigits = exp + (int) NDigits;
4247
4248   unsigned I = 0;
4249   if (NWholeDigits > 0) {
4250     for (; I != (unsigned) NWholeDigits; ++I)
4251       Str.push_back(buffer[NDigits-I-1]);
4252     Str.push_back('.');
4253   } else {
4254     unsigned NZeros = 1 + (unsigned) -NWholeDigits;
4255
4256     Str.push_back('0');
4257     Str.push_back('.');
4258     for (unsigned Z = 1; Z != NZeros; ++Z)
4259       Str.push_back('0');
4260   }
4261
4262   for (; I != NDigits; ++I)
4263     Str.push_back(buffer[NDigits-I-1]);
4264 }
4265
4266 bool IEEEFloat::getExactInverse(APFloat *inv) const {
4267   // Special floats and denormals have no exact inverse.
4268   if (!isFiniteNonZero())
4269     return false;
4270
4271   // Check that the number is a power of two by making sure that only the
4272   // integer bit is set in the significand.
4273   if (significandLSB() != semantics->precision - 1)
4274     return false;
4275
4276   // Get the inverse.
4277   IEEEFloat reciprocal(*semantics, 1ULL);
4278   if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
4279     return false;
4280
4281   // Avoid multiplication with a denormal, it is not safe on all platforms and
4282   // may be slower than a normal division.
4283   if (reciprocal.isDenormal())
4284     return false;
4285
4286   assert(reciprocal.isFiniteNonZero() &&
4287          reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
4288
4289   if (inv)
4290     *inv = APFloat(reciprocal, *semantics);
4291
4292   return true;
4293 }
4294
4295 int IEEEFloat::getExactLog2Abs() const {
4296   if (!isFinite() || isZero())
4297     return INT_MIN;
4298
4299   const integerPart *Parts = significandParts();
4300   const int PartCount = partCountForBits(semantics->precision);
4301
4302   int PopCount = 0;
4303   for (int i = 0; i < PartCount; ++i) {
4304     PopCount += llvm::popcount(Parts[i]);
4305     if (PopCount > 1)
4306       return INT_MIN;
4307   }
4308
4309   if (exponent != semantics->minExponent)
4310     return exponent;
4311
4312   int CountrParts = 0;
4313   for (int i = 0; i < PartCount;
4314        ++i, CountrParts += APInt::APINT_BITS_PER_WORD) {
4315     if (Parts[i] != 0) {
4316       return exponent - semantics->precision + CountrParts +
4317              llvm::countr_zero(Parts[i]) + 1;
4318     }
4319   }
4320
4321   llvm_unreachable("didn't find the set bit");
4322 }
4323
4324 bool IEEEFloat::isSignaling() const {
4325   if (!isNaN())
4326     return false;
4327   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
4328     return false;
4329
4330   // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
4331   // first bit of the trailing significand being 0.
4332   return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
4333 }
4334
4335 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
4336 ///
4337 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
4338 /// appropriate sign switching before/after the computation.
4339 IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
4340   // If we are performing nextDown, swap sign so we have -x.
4341   if (nextDown)
4342     changeSign();
4343
4344   // Compute nextUp(x)
4345   opStatus result = opOK;
4346
4347   // Handle each float category separately.
4348   switch (category) {
4349   case fcInfinity:
4350     // nextUp(+inf) = +inf
4351     if (!isNegative())
4352       break;
4353     // nextUp(-inf) = -getLargest()
4354     makeLargest(true);
4355     break;
4356   case fcNaN:
4357     // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
4358     // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
4359     //                     change the payload.
4360     if (isSignaling()) {
4361       result = opInvalidOp;
4362       // For consistency, propagate the sign of the sNaN to the qNaN.
4363       makeNaN(false, isNegative(), nullptr);
4364     }
4365     break;
4366   case fcZero:
4367     // nextUp(pm 0) = +getSmallest()
4368     makeSmallest(false);
4369     break;
4370   case fcNormal:
4371     // nextUp(-getSmallest()) = -0
4372     if (isSmallest() && isNegative()) {
4373       APInt::tcSet(significandParts(), 0, partCount());
4374       category = fcZero;
4375       exponent = 0;
4376       if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
4377         sign = false;
4378       break;
4379     }
4380
4381     if (isLargest() && !isNegative()) {
4382       if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4383         // nextUp(getLargest()) == NAN
4384         makeNaN();
4385         break;
4386       } else {
4387         // nextUp(getLargest()) == INFINITY
4388         APInt::tcSet(significandParts(), 0, partCount());
4389         category = fcInfinity;
4390         exponent = semantics->maxExponent + 1;
4391         break;
4392       }
4393     }
4394
4395     // nextUp(normal) == normal + inc.
4396     if (isNegative()) {
4397       // If we are negative, we need to decrement the significand.
4398
4399       // We only cross a binade boundary that requires adjusting the exponent
4400       // if:
4401       //   1. exponent != semantics->minExponent. This implies we are not in the
4402       //   smallest binade or are dealing with denormals.
4403       //   2. Our significand excluding the integral bit is all zeros.
4404       bool WillCrossBinadeBoundary =
4405         exponent != semantics->minExponent && isSignificandAllZeros();
4406
4407       // Decrement the significand.
4408       //
4409       // We always do this since:
4410       //   1. If we are dealing with a non-binade decrement, by definition we
4411       //   just decrement the significand.
4412       //   2. If we are dealing with a normal -> normal binade decrement, since
4413       //   we have an explicit integral bit the fact that all bits but the
4414       //   integral bit are zero implies that subtracting one will yield a
4415       //   significand with 0 integral bit and 1 in all other spots. Thus we
4416       //   must just adjust the exponent and set the integral bit to 1.
4417       //   3. If we are dealing with a normal -> denormal binade decrement,
4418       //   since we set the integral bit to 0 when we represent denormals, we
4419       //   just decrement the significand.
4420       integerPart *Parts = significandParts();
4421       APInt::tcDecrement(Parts, partCount());
4422
4423       if (WillCrossBinadeBoundary) {
4424         // Our result is a normal number. Do the following:
4425         // 1. Set the integral bit to 1.
4426         // 2. Decrement the exponent.
4427         APInt::tcSetBit(Parts, semantics->precision - 1);
4428         exponent--;
4429       }
4430     } else {
4431       // If we are positive, we need to increment the significand.
4432
4433       // We only cross a binade boundary that requires adjusting the exponent if
4434       // the input is not a denormal and all of said input's significand bits
4435       // are set. If all of said conditions are true: clear the significand, set
4436       // the integral bit to 1, and increment the exponent. If we have a
4437       // denormal always increment since moving denormals and the numbers in the
4438       // smallest normal binade have the same exponent in our representation.
4439       bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
4440
4441       if (WillCrossBinadeBoundary) {
4442         integerPart *Parts = significandParts();
4443         APInt::tcSet(Parts, 0, partCount());
4444         APInt::tcSetBit(Parts, semantics->precision - 1);
4445         assert(exponent != semantics->maxExponent &&
4446                "We can not increment an exponent beyond the maxExponent allowed"
4447                " by the given floating point semantics.");
4448         exponent++;
4449       } else {
4450         incrementSignificand();
4451       }
4452     }
4453     break;
4454   }
4455
4456   // If we are performing nextDown, swap sign so we have -nextUp(-x)
4457   if (nextDown)
4458     changeSign();
4459
4460   return result;
4461 }
4462
4463 APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
4464   return ::exponentNaN(*semantics);
4465 }
4466
4467 APFloatBase::ExponentType IEEEFloat::exponentInf() const {
4468   return ::exponentInf(*semantics);
4469 }
4470
4471 APFloatBase::ExponentType IEEEFloat::exponentZero() const {
4472   return ::exponentZero(*semantics);
4473 }
4474
4475 void IEEEFloat::makeInf(bool Negative) {
4476   if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
4477     // There is no Inf, so make NaN instead.
4478     makeNaN(false, Negative);
4479     return;
4480   }
4481   category = fcInfinity;
4482   sign = Negative;
4483   exponent = exponentInf();
4484   APInt::tcSet(significandParts(), 0, partCount());
4485 }
4486
4487 void IEEEFloat::makeZero(bool Negative) {
4488   category = fcZero;
4489   sign = Negative;
4490   if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
4491     // Merge negative zero to positive because 0b10000...000 is used for NaN
4492     sign = false;
4493   }
4494   exponent = exponentZero();
4495   APInt::tcSet(significandParts(), 0, partCount());
4496 }
4497
4498 void IEEEFloat::makeQuiet() {
4499   assert(isNaN());
4500   if (semantics->nonFiniteBehavior != fltNonfiniteBehavior::NanOnly)
4501     APInt::tcSetBit(significandParts(), semantics->precision - 2);
4502 }
4503
4504 int ilogb(const IEEEFloat &Arg) {
4505   if (Arg.isNaN())
4506     return IEEEFloat::IEK_NaN;
4507   if (Arg.isZero())
4508     return IEEEFloat::IEK_Zero;
4509   if (Arg.isInfinity())
4510     return IEEEFloat::IEK_Inf;
4511   if (!Arg.isDenormal())
4512     return Arg.exponent;
4513
4514   IEEEFloat Normalized(Arg);
4515   int SignificandBits = Arg.getSemantics().precision - 1;
4516
4517   Normalized.exponent += SignificandBits;
4518   Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
4519   return Normalized.exponent - SignificandBits;
4520 }
4521
4522 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {
4523   auto MaxExp = X.getSemantics().maxExponent;
4524   auto MinExp = X.getSemantics().minExponent;
4525
4526   // If Exp is wildly out-of-scale, simply adding it to X.exponent will
4527   // overflow; clamp it to a safe range before adding, but ensure that the range
4528   // is large enough that the clamp does not change the result. The range we
4529   // need to support is the difference between the largest possible exponent and
4530   // the normalized exponent of half the smallest denormal.
4531
4532   int SignificandBits = X.getSemantics().precision - 1;
4533   int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
4534
4535   // Clamp to one past the range ends to let normalize handle overlflow.
4536   X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement);
4537   X.normalize(RoundingMode, lfExactlyZero);
4538   if (X.isNaN())
4539     X.makeQuiet();
4540   return X;
4541 }
4542
4543 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) {
4544   Exp = ilogb(Val);
4545
4546   // Quiet signalling nans.
4547   if (Exp == IEEEFloat::IEK_NaN) {
4548     IEEEFloat Quiet(Val);
4549     Quiet.makeQuiet();
4550     return Quiet;
4551   }
4552
4553   if (Exp == IEEEFloat::IEK_Inf)
4554     return Val;
4555
4556   // 1 is added because frexp is defined to return a normalized fraction in
4557   // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
4558   Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1;
4559   return scalbn(Val, -Exp, RM);
4560 }
4561
4562 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
4563     : Semantics(&S),
4564       Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
4565   assert(Semantics == &semPPCDoubleDouble);
4566 }
4567
4568 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
4569     : Semantics(&S),
4570       Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
4571                             APFloat(semIEEEdouble, uninitialized)}) {
4572   assert(Semantics == &semPPCDoubleDouble);
4573 }
4574
4575 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
4576     : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
4577                                            APFloat(semIEEEdouble)}) {
4578   assert(Semantics == &semPPCDoubleDouble);
4579 }
4580
4581 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
4582     : Semantics(&S),
4583       Floats(new APFloat[2]{
4584           APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
4585           APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
4586   assert(Semantics == &semPPCDoubleDouble);
4587 }
4588
4589 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
4590                              APFloat &&Second)
4591     : Semantics(&S),
4592       Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
4593   assert(Semantics == &semPPCDoubleDouble);
4594   assert(&Floats[0].getSemantics() == &semIEEEdouble);
4595   assert(&Floats[1].getSemantics() == &semIEEEdouble);
4596 }
4597
4598 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
4599     : Semantics(RHS.Semantics),
4600       Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
4601                                          APFloat(RHS.Floats[1])}
4602                         : nullptr) {
4603   assert(Semantics == &semPPCDoubleDouble);
4604 }
4605
4606 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
4607     : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
4608   RHS.Semantics = &semBogus;
4609   assert(Semantics == &semPPCDoubleDouble);
4610 }
4611
4612 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
4613   if (Semantics == RHS.Semantics && RHS.Floats) {
4614     Floats[0] = RHS.Floats[0];
4615     Floats[1] = RHS.Floats[1];
4616   } else if (this != &RHS) {
4617     this->~DoubleAPFloat();
4618     new (this) DoubleAPFloat(RHS);
4619   }
4620   return *this;
4621 }
4622
4623 // Implement addition, subtraction, multiplication and division based on:
4624 // "Software for Doubled-Precision Floating-Point Computations",
4625 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
4626 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
4627                                          const APFloat &c, const APFloat &cc,
4628                                          roundingMode RM) {
4629   int Status = opOK;
4630   APFloat z = a;
4631   Status |= z.add(c, RM);
4632   if (!z.isFinite()) {
4633     if (!z.isInfinity()) {
4634       Floats[0] = std::move(z);
4635       Floats[1].makeZero(/* Neg = */ false);
4636       return (opStatus)Status;
4637     }
4638     Status = opOK;
4639     auto AComparedToC = a.compareAbsoluteValue(c);
4640     z = cc;
4641     Status |= z.add(aa, RM);
4642     if (AComparedToC == APFloat::cmpGreaterThan) {
4643       // z = cc + aa + c + a;
4644       Status |= z.add(c, RM);
4645       Status |= z.add(a, RM);
4646     } else {
4647       // z = cc + aa + a + c;
4648       Status |= z.add(a, RM);
4649       Status |= z.add(c, RM);
4650     }
4651     if (!z.isFinite()) {
4652       Floats[0] = std::move(z);
4653       Floats[1].makeZero(/* Neg = */ false);
4654       return (opStatus)Status;
4655     }
4656     Floats[0] = z;
4657     APFloat zz = aa;
4658     Status |= zz.add(cc, RM);
4659     if (AComparedToC == APFloat::cmpGreaterThan) {
4660       // Floats[1] = a - z + c + zz;
4661       Floats[1] = a;
4662       Status |= Floats[1].subtract(z, RM);
4663       Status |= Floats[1].add(c, RM);
4664       Status |= Floats[1].add(zz, RM);
4665     } else {
4666       // Floats[1] = c - z + a + zz;
4667       Floats[1] = c;
4668       Status |= Floats[1].subtract(z, RM);
4669       Status |= Floats[1].add(a, RM);
4670       Status |= Floats[1].add(zz, RM);
4671     }
4672   } else {
4673     // q = a - z;
4674     APFloat q = a;
4675     Status |= q.subtract(z, RM);
4676
4677     // zz = q + c + (a - (q + z)) + aa + cc;
4678     // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4679     auto zz = q;
4680     Status |= zz.add(c, RM);
4681     Status |= q.add(z, RM);
4682     Status |= q.subtract(a, RM);
4683     q.changeSign();
4684     Status |= zz.add(q, RM);
4685     Status |= zz.add(aa, RM);
4686     Status |= zz.add(cc, RM);
4687     if (zz.isZero() && !zz.isNegative()) {
4688       Floats[0] = std::move(z);
4689       Floats[1].makeZero(/* Neg = */ false);
4690       return opOK;
4691     }
4692     Floats[0] = z;
4693     Status |= Floats[0].add(zz, RM);
4694     if (!Floats[0].isFinite()) {
4695       Floats[1].makeZero(/* Neg = */ false);
4696       return (opStatus)Status;
4697     }
4698     Floats[1] = std::move(z);
4699     Status |= Floats[1].subtract(Floats[0], RM);
4700     Status |= Floats[1].add(zz, RM);
4701   }
4702   return (opStatus)Status;
4703 }
4704
4705 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4706                                                 const DoubleAPFloat &RHS,
4707                                                 DoubleAPFloat &Out,
4708                                                 roundingMode RM) {
4709   if (LHS.getCategory() == fcNaN) {
4710     Out = LHS;
4711     return opOK;
4712   }
4713   if (RHS.getCategory() == fcNaN) {
4714     Out = RHS;
4715     return opOK;
4716   }
4717   if (LHS.getCategory() == fcZero) {
4718     Out = RHS;
4719     return opOK;
4720   }
4721   if (RHS.getCategory() == fcZero) {
4722     Out = LHS;
4723     return opOK;
4724   }
4725   if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4726       LHS.isNegative() != RHS.isNegative()) {
4727     Out.makeNaN(false, Out.isNegative(), nullptr);
4728     return opInvalidOp;
4729   }
4730   if (LHS.getCategory() == fcInfinity) {
4731     Out = LHS;
4732     return opOK;
4733   }
4734   if (RHS.getCategory() == fcInfinity) {
4735     Out = RHS;
4736     return opOK;
4737   }
4738   assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4739
4740   APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4741       CC(RHS.Floats[1]);
4742   assert(&A.getSemantics() == &semIEEEdouble);
4743   assert(&AA.getSemantics() == &semIEEEdouble);
4744   assert(&C.getSemantics() == &semIEEEdouble);
4745   assert(&CC.getSemantics() == &semIEEEdouble);
4746   assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
4747   assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
4748   return Out.addImpl(A, AA, C, CC, RM);
4749 }
4750
4751 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS,
4752                                      roundingMode RM) {
4753   return addWithSpecial(*this, RHS, *this, RM);
4754 }
4755
4756 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS,
4757                                           roundingMode RM) {
4758   changeSign();
4759   auto Ret = add(RHS, RM);
4760   changeSign();
4761   return Ret;
4762 }
4763
4764 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS,
4765                                           APFloat::roundingMode RM) {
4766   const auto &LHS = *this;
4767   auto &Out = *this;
4768   /* Interesting observation: For special categories, finding the lowest
4769      common ancestor of the following layered graph gives the correct
4770      return category:
4771
4772         NaN
4773        /   \
4774      Zero  Inf
4775        \   /
4776        Normal
4777
4778      e.g. NaN * NaN = NaN
4779           Zero * Inf = NaN
4780           Normal * Zero = Zero
4781           Normal * Inf = Inf
4782   */
4783   if (LHS.getCategory() == fcNaN) {
4784     Out = LHS;
4785     return opOK;
4786   }
4787   if (RHS.getCategory() == fcNaN) {
4788     Out = RHS;
4789     return opOK;
4790   }
4791   if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
4792       (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
4793     Out.makeNaN(false, false, nullptr);
4794     return opOK;
4795   }
4796   if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
4797     Out = LHS;
4798     return opOK;
4799   }
4800   if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
4801     Out = RHS;
4802     return opOK;
4803   }
4804   assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
4805          "Special cases not handled exhaustively");
4806
4807   int Status = opOK;
4808   APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
4809   // t = a * c
4810   APFloat T = A;
4811   Status |= T.multiply(C, RM);
4812   if (!T.isFiniteNonZero()) {
4813     Floats[0] = T;
4814     Floats[1].makeZero(/* Neg = */ false);
4815     return (opStatus)Status;
4816   }
4817
4818   // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4819   APFloat Tau = A;
4820   T.changeSign();
4821   Status |= Tau.fusedMultiplyAdd(C, T, RM);
4822   T.changeSign();
4823   {
4824     // v = a * d
4825     APFloat V = A;
4826     Status |= V.multiply(D, RM);
4827     // w = b * c
4828     APFloat W = B;
4829     Status |= W.multiply(C, RM);
4830     Status |= V.add(W, RM);
4831     // tau += v + w
4832     Status |= Tau.add(V, RM);
4833   }
4834   // u = t + tau
4835   APFloat U = T;
4836   Status |= U.add(Tau, RM);
4837
4838   Floats[0] = U;
4839   if (!U.isFinite()) {
4840     Floats[1].makeZero(/* Neg = */ false);
4841   } else {
4842     // Floats[1] = (t - u) + tau
4843     Status |= T.subtract(U, RM);
4844     Status |= T.add(Tau, RM);
4845     Floats[1] = T;
4846   }
4847   return (opStatus)Status;
4848 }
4849
4850 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS,
4851                                         APFloat::roundingMode RM) {
4852   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4853   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4854   auto Ret =
4855       Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
4856   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4857   return Ret;
4858 }
4859
4860 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) {
4861   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4862   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4863   auto Ret =
4864       Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4865   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4866   return Ret;
4867 }
4868
4869 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) {
4870   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4871   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4872   auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4873   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4874   return Ret;
4875 }
4876
4877 APFloat::opStatus
4878 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
4879                                 const DoubleAPFloat &Addend,
4880                                 APFloat::roundingMode RM) {
4881   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4882   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4883   auto Ret = Tmp.fusedMultiplyAdd(
4884       APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()),
4885       APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM);
4886   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4887   return Ret;
4888 }
4889
4890 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {
4891   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4892   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4893   auto Ret = Tmp.roundToIntegral(RM);
4894   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4895   return Ret;
4896 }
4897
4898 void DoubleAPFloat::changeSign() {
4899   Floats[0].changeSign();
4900   Floats[1].changeSign();
4901 }
4902
4903 APFloat::cmpResult
4904 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const {
4905   auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
4906   if (Result != cmpEqual)
4907     return Result;
4908   Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
4909   if (Result == cmpLessThan || Result == cmpGreaterThan) {
4910     auto Against = Floats[0].isNegative() ^ Floats[1].isNegative();
4911     auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative();
4912     if (Against && !RHSAgainst)
4913       return cmpLessThan;
4914     if (!Against && RHSAgainst)
4915       return cmpGreaterThan;
4916     if (!Against && !RHSAgainst)
4917       return Result;
4918     if (Against && RHSAgainst)
4919       return (cmpResult)(cmpLessThan + cmpGreaterThan - Result);
4920   }
4921   return Result;
4922 }
4923
4924 APFloat::fltCategory DoubleAPFloat::getCategory() const {
4925   return Floats[0].getCategory();
4926 }
4927
4928 bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
4929
4930 void DoubleAPFloat::makeInf(bool Neg) {
4931   Floats[0].makeInf(Neg);
4932   Floats[1].makeZero(/* Neg = */ false);
4933 }
4934
4935 void DoubleAPFloat::makeZero(bool Neg) {
4936   Floats[0].makeZero(Neg);
4937   Floats[1].makeZero(/* Neg = */ false);
4938 }
4939
4940 void DoubleAPFloat::makeLargest(bool Neg) {
4941   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4942   Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
4943   Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
4944   if (Neg)
4945     changeSign();
4946 }
4947
4948 void DoubleAPFloat::makeSmallest(bool Neg) {
4949   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4950   Floats[0].makeSmallest(Neg);
4951   Floats[1].makeZero(/* Neg = */ false);
4952 }
4953
4954 void DoubleAPFloat::makeSmallestNormalized(bool Neg) {
4955   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4956   Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
4957   if (Neg)
4958     Floats[0].changeSign();
4959   Floats[1].makeZero(/* Neg = */ false);
4960 }
4961
4962 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
4963   Floats[0].makeNaN(SNaN, Neg, fill);
4964   Floats[1].makeZero(/* Neg = */ false);
4965 }
4966
4967 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const {
4968   auto Result = Floats[0].compare(RHS.Floats[0]);
4969   // |Float[0]| > |Float[1]|
4970   if (Result == APFloat::cmpEqual)
4971     return Floats[1].compare(RHS.Floats[1]);
4972   return Result;
4973 }
4974
4975 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const {
4976   return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
4977          Floats[1].bitwiseIsEqual(RHS.Floats[1]);
4978 }
4979
4980 hash_code hash_value(const DoubleAPFloat &Arg) {
4981   if (Arg.Floats)
4982     return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
4983   return hash_combine(Arg.Semantics);
4984 }
4985
4986 APInt DoubleAPFloat::bitcastToAPInt() const {
4987   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4988   uint64_t Data[] = {
4989       Floats[0].bitcastToAPInt().getRawData()[0],
4990       Floats[1].bitcastToAPInt().getRawData()[0],
4991   };
4992   return APInt(128, 2, Data);
4993 }
4994
4995 Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S,
4996                                                              roundingMode RM) {
4997   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4998   APFloat Tmp(semPPCDoubleDoubleLegacy);
4999   auto Ret = Tmp.convertFromString(S, RM);
5000   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5001   return Ret;
5002 }
5003
5004 APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
5005   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5006   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5007   auto Ret = Tmp.next(nextDown);
5008   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5009   return Ret;
5010 }
5011
5012 APFloat::opStatus
5013 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input,
5014                                 unsigned int Width, bool IsSigned,
5015                                 roundingMode RM, bool *IsExact) const {
5016   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5017   return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5018       .convertToInteger(Input, Width, IsSigned, RM, IsExact);
5019 }
5020
5021 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,
5022                                                   bool IsSigned,
5023                                                   roundingMode RM) {
5024   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5025   APFloat Tmp(semPPCDoubleDoubleLegacy);
5026   auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM);
5027   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5028   return Ret;
5029 }
5030
5031 APFloat::opStatus
5032 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input,
5033                                               unsigned int InputSize,
5034                                               bool IsSigned, roundingMode RM) {
5035   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5036   APFloat Tmp(semPPCDoubleDoubleLegacy);
5037   auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);
5038   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5039   return Ret;
5040 }
5041
5042 APFloat::opStatus
5043 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input,
5044                                               unsigned int InputSize,
5045                                               bool IsSigned, roundingMode RM) {
5046   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5047   APFloat Tmp(semPPCDoubleDoubleLegacy);
5048   auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);
5049   *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
5050   return Ret;
5051 }
5052
5053 unsigned int DoubleAPFloat::convertToHexString(char *DST,
5054                                                unsigned int HexDigits,
5055                                                bool UpperCase,
5056                                                roundingMode RM) const {
5057   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5058   return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5059       .convertToHexString(DST, HexDigits, UpperCase, RM);
5060 }
5061
5062 bool DoubleAPFloat::isDenormal() const {
5063   return getCategory() == fcNormal &&
5064          (Floats[0].isDenormal() || Floats[1].isDenormal() ||
5065           // (double)(Hi + Lo) == Hi defines a normal number.
5066           Floats[0] != Floats[0] + Floats[1]);
5067 }
5068
5069 bool DoubleAPFloat::isSmallest() const {
5070   if (getCategory() != fcNormal)
5071     return false;
5072   DoubleAPFloat Tmp(*this);
5073   Tmp.makeSmallest(this->isNegative());
5074   return Tmp.compare(*this) == cmpEqual;
5075 }
5076
5077 bool DoubleAPFloat::isSmallestNormalized() const {
5078   if (getCategory() != fcNormal)
5079     return false;
5080
5081   DoubleAPFloat Tmp(*this);
5082   Tmp.makeSmallestNormalized(this->isNegative());
5083   return Tmp.compare(*this) == cmpEqual;
5084 }
5085
5086 bool DoubleAPFloat::isLargest() const {
5087   if (getCategory() != fcNormal)
5088     return false;
5089   DoubleAPFloat Tmp(*this);
5090   Tmp.makeLargest(this->isNegative());
5091   return Tmp.compare(*this) == cmpEqual;
5092 }
5093
5094 bool DoubleAPFloat::isInteger() const {
5095   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5096   return Floats[0].isInteger() && Floats[1].isInteger();
5097 }
5098
5099 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
5100                              unsigned FormatPrecision,
5101                              unsigned FormatMaxPadding,
5102                              bool TruncateZero) const {
5103   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5104   APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
5105       .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
5106 }
5107
5108 bool DoubleAPFloat::getExactInverse(APFloat *inv) const {
5109   assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5110   APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
5111   if (!inv)
5112     return Tmp.getExactInverse(nullptr);
5113   APFloat Inv(semPPCDoubleDoubleLegacy);
5114   auto Ret = Tmp.getExactInverse(&Inv);
5115   *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt());
5116   return Ret;
5117 }
5118
5119 int DoubleAPFloat::getExactLog2() const {
5120   // TODO: Implement me
5121   return INT_MIN;
5122 }
5123
5124 int DoubleAPFloat::getExactLog2Abs() const {
5125   // TODO: Implement me
5126   return INT_MIN;
5127 }
5128
5129 DoubleAPFloat scalbn(const DoubleAPFloat &Arg, int Exp,
5130                      APFloat::roundingMode RM) {
5131   assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5132   return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM),
5133                        scalbn(Arg.Floats[1], Exp, RM));
5134 }
5135
5136 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
5137                     APFloat::roundingMode RM) {
5138   assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
5139   APFloat First = frexp(Arg.Floats[0], Exp, RM);
5140   APFloat Second = Arg.Floats[1];
5141   if (Arg.getCategory() == APFloat::fcNormal)
5142     Second = scalbn(Second, -Exp, RM);
5143   return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second));
5144 }
5145
5146 } // namespace detail
5147
5148 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
5149   if (usesLayout<IEEEFloat>(Semantics)) {
5150     new (&IEEE) IEEEFloat(std::move(F));
5151     return;
5152   }
5153   if (usesLayout<DoubleAPFloat>(Semantics)) {
5154     const fltSemantics& S = F.getSemantics();
5155     new (&Double)
5156         DoubleAPFloat(Semantics, APFloat(std::move(F), S),
5157                       APFloat(semIEEEdouble));
5158     return;
5159   }
5160   llvm_unreachable("Unexpected semantics");
5161 }
5162
5163 Expected<APFloat::opStatus> APFloat::convertFromString(StringRef Str,
5164                                                        roundingMode RM) {
5165   APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM));
5166 }
5167
5168 hash_code hash_value(const APFloat &Arg) {
5169   if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
5170     return hash_value(Arg.U.IEEE);
5171   if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
5172     return hash_value(Arg.U.Double);
5173   llvm_unreachable("Unexpected semantics");
5174 }
5175
5176 APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
5177     : APFloat(Semantics) {
5178   auto StatusOrErr = convertFromString(S, rmNearestTiesToEven);
5179   assert(StatusOrErr && "Invalid floating point representation");
5180   consumeError(StatusOrErr.takeError());
5181 }
5182
5183 FPClassTest APFloat::classify() const {
5184   if (isZero())
5185     return isNegative() ? fcNegZero : fcPosZero;
5186   if (isNormal())
5187     return isNegative() ? fcNegNormal : fcPosNormal;
5188   if (isDenormal())
5189     return isNegative() ? fcNegSubnormal : fcPosSubnormal;
5190   if (isInfinity())
5191     return isNegative() ? fcNegInf : fcPosInf;
5192   assert(isNaN() && "Other class of FP constant");
5193   return isSignaling() ? fcSNan : fcQNan;
5194 }
5195
5196 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
5197                                    roundingMode RM, bool *losesInfo) {
5198   if (&getSemantics() == &ToSemantics) {
5199     *losesInfo = false;
5200     return opOK;
5201   }
5202   if (usesLayout<IEEEFloat>(getSemantics()) &&
5203       usesLayout<IEEEFloat>(ToSemantics))
5204     return U.IEEE.convert(ToSemantics, RM, losesInfo);
5205   if (usesLayout<IEEEFloat>(getSemantics()) &&
5206       usesLayout<DoubleAPFloat>(ToSemantics)) {
5207     assert(&ToSemantics == &semPPCDoubleDouble);
5208     auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo);
5209     *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
5210     return Ret;
5211   }
5212   if (usesLayout<DoubleAPFloat>(getSemantics()) &&
5213       usesLayout<IEEEFloat>(ToSemantics)) {
5214     auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
5215     *this = APFloat(std::move(getIEEE()), ToSemantics);
5216     return Ret;
5217   }
5218   llvm_unreachable("Unexpected semantics");
5219 }
5220
5221 APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics) {
5222   return APFloat(Semantics, APInt::getAllOnes(Semantics.sizeInBits));
5223 }
5224
5225 void APFloat::print(raw_ostream &OS) const {
5226   SmallVector<char, 16> Buffer;
5227   toString(Buffer);
5228   OS << Buffer << "\n";
5229 }
5230
5231 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
5232 LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); }
5233 #endif
5234
5235 void APFloat::Profile(FoldingSetNodeID &NID) const {
5236   NID.Add(bitcastToAPInt());
5237 }
5238
5239 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
5240    an APSInt, whose initial bit-width and signed-ness are used to determine the
5241    precision of the conversion.
5242  */
5243 APFloat::opStatus APFloat::convertToInteger(APSInt &result,
5244                                             roundingMode rounding_mode,
5245                                             bool *isExact) const {
5246   unsigned bitWidth = result.getBitWidth();
5247   SmallVector<uint64_t, 4> parts(result.getNumWords());
5248   opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
5249                                      rounding_mode, isExact);
5250   // Keeps the original signed-ness.
5251   result = APInt(bitWidth, parts);
5252   return status;
5253 }
5254
5255 double APFloat::convertToDouble() const {
5256   if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEdouble)
5257     return getIEEE().convertToDouble();
5258   assert(getSemantics().isRepresentableBy(semIEEEdouble) &&
5259          "Float semantics is not representable by IEEEdouble");
5260   APFloat Temp = *this;
5261   bool LosesInfo;
5262   opStatus St = Temp.convert(semIEEEdouble, rmNearestTiesToEven, &LosesInfo);
5263   assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5264   (void)St;
5265   return Temp.getIEEE().convertToDouble();
5266 }
5267
5268 float APFloat::convertToFloat() const {
5269   if (&getSemantics() == (const llvm::fltSemantics *)&semIEEEsingle)
5270     return getIEEE().convertToFloat();
5271   assert(getSemantics().isRepresentableBy(semIEEEsingle) &&
5272          "Float semantics is not representable by IEEEsingle");
5273   APFloat Temp = *this;
5274   bool LosesInfo;
5275   opStatus St = Temp.convert(semIEEEsingle, rmNearestTiesToEven, &LosesInfo);
5276   assert(!(St & opInexact) && !LosesInfo && "Unexpected imprecision");
5277   (void)St;
5278   return Temp.getIEEE().convertToFloat();
5279 }
5280
5281 } // namespace llvm
5282
5283 #undef APFLOAT_DISPATCH_ON_SEMANTICS