[x86] fix assert with horizontal math + broadcast of vector (PR43402)
[llvm-core.git] / lib / Support / APFloat.cpp
blobb79baf1834a78aba698d2fb5dfe8a0ff502f2b7a
1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a class to represent arbitrary precision floating
10 // point values and provide a variety of arithmetic operations on them.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/APSInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Config/llvm-config.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/ErrorHandling.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include <cstring>
27 #include <limits.h>
29 #define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \
30 do { \
31 if (usesLayout<IEEEFloat>(getSemantics())) \
32 return U.IEEE.METHOD_CALL; \
33 if (usesLayout<DoubleAPFloat>(getSemantics())) \
34 return U.Double.METHOD_CALL; \
35 llvm_unreachable("Unexpected semantics"); \
36 } while (false)
38 using namespace llvm;
40 /// A macro used to combine two fcCategory enums into one key which can be used
41 /// in a switch statement to classify how the interaction of two APFloat's
42 /// categories affects an operation.
43 ///
44 /// TODO: If clang source code is ever allowed to use constexpr in its own
45 /// codebase, change this into a static inline function.
46 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
48 /* Assumed in hexadecimal significand parsing, and conversion to
49 hexadecimal strings. */
50 static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
52 namespace llvm {
53 /* Represents floating point arithmetic semantics. */
54 struct fltSemantics {
55 /* The largest E such that 2^E is representable; this matches the
56 definition of IEEE 754. */
57 APFloatBase::ExponentType maxExponent;
59 /* The smallest E such that 2^E is a normalized number; this
60 matches the definition of IEEE 754. */
61 APFloatBase::ExponentType minExponent;
63 /* Number of bits in the significand. This includes the integer
64 bit. */
65 unsigned int precision;
67 /* Number of bits actually used in the semantics. */
68 unsigned int sizeInBits;
71 static const fltSemantics semIEEEhalf = {15, -14, 11, 16};
72 static const fltSemantics semIEEEsingle = {127, -126, 24, 32};
73 static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
74 static const fltSemantics semIEEEquad = {16383, -16382, 113, 128};
75 static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
76 static const fltSemantics semBogus = {0, 0, 0, 0};
78 /* The IBM double-double semantics. Such a number consists of a pair of IEEE
79 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
80 (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
81 Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
82 to each other, and two 11-bit exponents.
84 Note: we need to make the value different from semBogus as otherwise
85 an unsafe optimization may collapse both values to a single address,
86 and we heavily rely on them having distinct addresses. */
87 static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 0};
89 /* These are legacy semantics for the fallback, inaccrurate implementation of
90 IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
91 operation. It's equivalent to having an IEEE number with consecutive 106
92 bits of mantissa and 11 bits of exponent.
94 It's not equivalent to IBM double-double. For example, a legit IBM
95 double-double, 1 + epsilon:
97 1 + epsilon = 1 + (1 >> 1076)
99 is not representable by a consecutive 106 bits of mantissa.
101 Currently, these semantics are used in the following way:
103 semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
104 (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
105 semPPCDoubleDoubleLegacy -> IEEE operations
107 We use bitcastToAPInt() to get the bit representation (in APInt) of the
108 underlying IEEEdouble, then use the APInt constructor to construct the
109 legacy IEEE float.
111 TODO: Implement all operations in semPPCDoubleDouble, and delete these
112 semantics. */
113 static const fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
114 53 + 53, 128};
116 const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
117 switch (S) {
118 case S_IEEEhalf:
119 return IEEEhalf();
120 case S_IEEEsingle:
121 return IEEEsingle();
122 case S_IEEEdouble:
123 return IEEEdouble();
124 case S_x87DoubleExtended:
125 return x87DoubleExtended();
126 case S_IEEEquad:
127 return IEEEquad();
128 case S_PPCDoubleDouble:
129 return PPCDoubleDouble();
131 llvm_unreachable("Unrecognised floating semantics");
134 APFloatBase::Semantics
135 APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
136 if (&Sem == &llvm::APFloat::IEEEhalf())
137 return S_IEEEhalf;
138 else if (&Sem == &llvm::APFloat::IEEEsingle())
139 return S_IEEEsingle;
140 else if (&Sem == &llvm::APFloat::IEEEdouble())
141 return S_IEEEdouble;
142 else if (&Sem == &llvm::APFloat::x87DoubleExtended())
143 return S_x87DoubleExtended;
144 else if (&Sem == &llvm::APFloat::IEEEquad())
145 return S_IEEEquad;
146 else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
147 return S_PPCDoubleDouble;
148 else
149 llvm_unreachable("Unknown floating semantics");
152 const fltSemantics &APFloatBase::IEEEhalf() {
153 return semIEEEhalf;
155 const fltSemantics &APFloatBase::IEEEsingle() {
156 return semIEEEsingle;
158 const fltSemantics &APFloatBase::IEEEdouble() {
159 return semIEEEdouble;
161 const fltSemantics &APFloatBase::IEEEquad() {
162 return semIEEEquad;
164 const fltSemantics &APFloatBase::x87DoubleExtended() {
165 return semX87DoubleExtended;
167 const fltSemantics &APFloatBase::Bogus() {
168 return semBogus;
170 const fltSemantics &APFloatBase::PPCDoubleDouble() {
171 return semPPCDoubleDouble;
174 /* A tight upper bound on number of parts required to hold the value
175 pow(5, power) is
177 power * 815 / (351 * integerPartWidth) + 1
179 However, whilst the result may require only this many parts,
180 because we are multiplying two values to get it, the
181 multiplication may require an extra part with the excess part
182 being zero (consider the trivial case of 1 * 1, tcFullMultiply
183 requires two parts to hold the single-part result). So we add an
184 extra one to guarantee enough space whilst multiplying. */
185 const unsigned int maxExponent = 16383;
186 const unsigned int maxPrecision = 113;
187 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
188 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth));
190 unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
191 return semantics.precision;
193 APFloatBase::ExponentType
194 APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {
195 return semantics.maxExponent;
197 APFloatBase::ExponentType
198 APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {
199 return semantics.minExponent;
201 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
202 return semantics.sizeInBits;
205 unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
206 return Sem.sizeInBits;
209 /* A bunch of private, handy routines. */
211 static inline unsigned int
212 partCountForBits(unsigned int bits)
214 return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth;
217 /* Returns 0U-9U. Return values >= 10U are not digits. */
218 static inline unsigned int
219 decDigitValue(unsigned int c)
221 return c - '0';
224 /* Return the value of a decimal exponent of the form
225 [+-]ddddddd.
227 If the exponent overflows, returns a large exponent with the
228 appropriate sign. */
229 static int
230 readExponent(StringRef::iterator begin, StringRef::iterator end)
232 bool isNegative;
233 unsigned int absExponent;
234 const unsigned int overlargeExponent = 24000; /* FIXME. */
235 StringRef::iterator p = begin;
237 // Treat no exponent as 0 to match binutils
238 if (p == end || ((*p == '-' || *p == '+') && (p + 1) == end)) {
239 return 0;
242 isNegative = (*p == '-');
243 if (*p == '-' || *p == '+') {
244 p++;
245 assert(p != end && "Exponent has no digits");
248 absExponent = decDigitValue(*p++);
249 assert(absExponent < 10U && "Invalid character in exponent");
251 for (; p != end; ++p) {
252 unsigned int value;
254 value = decDigitValue(*p);
255 assert(value < 10U && "Invalid character in exponent");
257 value += absExponent * 10;
258 if (absExponent >= overlargeExponent) {
259 absExponent = overlargeExponent;
260 p = end; /* outwit assert below */
261 break;
263 absExponent = value;
266 assert(p == end && "Invalid exponent in exponent");
268 if (isNegative)
269 return -(int) absExponent;
270 else
271 return (int) absExponent;
274 /* This is ugly and needs cleaning up, but I don't immediately see
275 how whilst remaining safe. */
276 static int
277 totalExponent(StringRef::iterator p, StringRef::iterator end,
278 int exponentAdjustment)
280 int unsignedExponent;
281 bool negative, overflow;
282 int exponent = 0;
284 assert(p != end && "Exponent has no digits");
286 negative = *p == '-';
287 if (*p == '-' || *p == '+') {
288 p++;
289 assert(p != end && "Exponent has no digits");
292 unsignedExponent = 0;
293 overflow = false;
294 for (; p != end; ++p) {
295 unsigned int value;
297 value = decDigitValue(*p);
298 assert(value < 10U && "Invalid character in exponent");
300 unsignedExponent = unsignedExponent * 10 + value;
301 if (unsignedExponent > 32767) {
302 overflow = true;
303 break;
307 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
308 overflow = true;
310 if (!overflow) {
311 exponent = unsignedExponent;
312 if (negative)
313 exponent = -exponent;
314 exponent += exponentAdjustment;
315 if (exponent > 32767 || exponent < -32768)
316 overflow = true;
319 if (overflow)
320 exponent = negative ? -32768: 32767;
322 return exponent;
325 static StringRef::iterator
326 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
327 StringRef::iterator *dot)
329 StringRef::iterator p = begin;
330 *dot = end;
331 while (p != end && *p == '0')
332 p++;
334 if (p != end && *p == '.') {
335 *dot = p++;
337 assert(end - begin != 1 && "Significand has no digits");
339 while (p != end && *p == '0')
340 p++;
343 return p;
346 /* Given a normal decimal floating point number of the form
348 dddd.dddd[eE][+-]ddd
350 where the decimal point and exponent are optional, fill out the
351 structure D. Exponent is appropriate if the significand is
352 treated as an integer, and normalizedExponent if the significand
353 is taken to have the decimal point after a single leading
354 non-zero digit.
356 If the value is zero, V->firstSigDigit points to a non-digit, and
357 the return exponent is zero.
359 struct decimalInfo {
360 const char *firstSigDigit;
361 const char *lastSigDigit;
362 int exponent;
363 int normalizedExponent;
366 static void
367 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
368 decimalInfo *D)
370 StringRef::iterator dot = end;
371 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
373 D->firstSigDigit = p;
374 D->exponent = 0;
375 D->normalizedExponent = 0;
377 for (; p != end; ++p) {
378 if (*p == '.') {
379 assert(dot == end && "String contains multiple dots");
380 dot = p++;
381 if (p == end)
382 break;
384 if (decDigitValue(*p) >= 10U)
385 break;
388 if (p != end) {
389 assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
390 assert(p != begin && "Significand has no digits");
391 assert((dot == end || p - begin != 1) && "Significand has no digits");
393 /* p points to the first non-digit in the string */
394 D->exponent = readExponent(p + 1, end);
396 /* Implied decimal point? */
397 if (dot == end)
398 dot = p;
401 /* If number is all zeroes accept any exponent. */
402 if (p != D->firstSigDigit) {
403 /* Drop insignificant trailing zeroes. */
404 if (p != begin) {
407 p--;
408 while (p != begin && *p == '0');
409 while (p != begin && *p == '.');
412 /* Adjust the exponents for any decimal point. */
413 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
414 D->normalizedExponent = (D->exponent +
415 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
416 - (dot > D->firstSigDigit && dot < p)));
419 D->lastSigDigit = p;
422 /* Return the trailing fraction of a hexadecimal number.
423 DIGITVALUE is the first hex digit of the fraction, P points to
424 the next digit. */
425 static lostFraction
426 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
427 unsigned int digitValue)
429 unsigned int hexDigit;
431 /* If the first trailing digit isn't 0 or 8 we can work out the
432 fraction immediately. */
433 if (digitValue > 8)
434 return lfMoreThanHalf;
435 else if (digitValue < 8 && digitValue > 0)
436 return lfLessThanHalf;
438 // Otherwise we need to find the first non-zero digit.
439 while (p != end && (*p == '0' || *p == '.'))
440 p++;
442 assert(p != end && "Invalid trailing hexadecimal fraction!");
444 hexDigit = hexDigitValue(*p);
446 /* If we ran off the end it is exactly zero or one-half, otherwise
447 a little more. */
448 if (hexDigit == -1U)
449 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
450 else
451 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
454 /* Return the fraction lost were a bignum truncated losing the least
455 significant BITS bits. */
456 static lostFraction
457 lostFractionThroughTruncation(const APFloatBase::integerPart *parts,
458 unsigned int partCount,
459 unsigned int bits)
461 unsigned int lsb;
463 lsb = APInt::tcLSB(parts, partCount);
465 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
466 if (bits <= lsb)
467 return lfExactlyZero;
468 if (bits == lsb + 1)
469 return lfExactlyHalf;
470 if (bits <= partCount * APFloatBase::integerPartWidth &&
471 APInt::tcExtractBit(parts, bits - 1))
472 return lfMoreThanHalf;
474 return lfLessThanHalf;
477 /* Shift DST right BITS bits noting lost fraction. */
478 static lostFraction
479 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
481 lostFraction lost_fraction;
483 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
485 APInt::tcShiftRight(dst, parts, bits);
487 return lost_fraction;
490 /* Combine the effect of two lost fractions. */
491 static lostFraction
492 combineLostFractions(lostFraction moreSignificant,
493 lostFraction lessSignificant)
495 if (lessSignificant != lfExactlyZero) {
496 if (moreSignificant == lfExactlyZero)
497 moreSignificant = lfLessThanHalf;
498 else if (moreSignificant == lfExactlyHalf)
499 moreSignificant = lfMoreThanHalf;
502 return moreSignificant;
505 /* The error from the true value, in half-ulps, on multiplying two
506 floating point numbers, which differ from the value they
507 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
508 than the returned value.
510 See "How to Read Floating Point Numbers Accurately" by William D
511 Clinger. */
512 static unsigned int
513 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
515 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
517 if (HUerr1 + HUerr2 == 0)
518 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
519 else
520 return inexactMultiply + 2 * (HUerr1 + HUerr2);
523 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
524 when the least significant BITS are truncated. BITS cannot be
525 zero. */
526 static APFloatBase::integerPart
527 ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
528 bool isNearest) {
529 unsigned int count, partBits;
530 APFloatBase::integerPart part, boundary;
532 assert(bits != 0);
534 bits--;
535 count = bits / APFloatBase::integerPartWidth;
536 partBits = bits % APFloatBase::integerPartWidth + 1;
538 part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));
540 if (isNearest)
541 boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
542 else
543 boundary = 0;
545 if (count == 0) {
546 if (part - boundary <= boundary - part)
547 return part - boundary;
548 else
549 return boundary - part;
552 if (part == boundary) {
553 while (--count)
554 if (parts[count])
555 return ~(APFloatBase::integerPart) 0; /* A lot. */
557 return parts[0];
558 } else if (part == boundary - 1) {
559 while (--count)
560 if (~parts[count])
561 return ~(APFloatBase::integerPart) 0; /* A lot. */
563 return -parts[0];
566 return ~(APFloatBase::integerPart) 0; /* A lot. */
569 /* Place pow(5, power) in DST, and return the number of parts used.
570 DST must be at least one part larger than size of the answer. */
571 static unsigned int
572 powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
573 static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
574 APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
575 pow5s[0] = 78125 * 5;
577 unsigned int partsCount[16] = { 1 };
578 APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
579 unsigned int result;
580 assert(power <= maxExponent);
582 p1 = dst;
583 p2 = scratch;
585 *p1 = firstEightPowers[power & 7];
586 power >>= 3;
588 result = 1;
589 pow5 = pow5s;
591 for (unsigned int n = 0; power; power >>= 1, n++) {
592 unsigned int pc;
594 pc = partsCount[n];
596 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
597 if (pc == 0) {
598 pc = partsCount[n - 1];
599 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
600 pc *= 2;
601 if (pow5[pc - 1] == 0)
602 pc--;
603 partsCount[n] = pc;
606 if (power & 1) {
607 APFloatBase::integerPart *tmp;
609 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
610 result += pc;
611 if (p2[result - 1] == 0)
612 result--;
614 /* Now result is in p1 with partsCount parts and p2 is scratch
615 space. */
616 tmp = p1;
617 p1 = p2;
618 p2 = tmp;
621 pow5 += pc;
624 if (p1 != dst)
625 APInt::tcAssign(dst, p1, result);
627 return result;
630 /* Zero at the end to avoid modular arithmetic when adding one; used
631 when rounding up during hexadecimal output. */
632 static const char hexDigitsLower[] = "0123456789abcdef0";
633 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
634 static const char infinityL[] = "infinity";
635 static const char infinityU[] = "INFINITY";
636 static const char NaNL[] = "nan";
637 static const char NaNU[] = "NAN";
639 /* Write out an integerPart in hexadecimal, starting with the most
640 significant nibble. Write out exactly COUNT hexdigits, return
641 COUNT. */
642 static unsigned int
643 partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
644 const char *hexDigitChars)
646 unsigned int result = count;
648 assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);
650 part >>= (APFloatBase::integerPartWidth - 4 * count);
651 while (count--) {
652 dst[count] = hexDigitChars[part & 0xf];
653 part >>= 4;
656 return result;
659 /* Write out an unsigned decimal integer. */
660 static char *
661 writeUnsignedDecimal (char *dst, unsigned int n)
663 char buff[40], *p;
665 p = buff;
667 *p++ = '0' + n % 10;
668 while (n /= 10);
671 *dst++ = *--p;
672 while (p != buff);
674 return dst;
677 /* Write out a signed decimal integer. */
678 static char *
679 writeSignedDecimal (char *dst, int value)
681 if (value < 0) {
682 *dst++ = '-';
683 dst = writeUnsignedDecimal(dst, -(unsigned) value);
684 } else
685 dst = writeUnsignedDecimal(dst, value);
687 return dst;
690 namespace detail {
691 /* Constructors. */
692 void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
693 unsigned int count;
695 semantics = ourSemantics;
696 count = partCount();
697 if (count > 1)
698 significand.parts = new integerPart[count];
701 void IEEEFloat::freeSignificand() {
702 if (needsCleanup())
703 delete [] significand.parts;
706 void IEEEFloat::assign(const IEEEFloat &rhs) {
707 assert(semantics == rhs.semantics);
709 sign = rhs.sign;
710 category = rhs.category;
711 exponent = rhs.exponent;
712 if (isFiniteNonZero() || category == fcNaN)
713 copySignificand(rhs);
716 void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
717 assert(isFiniteNonZero() || category == fcNaN);
718 assert(rhs.partCount() >= partCount());
720 APInt::tcAssign(significandParts(), rhs.significandParts(),
721 partCount());
724 /* Make this number a NaN, with an arbitrary but deterministic value
725 for the significand. If double or longer, this is a signalling NaN,
726 which may not be ideal. If float, this is QNaN(0). */
727 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
728 category = fcNaN;
729 sign = Negative;
731 integerPart *significand = significandParts();
732 unsigned numParts = partCount();
734 // Set the significand bits to the fill.
735 if (!fill || fill->getNumWords() < numParts)
736 APInt::tcSet(significand, 0, numParts);
737 if (fill) {
738 APInt::tcAssign(significand, fill->getRawData(),
739 std::min(fill->getNumWords(), numParts));
741 // Zero out the excess bits of the significand.
742 unsigned bitsToPreserve = semantics->precision - 1;
743 unsigned part = bitsToPreserve / 64;
744 bitsToPreserve %= 64;
745 significand[part] &= ((1ULL << bitsToPreserve) - 1);
746 for (part++; part != numParts; ++part)
747 significand[part] = 0;
750 unsigned QNaNBit = semantics->precision - 2;
752 if (SNaN) {
753 // We always have to clear the QNaN bit to make it an SNaN.
754 APInt::tcClearBit(significand, QNaNBit);
756 // If there are no bits set in the payload, we have to set
757 // *something* to make it a NaN instead of an infinity;
758 // conventionally, this is the next bit down from the QNaN bit.
759 if (APInt::tcIsZero(significand, numParts))
760 APInt::tcSetBit(significand, QNaNBit - 1);
761 } else {
762 // We always have to set the QNaN bit to make it a QNaN.
763 APInt::tcSetBit(significand, QNaNBit);
766 // For x87 extended precision, we want to make a NaN, not a
767 // pseudo-NaN. Maybe we should expose the ability to make
768 // pseudo-NaNs?
769 if (semantics == &semX87DoubleExtended)
770 APInt::tcSetBit(significand, QNaNBit + 1);
773 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) {
774 if (this != &rhs) {
775 if (semantics != rhs.semantics) {
776 freeSignificand();
777 initialize(rhs.semantics);
779 assign(rhs);
782 return *this;
785 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
786 freeSignificand();
788 semantics = rhs.semantics;
789 significand = rhs.significand;
790 exponent = rhs.exponent;
791 category = rhs.category;
792 sign = rhs.sign;
794 rhs.semantics = &semBogus;
795 return *this;
798 bool IEEEFloat::isDenormal() const {
799 return isFiniteNonZero() && (exponent == semantics->minExponent) &&
800 (APInt::tcExtractBit(significandParts(),
801 semantics->precision - 1) == 0);
804 bool IEEEFloat::isSmallest() const {
805 // The smallest number by magnitude in our format will be the smallest
806 // denormal, i.e. the floating point number with exponent being minimum
807 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
808 return isFiniteNonZero() && exponent == semantics->minExponent &&
809 significandMSB() == 0;
812 bool IEEEFloat::isSignificandAllOnes() const {
813 // Test if the significand excluding the integral bit is all ones. This allows
814 // us to test for binade boundaries.
815 const integerPart *Parts = significandParts();
816 const unsigned PartCount = partCount();
817 for (unsigned i = 0; i < PartCount - 1; i++)
818 if (~Parts[i])
819 return false;
821 // Set the unused high bits to all ones when we compare.
822 const unsigned NumHighBits =
823 PartCount*integerPartWidth - semantics->precision + 1;
824 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
825 "fill than integerPartWidth");
826 const integerPart HighBitFill =
827 ~integerPart(0) << (integerPartWidth - NumHighBits);
828 if (~(Parts[PartCount - 1] | HighBitFill))
829 return false;
831 return true;
834 bool IEEEFloat::isSignificandAllZeros() const {
835 // Test if the significand excluding the integral bit is all zeros. This
836 // allows us to test for binade boundaries.
837 const integerPart *Parts = significandParts();
838 const unsigned PartCount = partCount();
840 for (unsigned i = 0; i < PartCount - 1; i++)
841 if (Parts[i])
842 return false;
844 const unsigned NumHighBits =
845 PartCount*integerPartWidth - semantics->precision + 1;
846 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
847 "clear than integerPartWidth");
848 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
850 if (Parts[PartCount - 1] & HighBitMask)
851 return false;
853 return true;
856 bool IEEEFloat::isLargest() const {
857 // The largest number by magnitude in our format will be the floating point
858 // number with maximum exponent and with significand that is all ones.
859 return isFiniteNonZero() && exponent == semantics->maxExponent
860 && isSignificandAllOnes();
863 bool IEEEFloat::isInteger() const {
864 // This could be made more efficient; I'm going for obviously correct.
865 if (!isFinite()) return false;
866 IEEEFloat truncated = *this;
867 truncated.roundToIntegral(rmTowardZero);
868 return compare(truncated) == cmpEqual;
871 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
872 if (this == &rhs)
873 return true;
874 if (semantics != rhs.semantics ||
875 category != rhs.category ||
876 sign != rhs.sign)
877 return false;
878 if (category==fcZero || category==fcInfinity)
879 return true;
881 if (isFiniteNonZero() && exponent != rhs.exponent)
882 return false;
884 return std::equal(significandParts(), significandParts() + partCount(),
885 rhs.significandParts());
888 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) {
889 initialize(&ourSemantics);
890 sign = 0;
891 category = fcNormal;
892 zeroSignificand();
893 exponent = ourSemantics.precision - 1;
894 significandParts()[0] = value;
895 normalize(rmNearestTiesToEven, lfExactlyZero);
898 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) {
899 initialize(&ourSemantics);
900 category = fcZero;
901 sign = false;
904 // Delegate to the previous constructor, because later copy constructor may
905 // actually inspects category, which can't be garbage.
906 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
907 : IEEEFloat(ourSemantics) {}
909 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) {
910 initialize(rhs.semantics);
911 assign(rhs);
914 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) {
915 *this = std::move(rhs);
918 IEEEFloat::~IEEEFloat() { freeSignificand(); }
920 unsigned int IEEEFloat::partCount() const {
921 return partCountForBits(semantics->precision + 1);
924 const IEEEFloat::integerPart *IEEEFloat::significandParts() const {
925 return const_cast<IEEEFloat *>(this)->significandParts();
928 IEEEFloat::integerPart *IEEEFloat::significandParts() {
929 if (partCount() > 1)
930 return significand.parts;
931 else
932 return &significand.part;
935 void IEEEFloat::zeroSignificand() {
936 APInt::tcSet(significandParts(), 0, partCount());
939 /* Increment an fcNormal floating point number's significand. */
940 void IEEEFloat::incrementSignificand() {
941 integerPart carry;
943 carry = APInt::tcIncrement(significandParts(), partCount());
945 /* Our callers should never cause us to overflow. */
946 assert(carry == 0);
947 (void)carry;
950 /* Add the significand of the RHS. Returns the carry flag. */
951 IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
952 integerPart *parts;
954 parts = significandParts();
956 assert(semantics == rhs.semantics);
957 assert(exponent == rhs.exponent);
959 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
962 /* Subtract the significand of the RHS with a borrow flag. Returns
963 the borrow flag. */
964 IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
965 integerPart borrow) {
966 integerPart *parts;
968 parts = significandParts();
970 assert(semantics == rhs.semantics);
971 assert(exponent == rhs.exponent);
973 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
974 partCount());
977 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
978 on to the full-precision result of the multiplication. Returns the
979 lost fraction. */
980 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
981 const IEEEFloat *addend) {
982 unsigned int omsb; // One, not zero, based MSB.
983 unsigned int partsCount, newPartsCount, precision;
984 integerPart *lhsSignificand;
985 integerPart scratch[4];
986 integerPart *fullSignificand;
987 lostFraction lost_fraction;
988 bool ignored;
990 assert(semantics == rhs.semantics);
992 precision = semantics->precision;
994 // Allocate space for twice as many bits as the original significand, plus one
995 // extra bit for the addition to overflow into.
996 newPartsCount = partCountForBits(precision * 2 + 1);
998 if (newPartsCount > 4)
999 fullSignificand = new integerPart[newPartsCount];
1000 else
1001 fullSignificand = scratch;
1003 lhsSignificand = significandParts();
1004 partsCount = partCount();
1006 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
1007 rhs.significandParts(), partsCount, partsCount);
1009 lost_fraction = lfExactlyZero;
1010 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1011 exponent += rhs.exponent;
1013 // Assume the operands involved in the multiplication are single-precision
1014 // FP, and the two multiplicants are:
1015 // *this = a23 . a22 ... a0 * 2^e1
1016 // rhs = b23 . b22 ... b0 * 2^e2
1017 // the result of multiplication is:
1018 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
1019 // Note that there are three significant bits at the left-hand side of the
1020 // radix point: two for the multiplication, and an overflow bit for the
1021 // addition (that will always be zero at this point). Move the radix point
1022 // toward left by two bits, and adjust exponent accordingly.
1023 exponent += 2;
1025 if (addend && addend->isNonZero()) {
1026 // The intermediate result of the multiplication has "2 * precision"
1027 // signicant bit; adjust the addend to be consistent with mul result.
1029 Significand savedSignificand = significand;
1030 const fltSemantics *savedSemantics = semantics;
1031 fltSemantics extendedSemantics;
1032 opStatus status;
1033 unsigned int extendedPrecision;
1035 // Normalize our MSB to one below the top bit to allow for overflow.
1036 extendedPrecision = 2 * precision + 1;
1037 if (omsb != extendedPrecision - 1) {
1038 assert(extendedPrecision > omsb);
1039 APInt::tcShiftLeft(fullSignificand, newPartsCount,
1040 (extendedPrecision - 1) - omsb);
1041 exponent -= (extendedPrecision - 1) - omsb;
1044 /* Create new semantics. */
1045 extendedSemantics = *semantics;
1046 extendedSemantics.precision = extendedPrecision;
1048 if (newPartsCount == 1)
1049 significand.part = fullSignificand[0];
1050 else
1051 significand.parts = fullSignificand;
1052 semantics = &extendedSemantics;
1054 IEEEFloat extendedAddend(*addend);
1055 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
1056 assert(status == opOK);
1057 (void)status;
1059 // Shift the significand of the addend right by one bit. This guarantees
1060 // that the high bit of the significand is zero (same as fullSignificand),
1061 // so the addition will overflow (if it does overflow at all) into the top bit.
1062 lost_fraction = extendedAddend.shiftSignificandRight(1);
1063 assert(lost_fraction == lfExactlyZero &&
1064 "Lost precision while shifting addend for fused-multiply-add.");
1066 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1068 /* Restore our state. */
1069 if (newPartsCount == 1)
1070 fullSignificand[0] = significand.part;
1071 significand = savedSignificand;
1072 semantics = savedSemantics;
1074 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1077 // Convert the result having "2 * precision" significant-bits back to the one
1078 // having "precision" significant-bits. First, move the radix point from
1079 // poision "2*precision - 1" to "precision - 1". The exponent need to be
1080 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1081 exponent -= precision + 1;
1083 // In case MSB resides at the left-hand side of radix point, shift the
1084 // mantissa right by some amount to make sure the MSB reside right before
1085 // the radix point (i.e. "MSB . rest-significant-bits").
1087 // Note that the result is not normalized when "omsb < precision". So, the
1088 // caller needs to call IEEEFloat::normalize() if normalized value is
1089 // expected.
1090 if (omsb > precision) {
1091 unsigned int bits, significantParts;
1092 lostFraction lf;
1094 bits = omsb - precision;
1095 significantParts = partCountForBits(omsb);
1096 lf = shiftRight(fullSignificand, significantParts, bits);
1097 lost_fraction = combineLostFractions(lf, lost_fraction);
1098 exponent += bits;
1101 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1103 if (newPartsCount > 4)
1104 delete [] fullSignificand;
1106 return lost_fraction;
1109 /* Multiply the significands of LHS and RHS to DST. */
1110 lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1111 unsigned int bit, i, partsCount;
1112 const integerPart *rhsSignificand;
1113 integerPart *lhsSignificand, *dividend, *divisor;
1114 integerPart scratch[4];
1115 lostFraction lost_fraction;
1117 assert(semantics == rhs.semantics);
1119 lhsSignificand = significandParts();
1120 rhsSignificand = rhs.significandParts();
1121 partsCount = partCount();
1123 if (partsCount > 2)
1124 dividend = new integerPart[partsCount * 2];
1125 else
1126 dividend = scratch;
1128 divisor = dividend + partsCount;
1130 /* Copy the dividend and divisor as they will be modified in-place. */
1131 for (i = 0; i < partsCount; i++) {
1132 dividend[i] = lhsSignificand[i];
1133 divisor[i] = rhsSignificand[i];
1134 lhsSignificand[i] = 0;
1137 exponent -= rhs.exponent;
1139 unsigned int precision = semantics->precision;
1141 /* Normalize the divisor. */
1142 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1143 if (bit) {
1144 exponent += bit;
1145 APInt::tcShiftLeft(divisor, partsCount, bit);
1148 /* Normalize the dividend. */
1149 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1150 if (bit) {
1151 exponent -= bit;
1152 APInt::tcShiftLeft(dividend, partsCount, bit);
1155 /* Ensure the dividend >= divisor initially for the loop below.
1156 Incidentally, this means that the division loop below is
1157 guaranteed to set the integer bit to one. */
1158 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1159 exponent--;
1160 APInt::tcShiftLeft(dividend, partsCount, 1);
1161 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1164 /* Long division. */
1165 for (bit = precision; bit; bit -= 1) {
1166 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1167 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1168 APInt::tcSetBit(lhsSignificand, bit - 1);
1171 APInt::tcShiftLeft(dividend, partsCount, 1);
1174 /* Figure out the lost fraction. */
1175 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1177 if (cmp > 0)
1178 lost_fraction = lfMoreThanHalf;
1179 else if (cmp == 0)
1180 lost_fraction = lfExactlyHalf;
1181 else if (APInt::tcIsZero(dividend, partsCount))
1182 lost_fraction = lfExactlyZero;
1183 else
1184 lost_fraction = lfLessThanHalf;
1186 if (partsCount > 2)
1187 delete [] dividend;
1189 return lost_fraction;
1192 unsigned int IEEEFloat::significandMSB() const {
1193 return APInt::tcMSB(significandParts(), partCount());
1196 unsigned int IEEEFloat::significandLSB() const {
1197 return APInt::tcLSB(significandParts(), partCount());
1200 /* Note that a zero result is NOT normalized to fcZero. */
1201 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1202 /* Our exponent should not overflow. */
1203 assert((ExponentType) (exponent + bits) >= exponent);
1205 exponent += bits;
1207 return shiftRight(significandParts(), partCount(), bits);
1210 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1211 void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1212 assert(bits < semantics->precision);
1214 if (bits) {
1215 unsigned int partsCount = partCount();
1217 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1218 exponent -= bits;
1220 assert(!APInt::tcIsZero(significandParts(), partsCount));
1224 IEEEFloat::cmpResult
1225 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
1226 int compare;
1228 assert(semantics == rhs.semantics);
1229 assert(isFiniteNonZero());
1230 assert(rhs.isFiniteNonZero());
1232 compare = exponent - rhs.exponent;
1234 /* If exponents are equal, do an unsigned bignum comparison of the
1235 significands. */
1236 if (compare == 0)
1237 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1238 partCount());
1240 if (compare > 0)
1241 return cmpGreaterThan;
1242 else if (compare < 0)
1243 return cmpLessThan;
1244 else
1245 return cmpEqual;
1248 /* Handle overflow. Sign is preserved. We either become infinity or
1249 the largest finite number. */
1250 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1251 /* Infinity? */
1252 if (rounding_mode == rmNearestTiesToEven ||
1253 rounding_mode == rmNearestTiesToAway ||
1254 (rounding_mode == rmTowardPositive && !sign) ||
1255 (rounding_mode == rmTowardNegative && sign)) {
1256 category = fcInfinity;
1257 return (opStatus) (opOverflow | opInexact);
1260 /* Otherwise we become the largest finite number. */
1261 category = fcNormal;
1262 exponent = semantics->maxExponent;
1263 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1264 semantics->precision);
1266 return opInexact;
1269 /* Returns TRUE if, when truncating the current number, with BIT the
1270 new LSB, with the given lost fraction and rounding mode, the result
1271 would need to be rounded away from zero (i.e., by increasing the
1272 signficand). This routine must work for fcZero of both signs, and
1273 fcNormal numbers. */
1274 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1275 lostFraction lost_fraction,
1276 unsigned int bit) const {
1277 /* NaNs and infinities should not have lost fractions. */
1278 assert(isFiniteNonZero() || category == fcZero);
1280 /* Current callers never pass this so we don't handle it. */
1281 assert(lost_fraction != lfExactlyZero);
1283 switch (rounding_mode) {
1284 case rmNearestTiesToAway:
1285 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1287 case rmNearestTiesToEven:
1288 if (lost_fraction == lfMoreThanHalf)
1289 return true;
1291 /* Our zeroes don't have a significand to test. */
1292 if (lost_fraction == lfExactlyHalf && category != fcZero)
1293 return APInt::tcExtractBit(significandParts(), bit);
1295 return false;
1297 case rmTowardZero:
1298 return false;
1300 case rmTowardPositive:
1301 return !sign;
1303 case rmTowardNegative:
1304 return sign;
1306 llvm_unreachable("Invalid rounding mode found");
1309 IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1310 lostFraction lost_fraction) {
1311 unsigned int omsb; /* One, not zero, based MSB. */
1312 int exponentChange;
1314 if (!isFiniteNonZero())
1315 return opOK;
1317 /* Before rounding normalize the exponent of fcNormal numbers. */
1318 omsb = significandMSB() + 1;
1320 if (omsb) {
1321 /* OMSB is numbered from 1. We want to place it in the integer
1322 bit numbered PRECISION if possible, with a compensating change in
1323 the exponent. */
1324 exponentChange = omsb - semantics->precision;
1326 /* If the resulting exponent is too high, overflow according to
1327 the rounding mode. */
1328 if (exponent + exponentChange > semantics->maxExponent)
1329 return handleOverflow(rounding_mode);
1331 /* Subnormal numbers have exponent minExponent, and their MSB
1332 is forced based on that. */
1333 if (exponent + exponentChange < semantics->minExponent)
1334 exponentChange = semantics->minExponent - exponent;
1336 /* Shifting left is easy as we don't lose precision. */
1337 if (exponentChange < 0) {
1338 assert(lost_fraction == lfExactlyZero);
1340 shiftSignificandLeft(-exponentChange);
1342 return opOK;
1345 if (exponentChange > 0) {
1346 lostFraction lf;
1348 /* Shift right and capture any new lost fraction. */
1349 lf = shiftSignificandRight(exponentChange);
1351 lost_fraction = combineLostFractions(lf, lost_fraction);
1353 /* Keep OMSB up-to-date. */
1354 if (omsb > (unsigned) exponentChange)
1355 omsb -= exponentChange;
1356 else
1357 omsb = 0;
1361 /* Now round the number according to rounding_mode given the lost
1362 fraction. */
1364 /* As specified in IEEE 754, since we do not trap we do not report
1365 underflow for exact results. */
1366 if (lost_fraction == lfExactlyZero) {
1367 /* Canonicalize zeroes. */
1368 if (omsb == 0)
1369 category = fcZero;
1371 return opOK;
1374 /* Increment the significand if we're rounding away from zero. */
1375 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1376 if (omsb == 0)
1377 exponent = semantics->minExponent;
1379 incrementSignificand();
1380 omsb = significandMSB() + 1;
1382 /* Did the significand increment overflow? */
1383 if (omsb == (unsigned) semantics->precision + 1) {
1384 /* Renormalize by incrementing the exponent and shifting our
1385 significand right one. However if we already have the
1386 maximum exponent we overflow to infinity. */
1387 if (exponent == semantics->maxExponent) {
1388 category = fcInfinity;
1390 return (opStatus) (opOverflow | opInexact);
1393 shiftSignificandRight(1);
1395 return opInexact;
1399 /* The normal case - we were and are not denormal, and any
1400 significand increment above didn't overflow. */
1401 if (omsb == semantics->precision)
1402 return opInexact;
1404 /* We have a non-zero denormal. */
1405 assert(omsb < semantics->precision);
1407 /* Canonicalize zeroes. */
1408 if (omsb == 0)
1409 category = fcZero;
1411 /* The fcZero case is a denormal that underflowed to zero. */
1412 return (opStatus) (opUnderflow | opInexact);
1415 IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1416 bool subtract) {
1417 switch (PackCategoriesIntoKey(category, rhs.category)) {
1418 default:
1419 llvm_unreachable(nullptr);
1421 case PackCategoriesIntoKey(fcNaN, fcZero):
1422 case PackCategoriesIntoKey(fcNaN, fcNormal):
1423 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1424 case PackCategoriesIntoKey(fcNaN, fcNaN):
1425 case PackCategoriesIntoKey(fcNormal, fcZero):
1426 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1427 case PackCategoriesIntoKey(fcInfinity, fcZero):
1428 return opOK;
1430 case PackCategoriesIntoKey(fcZero, fcNaN):
1431 case PackCategoriesIntoKey(fcNormal, fcNaN):
1432 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1433 // We need to be sure to flip the sign here for subtraction because we
1434 // don't have a separate negate operation so -NaN becomes 0 - NaN here.
1435 sign = rhs.sign ^ subtract;
1436 category = fcNaN;
1437 copySignificand(rhs);
1438 return opOK;
1440 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1441 case PackCategoriesIntoKey(fcZero, fcInfinity):
1442 category = fcInfinity;
1443 sign = rhs.sign ^ subtract;
1444 return opOK;
1446 case PackCategoriesIntoKey(fcZero, fcNormal):
1447 assign(rhs);
1448 sign = rhs.sign ^ subtract;
1449 return opOK;
1451 case PackCategoriesIntoKey(fcZero, fcZero):
1452 /* Sign depends on rounding mode; handled by caller. */
1453 return opOK;
1455 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1456 /* Differently signed infinities can only be validly
1457 subtracted. */
1458 if (((sign ^ rhs.sign)!=0) != subtract) {
1459 makeNaN();
1460 return opInvalidOp;
1463 return opOK;
1465 case PackCategoriesIntoKey(fcNormal, fcNormal):
1466 return opDivByZero;
1470 /* Add or subtract two normal numbers. */
1471 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1472 bool subtract) {
1473 integerPart carry;
1474 lostFraction lost_fraction;
1475 int bits;
1477 /* Determine if the operation on the absolute values is effectively
1478 an addition or subtraction. */
1479 subtract ^= static_cast<bool>(sign ^ rhs.sign);
1481 /* Are we bigger exponent-wise than the RHS? */
1482 bits = exponent - rhs.exponent;
1484 /* Subtraction is more subtle than one might naively expect. */
1485 if (subtract) {
1486 IEEEFloat temp_rhs(rhs);
1487 bool reverse;
1489 if (bits == 0) {
1490 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1491 lost_fraction = lfExactlyZero;
1492 } else if (bits > 0) {
1493 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1494 shiftSignificandLeft(1);
1495 reverse = false;
1496 } else {
1497 lost_fraction = shiftSignificandRight(-bits - 1);
1498 temp_rhs.shiftSignificandLeft(1);
1499 reverse = true;
1502 if (reverse) {
1503 carry = temp_rhs.subtractSignificand
1504 (*this, lost_fraction != lfExactlyZero);
1505 copySignificand(temp_rhs);
1506 sign = !sign;
1507 } else {
1508 carry = subtractSignificand
1509 (temp_rhs, lost_fraction != lfExactlyZero);
1512 /* Invert the lost fraction - it was on the RHS and
1513 subtracted. */
1514 if (lost_fraction == lfLessThanHalf)
1515 lost_fraction = lfMoreThanHalf;
1516 else if (lost_fraction == lfMoreThanHalf)
1517 lost_fraction = lfLessThanHalf;
1519 /* The code above is intended to ensure that no borrow is
1520 necessary. */
1521 assert(!carry);
1522 (void)carry;
1523 } else {
1524 if (bits > 0) {
1525 IEEEFloat temp_rhs(rhs);
1527 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1528 carry = addSignificand(temp_rhs);
1529 } else {
1530 lost_fraction = shiftSignificandRight(-bits);
1531 carry = addSignificand(rhs);
1534 /* We have a guard bit; generating a carry cannot happen. */
1535 assert(!carry);
1536 (void)carry;
1539 return lost_fraction;
1542 IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1543 switch (PackCategoriesIntoKey(category, rhs.category)) {
1544 default:
1545 llvm_unreachable(nullptr);
1547 case PackCategoriesIntoKey(fcNaN, fcZero):
1548 case PackCategoriesIntoKey(fcNaN, fcNormal):
1549 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1550 case PackCategoriesIntoKey(fcNaN, fcNaN):
1551 sign = false;
1552 return opOK;
1554 case PackCategoriesIntoKey(fcZero, fcNaN):
1555 case PackCategoriesIntoKey(fcNormal, fcNaN):
1556 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1557 sign = false;
1558 category = fcNaN;
1559 copySignificand(rhs);
1560 return opOK;
1562 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1563 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1564 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1565 category = fcInfinity;
1566 return opOK;
1568 case PackCategoriesIntoKey(fcZero, fcNormal):
1569 case PackCategoriesIntoKey(fcNormal, fcZero):
1570 case PackCategoriesIntoKey(fcZero, fcZero):
1571 category = fcZero;
1572 return opOK;
1574 case PackCategoriesIntoKey(fcZero, fcInfinity):
1575 case PackCategoriesIntoKey(fcInfinity, fcZero):
1576 makeNaN();
1577 return opInvalidOp;
1579 case PackCategoriesIntoKey(fcNormal, fcNormal):
1580 return opOK;
1584 IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1585 switch (PackCategoriesIntoKey(category, rhs.category)) {
1586 default:
1587 llvm_unreachable(nullptr);
1589 case PackCategoriesIntoKey(fcZero, fcNaN):
1590 case PackCategoriesIntoKey(fcNormal, fcNaN):
1591 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1592 category = fcNaN;
1593 copySignificand(rhs);
1594 LLVM_FALLTHROUGH;
1595 case PackCategoriesIntoKey(fcNaN, fcZero):
1596 case PackCategoriesIntoKey(fcNaN, fcNormal):
1597 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1598 case PackCategoriesIntoKey(fcNaN, fcNaN):
1599 sign = false;
1600 LLVM_FALLTHROUGH;
1601 case PackCategoriesIntoKey(fcInfinity, fcZero):
1602 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1603 case PackCategoriesIntoKey(fcZero, fcInfinity):
1604 case PackCategoriesIntoKey(fcZero, fcNormal):
1605 return opOK;
1607 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1608 category = fcZero;
1609 return opOK;
1611 case PackCategoriesIntoKey(fcNormal, fcZero):
1612 category = fcInfinity;
1613 return opDivByZero;
1615 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1616 case PackCategoriesIntoKey(fcZero, fcZero):
1617 makeNaN();
1618 return opInvalidOp;
1620 case PackCategoriesIntoKey(fcNormal, fcNormal):
1621 return opOK;
1625 IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
1626 switch (PackCategoriesIntoKey(category, rhs.category)) {
1627 default:
1628 llvm_unreachable(nullptr);
1630 case PackCategoriesIntoKey(fcNaN, fcZero):
1631 case PackCategoriesIntoKey(fcNaN, fcNormal):
1632 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1633 case PackCategoriesIntoKey(fcNaN, fcNaN):
1634 case PackCategoriesIntoKey(fcZero, fcInfinity):
1635 case PackCategoriesIntoKey(fcZero, fcNormal):
1636 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1637 return opOK;
1639 case PackCategoriesIntoKey(fcZero, fcNaN):
1640 case PackCategoriesIntoKey(fcNormal, fcNaN):
1641 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1642 sign = false;
1643 category = fcNaN;
1644 copySignificand(rhs);
1645 return opOK;
1647 case PackCategoriesIntoKey(fcNormal, fcZero):
1648 case PackCategoriesIntoKey(fcInfinity, fcZero):
1649 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1650 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1651 case PackCategoriesIntoKey(fcZero, fcZero):
1652 makeNaN();
1653 return opInvalidOp;
1655 case PackCategoriesIntoKey(fcNormal, fcNormal):
1656 return opOK;
1660 /* Change sign. */
1661 void IEEEFloat::changeSign() {
1662 /* Look mummy, this one's easy. */
1663 sign = !sign;
1666 /* Normalized addition or subtraction. */
1667 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
1668 roundingMode rounding_mode,
1669 bool subtract) {
1670 opStatus fs;
1672 fs = addOrSubtractSpecials(rhs, subtract);
1674 /* This return code means it was not a simple case. */
1675 if (fs == opDivByZero) {
1676 lostFraction lost_fraction;
1678 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1679 fs = normalize(rounding_mode, lost_fraction);
1681 /* Can only be zero if we lost no fraction. */
1682 assert(category != fcZero || lost_fraction == lfExactlyZero);
1685 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1686 positive zero unless rounding to minus infinity, except that
1687 adding two like-signed zeroes gives that zero. */
1688 if (category == fcZero) {
1689 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1690 sign = (rounding_mode == rmTowardNegative);
1693 return fs;
1696 /* Normalized addition. */
1697 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs,
1698 roundingMode rounding_mode) {
1699 return addOrSubtract(rhs, rounding_mode, false);
1702 /* Normalized subtraction. */
1703 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs,
1704 roundingMode rounding_mode) {
1705 return addOrSubtract(rhs, rounding_mode, true);
1708 /* Normalized multiply. */
1709 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
1710 roundingMode rounding_mode) {
1711 opStatus fs;
1713 sign ^= rhs.sign;
1714 fs = multiplySpecials(rhs);
1716 if (isFiniteNonZero()) {
1717 lostFraction lost_fraction = multiplySignificand(rhs, nullptr);
1718 fs = normalize(rounding_mode, lost_fraction);
1719 if (lost_fraction != lfExactlyZero)
1720 fs = (opStatus) (fs | opInexact);
1723 return fs;
1726 /* Normalized divide. */
1727 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
1728 roundingMode rounding_mode) {
1729 opStatus fs;
1731 sign ^= rhs.sign;
1732 fs = divideSpecials(rhs);
1734 if (isFiniteNonZero()) {
1735 lostFraction lost_fraction = divideSignificand(rhs);
1736 fs = normalize(rounding_mode, lost_fraction);
1737 if (lost_fraction != lfExactlyZero)
1738 fs = (opStatus) (fs | opInexact);
1741 return fs;
1744 /* Normalized remainder. This is not currently correct in all cases. */
1745 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
1746 opStatus fs;
1747 IEEEFloat V = *this;
1748 unsigned int origSign = sign;
1750 fs = V.divide(rhs, rmNearestTiesToEven);
1751 if (fs == opDivByZero)
1752 return fs;
1754 int parts = partCount();
1755 integerPart *x = new integerPart[parts];
1756 bool ignored;
1757 fs = V.convertToInteger(makeMutableArrayRef(x, parts),
1758 parts * integerPartWidth, true, rmNearestTiesToEven,
1759 &ignored);
1760 if (fs == opInvalidOp) {
1761 delete[] x;
1762 return fs;
1765 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1766 rmNearestTiesToEven);
1767 assert(fs==opOK); // should always work
1769 fs = V.multiply(rhs, rmNearestTiesToEven);
1770 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1772 fs = subtract(V, rmNearestTiesToEven);
1773 assert(fs==opOK || fs==opInexact); // likewise
1775 if (isZero())
1776 sign = origSign; // IEEE754 requires this
1777 delete[] x;
1778 return fs;
1781 /* Normalized llvm frem (C fmod). */
1782 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
1783 opStatus fs;
1784 fs = modSpecials(rhs);
1785 unsigned int origSign = sign;
1787 while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
1788 compareAbsoluteValue(rhs) != cmpLessThan) {
1789 IEEEFloat V = scalbn(rhs, ilogb(*this) - ilogb(rhs), rmNearestTiesToEven);
1790 if (compareAbsoluteValue(V) == cmpLessThan)
1791 V = scalbn(V, -1, rmNearestTiesToEven);
1792 V.sign = sign;
1794 fs = subtract(V, rmNearestTiesToEven);
1795 assert(fs==opOK);
1797 if (isZero())
1798 sign = origSign; // fmod requires this
1799 return fs;
1802 /* Normalized fused-multiply-add. */
1803 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
1804 const IEEEFloat &addend,
1805 roundingMode rounding_mode) {
1806 opStatus fs;
1808 /* Post-multiplication sign, before addition. */
1809 sign ^= multiplicand.sign;
1811 /* If and only if all arguments are normal do we need to do an
1812 extended-precision calculation. */
1813 if (isFiniteNonZero() &&
1814 multiplicand.isFiniteNonZero() &&
1815 addend.isFinite()) {
1816 lostFraction lost_fraction;
1818 lost_fraction = multiplySignificand(multiplicand, &addend);
1819 fs = normalize(rounding_mode, lost_fraction);
1820 if (lost_fraction != lfExactlyZero)
1821 fs = (opStatus) (fs | opInexact);
1823 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1824 positive zero unless rounding to minus infinity, except that
1825 adding two like-signed zeroes gives that zero. */
1826 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign)
1827 sign = (rounding_mode == rmTowardNegative);
1828 } else {
1829 fs = multiplySpecials(multiplicand);
1831 /* FS can only be opOK or opInvalidOp. There is no more work
1832 to do in the latter case. The IEEE-754R standard says it is
1833 implementation-defined in this case whether, if ADDEND is a
1834 quiet NaN, we raise invalid op; this implementation does so.
1836 If we need to do the addition we can do so with normal
1837 precision. */
1838 if (fs == opOK)
1839 fs = addOrSubtract(addend, rounding_mode, false);
1842 return fs;
1845 /* Rounding-mode corrrect round to integral value. */
1846 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
1847 opStatus fs;
1849 // If the exponent is large enough, we know that this value is already
1850 // integral, and the arithmetic below would potentially cause it to saturate
1851 // to +/-Inf. Bail out early instead.
1852 if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics))
1853 return opOK;
1855 // The algorithm here is quite simple: we add 2^(p-1), where p is the
1856 // precision of our format, and then subtract it back off again. The choice
1857 // of rounding modes for the addition/subtraction determines the rounding mode
1858 // for our integral rounding as well.
1859 // NOTE: When the input value is negative, we do subtraction followed by
1860 // addition instead.
1861 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
1862 IntegerConstant <<= semanticsPrecision(*semantics)-1;
1863 IEEEFloat MagicConstant(*semantics);
1864 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
1865 rmNearestTiesToEven);
1866 MagicConstant.sign = sign;
1868 if (fs != opOK)
1869 return fs;
1871 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
1872 bool inputSign = isNegative();
1874 fs = add(MagicConstant, rounding_mode);
1875 if (fs != opOK && fs != opInexact)
1876 return fs;
1878 fs = subtract(MagicConstant, rounding_mode);
1880 // Restore the input sign.
1881 if (inputSign != isNegative())
1882 changeSign();
1884 return fs;
1888 /* Comparison requires normalized numbers. */
1889 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
1890 cmpResult result;
1892 assert(semantics == rhs.semantics);
1894 switch (PackCategoriesIntoKey(category, rhs.category)) {
1895 default:
1896 llvm_unreachable(nullptr);
1898 case PackCategoriesIntoKey(fcNaN, fcZero):
1899 case PackCategoriesIntoKey(fcNaN, fcNormal):
1900 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1901 case PackCategoriesIntoKey(fcNaN, fcNaN):
1902 case PackCategoriesIntoKey(fcZero, fcNaN):
1903 case PackCategoriesIntoKey(fcNormal, fcNaN):
1904 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1905 return cmpUnordered;
1907 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1908 case PackCategoriesIntoKey(fcInfinity, fcZero):
1909 case PackCategoriesIntoKey(fcNormal, fcZero):
1910 if (sign)
1911 return cmpLessThan;
1912 else
1913 return cmpGreaterThan;
1915 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1916 case PackCategoriesIntoKey(fcZero, fcInfinity):
1917 case PackCategoriesIntoKey(fcZero, fcNormal):
1918 if (rhs.sign)
1919 return cmpGreaterThan;
1920 else
1921 return cmpLessThan;
1923 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1924 if (sign == rhs.sign)
1925 return cmpEqual;
1926 else if (sign)
1927 return cmpLessThan;
1928 else
1929 return cmpGreaterThan;
1931 case PackCategoriesIntoKey(fcZero, fcZero):
1932 return cmpEqual;
1934 case PackCategoriesIntoKey(fcNormal, fcNormal):
1935 break;
1938 /* Two normal numbers. Do they have the same sign? */
1939 if (sign != rhs.sign) {
1940 if (sign)
1941 result = cmpLessThan;
1942 else
1943 result = cmpGreaterThan;
1944 } else {
1945 /* Compare absolute values; invert result if negative. */
1946 result = compareAbsoluteValue(rhs);
1948 if (sign) {
1949 if (result == cmpLessThan)
1950 result = cmpGreaterThan;
1951 else if (result == cmpGreaterThan)
1952 result = cmpLessThan;
1956 return result;
1959 /// IEEEFloat::convert - convert a value of one floating point type to another.
1960 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1961 /// records whether the transformation lost information, i.e. whether
1962 /// converting the result back to the original type will produce the
1963 /// original value (this is almost the same as return value==fsOK, but there
1964 /// are edge cases where this is not so).
1966 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
1967 roundingMode rounding_mode,
1968 bool *losesInfo) {
1969 lostFraction lostFraction;
1970 unsigned int newPartCount, oldPartCount;
1971 opStatus fs;
1972 int shift;
1973 const fltSemantics &fromSemantics = *semantics;
1975 lostFraction = lfExactlyZero;
1976 newPartCount = partCountForBits(toSemantics.precision + 1);
1977 oldPartCount = partCount();
1978 shift = toSemantics.precision - fromSemantics.precision;
1980 bool X86SpecialNan = false;
1981 if (&fromSemantics == &semX87DoubleExtended &&
1982 &toSemantics != &semX87DoubleExtended && category == fcNaN &&
1983 (!(*significandParts() & 0x8000000000000000ULL) ||
1984 !(*significandParts() & 0x4000000000000000ULL))) {
1985 // x86 has some unusual NaNs which cannot be represented in any other
1986 // format; note them here.
1987 X86SpecialNan = true;
1990 // If this is a truncation of a denormal number, and the target semantics
1991 // has larger exponent range than the source semantics (this can happen
1992 // when truncating from PowerPC double-double to double format), the
1993 // right shift could lose result mantissa bits. Adjust exponent instead
1994 // of performing excessive shift.
1995 if (shift < 0 && isFiniteNonZero()) {
1996 int exponentChange = significandMSB() + 1 - fromSemantics.precision;
1997 if (exponent + exponentChange < toSemantics.minExponent)
1998 exponentChange = toSemantics.minExponent - exponent;
1999 if (exponentChange < shift)
2000 exponentChange = shift;
2001 if (exponentChange < 0) {
2002 shift -= exponentChange;
2003 exponent += exponentChange;
2007 // If this is a truncation, perform the shift before we narrow the storage.
2008 if (shift < 0 && (isFiniteNonZero() || category==fcNaN))
2009 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
2011 // Fix the storage so it can hold to new value.
2012 if (newPartCount > oldPartCount) {
2013 // The new type requires more storage; make it available.
2014 integerPart *newParts;
2015 newParts = new integerPart[newPartCount];
2016 APInt::tcSet(newParts, 0, newPartCount);
2017 if (isFiniteNonZero() || category==fcNaN)
2018 APInt::tcAssign(newParts, significandParts(), oldPartCount);
2019 freeSignificand();
2020 significand.parts = newParts;
2021 } else if (newPartCount == 1 && oldPartCount != 1) {
2022 // Switch to built-in storage for a single part.
2023 integerPart newPart = 0;
2024 if (isFiniteNonZero() || category==fcNaN)
2025 newPart = significandParts()[0];
2026 freeSignificand();
2027 significand.part = newPart;
2030 // Now that we have the right storage, switch the semantics.
2031 semantics = &toSemantics;
2033 // If this is an extension, perform the shift now that the storage is
2034 // available.
2035 if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
2036 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2038 if (isFiniteNonZero()) {
2039 fs = normalize(rounding_mode, lostFraction);
2040 *losesInfo = (fs != opOK);
2041 } else if (category == fcNaN) {
2042 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2044 // For x87 extended precision, we want to make a NaN, not a special NaN if
2045 // the input wasn't special either.
2046 if (!X86SpecialNan && semantics == &semX87DoubleExtended)
2047 APInt::tcSetBit(significandParts(), semantics->precision - 1);
2049 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
2050 // does not give you back the same bits. This is dubious, and we
2051 // don't currently do it. You're really supposed to get
2052 // an invalid operation signal at runtime, but nobody does that.
2053 fs = opOK;
2054 } else {
2055 *losesInfo = false;
2056 fs = opOK;
2059 return fs;
2062 /* Convert a floating point number to an integer according to the
2063 rounding mode. If the rounded integer value is out of range this
2064 returns an invalid operation exception and the contents of the
2065 destination parts are unspecified. If the rounded value is in
2066 range but the floating point number is not the exact integer, the C
2067 standard doesn't require an inexact exception to be raised. IEEE
2068 854 does require it so we do that.
2070 Note that for conversions to integer type the C standard requires
2071 round-to-zero to always be used. */
2072 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2073 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2074 roundingMode rounding_mode, bool *isExact) const {
2075 lostFraction lost_fraction;
2076 const integerPart *src;
2077 unsigned int dstPartsCount, truncatedBits;
2079 *isExact = false;
2081 /* Handle the three special cases first. */
2082 if (category == fcInfinity || category == fcNaN)
2083 return opInvalidOp;
2085 dstPartsCount = partCountForBits(width);
2086 assert(dstPartsCount <= parts.size() && "Integer too big");
2088 if (category == fcZero) {
2089 APInt::tcSet(parts.data(), 0, dstPartsCount);
2090 // Negative zero can't be represented as an int.
2091 *isExact = !sign;
2092 return opOK;
2095 src = significandParts();
2097 /* Step 1: place our absolute value, with any fraction truncated, in
2098 the destination. */
2099 if (exponent < 0) {
2100 /* Our absolute value is less than one; truncate everything. */
2101 APInt::tcSet(parts.data(), 0, dstPartsCount);
2102 /* For exponent -1 the integer bit represents .5, look at that.
2103 For smaller exponents leftmost truncated bit is 0. */
2104 truncatedBits = semantics->precision -1U - exponent;
2105 } else {
2106 /* We want the most significant (exponent + 1) bits; the rest are
2107 truncated. */
2108 unsigned int bits = exponent + 1U;
2110 /* Hopelessly large in magnitude? */
2111 if (bits > width)
2112 return opInvalidOp;
2114 if (bits < semantics->precision) {
2115 /* We truncate (semantics->precision - bits) bits. */
2116 truncatedBits = semantics->precision - bits;
2117 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2118 } else {
2119 /* We want at least as many bits as are available. */
2120 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2122 APInt::tcShiftLeft(parts.data(), dstPartsCount,
2123 bits - semantics->precision);
2124 truncatedBits = 0;
2128 /* Step 2: work out any lost fraction, and increment the absolute
2129 value if we would round away from zero. */
2130 if (truncatedBits) {
2131 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2132 truncatedBits);
2133 if (lost_fraction != lfExactlyZero &&
2134 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2135 if (APInt::tcIncrement(parts.data(), dstPartsCount))
2136 return opInvalidOp; /* Overflow. */
2138 } else {
2139 lost_fraction = lfExactlyZero;
2142 /* Step 3: check if we fit in the destination. */
2143 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2145 if (sign) {
2146 if (!isSigned) {
2147 /* Negative numbers cannot be represented as unsigned. */
2148 if (omsb != 0)
2149 return opInvalidOp;
2150 } else {
2151 /* It takes omsb bits to represent the unsigned integer value.
2152 We lose a bit for the sign, but care is needed as the
2153 maximally negative integer is a special case. */
2154 if (omsb == width &&
2155 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2156 return opInvalidOp;
2158 /* This case can happen because of rounding. */
2159 if (omsb > width)
2160 return opInvalidOp;
2163 APInt::tcNegate (parts.data(), dstPartsCount);
2164 } else {
2165 if (omsb >= width + !isSigned)
2166 return opInvalidOp;
2169 if (lost_fraction == lfExactlyZero) {
2170 *isExact = true;
2171 return opOK;
2172 } else
2173 return opInexact;
2176 /* Same as convertToSignExtendedInteger, except we provide
2177 deterministic values in case of an invalid operation exception,
2178 namely zero for NaNs and the minimal or maximal value respectively
2179 for underflow or overflow.
2180 The *isExact output tells whether the result is exact, in the sense
2181 that converting it back to the original floating point type produces
2182 the original value. This is almost equivalent to result==opOK,
2183 except for negative zeroes.
2185 IEEEFloat::opStatus
2186 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
2187 unsigned int width, bool isSigned,
2188 roundingMode rounding_mode, bool *isExact) const {
2189 opStatus fs;
2191 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2192 isExact);
2194 if (fs == opInvalidOp) {
2195 unsigned int bits, dstPartsCount;
2197 dstPartsCount = partCountForBits(width);
2198 assert(dstPartsCount <= parts.size() && "Integer too big");
2200 if (category == fcNaN)
2201 bits = 0;
2202 else if (sign)
2203 bits = isSigned;
2204 else
2205 bits = width - isSigned;
2207 APInt::tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
2208 if (sign && isSigned)
2209 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2212 return fs;
2215 /* Convert an unsigned integer SRC to a floating point number,
2216 rounding according to ROUNDING_MODE. The sign of the floating
2217 point number is not modified. */
2218 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2219 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2220 unsigned int omsb, precision, dstCount;
2221 integerPart *dst;
2222 lostFraction lost_fraction;
2224 category = fcNormal;
2225 omsb = APInt::tcMSB(src, srcCount) + 1;
2226 dst = significandParts();
2227 dstCount = partCount();
2228 precision = semantics->precision;
2230 /* We want the most significant PRECISION bits of SRC. There may not
2231 be that many; extract what we can. */
2232 if (precision <= omsb) {
2233 exponent = omsb - 1;
2234 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2235 omsb - precision);
2236 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2237 } else {
2238 exponent = precision - 1;
2239 lost_fraction = lfExactlyZero;
2240 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2243 return normalize(rounding_mode, lost_fraction);
2246 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned,
2247 roundingMode rounding_mode) {
2248 unsigned int partCount = Val.getNumWords();
2249 APInt api = Val;
2251 sign = false;
2252 if (isSigned && api.isNegative()) {
2253 sign = true;
2254 api = -api;
2257 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2260 /* Convert a two's complement integer SRC to a floating point number,
2261 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2262 integer is signed, in which case it must be sign-extended. */
2263 IEEEFloat::opStatus
2264 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src,
2265 unsigned int srcCount, bool isSigned,
2266 roundingMode rounding_mode) {
2267 opStatus status;
2269 if (isSigned &&
2270 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2271 integerPart *copy;
2273 /* If we're signed and negative negate a copy. */
2274 sign = true;
2275 copy = new integerPart[srcCount];
2276 APInt::tcAssign(copy, src, srcCount);
2277 APInt::tcNegate(copy, srcCount);
2278 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2279 delete [] copy;
2280 } else {
2281 sign = false;
2282 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2285 return status;
2288 /* FIXME: should this just take a const APInt reference? */
2289 IEEEFloat::opStatus
2290 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2291 unsigned int width, bool isSigned,
2292 roundingMode rounding_mode) {
2293 unsigned int partCount = partCountForBits(width);
2294 APInt api = APInt(width, makeArrayRef(parts, partCount));
2296 sign = false;
2297 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2298 sign = true;
2299 api = -api;
2302 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2305 IEEEFloat::opStatus
2306 IEEEFloat::convertFromHexadecimalString(StringRef s,
2307 roundingMode rounding_mode) {
2308 lostFraction lost_fraction = lfExactlyZero;
2310 category = fcNormal;
2311 zeroSignificand();
2312 exponent = 0;
2314 integerPart *significand = significandParts();
2315 unsigned partsCount = partCount();
2316 unsigned bitPos = partsCount * integerPartWidth;
2317 bool computedTrailingFraction = false;
2319 // Skip leading zeroes and any (hexa)decimal point.
2320 StringRef::iterator begin = s.begin();
2321 StringRef::iterator end = s.end();
2322 StringRef::iterator dot;
2323 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2324 StringRef::iterator firstSignificantDigit = p;
2326 while (p != end) {
2327 integerPart hex_value;
2329 if (*p == '.') {
2330 assert(dot == end && "String contains multiple dots");
2331 dot = p++;
2332 continue;
2335 hex_value = hexDigitValue(*p);
2336 if (hex_value == -1U)
2337 break;
2339 p++;
2341 // Store the number while we have space.
2342 if (bitPos) {
2343 bitPos -= 4;
2344 hex_value <<= bitPos % integerPartWidth;
2345 significand[bitPos / integerPartWidth] |= hex_value;
2346 } else if (!computedTrailingFraction) {
2347 lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2348 computedTrailingFraction = true;
2352 /* Hex floats require an exponent but not a hexadecimal point. */
2353 assert(p != end && "Hex strings require an exponent");
2354 assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2355 assert(p != begin && "Significand has no digits");
2356 assert((dot == end || p - begin != 1) && "Significand has no digits");
2358 /* Ignore the exponent if we are zero. */
2359 if (p != firstSignificantDigit) {
2360 int expAdjustment;
2362 /* Implicit hexadecimal point? */
2363 if (dot == end)
2364 dot = p;
2366 /* Calculate the exponent adjustment implicit in the number of
2367 significant digits. */
2368 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2369 if (expAdjustment < 0)
2370 expAdjustment++;
2371 expAdjustment = expAdjustment * 4 - 1;
2373 /* Adjust for writing the significand starting at the most
2374 significant nibble. */
2375 expAdjustment += semantics->precision;
2376 expAdjustment -= partsCount * integerPartWidth;
2378 /* Adjust for the given exponent. */
2379 exponent = totalExponent(p + 1, end, expAdjustment);
2382 return normalize(rounding_mode, lost_fraction);
2385 IEEEFloat::opStatus
2386 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2387 unsigned sigPartCount, int exp,
2388 roundingMode rounding_mode) {
2389 unsigned int parts, pow5PartCount;
2390 fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2391 integerPart pow5Parts[maxPowerOfFiveParts];
2392 bool isNearest;
2394 isNearest = (rounding_mode == rmNearestTiesToEven ||
2395 rounding_mode == rmNearestTiesToAway);
2397 parts = partCountForBits(semantics->precision + 11);
2399 /* Calculate pow(5, abs(exp)). */
2400 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2402 for (;; parts *= 2) {
2403 opStatus sigStatus, powStatus;
2404 unsigned int excessPrecision, truncatedBits;
2406 calcSemantics.precision = parts * integerPartWidth - 1;
2407 excessPrecision = calcSemantics.precision - semantics->precision;
2408 truncatedBits = excessPrecision;
2410 IEEEFloat decSig(calcSemantics, uninitialized);
2411 decSig.makeZero(sign);
2412 IEEEFloat pow5(calcSemantics);
2414 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2415 rmNearestTiesToEven);
2416 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2417 rmNearestTiesToEven);
2418 /* Add exp, as 10^n = 5^n * 2^n. */
2419 decSig.exponent += exp;
2421 lostFraction calcLostFraction;
2422 integerPart HUerr, HUdistance;
2423 unsigned int powHUerr;
2425 if (exp >= 0) {
2426 /* multiplySignificand leaves the precision-th bit set to 1. */
2427 calcLostFraction = decSig.multiplySignificand(pow5, nullptr);
2428 powHUerr = powStatus != opOK;
2429 } else {
2430 calcLostFraction = decSig.divideSignificand(pow5);
2431 /* Denormal numbers have less precision. */
2432 if (decSig.exponent < semantics->minExponent) {
2433 excessPrecision += (semantics->minExponent - decSig.exponent);
2434 truncatedBits = excessPrecision;
2435 if (excessPrecision > calcSemantics.precision)
2436 excessPrecision = calcSemantics.precision;
2438 /* Extra half-ulp lost in reciprocal of exponent. */
2439 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2442 /* Both multiplySignificand and divideSignificand return the
2443 result with the integer bit set. */
2444 assert(APInt::tcExtractBit
2445 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2447 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2448 powHUerr);
2449 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2450 excessPrecision, isNearest);
2452 /* Are we guaranteed to round correctly if we truncate? */
2453 if (HUdistance >= HUerr) {
2454 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2455 calcSemantics.precision - excessPrecision,
2456 excessPrecision);
2457 /* Take the exponent of decSig. If we tcExtract-ed less bits
2458 above we must adjust our exponent to compensate for the
2459 implicit right shift. */
2460 exponent = (decSig.exponent + semantics->precision
2461 - (calcSemantics.precision - excessPrecision));
2462 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2463 decSig.partCount(),
2464 truncatedBits);
2465 return normalize(rounding_mode, calcLostFraction);
2470 IEEEFloat::opStatus
2471 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
2472 decimalInfo D;
2473 opStatus fs;
2475 /* Scan the text. */
2476 StringRef::iterator p = str.begin();
2477 interpretDecimal(p, str.end(), &D);
2479 /* Handle the quick cases. First the case of no significant digits,
2480 i.e. zero, and then exponents that are obviously too large or too
2481 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2482 definitely overflows if
2484 (exp - 1) * L >= maxExponent
2486 and definitely underflows to zero where
2488 (exp + 1) * L <= minExponent - precision
2490 With integer arithmetic the tightest bounds for L are
2492 93/28 < L < 196/59 [ numerator <= 256 ]
2493 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2496 // Test if we have a zero number allowing for strings with no null terminators
2497 // and zero decimals with non-zero exponents.
2499 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
2500 // D->firstSigDigit equals str.end(), every digit must be a zero and there can
2501 // be at most one dot. On the other hand, if we have a zero with a non-zero
2502 // exponent, then we know that D.firstSigDigit will be non-numeric.
2503 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
2504 category = fcZero;
2505 fs = opOK;
2507 /* Check whether the normalized exponent is high enough to overflow
2508 max during the log-rebasing in the max-exponent check below. */
2509 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2510 fs = handleOverflow(rounding_mode);
2512 /* If it wasn't, then it also wasn't high enough to overflow max
2513 during the log-rebasing in the min-exponent check. Check that it
2514 won't overflow min in either check, then perform the min-exponent
2515 check. */
2516 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2517 (D.normalizedExponent + 1) * 28738 <=
2518 8651 * (semantics->minExponent - (int) semantics->precision)) {
2519 /* Underflow to zero and round. */
2520 category = fcNormal;
2521 zeroSignificand();
2522 fs = normalize(rounding_mode, lfLessThanHalf);
2524 /* We can finally safely perform the max-exponent check. */
2525 } else if ((D.normalizedExponent - 1) * 42039
2526 >= 12655 * semantics->maxExponent) {
2527 /* Overflow and round. */
2528 fs = handleOverflow(rounding_mode);
2529 } else {
2530 integerPart *decSignificand;
2531 unsigned int partCount;
2533 /* A tight upper bound on number of bits required to hold an
2534 N-digit decimal integer is N * 196 / 59. Allocate enough space
2535 to hold the full significand, and an extra part required by
2536 tcMultiplyPart. */
2537 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2538 partCount = partCountForBits(1 + 196 * partCount / 59);
2539 decSignificand = new integerPart[partCount + 1];
2540 partCount = 0;
2542 /* Convert to binary efficiently - we do almost all multiplication
2543 in an integerPart. When this would overflow do we do a single
2544 bignum multiplication, and then revert again to multiplication
2545 in an integerPart. */
2546 do {
2547 integerPart decValue, val, multiplier;
2549 val = 0;
2550 multiplier = 1;
2552 do {
2553 if (*p == '.') {
2554 p++;
2555 if (p == str.end()) {
2556 break;
2559 decValue = decDigitValue(*p++);
2560 assert(decValue < 10U && "Invalid character in significand");
2561 multiplier *= 10;
2562 val = val * 10 + decValue;
2563 /* The maximum number that can be multiplied by ten with any
2564 digit added without overflowing an integerPart. */
2565 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2567 /* Multiply out the current part. */
2568 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2569 partCount, partCount + 1, false);
2571 /* If we used another part (likely but not guaranteed), increase
2572 the count. */
2573 if (decSignificand[partCount])
2574 partCount++;
2575 } while (p <= D.lastSigDigit);
2577 category = fcNormal;
2578 fs = roundSignificandWithExponent(decSignificand, partCount,
2579 D.exponent, rounding_mode);
2581 delete [] decSignificand;
2584 return fs;
2587 bool IEEEFloat::convertFromStringSpecials(StringRef str) {
2588 if (str.equals("inf") || str.equals("INFINITY") || str.equals("+Inf")) {
2589 makeInf(false);
2590 return true;
2593 if (str.equals("-inf") || str.equals("-INFINITY") || str.equals("-Inf")) {
2594 makeInf(true);
2595 return true;
2598 if (str.equals("nan") || str.equals("NaN")) {
2599 makeNaN(false, false);
2600 return true;
2603 if (str.equals("-nan") || str.equals("-NaN")) {
2604 makeNaN(false, true);
2605 return true;
2608 return false;
2611 IEEEFloat::opStatus IEEEFloat::convertFromString(StringRef str,
2612 roundingMode rounding_mode) {
2613 assert(!str.empty() && "Invalid string length");
2615 // Handle special cases.
2616 if (convertFromStringSpecials(str))
2617 return opOK;
2619 /* Handle a leading minus sign. */
2620 StringRef::iterator p = str.begin();
2621 size_t slen = str.size();
2622 sign = *p == '-' ? 1 : 0;
2623 if (*p == '-' || *p == '+') {
2624 p++;
2625 slen--;
2626 assert(slen && "String has no digits");
2629 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2630 assert(slen - 2 && "Invalid string");
2631 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2632 rounding_mode);
2635 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2638 /* Write out a hexadecimal representation of the floating point value
2639 to DST, which must be of sufficient size, in the C99 form
2640 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2641 excluding the terminating NUL.
2643 If UPPERCASE, the output is in upper case, otherwise in lower case.
2645 HEXDIGITS digits appear altogether, rounding the value if
2646 necessary. If HEXDIGITS is 0, the minimal precision to display the
2647 number precisely is used instead. If nothing would appear after
2648 the decimal point it is suppressed.
2650 The decimal exponent is always printed and has at least one digit.
2651 Zero values display an exponent of zero. Infinities and NaNs
2652 appear as "infinity" or "nan" respectively.
2654 The above rules are as specified by C99. There is ambiguity about
2655 what the leading hexadecimal digit should be. This implementation
2656 uses whatever is necessary so that the exponent is displayed as
2657 stored. This implies the exponent will fall within the IEEE format
2658 range, and the leading hexadecimal digit will be 0 (for denormals),
2659 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2660 any other digits zero).
2662 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
2663 bool upperCase,
2664 roundingMode rounding_mode) const {
2665 char *p;
2667 p = dst;
2668 if (sign)
2669 *dst++ = '-';
2671 switch (category) {
2672 case fcInfinity:
2673 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2674 dst += sizeof infinityL - 1;
2675 break;
2677 case fcNaN:
2678 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2679 dst += sizeof NaNU - 1;
2680 break;
2682 case fcZero:
2683 *dst++ = '0';
2684 *dst++ = upperCase ? 'X': 'x';
2685 *dst++ = '0';
2686 if (hexDigits > 1) {
2687 *dst++ = '.';
2688 memset (dst, '0', hexDigits - 1);
2689 dst += hexDigits - 1;
2691 *dst++ = upperCase ? 'P': 'p';
2692 *dst++ = '0';
2693 break;
2695 case fcNormal:
2696 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2697 break;
2700 *dst = 0;
2702 return static_cast<unsigned int>(dst - p);
2705 /* Does the hard work of outputting the correctly rounded hexadecimal
2706 form of a normal floating point number with the specified number of
2707 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2708 digits necessary to print the value precisely is output. */
2709 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2710 bool upperCase,
2711 roundingMode rounding_mode) const {
2712 unsigned int count, valueBits, shift, partsCount, outputDigits;
2713 const char *hexDigitChars;
2714 const integerPart *significand;
2715 char *p;
2716 bool roundUp;
2718 *dst++ = '0';
2719 *dst++ = upperCase ? 'X': 'x';
2721 roundUp = false;
2722 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2724 significand = significandParts();
2725 partsCount = partCount();
2727 /* +3 because the first digit only uses the single integer bit, so
2728 we have 3 virtual zero most-significant-bits. */
2729 valueBits = semantics->precision + 3;
2730 shift = integerPartWidth - valueBits % integerPartWidth;
2732 /* The natural number of digits required ignoring trailing
2733 insignificant zeroes. */
2734 outputDigits = (valueBits - significandLSB () + 3) / 4;
2736 /* hexDigits of zero means use the required number for the
2737 precision. Otherwise, see if we are truncating. If we are,
2738 find out if we need to round away from zero. */
2739 if (hexDigits) {
2740 if (hexDigits < outputDigits) {
2741 /* We are dropping non-zero bits, so need to check how to round.
2742 "bits" is the number of dropped bits. */
2743 unsigned int bits;
2744 lostFraction fraction;
2746 bits = valueBits - hexDigits * 4;
2747 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2748 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2750 outputDigits = hexDigits;
2753 /* Write the digits consecutively, and start writing in the location
2754 of the hexadecimal point. We move the most significant digit
2755 left and add the hexadecimal point later. */
2756 p = ++dst;
2758 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2760 while (outputDigits && count) {
2761 integerPart part;
2763 /* Put the most significant integerPartWidth bits in "part". */
2764 if (--count == partsCount)
2765 part = 0; /* An imaginary higher zero part. */
2766 else
2767 part = significand[count] << shift;
2769 if (count && shift)
2770 part |= significand[count - 1] >> (integerPartWidth - shift);
2772 /* Convert as much of "part" to hexdigits as we can. */
2773 unsigned int curDigits = integerPartWidth / 4;
2775 if (curDigits > outputDigits)
2776 curDigits = outputDigits;
2777 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2778 outputDigits -= curDigits;
2781 if (roundUp) {
2782 char *q = dst;
2784 /* Note that hexDigitChars has a trailing '0'. */
2785 do {
2786 q--;
2787 *q = hexDigitChars[hexDigitValue (*q) + 1];
2788 } while (*q == '0');
2789 assert(q >= p);
2790 } else {
2791 /* Add trailing zeroes. */
2792 memset (dst, '0', outputDigits);
2793 dst += outputDigits;
2796 /* Move the most significant digit to before the point, and if there
2797 is something after the decimal point add it. This must come
2798 after rounding above. */
2799 p[-1] = p[0];
2800 if (dst -1 == p)
2801 dst--;
2802 else
2803 p[0] = '.';
2805 /* Finally output the exponent. */
2806 *dst++ = upperCase ? 'P': 'p';
2808 return writeSignedDecimal (dst, exponent);
2811 hash_code hash_value(const IEEEFloat &Arg) {
2812 if (!Arg.isFiniteNonZero())
2813 return hash_combine((uint8_t)Arg.category,
2814 // NaN has no sign, fix it at zero.
2815 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
2816 Arg.semantics->precision);
2818 // Normal floats need their exponent and significand hashed.
2819 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
2820 Arg.semantics->precision, Arg.exponent,
2821 hash_combine_range(
2822 Arg.significandParts(),
2823 Arg.significandParts() + Arg.partCount()));
2826 // Conversion from APFloat to/from host float/double. It may eventually be
2827 // possible to eliminate these and have everybody deal with APFloats, but that
2828 // will take a while. This approach will not easily extend to long double.
2829 // Current implementation requires integerPartWidth==64, which is correct at
2830 // the moment but could be made more general.
2832 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2833 // the actual IEEE respresentations. We compensate for that here.
2835 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
2836 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
2837 assert(partCount()==2);
2839 uint64_t myexponent, mysignificand;
2841 if (isFiniteNonZero()) {
2842 myexponent = exponent+16383; //bias
2843 mysignificand = significandParts()[0];
2844 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2845 myexponent = 0; // denormal
2846 } else if (category==fcZero) {
2847 myexponent = 0;
2848 mysignificand = 0;
2849 } else if (category==fcInfinity) {
2850 myexponent = 0x7fff;
2851 mysignificand = 0x8000000000000000ULL;
2852 } else {
2853 assert(category == fcNaN && "Unknown category");
2854 myexponent = 0x7fff;
2855 mysignificand = significandParts()[0];
2858 uint64_t words[2];
2859 words[0] = mysignificand;
2860 words[1] = ((uint64_t)(sign & 1) << 15) |
2861 (myexponent & 0x7fffLL);
2862 return APInt(80, words);
2865 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
2866 assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
2867 assert(partCount()==2);
2869 uint64_t words[2];
2870 opStatus fs;
2871 bool losesInfo;
2873 // Convert number to double. To avoid spurious underflows, we re-
2874 // normalize against the "double" minExponent first, and only *then*
2875 // truncate the mantissa. The result of that second conversion
2876 // may be inexact, but should never underflow.
2877 // Declare fltSemantics before APFloat that uses it (and
2878 // saves pointer to it) to ensure correct destruction order.
2879 fltSemantics extendedSemantics = *semantics;
2880 extendedSemantics.minExponent = semIEEEdouble.minExponent;
2881 IEEEFloat extended(*this);
2882 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2883 assert(fs == opOK && !losesInfo);
2884 (void)fs;
2886 IEEEFloat u(extended);
2887 fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
2888 assert(fs == opOK || fs == opInexact);
2889 (void)fs;
2890 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
2892 // If conversion was exact or resulted in a special case, we're done;
2893 // just set the second double to zero. Otherwise, re-convert back to
2894 // the extended format and compute the difference. This now should
2895 // convert exactly to double.
2896 if (u.isFiniteNonZero() && losesInfo) {
2897 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2898 assert(fs == opOK && !losesInfo);
2899 (void)fs;
2901 IEEEFloat v(extended);
2902 v.subtract(u, rmNearestTiesToEven);
2903 fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
2904 assert(fs == opOK && !losesInfo);
2905 (void)fs;
2906 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
2907 } else {
2908 words[1] = 0;
2911 return APInt(128, words);
2914 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
2915 assert(semantics == (const llvm::fltSemantics*)&semIEEEquad);
2916 assert(partCount()==2);
2918 uint64_t myexponent, mysignificand, mysignificand2;
2920 if (isFiniteNonZero()) {
2921 myexponent = exponent+16383; //bias
2922 mysignificand = significandParts()[0];
2923 mysignificand2 = significandParts()[1];
2924 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2925 myexponent = 0; // denormal
2926 } else if (category==fcZero) {
2927 myexponent = 0;
2928 mysignificand = mysignificand2 = 0;
2929 } else if (category==fcInfinity) {
2930 myexponent = 0x7fff;
2931 mysignificand = mysignificand2 = 0;
2932 } else {
2933 assert(category == fcNaN && "Unknown category!");
2934 myexponent = 0x7fff;
2935 mysignificand = significandParts()[0];
2936 mysignificand2 = significandParts()[1];
2939 uint64_t words[2];
2940 words[0] = mysignificand;
2941 words[1] = ((uint64_t)(sign & 1) << 63) |
2942 ((myexponent & 0x7fff) << 48) |
2943 (mysignificand2 & 0xffffffffffffLL);
2945 return APInt(128, words);
2948 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
2949 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble);
2950 assert(partCount()==1);
2952 uint64_t myexponent, mysignificand;
2954 if (isFiniteNonZero()) {
2955 myexponent = exponent+1023; //bias
2956 mysignificand = *significandParts();
2957 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2958 myexponent = 0; // denormal
2959 } else if (category==fcZero) {
2960 myexponent = 0;
2961 mysignificand = 0;
2962 } else if (category==fcInfinity) {
2963 myexponent = 0x7ff;
2964 mysignificand = 0;
2965 } else {
2966 assert(category == fcNaN && "Unknown category!");
2967 myexponent = 0x7ff;
2968 mysignificand = *significandParts();
2971 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2972 ((myexponent & 0x7ff) << 52) |
2973 (mysignificand & 0xfffffffffffffLL))));
2976 APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
2977 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle);
2978 assert(partCount()==1);
2980 uint32_t myexponent, mysignificand;
2982 if (isFiniteNonZero()) {
2983 myexponent = exponent+127; //bias
2984 mysignificand = (uint32_t)*significandParts();
2985 if (myexponent == 1 && !(mysignificand & 0x800000))
2986 myexponent = 0; // denormal
2987 } else if (category==fcZero) {
2988 myexponent = 0;
2989 mysignificand = 0;
2990 } else if (category==fcInfinity) {
2991 myexponent = 0xff;
2992 mysignificand = 0;
2993 } else {
2994 assert(category == fcNaN && "Unknown category!");
2995 myexponent = 0xff;
2996 mysignificand = (uint32_t)*significandParts();
2999 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
3000 (mysignificand & 0x7fffff)));
3003 APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
3004 assert(semantics == (const llvm::fltSemantics*)&semIEEEhalf);
3005 assert(partCount()==1);
3007 uint32_t myexponent, mysignificand;
3009 if (isFiniteNonZero()) {
3010 myexponent = exponent+15; //bias
3011 mysignificand = (uint32_t)*significandParts();
3012 if (myexponent == 1 && !(mysignificand & 0x400))
3013 myexponent = 0; // denormal
3014 } else if (category==fcZero) {
3015 myexponent = 0;
3016 mysignificand = 0;
3017 } else if (category==fcInfinity) {
3018 myexponent = 0x1f;
3019 mysignificand = 0;
3020 } else {
3021 assert(category == fcNaN && "Unknown category!");
3022 myexponent = 0x1f;
3023 mysignificand = (uint32_t)*significandParts();
3026 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
3027 (mysignificand & 0x3ff)));
3030 // This function creates an APInt that is just a bit map of the floating
3031 // point constant as it would appear in memory. It is not a conversion,
3032 // and treating the result as a normal integer is unlikely to be useful.
3034 APInt IEEEFloat::bitcastToAPInt() const {
3035 if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
3036 return convertHalfAPFloatToAPInt();
3038 if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
3039 return convertFloatAPFloatToAPInt();
3041 if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
3042 return convertDoubleAPFloatToAPInt();
3044 if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
3045 return convertQuadrupleAPFloatToAPInt();
3047 if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
3048 return convertPPCDoubleDoubleAPFloatToAPInt();
3050 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
3051 "unknown format!");
3052 return convertF80LongDoubleAPFloatToAPInt();
3055 float IEEEFloat::convertToFloat() const {
3056 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
3057 "Float semantics are not IEEEsingle");
3058 APInt api = bitcastToAPInt();
3059 return api.bitsToFloat();
3062 double IEEEFloat::convertToDouble() const {
3063 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
3064 "Float semantics are not IEEEdouble");
3065 APInt api = bitcastToAPInt();
3066 return api.bitsToDouble();
3069 /// Integer bit is explicit in this format. Intel hardware (387 and later)
3070 /// does not support these bit patterns:
3071 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3072 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3073 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3074 /// exponent = 0, integer bit 1 ("pseudodenormal")
3075 /// At the moment, the first three are treated as NaNs, the last one as Normal.
3076 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3077 assert(api.getBitWidth()==80);
3078 uint64_t i1 = api.getRawData()[0];
3079 uint64_t i2 = api.getRawData()[1];
3080 uint64_t myexponent = (i2 & 0x7fff);
3081 uint64_t mysignificand = i1;
3082 uint8_t myintegerbit = mysignificand >> 63;
3084 initialize(&semX87DoubleExtended);
3085 assert(partCount()==2);
3087 sign = static_cast<unsigned int>(i2>>15);
3088 if (myexponent == 0 && mysignificand == 0) {
3089 // exponent, significand meaningless
3090 category = fcZero;
3091 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3092 // exponent, significand meaningless
3093 category = fcInfinity;
3094 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3095 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3096 // exponent meaningless
3097 category = fcNaN;
3098 significandParts()[0] = mysignificand;
3099 significandParts()[1] = 0;
3100 } else {
3101 category = fcNormal;
3102 exponent = myexponent - 16383;
3103 significandParts()[0] = mysignificand;
3104 significandParts()[1] = 0;
3105 if (myexponent==0) // denormal
3106 exponent = -16382;
3110 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
3111 assert(api.getBitWidth()==128);
3112 uint64_t i1 = api.getRawData()[0];
3113 uint64_t i2 = api.getRawData()[1];
3114 opStatus fs;
3115 bool losesInfo;
3117 // Get the first double and convert to our format.
3118 initFromDoubleAPInt(APInt(64, i1));
3119 fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3120 assert(fs == opOK && !losesInfo);
3121 (void)fs;
3123 // Unless we have a special case, add in second double.
3124 if (isFiniteNonZero()) {
3125 IEEEFloat v(semIEEEdouble, APInt(64, i2));
3126 fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3127 assert(fs == opOK && !losesInfo);
3128 (void)fs;
3130 add(v, rmNearestTiesToEven);
3134 void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3135 assert(api.getBitWidth()==128);
3136 uint64_t i1 = api.getRawData()[0];
3137 uint64_t i2 = api.getRawData()[1];
3138 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3139 uint64_t mysignificand = i1;
3140 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3142 initialize(&semIEEEquad);
3143 assert(partCount()==2);
3145 sign = static_cast<unsigned int>(i2>>63);
3146 if (myexponent==0 &&
3147 (mysignificand==0 && mysignificand2==0)) {
3148 // exponent, significand meaningless
3149 category = fcZero;
3150 } else if (myexponent==0x7fff &&
3151 (mysignificand==0 && mysignificand2==0)) {
3152 // exponent, significand meaningless
3153 category = fcInfinity;
3154 } else if (myexponent==0x7fff &&
3155 (mysignificand!=0 || mysignificand2 !=0)) {
3156 // exponent meaningless
3157 category = fcNaN;
3158 significandParts()[0] = mysignificand;
3159 significandParts()[1] = mysignificand2;
3160 } else {
3161 category = fcNormal;
3162 exponent = myexponent - 16383;
3163 significandParts()[0] = mysignificand;
3164 significandParts()[1] = mysignificand2;
3165 if (myexponent==0) // denormal
3166 exponent = -16382;
3167 else
3168 significandParts()[1] |= 0x1000000000000LL; // integer bit
3172 void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
3173 assert(api.getBitWidth()==64);
3174 uint64_t i = *api.getRawData();
3175 uint64_t myexponent = (i >> 52) & 0x7ff;
3176 uint64_t mysignificand = i & 0xfffffffffffffLL;
3178 initialize(&semIEEEdouble);
3179 assert(partCount()==1);
3181 sign = static_cast<unsigned int>(i>>63);
3182 if (myexponent==0 && mysignificand==0) {
3183 // exponent, significand meaningless
3184 category = fcZero;
3185 } else if (myexponent==0x7ff && mysignificand==0) {
3186 // exponent, significand meaningless
3187 category = fcInfinity;
3188 } else if (myexponent==0x7ff && mysignificand!=0) {
3189 // exponent meaningless
3190 category = fcNaN;
3191 *significandParts() = mysignificand;
3192 } else {
3193 category = fcNormal;
3194 exponent = myexponent - 1023;
3195 *significandParts() = mysignificand;
3196 if (myexponent==0) // denormal
3197 exponent = -1022;
3198 else
3199 *significandParts() |= 0x10000000000000LL; // integer bit
3203 void IEEEFloat::initFromFloatAPInt(const APInt &api) {
3204 assert(api.getBitWidth()==32);
3205 uint32_t i = (uint32_t)*api.getRawData();
3206 uint32_t myexponent = (i >> 23) & 0xff;
3207 uint32_t mysignificand = i & 0x7fffff;
3209 initialize(&semIEEEsingle);
3210 assert(partCount()==1);
3212 sign = i >> 31;
3213 if (myexponent==0 && mysignificand==0) {
3214 // exponent, significand meaningless
3215 category = fcZero;
3216 } else if (myexponent==0xff && mysignificand==0) {
3217 // exponent, significand meaningless
3218 category = fcInfinity;
3219 } else if (myexponent==0xff && mysignificand!=0) {
3220 // sign, exponent, significand meaningless
3221 category = fcNaN;
3222 *significandParts() = mysignificand;
3223 } else {
3224 category = fcNormal;
3225 exponent = myexponent - 127; //bias
3226 *significandParts() = mysignificand;
3227 if (myexponent==0) // denormal
3228 exponent = -126;
3229 else
3230 *significandParts() |= 0x800000; // integer bit
3234 void IEEEFloat::initFromHalfAPInt(const APInt &api) {
3235 assert(api.getBitWidth()==16);
3236 uint32_t i = (uint32_t)*api.getRawData();
3237 uint32_t myexponent = (i >> 10) & 0x1f;
3238 uint32_t mysignificand = i & 0x3ff;
3240 initialize(&semIEEEhalf);
3241 assert(partCount()==1);
3243 sign = i >> 15;
3244 if (myexponent==0 && mysignificand==0) {
3245 // exponent, significand meaningless
3246 category = fcZero;
3247 } else if (myexponent==0x1f && mysignificand==0) {
3248 // exponent, significand meaningless
3249 category = fcInfinity;
3250 } else if (myexponent==0x1f && mysignificand!=0) {
3251 // sign, exponent, significand meaningless
3252 category = fcNaN;
3253 *significandParts() = mysignificand;
3254 } else {
3255 category = fcNormal;
3256 exponent = myexponent - 15; //bias
3257 *significandParts() = mysignificand;
3258 if (myexponent==0) // denormal
3259 exponent = -14;
3260 else
3261 *significandParts() |= 0x400; // integer bit
3265 /// Treat api as containing the bits of a floating point number. Currently
3266 /// we infer the floating point type from the size of the APInt. The
3267 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3268 /// when the size is anything else).
3269 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3270 if (Sem == &semIEEEhalf)
3271 return initFromHalfAPInt(api);
3272 if (Sem == &semIEEEsingle)
3273 return initFromFloatAPInt(api);
3274 if (Sem == &semIEEEdouble)
3275 return initFromDoubleAPInt(api);
3276 if (Sem == &semX87DoubleExtended)
3277 return initFromF80LongDoubleAPInt(api);
3278 if (Sem == &semIEEEquad)
3279 return initFromQuadrupleAPInt(api);
3280 if (Sem == &semPPCDoubleDoubleLegacy)
3281 return initFromPPCDoubleDoubleAPInt(api);
3283 llvm_unreachable(nullptr);
3286 /// Make this number the largest magnitude normal number in the given
3287 /// semantics.
3288 void IEEEFloat::makeLargest(bool Negative) {
3289 // We want (in interchange format):
3290 // sign = {Negative}
3291 // exponent = 1..10
3292 // significand = 1..1
3293 category = fcNormal;
3294 sign = Negative;
3295 exponent = semantics->maxExponent;
3297 // Use memset to set all but the highest integerPart to all ones.
3298 integerPart *significand = significandParts();
3299 unsigned PartCount = partCount();
3300 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3302 // Set the high integerPart especially setting all unused top bits for
3303 // internal consistency.
3304 const unsigned NumUnusedHighBits =
3305 PartCount*integerPartWidth - semantics->precision;
3306 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3307 ? (~integerPart(0) >> NumUnusedHighBits)
3308 : 0;
3311 /// Make this number the smallest magnitude denormal number in the given
3312 /// semantics.
3313 void IEEEFloat::makeSmallest(bool Negative) {
3314 // We want (in interchange format):
3315 // sign = {Negative}
3316 // exponent = 0..0
3317 // significand = 0..01
3318 category = fcNormal;
3319 sign = Negative;
3320 exponent = semantics->minExponent;
3321 APInt::tcSet(significandParts(), 1, partCount());
3324 void IEEEFloat::makeSmallestNormalized(bool Negative) {
3325 // We want (in interchange format):
3326 // sign = {Negative}
3327 // exponent = 0..0
3328 // significand = 10..0
3330 category = fcNormal;
3331 zeroSignificand();
3332 sign = Negative;
3333 exponent = semantics->minExponent;
3334 significandParts()[partCountForBits(semantics->precision) - 1] |=
3335 (((integerPart)1) << ((semantics->precision - 1) % integerPartWidth));
3338 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
3339 initFromAPInt(&Sem, API);
3342 IEEEFloat::IEEEFloat(float f) {
3343 initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
3346 IEEEFloat::IEEEFloat(double d) {
3347 initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
3350 namespace {
3351 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3352 Buffer.append(Str.begin(), Str.end());
3355 /// Removes data from the given significand until it is no more
3356 /// precise than is required for the desired precision.
3357 void AdjustToPrecision(APInt &significand,
3358 int &exp, unsigned FormatPrecision) {
3359 unsigned bits = significand.getActiveBits();
3361 // 196/59 is a very slight overestimate of lg_2(10).
3362 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3364 if (bits <= bitsRequired) return;
3366 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3367 if (!tensRemovable) return;
3369 exp += tensRemovable;
3371 APInt divisor(significand.getBitWidth(), 1);
3372 APInt powten(significand.getBitWidth(), 10);
3373 while (true) {
3374 if (tensRemovable & 1)
3375 divisor *= powten;
3376 tensRemovable >>= 1;
3377 if (!tensRemovable) break;
3378 powten *= powten;
3381 significand = significand.udiv(divisor);
3383 // Truncate the significand down to its active bit count.
3384 significand = significand.trunc(significand.getActiveBits());
3388 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3389 int &exp, unsigned FormatPrecision) {
3390 unsigned N = buffer.size();
3391 if (N <= FormatPrecision) return;
3393 // The most significant figures are the last ones in the buffer.
3394 unsigned FirstSignificant = N - FormatPrecision;
3396 // Round.
3397 // FIXME: this probably shouldn't use 'round half up'.
3399 // Rounding down is just a truncation, except we also want to drop
3400 // trailing zeros from the new result.
3401 if (buffer[FirstSignificant - 1] < '5') {
3402 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3403 FirstSignificant++;
3405 exp += FirstSignificant;
3406 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3407 return;
3410 // Rounding up requires a decimal add-with-carry. If we continue
3411 // the carry, the newly-introduced zeros will just be truncated.
3412 for (unsigned I = FirstSignificant; I != N; ++I) {
3413 if (buffer[I] == '9') {
3414 FirstSignificant++;
3415 } else {
3416 buffer[I]++;
3417 break;
3421 // If we carried through, we have exactly one digit of precision.
3422 if (FirstSignificant == N) {
3423 exp += FirstSignificant;
3424 buffer.clear();
3425 buffer.push_back('1');
3426 return;
3429 exp += FirstSignificant;
3430 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3434 void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
3435 unsigned FormatMaxPadding, bool TruncateZero) const {
3436 switch (category) {
3437 case fcInfinity:
3438 if (isNegative())
3439 return append(Str, "-Inf");
3440 else
3441 return append(Str, "+Inf");
3443 case fcNaN: return append(Str, "NaN");
3445 case fcZero:
3446 if (isNegative())
3447 Str.push_back('-');
3449 if (!FormatMaxPadding) {
3450 if (TruncateZero)
3451 append(Str, "0.0E+0");
3452 else {
3453 append(Str, "0.0");
3454 if (FormatPrecision > 1)
3455 Str.append(FormatPrecision - 1, '0');
3456 append(Str, "e+00");
3458 } else
3459 Str.push_back('0');
3460 return;
3462 case fcNormal:
3463 break;
3466 if (isNegative())
3467 Str.push_back('-');
3469 // Decompose the number into an APInt and an exponent.
3470 int exp = exponent - ((int) semantics->precision - 1);
3471 APInt significand(semantics->precision,
3472 makeArrayRef(significandParts(),
3473 partCountForBits(semantics->precision)));
3475 // Set FormatPrecision if zero. We want to do this before we
3476 // truncate trailing zeros, as those are part of the precision.
3477 if (!FormatPrecision) {
3478 // We use enough digits so the number can be round-tripped back to an
3479 // APFloat. The formula comes from "How to Print Floating-Point Numbers
3480 // Accurately" by Steele and White.
3481 // FIXME: Using a formula based purely on the precision is conservative;
3482 // we can print fewer digits depending on the actual value being printed.
3484 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
3485 FormatPrecision = 2 + semantics->precision * 59 / 196;
3488 // Ignore trailing binary zeros.
3489 int trailingZeros = significand.countTrailingZeros();
3490 exp += trailingZeros;
3491 significand.lshrInPlace(trailingZeros);
3493 // Change the exponent from 2^e to 10^e.
3494 if (exp == 0) {
3495 // Nothing to do.
3496 } else if (exp > 0) {
3497 // Just shift left.
3498 significand = significand.zext(semantics->precision + exp);
3499 significand <<= exp;
3500 exp = 0;
3501 } else { /* exp < 0 */
3502 int texp = -exp;
3504 // We transform this using the identity:
3505 // (N)(2^-e) == (N)(5^e)(10^-e)
3506 // This means we have to multiply N (the significand) by 5^e.
3507 // To avoid overflow, we have to operate on numbers large
3508 // enough to store N * 5^e:
3509 // log2(N * 5^e) == log2(N) + e * log2(5)
3510 // <= semantics->precision + e * 137 / 59
3511 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3513 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3515 // Multiply significand by 5^e.
3516 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3517 significand = significand.zext(precision);
3518 APInt five_to_the_i(precision, 5);
3519 while (true) {
3520 if (texp & 1) significand *= five_to_the_i;
3522 texp >>= 1;
3523 if (!texp) break;
3524 five_to_the_i *= five_to_the_i;
3528 AdjustToPrecision(significand, exp, FormatPrecision);
3530 SmallVector<char, 256> buffer;
3532 // Fill the buffer.
3533 unsigned precision = significand.getBitWidth();
3534 APInt ten(precision, 10);
3535 APInt digit(precision, 0);
3537 bool inTrail = true;
3538 while (significand != 0) {
3539 // digit <- significand % 10
3540 // significand <- significand / 10
3541 APInt::udivrem(significand, ten, significand, digit);
3543 unsigned d = digit.getZExtValue();
3545 // Drop trailing zeros.
3546 if (inTrail && !d) exp++;
3547 else {
3548 buffer.push_back((char) ('0' + d));
3549 inTrail = false;
3553 assert(!buffer.empty() && "no characters in buffer!");
3555 // Drop down to FormatPrecision.
3556 // TODO: don't do more precise calculations above than are required.
3557 AdjustToPrecision(buffer, exp, FormatPrecision);
3559 unsigned NDigits = buffer.size();
3561 // Check whether we should use scientific notation.
3562 bool FormatScientific;
3563 if (!FormatMaxPadding)
3564 FormatScientific = true;
3565 else {
3566 if (exp >= 0) {
3567 // 765e3 --> 765000
3568 // ^^^
3569 // But we shouldn't make the number look more precise than it is.
3570 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3571 NDigits + (unsigned) exp > FormatPrecision);
3572 } else {
3573 // Power of the most significant digit.
3574 int MSD = exp + (int) (NDigits - 1);
3575 if (MSD >= 0) {
3576 // 765e-2 == 7.65
3577 FormatScientific = false;
3578 } else {
3579 // 765e-5 == 0.00765
3580 // ^ ^^
3581 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3586 // Scientific formatting is pretty straightforward.
3587 if (FormatScientific) {
3588 exp += (NDigits - 1);
3590 Str.push_back(buffer[NDigits-1]);
3591 Str.push_back('.');
3592 if (NDigits == 1 && TruncateZero)
3593 Str.push_back('0');
3594 else
3595 for (unsigned I = 1; I != NDigits; ++I)
3596 Str.push_back(buffer[NDigits-1-I]);
3597 // Fill with zeros up to FormatPrecision.
3598 if (!TruncateZero && FormatPrecision > NDigits - 1)
3599 Str.append(FormatPrecision - NDigits + 1, '0');
3600 // For !TruncateZero we use lower 'e'.
3601 Str.push_back(TruncateZero ? 'E' : 'e');
3603 Str.push_back(exp >= 0 ? '+' : '-');
3604 if (exp < 0) exp = -exp;
3605 SmallVector<char, 6> expbuf;
3606 do {
3607 expbuf.push_back((char) ('0' + (exp % 10)));
3608 exp /= 10;
3609 } while (exp);
3610 // Exponent always at least two digits if we do not truncate zeros.
3611 if (!TruncateZero && expbuf.size() < 2)
3612 expbuf.push_back('0');
3613 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3614 Str.push_back(expbuf[E-1-I]);
3615 return;
3618 // Non-scientific, positive exponents.
3619 if (exp >= 0) {
3620 for (unsigned I = 0; I != NDigits; ++I)
3621 Str.push_back(buffer[NDigits-1-I]);
3622 for (unsigned I = 0; I != (unsigned) exp; ++I)
3623 Str.push_back('0');
3624 return;
3627 // Non-scientific, negative exponents.
3629 // The number of digits to the left of the decimal point.
3630 int NWholeDigits = exp + (int) NDigits;
3632 unsigned I = 0;
3633 if (NWholeDigits > 0) {
3634 for (; I != (unsigned) NWholeDigits; ++I)
3635 Str.push_back(buffer[NDigits-I-1]);
3636 Str.push_back('.');
3637 } else {
3638 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3640 Str.push_back('0');
3641 Str.push_back('.');
3642 for (unsigned Z = 1; Z != NZeros; ++Z)
3643 Str.push_back('0');
3646 for (; I != NDigits; ++I)
3647 Str.push_back(buffer[NDigits-I-1]);
3650 bool IEEEFloat::getExactInverse(APFloat *inv) const {
3651 // Special floats and denormals have no exact inverse.
3652 if (!isFiniteNonZero())
3653 return false;
3655 // Check that the number is a power of two by making sure that only the
3656 // integer bit is set in the significand.
3657 if (significandLSB() != semantics->precision - 1)
3658 return false;
3660 // Get the inverse.
3661 IEEEFloat reciprocal(*semantics, 1ULL);
3662 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3663 return false;
3665 // Avoid multiplication with a denormal, it is not safe on all platforms and
3666 // may be slower than a normal division.
3667 if (reciprocal.isDenormal())
3668 return false;
3670 assert(reciprocal.isFiniteNonZero() &&
3671 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
3673 if (inv)
3674 *inv = APFloat(reciprocal, *semantics);
3676 return true;
3679 bool IEEEFloat::isSignaling() const {
3680 if (!isNaN())
3681 return false;
3683 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
3684 // first bit of the trailing significand being 0.
3685 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
3688 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
3690 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
3691 /// appropriate sign switching before/after the computation.
3692 IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
3693 // If we are performing nextDown, swap sign so we have -x.
3694 if (nextDown)
3695 changeSign();
3697 // Compute nextUp(x)
3698 opStatus result = opOK;
3700 // Handle each float category separately.
3701 switch (category) {
3702 case fcInfinity:
3703 // nextUp(+inf) = +inf
3704 if (!isNegative())
3705 break;
3706 // nextUp(-inf) = -getLargest()
3707 makeLargest(true);
3708 break;
3709 case fcNaN:
3710 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
3711 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
3712 // change the payload.
3713 if (isSignaling()) {
3714 result = opInvalidOp;
3715 // For consistency, propagate the sign of the sNaN to the qNaN.
3716 makeNaN(false, isNegative(), nullptr);
3718 break;
3719 case fcZero:
3720 // nextUp(pm 0) = +getSmallest()
3721 makeSmallest(false);
3722 break;
3723 case fcNormal:
3724 // nextUp(-getSmallest()) = -0
3725 if (isSmallest() && isNegative()) {
3726 APInt::tcSet(significandParts(), 0, partCount());
3727 category = fcZero;
3728 exponent = 0;
3729 break;
3732 // nextUp(getLargest()) == INFINITY
3733 if (isLargest() && !isNegative()) {
3734 APInt::tcSet(significandParts(), 0, partCount());
3735 category = fcInfinity;
3736 exponent = semantics->maxExponent + 1;
3737 break;
3740 // nextUp(normal) == normal + inc.
3741 if (isNegative()) {
3742 // If we are negative, we need to decrement the significand.
3744 // We only cross a binade boundary that requires adjusting the exponent
3745 // if:
3746 // 1. exponent != semantics->minExponent. This implies we are not in the
3747 // smallest binade or are dealing with denormals.
3748 // 2. Our significand excluding the integral bit is all zeros.
3749 bool WillCrossBinadeBoundary =
3750 exponent != semantics->minExponent && isSignificandAllZeros();
3752 // Decrement the significand.
3754 // We always do this since:
3755 // 1. If we are dealing with a non-binade decrement, by definition we
3756 // just decrement the significand.
3757 // 2. If we are dealing with a normal -> normal binade decrement, since
3758 // we have an explicit integral bit the fact that all bits but the
3759 // integral bit are zero implies that subtracting one will yield a
3760 // significand with 0 integral bit and 1 in all other spots. Thus we
3761 // must just adjust the exponent and set the integral bit to 1.
3762 // 3. If we are dealing with a normal -> denormal binade decrement,
3763 // since we set the integral bit to 0 when we represent denormals, we
3764 // just decrement the significand.
3765 integerPart *Parts = significandParts();
3766 APInt::tcDecrement(Parts, partCount());
3768 if (WillCrossBinadeBoundary) {
3769 // Our result is a normal number. Do the following:
3770 // 1. Set the integral bit to 1.
3771 // 2. Decrement the exponent.
3772 APInt::tcSetBit(Parts, semantics->precision - 1);
3773 exponent--;
3775 } else {
3776 // If we are positive, we need to increment the significand.
3778 // We only cross a binade boundary that requires adjusting the exponent if
3779 // the input is not a denormal and all of said input's significand bits
3780 // are set. If all of said conditions are true: clear the significand, set
3781 // the integral bit to 1, and increment the exponent. If we have a
3782 // denormal always increment since moving denormals and the numbers in the
3783 // smallest normal binade have the same exponent in our representation.
3784 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
3786 if (WillCrossBinadeBoundary) {
3787 integerPart *Parts = significandParts();
3788 APInt::tcSet(Parts, 0, partCount());
3789 APInt::tcSetBit(Parts, semantics->precision - 1);
3790 assert(exponent != semantics->maxExponent &&
3791 "We can not increment an exponent beyond the maxExponent allowed"
3792 " by the given floating point semantics.");
3793 exponent++;
3794 } else {
3795 incrementSignificand();
3798 break;
3801 // If we are performing nextDown, swap sign so we have -nextUp(-x)
3802 if (nextDown)
3803 changeSign();
3805 return result;
3808 void IEEEFloat::makeInf(bool Negative) {
3809 category = fcInfinity;
3810 sign = Negative;
3811 exponent = semantics->maxExponent + 1;
3812 APInt::tcSet(significandParts(), 0, partCount());
3815 void IEEEFloat::makeZero(bool Negative) {
3816 category = fcZero;
3817 sign = Negative;
3818 exponent = semantics->minExponent-1;
3819 APInt::tcSet(significandParts(), 0, partCount());
3822 void IEEEFloat::makeQuiet() {
3823 assert(isNaN());
3824 APInt::tcSetBit(significandParts(), semantics->precision - 2);
3827 int ilogb(const IEEEFloat &Arg) {
3828 if (Arg.isNaN())
3829 return IEEEFloat::IEK_NaN;
3830 if (Arg.isZero())
3831 return IEEEFloat::IEK_Zero;
3832 if (Arg.isInfinity())
3833 return IEEEFloat::IEK_Inf;
3834 if (!Arg.isDenormal())
3835 return Arg.exponent;
3837 IEEEFloat Normalized(Arg);
3838 int SignificandBits = Arg.getSemantics().precision - 1;
3840 Normalized.exponent += SignificandBits;
3841 Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
3842 return Normalized.exponent - SignificandBits;
3845 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {
3846 auto MaxExp = X.getSemantics().maxExponent;
3847 auto MinExp = X.getSemantics().minExponent;
3849 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
3850 // overflow; clamp it to a safe range before adding, but ensure that the range
3851 // is large enough that the clamp does not change the result. The range we
3852 // need to support is the difference between the largest possible exponent and
3853 // the normalized exponent of half the smallest denormal.
3855 int SignificandBits = X.getSemantics().precision - 1;
3856 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
3858 // Clamp to one past the range ends to let normalize handle overlflow.
3859 X.exponent += std::min(std::max(Exp, -MaxIncrement - 1), MaxIncrement);
3860 X.normalize(RoundingMode, lfExactlyZero);
3861 if (X.isNaN())
3862 X.makeQuiet();
3863 return X;
3866 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) {
3867 Exp = ilogb(Val);
3869 // Quiet signalling nans.
3870 if (Exp == IEEEFloat::IEK_NaN) {
3871 IEEEFloat Quiet(Val);
3872 Quiet.makeQuiet();
3873 return Quiet;
3876 if (Exp == IEEEFloat::IEK_Inf)
3877 return Val;
3879 // 1 is added because frexp is defined to return a normalized fraction in
3880 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
3881 Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1;
3882 return scalbn(Val, -Exp, RM);
3885 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
3886 : Semantics(&S),
3887 Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
3888 assert(Semantics == &semPPCDoubleDouble);
3891 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
3892 : Semantics(&S),
3893 Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
3894 APFloat(semIEEEdouble, uninitialized)}) {
3895 assert(Semantics == &semPPCDoubleDouble);
3898 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
3899 : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
3900 APFloat(semIEEEdouble)}) {
3901 assert(Semantics == &semPPCDoubleDouble);
3904 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
3905 : Semantics(&S),
3906 Floats(new APFloat[2]{
3907 APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
3908 APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
3909 assert(Semantics == &semPPCDoubleDouble);
3912 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
3913 APFloat &&Second)
3914 : Semantics(&S),
3915 Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
3916 assert(Semantics == &semPPCDoubleDouble);
3917 assert(&Floats[0].getSemantics() == &semIEEEdouble);
3918 assert(&Floats[1].getSemantics() == &semIEEEdouble);
3921 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
3922 : Semantics(RHS.Semantics),
3923 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
3924 APFloat(RHS.Floats[1])}
3925 : nullptr) {
3926 assert(Semantics == &semPPCDoubleDouble);
3929 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
3930 : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
3931 RHS.Semantics = &semBogus;
3932 assert(Semantics == &semPPCDoubleDouble);
3935 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
3936 if (Semantics == RHS.Semantics && RHS.Floats) {
3937 Floats[0] = RHS.Floats[0];
3938 Floats[1] = RHS.Floats[1];
3939 } else if (this != &RHS) {
3940 this->~DoubleAPFloat();
3941 new (this) DoubleAPFloat(RHS);
3943 return *this;
3946 // Implement addition, subtraction, multiplication and division based on:
3947 // "Software for Doubled-Precision Floating-Point Computations",
3948 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
3949 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
3950 const APFloat &c, const APFloat &cc,
3951 roundingMode RM) {
3952 int Status = opOK;
3953 APFloat z = a;
3954 Status |= z.add(c, RM);
3955 if (!z.isFinite()) {
3956 if (!z.isInfinity()) {
3957 Floats[0] = std::move(z);
3958 Floats[1].makeZero(/* Neg = */ false);
3959 return (opStatus)Status;
3961 Status = opOK;
3962 auto AComparedToC = a.compareAbsoluteValue(c);
3963 z = cc;
3964 Status |= z.add(aa, RM);
3965 if (AComparedToC == APFloat::cmpGreaterThan) {
3966 // z = cc + aa + c + a;
3967 Status |= z.add(c, RM);
3968 Status |= z.add(a, RM);
3969 } else {
3970 // z = cc + aa + a + c;
3971 Status |= z.add(a, RM);
3972 Status |= z.add(c, RM);
3974 if (!z.isFinite()) {
3975 Floats[0] = std::move(z);
3976 Floats[1].makeZero(/* Neg = */ false);
3977 return (opStatus)Status;
3979 Floats[0] = z;
3980 APFloat zz = aa;
3981 Status |= zz.add(cc, RM);
3982 if (AComparedToC == APFloat::cmpGreaterThan) {
3983 // Floats[1] = a - z + c + zz;
3984 Floats[1] = a;
3985 Status |= Floats[1].subtract(z, RM);
3986 Status |= Floats[1].add(c, RM);
3987 Status |= Floats[1].add(zz, RM);
3988 } else {
3989 // Floats[1] = c - z + a + zz;
3990 Floats[1] = c;
3991 Status |= Floats[1].subtract(z, RM);
3992 Status |= Floats[1].add(a, RM);
3993 Status |= Floats[1].add(zz, RM);
3995 } else {
3996 // q = a - z;
3997 APFloat q = a;
3998 Status |= q.subtract(z, RM);
4000 // zz = q + c + (a - (q + z)) + aa + cc;
4001 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4002 auto zz = q;
4003 Status |= zz.add(c, RM);
4004 Status |= q.add(z, RM);
4005 Status |= q.subtract(a, RM);
4006 q.changeSign();
4007 Status |= zz.add(q, RM);
4008 Status |= zz.add(aa, RM);
4009 Status |= zz.add(cc, RM);
4010 if (zz.isZero() && !zz.isNegative()) {
4011 Floats[0] = std::move(z);
4012 Floats[1].makeZero(/* Neg = */ false);
4013 return opOK;
4015 Floats[0] = z;
4016 Status |= Floats[0].add(zz, RM);
4017 if (!Floats[0].isFinite()) {
4018 Floats[1].makeZero(/* Neg = */ false);
4019 return (opStatus)Status;
4021 Floats[1] = std::move(z);
4022 Status |= Floats[1].subtract(Floats[0], RM);
4023 Status |= Floats[1].add(zz, RM);
4025 return (opStatus)Status;
4028 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
4029 const DoubleAPFloat &RHS,
4030 DoubleAPFloat &Out,
4031 roundingMode RM) {
4032 if (LHS.getCategory() == fcNaN) {
4033 Out = LHS;
4034 return opOK;
4036 if (RHS.getCategory() == fcNaN) {
4037 Out = RHS;
4038 return opOK;
4040 if (LHS.getCategory() == fcZero) {
4041 Out = RHS;
4042 return opOK;
4044 if (RHS.getCategory() == fcZero) {
4045 Out = LHS;
4046 return opOK;
4048 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4049 LHS.isNegative() != RHS.isNegative()) {
4050 Out.makeNaN(false, Out.isNegative(), nullptr);
4051 return opInvalidOp;
4053 if (LHS.getCategory() == fcInfinity) {
4054 Out = LHS;
4055 return opOK;
4057 if (RHS.getCategory() == fcInfinity) {
4058 Out = RHS;
4059 return opOK;
4061 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4063 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4064 CC(RHS.Floats[1]);
4065 assert(&A.getSemantics() == &semIEEEdouble);
4066 assert(&AA.getSemantics() == &semIEEEdouble);
4067 assert(&C.getSemantics() == &semIEEEdouble);
4068 assert(&CC.getSemantics() == &semIEEEdouble);
4069 assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
4070 assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
4071 return Out.addImpl(A, AA, C, CC, RM);
4074 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS,
4075 roundingMode RM) {
4076 return addWithSpecial(*this, RHS, *this, RM);
4079 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS,
4080 roundingMode RM) {
4081 changeSign();
4082 auto Ret = add(RHS, RM);
4083 changeSign();
4084 return Ret;
4087 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS,
4088 APFloat::roundingMode RM) {
4089 const auto &LHS = *this;
4090 auto &Out = *this;
4091 /* Interesting observation: For special categories, finding the lowest
4092 common ancestor of the following layered graph gives the correct
4093 return category:
4097 Zero Inf
4099 Normal
4101 e.g. NaN * NaN = NaN
4102 Zero * Inf = NaN
4103 Normal * Zero = Zero
4104 Normal * Inf = Inf
4106 if (LHS.getCategory() == fcNaN) {
4107 Out = LHS;
4108 return opOK;
4110 if (RHS.getCategory() == fcNaN) {
4111 Out = RHS;
4112 return opOK;
4114 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
4115 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
4116 Out.makeNaN(false, false, nullptr);
4117 return opOK;
4119 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
4120 Out = LHS;
4121 return opOK;
4123 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
4124 Out = RHS;
4125 return opOK;
4127 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
4128 "Special cases not handled exhaustively");
4130 int Status = opOK;
4131 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
4132 // t = a * c
4133 APFloat T = A;
4134 Status |= T.multiply(C, RM);
4135 if (!T.isFiniteNonZero()) {
4136 Floats[0] = T;
4137 Floats[1].makeZero(/* Neg = */ false);
4138 return (opStatus)Status;
4141 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4142 APFloat Tau = A;
4143 T.changeSign();
4144 Status |= Tau.fusedMultiplyAdd(C, T, RM);
4145 T.changeSign();
4147 // v = a * d
4148 APFloat V = A;
4149 Status |= V.multiply(D, RM);
4150 // w = b * c
4151 APFloat W = B;
4152 Status |= W.multiply(C, RM);
4153 Status |= V.add(W, RM);
4154 // tau += v + w
4155 Status |= Tau.add(V, RM);
4157 // u = t + tau
4158 APFloat U = T;
4159 Status |= U.add(Tau, RM);
4161 Floats[0] = U;
4162 if (!U.isFinite()) {
4163 Floats[1].makeZero(/* Neg = */ false);
4164 } else {
4165 // Floats[1] = (t - u) + tau
4166 Status |= T.subtract(U, RM);
4167 Status |= T.add(Tau, RM);
4168 Floats[1] = T;
4170 return (opStatus)Status;
4173 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS,
4174 APFloat::roundingMode RM) {
4175 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4176 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4177 auto Ret =
4178 Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
4179 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4180 return Ret;
4183 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) {
4184 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4185 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4186 auto Ret =
4187 Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4188 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4189 return Ret;
4192 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) {
4193 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4194 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4195 auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4196 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4197 return Ret;
4200 APFloat::opStatus
4201 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
4202 const DoubleAPFloat &Addend,
4203 APFloat::roundingMode RM) {
4204 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4205 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4206 auto Ret = Tmp.fusedMultiplyAdd(
4207 APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()),
4208 APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM);
4209 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4210 return Ret;
4213 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {
4214 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4215 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4216 auto Ret = Tmp.roundToIntegral(RM);
4217 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4218 return Ret;
4221 void DoubleAPFloat::changeSign() {
4222 Floats[0].changeSign();
4223 Floats[1].changeSign();
4226 APFloat::cmpResult
4227 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const {
4228 auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
4229 if (Result != cmpEqual)
4230 return Result;
4231 Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
4232 if (Result == cmpLessThan || Result == cmpGreaterThan) {
4233 auto Against = Floats[0].isNegative() ^ Floats[1].isNegative();
4234 auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative();
4235 if (Against && !RHSAgainst)
4236 return cmpLessThan;
4237 if (!Against && RHSAgainst)
4238 return cmpGreaterThan;
4239 if (!Against && !RHSAgainst)
4240 return Result;
4241 if (Against && RHSAgainst)
4242 return (cmpResult)(cmpLessThan + cmpGreaterThan - Result);
4244 return Result;
4247 APFloat::fltCategory DoubleAPFloat::getCategory() const {
4248 return Floats[0].getCategory();
4251 bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
4253 void DoubleAPFloat::makeInf(bool Neg) {
4254 Floats[0].makeInf(Neg);
4255 Floats[1].makeZero(/* Neg = */ false);
4258 void DoubleAPFloat::makeZero(bool Neg) {
4259 Floats[0].makeZero(Neg);
4260 Floats[1].makeZero(/* Neg = */ false);
4263 void DoubleAPFloat::makeLargest(bool Neg) {
4264 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4265 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
4266 Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
4267 if (Neg)
4268 changeSign();
4271 void DoubleAPFloat::makeSmallest(bool Neg) {
4272 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4273 Floats[0].makeSmallest(Neg);
4274 Floats[1].makeZero(/* Neg = */ false);
4277 void DoubleAPFloat::makeSmallestNormalized(bool Neg) {
4278 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4279 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
4280 if (Neg)
4281 Floats[0].changeSign();
4282 Floats[1].makeZero(/* Neg = */ false);
4285 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
4286 Floats[0].makeNaN(SNaN, Neg, fill);
4287 Floats[1].makeZero(/* Neg = */ false);
4290 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const {
4291 auto Result = Floats[0].compare(RHS.Floats[0]);
4292 // |Float[0]| > |Float[1]|
4293 if (Result == APFloat::cmpEqual)
4294 return Floats[1].compare(RHS.Floats[1]);
4295 return Result;
4298 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const {
4299 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
4300 Floats[1].bitwiseIsEqual(RHS.Floats[1]);
4303 hash_code hash_value(const DoubleAPFloat &Arg) {
4304 if (Arg.Floats)
4305 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
4306 return hash_combine(Arg.Semantics);
4309 APInt DoubleAPFloat::bitcastToAPInt() const {
4310 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4311 uint64_t Data[] = {
4312 Floats[0].bitcastToAPInt().getRawData()[0],
4313 Floats[1].bitcastToAPInt().getRawData()[0],
4315 return APInt(128, 2, Data);
4318 APFloat::opStatus DoubleAPFloat::convertFromString(StringRef S,
4319 roundingMode RM) {
4320 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4321 APFloat Tmp(semPPCDoubleDoubleLegacy);
4322 auto Ret = Tmp.convertFromString(S, RM);
4323 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4324 return Ret;
4327 APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
4328 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4329 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4330 auto Ret = Tmp.next(nextDown);
4331 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4332 return Ret;
4335 APFloat::opStatus
4336 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input,
4337 unsigned int Width, bool IsSigned,
4338 roundingMode RM, bool *IsExact) const {
4339 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4340 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4341 .convertToInteger(Input, Width, IsSigned, RM, IsExact);
4344 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,
4345 bool IsSigned,
4346 roundingMode RM) {
4347 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4348 APFloat Tmp(semPPCDoubleDoubleLegacy);
4349 auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM);
4350 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4351 return Ret;
4354 APFloat::opStatus
4355 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input,
4356 unsigned int InputSize,
4357 bool IsSigned, roundingMode RM) {
4358 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4359 APFloat Tmp(semPPCDoubleDoubleLegacy);
4360 auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);
4361 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4362 return Ret;
4365 APFloat::opStatus
4366 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input,
4367 unsigned int InputSize,
4368 bool IsSigned, roundingMode RM) {
4369 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4370 APFloat Tmp(semPPCDoubleDoubleLegacy);
4371 auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);
4372 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4373 return Ret;
4376 unsigned int DoubleAPFloat::convertToHexString(char *DST,
4377 unsigned int HexDigits,
4378 bool UpperCase,
4379 roundingMode RM) const {
4380 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4381 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4382 .convertToHexString(DST, HexDigits, UpperCase, RM);
4385 bool DoubleAPFloat::isDenormal() const {
4386 return getCategory() == fcNormal &&
4387 (Floats[0].isDenormal() || Floats[1].isDenormal() ||
4388 // (double)(Hi + Lo) == Hi defines a normal number.
4389 Floats[0].compare(Floats[0] + Floats[1]) != cmpEqual);
4392 bool DoubleAPFloat::isSmallest() const {
4393 if (getCategory() != fcNormal)
4394 return false;
4395 DoubleAPFloat Tmp(*this);
4396 Tmp.makeSmallest(this->isNegative());
4397 return Tmp.compare(*this) == cmpEqual;
4400 bool DoubleAPFloat::isLargest() const {
4401 if (getCategory() != fcNormal)
4402 return false;
4403 DoubleAPFloat Tmp(*this);
4404 Tmp.makeLargest(this->isNegative());
4405 return Tmp.compare(*this) == cmpEqual;
4408 bool DoubleAPFloat::isInteger() const {
4409 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4410 return Floats[0].isInteger() && Floats[1].isInteger();
4413 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
4414 unsigned FormatPrecision,
4415 unsigned FormatMaxPadding,
4416 bool TruncateZero) const {
4417 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4418 APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4419 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
4422 bool DoubleAPFloat::getExactInverse(APFloat *inv) const {
4423 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4424 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4425 if (!inv)
4426 return Tmp.getExactInverse(nullptr);
4427 APFloat Inv(semPPCDoubleDoubleLegacy);
4428 auto Ret = Tmp.getExactInverse(&Inv);
4429 *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt());
4430 return Ret;
4433 DoubleAPFloat scalbn(DoubleAPFloat Arg, int Exp, APFloat::roundingMode RM) {
4434 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4435 return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM),
4436 scalbn(Arg.Floats[1], Exp, RM));
4439 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
4440 APFloat::roundingMode RM) {
4441 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4442 APFloat First = frexp(Arg.Floats[0], Exp, RM);
4443 APFloat Second = Arg.Floats[1];
4444 if (Arg.getCategory() == APFloat::fcNormal)
4445 Second = scalbn(Second, -Exp, RM);
4446 return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second));
4449 } // End detail namespace
4451 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
4452 if (usesLayout<IEEEFloat>(Semantics)) {
4453 new (&IEEE) IEEEFloat(std::move(F));
4454 return;
4456 if (usesLayout<DoubleAPFloat>(Semantics)) {
4457 const fltSemantics& S = F.getSemantics();
4458 new (&Double)
4459 DoubleAPFloat(Semantics, APFloat(std::move(F), S),
4460 APFloat(semIEEEdouble));
4461 return;
4463 llvm_unreachable("Unexpected semantics");
4466 APFloat::opStatus APFloat::convertFromString(StringRef Str, roundingMode RM) {
4467 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM));
4470 hash_code hash_value(const APFloat &Arg) {
4471 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
4472 return hash_value(Arg.U.IEEE);
4473 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
4474 return hash_value(Arg.U.Double);
4475 llvm_unreachable("Unexpected semantics");
4478 APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
4479 : APFloat(Semantics) {
4480 convertFromString(S, rmNearestTiesToEven);
4483 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
4484 roundingMode RM, bool *losesInfo) {
4485 if (&getSemantics() == &ToSemantics) {
4486 *losesInfo = false;
4487 return opOK;
4489 if (usesLayout<IEEEFloat>(getSemantics()) &&
4490 usesLayout<IEEEFloat>(ToSemantics))
4491 return U.IEEE.convert(ToSemantics, RM, losesInfo);
4492 if (usesLayout<IEEEFloat>(getSemantics()) &&
4493 usesLayout<DoubleAPFloat>(ToSemantics)) {
4494 assert(&ToSemantics == &semPPCDoubleDouble);
4495 auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo);
4496 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
4497 return Ret;
4499 if (usesLayout<DoubleAPFloat>(getSemantics()) &&
4500 usesLayout<IEEEFloat>(ToSemantics)) {
4501 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
4502 *this = APFloat(std::move(getIEEE()), ToSemantics);
4503 return Ret;
4505 llvm_unreachable("Unexpected semantics");
4508 APFloat APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE) {
4509 if (isIEEE) {
4510 switch (BitWidth) {
4511 case 16:
4512 return APFloat(semIEEEhalf, APInt::getAllOnesValue(BitWidth));
4513 case 32:
4514 return APFloat(semIEEEsingle, APInt::getAllOnesValue(BitWidth));
4515 case 64:
4516 return APFloat(semIEEEdouble, APInt::getAllOnesValue(BitWidth));
4517 case 80:
4518 return APFloat(semX87DoubleExtended, APInt::getAllOnesValue(BitWidth));
4519 case 128:
4520 return APFloat(semIEEEquad, APInt::getAllOnesValue(BitWidth));
4521 default:
4522 llvm_unreachable("Unknown floating bit width");
4524 } else {
4525 assert(BitWidth == 128);
4526 return APFloat(semPPCDoubleDouble, APInt::getAllOnesValue(BitWidth));
4530 void APFloat::print(raw_ostream &OS) const {
4531 SmallVector<char, 16> Buffer;
4532 toString(Buffer);
4533 OS << Buffer << "\n";
4536 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4537 LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); }
4538 #endif
4540 void APFloat::Profile(FoldingSetNodeID &NID) const {
4541 NID.Add(bitcastToAPInt());
4544 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
4545 an APSInt, whose initial bit-width and signed-ness are used to determine the
4546 precision of the conversion.
4548 APFloat::opStatus APFloat::convertToInteger(APSInt &result,
4549 roundingMode rounding_mode,
4550 bool *isExact) const {
4551 unsigned bitWidth = result.getBitWidth();
4552 SmallVector<uint64_t, 4> parts(result.getNumWords());
4553 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
4554 rounding_mode, isExact);
4555 // Keeps the original signed-ness.
4556 result = APInt(bitWidth, parts);
4557 return status;
4560 } // End llvm namespace
4562 #undef APFLOAT_DISPATCH_ON_SEMANTICS