[PowerPC] Materialize more constants with CR-field set in late peephole
[llvm-core.git] / lib / Support / APFloat.cpp
blob24005c1890c9d3686fd5be2450d78037d1c78ef1
1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/FoldingSet.h"
19 #include "llvm/ADT/Hashing.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/Config/llvm-config.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include <cstring>
28 #include <limits.h>
30 #define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \
31 do { \
32 if (usesLayout<IEEEFloat>(getSemantics())) \
33 return U.IEEE.METHOD_CALL; \
34 if (usesLayout<DoubleAPFloat>(getSemantics())) \
35 return U.Double.METHOD_CALL; \
36 llvm_unreachable("Unexpected semantics"); \
37 } while (false)
39 using namespace llvm;
41 /// A macro used to combine two fcCategory enums into one key which can be used
42 /// in a switch statement to classify how the interaction of two APFloat's
43 /// categories affects an operation.
44 ///
45 /// TODO: If clang source code is ever allowed to use constexpr in its own
46 /// codebase, change this into a static inline function.
47 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
49 /* Assumed in hexadecimal significand parsing, and conversion to
50 hexadecimal strings. */
51 static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
53 namespace llvm {
54 /* Represents floating point arithmetic semantics. */
55 struct fltSemantics {
56 /* The largest E such that 2^E is representable; this matches the
57 definition of IEEE 754. */
58 APFloatBase::ExponentType maxExponent;
60 /* The smallest E such that 2^E is a normalized number; this
61 matches the definition of IEEE 754. */
62 APFloatBase::ExponentType minExponent;
64 /* Number of bits in the significand. This includes the integer
65 bit. */
66 unsigned int precision;
68 /* Number of bits actually used in the semantics. */
69 unsigned int sizeInBits;
72 static const fltSemantics semIEEEhalf = {15, -14, 11, 16};
73 static const fltSemantics semIEEEsingle = {127, -126, 24, 32};
74 static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
75 static const fltSemantics semIEEEquad = {16383, -16382, 113, 128};
76 static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
77 static const fltSemantics semBogus = {0, 0, 0, 0};
79 /* The IBM double-double semantics. Such a number consists of a pair of IEEE
80 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
81 (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
82 Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
83 to each other, and two 11-bit exponents.
85 Note: we need to make the value different from semBogus as otherwise
86 an unsafe optimization may collapse both values to a single address,
87 and we heavily rely on them having distinct addresses. */
88 static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 0};
90 /* These are legacy semantics for the fallback, inaccrurate implementation of
91 IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
92 operation. It's equivalent to having an IEEE number with consecutive 106
93 bits of mantissa and 11 bits of exponent.
95 It's not equivalent to IBM double-double. For example, a legit IBM
96 double-double, 1 + epsilon:
98 1 + epsilon = 1 + (1 >> 1076)
100 is not representable by a consecutive 106 bits of mantissa.
102 Currently, these semantics are used in the following way:
104 semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
105 (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
106 semPPCDoubleDoubleLegacy -> IEEE operations
108 We use bitcastToAPInt() to get the bit representation (in APInt) of the
109 underlying IEEEdouble, then use the APInt constructor to construct the
110 legacy IEEE float.
112 TODO: Implement all operations in semPPCDoubleDouble, and delete these
113 semantics. */
114 static const fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
115 53 + 53, 128};
117 const fltSemantics &APFloatBase::IEEEhalf() {
118 return semIEEEhalf;
120 const fltSemantics &APFloatBase::IEEEsingle() {
121 return semIEEEsingle;
123 const fltSemantics &APFloatBase::IEEEdouble() {
124 return semIEEEdouble;
126 const fltSemantics &APFloatBase::IEEEquad() {
127 return semIEEEquad;
129 const fltSemantics &APFloatBase::x87DoubleExtended() {
130 return semX87DoubleExtended;
132 const fltSemantics &APFloatBase::Bogus() {
133 return semBogus;
135 const fltSemantics &APFloatBase::PPCDoubleDouble() {
136 return semPPCDoubleDouble;
139 /* A tight upper bound on number of parts required to hold the value
140 pow(5, power) is
142 power * 815 / (351 * integerPartWidth) + 1
144 However, whilst the result may require only this many parts,
145 because we are multiplying two values to get it, the
146 multiplication may require an extra part with the excess part
147 being zero (consider the trivial case of 1 * 1, tcFullMultiply
148 requires two parts to hold the single-part result). So we add an
149 extra one to guarantee enough space whilst multiplying. */
150 const unsigned int maxExponent = 16383;
151 const unsigned int maxPrecision = 113;
152 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
153 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth));
155 unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
156 return semantics.precision;
158 APFloatBase::ExponentType
159 APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {
160 return semantics.maxExponent;
162 APFloatBase::ExponentType
163 APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {
164 return semantics.minExponent;
166 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
167 return semantics.sizeInBits;
170 unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
171 return Sem.sizeInBits;
174 /* A bunch of private, handy routines. */
176 static inline unsigned int
177 partCountForBits(unsigned int bits)
179 return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth;
182 /* Returns 0U-9U. Return values >= 10U are not digits. */
183 static inline unsigned int
184 decDigitValue(unsigned int c)
186 return c - '0';
189 /* Return the value of a decimal exponent of the form
190 [+-]ddddddd.
192 If the exponent overflows, returns a large exponent with the
193 appropriate sign. */
194 static int
195 readExponent(StringRef::iterator begin, StringRef::iterator end)
197 bool isNegative;
198 unsigned int absExponent;
199 const unsigned int overlargeExponent = 24000; /* FIXME. */
200 StringRef::iterator p = begin;
202 assert(p != end && "Exponent has no digits");
204 isNegative = (*p == '-');
205 if (*p == '-' || *p == '+') {
206 p++;
207 assert(p != end && "Exponent has no digits");
210 absExponent = decDigitValue(*p++);
211 assert(absExponent < 10U && "Invalid character in exponent");
213 for (; p != end; ++p) {
214 unsigned int value;
216 value = decDigitValue(*p);
217 assert(value < 10U && "Invalid character in exponent");
219 value += absExponent * 10;
220 if (absExponent >= overlargeExponent) {
221 absExponent = overlargeExponent;
222 p = end; /* outwit assert below */
223 break;
225 absExponent = value;
228 assert(p == end && "Invalid exponent in exponent");
230 if (isNegative)
231 return -(int) absExponent;
232 else
233 return (int) absExponent;
236 /* This is ugly and needs cleaning up, but I don't immediately see
237 how whilst remaining safe. */
238 static int
239 totalExponent(StringRef::iterator p, StringRef::iterator end,
240 int exponentAdjustment)
242 int unsignedExponent;
243 bool negative, overflow;
244 int exponent = 0;
246 assert(p != end && "Exponent has no digits");
248 negative = *p == '-';
249 if (*p == '-' || *p == '+') {
250 p++;
251 assert(p != end && "Exponent has no digits");
254 unsignedExponent = 0;
255 overflow = false;
256 for (; p != end; ++p) {
257 unsigned int value;
259 value = decDigitValue(*p);
260 assert(value < 10U && "Invalid character in exponent");
262 unsignedExponent = unsignedExponent * 10 + value;
263 if (unsignedExponent > 32767) {
264 overflow = true;
265 break;
269 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
270 overflow = true;
272 if (!overflow) {
273 exponent = unsignedExponent;
274 if (negative)
275 exponent = -exponent;
276 exponent += exponentAdjustment;
277 if (exponent > 32767 || exponent < -32768)
278 overflow = true;
281 if (overflow)
282 exponent = negative ? -32768: 32767;
284 return exponent;
287 static StringRef::iterator
288 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
289 StringRef::iterator *dot)
291 StringRef::iterator p = begin;
292 *dot = end;
293 while (p != end && *p == '0')
294 p++;
296 if (p != end && *p == '.') {
297 *dot = p++;
299 assert(end - begin != 1 && "Significand has no digits");
301 while (p != end && *p == '0')
302 p++;
305 return p;
308 /* Given a normal decimal floating point number of the form
310 dddd.dddd[eE][+-]ddd
312 where the decimal point and exponent are optional, fill out the
313 structure D. Exponent is appropriate if the significand is
314 treated as an integer, and normalizedExponent if the significand
315 is taken to have the decimal point after a single leading
316 non-zero digit.
318 If the value is zero, V->firstSigDigit points to a non-digit, and
319 the return exponent is zero.
321 struct decimalInfo {
322 const char *firstSigDigit;
323 const char *lastSigDigit;
324 int exponent;
325 int normalizedExponent;
328 static void
329 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
330 decimalInfo *D)
332 StringRef::iterator dot = end;
333 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
335 D->firstSigDigit = p;
336 D->exponent = 0;
337 D->normalizedExponent = 0;
339 for (; p != end; ++p) {
340 if (*p == '.') {
341 assert(dot == end && "String contains multiple dots");
342 dot = p++;
343 if (p == end)
344 break;
346 if (decDigitValue(*p) >= 10U)
347 break;
350 if (p != end) {
351 assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
352 assert(p != begin && "Significand has no digits");
353 assert((dot == end || p - begin != 1) && "Significand has no digits");
355 /* p points to the first non-digit in the string */
356 D->exponent = readExponent(p + 1, end);
358 /* Implied decimal point? */
359 if (dot == end)
360 dot = p;
363 /* If number is all zeroes accept any exponent. */
364 if (p != D->firstSigDigit) {
365 /* Drop insignificant trailing zeroes. */
366 if (p != begin) {
369 p--;
370 while (p != begin && *p == '0');
371 while (p != begin && *p == '.');
374 /* Adjust the exponents for any decimal point. */
375 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
376 D->normalizedExponent = (D->exponent +
377 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
378 - (dot > D->firstSigDigit && dot < p)));
381 D->lastSigDigit = p;
384 /* Return the trailing fraction of a hexadecimal number.
385 DIGITVALUE is the first hex digit of the fraction, P points to
386 the next digit. */
387 static lostFraction
388 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
389 unsigned int digitValue)
391 unsigned int hexDigit;
393 /* If the first trailing digit isn't 0 or 8 we can work out the
394 fraction immediately. */
395 if (digitValue > 8)
396 return lfMoreThanHalf;
397 else if (digitValue < 8 && digitValue > 0)
398 return lfLessThanHalf;
400 // Otherwise we need to find the first non-zero digit.
401 while (p != end && (*p == '0' || *p == '.'))
402 p++;
404 assert(p != end && "Invalid trailing hexadecimal fraction!");
406 hexDigit = hexDigitValue(*p);
408 /* If we ran off the end it is exactly zero or one-half, otherwise
409 a little more. */
410 if (hexDigit == -1U)
411 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
412 else
413 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
416 /* Return the fraction lost were a bignum truncated losing the least
417 significant BITS bits. */
418 static lostFraction
419 lostFractionThroughTruncation(const APFloatBase::integerPart *parts,
420 unsigned int partCount,
421 unsigned int bits)
423 unsigned int lsb;
425 lsb = APInt::tcLSB(parts, partCount);
427 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
428 if (bits <= lsb)
429 return lfExactlyZero;
430 if (bits == lsb + 1)
431 return lfExactlyHalf;
432 if (bits <= partCount * APFloatBase::integerPartWidth &&
433 APInt::tcExtractBit(parts, bits - 1))
434 return lfMoreThanHalf;
436 return lfLessThanHalf;
439 /* Shift DST right BITS bits noting lost fraction. */
440 static lostFraction
441 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
443 lostFraction lost_fraction;
445 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
447 APInt::tcShiftRight(dst, parts, bits);
449 return lost_fraction;
452 /* Combine the effect of two lost fractions. */
453 static lostFraction
454 combineLostFractions(lostFraction moreSignificant,
455 lostFraction lessSignificant)
457 if (lessSignificant != lfExactlyZero) {
458 if (moreSignificant == lfExactlyZero)
459 moreSignificant = lfLessThanHalf;
460 else if (moreSignificant == lfExactlyHalf)
461 moreSignificant = lfMoreThanHalf;
464 return moreSignificant;
467 /* The error from the true value, in half-ulps, on multiplying two
468 floating point numbers, which differ from the value they
469 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
470 than the returned value.
472 See "How to Read Floating Point Numbers Accurately" by William D
473 Clinger. */
474 static unsigned int
475 HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
477 assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));
479 if (HUerr1 + HUerr2 == 0)
480 return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */
481 else
482 return inexactMultiply + 2 * (HUerr1 + HUerr2);
485 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
486 when the least significant BITS are truncated. BITS cannot be
487 zero. */
488 static APFloatBase::integerPart
489 ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
490 bool isNearest) {
491 unsigned int count, partBits;
492 APFloatBase::integerPart part, boundary;
494 assert(bits != 0);
496 bits--;
497 count = bits / APFloatBase::integerPartWidth;
498 partBits = bits % APFloatBase::integerPartWidth + 1;
500 part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));
502 if (isNearest)
503 boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
504 else
505 boundary = 0;
507 if (count == 0) {
508 if (part - boundary <= boundary - part)
509 return part - boundary;
510 else
511 return boundary - part;
514 if (part == boundary) {
515 while (--count)
516 if (parts[count])
517 return ~(APFloatBase::integerPart) 0; /* A lot. */
519 return parts[0];
520 } else if (part == boundary - 1) {
521 while (--count)
522 if (~parts[count])
523 return ~(APFloatBase::integerPart) 0; /* A lot. */
525 return -parts[0];
528 return ~(APFloatBase::integerPart) 0; /* A lot. */
531 /* Place pow(5, power) in DST, and return the number of parts used.
532 DST must be at least one part larger than size of the answer. */
533 static unsigned int
534 powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
535 static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
536 APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
537 pow5s[0] = 78125 * 5;
539 unsigned int partsCount[16] = { 1 };
540 APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
541 unsigned int result;
542 assert(power <= maxExponent);
544 p1 = dst;
545 p2 = scratch;
547 *p1 = firstEightPowers[power & 7];
548 power >>= 3;
550 result = 1;
551 pow5 = pow5s;
553 for (unsigned int n = 0; power; power >>= 1, n++) {
554 unsigned int pc;
556 pc = partsCount[n];
558 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
559 if (pc == 0) {
560 pc = partsCount[n - 1];
561 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
562 pc *= 2;
563 if (pow5[pc - 1] == 0)
564 pc--;
565 partsCount[n] = pc;
568 if (power & 1) {
569 APFloatBase::integerPart *tmp;
571 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
572 result += pc;
573 if (p2[result - 1] == 0)
574 result--;
576 /* Now result is in p1 with partsCount parts and p2 is scratch
577 space. */
578 tmp = p1;
579 p1 = p2;
580 p2 = tmp;
583 pow5 += pc;
586 if (p1 != dst)
587 APInt::tcAssign(dst, p1, result);
589 return result;
592 /* Zero at the end to avoid modular arithmetic when adding one; used
593 when rounding up during hexadecimal output. */
594 static const char hexDigitsLower[] = "0123456789abcdef0";
595 static const char hexDigitsUpper[] = "0123456789ABCDEF0";
596 static const char infinityL[] = "infinity";
597 static const char infinityU[] = "INFINITY";
598 static const char NaNL[] = "nan";
599 static const char NaNU[] = "NAN";
601 /* Write out an integerPart in hexadecimal, starting with the most
602 significant nibble. Write out exactly COUNT hexdigits, return
603 COUNT. */
604 static unsigned int
605 partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
606 const char *hexDigitChars)
608 unsigned int result = count;
610 assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);
612 part >>= (APFloatBase::integerPartWidth - 4 * count);
613 while (count--) {
614 dst[count] = hexDigitChars[part & 0xf];
615 part >>= 4;
618 return result;
621 /* Write out an unsigned decimal integer. */
622 static char *
623 writeUnsignedDecimal (char *dst, unsigned int n)
625 char buff[40], *p;
627 p = buff;
629 *p++ = '0' + n % 10;
630 while (n /= 10);
633 *dst++ = *--p;
634 while (p != buff);
636 return dst;
639 /* Write out a signed decimal integer. */
640 static char *
641 writeSignedDecimal (char *dst, int value)
643 if (value < 0) {
644 *dst++ = '-';
645 dst = writeUnsignedDecimal(dst, -(unsigned) value);
646 } else
647 dst = writeUnsignedDecimal(dst, value);
649 return dst;
652 namespace detail {
653 /* Constructors. */
654 void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
655 unsigned int count;
657 semantics = ourSemantics;
658 count = partCount();
659 if (count > 1)
660 significand.parts = new integerPart[count];
663 void IEEEFloat::freeSignificand() {
664 if (needsCleanup())
665 delete [] significand.parts;
668 void IEEEFloat::assign(const IEEEFloat &rhs) {
669 assert(semantics == rhs.semantics);
671 sign = rhs.sign;
672 category = rhs.category;
673 exponent = rhs.exponent;
674 if (isFiniteNonZero() || category == fcNaN)
675 copySignificand(rhs);
678 void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
679 assert(isFiniteNonZero() || category == fcNaN);
680 assert(rhs.partCount() >= partCount());
682 APInt::tcAssign(significandParts(), rhs.significandParts(),
683 partCount());
686 /* Make this number a NaN, with an arbitrary but deterministic value
687 for the significand. If double or longer, this is a signalling NaN,
688 which may not be ideal. If float, this is QNaN(0). */
689 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
690 category = fcNaN;
691 sign = Negative;
693 integerPart *significand = significandParts();
694 unsigned numParts = partCount();
696 // Set the significand bits to the fill.
697 if (!fill || fill->getNumWords() < numParts)
698 APInt::tcSet(significand, 0, numParts);
699 if (fill) {
700 APInt::tcAssign(significand, fill->getRawData(),
701 std::min(fill->getNumWords(), numParts));
703 // Zero out the excess bits of the significand.
704 unsigned bitsToPreserve = semantics->precision - 1;
705 unsigned part = bitsToPreserve / 64;
706 bitsToPreserve %= 64;
707 significand[part] &= ((1ULL << bitsToPreserve) - 1);
708 for (part++; part != numParts; ++part)
709 significand[part] = 0;
712 unsigned QNaNBit = semantics->precision - 2;
714 if (SNaN) {
715 // We always have to clear the QNaN bit to make it an SNaN.
716 APInt::tcClearBit(significand, QNaNBit);
718 // If there are no bits set in the payload, we have to set
719 // *something* to make it a NaN instead of an infinity;
720 // conventionally, this is the next bit down from the QNaN bit.
721 if (APInt::tcIsZero(significand, numParts))
722 APInt::tcSetBit(significand, QNaNBit - 1);
723 } else {
724 // We always have to set the QNaN bit to make it a QNaN.
725 APInt::tcSetBit(significand, QNaNBit);
728 // For x87 extended precision, we want to make a NaN, not a
729 // pseudo-NaN. Maybe we should expose the ability to make
730 // pseudo-NaNs?
731 if (semantics == &semX87DoubleExtended)
732 APInt::tcSetBit(significand, QNaNBit + 1);
735 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) {
736 if (this != &rhs) {
737 if (semantics != rhs.semantics) {
738 freeSignificand();
739 initialize(rhs.semantics);
741 assign(rhs);
744 return *this;
747 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
748 freeSignificand();
750 semantics = rhs.semantics;
751 significand = rhs.significand;
752 exponent = rhs.exponent;
753 category = rhs.category;
754 sign = rhs.sign;
756 rhs.semantics = &semBogus;
757 return *this;
760 bool IEEEFloat::isDenormal() const {
761 return isFiniteNonZero() && (exponent == semantics->minExponent) &&
762 (APInt::tcExtractBit(significandParts(),
763 semantics->precision - 1) == 0);
766 bool IEEEFloat::isSmallest() const {
767 // The smallest number by magnitude in our format will be the smallest
768 // denormal, i.e. the floating point number with exponent being minimum
769 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
770 return isFiniteNonZero() && exponent == semantics->minExponent &&
771 significandMSB() == 0;
774 bool IEEEFloat::isSignificandAllOnes() const {
775 // Test if the significand excluding the integral bit is all ones. This allows
776 // us to test for binade boundaries.
777 const integerPart *Parts = significandParts();
778 const unsigned PartCount = partCount();
779 for (unsigned i = 0; i < PartCount - 1; i++)
780 if (~Parts[i])
781 return false;
783 // Set the unused high bits to all ones when we compare.
784 const unsigned NumHighBits =
785 PartCount*integerPartWidth - semantics->precision + 1;
786 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
787 "fill than integerPartWidth");
788 const integerPart HighBitFill =
789 ~integerPart(0) << (integerPartWidth - NumHighBits);
790 if (~(Parts[PartCount - 1] | HighBitFill))
791 return false;
793 return true;
796 bool IEEEFloat::isSignificandAllZeros() const {
797 // Test if the significand excluding the integral bit is all zeros. This
798 // allows us to test for binade boundaries.
799 const integerPart *Parts = significandParts();
800 const unsigned PartCount = partCount();
802 for (unsigned i = 0; i < PartCount - 1; i++)
803 if (Parts[i])
804 return false;
806 const unsigned NumHighBits =
807 PartCount*integerPartWidth - semantics->precision + 1;
808 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
809 "clear than integerPartWidth");
810 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
812 if (Parts[PartCount - 1] & HighBitMask)
813 return false;
815 return true;
818 bool IEEEFloat::isLargest() const {
819 // The largest number by magnitude in our format will be the floating point
820 // number with maximum exponent and with significand that is all ones.
821 return isFiniteNonZero() && exponent == semantics->maxExponent
822 && isSignificandAllOnes();
825 bool IEEEFloat::isInteger() const {
826 // This could be made more efficient; I'm going for obviously correct.
827 if (!isFinite()) return false;
828 IEEEFloat truncated = *this;
829 truncated.roundToIntegral(rmTowardZero);
830 return compare(truncated) == cmpEqual;
833 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
834 if (this == &rhs)
835 return true;
836 if (semantics != rhs.semantics ||
837 category != rhs.category ||
838 sign != rhs.sign)
839 return false;
840 if (category==fcZero || category==fcInfinity)
841 return true;
843 if (isFiniteNonZero() && exponent != rhs.exponent)
844 return false;
846 return std::equal(significandParts(), significandParts() + partCount(),
847 rhs.significandParts());
850 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) {
851 initialize(&ourSemantics);
852 sign = 0;
853 category = fcNormal;
854 zeroSignificand();
855 exponent = ourSemantics.precision - 1;
856 significandParts()[0] = value;
857 normalize(rmNearestTiesToEven, lfExactlyZero);
860 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) {
861 initialize(&ourSemantics);
862 category = fcZero;
863 sign = false;
866 // Delegate to the previous constructor, because later copy constructor may
867 // actually inspects category, which can't be garbage.
868 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
869 : IEEEFloat(ourSemantics) {}
871 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) {
872 initialize(rhs.semantics);
873 assign(rhs);
876 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) {
877 *this = std::move(rhs);
880 IEEEFloat::~IEEEFloat() { freeSignificand(); }
882 unsigned int IEEEFloat::partCount() const {
883 return partCountForBits(semantics->precision + 1);
886 const IEEEFloat::integerPart *IEEEFloat::significandParts() const {
887 return const_cast<IEEEFloat *>(this)->significandParts();
890 IEEEFloat::integerPart *IEEEFloat::significandParts() {
891 if (partCount() > 1)
892 return significand.parts;
893 else
894 return &significand.part;
897 void IEEEFloat::zeroSignificand() {
898 APInt::tcSet(significandParts(), 0, partCount());
901 /* Increment an fcNormal floating point number's significand. */
902 void IEEEFloat::incrementSignificand() {
903 integerPart carry;
905 carry = APInt::tcIncrement(significandParts(), partCount());
907 /* Our callers should never cause us to overflow. */
908 assert(carry == 0);
909 (void)carry;
912 /* Add the significand of the RHS. Returns the carry flag. */
913 IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
914 integerPart *parts;
916 parts = significandParts();
918 assert(semantics == rhs.semantics);
919 assert(exponent == rhs.exponent);
921 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
924 /* Subtract the significand of the RHS with a borrow flag. Returns
925 the borrow flag. */
926 IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
927 integerPart borrow) {
928 integerPart *parts;
930 parts = significandParts();
932 assert(semantics == rhs.semantics);
933 assert(exponent == rhs.exponent);
935 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
936 partCount());
939 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
940 on to the full-precision result of the multiplication. Returns the
941 lost fraction. */
942 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
943 const IEEEFloat *addend) {
944 unsigned int omsb; // One, not zero, based MSB.
945 unsigned int partsCount, newPartsCount, precision;
946 integerPart *lhsSignificand;
947 integerPart scratch[4];
948 integerPart *fullSignificand;
949 lostFraction lost_fraction;
950 bool ignored;
952 assert(semantics == rhs.semantics);
954 precision = semantics->precision;
956 // Allocate space for twice as many bits as the original significand, plus one
957 // extra bit for the addition to overflow into.
958 newPartsCount = partCountForBits(precision * 2 + 1);
960 if (newPartsCount > 4)
961 fullSignificand = new integerPart[newPartsCount];
962 else
963 fullSignificand = scratch;
965 lhsSignificand = significandParts();
966 partsCount = partCount();
968 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
969 rhs.significandParts(), partsCount, partsCount);
971 lost_fraction = lfExactlyZero;
972 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
973 exponent += rhs.exponent;
975 // Assume the operands involved in the multiplication are single-precision
976 // FP, and the two multiplicants are:
977 // *this = a23 . a22 ... a0 * 2^e1
978 // rhs = b23 . b22 ... b0 * 2^e2
979 // the result of multiplication is:
980 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
981 // Note that there are three significant bits at the left-hand side of the
982 // radix point: two for the multiplication, and an overflow bit for the
983 // addition (that will always be zero at this point). Move the radix point
984 // toward left by two bits, and adjust exponent accordingly.
985 exponent += 2;
987 if (addend && addend->isNonZero()) {
988 // The intermediate result of the multiplication has "2 * precision"
989 // signicant bit; adjust the addend to be consistent with mul result.
991 Significand savedSignificand = significand;
992 const fltSemantics *savedSemantics = semantics;
993 fltSemantics extendedSemantics;
994 opStatus status;
995 unsigned int extendedPrecision;
997 // Normalize our MSB to one below the top bit to allow for overflow.
998 extendedPrecision = 2 * precision + 1;
999 if (omsb != extendedPrecision - 1) {
1000 assert(extendedPrecision > omsb);
1001 APInt::tcShiftLeft(fullSignificand, newPartsCount,
1002 (extendedPrecision - 1) - omsb);
1003 exponent -= (extendedPrecision - 1) - omsb;
1006 /* Create new semantics. */
1007 extendedSemantics = *semantics;
1008 extendedSemantics.precision = extendedPrecision;
1010 if (newPartsCount == 1)
1011 significand.part = fullSignificand[0];
1012 else
1013 significand.parts = fullSignificand;
1014 semantics = &extendedSemantics;
1016 IEEEFloat extendedAddend(*addend);
1017 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
1018 assert(status == opOK);
1019 (void)status;
1021 // Shift the significand of the addend right by one bit. This guarantees
1022 // that the high bit of the significand is zero (same as fullSignificand),
1023 // so the addition will overflow (if it does overflow at all) into the top bit.
1024 lost_fraction = extendedAddend.shiftSignificandRight(1);
1025 assert(lost_fraction == lfExactlyZero &&
1026 "Lost precision while shifting addend for fused-multiply-add.");
1028 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1030 /* Restore our state. */
1031 if (newPartsCount == 1)
1032 fullSignificand[0] = significand.part;
1033 significand = savedSignificand;
1034 semantics = savedSemantics;
1036 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1039 // Convert the result having "2 * precision" significant-bits back to the one
1040 // having "precision" significant-bits. First, move the radix point from
1041 // poision "2*precision - 1" to "precision - 1". The exponent need to be
1042 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1043 exponent -= precision + 1;
1045 // In case MSB resides at the left-hand side of radix point, shift the
1046 // mantissa right by some amount to make sure the MSB reside right before
1047 // the radix point (i.e. "MSB . rest-significant-bits").
1049 // Note that the result is not normalized when "omsb < precision". So, the
1050 // caller needs to call IEEEFloat::normalize() if normalized value is
1051 // expected.
1052 if (omsb > precision) {
1053 unsigned int bits, significantParts;
1054 lostFraction lf;
1056 bits = omsb - precision;
1057 significantParts = partCountForBits(omsb);
1058 lf = shiftRight(fullSignificand, significantParts, bits);
1059 lost_fraction = combineLostFractions(lf, lost_fraction);
1060 exponent += bits;
1063 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1065 if (newPartsCount > 4)
1066 delete [] fullSignificand;
1068 return lost_fraction;
1071 /* Multiply the significands of LHS and RHS to DST. */
1072 lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1073 unsigned int bit, i, partsCount;
1074 const integerPart *rhsSignificand;
1075 integerPart *lhsSignificand, *dividend, *divisor;
1076 integerPart scratch[4];
1077 lostFraction lost_fraction;
1079 assert(semantics == rhs.semantics);
1081 lhsSignificand = significandParts();
1082 rhsSignificand = rhs.significandParts();
1083 partsCount = partCount();
1085 if (partsCount > 2)
1086 dividend = new integerPart[partsCount * 2];
1087 else
1088 dividend = scratch;
1090 divisor = dividend + partsCount;
1092 /* Copy the dividend and divisor as they will be modified in-place. */
1093 for (i = 0; i < partsCount; i++) {
1094 dividend[i] = lhsSignificand[i];
1095 divisor[i] = rhsSignificand[i];
1096 lhsSignificand[i] = 0;
1099 exponent -= rhs.exponent;
1101 unsigned int precision = semantics->precision;
1103 /* Normalize the divisor. */
1104 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1105 if (bit) {
1106 exponent += bit;
1107 APInt::tcShiftLeft(divisor, partsCount, bit);
1110 /* Normalize the dividend. */
1111 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1112 if (bit) {
1113 exponent -= bit;
1114 APInt::tcShiftLeft(dividend, partsCount, bit);
1117 /* Ensure the dividend >= divisor initially for the loop below.
1118 Incidentally, this means that the division loop below is
1119 guaranteed to set the integer bit to one. */
1120 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1121 exponent--;
1122 APInt::tcShiftLeft(dividend, partsCount, 1);
1123 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1126 /* Long division. */
1127 for (bit = precision; bit; bit -= 1) {
1128 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1129 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1130 APInt::tcSetBit(lhsSignificand, bit - 1);
1133 APInt::tcShiftLeft(dividend, partsCount, 1);
1136 /* Figure out the lost fraction. */
1137 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1139 if (cmp > 0)
1140 lost_fraction = lfMoreThanHalf;
1141 else if (cmp == 0)
1142 lost_fraction = lfExactlyHalf;
1143 else if (APInt::tcIsZero(dividend, partsCount))
1144 lost_fraction = lfExactlyZero;
1145 else
1146 lost_fraction = lfLessThanHalf;
1148 if (partsCount > 2)
1149 delete [] dividend;
1151 return lost_fraction;
1154 unsigned int IEEEFloat::significandMSB() const {
1155 return APInt::tcMSB(significandParts(), partCount());
1158 unsigned int IEEEFloat::significandLSB() const {
1159 return APInt::tcLSB(significandParts(), partCount());
1162 /* Note that a zero result is NOT normalized to fcZero. */
1163 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1164 /* Our exponent should not overflow. */
1165 assert((ExponentType) (exponent + bits) >= exponent);
1167 exponent += bits;
1169 return shiftRight(significandParts(), partCount(), bits);
1172 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1173 void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1174 assert(bits < semantics->precision);
1176 if (bits) {
1177 unsigned int partsCount = partCount();
1179 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1180 exponent -= bits;
1182 assert(!APInt::tcIsZero(significandParts(), partsCount));
1186 IEEEFloat::cmpResult
1187 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
1188 int compare;
1190 assert(semantics == rhs.semantics);
1191 assert(isFiniteNonZero());
1192 assert(rhs.isFiniteNonZero());
1194 compare = exponent - rhs.exponent;
1196 /* If exponents are equal, do an unsigned bignum comparison of the
1197 significands. */
1198 if (compare == 0)
1199 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1200 partCount());
1202 if (compare > 0)
1203 return cmpGreaterThan;
1204 else if (compare < 0)
1205 return cmpLessThan;
1206 else
1207 return cmpEqual;
1210 /* Handle overflow. Sign is preserved. We either become infinity or
1211 the largest finite number. */
1212 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1213 /* Infinity? */
1214 if (rounding_mode == rmNearestTiesToEven ||
1215 rounding_mode == rmNearestTiesToAway ||
1216 (rounding_mode == rmTowardPositive && !sign) ||
1217 (rounding_mode == rmTowardNegative && sign)) {
1218 category = fcInfinity;
1219 return (opStatus) (opOverflow | opInexact);
1222 /* Otherwise we become the largest finite number. */
1223 category = fcNormal;
1224 exponent = semantics->maxExponent;
1225 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1226 semantics->precision);
1228 return opInexact;
1231 /* Returns TRUE if, when truncating the current number, with BIT the
1232 new LSB, with the given lost fraction and rounding mode, the result
1233 would need to be rounded away from zero (i.e., by increasing the
1234 signficand). This routine must work for fcZero of both signs, and
1235 fcNormal numbers. */
1236 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1237 lostFraction lost_fraction,
1238 unsigned int bit) const {
1239 /* NaNs and infinities should not have lost fractions. */
1240 assert(isFiniteNonZero() || category == fcZero);
1242 /* Current callers never pass this so we don't handle it. */
1243 assert(lost_fraction != lfExactlyZero);
1245 switch (rounding_mode) {
1246 case rmNearestTiesToAway:
1247 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1249 case rmNearestTiesToEven:
1250 if (lost_fraction == lfMoreThanHalf)
1251 return true;
1253 /* Our zeroes don't have a significand to test. */
1254 if (lost_fraction == lfExactlyHalf && category != fcZero)
1255 return APInt::tcExtractBit(significandParts(), bit);
1257 return false;
1259 case rmTowardZero:
1260 return false;
1262 case rmTowardPositive:
1263 return !sign;
1265 case rmTowardNegative:
1266 return sign;
1268 llvm_unreachable("Invalid rounding mode found");
1271 IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1272 lostFraction lost_fraction) {
1273 unsigned int omsb; /* One, not zero, based MSB. */
1274 int exponentChange;
1276 if (!isFiniteNonZero())
1277 return opOK;
1279 /* Before rounding normalize the exponent of fcNormal numbers. */
1280 omsb = significandMSB() + 1;
1282 if (omsb) {
1283 /* OMSB is numbered from 1. We want to place it in the integer
1284 bit numbered PRECISION if possible, with a compensating change in
1285 the exponent. */
1286 exponentChange = omsb - semantics->precision;
1288 /* If the resulting exponent is too high, overflow according to
1289 the rounding mode. */
1290 if (exponent + exponentChange > semantics->maxExponent)
1291 return handleOverflow(rounding_mode);
1293 /* Subnormal numbers have exponent minExponent, and their MSB
1294 is forced based on that. */
1295 if (exponent + exponentChange < semantics->minExponent)
1296 exponentChange = semantics->minExponent - exponent;
1298 /* Shifting left is easy as we don't lose precision. */
1299 if (exponentChange < 0) {
1300 assert(lost_fraction == lfExactlyZero);
1302 shiftSignificandLeft(-exponentChange);
1304 return opOK;
1307 if (exponentChange > 0) {
1308 lostFraction lf;
1310 /* Shift right and capture any new lost fraction. */
1311 lf = shiftSignificandRight(exponentChange);
1313 lost_fraction = combineLostFractions(lf, lost_fraction);
1315 /* Keep OMSB up-to-date. */
1316 if (omsb > (unsigned) exponentChange)
1317 omsb -= exponentChange;
1318 else
1319 omsb = 0;
1323 /* Now round the number according to rounding_mode given the lost
1324 fraction. */
1326 /* As specified in IEEE 754, since we do not trap we do not report
1327 underflow for exact results. */
1328 if (lost_fraction == lfExactlyZero) {
1329 /* Canonicalize zeroes. */
1330 if (omsb == 0)
1331 category = fcZero;
1333 return opOK;
1336 /* Increment the significand if we're rounding away from zero. */
1337 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1338 if (omsb == 0)
1339 exponent = semantics->minExponent;
1341 incrementSignificand();
1342 omsb = significandMSB() + 1;
1344 /* Did the significand increment overflow? */
1345 if (omsb == (unsigned) semantics->precision + 1) {
1346 /* Renormalize by incrementing the exponent and shifting our
1347 significand right one. However if we already have the
1348 maximum exponent we overflow to infinity. */
1349 if (exponent == semantics->maxExponent) {
1350 category = fcInfinity;
1352 return (opStatus) (opOverflow | opInexact);
1355 shiftSignificandRight(1);
1357 return opInexact;
1361 /* The normal case - we were and are not denormal, and any
1362 significand increment above didn't overflow. */
1363 if (omsb == semantics->precision)
1364 return opInexact;
1366 /* We have a non-zero denormal. */
1367 assert(omsb < semantics->precision);
1369 /* Canonicalize zeroes. */
1370 if (omsb == 0)
1371 category = fcZero;
1373 /* The fcZero case is a denormal that underflowed to zero. */
1374 return (opStatus) (opUnderflow | opInexact);
1377 IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1378 bool subtract) {
1379 switch (PackCategoriesIntoKey(category, rhs.category)) {
1380 default:
1381 llvm_unreachable(nullptr);
1383 case PackCategoriesIntoKey(fcNaN, fcZero):
1384 case PackCategoriesIntoKey(fcNaN, fcNormal):
1385 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1386 case PackCategoriesIntoKey(fcNaN, fcNaN):
1387 case PackCategoriesIntoKey(fcNormal, fcZero):
1388 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1389 case PackCategoriesIntoKey(fcInfinity, fcZero):
1390 return opOK;
1392 case PackCategoriesIntoKey(fcZero, fcNaN):
1393 case PackCategoriesIntoKey(fcNormal, fcNaN):
1394 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1395 // We need to be sure to flip the sign here for subtraction because we
1396 // don't have a separate negate operation so -NaN becomes 0 - NaN here.
1397 sign = rhs.sign ^ subtract;
1398 category = fcNaN;
1399 copySignificand(rhs);
1400 return opOK;
1402 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1403 case PackCategoriesIntoKey(fcZero, fcInfinity):
1404 category = fcInfinity;
1405 sign = rhs.sign ^ subtract;
1406 return opOK;
1408 case PackCategoriesIntoKey(fcZero, fcNormal):
1409 assign(rhs);
1410 sign = rhs.sign ^ subtract;
1411 return opOK;
1413 case PackCategoriesIntoKey(fcZero, fcZero):
1414 /* Sign depends on rounding mode; handled by caller. */
1415 return opOK;
1417 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1418 /* Differently signed infinities can only be validly
1419 subtracted. */
1420 if (((sign ^ rhs.sign)!=0) != subtract) {
1421 makeNaN();
1422 return opInvalidOp;
1425 return opOK;
1427 case PackCategoriesIntoKey(fcNormal, fcNormal):
1428 return opDivByZero;
1432 /* Add or subtract two normal numbers. */
1433 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1434 bool subtract) {
1435 integerPart carry;
1436 lostFraction lost_fraction;
1437 int bits;
1439 /* Determine if the operation on the absolute values is effectively
1440 an addition or subtraction. */
1441 subtract ^= static_cast<bool>(sign ^ rhs.sign);
1443 /* Are we bigger exponent-wise than the RHS? */
1444 bits = exponent - rhs.exponent;
1446 /* Subtraction is more subtle than one might naively expect. */
1447 if (subtract) {
1448 IEEEFloat temp_rhs(rhs);
1449 bool reverse;
1451 if (bits == 0) {
1452 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1453 lost_fraction = lfExactlyZero;
1454 } else if (bits > 0) {
1455 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1456 shiftSignificandLeft(1);
1457 reverse = false;
1458 } else {
1459 lost_fraction = shiftSignificandRight(-bits - 1);
1460 temp_rhs.shiftSignificandLeft(1);
1461 reverse = true;
1464 if (reverse) {
1465 carry = temp_rhs.subtractSignificand
1466 (*this, lost_fraction != lfExactlyZero);
1467 copySignificand(temp_rhs);
1468 sign = !sign;
1469 } else {
1470 carry = subtractSignificand
1471 (temp_rhs, lost_fraction != lfExactlyZero);
1474 /* Invert the lost fraction - it was on the RHS and
1475 subtracted. */
1476 if (lost_fraction == lfLessThanHalf)
1477 lost_fraction = lfMoreThanHalf;
1478 else if (lost_fraction == lfMoreThanHalf)
1479 lost_fraction = lfLessThanHalf;
1481 /* The code above is intended to ensure that no borrow is
1482 necessary. */
1483 assert(!carry);
1484 (void)carry;
1485 } else {
1486 if (bits > 0) {
1487 IEEEFloat temp_rhs(rhs);
1489 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1490 carry = addSignificand(temp_rhs);
1491 } else {
1492 lost_fraction = shiftSignificandRight(-bits);
1493 carry = addSignificand(rhs);
1496 /* We have a guard bit; generating a carry cannot happen. */
1497 assert(!carry);
1498 (void)carry;
1501 return lost_fraction;
1504 IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1505 switch (PackCategoriesIntoKey(category, rhs.category)) {
1506 default:
1507 llvm_unreachable(nullptr);
1509 case PackCategoriesIntoKey(fcNaN, fcZero):
1510 case PackCategoriesIntoKey(fcNaN, fcNormal):
1511 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1512 case PackCategoriesIntoKey(fcNaN, fcNaN):
1513 sign = false;
1514 return opOK;
1516 case PackCategoriesIntoKey(fcZero, fcNaN):
1517 case PackCategoriesIntoKey(fcNormal, fcNaN):
1518 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1519 sign = false;
1520 category = fcNaN;
1521 copySignificand(rhs);
1522 return opOK;
1524 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1525 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1526 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1527 category = fcInfinity;
1528 return opOK;
1530 case PackCategoriesIntoKey(fcZero, fcNormal):
1531 case PackCategoriesIntoKey(fcNormal, fcZero):
1532 case PackCategoriesIntoKey(fcZero, fcZero):
1533 category = fcZero;
1534 return opOK;
1536 case PackCategoriesIntoKey(fcZero, fcInfinity):
1537 case PackCategoriesIntoKey(fcInfinity, fcZero):
1538 makeNaN();
1539 return opInvalidOp;
1541 case PackCategoriesIntoKey(fcNormal, fcNormal):
1542 return opOK;
1546 IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1547 switch (PackCategoriesIntoKey(category, rhs.category)) {
1548 default:
1549 llvm_unreachable(nullptr);
1551 case PackCategoriesIntoKey(fcZero, fcNaN):
1552 case PackCategoriesIntoKey(fcNormal, fcNaN):
1553 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1554 category = fcNaN;
1555 copySignificand(rhs);
1556 LLVM_FALLTHROUGH;
1557 case PackCategoriesIntoKey(fcNaN, fcZero):
1558 case PackCategoriesIntoKey(fcNaN, fcNormal):
1559 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1560 case PackCategoriesIntoKey(fcNaN, fcNaN):
1561 sign = false;
1562 LLVM_FALLTHROUGH;
1563 case PackCategoriesIntoKey(fcInfinity, fcZero):
1564 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1565 case PackCategoriesIntoKey(fcZero, fcInfinity):
1566 case PackCategoriesIntoKey(fcZero, fcNormal):
1567 return opOK;
1569 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1570 category = fcZero;
1571 return opOK;
1573 case PackCategoriesIntoKey(fcNormal, fcZero):
1574 category = fcInfinity;
1575 return opDivByZero;
1577 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1578 case PackCategoriesIntoKey(fcZero, fcZero):
1579 makeNaN();
1580 return opInvalidOp;
1582 case PackCategoriesIntoKey(fcNormal, fcNormal):
1583 return opOK;
1587 IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
1588 switch (PackCategoriesIntoKey(category, rhs.category)) {
1589 default:
1590 llvm_unreachable(nullptr);
1592 case PackCategoriesIntoKey(fcNaN, fcZero):
1593 case PackCategoriesIntoKey(fcNaN, fcNormal):
1594 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1595 case PackCategoriesIntoKey(fcNaN, fcNaN):
1596 case PackCategoriesIntoKey(fcZero, fcInfinity):
1597 case PackCategoriesIntoKey(fcZero, fcNormal):
1598 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1599 return opOK;
1601 case PackCategoriesIntoKey(fcZero, fcNaN):
1602 case PackCategoriesIntoKey(fcNormal, fcNaN):
1603 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1604 sign = false;
1605 category = fcNaN;
1606 copySignificand(rhs);
1607 return opOK;
1609 case PackCategoriesIntoKey(fcNormal, fcZero):
1610 case PackCategoriesIntoKey(fcInfinity, fcZero):
1611 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1612 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1613 case PackCategoriesIntoKey(fcZero, fcZero):
1614 makeNaN();
1615 return opInvalidOp;
1617 case PackCategoriesIntoKey(fcNormal, fcNormal):
1618 return opOK;
1622 /* Change sign. */
1623 void IEEEFloat::changeSign() {
1624 /* Look mummy, this one's easy. */
1625 sign = !sign;
1628 /* Normalized addition or subtraction. */
1629 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
1630 roundingMode rounding_mode,
1631 bool subtract) {
1632 opStatus fs;
1634 fs = addOrSubtractSpecials(rhs, subtract);
1636 /* This return code means it was not a simple case. */
1637 if (fs == opDivByZero) {
1638 lostFraction lost_fraction;
1640 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1641 fs = normalize(rounding_mode, lost_fraction);
1643 /* Can only be zero if we lost no fraction. */
1644 assert(category != fcZero || lost_fraction == lfExactlyZero);
1647 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1648 positive zero unless rounding to minus infinity, except that
1649 adding two like-signed zeroes gives that zero. */
1650 if (category == fcZero) {
1651 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1652 sign = (rounding_mode == rmTowardNegative);
1655 return fs;
1658 /* Normalized addition. */
1659 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs,
1660 roundingMode rounding_mode) {
1661 return addOrSubtract(rhs, rounding_mode, false);
1664 /* Normalized subtraction. */
1665 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs,
1666 roundingMode rounding_mode) {
1667 return addOrSubtract(rhs, rounding_mode, true);
1670 /* Normalized multiply. */
1671 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
1672 roundingMode rounding_mode) {
1673 opStatus fs;
1675 sign ^= rhs.sign;
1676 fs = multiplySpecials(rhs);
1678 if (isFiniteNonZero()) {
1679 lostFraction lost_fraction = multiplySignificand(rhs, nullptr);
1680 fs = normalize(rounding_mode, lost_fraction);
1681 if (lost_fraction != lfExactlyZero)
1682 fs = (opStatus) (fs | opInexact);
1685 return fs;
1688 /* Normalized divide. */
1689 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
1690 roundingMode rounding_mode) {
1691 opStatus fs;
1693 sign ^= rhs.sign;
1694 fs = divideSpecials(rhs);
1696 if (isFiniteNonZero()) {
1697 lostFraction lost_fraction = divideSignificand(rhs);
1698 fs = normalize(rounding_mode, lost_fraction);
1699 if (lost_fraction != lfExactlyZero)
1700 fs = (opStatus) (fs | opInexact);
1703 return fs;
1706 /* Normalized remainder. This is not currently correct in all cases. */
1707 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
1708 opStatus fs;
1709 IEEEFloat V = *this;
1710 unsigned int origSign = sign;
1712 fs = V.divide(rhs, rmNearestTiesToEven);
1713 if (fs == opDivByZero)
1714 return fs;
1716 int parts = partCount();
1717 integerPart *x = new integerPart[parts];
1718 bool ignored;
1719 fs = V.convertToInteger(makeMutableArrayRef(x, parts),
1720 parts * integerPartWidth, true, rmNearestTiesToEven,
1721 &ignored);
1722 if (fs == opInvalidOp) {
1723 delete[] x;
1724 return fs;
1727 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1728 rmNearestTiesToEven);
1729 assert(fs==opOK); // should always work
1731 fs = V.multiply(rhs, rmNearestTiesToEven);
1732 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1734 fs = subtract(V, rmNearestTiesToEven);
1735 assert(fs==opOK || fs==opInexact); // likewise
1737 if (isZero())
1738 sign = origSign; // IEEE754 requires this
1739 delete[] x;
1740 return fs;
1743 /* Normalized llvm frem (C fmod). */
1744 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
1745 opStatus fs;
1746 fs = modSpecials(rhs);
1747 unsigned int origSign = sign;
1749 while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
1750 compareAbsoluteValue(rhs) != cmpLessThan) {
1751 IEEEFloat V = scalbn(rhs, ilogb(*this) - ilogb(rhs), rmNearestTiesToEven);
1752 if (compareAbsoluteValue(V) == cmpLessThan)
1753 V = scalbn(V, -1, rmNearestTiesToEven);
1754 V.sign = sign;
1756 fs = subtract(V, rmNearestTiesToEven);
1757 assert(fs==opOK);
1759 if (isZero())
1760 sign = origSign; // fmod requires this
1761 return fs;
1764 /* Normalized fused-multiply-add. */
1765 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
1766 const IEEEFloat &addend,
1767 roundingMode rounding_mode) {
1768 opStatus fs;
1770 /* Post-multiplication sign, before addition. */
1771 sign ^= multiplicand.sign;
1773 /* If and only if all arguments are normal do we need to do an
1774 extended-precision calculation. */
1775 if (isFiniteNonZero() &&
1776 multiplicand.isFiniteNonZero() &&
1777 addend.isFinite()) {
1778 lostFraction lost_fraction;
1780 lost_fraction = multiplySignificand(multiplicand, &addend);
1781 fs = normalize(rounding_mode, lost_fraction);
1782 if (lost_fraction != lfExactlyZero)
1783 fs = (opStatus) (fs | opInexact);
1785 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1786 positive zero unless rounding to minus infinity, except that
1787 adding two like-signed zeroes gives that zero. */
1788 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign)
1789 sign = (rounding_mode == rmTowardNegative);
1790 } else {
1791 fs = multiplySpecials(multiplicand);
1793 /* FS can only be opOK or opInvalidOp. There is no more work
1794 to do in the latter case. The IEEE-754R standard says it is
1795 implementation-defined in this case whether, if ADDEND is a
1796 quiet NaN, we raise invalid op; this implementation does so.
1798 If we need to do the addition we can do so with normal
1799 precision. */
1800 if (fs == opOK)
1801 fs = addOrSubtract(addend, rounding_mode, false);
1804 return fs;
1807 /* Rounding-mode corrrect round to integral value. */
1808 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
1809 opStatus fs;
1811 // If the exponent is large enough, we know that this value is already
1812 // integral, and the arithmetic below would potentially cause it to saturate
1813 // to +/-Inf. Bail out early instead.
1814 if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics))
1815 return opOK;
1817 // The algorithm here is quite simple: we add 2^(p-1), where p is the
1818 // precision of our format, and then subtract it back off again. The choice
1819 // of rounding modes for the addition/subtraction determines the rounding mode
1820 // for our integral rounding as well.
1821 // NOTE: When the input value is negative, we do subtraction followed by
1822 // addition instead.
1823 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
1824 IntegerConstant <<= semanticsPrecision(*semantics)-1;
1825 IEEEFloat MagicConstant(*semantics);
1826 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
1827 rmNearestTiesToEven);
1828 MagicConstant.sign = sign;
1830 if (fs != opOK)
1831 return fs;
1833 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
1834 bool inputSign = isNegative();
1836 fs = add(MagicConstant, rounding_mode);
1837 if (fs != opOK && fs != opInexact)
1838 return fs;
1840 fs = subtract(MagicConstant, rounding_mode);
1842 // Restore the input sign.
1843 if (inputSign != isNegative())
1844 changeSign();
1846 return fs;
1850 /* Comparison requires normalized numbers. */
1851 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
1852 cmpResult result;
1854 assert(semantics == rhs.semantics);
1856 switch (PackCategoriesIntoKey(category, rhs.category)) {
1857 default:
1858 llvm_unreachable(nullptr);
1860 case PackCategoriesIntoKey(fcNaN, fcZero):
1861 case PackCategoriesIntoKey(fcNaN, fcNormal):
1862 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1863 case PackCategoriesIntoKey(fcNaN, fcNaN):
1864 case PackCategoriesIntoKey(fcZero, fcNaN):
1865 case PackCategoriesIntoKey(fcNormal, fcNaN):
1866 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1867 return cmpUnordered;
1869 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1870 case PackCategoriesIntoKey(fcInfinity, fcZero):
1871 case PackCategoriesIntoKey(fcNormal, fcZero):
1872 if (sign)
1873 return cmpLessThan;
1874 else
1875 return cmpGreaterThan;
1877 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1878 case PackCategoriesIntoKey(fcZero, fcInfinity):
1879 case PackCategoriesIntoKey(fcZero, fcNormal):
1880 if (rhs.sign)
1881 return cmpGreaterThan;
1882 else
1883 return cmpLessThan;
1885 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1886 if (sign == rhs.sign)
1887 return cmpEqual;
1888 else if (sign)
1889 return cmpLessThan;
1890 else
1891 return cmpGreaterThan;
1893 case PackCategoriesIntoKey(fcZero, fcZero):
1894 return cmpEqual;
1896 case PackCategoriesIntoKey(fcNormal, fcNormal):
1897 break;
1900 /* Two normal numbers. Do they have the same sign? */
1901 if (sign != rhs.sign) {
1902 if (sign)
1903 result = cmpLessThan;
1904 else
1905 result = cmpGreaterThan;
1906 } else {
1907 /* Compare absolute values; invert result if negative. */
1908 result = compareAbsoluteValue(rhs);
1910 if (sign) {
1911 if (result == cmpLessThan)
1912 result = cmpGreaterThan;
1913 else if (result == cmpGreaterThan)
1914 result = cmpLessThan;
1918 return result;
1921 /// IEEEFloat::convert - convert a value of one floating point type to another.
1922 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1923 /// records whether the transformation lost information, i.e. whether
1924 /// converting the result back to the original type will produce the
1925 /// original value (this is almost the same as return value==fsOK, but there
1926 /// are edge cases where this is not so).
1928 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
1929 roundingMode rounding_mode,
1930 bool *losesInfo) {
1931 lostFraction lostFraction;
1932 unsigned int newPartCount, oldPartCount;
1933 opStatus fs;
1934 int shift;
1935 const fltSemantics &fromSemantics = *semantics;
1937 lostFraction = lfExactlyZero;
1938 newPartCount = partCountForBits(toSemantics.precision + 1);
1939 oldPartCount = partCount();
1940 shift = toSemantics.precision - fromSemantics.precision;
1942 bool X86SpecialNan = false;
1943 if (&fromSemantics == &semX87DoubleExtended &&
1944 &toSemantics != &semX87DoubleExtended && category == fcNaN &&
1945 (!(*significandParts() & 0x8000000000000000ULL) ||
1946 !(*significandParts() & 0x4000000000000000ULL))) {
1947 // x86 has some unusual NaNs which cannot be represented in any other
1948 // format; note them here.
1949 X86SpecialNan = true;
1952 // If this is a truncation of a denormal number, and the target semantics
1953 // has larger exponent range than the source semantics (this can happen
1954 // when truncating from PowerPC double-double to double format), the
1955 // right shift could lose result mantissa bits. Adjust exponent instead
1956 // of performing excessive shift.
1957 if (shift < 0 && isFiniteNonZero()) {
1958 int exponentChange = significandMSB() + 1 - fromSemantics.precision;
1959 if (exponent + exponentChange < toSemantics.minExponent)
1960 exponentChange = toSemantics.minExponent - exponent;
1961 if (exponentChange < shift)
1962 exponentChange = shift;
1963 if (exponentChange < 0) {
1964 shift -= exponentChange;
1965 exponent += exponentChange;
1969 // If this is a truncation, perform the shift before we narrow the storage.
1970 if (shift < 0 && (isFiniteNonZero() || category==fcNaN))
1971 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
1973 // Fix the storage so it can hold to new value.
1974 if (newPartCount > oldPartCount) {
1975 // The new type requires more storage; make it available.
1976 integerPart *newParts;
1977 newParts = new integerPart[newPartCount];
1978 APInt::tcSet(newParts, 0, newPartCount);
1979 if (isFiniteNonZero() || category==fcNaN)
1980 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1981 freeSignificand();
1982 significand.parts = newParts;
1983 } else if (newPartCount == 1 && oldPartCount != 1) {
1984 // Switch to built-in storage for a single part.
1985 integerPart newPart = 0;
1986 if (isFiniteNonZero() || category==fcNaN)
1987 newPart = significandParts()[0];
1988 freeSignificand();
1989 significand.part = newPart;
1992 // Now that we have the right storage, switch the semantics.
1993 semantics = &toSemantics;
1995 // If this is an extension, perform the shift now that the storage is
1996 // available.
1997 if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
1998 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
2000 if (isFiniteNonZero()) {
2001 fs = normalize(rounding_mode, lostFraction);
2002 *losesInfo = (fs != opOK);
2003 } else if (category == fcNaN) {
2004 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2006 // For x87 extended precision, we want to make a NaN, not a special NaN if
2007 // the input wasn't special either.
2008 if (!X86SpecialNan && semantics == &semX87DoubleExtended)
2009 APInt::tcSetBit(significandParts(), semantics->precision - 1);
2011 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
2012 // does not give you back the same bits. This is dubious, and we
2013 // don't currently do it. You're really supposed to get
2014 // an invalid operation signal at runtime, but nobody does that.
2015 fs = opOK;
2016 } else {
2017 *losesInfo = false;
2018 fs = opOK;
2021 return fs;
2024 /* Convert a floating point number to an integer according to the
2025 rounding mode. If the rounded integer value is out of range this
2026 returns an invalid operation exception and the contents of the
2027 destination parts are unspecified. If the rounded value is in
2028 range but the floating point number is not the exact integer, the C
2029 standard doesn't require an inexact exception to be raised. IEEE
2030 854 does require it so we do that.
2032 Note that for conversions to integer type the C standard requires
2033 round-to-zero to always be used. */
2034 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2035 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2036 roundingMode rounding_mode, bool *isExact) const {
2037 lostFraction lost_fraction;
2038 const integerPart *src;
2039 unsigned int dstPartsCount, truncatedBits;
2041 *isExact = false;
2043 /* Handle the three special cases first. */
2044 if (category == fcInfinity || category == fcNaN)
2045 return opInvalidOp;
2047 dstPartsCount = partCountForBits(width);
2048 assert(dstPartsCount <= parts.size() && "Integer too big");
2050 if (category == fcZero) {
2051 APInt::tcSet(parts.data(), 0, dstPartsCount);
2052 // Negative zero can't be represented as an int.
2053 *isExact = !sign;
2054 return opOK;
2057 src = significandParts();
2059 /* Step 1: place our absolute value, with any fraction truncated, in
2060 the destination. */
2061 if (exponent < 0) {
2062 /* Our absolute value is less than one; truncate everything. */
2063 APInt::tcSet(parts.data(), 0, dstPartsCount);
2064 /* For exponent -1 the integer bit represents .5, look at that.
2065 For smaller exponents leftmost truncated bit is 0. */
2066 truncatedBits = semantics->precision -1U - exponent;
2067 } else {
2068 /* We want the most significant (exponent + 1) bits; the rest are
2069 truncated. */
2070 unsigned int bits = exponent + 1U;
2072 /* Hopelessly large in magnitude? */
2073 if (bits > width)
2074 return opInvalidOp;
2076 if (bits < semantics->precision) {
2077 /* We truncate (semantics->precision - bits) bits. */
2078 truncatedBits = semantics->precision - bits;
2079 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2080 } else {
2081 /* We want at least as many bits as are available. */
2082 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2084 APInt::tcShiftLeft(parts.data(), dstPartsCount,
2085 bits - semantics->precision);
2086 truncatedBits = 0;
2090 /* Step 2: work out any lost fraction, and increment the absolute
2091 value if we would round away from zero. */
2092 if (truncatedBits) {
2093 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2094 truncatedBits);
2095 if (lost_fraction != lfExactlyZero &&
2096 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2097 if (APInt::tcIncrement(parts.data(), dstPartsCount))
2098 return opInvalidOp; /* Overflow. */
2100 } else {
2101 lost_fraction = lfExactlyZero;
2104 /* Step 3: check if we fit in the destination. */
2105 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2107 if (sign) {
2108 if (!isSigned) {
2109 /* Negative numbers cannot be represented as unsigned. */
2110 if (omsb != 0)
2111 return opInvalidOp;
2112 } else {
2113 /* It takes omsb bits to represent the unsigned integer value.
2114 We lose a bit for the sign, but care is needed as the
2115 maximally negative integer is a special case. */
2116 if (omsb == width &&
2117 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2118 return opInvalidOp;
2120 /* This case can happen because of rounding. */
2121 if (omsb > width)
2122 return opInvalidOp;
2125 APInt::tcNegate (parts.data(), dstPartsCount);
2126 } else {
2127 if (omsb >= width + !isSigned)
2128 return opInvalidOp;
2131 if (lost_fraction == lfExactlyZero) {
2132 *isExact = true;
2133 return opOK;
2134 } else
2135 return opInexact;
2138 /* Same as convertToSignExtendedInteger, except we provide
2139 deterministic values in case of an invalid operation exception,
2140 namely zero for NaNs and the minimal or maximal value respectively
2141 for underflow or overflow.
2142 The *isExact output tells whether the result is exact, in the sense
2143 that converting it back to the original floating point type produces
2144 the original value. This is almost equivalent to result==opOK,
2145 except for negative zeroes.
2147 IEEEFloat::opStatus
2148 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
2149 unsigned int width, bool isSigned,
2150 roundingMode rounding_mode, bool *isExact) const {
2151 opStatus fs;
2153 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2154 isExact);
2156 if (fs == opInvalidOp) {
2157 unsigned int bits, dstPartsCount;
2159 dstPartsCount = partCountForBits(width);
2160 assert(dstPartsCount <= parts.size() && "Integer too big");
2162 if (category == fcNaN)
2163 bits = 0;
2164 else if (sign)
2165 bits = isSigned;
2166 else
2167 bits = width - isSigned;
2169 APInt::tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
2170 if (sign && isSigned)
2171 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2174 return fs;
2177 /* Convert an unsigned integer SRC to a floating point number,
2178 rounding according to ROUNDING_MODE. The sign of the floating
2179 point number is not modified. */
2180 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2181 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2182 unsigned int omsb, precision, dstCount;
2183 integerPart *dst;
2184 lostFraction lost_fraction;
2186 category = fcNormal;
2187 omsb = APInt::tcMSB(src, srcCount) + 1;
2188 dst = significandParts();
2189 dstCount = partCount();
2190 precision = semantics->precision;
2192 /* We want the most significant PRECISION bits of SRC. There may not
2193 be that many; extract what we can. */
2194 if (precision <= omsb) {
2195 exponent = omsb - 1;
2196 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2197 omsb - precision);
2198 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2199 } else {
2200 exponent = precision - 1;
2201 lost_fraction = lfExactlyZero;
2202 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2205 return normalize(rounding_mode, lost_fraction);
2208 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned,
2209 roundingMode rounding_mode) {
2210 unsigned int partCount = Val.getNumWords();
2211 APInt api = Val;
2213 sign = false;
2214 if (isSigned && api.isNegative()) {
2215 sign = true;
2216 api = -api;
2219 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2222 /* Convert a two's complement integer SRC to a floating point number,
2223 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2224 integer is signed, in which case it must be sign-extended. */
2225 IEEEFloat::opStatus
2226 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src,
2227 unsigned int srcCount, bool isSigned,
2228 roundingMode rounding_mode) {
2229 opStatus status;
2231 if (isSigned &&
2232 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2233 integerPart *copy;
2235 /* If we're signed and negative negate a copy. */
2236 sign = true;
2237 copy = new integerPart[srcCount];
2238 APInt::tcAssign(copy, src, srcCount);
2239 APInt::tcNegate(copy, srcCount);
2240 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2241 delete [] copy;
2242 } else {
2243 sign = false;
2244 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2247 return status;
2250 /* FIXME: should this just take a const APInt reference? */
2251 IEEEFloat::opStatus
2252 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2253 unsigned int width, bool isSigned,
2254 roundingMode rounding_mode) {
2255 unsigned int partCount = partCountForBits(width);
2256 APInt api = APInt(width, makeArrayRef(parts, partCount));
2258 sign = false;
2259 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2260 sign = true;
2261 api = -api;
2264 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2267 IEEEFloat::opStatus
2268 IEEEFloat::convertFromHexadecimalString(StringRef s,
2269 roundingMode rounding_mode) {
2270 lostFraction lost_fraction = lfExactlyZero;
2272 category = fcNormal;
2273 zeroSignificand();
2274 exponent = 0;
2276 integerPart *significand = significandParts();
2277 unsigned partsCount = partCount();
2278 unsigned bitPos = partsCount * integerPartWidth;
2279 bool computedTrailingFraction = false;
2281 // Skip leading zeroes and any (hexa)decimal point.
2282 StringRef::iterator begin = s.begin();
2283 StringRef::iterator end = s.end();
2284 StringRef::iterator dot;
2285 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2286 StringRef::iterator firstSignificantDigit = p;
2288 while (p != end) {
2289 integerPart hex_value;
2291 if (*p == '.') {
2292 assert(dot == end && "String contains multiple dots");
2293 dot = p++;
2294 continue;
2297 hex_value = hexDigitValue(*p);
2298 if (hex_value == -1U)
2299 break;
2301 p++;
2303 // Store the number while we have space.
2304 if (bitPos) {
2305 bitPos -= 4;
2306 hex_value <<= bitPos % integerPartWidth;
2307 significand[bitPos / integerPartWidth] |= hex_value;
2308 } else if (!computedTrailingFraction) {
2309 lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2310 computedTrailingFraction = true;
2314 /* Hex floats require an exponent but not a hexadecimal point. */
2315 assert(p != end && "Hex strings require an exponent");
2316 assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2317 assert(p != begin && "Significand has no digits");
2318 assert((dot == end || p - begin != 1) && "Significand has no digits");
2320 /* Ignore the exponent if we are zero. */
2321 if (p != firstSignificantDigit) {
2322 int expAdjustment;
2324 /* Implicit hexadecimal point? */
2325 if (dot == end)
2326 dot = p;
2328 /* Calculate the exponent adjustment implicit in the number of
2329 significant digits. */
2330 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2331 if (expAdjustment < 0)
2332 expAdjustment++;
2333 expAdjustment = expAdjustment * 4 - 1;
2335 /* Adjust for writing the significand starting at the most
2336 significant nibble. */
2337 expAdjustment += semantics->precision;
2338 expAdjustment -= partsCount * integerPartWidth;
2340 /* Adjust for the given exponent. */
2341 exponent = totalExponent(p + 1, end, expAdjustment);
2344 return normalize(rounding_mode, lost_fraction);
2347 IEEEFloat::opStatus
2348 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2349 unsigned sigPartCount, int exp,
2350 roundingMode rounding_mode) {
2351 unsigned int parts, pow5PartCount;
2352 fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2353 integerPart pow5Parts[maxPowerOfFiveParts];
2354 bool isNearest;
2356 isNearest = (rounding_mode == rmNearestTiesToEven ||
2357 rounding_mode == rmNearestTiesToAway);
2359 parts = partCountForBits(semantics->precision + 11);
2361 /* Calculate pow(5, abs(exp)). */
2362 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2364 for (;; parts *= 2) {
2365 opStatus sigStatus, powStatus;
2366 unsigned int excessPrecision, truncatedBits;
2368 calcSemantics.precision = parts * integerPartWidth - 1;
2369 excessPrecision = calcSemantics.precision - semantics->precision;
2370 truncatedBits = excessPrecision;
2372 IEEEFloat decSig(calcSemantics, uninitialized);
2373 decSig.makeZero(sign);
2374 IEEEFloat pow5(calcSemantics);
2376 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2377 rmNearestTiesToEven);
2378 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2379 rmNearestTiesToEven);
2380 /* Add exp, as 10^n = 5^n * 2^n. */
2381 decSig.exponent += exp;
2383 lostFraction calcLostFraction;
2384 integerPart HUerr, HUdistance;
2385 unsigned int powHUerr;
2387 if (exp >= 0) {
2388 /* multiplySignificand leaves the precision-th bit set to 1. */
2389 calcLostFraction = decSig.multiplySignificand(pow5, nullptr);
2390 powHUerr = powStatus != opOK;
2391 } else {
2392 calcLostFraction = decSig.divideSignificand(pow5);
2393 /* Denormal numbers have less precision. */
2394 if (decSig.exponent < semantics->minExponent) {
2395 excessPrecision += (semantics->minExponent - decSig.exponent);
2396 truncatedBits = excessPrecision;
2397 if (excessPrecision > calcSemantics.precision)
2398 excessPrecision = calcSemantics.precision;
2400 /* Extra half-ulp lost in reciprocal of exponent. */
2401 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2404 /* Both multiplySignificand and divideSignificand return the
2405 result with the integer bit set. */
2406 assert(APInt::tcExtractBit
2407 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2409 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2410 powHUerr);
2411 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2412 excessPrecision, isNearest);
2414 /* Are we guaranteed to round correctly if we truncate? */
2415 if (HUdistance >= HUerr) {
2416 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2417 calcSemantics.precision - excessPrecision,
2418 excessPrecision);
2419 /* Take the exponent of decSig. If we tcExtract-ed less bits
2420 above we must adjust our exponent to compensate for the
2421 implicit right shift. */
2422 exponent = (decSig.exponent + semantics->precision
2423 - (calcSemantics.precision - excessPrecision));
2424 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2425 decSig.partCount(),
2426 truncatedBits);
2427 return normalize(rounding_mode, calcLostFraction);
2432 IEEEFloat::opStatus
2433 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
2434 decimalInfo D;
2435 opStatus fs;
2437 /* Scan the text. */
2438 StringRef::iterator p = str.begin();
2439 interpretDecimal(p, str.end(), &D);
2441 /* Handle the quick cases. First the case of no significant digits,
2442 i.e. zero, and then exponents that are obviously too large or too
2443 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2444 definitely overflows if
2446 (exp - 1) * L >= maxExponent
2448 and definitely underflows to zero where
2450 (exp + 1) * L <= minExponent - precision
2452 With integer arithmetic the tightest bounds for L are
2454 93/28 < L < 196/59 [ numerator <= 256 ]
2455 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2458 // Test if we have a zero number allowing for strings with no null terminators
2459 // and zero decimals with non-zero exponents.
2461 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
2462 // D->firstSigDigit equals str.end(), every digit must be a zero and there can
2463 // be at most one dot. On the other hand, if we have a zero with a non-zero
2464 // exponent, then we know that D.firstSigDigit will be non-numeric.
2465 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
2466 category = fcZero;
2467 fs = opOK;
2469 /* Check whether the normalized exponent is high enough to overflow
2470 max during the log-rebasing in the max-exponent check below. */
2471 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2472 fs = handleOverflow(rounding_mode);
2474 /* If it wasn't, then it also wasn't high enough to overflow max
2475 during the log-rebasing in the min-exponent check. Check that it
2476 won't overflow min in either check, then perform the min-exponent
2477 check. */
2478 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2479 (D.normalizedExponent + 1) * 28738 <=
2480 8651 * (semantics->minExponent - (int) semantics->precision)) {
2481 /* Underflow to zero and round. */
2482 category = fcNormal;
2483 zeroSignificand();
2484 fs = normalize(rounding_mode, lfLessThanHalf);
2486 /* We can finally safely perform the max-exponent check. */
2487 } else if ((D.normalizedExponent - 1) * 42039
2488 >= 12655 * semantics->maxExponent) {
2489 /* Overflow and round. */
2490 fs = handleOverflow(rounding_mode);
2491 } else {
2492 integerPart *decSignificand;
2493 unsigned int partCount;
2495 /* A tight upper bound on number of bits required to hold an
2496 N-digit decimal integer is N * 196 / 59. Allocate enough space
2497 to hold the full significand, and an extra part required by
2498 tcMultiplyPart. */
2499 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2500 partCount = partCountForBits(1 + 196 * partCount / 59);
2501 decSignificand = new integerPart[partCount + 1];
2502 partCount = 0;
2504 /* Convert to binary efficiently - we do almost all multiplication
2505 in an integerPart. When this would overflow do we do a single
2506 bignum multiplication, and then revert again to multiplication
2507 in an integerPart. */
2508 do {
2509 integerPart decValue, val, multiplier;
2511 val = 0;
2512 multiplier = 1;
2514 do {
2515 if (*p == '.') {
2516 p++;
2517 if (p == str.end()) {
2518 break;
2521 decValue = decDigitValue(*p++);
2522 assert(decValue < 10U && "Invalid character in significand");
2523 multiplier *= 10;
2524 val = val * 10 + decValue;
2525 /* The maximum number that can be multiplied by ten with any
2526 digit added without overflowing an integerPart. */
2527 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2529 /* Multiply out the current part. */
2530 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2531 partCount, partCount + 1, false);
2533 /* If we used another part (likely but not guaranteed), increase
2534 the count. */
2535 if (decSignificand[partCount])
2536 partCount++;
2537 } while (p <= D.lastSigDigit);
2539 category = fcNormal;
2540 fs = roundSignificandWithExponent(decSignificand, partCount,
2541 D.exponent, rounding_mode);
2543 delete [] decSignificand;
2546 return fs;
2549 bool IEEEFloat::convertFromStringSpecials(StringRef str) {
2550 if (str.equals("inf") || str.equals("INFINITY") || str.equals("+Inf")) {
2551 makeInf(false);
2552 return true;
2555 if (str.equals("-inf") || str.equals("-INFINITY") || str.equals("-Inf")) {
2556 makeInf(true);
2557 return true;
2560 if (str.equals("nan") || str.equals("NaN")) {
2561 makeNaN(false, false);
2562 return true;
2565 if (str.equals("-nan") || str.equals("-NaN")) {
2566 makeNaN(false, true);
2567 return true;
2570 return false;
2573 IEEEFloat::opStatus IEEEFloat::convertFromString(StringRef str,
2574 roundingMode rounding_mode) {
2575 assert(!str.empty() && "Invalid string length");
2577 // Handle special cases.
2578 if (convertFromStringSpecials(str))
2579 return opOK;
2581 /* Handle a leading minus sign. */
2582 StringRef::iterator p = str.begin();
2583 size_t slen = str.size();
2584 sign = *p == '-' ? 1 : 0;
2585 if (*p == '-' || *p == '+') {
2586 p++;
2587 slen--;
2588 assert(slen && "String has no digits");
2591 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2592 assert(slen - 2 && "Invalid string");
2593 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2594 rounding_mode);
2597 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2600 /* Write out a hexadecimal representation of the floating point value
2601 to DST, which must be of sufficient size, in the C99 form
2602 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2603 excluding the terminating NUL.
2605 If UPPERCASE, the output is in upper case, otherwise in lower case.
2607 HEXDIGITS digits appear altogether, rounding the value if
2608 necessary. If HEXDIGITS is 0, the minimal precision to display the
2609 number precisely is used instead. If nothing would appear after
2610 the decimal point it is suppressed.
2612 The decimal exponent is always printed and has at least one digit.
2613 Zero values display an exponent of zero. Infinities and NaNs
2614 appear as "infinity" or "nan" respectively.
2616 The above rules are as specified by C99. There is ambiguity about
2617 what the leading hexadecimal digit should be. This implementation
2618 uses whatever is necessary so that the exponent is displayed as
2619 stored. This implies the exponent will fall within the IEEE format
2620 range, and the leading hexadecimal digit will be 0 (for denormals),
2621 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2622 any other digits zero).
2624 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
2625 bool upperCase,
2626 roundingMode rounding_mode) const {
2627 char *p;
2629 p = dst;
2630 if (sign)
2631 *dst++ = '-';
2633 switch (category) {
2634 case fcInfinity:
2635 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2636 dst += sizeof infinityL - 1;
2637 break;
2639 case fcNaN:
2640 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2641 dst += sizeof NaNU - 1;
2642 break;
2644 case fcZero:
2645 *dst++ = '0';
2646 *dst++ = upperCase ? 'X': 'x';
2647 *dst++ = '0';
2648 if (hexDigits > 1) {
2649 *dst++ = '.';
2650 memset (dst, '0', hexDigits - 1);
2651 dst += hexDigits - 1;
2653 *dst++ = upperCase ? 'P': 'p';
2654 *dst++ = '0';
2655 break;
2657 case fcNormal:
2658 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2659 break;
2662 *dst = 0;
2664 return static_cast<unsigned int>(dst - p);
2667 /* Does the hard work of outputting the correctly rounded hexadecimal
2668 form of a normal floating point number with the specified number of
2669 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2670 digits necessary to print the value precisely is output. */
2671 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2672 bool upperCase,
2673 roundingMode rounding_mode) const {
2674 unsigned int count, valueBits, shift, partsCount, outputDigits;
2675 const char *hexDigitChars;
2676 const integerPart *significand;
2677 char *p;
2678 bool roundUp;
2680 *dst++ = '0';
2681 *dst++ = upperCase ? 'X': 'x';
2683 roundUp = false;
2684 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2686 significand = significandParts();
2687 partsCount = partCount();
2689 /* +3 because the first digit only uses the single integer bit, so
2690 we have 3 virtual zero most-significant-bits. */
2691 valueBits = semantics->precision + 3;
2692 shift = integerPartWidth - valueBits % integerPartWidth;
2694 /* The natural number of digits required ignoring trailing
2695 insignificant zeroes. */
2696 outputDigits = (valueBits - significandLSB () + 3) / 4;
2698 /* hexDigits of zero means use the required number for the
2699 precision. Otherwise, see if we are truncating. If we are,
2700 find out if we need to round away from zero. */
2701 if (hexDigits) {
2702 if (hexDigits < outputDigits) {
2703 /* We are dropping non-zero bits, so need to check how to round.
2704 "bits" is the number of dropped bits. */
2705 unsigned int bits;
2706 lostFraction fraction;
2708 bits = valueBits - hexDigits * 4;
2709 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2710 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2712 outputDigits = hexDigits;
2715 /* Write the digits consecutively, and start writing in the location
2716 of the hexadecimal point. We move the most significant digit
2717 left and add the hexadecimal point later. */
2718 p = ++dst;
2720 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2722 while (outputDigits && count) {
2723 integerPart part;
2725 /* Put the most significant integerPartWidth bits in "part". */
2726 if (--count == partsCount)
2727 part = 0; /* An imaginary higher zero part. */
2728 else
2729 part = significand[count] << shift;
2731 if (count && shift)
2732 part |= significand[count - 1] >> (integerPartWidth - shift);
2734 /* Convert as much of "part" to hexdigits as we can. */
2735 unsigned int curDigits = integerPartWidth / 4;
2737 if (curDigits > outputDigits)
2738 curDigits = outputDigits;
2739 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2740 outputDigits -= curDigits;
2743 if (roundUp) {
2744 char *q = dst;
2746 /* Note that hexDigitChars has a trailing '0'. */
2747 do {
2748 q--;
2749 *q = hexDigitChars[hexDigitValue (*q) + 1];
2750 } while (*q == '0');
2751 assert(q >= p);
2752 } else {
2753 /* Add trailing zeroes. */
2754 memset (dst, '0', outputDigits);
2755 dst += outputDigits;
2758 /* Move the most significant digit to before the point, and if there
2759 is something after the decimal point add it. This must come
2760 after rounding above. */
2761 p[-1] = p[0];
2762 if (dst -1 == p)
2763 dst--;
2764 else
2765 p[0] = '.';
2767 /* Finally output the exponent. */
2768 *dst++ = upperCase ? 'P': 'p';
2770 return writeSignedDecimal (dst, exponent);
2773 hash_code hash_value(const IEEEFloat &Arg) {
2774 if (!Arg.isFiniteNonZero())
2775 return hash_combine((uint8_t)Arg.category,
2776 // NaN has no sign, fix it at zero.
2777 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
2778 Arg.semantics->precision);
2780 // Normal floats need their exponent and significand hashed.
2781 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
2782 Arg.semantics->precision, Arg.exponent,
2783 hash_combine_range(
2784 Arg.significandParts(),
2785 Arg.significandParts() + Arg.partCount()));
2788 // Conversion from APFloat to/from host float/double. It may eventually be
2789 // possible to eliminate these and have everybody deal with APFloats, but that
2790 // will take a while. This approach will not easily extend to long double.
2791 // Current implementation requires integerPartWidth==64, which is correct at
2792 // the moment but could be made more general.
2794 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2795 // the actual IEEE respresentations. We compensate for that here.
2797 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
2798 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
2799 assert(partCount()==2);
2801 uint64_t myexponent, mysignificand;
2803 if (isFiniteNonZero()) {
2804 myexponent = exponent+16383; //bias
2805 mysignificand = significandParts()[0];
2806 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2807 myexponent = 0; // denormal
2808 } else if (category==fcZero) {
2809 myexponent = 0;
2810 mysignificand = 0;
2811 } else if (category==fcInfinity) {
2812 myexponent = 0x7fff;
2813 mysignificand = 0x8000000000000000ULL;
2814 } else {
2815 assert(category == fcNaN && "Unknown category");
2816 myexponent = 0x7fff;
2817 mysignificand = significandParts()[0];
2820 uint64_t words[2];
2821 words[0] = mysignificand;
2822 words[1] = ((uint64_t)(sign & 1) << 15) |
2823 (myexponent & 0x7fffLL);
2824 return APInt(80, words);
2827 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
2828 assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
2829 assert(partCount()==2);
2831 uint64_t words[2];
2832 opStatus fs;
2833 bool losesInfo;
2835 // Convert number to double. To avoid spurious underflows, we re-
2836 // normalize against the "double" minExponent first, and only *then*
2837 // truncate the mantissa. The result of that second conversion
2838 // may be inexact, but should never underflow.
2839 // Declare fltSemantics before APFloat that uses it (and
2840 // saves pointer to it) to ensure correct destruction order.
2841 fltSemantics extendedSemantics = *semantics;
2842 extendedSemantics.minExponent = semIEEEdouble.minExponent;
2843 IEEEFloat extended(*this);
2844 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2845 assert(fs == opOK && !losesInfo);
2846 (void)fs;
2848 IEEEFloat u(extended);
2849 fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
2850 assert(fs == opOK || fs == opInexact);
2851 (void)fs;
2852 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
2854 // If conversion was exact or resulted in a special case, we're done;
2855 // just set the second double to zero. Otherwise, re-convert back to
2856 // the extended format and compute the difference. This now should
2857 // convert exactly to double.
2858 if (u.isFiniteNonZero() && losesInfo) {
2859 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2860 assert(fs == opOK && !losesInfo);
2861 (void)fs;
2863 IEEEFloat v(extended);
2864 v.subtract(u, rmNearestTiesToEven);
2865 fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
2866 assert(fs == opOK && !losesInfo);
2867 (void)fs;
2868 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
2869 } else {
2870 words[1] = 0;
2873 return APInt(128, words);
2876 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
2877 assert(semantics == (const llvm::fltSemantics*)&semIEEEquad);
2878 assert(partCount()==2);
2880 uint64_t myexponent, mysignificand, mysignificand2;
2882 if (isFiniteNonZero()) {
2883 myexponent = exponent+16383; //bias
2884 mysignificand = significandParts()[0];
2885 mysignificand2 = significandParts()[1];
2886 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2887 myexponent = 0; // denormal
2888 } else if (category==fcZero) {
2889 myexponent = 0;
2890 mysignificand = mysignificand2 = 0;
2891 } else if (category==fcInfinity) {
2892 myexponent = 0x7fff;
2893 mysignificand = mysignificand2 = 0;
2894 } else {
2895 assert(category == fcNaN && "Unknown category!");
2896 myexponent = 0x7fff;
2897 mysignificand = significandParts()[0];
2898 mysignificand2 = significandParts()[1];
2901 uint64_t words[2];
2902 words[0] = mysignificand;
2903 words[1] = ((uint64_t)(sign & 1) << 63) |
2904 ((myexponent & 0x7fff) << 48) |
2905 (mysignificand2 & 0xffffffffffffLL);
2907 return APInt(128, words);
2910 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
2911 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble);
2912 assert(partCount()==1);
2914 uint64_t myexponent, mysignificand;
2916 if (isFiniteNonZero()) {
2917 myexponent = exponent+1023; //bias
2918 mysignificand = *significandParts();
2919 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2920 myexponent = 0; // denormal
2921 } else if (category==fcZero) {
2922 myexponent = 0;
2923 mysignificand = 0;
2924 } else if (category==fcInfinity) {
2925 myexponent = 0x7ff;
2926 mysignificand = 0;
2927 } else {
2928 assert(category == fcNaN && "Unknown category!");
2929 myexponent = 0x7ff;
2930 mysignificand = *significandParts();
2933 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2934 ((myexponent & 0x7ff) << 52) |
2935 (mysignificand & 0xfffffffffffffLL))));
2938 APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
2939 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle);
2940 assert(partCount()==1);
2942 uint32_t myexponent, mysignificand;
2944 if (isFiniteNonZero()) {
2945 myexponent = exponent+127; //bias
2946 mysignificand = (uint32_t)*significandParts();
2947 if (myexponent == 1 && !(mysignificand & 0x800000))
2948 myexponent = 0; // denormal
2949 } else if (category==fcZero) {
2950 myexponent = 0;
2951 mysignificand = 0;
2952 } else if (category==fcInfinity) {
2953 myexponent = 0xff;
2954 mysignificand = 0;
2955 } else {
2956 assert(category == fcNaN && "Unknown category!");
2957 myexponent = 0xff;
2958 mysignificand = (uint32_t)*significandParts();
2961 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2962 (mysignificand & 0x7fffff)));
2965 APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
2966 assert(semantics == (const llvm::fltSemantics*)&semIEEEhalf);
2967 assert(partCount()==1);
2969 uint32_t myexponent, mysignificand;
2971 if (isFiniteNonZero()) {
2972 myexponent = exponent+15; //bias
2973 mysignificand = (uint32_t)*significandParts();
2974 if (myexponent == 1 && !(mysignificand & 0x400))
2975 myexponent = 0; // denormal
2976 } else if (category==fcZero) {
2977 myexponent = 0;
2978 mysignificand = 0;
2979 } else if (category==fcInfinity) {
2980 myexponent = 0x1f;
2981 mysignificand = 0;
2982 } else {
2983 assert(category == fcNaN && "Unknown category!");
2984 myexponent = 0x1f;
2985 mysignificand = (uint32_t)*significandParts();
2988 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
2989 (mysignificand & 0x3ff)));
2992 // This function creates an APInt that is just a bit map of the floating
2993 // point constant as it would appear in memory. It is not a conversion,
2994 // and treating the result as a normal integer is unlikely to be useful.
2996 APInt IEEEFloat::bitcastToAPInt() const {
2997 if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
2998 return convertHalfAPFloatToAPInt();
3000 if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
3001 return convertFloatAPFloatToAPInt();
3003 if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
3004 return convertDoubleAPFloatToAPInt();
3006 if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
3007 return convertQuadrupleAPFloatToAPInt();
3009 if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
3010 return convertPPCDoubleDoubleAPFloatToAPInt();
3012 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
3013 "unknown format!");
3014 return convertF80LongDoubleAPFloatToAPInt();
3017 float IEEEFloat::convertToFloat() const {
3018 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
3019 "Float semantics are not IEEEsingle");
3020 APInt api = bitcastToAPInt();
3021 return api.bitsToFloat();
3024 double IEEEFloat::convertToDouble() const {
3025 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
3026 "Float semantics are not IEEEdouble");
3027 APInt api = bitcastToAPInt();
3028 return api.bitsToDouble();
3031 /// Integer bit is explicit in this format. Intel hardware (387 and later)
3032 /// does not support these bit patterns:
3033 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3034 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3035 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3036 /// exponent = 0, integer bit 1 ("pseudodenormal")
3037 /// At the moment, the first three are treated as NaNs, the last one as Normal.
3038 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3039 assert(api.getBitWidth()==80);
3040 uint64_t i1 = api.getRawData()[0];
3041 uint64_t i2 = api.getRawData()[1];
3042 uint64_t myexponent = (i2 & 0x7fff);
3043 uint64_t mysignificand = i1;
3044 uint8_t myintegerbit = mysignificand >> 63;
3046 initialize(&semX87DoubleExtended);
3047 assert(partCount()==2);
3049 sign = static_cast<unsigned int>(i2>>15);
3050 if (myexponent == 0 && mysignificand == 0) {
3051 // exponent, significand meaningless
3052 category = fcZero;
3053 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3054 // exponent, significand meaningless
3055 category = fcInfinity;
3056 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3057 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3058 // exponent meaningless
3059 category = fcNaN;
3060 significandParts()[0] = mysignificand;
3061 significandParts()[1] = 0;
3062 } else {
3063 category = fcNormal;
3064 exponent = myexponent - 16383;
3065 significandParts()[0] = mysignificand;
3066 significandParts()[1] = 0;
3067 if (myexponent==0) // denormal
3068 exponent = -16382;
3072 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
3073 assert(api.getBitWidth()==128);
3074 uint64_t i1 = api.getRawData()[0];
3075 uint64_t i2 = api.getRawData()[1];
3076 opStatus fs;
3077 bool losesInfo;
3079 // Get the first double and convert to our format.
3080 initFromDoubleAPInt(APInt(64, i1));
3081 fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3082 assert(fs == opOK && !losesInfo);
3083 (void)fs;
3085 // Unless we have a special case, add in second double.
3086 if (isFiniteNonZero()) {
3087 IEEEFloat v(semIEEEdouble, APInt(64, i2));
3088 fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3089 assert(fs == opOK && !losesInfo);
3090 (void)fs;
3092 add(v, rmNearestTiesToEven);
3096 void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3097 assert(api.getBitWidth()==128);
3098 uint64_t i1 = api.getRawData()[0];
3099 uint64_t i2 = api.getRawData()[1];
3100 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3101 uint64_t mysignificand = i1;
3102 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3104 initialize(&semIEEEquad);
3105 assert(partCount()==2);
3107 sign = static_cast<unsigned int>(i2>>63);
3108 if (myexponent==0 &&
3109 (mysignificand==0 && mysignificand2==0)) {
3110 // exponent, significand meaningless
3111 category = fcZero;
3112 } else if (myexponent==0x7fff &&
3113 (mysignificand==0 && mysignificand2==0)) {
3114 // exponent, significand meaningless
3115 category = fcInfinity;
3116 } else if (myexponent==0x7fff &&
3117 (mysignificand!=0 || mysignificand2 !=0)) {
3118 // exponent meaningless
3119 category = fcNaN;
3120 significandParts()[0] = mysignificand;
3121 significandParts()[1] = mysignificand2;
3122 } else {
3123 category = fcNormal;
3124 exponent = myexponent - 16383;
3125 significandParts()[0] = mysignificand;
3126 significandParts()[1] = mysignificand2;
3127 if (myexponent==0) // denormal
3128 exponent = -16382;
3129 else
3130 significandParts()[1] |= 0x1000000000000LL; // integer bit
3134 void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
3135 assert(api.getBitWidth()==64);
3136 uint64_t i = *api.getRawData();
3137 uint64_t myexponent = (i >> 52) & 0x7ff;
3138 uint64_t mysignificand = i & 0xfffffffffffffLL;
3140 initialize(&semIEEEdouble);
3141 assert(partCount()==1);
3143 sign = static_cast<unsigned int>(i>>63);
3144 if (myexponent==0 && mysignificand==0) {
3145 // exponent, significand meaningless
3146 category = fcZero;
3147 } else if (myexponent==0x7ff && mysignificand==0) {
3148 // exponent, significand meaningless
3149 category = fcInfinity;
3150 } else if (myexponent==0x7ff && mysignificand!=0) {
3151 // exponent meaningless
3152 category = fcNaN;
3153 *significandParts() = mysignificand;
3154 } else {
3155 category = fcNormal;
3156 exponent = myexponent - 1023;
3157 *significandParts() = mysignificand;
3158 if (myexponent==0) // denormal
3159 exponent = -1022;
3160 else
3161 *significandParts() |= 0x10000000000000LL; // integer bit
3165 void IEEEFloat::initFromFloatAPInt(const APInt &api) {
3166 assert(api.getBitWidth()==32);
3167 uint32_t i = (uint32_t)*api.getRawData();
3168 uint32_t myexponent = (i >> 23) & 0xff;
3169 uint32_t mysignificand = i & 0x7fffff;
3171 initialize(&semIEEEsingle);
3172 assert(partCount()==1);
3174 sign = i >> 31;
3175 if (myexponent==0 && mysignificand==0) {
3176 // exponent, significand meaningless
3177 category = fcZero;
3178 } else if (myexponent==0xff && mysignificand==0) {
3179 // exponent, significand meaningless
3180 category = fcInfinity;
3181 } else if (myexponent==0xff && mysignificand!=0) {
3182 // sign, exponent, significand meaningless
3183 category = fcNaN;
3184 *significandParts() = mysignificand;
3185 } else {
3186 category = fcNormal;
3187 exponent = myexponent - 127; //bias
3188 *significandParts() = mysignificand;
3189 if (myexponent==0) // denormal
3190 exponent = -126;
3191 else
3192 *significandParts() |= 0x800000; // integer bit
3196 void IEEEFloat::initFromHalfAPInt(const APInt &api) {
3197 assert(api.getBitWidth()==16);
3198 uint32_t i = (uint32_t)*api.getRawData();
3199 uint32_t myexponent = (i >> 10) & 0x1f;
3200 uint32_t mysignificand = i & 0x3ff;
3202 initialize(&semIEEEhalf);
3203 assert(partCount()==1);
3205 sign = i >> 15;
3206 if (myexponent==0 && mysignificand==0) {
3207 // exponent, significand meaningless
3208 category = fcZero;
3209 } else if (myexponent==0x1f && mysignificand==0) {
3210 // exponent, significand meaningless
3211 category = fcInfinity;
3212 } else if (myexponent==0x1f && mysignificand!=0) {
3213 // sign, exponent, significand meaningless
3214 category = fcNaN;
3215 *significandParts() = mysignificand;
3216 } else {
3217 category = fcNormal;
3218 exponent = myexponent - 15; //bias
3219 *significandParts() = mysignificand;
3220 if (myexponent==0) // denormal
3221 exponent = -14;
3222 else
3223 *significandParts() |= 0x400; // integer bit
3227 /// Treat api as containing the bits of a floating point number. Currently
3228 /// we infer the floating point type from the size of the APInt. The
3229 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3230 /// when the size is anything else).
3231 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3232 if (Sem == &semIEEEhalf)
3233 return initFromHalfAPInt(api);
3234 if (Sem == &semIEEEsingle)
3235 return initFromFloatAPInt(api);
3236 if (Sem == &semIEEEdouble)
3237 return initFromDoubleAPInt(api);
3238 if (Sem == &semX87DoubleExtended)
3239 return initFromF80LongDoubleAPInt(api);
3240 if (Sem == &semIEEEquad)
3241 return initFromQuadrupleAPInt(api);
3242 if (Sem == &semPPCDoubleDoubleLegacy)
3243 return initFromPPCDoubleDoubleAPInt(api);
3245 llvm_unreachable(nullptr);
3248 /// Make this number the largest magnitude normal number in the given
3249 /// semantics.
3250 void IEEEFloat::makeLargest(bool Negative) {
3251 // We want (in interchange format):
3252 // sign = {Negative}
3253 // exponent = 1..10
3254 // significand = 1..1
3255 category = fcNormal;
3256 sign = Negative;
3257 exponent = semantics->maxExponent;
3259 // Use memset to set all but the highest integerPart to all ones.
3260 integerPart *significand = significandParts();
3261 unsigned PartCount = partCount();
3262 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3264 // Set the high integerPart especially setting all unused top bits for
3265 // internal consistency.
3266 const unsigned NumUnusedHighBits =
3267 PartCount*integerPartWidth - semantics->precision;
3268 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3269 ? (~integerPart(0) >> NumUnusedHighBits)
3270 : 0;
3273 /// Make this number the smallest magnitude denormal number in the given
3274 /// semantics.
3275 void IEEEFloat::makeSmallest(bool Negative) {
3276 // We want (in interchange format):
3277 // sign = {Negative}
3278 // exponent = 0..0
3279 // significand = 0..01
3280 category = fcNormal;
3281 sign = Negative;
3282 exponent = semantics->minExponent;
3283 APInt::tcSet(significandParts(), 1, partCount());
3286 void IEEEFloat::makeSmallestNormalized(bool Negative) {
3287 // We want (in interchange format):
3288 // sign = {Negative}
3289 // exponent = 0..0
3290 // significand = 10..0
3292 category = fcNormal;
3293 zeroSignificand();
3294 sign = Negative;
3295 exponent = semantics->minExponent;
3296 significandParts()[partCountForBits(semantics->precision) - 1] |=
3297 (((integerPart)1) << ((semantics->precision - 1) % integerPartWidth));
3300 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
3301 initFromAPInt(&Sem, API);
3304 IEEEFloat::IEEEFloat(float f) {
3305 initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
3308 IEEEFloat::IEEEFloat(double d) {
3309 initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
3312 namespace {
3313 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3314 Buffer.append(Str.begin(), Str.end());
3317 /// Removes data from the given significand until it is no more
3318 /// precise than is required for the desired precision.
3319 void AdjustToPrecision(APInt &significand,
3320 int &exp, unsigned FormatPrecision) {
3321 unsigned bits = significand.getActiveBits();
3323 // 196/59 is a very slight overestimate of lg_2(10).
3324 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3326 if (bits <= bitsRequired) return;
3328 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3329 if (!tensRemovable) return;
3331 exp += tensRemovable;
3333 APInt divisor(significand.getBitWidth(), 1);
3334 APInt powten(significand.getBitWidth(), 10);
3335 while (true) {
3336 if (tensRemovable & 1)
3337 divisor *= powten;
3338 tensRemovable >>= 1;
3339 if (!tensRemovable) break;
3340 powten *= powten;
3343 significand = significand.udiv(divisor);
3345 // Truncate the significand down to its active bit count.
3346 significand = significand.trunc(significand.getActiveBits());
3350 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3351 int &exp, unsigned FormatPrecision) {
3352 unsigned N = buffer.size();
3353 if (N <= FormatPrecision) return;
3355 // The most significant figures are the last ones in the buffer.
3356 unsigned FirstSignificant = N - FormatPrecision;
3358 // Round.
3359 // FIXME: this probably shouldn't use 'round half up'.
3361 // Rounding down is just a truncation, except we also want to drop
3362 // trailing zeros from the new result.
3363 if (buffer[FirstSignificant - 1] < '5') {
3364 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3365 FirstSignificant++;
3367 exp += FirstSignificant;
3368 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3369 return;
3372 // Rounding up requires a decimal add-with-carry. If we continue
3373 // the carry, the newly-introduced zeros will just be truncated.
3374 for (unsigned I = FirstSignificant; I != N; ++I) {
3375 if (buffer[I] == '9') {
3376 FirstSignificant++;
3377 } else {
3378 buffer[I]++;
3379 break;
3383 // If we carried through, we have exactly one digit of precision.
3384 if (FirstSignificant == N) {
3385 exp += FirstSignificant;
3386 buffer.clear();
3387 buffer.push_back('1');
3388 return;
3391 exp += FirstSignificant;
3392 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3396 void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
3397 unsigned FormatMaxPadding, bool TruncateZero) const {
3398 switch (category) {
3399 case fcInfinity:
3400 if (isNegative())
3401 return append(Str, "-Inf");
3402 else
3403 return append(Str, "+Inf");
3405 case fcNaN: return append(Str, "NaN");
3407 case fcZero:
3408 if (isNegative())
3409 Str.push_back('-');
3411 if (!FormatMaxPadding) {
3412 if (TruncateZero)
3413 append(Str, "0.0E+0");
3414 else {
3415 append(Str, "0.0");
3416 if (FormatPrecision > 1)
3417 Str.append(FormatPrecision - 1, '0');
3418 append(Str, "e+00");
3420 } else
3421 Str.push_back('0');
3422 return;
3424 case fcNormal:
3425 break;
3428 if (isNegative())
3429 Str.push_back('-');
3431 // Decompose the number into an APInt and an exponent.
3432 int exp = exponent - ((int) semantics->precision - 1);
3433 APInt significand(semantics->precision,
3434 makeArrayRef(significandParts(),
3435 partCountForBits(semantics->precision)));
3437 // Set FormatPrecision if zero. We want to do this before we
3438 // truncate trailing zeros, as those are part of the precision.
3439 if (!FormatPrecision) {
3440 // We use enough digits so the number can be round-tripped back to an
3441 // APFloat. The formula comes from "How to Print Floating-Point Numbers
3442 // Accurately" by Steele and White.
3443 // FIXME: Using a formula based purely on the precision is conservative;
3444 // we can print fewer digits depending on the actual value being printed.
3446 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
3447 FormatPrecision = 2 + semantics->precision * 59 / 196;
3450 // Ignore trailing binary zeros.
3451 int trailingZeros = significand.countTrailingZeros();
3452 exp += trailingZeros;
3453 significand.lshrInPlace(trailingZeros);
3455 // Change the exponent from 2^e to 10^e.
3456 if (exp == 0) {
3457 // Nothing to do.
3458 } else if (exp > 0) {
3459 // Just shift left.
3460 significand = significand.zext(semantics->precision + exp);
3461 significand <<= exp;
3462 exp = 0;
3463 } else { /* exp < 0 */
3464 int texp = -exp;
3466 // We transform this using the identity:
3467 // (N)(2^-e) == (N)(5^e)(10^-e)
3468 // This means we have to multiply N (the significand) by 5^e.
3469 // To avoid overflow, we have to operate on numbers large
3470 // enough to store N * 5^e:
3471 // log2(N * 5^e) == log2(N) + e * log2(5)
3472 // <= semantics->precision + e * 137 / 59
3473 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3475 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3477 // Multiply significand by 5^e.
3478 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3479 significand = significand.zext(precision);
3480 APInt five_to_the_i(precision, 5);
3481 while (true) {
3482 if (texp & 1) significand *= five_to_the_i;
3484 texp >>= 1;
3485 if (!texp) break;
3486 five_to_the_i *= five_to_the_i;
3490 AdjustToPrecision(significand, exp, FormatPrecision);
3492 SmallVector<char, 256> buffer;
3494 // Fill the buffer.
3495 unsigned precision = significand.getBitWidth();
3496 APInt ten(precision, 10);
3497 APInt digit(precision, 0);
3499 bool inTrail = true;
3500 while (significand != 0) {
3501 // digit <- significand % 10
3502 // significand <- significand / 10
3503 APInt::udivrem(significand, ten, significand, digit);
3505 unsigned d = digit.getZExtValue();
3507 // Drop trailing zeros.
3508 if (inTrail && !d) exp++;
3509 else {
3510 buffer.push_back((char) ('0' + d));
3511 inTrail = false;
3515 assert(!buffer.empty() && "no characters in buffer!");
3517 // Drop down to FormatPrecision.
3518 // TODO: don't do more precise calculations above than are required.
3519 AdjustToPrecision(buffer, exp, FormatPrecision);
3521 unsigned NDigits = buffer.size();
3523 // Check whether we should use scientific notation.
3524 bool FormatScientific;
3525 if (!FormatMaxPadding)
3526 FormatScientific = true;
3527 else {
3528 if (exp >= 0) {
3529 // 765e3 --> 765000
3530 // ^^^
3531 // But we shouldn't make the number look more precise than it is.
3532 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3533 NDigits + (unsigned) exp > FormatPrecision);
3534 } else {
3535 // Power of the most significant digit.
3536 int MSD = exp + (int) (NDigits - 1);
3537 if (MSD >= 0) {
3538 // 765e-2 == 7.65
3539 FormatScientific = false;
3540 } else {
3541 // 765e-5 == 0.00765
3542 // ^ ^^
3543 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3548 // Scientific formatting is pretty straightforward.
3549 if (FormatScientific) {
3550 exp += (NDigits - 1);
3552 Str.push_back(buffer[NDigits-1]);
3553 Str.push_back('.');
3554 if (NDigits == 1 && TruncateZero)
3555 Str.push_back('0');
3556 else
3557 for (unsigned I = 1; I != NDigits; ++I)
3558 Str.push_back(buffer[NDigits-1-I]);
3559 // Fill with zeros up to FormatPrecision.
3560 if (!TruncateZero && FormatPrecision > NDigits - 1)
3561 Str.append(FormatPrecision - NDigits + 1, '0');
3562 // For !TruncateZero we use lower 'e'.
3563 Str.push_back(TruncateZero ? 'E' : 'e');
3565 Str.push_back(exp >= 0 ? '+' : '-');
3566 if (exp < 0) exp = -exp;
3567 SmallVector<char, 6> expbuf;
3568 do {
3569 expbuf.push_back((char) ('0' + (exp % 10)));
3570 exp /= 10;
3571 } while (exp);
3572 // Exponent always at least two digits if we do not truncate zeros.
3573 if (!TruncateZero && expbuf.size() < 2)
3574 expbuf.push_back('0');
3575 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3576 Str.push_back(expbuf[E-1-I]);
3577 return;
3580 // Non-scientific, positive exponents.
3581 if (exp >= 0) {
3582 for (unsigned I = 0; I != NDigits; ++I)
3583 Str.push_back(buffer[NDigits-1-I]);
3584 for (unsigned I = 0; I != (unsigned) exp; ++I)
3585 Str.push_back('0');
3586 return;
3589 // Non-scientific, negative exponents.
3591 // The number of digits to the left of the decimal point.
3592 int NWholeDigits = exp + (int) NDigits;
3594 unsigned I = 0;
3595 if (NWholeDigits > 0) {
3596 for (; I != (unsigned) NWholeDigits; ++I)
3597 Str.push_back(buffer[NDigits-I-1]);
3598 Str.push_back('.');
3599 } else {
3600 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3602 Str.push_back('0');
3603 Str.push_back('.');
3604 for (unsigned Z = 1; Z != NZeros; ++Z)
3605 Str.push_back('0');
3608 for (; I != NDigits; ++I)
3609 Str.push_back(buffer[NDigits-I-1]);
3612 bool IEEEFloat::getExactInverse(APFloat *inv) const {
3613 // Special floats and denormals have no exact inverse.
3614 if (!isFiniteNonZero())
3615 return false;
3617 // Check that the number is a power of two by making sure that only the
3618 // integer bit is set in the significand.
3619 if (significandLSB() != semantics->precision - 1)
3620 return false;
3622 // Get the inverse.
3623 IEEEFloat reciprocal(*semantics, 1ULL);
3624 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3625 return false;
3627 // Avoid multiplication with a denormal, it is not safe on all platforms and
3628 // may be slower than a normal division.
3629 if (reciprocal.isDenormal())
3630 return false;
3632 assert(reciprocal.isFiniteNonZero() &&
3633 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
3635 if (inv)
3636 *inv = APFloat(reciprocal, *semantics);
3638 return true;
3641 bool IEEEFloat::isSignaling() const {
3642 if (!isNaN())
3643 return false;
3645 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
3646 // first bit of the trailing significand being 0.
3647 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
3650 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
3652 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
3653 /// appropriate sign switching before/after the computation.
3654 IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
3655 // If we are performing nextDown, swap sign so we have -x.
3656 if (nextDown)
3657 changeSign();
3659 // Compute nextUp(x)
3660 opStatus result = opOK;
3662 // Handle each float category separately.
3663 switch (category) {
3664 case fcInfinity:
3665 // nextUp(+inf) = +inf
3666 if (!isNegative())
3667 break;
3668 // nextUp(-inf) = -getLargest()
3669 makeLargest(true);
3670 break;
3671 case fcNaN:
3672 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
3673 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
3674 // change the payload.
3675 if (isSignaling()) {
3676 result = opInvalidOp;
3677 // For consistency, propagate the sign of the sNaN to the qNaN.
3678 makeNaN(false, isNegative(), nullptr);
3680 break;
3681 case fcZero:
3682 // nextUp(pm 0) = +getSmallest()
3683 makeSmallest(false);
3684 break;
3685 case fcNormal:
3686 // nextUp(-getSmallest()) = -0
3687 if (isSmallest() && isNegative()) {
3688 APInt::tcSet(significandParts(), 0, partCount());
3689 category = fcZero;
3690 exponent = 0;
3691 break;
3694 // nextUp(getLargest()) == INFINITY
3695 if (isLargest() && !isNegative()) {
3696 APInt::tcSet(significandParts(), 0, partCount());
3697 category = fcInfinity;
3698 exponent = semantics->maxExponent + 1;
3699 break;
3702 // nextUp(normal) == normal + inc.
3703 if (isNegative()) {
3704 // If we are negative, we need to decrement the significand.
3706 // We only cross a binade boundary that requires adjusting the exponent
3707 // if:
3708 // 1. exponent != semantics->minExponent. This implies we are not in the
3709 // smallest binade or are dealing with denormals.
3710 // 2. Our significand excluding the integral bit is all zeros.
3711 bool WillCrossBinadeBoundary =
3712 exponent != semantics->minExponent && isSignificandAllZeros();
3714 // Decrement the significand.
3716 // We always do this since:
3717 // 1. If we are dealing with a non-binade decrement, by definition we
3718 // just decrement the significand.
3719 // 2. If we are dealing with a normal -> normal binade decrement, since
3720 // we have an explicit integral bit the fact that all bits but the
3721 // integral bit are zero implies that subtracting one will yield a
3722 // significand with 0 integral bit and 1 in all other spots. Thus we
3723 // must just adjust the exponent and set the integral bit to 1.
3724 // 3. If we are dealing with a normal -> denormal binade decrement,
3725 // since we set the integral bit to 0 when we represent denormals, we
3726 // just decrement the significand.
3727 integerPart *Parts = significandParts();
3728 APInt::tcDecrement(Parts, partCount());
3730 if (WillCrossBinadeBoundary) {
3731 // Our result is a normal number. Do the following:
3732 // 1. Set the integral bit to 1.
3733 // 2. Decrement the exponent.
3734 APInt::tcSetBit(Parts, semantics->precision - 1);
3735 exponent--;
3737 } else {
3738 // If we are positive, we need to increment the significand.
3740 // We only cross a binade boundary that requires adjusting the exponent if
3741 // the input is not a denormal and all of said input's significand bits
3742 // are set. If all of said conditions are true: clear the significand, set
3743 // the integral bit to 1, and increment the exponent. If we have a
3744 // denormal always increment since moving denormals and the numbers in the
3745 // smallest normal binade have the same exponent in our representation.
3746 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
3748 if (WillCrossBinadeBoundary) {
3749 integerPart *Parts = significandParts();
3750 APInt::tcSet(Parts, 0, partCount());
3751 APInt::tcSetBit(Parts, semantics->precision - 1);
3752 assert(exponent != semantics->maxExponent &&
3753 "We can not increment an exponent beyond the maxExponent allowed"
3754 " by the given floating point semantics.");
3755 exponent++;
3756 } else {
3757 incrementSignificand();
3760 break;
3763 // If we are performing nextDown, swap sign so we have -nextUp(-x)
3764 if (nextDown)
3765 changeSign();
3767 return result;
3770 void IEEEFloat::makeInf(bool Negative) {
3771 category = fcInfinity;
3772 sign = Negative;
3773 exponent = semantics->maxExponent + 1;
3774 APInt::tcSet(significandParts(), 0, partCount());
3777 void IEEEFloat::makeZero(bool Negative) {
3778 category = fcZero;
3779 sign = Negative;
3780 exponent = semantics->minExponent-1;
3781 APInt::tcSet(significandParts(), 0, partCount());
3784 void IEEEFloat::makeQuiet() {
3785 assert(isNaN());
3786 APInt::tcSetBit(significandParts(), semantics->precision - 2);
3789 int ilogb(const IEEEFloat &Arg) {
3790 if (Arg.isNaN())
3791 return IEEEFloat::IEK_NaN;
3792 if (Arg.isZero())
3793 return IEEEFloat::IEK_Zero;
3794 if (Arg.isInfinity())
3795 return IEEEFloat::IEK_Inf;
3796 if (!Arg.isDenormal())
3797 return Arg.exponent;
3799 IEEEFloat Normalized(Arg);
3800 int SignificandBits = Arg.getSemantics().precision - 1;
3802 Normalized.exponent += SignificandBits;
3803 Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
3804 return Normalized.exponent - SignificandBits;
3807 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {
3808 auto MaxExp = X.getSemantics().maxExponent;
3809 auto MinExp = X.getSemantics().minExponent;
3811 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
3812 // overflow; clamp it to a safe range before adding, but ensure that the range
3813 // is large enough that the clamp does not change the result. The range we
3814 // need to support is the difference between the largest possible exponent and
3815 // the normalized exponent of half the smallest denormal.
3817 int SignificandBits = X.getSemantics().precision - 1;
3818 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
3820 // Clamp to one past the range ends to let normalize handle overlflow.
3821 X.exponent += std::min(std::max(Exp, -MaxIncrement - 1), MaxIncrement);
3822 X.normalize(RoundingMode, lfExactlyZero);
3823 if (X.isNaN())
3824 X.makeQuiet();
3825 return X;
3828 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) {
3829 Exp = ilogb(Val);
3831 // Quiet signalling nans.
3832 if (Exp == IEEEFloat::IEK_NaN) {
3833 IEEEFloat Quiet(Val);
3834 Quiet.makeQuiet();
3835 return Quiet;
3838 if (Exp == IEEEFloat::IEK_Inf)
3839 return Val;
3841 // 1 is added because frexp is defined to return a normalized fraction in
3842 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
3843 Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1;
3844 return scalbn(Val, -Exp, RM);
3847 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
3848 : Semantics(&S),
3849 Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
3850 assert(Semantics == &semPPCDoubleDouble);
3853 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
3854 : Semantics(&S),
3855 Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
3856 APFloat(semIEEEdouble, uninitialized)}) {
3857 assert(Semantics == &semPPCDoubleDouble);
3860 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
3861 : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
3862 APFloat(semIEEEdouble)}) {
3863 assert(Semantics == &semPPCDoubleDouble);
3866 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
3867 : Semantics(&S),
3868 Floats(new APFloat[2]{
3869 APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
3870 APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
3871 assert(Semantics == &semPPCDoubleDouble);
3874 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
3875 APFloat &&Second)
3876 : Semantics(&S),
3877 Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
3878 assert(Semantics == &semPPCDoubleDouble);
3879 assert(&Floats[0].getSemantics() == &semIEEEdouble);
3880 assert(&Floats[1].getSemantics() == &semIEEEdouble);
3883 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
3884 : Semantics(RHS.Semantics),
3885 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
3886 APFloat(RHS.Floats[1])}
3887 : nullptr) {
3888 assert(Semantics == &semPPCDoubleDouble);
3891 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
3892 : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
3893 RHS.Semantics = &semBogus;
3894 assert(Semantics == &semPPCDoubleDouble);
3897 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
3898 if (Semantics == RHS.Semantics && RHS.Floats) {
3899 Floats[0] = RHS.Floats[0];
3900 Floats[1] = RHS.Floats[1];
3901 } else if (this != &RHS) {
3902 this->~DoubleAPFloat();
3903 new (this) DoubleAPFloat(RHS);
3905 return *this;
3908 // Implement addition, subtraction, multiplication and division based on:
3909 // "Software for Doubled-Precision Floating-Point Computations",
3910 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
3911 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
3912 const APFloat &c, const APFloat &cc,
3913 roundingMode RM) {
3914 int Status = opOK;
3915 APFloat z = a;
3916 Status |= z.add(c, RM);
3917 if (!z.isFinite()) {
3918 if (!z.isInfinity()) {
3919 Floats[0] = std::move(z);
3920 Floats[1].makeZero(/* Neg = */ false);
3921 return (opStatus)Status;
3923 Status = opOK;
3924 auto AComparedToC = a.compareAbsoluteValue(c);
3925 z = cc;
3926 Status |= z.add(aa, RM);
3927 if (AComparedToC == APFloat::cmpGreaterThan) {
3928 // z = cc + aa + c + a;
3929 Status |= z.add(c, RM);
3930 Status |= z.add(a, RM);
3931 } else {
3932 // z = cc + aa + a + c;
3933 Status |= z.add(a, RM);
3934 Status |= z.add(c, RM);
3936 if (!z.isFinite()) {
3937 Floats[0] = std::move(z);
3938 Floats[1].makeZero(/* Neg = */ false);
3939 return (opStatus)Status;
3941 Floats[0] = z;
3942 APFloat zz = aa;
3943 Status |= zz.add(cc, RM);
3944 if (AComparedToC == APFloat::cmpGreaterThan) {
3945 // Floats[1] = a - z + c + zz;
3946 Floats[1] = a;
3947 Status |= Floats[1].subtract(z, RM);
3948 Status |= Floats[1].add(c, RM);
3949 Status |= Floats[1].add(zz, RM);
3950 } else {
3951 // Floats[1] = c - z + a + zz;
3952 Floats[1] = c;
3953 Status |= Floats[1].subtract(z, RM);
3954 Status |= Floats[1].add(a, RM);
3955 Status |= Floats[1].add(zz, RM);
3957 } else {
3958 // q = a - z;
3959 APFloat q = a;
3960 Status |= q.subtract(z, RM);
3962 // zz = q + c + (a - (q + z)) + aa + cc;
3963 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
3964 auto zz = q;
3965 Status |= zz.add(c, RM);
3966 Status |= q.add(z, RM);
3967 Status |= q.subtract(a, RM);
3968 q.changeSign();
3969 Status |= zz.add(q, RM);
3970 Status |= zz.add(aa, RM);
3971 Status |= zz.add(cc, RM);
3972 if (zz.isZero() && !zz.isNegative()) {
3973 Floats[0] = std::move(z);
3974 Floats[1].makeZero(/* Neg = */ false);
3975 return opOK;
3977 Floats[0] = z;
3978 Status |= Floats[0].add(zz, RM);
3979 if (!Floats[0].isFinite()) {
3980 Floats[1].makeZero(/* Neg = */ false);
3981 return (opStatus)Status;
3983 Floats[1] = std::move(z);
3984 Status |= Floats[1].subtract(Floats[0], RM);
3985 Status |= Floats[1].add(zz, RM);
3987 return (opStatus)Status;
3990 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
3991 const DoubleAPFloat &RHS,
3992 DoubleAPFloat &Out,
3993 roundingMode RM) {
3994 if (LHS.getCategory() == fcNaN) {
3995 Out = LHS;
3996 return opOK;
3998 if (RHS.getCategory() == fcNaN) {
3999 Out = RHS;
4000 return opOK;
4002 if (LHS.getCategory() == fcZero) {
4003 Out = RHS;
4004 return opOK;
4006 if (RHS.getCategory() == fcZero) {
4007 Out = LHS;
4008 return opOK;
4010 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4011 LHS.isNegative() != RHS.isNegative()) {
4012 Out.makeNaN(false, Out.isNegative(), nullptr);
4013 return opInvalidOp;
4015 if (LHS.getCategory() == fcInfinity) {
4016 Out = LHS;
4017 return opOK;
4019 if (RHS.getCategory() == fcInfinity) {
4020 Out = RHS;
4021 return opOK;
4023 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4025 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4026 CC(RHS.Floats[1]);
4027 assert(&A.getSemantics() == &semIEEEdouble);
4028 assert(&AA.getSemantics() == &semIEEEdouble);
4029 assert(&C.getSemantics() == &semIEEEdouble);
4030 assert(&CC.getSemantics() == &semIEEEdouble);
4031 assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
4032 assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
4033 return Out.addImpl(A, AA, C, CC, RM);
4036 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS,
4037 roundingMode RM) {
4038 return addWithSpecial(*this, RHS, *this, RM);
4041 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS,
4042 roundingMode RM) {
4043 changeSign();
4044 auto Ret = add(RHS, RM);
4045 changeSign();
4046 return Ret;
4049 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS,
4050 APFloat::roundingMode RM) {
4051 const auto &LHS = *this;
4052 auto &Out = *this;
4053 /* Interesting observation: For special categories, finding the lowest
4054 common ancestor of the following layered graph gives the correct
4055 return category:
4059 Zero Inf
4061 Normal
4063 e.g. NaN * NaN = NaN
4064 Zero * Inf = NaN
4065 Normal * Zero = Zero
4066 Normal * Inf = Inf
4068 if (LHS.getCategory() == fcNaN) {
4069 Out = LHS;
4070 return opOK;
4072 if (RHS.getCategory() == fcNaN) {
4073 Out = RHS;
4074 return opOK;
4076 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
4077 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
4078 Out.makeNaN(false, false, nullptr);
4079 return opOK;
4081 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
4082 Out = LHS;
4083 return opOK;
4085 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
4086 Out = RHS;
4087 return opOK;
4089 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
4090 "Special cases not handled exhaustively");
4092 int Status = opOK;
4093 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
4094 // t = a * c
4095 APFloat T = A;
4096 Status |= T.multiply(C, RM);
4097 if (!T.isFiniteNonZero()) {
4098 Floats[0] = T;
4099 Floats[1].makeZero(/* Neg = */ false);
4100 return (opStatus)Status;
4103 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4104 APFloat Tau = A;
4105 T.changeSign();
4106 Status |= Tau.fusedMultiplyAdd(C, T, RM);
4107 T.changeSign();
4109 // v = a * d
4110 APFloat V = A;
4111 Status |= V.multiply(D, RM);
4112 // w = b * c
4113 APFloat W = B;
4114 Status |= W.multiply(C, RM);
4115 Status |= V.add(W, RM);
4116 // tau += v + w
4117 Status |= Tau.add(V, RM);
4119 // u = t + tau
4120 APFloat U = T;
4121 Status |= U.add(Tau, RM);
4123 Floats[0] = U;
4124 if (!U.isFinite()) {
4125 Floats[1].makeZero(/* Neg = */ false);
4126 } else {
4127 // Floats[1] = (t - u) + tau
4128 Status |= T.subtract(U, RM);
4129 Status |= T.add(Tau, RM);
4130 Floats[1] = T;
4132 return (opStatus)Status;
4135 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS,
4136 APFloat::roundingMode RM) {
4137 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4138 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4139 auto Ret =
4140 Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
4141 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4142 return Ret;
4145 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) {
4146 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4147 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4148 auto Ret =
4149 Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4150 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4151 return Ret;
4154 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) {
4155 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4156 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4157 auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4158 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4159 return Ret;
4162 APFloat::opStatus
4163 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
4164 const DoubleAPFloat &Addend,
4165 APFloat::roundingMode RM) {
4166 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4167 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4168 auto Ret = Tmp.fusedMultiplyAdd(
4169 APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()),
4170 APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM);
4171 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4172 return Ret;
4175 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {
4176 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4177 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4178 auto Ret = Tmp.roundToIntegral(RM);
4179 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4180 return Ret;
4183 void DoubleAPFloat::changeSign() {
4184 Floats[0].changeSign();
4185 Floats[1].changeSign();
4188 APFloat::cmpResult
4189 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const {
4190 auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
4191 if (Result != cmpEqual)
4192 return Result;
4193 Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
4194 if (Result == cmpLessThan || Result == cmpGreaterThan) {
4195 auto Against = Floats[0].isNegative() ^ Floats[1].isNegative();
4196 auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative();
4197 if (Against && !RHSAgainst)
4198 return cmpLessThan;
4199 if (!Against && RHSAgainst)
4200 return cmpGreaterThan;
4201 if (!Against && !RHSAgainst)
4202 return Result;
4203 if (Against && RHSAgainst)
4204 return (cmpResult)(cmpLessThan + cmpGreaterThan - Result);
4206 return Result;
4209 APFloat::fltCategory DoubleAPFloat::getCategory() const {
4210 return Floats[0].getCategory();
4213 bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
4215 void DoubleAPFloat::makeInf(bool Neg) {
4216 Floats[0].makeInf(Neg);
4217 Floats[1].makeZero(/* Neg = */ false);
4220 void DoubleAPFloat::makeZero(bool Neg) {
4221 Floats[0].makeZero(Neg);
4222 Floats[1].makeZero(/* Neg = */ false);
4225 void DoubleAPFloat::makeLargest(bool Neg) {
4226 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4227 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
4228 Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
4229 if (Neg)
4230 changeSign();
4233 void DoubleAPFloat::makeSmallest(bool Neg) {
4234 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4235 Floats[0].makeSmallest(Neg);
4236 Floats[1].makeZero(/* Neg = */ false);
4239 void DoubleAPFloat::makeSmallestNormalized(bool Neg) {
4240 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4241 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
4242 if (Neg)
4243 Floats[0].changeSign();
4244 Floats[1].makeZero(/* Neg = */ false);
4247 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
4248 Floats[0].makeNaN(SNaN, Neg, fill);
4249 Floats[1].makeZero(/* Neg = */ false);
4252 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const {
4253 auto Result = Floats[0].compare(RHS.Floats[0]);
4254 // |Float[0]| > |Float[1]|
4255 if (Result == APFloat::cmpEqual)
4256 return Floats[1].compare(RHS.Floats[1]);
4257 return Result;
4260 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const {
4261 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
4262 Floats[1].bitwiseIsEqual(RHS.Floats[1]);
4265 hash_code hash_value(const DoubleAPFloat &Arg) {
4266 if (Arg.Floats)
4267 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
4268 return hash_combine(Arg.Semantics);
4271 APInt DoubleAPFloat::bitcastToAPInt() const {
4272 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4273 uint64_t Data[] = {
4274 Floats[0].bitcastToAPInt().getRawData()[0],
4275 Floats[1].bitcastToAPInt().getRawData()[0],
4277 return APInt(128, 2, Data);
4280 APFloat::opStatus DoubleAPFloat::convertFromString(StringRef S,
4281 roundingMode RM) {
4282 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4283 APFloat Tmp(semPPCDoubleDoubleLegacy);
4284 auto Ret = Tmp.convertFromString(S, RM);
4285 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4286 return Ret;
4289 APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
4290 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4291 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4292 auto Ret = Tmp.next(nextDown);
4293 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4294 return Ret;
4297 APFloat::opStatus
4298 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input,
4299 unsigned int Width, bool IsSigned,
4300 roundingMode RM, bool *IsExact) const {
4301 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4302 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4303 .convertToInteger(Input, Width, IsSigned, RM, IsExact);
4306 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,
4307 bool IsSigned,
4308 roundingMode RM) {
4309 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4310 APFloat Tmp(semPPCDoubleDoubleLegacy);
4311 auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM);
4312 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4313 return Ret;
4316 APFloat::opStatus
4317 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input,
4318 unsigned int InputSize,
4319 bool IsSigned, roundingMode RM) {
4320 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4321 APFloat Tmp(semPPCDoubleDoubleLegacy);
4322 auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);
4323 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4324 return Ret;
4327 APFloat::opStatus
4328 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input,
4329 unsigned int InputSize,
4330 bool IsSigned, roundingMode RM) {
4331 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4332 APFloat Tmp(semPPCDoubleDoubleLegacy);
4333 auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);
4334 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4335 return Ret;
4338 unsigned int DoubleAPFloat::convertToHexString(char *DST,
4339 unsigned int HexDigits,
4340 bool UpperCase,
4341 roundingMode RM) const {
4342 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4343 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4344 .convertToHexString(DST, HexDigits, UpperCase, RM);
4347 bool DoubleAPFloat::isDenormal() const {
4348 return getCategory() == fcNormal &&
4349 (Floats[0].isDenormal() || Floats[1].isDenormal() ||
4350 // (double)(Hi + Lo) == Hi defines a normal number.
4351 Floats[0].compare(Floats[0] + Floats[1]) != cmpEqual);
4354 bool DoubleAPFloat::isSmallest() const {
4355 if (getCategory() != fcNormal)
4356 return false;
4357 DoubleAPFloat Tmp(*this);
4358 Tmp.makeSmallest(this->isNegative());
4359 return Tmp.compare(*this) == cmpEqual;
4362 bool DoubleAPFloat::isLargest() const {
4363 if (getCategory() != fcNormal)
4364 return false;
4365 DoubleAPFloat Tmp(*this);
4366 Tmp.makeLargest(this->isNegative());
4367 return Tmp.compare(*this) == cmpEqual;
4370 bool DoubleAPFloat::isInteger() const {
4371 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4372 return Floats[0].isInteger() && Floats[1].isInteger();
4375 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
4376 unsigned FormatPrecision,
4377 unsigned FormatMaxPadding,
4378 bool TruncateZero) const {
4379 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4380 APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4381 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
4384 bool DoubleAPFloat::getExactInverse(APFloat *inv) const {
4385 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4386 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4387 if (!inv)
4388 return Tmp.getExactInverse(nullptr);
4389 APFloat Inv(semPPCDoubleDoubleLegacy);
4390 auto Ret = Tmp.getExactInverse(&Inv);
4391 *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt());
4392 return Ret;
4395 DoubleAPFloat scalbn(DoubleAPFloat Arg, int Exp, APFloat::roundingMode RM) {
4396 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4397 return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM),
4398 scalbn(Arg.Floats[1], Exp, RM));
4401 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
4402 APFloat::roundingMode RM) {
4403 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4404 APFloat First = frexp(Arg.Floats[0], Exp, RM);
4405 APFloat Second = Arg.Floats[1];
4406 if (Arg.getCategory() == APFloat::fcNormal)
4407 Second = scalbn(Second, -Exp, RM);
4408 return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second));
4411 } // End detail namespace
4413 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
4414 if (usesLayout<IEEEFloat>(Semantics)) {
4415 new (&IEEE) IEEEFloat(std::move(F));
4416 return;
4418 if (usesLayout<DoubleAPFloat>(Semantics)) {
4419 new (&Double)
4420 DoubleAPFloat(Semantics, APFloat(std::move(F), F.getSemantics()),
4421 APFloat(semIEEEdouble));
4422 return;
4424 llvm_unreachable("Unexpected semantics");
4427 APFloat::opStatus APFloat::convertFromString(StringRef Str, roundingMode RM) {
4428 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM));
4431 hash_code hash_value(const APFloat &Arg) {
4432 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
4433 return hash_value(Arg.U.IEEE);
4434 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
4435 return hash_value(Arg.U.Double);
4436 llvm_unreachable("Unexpected semantics");
4439 APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
4440 : APFloat(Semantics) {
4441 convertFromString(S, rmNearestTiesToEven);
4444 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
4445 roundingMode RM, bool *losesInfo) {
4446 if (&getSemantics() == &ToSemantics) {
4447 *losesInfo = false;
4448 return opOK;
4450 if (usesLayout<IEEEFloat>(getSemantics()) &&
4451 usesLayout<IEEEFloat>(ToSemantics))
4452 return U.IEEE.convert(ToSemantics, RM, losesInfo);
4453 if (usesLayout<IEEEFloat>(getSemantics()) &&
4454 usesLayout<DoubleAPFloat>(ToSemantics)) {
4455 assert(&ToSemantics == &semPPCDoubleDouble);
4456 auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo);
4457 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
4458 return Ret;
4460 if (usesLayout<DoubleAPFloat>(getSemantics()) &&
4461 usesLayout<IEEEFloat>(ToSemantics)) {
4462 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
4463 *this = APFloat(std::move(getIEEE()), ToSemantics);
4464 return Ret;
4466 llvm_unreachable("Unexpected semantics");
4469 APFloat APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE) {
4470 if (isIEEE) {
4471 switch (BitWidth) {
4472 case 16:
4473 return APFloat(semIEEEhalf, APInt::getAllOnesValue(BitWidth));
4474 case 32:
4475 return APFloat(semIEEEsingle, APInt::getAllOnesValue(BitWidth));
4476 case 64:
4477 return APFloat(semIEEEdouble, APInt::getAllOnesValue(BitWidth));
4478 case 80:
4479 return APFloat(semX87DoubleExtended, APInt::getAllOnesValue(BitWidth));
4480 case 128:
4481 return APFloat(semIEEEquad, APInt::getAllOnesValue(BitWidth));
4482 default:
4483 llvm_unreachable("Unknown floating bit width");
4485 } else {
4486 assert(BitWidth == 128);
4487 return APFloat(semPPCDoubleDouble, APInt::getAllOnesValue(BitWidth));
4491 void APFloat::print(raw_ostream &OS) const {
4492 SmallVector<char, 16> Buffer;
4493 toString(Buffer);
4494 OS << Buffer << "\n";
4497 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4498 LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); }
4499 #endif
4501 void APFloat::Profile(FoldingSetNodeID &NID) const {
4502 NID.Add(bitcastToAPInt());
4505 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
4506 an APSInt, whose initial bit-width and signed-ness are used to determine the
4507 precision of the conversion.
4509 APFloat::opStatus APFloat::convertToInteger(APSInt &result,
4510 roundingMode rounding_mode,
4511 bool *isExact) const {
4512 unsigned bitWidth = result.getBitWidth();
4513 SmallVector<uint64_t, 4> parts(result.getNumWords());
4514 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
4515 rounding_mode, isExact);
4516 // Keeps the original signed-ness.
4517 result = APInt(bitWidth, parts);
4518 return status;
4521 } // End llvm namespace
4523 #undef APFLOAT_DISPATCH_ON_SEMANTICS