Revert r354244 "[DAGCombiner] Eliminate dead stores to stack."
[llvm-complete.git] / lib / Support / APFloat.cpp
blob3ebed5b5ab8c2d7a6ea15f2b31a6925e520e1c18
1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a class to represent arbitrary precision floating
10 // point values and provide a variety of arithmetic operations on them.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/APSInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Config/llvm-config.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/ErrorHandling.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include <cstring>
27 #include <limits.h>
/// Forward METHOD_CALL to whichever member of the storage union U matches the
/// layout selected by getSemantics(): U.IEEE for IEEEFloat layouts, U.Double
/// for DoubleAPFloat layouts.  The expansion always returns from the enclosing
/// function (or hits llvm_unreachable), so it must be the last statement.
#define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL)                             \
  do {                                                                         \
    if (usesLayout<IEEEFloat>(getSemantics()))                                 \
      return U.IEEE.METHOD_CALL;                                               \
    if (usesLayout<DoubleAPFloat>(getSemantics()))                             \
      return U.Double.METHOD_CALL;                                             \
    llvm_unreachable("Unexpected semantics");                                  \
  } while (false)
38 using namespace llvm;
/// A macro used to combine two fcCategory enums into one key which can be used
/// in a switch statement to classify how the interaction of two APFloat's
/// categories affects an operation.  The key is computed as lhs * 4 + rhs.
///
/// TODO: If clang source code is ever allowed to use constexpr in its own
/// codebase, change this into a static inline function.
#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
48 /* Assumed in hexadecimal significand parsing, and conversion to
49 hexadecimal strings. */
50 static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
52 namespace llvm {
53 /* Represents floating point arithmetic semantics. */
54 struct fltSemantics {
55 /* The largest E such that 2^E is representable; this matches the
56 definition of IEEE 754. */
57 APFloatBase::ExponentType maxExponent;
59 /* The smallest E such that 2^E is a normalized number; this
60 matches the definition of IEEE 754. */
61 APFloatBase::ExponentType minExponent;
63 /* Number of bits in the significand. This includes the integer
64 bit. */
65 unsigned int precision;
67 /* Number of bits actually used in the semantics. */
68 unsigned int sizeInBits;
71 static const fltSemantics semIEEEhalf = {15, -14, 11, 16};
72 static const fltSemantics semIEEEsingle = {127, -126, 24, 32};
73 static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
74 static const fltSemantics semIEEEquad = {16383, -16382, 113, 128};
75 static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
76 static const fltSemantics semBogus = {0, 0, 0, 0};
78 /* The IBM double-double semantics. Such a number consists of a pair of IEEE
79 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
80 (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
81 Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
82 to each other, and two 11-bit exponents.
84 Note: we need to make the value different from semBogus as otherwise
85 an unsafe optimization may collapse both values to a single address,
86 and we heavily rely on them having distinct addresses. */
87 static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 0};
89 /* These are legacy semantics for the fallback, inaccrurate implementation of
90 IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
91 operation. It's equivalent to having an IEEE number with consecutive 106
92 bits of mantissa and 11 bits of exponent.
94 It's not equivalent to IBM double-double. For example, a legit IBM
95 double-double, 1 + epsilon:
97 1 + epsilon = 1 + (1 >> 1076)
99 is not representable by a consecutive 106 bits of mantissa.
101 Currently, these semantics are used in the following way:
103 semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
104 (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
105 semPPCDoubleDoubleLegacy -> IEEE operations
107 We use bitcastToAPInt() to get the bit representation (in APInt) of the
108 underlying IEEEdouble, then use the APInt constructor to construct the
109 legacy IEEE float.
111 TODO: Implement all operations in semPPCDoubleDouble, and delete these
112 semantics. */
113 static const fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
114 53 + 53, 128};
116 const fltSemantics &APFloatBase::IEEEhalf() {
117 return semIEEEhalf;
119 const fltSemantics &APFloatBase::IEEEsingle() {
120 return semIEEEsingle;
122 const fltSemantics &APFloatBase::IEEEdouble() {
123 return semIEEEdouble;
125 const fltSemantics &APFloatBase::IEEEquad() {
126 return semIEEEquad;
128 const fltSemantics &APFloatBase::x87DoubleExtended() {
129 return semX87DoubleExtended;
131 const fltSemantics &APFloatBase::Bogus() {
132 return semBogus;
134 const fltSemantics &APFloatBase::PPCDoubleDouble() {
135 return semPPCDoubleDouble;
138 /* A tight upper bound on number of parts required to hold the value
139 pow(5, power) is
141 power * 815 / (351 * integerPartWidth) + 1
143 However, whilst the result may require only this many parts,
144 because we are multiplying two values to get it, the
145 multiplication may require an extra part with the excess part
146 being zero (consider the trivial case of 1 * 1, tcFullMultiply
147 requires two parts to hold the single-part result). So we add an
148 extra one to guarantee enough space whilst multiplying. */
149 const unsigned int maxExponent = 16383;
150 const unsigned int maxPrecision = 113;
151 const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
152 const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth));
154 unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
155 return semantics.precision;
157 APFloatBase::ExponentType
158 APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {
159 return semantics.maxExponent;
161 APFloatBase::ExponentType
162 APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {
163 return semantics.minExponent;
165 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
166 return semantics.sizeInBits;
169 unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
170 return Sem.sizeInBits;
173 /* A bunch of private, handy routines. */
175 static inline unsigned int
176 partCountForBits(unsigned int bits)
178 return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth;
/* Returns 0U-9U.  Return values >= 10U are not digits.  Characters below
   '0' wrap around to large unsigned values, so a single ">= 10U" test
   rejects every non-digit.  */
static inline unsigned int
decDigitValue(unsigned int c)
{
  return c - '0';
}
188 /* Return the value of a decimal exponent of the form
189 [+-]ddddddd.
191 If the exponent overflows, returns a large exponent with the
192 appropriate sign. */
193 static int
194 readExponent(StringRef::iterator begin, StringRef::iterator end)
196 bool isNegative;
197 unsigned int absExponent;
198 const unsigned int overlargeExponent = 24000; /* FIXME. */
199 StringRef::iterator p = begin;
201 assert(p != end && "Exponent has no digits");
203 isNegative = (*p == '-');
204 if (*p == '-' || *p == '+') {
205 p++;
206 assert(p != end && "Exponent has no digits");
209 absExponent = decDigitValue(*p++);
210 assert(absExponent < 10U && "Invalid character in exponent");
212 for (; p != end; ++p) {
213 unsigned int value;
215 value = decDigitValue(*p);
216 assert(value < 10U && "Invalid character in exponent");
218 value += absExponent * 10;
219 if (absExponent >= overlargeExponent) {
220 absExponent = overlargeExponent;
221 p = end; /* outwit assert below */
222 break;
224 absExponent = value;
227 assert(p == end && "Invalid exponent in exponent");
229 if (isNegative)
230 return -(int) absExponent;
231 else
232 return (int) absExponent;
235 /* This is ugly and needs cleaning up, but I don't immediately see
236 how whilst remaining safe. */
237 static int
238 totalExponent(StringRef::iterator p, StringRef::iterator end,
239 int exponentAdjustment)
241 int unsignedExponent;
242 bool negative, overflow;
243 int exponent = 0;
245 assert(p != end && "Exponent has no digits");
247 negative = *p == '-';
248 if (*p == '-' || *p == '+') {
249 p++;
250 assert(p != end && "Exponent has no digits");
253 unsignedExponent = 0;
254 overflow = false;
255 for (; p != end; ++p) {
256 unsigned int value;
258 value = decDigitValue(*p);
259 assert(value < 10U && "Invalid character in exponent");
261 unsignedExponent = unsignedExponent * 10 + value;
262 if (unsignedExponent > 32767) {
263 overflow = true;
264 break;
268 if (exponentAdjustment > 32767 || exponentAdjustment < -32768)
269 overflow = true;
271 if (!overflow) {
272 exponent = unsignedExponent;
273 if (negative)
274 exponent = -exponent;
275 exponent += exponentAdjustment;
276 if (exponent > 32767 || exponent < -32768)
277 overflow = true;
280 if (overflow)
281 exponent = negative ? -32768: 32767;
283 return exponent;
286 static StringRef::iterator
287 skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
288 StringRef::iterator *dot)
290 StringRef::iterator p = begin;
291 *dot = end;
292 while (p != end && *p == '0')
293 p++;
295 if (p != end && *p == '.') {
296 *dot = p++;
298 assert(end - begin != 1 && "Significand has no digits");
300 while (p != end && *p == '0')
301 p++;
304 return p;
/* Given a normal decimal floating point number of the form

     dddd.dddd[eE][+-]ddd

   where the decimal point and exponent are optional, fill out the
   structure D.  Exponent is appropriate if the significand is
   treated as an integer, and normalizedExponent if the significand
   is taken to have the decimal point after a single leading
   non-zero digit.

   If the value is zero, D->firstSigDigit points to a non-digit, and
   the return exponent is zero.
*/
struct decimalInfo {
  const char *firstSigDigit;
  const char *lastSigDigit;
  int exponent;
  int normalizedExponent;
};
327 static void
328 interpretDecimal(StringRef::iterator begin, StringRef::iterator end,
329 decimalInfo *D)
331 StringRef::iterator dot = end;
332 StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot);
334 D->firstSigDigit = p;
335 D->exponent = 0;
336 D->normalizedExponent = 0;
338 for (; p != end; ++p) {
339 if (*p == '.') {
340 assert(dot == end && "String contains multiple dots");
341 dot = p++;
342 if (p == end)
343 break;
345 if (decDigitValue(*p) >= 10U)
346 break;
349 if (p != end) {
350 assert((*p == 'e' || *p == 'E') && "Invalid character in significand");
351 assert(p != begin && "Significand has no digits");
352 assert((dot == end || p - begin != 1) && "Significand has no digits");
354 /* p points to the first non-digit in the string */
355 D->exponent = readExponent(p + 1, end);
357 /* Implied decimal point? */
358 if (dot == end)
359 dot = p;
362 /* If number is all zeroes accept any exponent. */
363 if (p != D->firstSigDigit) {
364 /* Drop insignificant trailing zeroes. */
365 if (p != begin) {
368 p--;
369 while (p != begin && *p == '0');
370 while (p != begin && *p == '.');
373 /* Adjust the exponents for any decimal point. */
374 D->exponent += static_cast<APFloat::ExponentType>((dot - p) - (dot > p));
375 D->normalizedExponent = (D->exponent +
376 static_cast<APFloat::ExponentType>((p - D->firstSigDigit)
377 - (dot > D->firstSigDigit && dot < p)));
380 D->lastSigDigit = p;
383 /* Return the trailing fraction of a hexadecimal number.
384 DIGITVALUE is the first hex digit of the fraction, P points to
385 the next digit. */
386 static lostFraction
387 trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
388 unsigned int digitValue)
390 unsigned int hexDigit;
392 /* If the first trailing digit isn't 0 or 8 we can work out the
393 fraction immediately. */
394 if (digitValue > 8)
395 return lfMoreThanHalf;
396 else if (digitValue < 8 && digitValue > 0)
397 return lfLessThanHalf;
399 // Otherwise we need to find the first non-zero digit.
400 while (p != end && (*p == '0' || *p == '.'))
401 p++;
403 assert(p != end && "Invalid trailing hexadecimal fraction!");
405 hexDigit = hexDigitValue(*p);
407 /* If we ran off the end it is exactly zero or one-half, otherwise
408 a little more. */
409 if (hexDigit == -1U)
410 return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
411 else
412 return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
415 /* Return the fraction lost were a bignum truncated losing the least
416 significant BITS bits. */
417 static lostFraction
418 lostFractionThroughTruncation(const APFloatBase::integerPart *parts,
419 unsigned int partCount,
420 unsigned int bits)
422 unsigned int lsb;
424 lsb = APInt::tcLSB(parts, partCount);
426 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
427 if (bits <= lsb)
428 return lfExactlyZero;
429 if (bits == lsb + 1)
430 return lfExactlyHalf;
431 if (bits <= partCount * APFloatBase::integerPartWidth &&
432 APInt::tcExtractBit(parts, bits - 1))
433 return lfMoreThanHalf;
435 return lfLessThanHalf;
438 /* Shift DST right BITS bits noting lost fraction. */
439 static lostFraction
440 shiftRight(APFloatBase::integerPart *dst, unsigned int parts, unsigned int bits)
442 lostFraction lost_fraction;
444 lost_fraction = lostFractionThroughTruncation(dst, parts, bits);
446 APInt::tcShiftRight(dst, parts, bits);
448 return lost_fraction;
451 /* Combine the effect of two lost fractions. */
452 static lostFraction
453 combineLostFractions(lostFraction moreSignificant,
454 lostFraction lessSignificant)
456 if (lessSignificant != lfExactlyZero) {
457 if (moreSignificant == lfExactlyZero)
458 moreSignificant = lfLessThanHalf;
459 else if (moreSignificant == lfExactlyHalf)
460 moreSignificant = lfMoreThanHalf;
463 return moreSignificant;
/* The error from the true value, in half-ulps, on multiplying two
   floating point numbers, which differ from the value they
   approximate by at most HUE1 and HUE2 half-ulps, is strictly less
   than the returned value.

   See "How to Read Floating Point Numbers Accurately" by William D
   Clinger.  */
static unsigned int
HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
{
  assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));

  if (HUerr1 + HUerr2 == 0)
    return inexactMultiply * 2;  /* <= inexactMultiply half-ulps.  */
  else
    return inexactMultiply + 2 * (HUerr1 + HUerr2);
}
484 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
485 when the least significant BITS are truncated. BITS cannot be
486 zero. */
487 static APFloatBase::integerPart
488 ulpsFromBoundary(const APFloatBase::integerPart *parts, unsigned int bits,
489 bool isNearest) {
490 unsigned int count, partBits;
491 APFloatBase::integerPart part, boundary;
493 assert(bits != 0);
495 bits--;
496 count = bits / APFloatBase::integerPartWidth;
497 partBits = bits % APFloatBase::integerPartWidth + 1;
499 part = parts[count] & (~(APFloatBase::integerPart) 0 >> (APFloatBase::integerPartWidth - partBits));
501 if (isNearest)
502 boundary = (APFloatBase::integerPart) 1 << (partBits - 1);
503 else
504 boundary = 0;
506 if (count == 0) {
507 if (part - boundary <= boundary - part)
508 return part - boundary;
509 else
510 return boundary - part;
513 if (part == boundary) {
514 while (--count)
515 if (parts[count])
516 return ~(APFloatBase::integerPart) 0; /* A lot. */
518 return parts[0];
519 } else if (part == boundary - 1) {
520 while (--count)
521 if (~parts[count])
522 return ~(APFloatBase::integerPart) 0; /* A lot. */
524 return -parts[0];
527 return ~(APFloatBase::integerPart) 0; /* A lot. */
530 /* Place pow(5, power) in DST, and return the number of parts used.
531 DST must be at least one part larger than size of the answer. */
532 static unsigned int
533 powerOf5(APFloatBase::integerPart *dst, unsigned int power) {
534 static const APFloatBase::integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
535 APFloatBase::integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
536 pow5s[0] = 78125 * 5;
538 unsigned int partsCount[16] = { 1 };
539 APFloatBase::integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
540 unsigned int result;
541 assert(power <= maxExponent);
543 p1 = dst;
544 p2 = scratch;
546 *p1 = firstEightPowers[power & 7];
547 power >>= 3;
549 result = 1;
550 pow5 = pow5s;
552 for (unsigned int n = 0; power; power >>= 1, n++) {
553 unsigned int pc;
555 pc = partsCount[n];
557 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
558 if (pc == 0) {
559 pc = partsCount[n - 1];
560 APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc);
561 pc *= 2;
562 if (pow5[pc - 1] == 0)
563 pc--;
564 partsCount[n] = pc;
567 if (power & 1) {
568 APFloatBase::integerPart *tmp;
570 APInt::tcFullMultiply(p2, p1, pow5, result, pc);
571 result += pc;
572 if (p2[result - 1] == 0)
573 result--;
575 /* Now result is in p1 with partsCount parts and p2 is scratch
576 space. */
577 tmp = p1;
578 p1 = p2;
579 p2 = tmp;
582 pow5 += pc;
585 if (p1 != dst)
586 APInt::tcAssign(dst, p1, result);
588 return result;
/* Zero at the end to avoid modular arithmetic when adding one; used
   when rounding up during hexadecimal output.  */
static const char hexDigitsLower[] = "0123456789abcdef0";
static const char hexDigitsUpper[] = "0123456789ABCDEF0";
/* Lower- and upper-case spellings of the special values.  */
static const char infinityL[] = "infinity";
static const char infinityU[] = "INFINITY";
static const char NaNL[] = "nan";
static const char NaNU[] = "NAN";
600 /* Write out an integerPart in hexadecimal, starting with the most
601 significant nibble. Write out exactly COUNT hexdigits, return
602 COUNT. */
603 static unsigned int
604 partAsHex (char *dst, APFloatBase::integerPart part, unsigned int count,
605 const char *hexDigitChars)
607 unsigned int result = count;
609 assert(count != 0 && count <= APFloatBase::integerPartWidth / 4);
611 part >>= (APFloatBase::integerPartWidth - 4 * count);
612 while (count--) {
613 dst[count] = hexDigitChars[part & 0xf];
614 part >>= 4;
617 return result;
/* Write out an unsigned decimal integer.  Returns a pointer just past the
   last character written; no terminating NUL is written.  */
static char *
writeUnsignedDecimal (char *dst, unsigned int n)
{
  char buff[40], *p;

  p = buff;

  /* Produce the digits least significant first (always at least one).  */
  do
    *p++ = '0' + n % 10;
  while (n /= 10);

  /* Copy them out in reverse, i.e. most significant first.  */
  do
    *dst++ = *--p;
  while (p != buff);

  return dst;
}
638 /* Write out a signed decimal integer. */
639 static char *
640 writeSignedDecimal (char *dst, int value)
642 if (value < 0) {
643 *dst++ = '-';
644 dst = writeUnsignedDecimal(dst, -(unsigned) value);
645 } else
646 dst = writeUnsignedDecimal(dst, value);
648 return dst;
651 namespace detail {
652 /* Constructors. */
653 void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
654 unsigned int count;
656 semantics = ourSemantics;
657 count = partCount();
658 if (count > 1)
659 significand.parts = new integerPart[count];
662 void IEEEFloat::freeSignificand() {
663 if (needsCleanup())
664 delete [] significand.parts;
667 void IEEEFloat::assign(const IEEEFloat &rhs) {
668 assert(semantics == rhs.semantics);
670 sign = rhs.sign;
671 category = rhs.category;
672 exponent = rhs.exponent;
673 if (isFiniteNonZero() || category == fcNaN)
674 copySignificand(rhs);
677 void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
678 assert(isFiniteNonZero() || category == fcNaN);
679 assert(rhs.partCount() >= partCount());
681 APInt::tcAssign(significandParts(), rhs.significandParts(),
682 partCount());
685 /* Make this number a NaN, with an arbitrary but deterministic value
686 for the significand. If double or longer, this is a signalling NaN,
687 which may not be ideal. If float, this is QNaN(0). */
688 void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
689 category = fcNaN;
690 sign = Negative;
692 integerPart *significand = significandParts();
693 unsigned numParts = partCount();
695 // Set the significand bits to the fill.
696 if (!fill || fill->getNumWords() < numParts)
697 APInt::tcSet(significand, 0, numParts);
698 if (fill) {
699 APInt::tcAssign(significand, fill->getRawData(),
700 std::min(fill->getNumWords(), numParts));
702 // Zero out the excess bits of the significand.
703 unsigned bitsToPreserve = semantics->precision - 1;
704 unsigned part = bitsToPreserve / 64;
705 bitsToPreserve %= 64;
706 significand[part] &= ((1ULL << bitsToPreserve) - 1);
707 for (part++; part != numParts; ++part)
708 significand[part] = 0;
711 unsigned QNaNBit = semantics->precision - 2;
713 if (SNaN) {
714 // We always have to clear the QNaN bit to make it an SNaN.
715 APInt::tcClearBit(significand, QNaNBit);
717 // If there are no bits set in the payload, we have to set
718 // *something* to make it a NaN instead of an infinity;
719 // conventionally, this is the next bit down from the QNaN bit.
720 if (APInt::tcIsZero(significand, numParts))
721 APInt::tcSetBit(significand, QNaNBit - 1);
722 } else {
723 // We always have to set the QNaN bit to make it a QNaN.
724 APInt::tcSetBit(significand, QNaNBit);
727 // For x87 extended precision, we want to make a NaN, not a
728 // pseudo-NaN. Maybe we should expose the ability to make
729 // pseudo-NaNs?
730 if (semantics == &semX87DoubleExtended)
731 APInt::tcSetBit(significand, QNaNBit + 1);
734 IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) {
735 if (this != &rhs) {
736 if (semantics != rhs.semantics) {
737 freeSignificand();
738 initialize(rhs.semantics);
740 assign(rhs);
743 return *this;
746 IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
747 freeSignificand();
749 semantics = rhs.semantics;
750 significand = rhs.significand;
751 exponent = rhs.exponent;
752 category = rhs.category;
753 sign = rhs.sign;
755 rhs.semantics = &semBogus;
756 return *this;
759 bool IEEEFloat::isDenormal() const {
760 return isFiniteNonZero() && (exponent == semantics->minExponent) &&
761 (APInt::tcExtractBit(significandParts(),
762 semantics->precision - 1) == 0);
765 bool IEEEFloat::isSmallest() const {
766 // The smallest number by magnitude in our format will be the smallest
767 // denormal, i.e. the floating point number with exponent being minimum
768 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
769 return isFiniteNonZero() && exponent == semantics->minExponent &&
770 significandMSB() == 0;
773 bool IEEEFloat::isSignificandAllOnes() const {
774 // Test if the significand excluding the integral bit is all ones. This allows
775 // us to test for binade boundaries.
776 const integerPart *Parts = significandParts();
777 const unsigned PartCount = partCount();
778 for (unsigned i = 0; i < PartCount - 1; i++)
779 if (~Parts[i])
780 return false;
782 // Set the unused high bits to all ones when we compare.
783 const unsigned NumHighBits =
784 PartCount*integerPartWidth - semantics->precision + 1;
785 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
786 "fill than integerPartWidth");
787 const integerPart HighBitFill =
788 ~integerPart(0) << (integerPartWidth - NumHighBits);
789 if (~(Parts[PartCount - 1] | HighBitFill))
790 return false;
792 return true;
795 bool IEEEFloat::isSignificandAllZeros() const {
796 // Test if the significand excluding the integral bit is all zeros. This
797 // allows us to test for binade boundaries.
798 const integerPart *Parts = significandParts();
799 const unsigned PartCount = partCount();
801 for (unsigned i = 0; i < PartCount - 1; i++)
802 if (Parts[i])
803 return false;
805 const unsigned NumHighBits =
806 PartCount*integerPartWidth - semantics->precision + 1;
807 assert(NumHighBits <= integerPartWidth && "Can not have more high bits to "
808 "clear than integerPartWidth");
809 const integerPart HighBitMask = ~integerPart(0) >> NumHighBits;
811 if (Parts[PartCount - 1] & HighBitMask)
812 return false;
814 return true;
817 bool IEEEFloat::isLargest() const {
818 // The largest number by magnitude in our format will be the floating point
819 // number with maximum exponent and with significand that is all ones.
820 return isFiniteNonZero() && exponent == semantics->maxExponent
821 && isSignificandAllOnes();
824 bool IEEEFloat::isInteger() const {
825 // This could be made more efficient; I'm going for obviously correct.
826 if (!isFinite()) return false;
827 IEEEFloat truncated = *this;
828 truncated.roundToIntegral(rmTowardZero);
829 return compare(truncated) == cmpEqual;
832 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
833 if (this == &rhs)
834 return true;
835 if (semantics != rhs.semantics ||
836 category != rhs.category ||
837 sign != rhs.sign)
838 return false;
839 if (category==fcZero || category==fcInfinity)
840 return true;
842 if (isFiniteNonZero() && exponent != rhs.exponent)
843 return false;
845 return std::equal(significandParts(), significandParts() + partCount(),
846 rhs.significandParts());
849 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) {
850 initialize(&ourSemantics);
851 sign = 0;
852 category = fcNormal;
853 zeroSignificand();
854 exponent = ourSemantics.precision - 1;
855 significandParts()[0] = value;
856 normalize(rmNearestTiesToEven, lfExactlyZero);
859 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) {
860 initialize(&ourSemantics);
861 category = fcZero;
862 sign = false;
865 // Delegate to the previous constructor, because later copy constructor may
866 // actually inspects category, which can't be garbage.
867 IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
868 : IEEEFloat(ourSemantics) {}
870 IEEEFloat::IEEEFloat(const IEEEFloat &rhs) {
871 initialize(rhs.semantics);
872 assign(rhs);
875 IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) {
876 *this = std::move(rhs);
879 IEEEFloat::~IEEEFloat() { freeSignificand(); }
881 unsigned int IEEEFloat::partCount() const {
882 return partCountForBits(semantics->precision + 1);
885 const IEEEFloat::integerPart *IEEEFloat::significandParts() const {
886 return const_cast<IEEEFloat *>(this)->significandParts();
889 IEEEFloat::integerPart *IEEEFloat::significandParts() {
890 if (partCount() > 1)
891 return significand.parts;
892 else
893 return &significand.part;
896 void IEEEFloat::zeroSignificand() {
897 APInt::tcSet(significandParts(), 0, partCount());
900 /* Increment an fcNormal floating point number's significand. */
901 void IEEEFloat::incrementSignificand() {
902 integerPart carry;
904 carry = APInt::tcIncrement(significandParts(), partCount());
906 /* Our callers should never cause us to overflow. */
907 assert(carry == 0);
908 (void)carry;
911 /* Add the significand of the RHS. Returns the carry flag. */
912 IEEEFloat::integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
913 integerPart *parts;
915 parts = significandParts();
917 assert(semantics == rhs.semantics);
918 assert(exponent == rhs.exponent);
920 return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
923 /* Subtract the significand of the RHS with a borrow flag. Returns
924 the borrow flag. */
925 IEEEFloat::integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
926 integerPart borrow) {
927 integerPart *parts;
929 parts = significandParts();
931 assert(semantics == rhs.semantics);
932 assert(exponent == rhs.exponent);
934 return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
935 partCount());
938 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
939 on to the full-precision result of the multiplication. Returns the
940 lost fraction. */
941 lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
942 const IEEEFloat *addend) {
943 unsigned int omsb; // One, not zero, based MSB.
944 unsigned int partsCount, newPartsCount, precision;
945 integerPart *lhsSignificand;
946 integerPart scratch[4];
947 integerPart *fullSignificand;
948 lostFraction lost_fraction;
949 bool ignored;
951 assert(semantics == rhs.semantics);
953 precision = semantics->precision;
955 // Allocate space for twice as many bits as the original significand, plus one
956 // extra bit for the addition to overflow into.
957 newPartsCount = partCountForBits(precision * 2 + 1);
959 if (newPartsCount > 4)
960 fullSignificand = new integerPart[newPartsCount];
961 else
962 fullSignificand = scratch;
964 lhsSignificand = significandParts();
965 partsCount = partCount();
967 APInt::tcFullMultiply(fullSignificand, lhsSignificand,
968 rhs.significandParts(), partsCount, partsCount);
970 lost_fraction = lfExactlyZero;
971 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
972 exponent += rhs.exponent;
974 // Assume the operands involved in the multiplication are single-precision
975 // FP, and the two multiplicants are:
976 // *this = a23 . a22 ... a0 * 2^e1
977 // rhs = b23 . b22 ... b0 * 2^e2
978 // the result of multiplication is:
979 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
980 // Note that there are three significant bits at the left-hand side of the
981 // radix point: two for the multiplication, and an overflow bit for the
982 // addition (that will always be zero at this point). Move the radix point
983 // toward left by two bits, and adjust exponent accordingly.
984 exponent += 2;
986 if (addend && addend->isNonZero()) {
987 // The intermediate result of the multiplication has "2 * precision"
988 // signicant bit; adjust the addend to be consistent with mul result.
990 Significand savedSignificand = significand;
991 const fltSemantics *savedSemantics = semantics;
992 fltSemantics extendedSemantics;
993 opStatus status;
994 unsigned int extendedPrecision;
996 // Normalize our MSB to one below the top bit to allow for overflow.
997 extendedPrecision = 2 * precision + 1;
998 if (omsb != extendedPrecision - 1) {
999 assert(extendedPrecision > omsb);
1000 APInt::tcShiftLeft(fullSignificand, newPartsCount,
1001 (extendedPrecision - 1) - omsb);
1002 exponent -= (extendedPrecision - 1) - omsb;
1005 /* Create new semantics. */
1006 extendedSemantics = *semantics;
1007 extendedSemantics.precision = extendedPrecision;
1009 if (newPartsCount == 1)
1010 significand.part = fullSignificand[0];
1011 else
1012 significand.parts = fullSignificand;
1013 semantics = &extendedSemantics;
1015 IEEEFloat extendedAddend(*addend);
1016 status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
1017 assert(status == opOK);
1018 (void)status;
1020 // Shift the significand of the addend right by one bit. This guarantees
1021 // that the high bit of the significand is zero (same as fullSignificand),
1022 // so the addition will overflow (if it does overflow at all) into the top bit.
1023 lost_fraction = extendedAddend.shiftSignificandRight(1);
1024 assert(lost_fraction == lfExactlyZero &&
1025 "Lost precision while shifting addend for fused-multiply-add.");
1027 lost_fraction = addOrSubtractSignificand(extendedAddend, false);
1029 /* Restore our state. */
1030 if (newPartsCount == 1)
1031 fullSignificand[0] = significand.part;
1032 significand = savedSignificand;
1033 semantics = savedSemantics;
1035 omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
1038 // Convert the result having "2 * precision" significant-bits back to the one
1039 // having "precision" significant-bits. First, move the radix point from
1040 // poision "2*precision - 1" to "precision - 1". The exponent need to be
1041 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1042 exponent -= precision + 1;
1044 // In case MSB resides at the left-hand side of radix point, shift the
1045 // mantissa right by some amount to make sure the MSB reside right before
1046 // the radix point (i.e. "MSB . rest-significant-bits").
1048 // Note that the result is not normalized when "omsb < precision". So, the
1049 // caller needs to call IEEEFloat::normalize() if normalized value is
1050 // expected.
1051 if (omsb > precision) {
1052 unsigned int bits, significantParts;
1053 lostFraction lf;
1055 bits = omsb - precision;
1056 significantParts = partCountForBits(omsb);
1057 lf = shiftRight(fullSignificand, significantParts, bits);
1058 lost_fraction = combineLostFractions(lf, lost_fraction);
1059 exponent += bits;
1062 APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
1064 if (newPartsCount > 4)
1065 delete [] fullSignificand;
1067 return lost_fraction;
1070 /* Multiply the significands of LHS and RHS to DST. */
1071 lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
1072 unsigned int bit, i, partsCount;
1073 const integerPart *rhsSignificand;
1074 integerPart *lhsSignificand, *dividend, *divisor;
1075 integerPart scratch[4];
1076 lostFraction lost_fraction;
1078 assert(semantics == rhs.semantics);
1080 lhsSignificand = significandParts();
1081 rhsSignificand = rhs.significandParts();
1082 partsCount = partCount();
1084 if (partsCount > 2)
1085 dividend = new integerPart[partsCount * 2];
1086 else
1087 dividend = scratch;
1089 divisor = dividend + partsCount;
1091 /* Copy the dividend and divisor as they will be modified in-place. */
1092 for (i = 0; i < partsCount; i++) {
1093 dividend[i] = lhsSignificand[i];
1094 divisor[i] = rhsSignificand[i];
1095 lhsSignificand[i] = 0;
1098 exponent -= rhs.exponent;
1100 unsigned int precision = semantics->precision;
1102 /* Normalize the divisor. */
1103 bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
1104 if (bit) {
1105 exponent += bit;
1106 APInt::tcShiftLeft(divisor, partsCount, bit);
1109 /* Normalize the dividend. */
1110 bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
1111 if (bit) {
1112 exponent -= bit;
1113 APInt::tcShiftLeft(dividend, partsCount, bit);
1116 /* Ensure the dividend >= divisor initially for the loop below.
1117 Incidentally, this means that the division loop below is
1118 guaranteed to set the integer bit to one. */
1119 if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
1120 exponent--;
1121 APInt::tcShiftLeft(dividend, partsCount, 1);
1122 assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
1125 /* Long division. */
1126 for (bit = precision; bit; bit -= 1) {
1127 if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
1128 APInt::tcSubtract(dividend, divisor, 0, partsCount);
1129 APInt::tcSetBit(lhsSignificand, bit - 1);
1132 APInt::tcShiftLeft(dividend, partsCount, 1);
1135 /* Figure out the lost fraction. */
1136 int cmp = APInt::tcCompare(dividend, divisor, partsCount);
1138 if (cmp > 0)
1139 lost_fraction = lfMoreThanHalf;
1140 else if (cmp == 0)
1141 lost_fraction = lfExactlyHalf;
1142 else if (APInt::tcIsZero(dividend, partsCount))
1143 lost_fraction = lfExactlyZero;
1144 else
1145 lost_fraction = lfLessThanHalf;
1147 if (partsCount > 2)
1148 delete [] dividend;
1150 return lost_fraction;
1153 unsigned int IEEEFloat::significandMSB() const {
1154 return APInt::tcMSB(significandParts(), partCount());
1157 unsigned int IEEEFloat::significandLSB() const {
1158 return APInt::tcLSB(significandParts(), partCount());
1161 /* Note that a zero result is NOT normalized to fcZero. */
1162 lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
1163 /* Our exponent should not overflow. */
1164 assert((ExponentType) (exponent + bits) >= exponent);
1166 exponent += bits;
1168 return shiftRight(significandParts(), partCount(), bits);
1171 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1172 void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
1173 assert(bits < semantics->precision);
1175 if (bits) {
1176 unsigned int partsCount = partCount();
1178 APInt::tcShiftLeft(significandParts(), partsCount, bits);
1179 exponent -= bits;
1181 assert(!APInt::tcIsZero(significandParts(), partsCount));
1185 IEEEFloat::cmpResult
1186 IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
1187 int compare;
1189 assert(semantics == rhs.semantics);
1190 assert(isFiniteNonZero());
1191 assert(rhs.isFiniteNonZero());
1193 compare = exponent - rhs.exponent;
1195 /* If exponents are equal, do an unsigned bignum comparison of the
1196 significands. */
1197 if (compare == 0)
1198 compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
1199 partCount());
1201 if (compare > 0)
1202 return cmpGreaterThan;
1203 else if (compare < 0)
1204 return cmpLessThan;
1205 else
1206 return cmpEqual;
1209 /* Handle overflow. Sign is preserved. We either become infinity or
1210 the largest finite number. */
1211 IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
1212 /* Infinity? */
1213 if (rounding_mode == rmNearestTiesToEven ||
1214 rounding_mode == rmNearestTiesToAway ||
1215 (rounding_mode == rmTowardPositive && !sign) ||
1216 (rounding_mode == rmTowardNegative && sign)) {
1217 category = fcInfinity;
1218 return (opStatus) (opOverflow | opInexact);
1221 /* Otherwise we become the largest finite number. */
1222 category = fcNormal;
1223 exponent = semantics->maxExponent;
1224 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1225 semantics->precision);
1227 return opInexact;
1230 /* Returns TRUE if, when truncating the current number, with BIT the
1231 new LSB, with the given lost fraction and rounding mode, the result
1232 would need to be rounded away from zero (i.e., by increasing the
1233 signficand). This routine must work for fcZero of both signs, and
1234 fcNormal numbers. */
1235 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
1236 lostFraction lost_fraction,
1237 unsigned int bit) const {
1238 /* NaNs and infinities should not have lost fractions. */
1239 assert(isFiniteNonZero() || category == fcZero);
1241 /* Current callers never pass this so we don't handle it. */
1242 assert(lost_fraction != lfExactlyZero);
1244 switch (rounding_mode) {
1245 case rmNearestTiesToAway:
1246 return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
1248 case rmNearestTiesToEven:
1249 if (lost_fraction == lfMoreThanHalf)
1250 return true;
1252 /* Our zeroes don't have a significand to test. */
1253 if (lost_fraction == lfExactlyHalf && category != fcZero)
1254 return APInt::tcExtractBit(significandParts(), bit);
1256 return false;
1258 case rmTowardZero:
1259 return false;
1261 case rmTowardPositive:
1262 return !sign;
1264 case rmTowardNegative:
1265 return sign;
1267 llvm_unreachable("Invalid rounding mode found");
1270 IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
1271 lostFraction lost_fraction) {
1272 unsigned int omsb; /* One, not zero, based MSB. */
1273 int exponentChange;
1275 if (!isFiniteNonZero())
1276 return opOK;
1278 /* Before rounding normalize the exponent of fcNormal numbers. */
1279 omsb = significandMSB() + 1;
1281 if (omsb) {
1282 /* OMSB is numbered from 1. We want to place it in the integer
1283 bit numbered PRECISION if possible, with a compensating change in
1284 the exponent. */
1285 exponentChange = omsb - semantics->precision;
1287 /* If the resulting exponent is too high, overflow according to
1288 the rounding mode. */
1289 if (exponent + exponentChange > semantics->maxExponent)
1290 return handleOverflow(rounding_mode);
1292 /* Subnormal numbers have exponent minExponent, and their MSB
1293 is forced based on that. */
1294 if (exponent + exponentChange < semantics->minExponent)
1295 exponentChange = semantics->minExponent - exponent;
1297 /* Shifting left is easy as we don't lose precision. */
1298 if (exponentChange < 0) {
1299 assert(lost_fraction == lfExactlyZero);
1301 shiftSignificandLeft(-exponentChange);
1303 return opOK;
1306 if (exponentChange > 0) {
1307 lostFraction lf;
1309 /* Shift right and capture any new lost fraction. */
1310 lf = shiftSignificandRight(exponentChange);
1312 lost_fraction = combineLostFractions(lf, lost_fraction);
1314 /* Keep OMSB up-to-date. */
1315 if (omsb > (unsigned) exponentChange)
1316 omsb -= exponentChange;
1317 else
1318 omsb = 0;
1322 /* Now round the number according to rounding_mode given the lost
1323 fraction. */
1325 /* As specified in IEEE 754, since we do not trap we do not report
1326 underflow for exact results. */
1327 if (lost_fraction == lfExactlyZero) {
1328 /* Canonicalize zeroes. */
1329 if (omsb == 0)
1330 category = fcZero;
1332 return opOK;
1335 /* Increment the significand if we're rounding away from zero. */
1336 if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
1337 if (omsb == 0)
1338 exponent = semantics->minExponent;
1340 incrementSignificand();
1341 omsb = significandMSB() + 1;
1343 /* Did the significand increment overflow? */
1344 if (omsb == (unsigned) semantics->precision + 1) {
1345 /* Renormalize by incrementing the exponent and shifting our
1346 significand right one. However if we already have the
1347 maximum exponent we overflow to infinity. */
1348 if (exponent == semantics->maxExponent) {
1349 category = fcInfinity;
1351 return (opStatus) (opOverflow | opInexact);
1354 shiftSignificandRight(1);
1356 return opInexact;
1360 /* The normal case - we were and are not denormal, and any
1361 significand increment above didn't overflow. */
1362 if (omsb == semantics->precision)
1363 return opInexact;
1365 /* We have a non-zero denormal. */
1366 assert(omsb < semantics->precision);
1368 /* Canonicalize zeroes. */
1369 if (omsb == 0)
1370 category = fcZero;
1372 /* The fcZero case is a denormal that underflowed to zero. */
1373 return (opStatus) (opUnderflow | opInexact);
1376 IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
1377 bool subtract) {
1378 switch (PackCategoriesIntoKey(category, rhs.category)) {
1379 default:
1380 llvm_unreachable(nullptr);
1382 case PackCategoriesIntoKey(fcNaN, fcZero):
1383 case PackCategoriesIntoKey(fcNaN, fcNormal):
1384 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1385 case PackCategoriesIntoKey(fcNaN, fcNaN):
1386 case PackCategoriesIntoKey(fcNormal, fcZero):
1387 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1388 case PackCategoriesIntoKey(fcInfinity, fcZero):
1389 return opOK;
1391 case PackCategoriesIntoKey(fcZero, fcNaN):
1392 case PackCategoriesIntoKey(fcNormal, fcNaN):
1393 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1394 // We need to be sure to flip the sign here for subtraction because we
1395 // don't have a separate negate operation so -NaN becomes 0 - NaN here.
1396 sign = rhs.sign ^ subtract;
1397 category = fcNaN;
1398 copySignificand(rhs);
1399 return opOK;
1401 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1402 case PackCategoriesIntoKey(fcZero, fcInfinity):
1403 category = fcInfinity;
1404 sign = rhs.sign ^ subtract;
1405 return opOK;
1407 case PackCategoriesIntoKey(fcZero, fcNormal):
1408 assign(rhs);
1409 sign = rhs.sign ^ subtract;
1410 return opOK;
1412 case PackCategoriesIntoKey(fcZero, fcZero):
1413 /* Sign depends on rounding mode; handled by caller. */
1414 return opOK;
1416 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1417 /* Differently signed infinities can only be validly
1418 subtracted. */
1419 if (((sign ^ rhs.sign)!=0) != subtract) {
1420 makeNaN();
1421 return opInvalidOp;
1424 return opOK;
1426 case PackCategoriesIntoKey(fcNormal, fcNormal):
1427 return opDivByZero;
1431 /* Add or subtract two normal numbers. */
1432 lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
1433 bool subtract) {
1434 integerPart carry;
1435 lostFraction lost_fraction;
1436 int bits;
1438 /* Determine if the operation on the absolute values is effectively
1439 an addition or subtraction. */
1440 subtract ^= static_cast<bool>(sign ^ rhs.sign);
1442 /* Are we bigger exponent-wise than the RHS? */
1443 bits = exponent - rhs.exponent;
1445 /* Subtraction is more subtle than one might naively expect. */
1446 if (subtract) {
1447 IEEEFloat temp_rhs(rhs);
1448 bool reverse;
1450 if (bits == 0) {
1451 reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan;
1452 lost_fraction = lfExactlyZero;
1453 } else if (bits > 0) {
1454 lost_fraction = temp_rhs.shiftSignificandRight(bits - 1);
1455 shiftSignificandLeft(1);
1456 reverse = false;
1457 } else {
1458 lost_fraction = shiftSignificandRight(-bits - 1);
1459 temp_rhs.shiftSignificandLeft(1);
1460 reverse = true;
1463 if (reverse) {
1464 carry = temp_rhs.subtractSignificand
1465 (*this, lost_fraction != lfExactlyZero);
1466 copySignificand(temp_rhs);
1467 sign = !sign;
1468 } else {
1469 carry = subtractSignificand
1470 (temp_rhs, lost_fraction != lfExactlyZero);
1473 /* Invert the lost fraction - it was on the RHS and
1474 subtracted. */
1475 if (lost_fraction == lfLessThanHalf)
1476 lost_fraction = lfMoreThanHalf;
1477 else if (lost_fraction == lfMoreThanHalf)
1478 lost_fraction = lfLessThanHalf;
1480 /* The code above is intended to ensure that no borrow is
1481 necessary. */
1482 assert(!carry);
1483 (void)carry;
1484 } else {
1485 if (bits > 0) {
1486 IEEEFloat temp_rhs(rhs);
1488 lost_fraction = temp_rhs.shiftSignificandRight(bits);
1489 carry = addSignificand(temp_rhs);
1490 } else {
1491 lost_fraction = shiftSignificandRight(-bits);
1492 carry = addSignificand(rhs);
1495 /* We have a guard bit; generating a carry cannot happen. */
1496 assert(!carry);
1497 (void)carry;
1500 return lost_fraction;
1503 IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
1504 switch (PackCategoriesIntoKey(category, rhs.category)) {
1505 default:
1506 llvm_unreachable(nullptr);
1508 case PackCategoriesIntoKey(fcNaN, fcZero):
1509 case PackCategoriesIntoKey(fcNaN, fcNormal):
1510 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1511 case PackCategoriesIntoKey(fcNaN, fcNaN):
1512 sign = false;
1513 return opOK;
1515 case PackCategoriesIntoKey(fcZero, fcNaN):
1516 case PackCategoriesIntoKey(fcNormal, fcNaN):
1517 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1518 sign = false;
1519 category = fcNaN;
1520 copySignificand(rhs);
1521 return opOK;
1523 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1524 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1525 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1526 category = fcInfinity;
1527 return opOK;
1529 case PackCategoriesIntoKey(fcZero, fcNormal):
1530 case PackCategoriesIntoKey(fcNormal, fcZero):
1531 case PackCategoriesIntoKey(fcZero, fcZero):
1532 category = fcZero;
1533 return opOK;
1535 case PackCategoriesIntoKey(fcZero, fcInfinity):
1536 case PackCategoriesIntoKey(fcInfinity, fcZero):
1537 makeNaN();
1538 return opInvalidOp;
1540 case PackCategoriesIntoKey(fcNormal, fcNormal):
1541 return opOK;
1545 IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
1546 switch (PackCategoriesIntoKey(category, rhs.category)) {
1547 default:
1548 llvm_unreachable(nullptr);
1550 case PackCategoriesIntoKey(fcZero, fcNaN):
1551 case PackCategoriesIntoKey(fcNormal, fcNaN):
1552 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1553 category = fcNaN;
1554 copySignificand(rhs);
1555 LLVM_FALLTHROUGH;
1556 case PackCategoriesIntoKey(fcNaN, fcZero):
1557 case PackCategoriesIntoKey(fcNaN, fcNormal):
1558 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1559 case PackCategoriesIntoKey(fcNaN, fcNaN):
1560 sign = false;
1561 LLVM_FALLTHROUGH;
1562 case PackCategoriesIntoKey(fcInfinity, fcZero):
1563 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1564 case PackCategoriesIntoKey(fcZero, fcInfinity):
1565 case PackCategoriesIntoKey(fcZero, fcNormal):
1566 return opOK;
1568 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1569 category = fcZero;
1570 return opOK;
1572 case PackCategoriesIntoKey(fcNormal, fcZero):
1573 category = fcInfinity;
1574 return opDivByZero;
1576 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1577 case PackCategoriesIntoKey(fcZero, fcZero):
1578 makeNaN();
1579 return opInvalidOp;
1581 case PackCategoriesIntoKey(fcNormal, fcNormal):
1582 return opOK;
1586 IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
1587 switch (PackCategoriesIntoKey(category, rhs.category)) {
1588 default:
1589 llvm_unreachable(nullptr);
1591 case PackCategoriesIntoKey(fcNaN, fcZero):
1592 case PackCategoriesIntoKey(fcNaN, fcNormal):
1593 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1594 case PackCategoriesIntoKey(fcNaN, fcNaN):
1595 case PackCategoriesIntoKey(fcZero, fcInfinity):
1596 case PackCategoriesIntoKey(fcZero, fcNormal):
1597 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1598 return opOK;
1600 case PackCategoriesIntoKey(fcZero, fcNaN):
1601 case PackCategoriesIntoKey(fcNormal, fcNaN):
1602 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1603 sign = false;
1604 category = fcNaN;
1605 copySignificand(rhs);
1606 return opOK;
1608 case PackCategoriesIntoKey(fcNormal, fcZero):
1609 case PackCategoriesIntoKey(fcInfinity, fcZero):
1610 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1611 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1612 case PackCategoriesIntoKey(fcZero, fcZero):
1613 makeNaN();
1614 return opInvalidOp;
1616 case PackCategoriesIntoKey(fcNormal, fcNormal):
1617 return opOK;
1621 /* Change sign. */
1622 void IEEEFloat::changeSign() {
1623 /* Look mummy, this one's easy. */
1624 sign = !sign;
1627 /* Normalized addition or subtraction. */
1628 IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
1629 roundingMode rounding_mode,
1630 bool subtract) {
1631 opStatus fs;
1633 fs = addOrSubtractSpecials(rhs, subtract);
1635 /* This return code means it was not a simple case. */
1636 if (fs == opDivByZero) {
1637 lostFraction lost_fraction;
1639 lost_fraction = addOrSubtractSignificand(rhs, subtract);
1640 fs = normalize(rounding_mode, lost_fraction);
1642 /* Can only be zero if we lost no fraction. */
1643 assert(category != fcZero || lost_fraction == lfExactlyZero);
1646 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1647 positive zero unless rounding to minus infinity, except that
1648 adding two like-signed zeroes gives that zero. */
1649 if (category == fcZero) {
1650 if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
1651 sign = (rounding_mode == rmTowardNegative);
1654 return fs;
1657 /* Normalized addition. */
1658 IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs,
1659 roundingMode rounding_mode) {
1660 return addOrSubtract(rhs, rounding_mode, false);
1663 /* Normalized subtraction. */
1664 IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs,
1665 roundingMode rounding_mode) {
1666 return addOrSubtract(rhs, rounding_mode, true);
1669 /* Normalized multiply. */
1670 IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
1671 roundingMode rounding_mode) {
1672 opStatus fs;
1674 sign ^= rhs.sign;
1675 fs = multiplySpecials(rhs);
1677 if (isFiniteNonZero()) {
1678 lostFraction lost_fraction = multiplySignificand(rhs, nullptr);
1679 fs = normalize(rounding_mode, lost_fraction);
1680 if (lost_fraction != lfExactlyZero)
1681 fs = (opStatus) (fs | opInexact);
1684 return fs;
1687 /* Normalized divide. */
1688 IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
1689 roundingMode rounding_mode) {
1690 opStatus fs;
1692 sign ^= rhs.sign;
1693 fs = divideSpecials(rhs);
1695 if (isFiniteNonZero()) {
1696 lostFraction lost_fraction = divideSignificand(rhs);
1697 fs = normalize(rounding_mode, lost_fraction);
1698 if (lost_fraction != lfExactlyZero)
1699 fs = (opStatus) (fs | opInexact);
1702 return fs;
1705 /* Normalized remainder. This is not currently correct in all cases. */
1706 IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
1707 opStatus fs;
1708 IEEEFloat V = *this;
1709 unsigned int origSign = sign;
1711 fs = V.divide(rhs, rmNearestTiesToEven);
1712 if (fs == opDivByZero)
1713 return fs;
1715 int parts = partCount();
1716 integerPart *x = new integerPart[parts];
1717 bool ignored;
1718 fs = V.convertToInteger(makeMutableArrayRef(x, parts),
1719 parts * integerPartWidth, true, rmNearestTiesToEven,
1720 &ignored);
1721 if (fs == opInvalidOp) {
1722 delete[] x;
1723 return fs;
1726 fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
1727 rmNearestTiesToEven);
1728 assert(fs==opOK); // should always work
1730 fs = V.multiply(rhs, rmNearestTiesToEven);
1731 assert(fs==opOK || fs==opInexact); // should not overflow or underflow
1733 fs = subtract(V, rmNearestTiesToEven);
1734 assert(fs==opOK || fs==opInexact); // likewise
1736 if (isZero())
1737 sign = origSign; // IEEE754 requires this
1738 delete[] x;
1739 return fs;
1742 /* Normalized llvm frem (C fmod). */
1743 IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
1744 opStatus fs;
1745 fs = modSpecials(rhs);
1746 unsigned int origSign = sign;
1748 while (isFiniteNonZero() && rhs.isFiniteNonZero() &&
1749 compareAbsoluteValue(rhs) != cmpLessThan) {
1750 IEEEFloat V = scalbn(rhs, ilogb(*this) - ilogb(rhs), rmNearestTiesToEven);
1751 if (compareAbsoluteValue(V) == cmpLessThan)
1752 V = scalbn(V, -1, rmNearestTiesToEven);
1753 V.sign = sign;
1755 fs = subtract(V, rmNearestTiesToEven);
1756 assert(fs==opOK);
1758 if (isZero())
1759 sign = origSign; // fmod requires this
1760 return fs;
1763 /* Normalized fused-multiply-add. */
1764 IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
1765 const IEEEFloat &addend,
1766 roundingMode rounding_mode) {
1767 opStatus fs;
1769 /* Post-multiplication sign, before addition. */
1770 sign ^= multiplicand.sign;
1772 /* If and only if all arguments are normal do we need to do an
1773 extended-precision calculation. */
1774 if (isFiniteNonZero() &&
1775 multiplicand.isFiniteNonZero() &&
1776 addend.isFinite()) {
1777 lostFraction lost_fraction;
1779 lost_fraction = multiplySignificand(multiplicand, &addend);
1780 fs = normalize(rounding_mode, lost_fraction);
1781 if (lost_fraction != lfExactlyZero)
1782 fs = (opStatus) (fs | opInexact);
1784 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1785 positive zero unless rounding to minus infinity, except that
1786 adding two like-signed zeroes gives that zero. */
1787 if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign)
1788 sign = (rounding_mode == rmTowardNegative);
1789 } else {
1790 fs = multiplySpecials(multiplicand);
1792 /* FS can only be opOK or opInvalidOp. There is no more work
1793 to do in the latter case. The IEEE-754R standard says it is
1794 implementation-defined in this case whether, if ADDEND is a
1795 quiet NaN, we raise invalid op; this implementation does so.
1797 If we need to do the addition we can do so with normal
1798 precision. */
1799 if (fs == opOK)
1800 fs = addOrSubtract(addend, rounding_mode, false);
1803 return fs;
1806 /* Rounding-mode corrrect round to integral value. */
1807 IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
1808 opStatus fs;
1810 // If the exponent is large enough, we know that this value is already
1811 // integral, and the arithmetic below would potentially cause it to saturate
1812 // to +/-Inf. Bail out early instead.
1813 if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics))
1814 return opOK;
1816 // The algorithm here is quite simple: we add 2^(p-1), where p is the
1817 // precision of our format, and then subtract it back off again. The choice
1818 // of rounding modes for the addition/subtraction determines the rounding mode
1819 // for our integral rounding as well.
1820 // NOTE: When the input value is negative, we do subtraction followed by
1821 // addition instead.
1822 APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
1823 IntegerConstant <<= semanticsPrecision(*semantics)-1;
1824 IEEEFloat MagicConstant(*semantics);
1825 fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
1826 rmNearestTiesToEven);
1827 MagicConstant.sign = sign;
1829 if (fs != opOK)
1830 return fs;
1832 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
1833 bool inputSign = isNegative();
1835 fs = add(MagicConstant, rounding_mode);
1836 if (fs != opOK && fs != opInexact)
1837 return fs;
1839 fs = subtract(MagicConstant, rounding_mode);
1841 // Restore the input sign.
1842 if (inputSign != isNegative())
1843 changeSign();
1845 return fs;
1849 /* Comparison requires normalized numbers. */
1850 IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
1851 cmpResult result;
1853 assert(semantics == rhs.semantics);
1855 switch (PackCategoriesIntoKey(category, rhs.category)) {
1856 default:
1857 llvm_unreachable(nullptr);
1859 case PackCategoriesIntoKey(fcNaN, fcZero):
1860 case PackCategoriesIntoKey(fcNaN, fcNormal):
1861 case PackCategoriesIntoKey(fcNaN, fcInfinity):
1862 case PackCategoriesIntoKey(fcNaN, fcNaN):
1863 case PackCategoriesIntoKey(fcZero, fcNaN):
1864 case PackCategoriesIntoKey(fcNormal, fcNaN):
1865 case PackCategoriesIntoKey(fcInfinity, fcNaN):
1866 return cmpUnordered;
1868 case PackCategoriesIntoKey(fcInfinity, fcNormal):
1869 case PackCategoriesIntoKey(fcInfinity, fcZero):
1870 case PackCategoriesIntoKey(fcNormal, fcZero):
1871 if (sign)
1872 return cmpLessThan;
1873 else
1874 return cmpGreaterThan;
1876 case PackCategoriesIntoKey(fcNormal, fcInfinity):
1877 case PackCategoriesIntoKey(fcZero, fcInfinity):
1878 case PackCategoriesIntoKey(fcZero, fcNormal):
1879 if (rhs.sign)
1880 return cmpGreaterThan;
1881 else
1882 return cmpLessThan;
1884 case PackCategoriesIntoKey(fcInfinity, fcInfinity):
1885 if (sign == rhs.sign)
1886 return cmpEqual;
1887 else if (sign)
1888 return cmpLessThan;
1889 else
1890 return cmpGreaterThan;
1892 case PackCategoriesIntoKey(fcZero, fcZero):
1893 return cmpEqual;
1895 case PackCategoriesIntoKey(fcNormal, fcNormal):
1896 break;
1899 /* Two normal numbers. Do they have the same sign? */
1900 if (sign != rhs.sign) {
1901 if (sign)
1902 result = cmpLessThan;
1903 else
1904 result = cmpGreaterThan;
1905 } else {
1906 /* Compare absolute values; invert result if negative. */
1907 result = compareAbsoluteValue(rhs);
1909 if (sign) {
1910 if (result == cmpLessThan)
1911 result = cmpGreaterThan;
1912 else if (result == cmpGreaterThan)
1913 result = cmpLessThan;
1917 return result;
1920 /// IEEEFloat::convert - convert a value of one floating point type to another.
1921 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1922 /// records whether the transformation lost information, i.e. whether
1923 /// converting the result back to the original type will produce the
1924 /// original value (this is almost the same as return value==fsOK, but there
1925 /// are edge cases where this is not so).
1927 IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
1928 roundingMode rounding_mode,
1929 bool *losesInfo) {
1930 lostFraction lostFraction;
1931 unsigned int newPartCount, oldPartCount;
1932 opStatus fs;
1933 int shift;
1934 const fltSemantics &fromSemantics = *semantics;
1936 lostFraction = lfExactlyZero;
1937 newPartCount = partCountForBits(toSemantics.precision + 1);
1938 oldPartCount = partCount();
1939 shift = toSemantics.precision - fromSemantics.precision;
1941 bool X86SpecialNan = false;
1942 if (&fromSemantics == &semX87DoubleExtended &&
1943 &toSemantics != &semX87DoubleExtended && category == fcNaN &&
1944 (!(*significandParts() & 0x8000000000000000ULL) ||
1945 !(*significandParts() & 0x4000000000000000ULL))) {
1946 // x86 has some unusual NaNs which cannot be represented in any other
1947 // format; note them here.
1948 X86SpecialNan = true;
1951 // If this is a truncation of a denormal number, and the target semantics
1952 // has larger exponent range than the source semantics (this can happen
1953 // when truncating from PowerPC double-double to double format), the
1954 // right shift could lose result mantissa bits. Adjust exponent instead
1955 // of performing excessive shift.
1956 if (shift < 0 && isFiniteNonZero()) {
1957 int exponentChange = significandMSB() + 1 - fromSemantics.precision;
1958 if (exponent + exponentChange < toSemantics.minExponent)
1959 exponentChange = toSemantics.minExponent - exponent;
1960 if (exponentChange < shift)
1961 exponentChange = shift;
1962 if (exponentChange < 0) {
1963 shift -= exponentChange;
1964 exponent += exponentChange;
1968 // If this is a truncation, perform the shift before we narrow the storage.
1969 if (shift < 0 && (isFiniteNonZero() || category==fcNaN))
1970 lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
1972 // Fix the storage so it can hold to new value.
1973 if (newPartCount > oldPartCount) {
1974 // The new type requires more storage; make it available.
1975 integerPart *newParts;
1976 newParts = new integerPart[newPartCount];
1977 APInt::tcSet(newParts, 0, newPartCount);
1978 if (isFiniteNonZero() || category==fcNaN)
1979 APInt::tcAssign(newParts, significandParts(), oldPartCount);
1980 freeSignificand();
1981 significand.parts = newParts;
1982 } else if (newPartCount == 1 && oldPartCount != 1) {
1983 // Switch to built-in storage for a single part.
1984 integerPart newPart = 0;
1985 if (isFiniteNonZero() || category==fcNaN)
1986 newPart = significandParts()[0];
1987 freeSignificand();
1988 significand.part = newPart;
1991 // Now that we have the right storage, switch the semantics.
1992 semantics = &toSemantics;
1994 // If this is an extension, perform the shift now that the storage is
1995 // available.
1996 if (shift > 0 && (isFiniteNonZero() || category==fcNaN))
1997 APInt::tcShiftLeft(significandParts(), newPartCount, shift);
1999 if (isFiniteNonZero()) {
2000 fs = normalize(rounding_mode, lostFraction);
2001 *losesInfo = (fs != opOK);
2002 } else if (category == fcNaN) {
2003 *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
2005 // For x87 extended precision, we want to make a NaN, not a special NaN if
2006 // the input wasn't special either.
2007 if (!X86SpecialNan && semantics == &semX87DoubleExtended)
2008 APInt::tcSetBit(significandParts(), semantics->precision - 1);
2010 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
2011 // does not give you back the same bits. This is dubious, and we
2012 // don't currently do it. You're really supposed to get
2013 // an invalid operation signal at runtime, but nobody does that.
2014 fs = opOK;
2015 } else {
2016 *losesInfo = false;
2017 fs = opOK;
2020 return fs;
2023 /* Convert a floating point number to an integer according to the
2024 rounding mode. If the rounded integer value is out of range this
2025 returns an invalid operation exception and the contents of the
2026 destination parts are unspecified. If the rounded value is in
2027 range but the floating point number is not the exact integer, the C
2028 standard doesn't require an inexact exception to be raised. IEEE
2029 854 does require it so we do that.
2031 Note that for conversions to integer type the C standard requires
2032 round-to-zero to always be used. */
2033 IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
2034 MutableArrayRef<integerPart> parts, unsigned int width, bool isSigned,
2035 roundingMode rounding_mode, bool *isExact) const {
2036 lostFraction lost_fraction;
2037 const integerPart *src;
2038 unsigned int dstPartsCount, truncatedBits;
2040 *isExact = false;
2042 /* Handle the three special cases first. */
2043 if (category == fcInfinity || category == fcNaN)
2044 return opInvalidOp;
2046 dstPartsCount = partCountForBits(width);
2047 assert(dstPartsCount <= parts.size() && "Integer too big");
2049 if (category == fcZero) {
2050 APInt::tcSet(parts.data(), 0, dstPartsCount);
2051 // Negative zero can't be represented as an int.
2052 *isExact = !sign;
2053 return opOK;
2056 src = significandParts();
2058 /* Step 1: place our absolute value, with any fraction truncated, in
2059 the destination. */
2060 if (exponent < 0) {
2061 /* Our absolute value is less than one; truncate everything. */
2062 APInt::tcSet(parts.data(), 0, dstPartsCount);
2063 /* For exponent -1 the integer bit represents .5, look at that.
2064 For smaller exponents leftmost truncated bit is 0. */
2065 truncatedBits = semantics->precision -1U - exponent;
2066 } else {
2067 /* We want the most significant (exponent + 1) bits; the rest are
2068 truncated. */
2069 unsigned int bits = exponent + 1U;
2071 /* Hopelessly large in magnitude? */
2072 if (bits > width)
2073 return opInvalidOp;
2075 if (bits < semantics->precision) {
2076 /* We truncate (semantics->precision - bits) bits. */
2077 truncatedBits = semantics->precision - bits;
2078 APInt::tcExtract(parts.data(), dstPartsCount, src, bits, truncatedBits);
2079 } else {
2080 /* We want at least as many bits as are available. */
2081 APInt::tcExtract(parts.data(), dstPartsCount, src, semantics->precision,
2083 APInt::tcShiftLeft(parts.data(), dstPartsCount,
2084 bits - semantics->precision);
2085 truncatedBits = 0;
2089 /* Step 2: work out any lost fraction, and increment the absolute
2090 value if we would round away from zero. */
2091 if (truncatedBits) {
2092 lost_fraction = lostFractionThroughTruncation(src, partCount(),
2093 truncatedBits);
2094 if (lost_fraction != lfExactlyZero &&
2095 roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
2096 if (APInt::tcIncrement(parts.data(), dstPartsCount))
2097 return opInvalidOp; /* Overflow. */
2099 } else {
2100 lost_fraction = lfExactlyZero;
2103 /* Step 3: check if we fit in the destination. */
2104 unsigned int omsb = APInt::tcMSB(parts.data(), dstPartsCount) + 1;
2106 if (sign) {
2107 if (!isSigned) {
2108 /* Negative numbers cannot be represented as unsigned. */
2109 if (omsb != 0)
2110 return opInvalidOp;
2111 } else {
2112 /* It takes omsb bits to represent the unsigned integer value.
2113 We lose a bit for the sign, but care is needed as the
2114 maximally negative integer is a special case. */
2115 if (omsb == width &&
2116 APInt::tcLSB(parts.data(), dstPartsCount) + 1 != omsb)
2117 return opInvalidOp;
2119 /* This case can happen because of rounding. */
2120 if (omsb > width)
2121 return opInvalidOp;
2124 APInt::tcNegate (parts.data(), dstPartsCount);
2125 } else {
2126 if (omsb >= width + !isSigned)
2127 return opInvalidOp;
2130 if (lost_fraction == lfExactlyZero) {
2131 *isExact = true;
2132 return opOK;
2133 } else
2134 return opInexact;
2137 /* Same as convertToSignExtendedInteger, except we provide
2138 deterministic values in case of an invalid operation exception,
2139 namely zero for NaNs and the minimal or maximal value respectively
2140 for underflow or overflow.
2141 The *isExact output tells whether the result is exact, in the sense
2142 that converting it back to the original floating point type produces
2143 the original value. This is almost equivalent to result==opOK,
2144 except for negative zeroes.
2146 IEEEFloat::opStatus
2147 IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
2148 unsigned int width, bool isSigned,
2149 roundingMode rounding_mode, bool *isExact) const {
2150 opStatus fs;
2152 fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
2153 isExact);
2155 if (fs == opInvalidOp) {
2156 unsigned int bits, dstPartsCount;
2158 dstPartsCount = partCountForBits(width);
2159 assert(dstPartsCount <= parts.size() && "Integer too big");
2161 if (category == fcNaN)
2162 bits = 0;
2163 else if (sign)
2164 bits = isSigned;
2165 else
2166 bits = width - isSigned;
2168 APInt::tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
2169 if (sign && isSigned)
2170 APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
2173 return fs;
2176 /* Convert an unsigned integer SRC to a floating point number,
2177 rounding according to ROUNDING_MODE. The sign of the floating
2178 point number is not modified. */
2179 IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts(
2180 const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
2181 unsigned int omsb, precision, dstCount;
2182 integerPart *dst;
2183 lostFraction lost_fraction;
2185 category = fcNormal;
2186 omsb = APInt::tcMSB(src, srcCount) + 1;
2187 dst = significandParts();
2188 dstCount = partCount();
2189 precision = semantics->precision;
2191 /* We want the most significant PRECISION bits of SRC. There may not
2192 be that many; extract what we can. */
2193 if (precision <= omsb) {
2194 exponent = omsb - 1;
2195 lost_fraction = lostFractionThroughTruncation(src, srcCount,
2196 omsb - precision);
2197 APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
2198 } else {
2199 exponent = precision - 1;
2200 lost_fraction = lfExactlyZero;
2201 APInt::tcExtract(dst, dstCount, src, omsb, 0);
2204 return normalize(rounding_mode, lost_fraction);
2207 IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned,
2208 roundingMode rounding_mode) {
2209 unsigned int partCount = Val.getNumWords();
2210 APInt api = Val;
2212 sign = false;
2213 if (isSigned && api.isNegative()) {
2214 sign = true;
2215 api = -api;
2218 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2221 /* Convert a two's complement integer SRC to a floating point number,
2222 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2223 integer is signed, in which case it must be sign-extended. */
2224 IEEEFloat::opStatus
2225 IEEEFloat::convertFromSignExtendedInteger(const integerPart *src,
2226 unsigned int srcCount, bool isSigned,
2227 roundingMode rounding_mode) {
2228 opStatus status;
2230 if (isSigned &&
2231 APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
2232 integerPart *copy;
2234 /* If we're signed and negative negate a copy. */
2235 sign = true;
2236 copy = new integerPart[srcCount];
2237 APInt::tcAssign(copy, src, srcCount);
2238 APInt::tcNegate(copy, srcCount);
2239 status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
2240 delete [] copy;
2241 } else {
2242 sign = false;
2243 status = convertFromUnsignedParts(src, srcCount, rounding_mode);
2246 return status;
2249 /* FIXME: should this just take a const APInt reference? */
2250 IEEEFloat::opStatus
2251 IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
2252 unsigned int width, bool isSigned,
2253 roundingMode rounding_mode) {
2254 unsigned int partCount = partCountForBits(width);
2255 APInt api = APInt(width, makeArrayRef(parts, partCount));
2257 sign = false;
2258 if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
2259 sign = true;
2260 api = -api;
2263 return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
2266 IEEEFloat::opStatus
2267 IEEEFloat::convertFromHexadecimalString(StringRef s,
2268 roundingMode rounding_mode) {
2269 lostFraction lost_fraction = lfExactlyZero;
2271 category = fcNormal;
2272 zeroSignificand();
2273 exponent = 0;
2275 integerPart *significand = significandParts();
2276 unsigned partsCount = partCount();
2277 unsigned bitPos = partsCount * integerPartWidth;
2278 bool computedTrailingFraction = false;
2280 // Skip leading zeroes and any (hexa)decimal point.
2281 StringRef::iterator begin = s.begin();
2282 StringRef::iterator end = s.end();
2283 StringRef::iterator dot;
2284 StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
2285 StringRef::iterator firstSignificantDigit = p;
2287 while (p != end) {
2288 integerPart hex_value;
2290 if (*p == '.') {
2291 assert(dot == end && "String contains multiple dots");
2292 dot = p++;
2293 continue;
2296 hex_value = hexDigitValue(*p);
2297 if (hex_value == -1U)
2298 break;
2300 p++;
2302 // Store the number while we have space.
2303 if (bitPos) {
2304 bitPos -= 4;
2305 hex_value <<= bitPos % integerPartWidth;
2306 significand[bitPos / integerPartWidth] |= hex_value;
2307 } else if (!computedTrailingFraction) {
2308 lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
2309 computedTrailingFraction = true;
2313 /* Hex floats require an exponent but not a hexadecimal point. */
2314 assert(p != end && "Hex strings require an exponent");
2315 assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
2316 assert(p != begin && "Significand has no digits");
2317 assert((dot == end || p - begin != 1) && "Significand has no digits");
2319 /* Ignore the exponent if we are zero. */
2320 if (p != firstSignificantDigit) {
2321 int expAdjustment;
2323 /* Implicit hexadecimal point? */
2324 if (dot == end)
2325 dot = p;
2327 /* Calculate the exponent adjustment implicit in the number of
2328 significant digits. */
2329 expAdjustment = static_cast<int>(dot - firstSignificantDigit);
2330 if (expAdjustment < 0)
2331 expAdjustment++;
2332 expAdjustment = expAdjustment * 4 - 1;
2334 /* Adjust for writing the significand starting at the most
2335 significant nibble. */
2336 expAdjustment += semantics->precision;
2337 expAdjustment -= partsCount * integerPartWidth;
2339 /* Adjust for the given exponent. */
2340 exponent = totalExponent(p + 1, end, expAdjustment);
2343 return normalize(rounding_mode, lost_fraction);
2346 IEEEFloat::opStatus
2347 IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
2348 unsigned sigPartCount, int exp,
2349 roundingMode rounding_mode) {
2350 unsigned int parts, pow5PartCount;
2351 fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
2352 integerPart pow5Parts[maxPowerOfFiveParts];
2353 bool isNearest;
2355 isNearest = (rounding_mode == rmNearestTiesToEven ||
2356 rounding_mode == rmNearestTiesToAway);
2358 parts = partCountForBits(semantics->precision + 11);
2360 /* Calculate pow(5, abs(exp)). */
2361 pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
2363 for (;; parts *= 2) {
2364 opStatus sigStatus, powStatus;
2365 unsigned int excessPrecision, truncatedBits;
2367 calcSemantics.precision = parts * integerPartWidth - 1;
2368 excessPrecision = calcSemantics.precision - semantics->precision;
2369 truncatedBits = excessPrecision;
2371 IEEEFloat decSig(calcSemantics, uninitialized);
2372 decSig.makeZero(sign);
2373 IEEEFloat pow5(calcSemantics);
2375 sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
2376 rmNearestTiesToEven);
2377 powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
2378 rmNearestTiesToEven);
2379 /* Add exp, as 10^n = 5^n * 2^n. */
2380 decSig.exponent += exp;
2382 lostFraction calcLostFraction;
2383 integerPart HUerr, HUdistance;
2384 unsigned int powHUerr;
2386 if (exp >= 0) {
2387 /* multiplySignificand leaves the precision-th bit set to 1. */
2388 calcLostFraction = decSig.multiplySignificand(pow5, nullptr);
2389 powHUerr = powStatus != opOK;
2390 } else {
2391 calcLostFraction = decSig.divideSignificand(pow5);
2392 /* Denormal numbers have less precision. */
2393 if (decSig.exponent < semantics->minExponent) {
2394 excessPrecision += (semantics->minExponent - decSig.exponent);
2395 truncatedBits = excessPrecision;
2396 if (excessPrecision > calcSemantics.precision)
2397 excessPrecision = calcSemantics.precision;
2399 /* Extra half-ulp lost in reciprocal of exponent. */
2400 powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2;
2403 /* Both multiplySignificand and divideSignificand return the
2404 result with the integer bit set. */
2405 assert(APInt::tcExtractBit
2406 (decSig.significandParts(), calcSemantics.precision - 1) == 1);
2408 HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK,
2409 powHUerr);
2410 HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(),
2411 excessPrecision, isNearest);
2413 /* Are we guaranteed to round correctly if we truncate? */
2414 if (HUdistance >= HUerr) {
2415 APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(),
2416 calcSemantics.precision - excessPrecision,
2417 excessPrecision);
2418 /* Take the exponent of decSig. If we tcExtract-ed less bits
2419 above we must adjust our exponent to compensate for the
2420 implicit right shift. */
2421 exponent = (decSig.exponent + semantics->precision
2422 - (calcSemantics.precision - excessPrecision));
2423 calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(),
2424 decSig.partCount(),
2425 truncatedBits);
2426 return normalize(rounding_mode, calcLostFraction);
2431 IEEEFloat::opStatus
2432 IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
2433 decimalInfo D;
2434 opStatus fs;
2436 /* Scan the text. */
2437 StringRef::iterator p = str.begin();
2438 interpretDecimal(p, str.end(), &D);
2440 /* Handle the quick cases. First the case of no significant digits,
2441 i.e. zero, and then exponents that are obviously too large or too
2442 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2443 definitely overflows if
2445 (exp - 1) * L >= maxExponent
2447 and definitely underflows to zero where
2449 (exp + 1) * L <= minExponent - precision
2451 With integer arithmetic the tightest bounds for L are
2453 93/28 < L < 196/59 [ numerator <= 256 ]
2454 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2457 // Test if we have a zero number allowing for strings with no null terminators
2458 // and zero decimals with non-zero exponents.
2460 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
2461 // D->firstSigDigit equals str.end(), every digit must be a zero and there can
2462 // be at most one dot. On the other hand, if we have a zero with a non-zero
2463 // exponent, then we know that D.firstSigDigit will be non-numeric.
2464 if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
2465 category = fcZero;
2466 fs = opOK;
2468 /* Check whether the normalized exponent is high enough to overflow
2469 max during the log-rebasing in the max-exponent check below. */
2470 } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
2471 fs = handleOverflow(rounding_mode);
2473 /* If it wasn't, then it also wasn't high enough to overflow max
2474 during the log-rebasing in the min-exponent check. Check that it
2475 won't overflow min in either check, then perform the min-exponent
2476 check. */
2477 } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
2478 (D.normalizedExponent + 1) * 28738 <=
2479 8651 * (semantics->minExponent - (int) semantics->precision)) {
2480 /* Underflow to zero and round. */
2481 category = fcNormal;
2482 zeroSignificand();
2483 fs = normalize(rounding_mode, lfLessThanHalf);
2485 /* We can finally safely perform the max-exponent check. */
2486 } else if ((D.normalizedExponent - 1) * 42039
2487 >= 12655 * semantics->maxExponent) {
2488 /* Overflow and round. */
2489 fs = handleOverflow(rounding_mode);
2490 } else {
2491 integerPart *decSignificand;
2492 unsigned int partCount;
2494 /* A tight upper bound on number of bits required to hold an
2495 N-digit decimal integer is N * 196 / 59. Allocate enough space
2496 to hold the full significand, and an extra part required by
2497 tcMultiplyPart. */
2498 partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
2499 partCount = partCountForBits(1 + 196 * partCount / 59);
2500 decSignificand = new integerPart[partCount + 1];
2501 partCount = 0;
2503 /* Convert to binary efficiently - we do almost all multiplication
2504 in an integerPart. When this would overflow do we do a single
2505 bignum multiplication, and then revert again to multiplication
2506 in an integerPart. */
2507 do {
2508 integerPart decValue, val, multiplier;
2510 val = 0;
2511 multiplier = 1;
2513 do {
2514 if (*p == '.') {
2515 p++;
2516 if (p == str.end()) {
2517 break;
2520 decValue = decDigitValue(*p++);
2521 assert(decValue < 10U && "Invalid character in significand");
2522 multiplier *= 10;
2523 val = val * 10 + decValue;
2524 /* The maximum number that can be multiplied by ten with any
2525 digit added without overflowing an integerPart. */
2526 } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
2528 /* Multiply out the current part. */
2529 APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
2530 partCount, partCount + 1, false);
2532 /* If we used another part (likely but not guaranteed), increase
2533 the count. */
2534 if (decSignificand[partCount])
2535 partCount++;
2536 } while (p <= D.lastSigDigit);
2538 category = fcNormal;
2539 fs = roundSignificandWithExponent(decSignificand, partCount,
2540 D.exponent, rounding_mode);
2542 delete [] decSignificand;
2545 return fs;
2548 bool IEEEFloat::convertFromStringSpecials(StringRef str) {
2549 if (str.equals("inf") || str.equals("INFINITY") || str.equals("+Inf")) {
2550 makeInf(false);
2551 return true;
2554 if (str.equals("-inf") || str.equals("-INFINITY") || str.equals("-Inf")) {
2555 makeInf(true);
2556 return true;
2559 if (str.equals("nan") || str.equals("NaN")) {
2560 makeNaN(false, false);
2561 return true;
2564 if (str.equals("-nan") || str.equals("-NaN")) {
2565 makeNaN(false, true);
2566 return true;
2569 return false;
2572 IEEEFloat::opStatus IEEEFloat::convertFromString(StringRef str,
2573 roundingMode rounding_mode) {
2574 assert(!str.empty() && "Invalid string length");
2576 // Handle special cases.
2577 if (convertFromStringSpecials(str))
2578 return opOK;
2580 /* Handle a leading minus sign. */
2581 StringRef::iterator p = str.begin();
2582 size_t slen = str.size();
2583 sign = *p == '-' ? 1 : 0;
2584 if (*p == '-' || *p == '+') {
2585 p++;
2586 slen--;
2587 assert(slen && "String has no digits");
2590 if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
2591 assert(slen - 2 && "Invalid string");
2592 return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
2593 rounding_mode);
2596 return convertFromDecimalString(StringRef(p, slen), rounding_mode);
2599 /* Write out a hexadecimal representation of the floating point value
2600 to DST, which must be of sufficient size, in the C99 form
2601 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2602 excluding the terminating NUL.
2604 If UPPERCASE, the output is in upper case, otherwise in lower case.
2606 HEXDIGITS digits appear altogether, rounding the value if
2607 necessary. If HEXDIGITS is 0, the minimal precision to display the
2608 number precisely is used instead. If nothing would appear after
2609 the decimal point it is suppressed.
2611 The decimal exponent is always printed and has at least one digit.
2612 Zero values display an exponent of zero. Infinities and NaNs
2613 appear as "infinity" or "nan" respectively.
2615 The above rules are as specified by C99. There is ambiguity about
2616 what the leading hexadecimal digit should be. This implementation
2617 uses whatever is necessary so that the exponent is displayed as
2618 stored. This implies the exponent will fall within the IEEE format
2619 range, and the leading hexadecimal digit will be 0 (for denormals),
2620 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2621 any other digits zero).
2623 unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
2624 bool upperCase,
2625 roundingMode rounding_mode) const {
2626 char *p;
2628 p = dst;
2629 if (sign)
2630 *dst++ = '-';
2632 switch (category) {
2633 case fcInfinity:
2634 memcpy (dst, upperCase ? infinityU: infinityL, sizeof infinityU - 1);
2635 dst += sizeof infinityL - 1;
2636 break;
2638 case fcNaN:
2639 memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
2640 dst += sizeof NaNU - 1;
2641 break;
2643 case fcZero:
2644 *dst++ = '0';
2645 *dst++ = upperCase ? 'X': 'x';
2646 *dst++ = '0';
2647 if (hexDigits > 1) {
2648 *dst++ = '.';
2649 memset (dst, '0', hexDigits - 1);
2650 dst += hexDigits - 1;
2652 *dst++ = upperCase ? 'P': 'p';
2653 *dst++ = '0';
2654 break;
2656 case fcNormal:
2657 dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
2658 break;
2661 *dst = 0;
2663 return static_cast<unsigned int>(dst - p);
2666 /* Does the hard work of outputting the correctly rounded hexadecimal
2667 form of a normal floating point number with the specified number of
2668 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2669 digits necessary to print the value precisely is output. */
2670 char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
2671 bool upperCase,
2672 roundingMode rounding_mode) const {
2673 unsigned int count, valueBits, shift, partsCount, outputDigits;
2674 const char *hexDigitChars;
2675 const integerPart *significand;
2676 char *p;
2677 bool roundUp;
2679 *dst++ = '0';
2680 *dst++ = upperCase ? 'X': 'x';
2682 roundUp = false;
2683 hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
2685 significand = significandParts();
2686 partsCount = partCount();
2688 /* +3 because the first digit only uses the single integer bit, so
2689 we have 3 virtual zero most-significant-bits. */
2690 valueBits = semantics->precision + 3;
2691 shift = integerPartWidth - valueBits % integerPartWidth;
2693 /* The natural number of digits required ignoring trailing
2694 insignificant zeroes. */
2695 outputDigits = (valueBits - significandLSB () + 3) / 4;
2697 /* hexDigits of zero means use the required number for the
2698 precision. Otherwise, see if we are truncating. If we are,
2699 find out if we need to round away from zero. */
2700 if (hexDigits) {
2701 if (hexDigits < outputDigits) {
2702 /* We are dropping non-zero bits, so need to check how to round.
2703 "bits" is the number of dropped bits. */
2704 unsigned int bits;
2705 lostFraction fraction;
2707 bits = valueBits - hexDigits * 4;
2708 fraction = lostFractionThroughTruncation (significand, partsCount, bits);
2709 roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
2711 outputDigits = hexDigits;
2714 /* Write the digits consecutively, and start writing in the location
2715 of the hexadecimal point. We move the most significant digit
2716 left and add the hexadecimal point later. */
2717 p = ++dst;
2719 count = (valueBits + integerPartWidth - 1) / integerPartWidth;
2721 while (outputDigits && count) {
2722 integerPart part;
2724 /* Put the most significant integerPartWidth bits in "part". */
2725 if (--count == partsCount)
2726 part = 0; /* An imaginary higher zero part. */
2727 else
2728 part = significand[count] << shift;
2730 if (count && shift)
2731 part |= significand[count - 1] >> (integerPartWidth - shift);
2733 /* Convert as much of "part" to hexdigits as we can. */
2734 unsigned int curDigits = integerPartWidth / 4;
2736 if (curDigits > outputDigits)
2737 curDigits = outputDigits;
2738 dst += partAsHex (dst, part, curDigits, hexDigitChars);
2739 outputDigits -= curDigits;
2742 if (roundUp) {
2743 char *q = dst;
2745 /* Note that hexDigitChars has a trailing '0'. */
2746 do {
2747 q--;
2748 *q = hexDigitChars[hexDigitValue (*q) + 1];
2749 } while (*q == '0');
2750 assert(q >= p);
2751 } else {
2752 /* Add trailing zeroes. */
2753 memset (dst, '0', outputDigits);
2754 dst += outputDigits;
2757 /* Move the most significant digit to before the point, and if there
2758 is something after the decimal point add it. This must come
2759 after rounding above. */
2760 p[-1] = p[0];
2761 if (dst -1 == p)
2762 dst--;
2763 else
2764 p[0] = '.';
2766 /* Finally output the exponent. */
2767 *dst++ = upperCase ? 'P': 'p';
2769 return writeSignedDecimal (dst, exponent);
2772 hash_code hash_value(const IEEEFloat &Arg) {
2773 if (!Arg.isFiniteNonZero())
2774 return hash_combine((uint8_t)Arg.category,
2775 // NaN has no sign, fix it at zero.
2776 Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
2777 Arg.semantics->precision);
2779 // Normal floats need their exponent and significand hashed.
2780 return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
2781 Arg.semantics->precision, Arg.exponent,
2782 hash_combine_range(
2783 Arg.significandParts(),
2784 Arg.significandParts() + Arg.partCount()));
// Conversion from APFloat to/from host float/double.  It may eventually be
// possible to eliminate these and have everybody deal with APFloats, but that
// will take a while.  This approach will not easily extend to long double.
// Current implementation requires integerPartWidth==64, which is correct at
// the moment but could be made more general.

// Denormals have exponent minExponent in APFloat, but minExponent-1 in
// the actual IEEE representations.  We compensate for that here.

2796 APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
2797 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
2798 assert(partCount()==2);
2800 uint64_t myexponent, mysignificand;
2802 if (isFiniteNonZero()) {
2803 myexponent = exponent+16383; //bias
2804 mysignificand = significandParts()[0];
2805 if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
2806 myexponent = 0; // denormal
2807 } else if (category==fcZero) {
2808 myexponent = 0;
2809 mysignificand = 0;
2810 } else if (category==fcInfinity) {
2811 myexponent = 0x7fff;
2812 mysignificand = 0x8000000000000000ULL;
2813 } else {
2814 assert(category == fcNaN && "Unknown category");
2815 myexponent = 0x7fff;
2816 mysignificand = significandParts()[0];
2819 uint64_t words[2];
2820 words[0] = mysignificand;
2821 words[1] = ((uint64_t)(sign & 1) << 15) |
2822 (myexponent & 0x7fffLL);
2823 return APInt(80, words);
2826 APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
2827 assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy);
2828 assert(partCount()==2);
2830 uint64_t words[2];
2831 opStatus fs;
2832 bool losesInfo;
2834 // Convert number to double. To avoid spurious underflows, we re-
2835 // normalize against the "double" minExponent first, and only *then*
2836 // truncate the mantissa. The result of that second conversion
2837 // may be inexact, but should never underflow.
2838 // Declare fltSemantics before APFloat that uses it (and
2839 // saves pointer to it) to ensure correct destruction order.
2840 fltSemantics extendedSemantics = *semantics;
2841 extendedSemantics.minExponent = semIEEEdouble.minExponent;
2842 IEEEFloat extended(*this);
2843 fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2844 assert(fs == opOK && !losesInfo);
2845 (void)fs;
2847 IEEEFloat u(extended);
2848 fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
2849 assert(fs == opOK || fs == opInexact);
2850 (void)fs;
2851 words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
2853 // If conversion was exact or resulted in a special case, we're done;
2854 // just set the second double to zero. Otherwise, re-convert back to
2855 // the extended format and compute the difference. This now should
2856 // convert exactly to double.
2857 if (u.isFiniteNonZero() && losesInfo) {
2858 fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
2859 assert(fs == opOK && !losesInfo);
2860 (void)fs;
2862 IEEEFloat v(extended);
2863 v.subtract(u, rmNearestTiesToEven);
2864 fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
2865 assert(fs == opOK && !losesInfo);
2866 (void)fs;
2867 words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
2868 } else {
2869 words[1] = 0;
2872 return APInt(128, words);
2875 APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
2876 assert(semantics == (const llvm::fltSemantics*)&semIEEEquad);
2877 assert(partCount()==2);
2879 uint64_t myexponent, mysignificand, mysignificand2;
2881 if (isFiniteNonZero()) {
2882 myexponent = exponent+16383; //bias
2883 mysignificand = significandParts()[0];
2884 mysignificand2 = significandParts()[1];
2885 if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
2886 myexponent = 0; // denormal
2887 } else if (category==fcZero) {
2888 myexponent = 0;
2889 mysignificand = mysignificand2 = 0;
2890 } else if (category==fcInfinity) {
2891 myexponent = 0x7fff;
2892 mysignificand = mysignificand2 = 0;
2893 } else {
2894 assert(category == fcNaN && "Unknown category!");
2895 myexponent = 0x7fff;
2896 mysignificand = significandParts()[0];
2897 mysignificand2 = significandParts()[1];
2900 uint64_t words[2];
2901 words[0] = mysignificand;
2902 words[1] = ((uint64_t)(sign & 1) << 63) |
2903 ((myexponent & 0x7fff) << 48) |
2904 (mysignificand2 & 0xffffffffffffLL);
2906 return APInt(128, words);
2909 APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
2910 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble);
2911 assert(partCount()==1);
2913 uint64_t myexponent, mysignificand;
2915 if (isFiniteNonZero()) {
2916 myexponent = exponent+1023; //bias
2917 mysignificand = *significandParts();
2918 if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
2919 myexponent = 0; // denormal
2920 } else if (category==fcZero) {
2921 myexponent = 0;
2922 mysignificand = 0;
2923 } else if (category==fcInfinity) {
2924 myexponent = 0x7ff;
2925 mysignificand = 0;
2926 } else {
2927 assert(category == fcNaN && "Unknown category!");
2928 myexponent = 0x7ff;
2929 mysignificand = *significandParts();
2932 return APInt(64, ((((uint64_t)(sign & 1) << 63) |
2933 ((myexponent & 0x7ff) << 52) |
2934 (mysignificand & 0xfffffffffffffLL))));
2937 APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
2938 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle);
2939 assert(partCount()==1);
2941 uint32_t myexponent, mysignificand;
2943 if (isFiniteNonZero()) {
2944 myexponent = exponent+127; //bias
2945 mysignificand = (uint32_t)*significandParts();
2946 if (myexponent == 1 && !(mysignificand & 0x800000))
2947 myexponent = 0; // denormal
2948 } else if (category==fcZero) {
2949 myexponent = 0;
2950 mysignificand = 0;
2951 } else if (category==fcInfinity) {
2952 myexponent = 0xff;
2953 mysignificand = 0;
2954 } else {
2955 assert(category == fcNaN && "Unknown category!");
2956 myexponent = 0xff;
2957 mysignificand = (uint32_t)*significandParts();
2960 return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
2961 (mysignificand & 0x7fffff)));
2964 APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
2965 assert(semantics == (const llvm::fltSemantics*)&semIEEEhalf);
2966 assert(partCount()==1);
2968 uint32_t myexponent, mysignificand;
2970 if (isFiniteNonZero()) {
2971 myexponent = exponent+15; //bias
2972 mysignificand = (uint32_t)*significandParts();
2973 if (myexponent == 1 && !(mysignificand & 0x400))
2974 myexponent = 0; // denormal
2975 } else if (category==fcZero) {
2976 myexponent = 0;
2977 mysignificand = 0;
2978 } else if (category==fcInfinity) {
2979 myexponent = 0x1f;
2980 mysignificand = 0;
2981 } else {
2982 assert(category == fcNaN && "Unknown category!");
2983 myexponent = 0x1f;
2984 mysignificand = (uint32_t)*significandParts();
2987 return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
2988 (mysignificand & 0x3ff)));
2991 // This function creates an APInt that is just a bit map of the floating
2992 // point constant as it would appear in memory. It is not a conversion,
2993 // and treating the result as a normal integer is unlikely to be useful.
2995 APInt IEEEFloat::bitcastToAPInt() const {
2996 if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
2997 return convertHalfAPFloatToAPInt();
2999 if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
3000 return convertFloatAPFloatToAPInt();
3002 if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
3003 return convertDoubleAPFloatToAPInt();
3005 if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
3006 return convertQuadrupleAPFloatToAPInt();
3008 if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleLegacy)
3009 return convertPPCDoubleDoubleAPFloatToAPInt();
3011 assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
3012 "unknown format!");
3013 return convertF80LongDoubleAPFloatToAPInt();
3016 float IEEEFloat::convertToFloat() const {
3017 assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
3018 "Float semantics are not IEEEsingle");
3019 APInt api = bitcastToAPInt();
3020 return api.bitsToFloat();
3023 double IEEEFloat::convertToDouble() const {
3024 assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
3025 "Float semantics are not IEEEdouble");
3026 APInt api = bitcastToAPInt();
3027 return api.bitsToDouble();
3030 /// Integer bit is explicit in this format. Intel hardware (387 and later)
3031 /// does not support these bit patterns:
3032 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3033 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3034 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3035 /// exponent = 0, integer bit 1 ("pseudodenormal")
3036 /// At the moment, the first three are treated as NaNs, the last one as Normal.
3037 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
3038 assert(api.getBitWidth()==80);
3039 uint64_t i1 = api.getRawData()[0];
3040 uint64_t i2 = api.getRawData()[1];
3041 uint64_t myexponent = (i2 & 0x7fff);
3042 uint64_t mysignificand = i1;
3043 uint8_t myintegerbit = mysignificand >> 63;
3045 initialize(&semX87DoubleExtended);
3046 assert(partCount()==2);
3048 sign = static_cast<unsigned int>(i2>>15);
3049 if (myexponent == 0 && mysignificand == 0) {
3050 // exponent, significand meaningless
3051 category = fcZero;
3052 } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
3053 // exponent, significand meaningless
3054 category = fcInfinity;
3055 } else if ((myexponent == 0x7fff && mysignificand != 0x8000000000000000ULL) ||
3056 (myexponent != 0x7fff && myexponent != 0 && myintegerbit == 0)) {
3057 // exponent meaningless
3058 category = fcNaN;
3059 significandParts()[0] = mysignificand;
3060 significandParts()[1] = 0;
3061 } else {
3062 category = fcNormal;
3063 exponent = myexponent - 16383;
3064 significandParts()[0] = mysignificand;
3065 significandParts()[1] = 0;
3066 if (myexponent==0) // denormal
3067 exponent = -16382;
3071 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
3072 assert(api.getBitWidth()==128);
3073 uint64_t i1 = api.getRawData()[0];
3074 uint64_t i2 = api.getRawData()[1];
3075 opStatus fs;
3076 bool losesInfo;
3078 // Get the first double and convert to our format.
3079 initFromDoubleAPInt(APInt(64, i1));
3080 fs = convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3081 assert(fs == opOK && !losesInfo);
3082 (void)fs;
3084 // Unless we have a special case, add in second double.
3085 if (isFiniteNonZero()) {
3086 IEEEFloat v(semIEEEdouble, APInt(64, i2));
3087 fs = v.convert(semPPCDoubleDoubleLegacy, rmNearestTiesToEven, &losesInfo);
3088 assert(fs == opOK && !losesInfo);
3089 (void)fs;
3091 add(v, rmNearestTiesToEven);
3095 void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
3096 assert(api.getBitWidth()==128);
3097 uint64_t i1 = api.getRawData()[0];
3098 uint64_t i2 = api.getRawData()[1];
3099 uint64_t myexponent = (i2 >> 48) & 0x7fff;
3100 uint64_t mysignificand = i1;
3101 uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
3103 initialize(&semIEEEquad);
3104 assert(partCount()==2);
3106 sign = static_cast<unsigned int>(i2>>63);
3107 if (myexponent==0 &&
3108 (mysignificand==0 && mysignificand2==0)) {
3109 // exponent, significand meaningless
3110 category = fcZero;
3111 } else if (myexponent==0x7fff &&
3112 (mysignificand==0 && mysignificand2==0)) {
3113 // exponent, significand meaningless
3114 category = fcInfinity;
3115 } else if (myexponent==0x7fff &&
3116 (mysignificand!=0 || mysignificand2 !=0)) {
3117 // exponent meaningless
3118 category = fcNaN;
3119 significandParts()[0] = mysignificand;
3120 significandParts()[1] = mysignificand2;
3121 } else {
3122 category = fcNormal;
3123 exponent = myexponent - 16383;
3124 significandParts()[0] = mysignificand;
3125 significandParts()[1] = mysignificand2;
3126 if (myexponent==0) // denormal
3127 exponent = -16382;
3128 else
3129 significandParts()[1] |= 0x1000000000000LL; // integer bit
3133 void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
3134 assert(api.getBitWidth()==64);
3135 uint64_t i = *api.getRawData();
3136 uint64_t myexponent = (i >> 52) & 0x7ff;
3137 uint64_t mysignificand = i & 0xfffffffffffffLL;
3139 initialize(&semIEEEdouble);
3140 assert(partCount()==1);
3142 sign = static_cast<unsigned int>(i>>63);
3143 if (myexponent==0 && mysignificand==0) {
3144 // exponent, significand meaningless
3145 category = fcZero;
3146 } else if (myexponent==0x7ff && mysignificand==0) {
3147 // exponent, significand meaningless
3148 category = fcInfinity;
3149 } else if (myexponent==0x7ff && mysignificand!=0) {
3150 // exponent meaningless
3151 category = fcNaN;
3152 *significandParts() = mysignificand;
3153 } else {
3154 category = fcNormal;
3155 exponent = myexponent - 1023;
3156 *significandParts() = mysignificand;
3157 if (myexponent==0) // denormal
3158 exponent = -1022;
3159 else
3160 *significandParts() |= 0x10000000000000LL; // integer bit
3164 void IEEEFloat::initFromFloatAPInt(const APInt &api) {
3165 assert(api.getBitWidth()==32);
3166 uint32_t i = (uint32_t)*api.getRawData();
3167 uint32_t myexponent = (i >> 23) & 0xff;
3168 uint32_t mysignificand = i & 0x7fffff;
3170 initialize(&semIEEEsingle);
3171 assert(partCount()==1);
3173 sign = i >> 31;
3174 if (myexponent==0 && mysignificand==0) {
3175 // exponent, significand meaningless
3176 category = fcZero;
3177 } else if (myexponent==0xff && mysignificand==0) {
3178 // exponent, significand meaningless
3179 category = fcInfinity;
3180 } else if (myexponent==0xff && mysignificand!=0) {
3181 // sign, exponent, significand meaningless
3182 category = fcNaN;
3183 *significandParts() = mysignificand;
3184 } else {
3185 category = fcNormal;
3186 exponent = myexponent - 127; //bias
3187 *significandParts() = mysignificand;
3188 if (myexponent==0) // denormal
3189 exponent = -126;
3190 else
3191 *significandParts() |= 0x800000; // integer bit
3195 void IEEEFloat::initFromHalfAPInt(const APInt &api) {
3196 assert(api.getBitWidth()==16);
3197 uint32_t i = (uint32_t)*api.getRawData();
3198 uint32_t myexponent = (i >> 10) & 0x1f;
3199 uint32_t mysignificand = i & 0x3ff;
3201 initialize(&semIEEEhalf);
3202 assert(partCount()==1);
3204 sign = i >> 15;
3205 if (myexponent==0 && mysignificand==0) {
3206 // exponent, significand meaningless
3207 category = fcZero;
3208 } else if (myexponent==0x1f && mysignificand==0) {
3209 // exponent, significand meaningless
3210 category = fcInfinity;
3211 } else if (myexponent==0x1f && mysignificand!=0) {
3212 // sign, exponent, significand meaningless
3213 category = fcNaN;
3214 *significandParts() = mysignificand;
3215 } else {
3216 category = fcNormal;
3217 exponent = myexponent - 15; //bias
3218 *significandParts() = mysignificand;
3219 if (myexponent==0) // denormal
3220 exponent = -14;
3221 else
3222 *significandParts() |= 0x400; // integer bit
3226 /// Treat api as containing the bits of a floating point number. Currently
3227 /// we infer the floating point type from the size of the APInt. The
3228 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3229 /// when the size is anything else).
3230 void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
3231 if (Sem == &semIEEEhalf)
3232 return initFromHalfAPInt(api);
3233 if (Sem == &semIEEEsingle)
3234 return initFromFloatAPInt(api);
3235 if (Sem == &semIEEEdouble)
3236 return initFromDoubleAPInt(api);
3237 if (Sem == &semX87DoubleExtended)
3238 return initFromF80LongDoubleAPInt(api);
3239 if (Sem == &semIEEEquad)
3240 return initFromQuadrupleAPInt(api);
3241 if (Sem == &semPPCDoubleDoubleLegacy)
3242 return initFromPPCDoubleDoubleAPInt(api);
3244 llvm_unreachable(nullptr);
3247 /// Make this number the largest magnitude normal number in the given
3248 /// semantics.
3249 void IEEEFloat::makeLargest(bool Negative) {
3250 // We want (in interchange format):
3251 // sign = {Negative}
3252 // exponent = 1..10
3253 // significand = 1..1
3254 category = fcNormal;
3255 sign = Negative;
3256 exponent = semantics->maxExponent;
3258 // Use memset to set all but the highest integerPart to all ones.
3259 integerPart *significand = significandParts();
3260 unsigned PartCount = partCount();
3261 memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1));
3263 // Set the high integerPart especially setting all unused top bits for
3264 // internal consistency.
3265 const unsigned NumUnusedHighBits =
3266 PartCount*integerPartWidth - semantics->precision;
3267 significand[PartCount - 1] = (NumUnusedHighBits < integerPartWidth)
3268 ? (~integerPart(0) >> NumUnusedHighBits)
3269 : 0;
3272 /// Make this number the smallest magnitude denormal number in the given
3273 /// semantics.
3274 void IEEEFloat::makeSmallest(bool Negative) {
3275 // We want (in interchange format):
3276 // sign = {Negative}
3277 // exponent = 0..0
3278 // significand = 0..01
3279 category = fcNormal;
3280 sign = Negative;
3281 exponent = semantics->minExponent;
3282 APInt::tcSet(significandParts(), 1, partCount());
3285 void IEEEFloat::makeSmallestNormalized(bool Negative) {
3286 // We want (in interchange format):
3287 // sign = {Negative}
3288 // exponent = 0..0
3289 // significand = 10..0
3291 category = fcNormal;
3292 zeroSignificand();
3293 sign = Negative;
3294 exponent = semantics->minExponent;
3295 significandParts()[partCountForBits(semantics->precision) - 1] |=
3296 (((integerPart)1) << ((semantics->precision - 1) % integerPartWidth));
3299 IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
3300 initFromAPInt(&Sem, API);
3303 IEEEFloat::IEEEFloat(float f) {
3304 initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
3307 IEEEFloat::IEEEFloat(double d) {
3308 initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
3311 namespace {
3312 void append(SmallVectorImpl<char> &Buffer, StringRef Str) {
3313 Buffer.append(Str.begin(), Str.end());
3316 /// Removes data from the given significand until it is no more
3317 /// precise than is required for the desired precision.
3318 void AdjustToPrecision(APInt &significand,
3319 int &exp, unsigned FormatPrecision) {
3320 unsigned bits = significand.getActiveBits();
3322 // 196/59 is a very slight overestimate of lg_2(10).
3323 unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
3325 if (bits <= bitsRequired) return;
3327 unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
3328 if (!tensRemovable) return;
3330 exp += tensRemovable;
3332 APInt divisor(significand.getBitWidth(), 1);
3333 APInt powten(significand.getBitWidth(), 10);
3334 while (true) {
3335 if (tensRemovable & 1)
3336 divisor *= powten;
3337 tensRemovable >>= 1;
3338 if (!tensRemovable) break;
3339 powten *= powten;
3342 significand = significand.udiv(divisor);
3344 // Truncate the significand down to its active bit count.
3345 significand = significand.trunc(significand.getActiveBits());
3349 void AdjustToPrecision(SmallVectorImpl<char> &buffer,
3350 int &exp, unsigned FormatPrecision) {
3351 unsigned N = buffer.size();
3352 if (N <= FormatPrecision) return;
3354 // The most significant figures are the last ones in the buffer.
3355 unsigned FirstSignificant = N - FormatPrecision;
3357 // Round.
3358 // FIXME: this probably shouldn't use 'round half up'.
3360 // Rounding down is just a truncation, except we also want to drop
3361 // trailing zeros from the new result.
3362 if (buffer[FirstSignificant - 1] < '5') {
3363 while (FirstSignificant < N && buffer[FirstSignificant] == '0')
3364 FirstSignificant++;
3366 exp += FirstSignificant;
3367 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3368 return;
3371 // Rounding up requires a decimal add-with-carry. If we continue
3372 // the carry, the newly-introduced zeros will just be truncated.
3373 for (unsigned I = FirstSignificant; I != N; ++I) {
3374 if (buffer[I] == '9') {
3375 FirstSignificant++;
3376 } else {
3377 buffer[I]++;
3378 break;
3382 // If we carried through, we have exactly one digit of precision.
3383 if (FirstSignificant == N) {
3384 exp += FirstSignificant;
3385 buffer.clear();
3386 buffer.push_back('1');
3387 return;
3390 exp += FirstSignificant;
3391 buffer.erase(&buffer[0], &buffer[FirstSignificant]);
3395 void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
3396 unsigned FormatMaxPadding, bool TruncateZero) const {
3397 switch (category) {
3398 case fcInfinity:
3399 if (isNegative())
3400 return append(Str, "-Inf");
3401 else
3402 return append(Str, "+Inf");
3404 case fcNaN: return append(Str, "NaN");
3406 case fcZero:
3407 if (isNegative())
3408 Str.push_back('-');
3410 if (!FormatMaxPadding) {
3411 if (TruncateZero)
3412 append(Str, "0.0E+0");
3413 else {
3414 append(Str, "0.0");
3415 if (FormatPrecision > 1)
3416 Str.append(FormatPrecision - 1, '0');
3417 append(Str, "e+00");
3419 } else
3420 Str.push_back('0');
3421 return;
3423 case fcNormal:
3424 break;
3427 if (isNegative())
3428 Str.push_back('-');
3430 // Decompose the number into an APInt and an exponent.
3431 int exp = exponent - ((int) semantics->precision - 1);
3432 APInt significand(semantics->precision,
3433 makeArrayRef(significandParts(),
3434 partCountForBits(semantics->precision)));
3436 // Set FormatPrecision if zero. We want to do this before we
3437 // truncate trailing zeros, as those are part of the precision.
3438 if (!FormatPrecision) {
3439 // We use enough digits so the number can be round-tripped back to an
3440 // APFloat. The formula comes from "How to Print Floating-Point Numbers
3441 // Accurately" by Steele and White.
3442 // FIXME: Using a formula based purely on the precision is conservative;
3443 // we can print fewer digits depending on the actual value being printed.
3445 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
3446 FormatPrecision = 2 + semantics->precision * 59 / 196;
3449 // Ignore trailing binary zeros.
3450 int trailingZeros = significand.countTrailingZeros();
3451 exp += trailingZeros;
3452 significand.lshrInPlace(trailingZeros);
3454 // Change the exponent from 2^e to 10^e.
3455 if (exp == 0) {
3456 // Nothing to do.
3457 } else if (exp > 0) {
3458 // Just shift left.
3459 significand = significand.zext(semantics->precision + exp);
3460 significand <<= exp;
3461 exp = 0;
3462 } else { /* exp < 0 */
3463 int texp = -exp;
3465 // We transform this using the identity:
3466 // (N)(2^-e) == (N)(5^e)(10^-e)
3467 // This means we have to multiply N (the significand) by 5^e.
3468 // To avoid overflow, we have to operate on numbers large
3469 // enough to store N * 5^e:
3470 // log2(N * 5^e) == log2(N) + e * log2(5)
3471 // <= semantics->precision + e * 137 / 59
3472 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3474 unsigned precision = semantics->precision + (137 * texp + 136) / 59;
3476 // Multiply significand by 5^e.
3477 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3478 significand = significand.zext(precision);
3479 APInt five_to_the_i(precision, 5);
3480 while (true) {
3481 if (texp & 1) significand *= five_to_the_i;
3483 texp >>= 1;
3484 if (!texp) break;
3485 five_to_the_i *= five_to_the_i;
3489 AdjustToPrecision(significand, exp, FormatPrecision);
3491 SmallVector<char, 256> buffer;
3493 // Fill the buffer.
3494 unsigned precision = significand.getBitWidth();
3495 APInt ten(precision, 10);
3496 APInt digit(precision, 0);
3498 bool inTrail = true;
3499 while (significand != 0) {
3500 // digit <- significand % 10
3501 // significand <- significand / 10
3502 APInt::udivrem(significand, ten, significand, digit);
3504 unsigned d = digit.getZExtValue();
3506 // Drop trailing zeros.
3507 if (inTrail && !d) exp++;
3508 else {
3509 buffer.push_back((char) ('0' + d));
3510 inTrail = false;
3514 assert(!buffer.empty() && "no characters in buffer!");
3516 // Drop down to FormatPrecision.
3517 // TODO: don't do more precise calculations above than are required.
3518 AdjustToPrecision(buffer, exp, FormatPrecision);
3520 unsigned NDigits = buffer.size();
3522 // Check whether we should use scientific notation.
3523 bool FormatScientific;
3524 if (!FormatMaxPadding)
3525 FormatScientific = true;
3526 else {
3527 if (exp >= 0) {
3528 // 765e3 --> 765000
3529 // ^^^
3530 // But we shouldn't make the number look more precise than it is.
3531 FormatScientific = ((unsigned) exp > FormatMaxPadding ||
3532 NDigits + (unsigned) exp > FormatPrecision);
3533 } else {
3534 // Power of the most significant digit.
3535 int MSD = exp + (int) (NDigits - 1);
3536 if (MSD >= 0) {
3537 // 765e-2 == 7.65
3538 FormatScientific = false;
3539 } else {
3540 // 765e-5 == 0.00765
3541 // ^ ^^
3542 FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
3547 // Scientific formatting is pretty straightforward.
3548 if (FormatScientific) {
3549 exp += (NDigits - 1);
3551 Str.push_back(buffer[NDigits-1]);
3552 Str.push_back('.');
3553 if (NDigits == 1 && TruncateZero)
3554 Str.push_back('0');
3555 else
3556 for (unsigned I = 1; I != NDigits; ++I)
3557 Str.push_back(buffer[NDigits-1-I]);
3558 // Fill with zeros up to FormatPrecision.
3559 if (!TruncateZero && FormatPrecision > NDigits - 1)
3560 Str.append(FormatPrecision - NDigits + 1, '0');
3561 // For !TruncateZero we use lower 'e'.
3562 Str.push_back(TruncateZero ? 'E' : 'e');
3564 Str.push_back(exp >= 0 ? '+' : '-');
3565 if (exp < 0) exp = -exp;
3566 SmallVector<char, 6> expbuf;
3567 do {
3568 expbuf.push_back((char) ('0' + (exp % 10)));
3569 exp /= 10;
3570 } while (exp);
3571 // Exponent always at least two digits if we do not truncate zeros.
3572 if (!TruncateZero && expbuf.size() < 2)
3573 expbuf.push_back('0');
3574 for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
3575 Str.push_back(expbuf[E-1-I]);
3576 return;
3579 // Non-scientific, positive exponents.
3580 if (exp >= 0) {
3581 for (unsigned I = 0; I != NDigits; ++I)
3582 Str.push_back(buffer[NDigits-1-I]);
3583 for (unsigned I = 0; I != (unsigned) exp; ++I)
3584 Str.push_back('0');
3585 return;
3588 // Non-scientific, negative exponents.
3590 // The number of digits to the left of the decimal point.
3591 int NWholeDigits = exp + (int) NDigits;
3593 unsigned I = 0;
3594 if (NWholeDigits > 0) {
3595 for (; I != (unsigned) NWholeDigits; ++I)
3596 Str.push_back(buffer[NDigits-I-1]);
3597 Str.push_back('.');
3598 } else {
3599 unsigned NZeros = 1 + (unsigned) -NWholeDigits;
3601 Str.push_back('0');
3602 Str.push_back('.');
3603 for (unsigned Z = 1; Z != NZeros; ++Z)
3604 Str.push_back('0');
3607 for (; I != NDigits; ++I)
3608 Str.push_back(buffer[NDigits-I-1]);
3611 bool IEEEFloat::getExactInverse(APFloat *inv) const {
3612 // Special floats and denormals have no exact inverse.
3613 if (!isFiniteNonZero())
3614 return false;
3616 // Check that the number is a power of two by making sure that only the
3617 // integer bit is set in the significand.
3618 if (significandLSB() != semantics->precision - 1)
3619 return false;
3621 // Get the inverse.
3622 IEEEFloat reciprocal(*semantics, 1ULL);
3623 if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
3624 return false;
3626 // Avoid multiplication with a denormal, it is not safe on all platforms and
3627 // may be slower than a normal division.
3628 if (reciprocal.isDenormal())
3629 return false;
3631 assert(reciprocal.isFiniteNonZero() &&
3632 reciprocal.significandLSB() == reciprocal.semantics->precision - 1);
3634 if (inv)
3635 *inv = APFloat(reciprocal, *semantics);
3637 return true;
3640 bool IEEEFloat::isSignaling() const {
3641 if (!isNaN())
3642 return false;
3644 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
3645 // first bit of the trailing significand being 0.
3646 return !APInt::tcExtractBit(significandParts(), semantics->precision - 2);
3649 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
3651 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
3652 /// appropriate sign switching before/after the computation.
3653 IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
3654 // If we are performing nextDown, swap sign so we have -x.
3655 if (nextDown)
3656 changeSign();
3658 // Compute nextUp(x)
3659 opStatus result = opOK;
3661 // Handle each float category separately.
3662 switch (category) {
3663 case fcInfinity:
3664 // nextUp(+inf) = +inf
3665 if (!isNegative())
3666 break;
3667 // nextUp(-inf) = -getLargest()
3668 makeLargest(true);
3669 break;
3670 case fcNaN:
3671 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
3672 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
3673 // change the payload.
3674 if (isSignaling()) {
3675 result = opInvalidOp;
3676 // For consistency, propagate the sign of the sNaN to the qNaN.
3677 makeNaN(false, isNegative(), nullptr);
3679 break;
3680 case fcZero:
3681 // nextUp(pm 0) = +getSmallest()
3682 makeSmallest(false);
3683 break;
3684 case fcNormal:
3685 // nextUp(-getSmallest()) = -0
3686 if (isSmallest() && isNegative()) {
3687 APInt::tcSet(significandParts(), 0, partCount());
3688 category = fcZero;
3689 exponent = 0;
3690 break;
3693 // nextUp(getLargest()) == INFINITY
3694 if (isLargest() && !isNegative()) {
3695 APInt::tcSet(significandParts(), 0, partCount());
3696 category = fcInfinity;
3697 exponent = semantics->maxExponent + 1;
3698 break;
3701 // nextUp(normal) == normal + inc.
3702 if (isNegative()) {
3703 // If we are negative, we need to decrement the significand.
3705 // We only cross a binade boundary that requires adjusting the exponent
3706 // if:
3707 // 1. exponent != semantics->minExponent. This implies we are not in the
3708 // smallest binade or are dealing with denormals.
3709 // 2. Our significand excluding the integral bit is all zeros.
3710 bool WillCrossBinadeBoundary =
3711 exponent != semantics->minExponent && isSignificandAllZeros();
3713 // Decrement the significand.
3715 // We always do this since:
3716 // 1. If we are dealing with a non-binade decrement, by definition we
3717 // just decrement the significand.
3718 // 2. If we are dealing with a normal -> normal binade decrement, since
3719 // we have an explicit integral bit the fact that all bits but the
3720 // integral bit are zero implies that subtracting one will yield a
3721 // significand with 0 integral bit and 1 in all other spots. Thus we
3722 // must just adjust the exponent and set the integral bit to 1.
3723 // 3. If we are dealing with a normal -> denormal binade decrement,
3724 // since we set the integral bit to 0 when we represent denormals, we
3725 // just decrement the significand.
3726 integerPart *Parts = significandParts();
3727 APInt::tcDecrement(Parts, partCount());
3729 if (WillCrossBinadeBoundary) {
3730 // Our result is a normal number. Do the following:
3731 // 1. Set the integral bit to 1.
3732 // 2. Decrement the exponent.
3733 APInt::tcSetBit(Parts, semantics->precision - 1);
3734 exponent--;
3736 } else {
3737 // If we are positive, we need to increment the significand.
3739 // We only cross a binade boundary that requires adjusting the exponent if
3740 // the input is not a denormal and all of said input's significand bits
3741 // are set. If all of said conditions are true: clear the significand, set
3742 // the integral bit to 1, and increment the exponent. If we have a
3743 // denormal always increment since moving denormals and the numbers in the
3744 // smallest normal binade have the same exponent in our representation.
3745 bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes();
3747 if (WillCrossBinadeBoundary) {
3748 integerPart *Parts = significandParts();
3749 APInt::tcSet(Parts, 0, partCount());
3750 APInt::tcSetBit(Parts, semantics->precision - 1);
3751 assert(exponent != semantics->maxExponent &&
3752 "We can not increment an exponent beyond the maxExponent allowed"
3753 " by the given floating point semantics.");
3754 exponent++;
3755 } else {
3756 incrementSignificand();
3759 break;
3762 // If we are performing nextDown, swap sign so we have -nextUp(-x)
3763 if (nextDown)
3764 changeSign();
3766 return result;
3769 void IEEEFloat::makeInf(bool Negative) {
3770 category = fcInfinity;
3771 sign = Negative;
3772 exponent = semantics->maxExponent + 1;
3773 APInt::tcSet(significandParts(), 0, partCount());
3776 void IEEEFloat::makeZero(bool Negative) {
3777 category = fcZero;
3778 sign = Negative;
3779 exponent = semantics->minExponent-1;
3780 APInt::tcSet(significandParts(), 0, partCount());
3783 void IEEEFloat::makeQuiet() {
3784 assert(isNaN());
3785 APInt::tcSetBit(significandParts(), semantics->precision - 2);
3788 int ilogb(const IEEEFloat &Arg) {
3789 if (Arg.isNaN())
3790 return IEEEFloat::IEK_NaN;
3791 if (Arg.isZero())
3792 return IEEEFloat::IEK_Zero;
3793 if (Arg.isInfinity())
3794 return IEEEFloat::IEK_Inf;
3795 if (!Arg.isDenormal())
3796 return Arg.exponent;
3798 IEEEFloat Normalized(Arg);
3799 int SignificandBits = Arg.getSemantics().precision - 1;
3801 Normalized.exponent += SignificandBits;
3802 Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
3803 return Normalized.exponent - SignificandBits;
3806 IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {
3807 auto MaxExp = X.getSemantics().maxExponent;
3808 auto MinExp = X.getSemantics().minExponent;
3810 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
3811 // overflow; clamp it to a safe range before adding, but ensure that the range
3812 // is large enough that the clamp does not change the result. The range we
3813 // need to support is the difference between the largest possible exponent and
3814 // the normalized exponent of half the smallest denormal.
3816 int SignificandBits = X.getSemantics().precision - 1;
3817 int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
3819 // Clamp to one past the range ends to let normalize handle overlflow.
3820 X.exponent += std::min(std::max(Exp, -MaxIncrement - 1), MaxIncrement);
3821 X.normalize(RoundingMode, lfExactlyZero);
3822 if (X.isNaN())
3823 X.makeQuiet();
3824 return X;
3827 IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) {
3828 Exp = ilogb(Val);
3830 // Quiet signalling nans.
3831 if (Exp == IEEEFloat::IEK_NaN) {
3832 IEEEFloat Quiet(Val);
3833 Quiet.makeQuiet();
3834 return Quiet;
3837 if (Exp == IEEEFloat::IEK_Inf)
3838 return Val;
3840 // 1 is added because frexp is defined to return a normalized fraction in
3841 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
3842 Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1;
3843 return scalbn(Val, -Exp, RM);
3846 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
3847 : Semantics(&S),
3848 Floats(new APFloat[2]{APFloat(semIEEEdouble), APFloat(semIEEEdouble)}) {
3849 assert(Semantics == &semPPCDoubleDouble);
3852 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
3853 : Semantics(&S),
3854 Floats(new APFloat[2]{APFloat(semIEEEdouble, uninitialized),
3855 APFloat(semIEEEdouble, uninitialized)}) {
3856 assert(Semantics == &semPPCDoubleDouble);
3859 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
3860 : Semantics(&S), Floats(new APFloat[2]{APFloat(semIEEEdouble, I),
3861 APFloat(semIEEEdouble)}) {
3862 assert(Semantics == &semPPCDoubleDouble);
3865 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
3866 : Semantics(&S),
3867 Floats(new APFloat[2]{
3868 APFloat(semIEEEdouble, APInt(64, I.getRawData()[0])),
3869 APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
3870 assert(Semantics == &semPPCDoubleDouble);
3873 DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
3874 APFloat &&Second)
3875 : Semantics(&S),
3876 Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
3877 assert(Semantics == &semPPCDoubleDouble);
3878 assert(&Floats[0].getSemantics() == &semIEEEdouble);
3879 assert(&Floats[1].getSemantics() == &semIEEEdouble);
3882 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
3883 : Semantics(RHS.Semantics),
3884 Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
3885 APFloat(RHS.Floats[1])}
3886 : nullptr) {
3887 assert(Semantics == &semPPCDoubleDouble);
3890 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
3891 : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
3892 RHS.Semantics = &semBogus;
3893 assert(Semantics == &semPPCDoubleDouble);
3896 DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
3897 if (Semantics == RHS.Semantics && RHS.Floats) {
3898 Floats[0] = RHS.Floats[0];
3899 Floats[1] = RHS.Floats[1];
3900 } else if (this != &RHS) {
3901 this->~DoubleAPFloat();
3902 new (this) DoubleAPFloat(RHS);
3904 return *this;
3907 // Implement addition, subtraction, multiplication and division based on:
3908 // "Software for Doubled-Precision Floating-Point Computations",
3909 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
3910 APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
3911 const APFloat &c, const APFloat &cc,
3912 roundingMode RM) {
3913 int Status = opOK;
3914 APFloat z = a;
3915 Status |= z.add(c, RM);
3916 if (!z.isFinite()) {
3917 if (!z.isInfinity()) {
3918 Floats[0] = std::move(z);
3919 Floats[1].makeZero(/* Neg = */ false);
3920 return (opStatus)Status;
3922 Status = opOK;
3923 auto AComparedToC = a.compareAbsoluteValue(c);
3924 z = cc;
3925 Status |= z.add(aa, RM);
3926 if (AComparedToC == APFloat::cmpGreaterThan) {
3927 // z = cc + aa + c + a;
3928 Status |= z.add(c, RM);
3929 Status |= z.add(a, RM);
3930 } else {
3931 // z = cc + aa + a + c;
3932 Status |= z.add(a, RM);
3933 Status |= z.add(c, RM);
3935 if (!z.isFinite()) {
3936 Floats[0] = std::move(z);
3937 Floats[1].makeZero(/* Neg = */ false);
3938 return (opStatus)Status;
3940 Floats[0] = z;
3941 APFloat zz = aa;
3942 Status |= zz.add(cc, RM);
3943 if (AComparedToC == APFloat::cmpGreaterThan) {
3944 // Floats[1] = a - z + c + zz;
3945 Floats[1] = a;
3946 Status |= Floats[1].subtract(z, RM);
3947 Status |= Floats[1].add(c, RM);
3948 Status |= Floats[1].add(zz, RM);
3949 } else {
3950 // Floats[1] = c - z + a + zz;
3951 Floats[1] = c;
3952 Status |= Floats[1].subtract(z, RM);
3953 Status |= Floats[1].add(a, RM);
3954 Status |= Floats[1].add(zz, RM);
3956 } else {
3957 // q = a - z;
3958 APFloat q = a;
3959 Status |= q.subtract(z, RM);
3961 // zz = q + c + (a - (q + z)) + aa + cc;
3962 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
3963 auto zz = q;
3964 Status |= zz.add(c, RM);
3965 Status |= q.add(z, RM);
3966 Status |= q.subtract(a, RM);
3967 q.changeSign();
3968 Status |= zz.add(q, RM);
3969 Status |= zz.add(aa, RM);
3970 Status |= zz.add(cc, RM);
3971 if (zz.isZero() && !zz.isNegative()) {
3972 Floats[0] = std::move(z);
3973 Floats[1].makeZero(/* Neg = */ false);
3974 return opOK;
3976 Floats[0] = z;
3977 Status |= Floats[0].add(zz, RM);
3978 if (!Floats[0].isFinite()) {
3979 Floats[1].makeZero(/* Neg = */ false);
3980 return (opStatus)Status;
3982 Floats[1] = std::move(z);
3983 Status |= Floats[1].subtract(Floats[0], RM);
3984 Status |= Floats[1].add(zz, RM);
3986 return (opStatus)Status;
3989 APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
3990 const DoubleAPFloat &RHS,
3991 DoubleAPFloat &Out,
3992 roundingMode RM) {
3993 if (LHS.getCategory() == fcNaN) {
3994 Out = LHS;
3995 return opOK;
3997 if (RHS.getCategory() == fcNaN) {
3998 Out = RHS;
3999 return opOK;
4001 if (LHS.getCategory() == fcZero) {
4002 Out = RHS;
4003 return opOK;
4005 if (RHS.getCategory() == fcZero) {
4006 Out = LHS;
4007 return opOK;
4009 if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
4010 LHS.isNegative() != RHS.isNegative()) {
4011 Out.makeNaN(false, Out.isNegative(), nullptr);
4012 return opInvalidOp;
4014 if (LHS.getCategory() == fcInfinity) {
4015 Out = LHS;
4016 return opOK;
4018 if (RHS.getCategory() == fcInfinity) {
4019 Out = RHS;
4020 return opOK;
4022 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
4024 APFloat A(LHS.Floats[0]), AA(LHS.Floats[1]), C(RHS.Floats[0]),
4025 CC(RHS.Floats[1]);
4026 assert(&A.getSemantics() == &semIEEEdouble);
4027 assert(&AA.getSemantics() == &semIEEEdouble);
4028 assert(&C.getSemantics() == &semIEEEdouble);
4029 assert(&CC.getSemantics() == &semIEEEdouble);
4030 assert(&Out.Floats[0].getSemantics() == &semIEEEdouble);
4031 assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
4032 return Out.addImpl(A, AA, C, CC, RM);
4035 APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS,
4036 roundingMode RM) {
4037 return addWithSpecial(*this, RHS, *this, RM);
4040 APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS,
4041 roundingMode RM) {
4042 changeSign();
4043 auto Ret = add(RHS, RM);
4044 changeSign();
4045 return Ret;
4048 APFloat::opStatus DoubleAPFloat::multiply(const DoubleAPFloat &RHS,
4049 APFloat::roundingMode RM) {
4050 const auto &LHS = *this;
4051 auto &Out = *this;
4052 /* Interesting observation: For special categories, finding the lowest
4053 common ancestor of the following layered graph gives the correct
4054 return category:
4058 Zero Inf
4060 Normal
4062 e.g. NaN * NaN = NaN
4063 Zero * Inf = NaN
4064 Normal * Zero = Zero
4065 Normal * Inf = Inf
4067 if (LHS.getCategory() == fcNaN) {
4068 Out = LHS;
4069 return opOK;
4071 if (RHS.getCategory() == fcNaN) {
4072 Out = RHS;
4073 return opOK;
4075 if ((LHS.getCategory() == fcZero && RHS.getCategory() == fcInfinity) ||
4076 (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcZero)) {
4077 Out.makeNaN(false, false, nullptr);
4078 return opOK;
4080 if (LHS.getCategory() == fcZero || LHS.getCategory() == fcInfinity) {
4081 Out = LHS;
4082 return opOK;
4084 if (RHS.getCategory() == fcZero || RHS.getCategory() == fcInfinity) {
4085 Out = RHS;
4086 return opOK;
4088 assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal &&
4089 "Special cases not handled exhaustively");
4091 int Status = opOK;
4092 APFloat A = Floats[0], B = Floats[1], C = RHS.Floats[0], D = RHS.Floats[1];
4093 // t = a * c
4094 APFloat T = A;
4095 Status |= T.multiply(C, RM);
4096 if (!T.isFiniteNonZero()) {
4097 Floats[0] = T;
4098 Floats[1].makeZero(/* Neg = */ false);
4099 return (opStatus)Status;
4102 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4103 APFloat Tau = A;
4104 T.changeSign();
4105 Status |= Tau.fusedMultiplyAdd(C, T, RM);
4106 T.changeSign();
4108 // v = a * d
4109 APFloat V = A;
4110 Status |= V.multiply(D, RM);
4111 // w = b * c
4112 APFloat W = B;
4113 Status |= W.multiply(C, RM);
4114 Status |= V.add(W, RM);
4115 // tau += v + w
4116 Status |= Tau.add(V, RM);
4118 // u = t + tau
4119 APFloat U = T;
4120 Status |= U.add(Tau, RM);
4122 Floats[0] = U;
4123 if (!U.isFinite()) {
4124 Floats[1].makeZero(/* Neg = */ false);
4125 } else {
4126 // Floats[1] = (t - u) + tau
4127 Status |= T.subtract(U, RM);
4128 Status |= T.add(Tau, RM);
4129 Floats[1] = T;
4131 return (opStatus)Status;
4134 APFloat::opStatus DoubleAPFloat::divide(const DoubleAPFloat &RHS,
4135 APFloat::roundingMode RM) {
4136 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4137 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4138 auto Ret =
4139 Tmp.divide(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()), RM);
4140 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4141 return Ret;
4144 APFloat::opStatus DoubleAPFloat::remainder(const DoubleAPFloat &RHS) {
4145 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4146 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4147 auto Ret =
4148 Tmp.remainder(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4149 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4150 return Ret;
4153 APFloat::opStatus DoubleAPFloat::mod(const DoubleAPFloat &RHS) {
4154 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4155 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4156 auto Ret = Tmp.mod(APFloat(semPPCDoubleDoubleLegacy, RHS.bitcastToAPInt()));
4157 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4158 return Ret;
4161 APFloat::opStatus
4162 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat &Multiplicand,
4163 const DoubleAPFloat &Addend,
4164 APFloat::roundingMode RM) {
4165 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4166 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4167 auto Ret = Tmp.fusedMultiplyAdd(
4168 APFloat(semPPCDoubleDoubleLegacy, Multiplicand.bitcastToAPInt()),
4169 APFloat(semPPCDoubleDoubleLegacy, Addend.bitcastToAPInt()), RM);
4170 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4171 return Ret;
4174 APFloat::opStatus DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM) {
4175 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4176 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4177 auto Ret = Tmp.roundToIntegral(RM);
4178 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4179 return Ret;
4182 void DoubleAPFloat::changeSign() {
4183 Floats[0].changeSign();
4184 Floats[1].changeSign();
4187 APFloat::cmpResult
4188 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const {
4189 auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
4190 if (Result != cmpEqual)
4191 return Result;
4192 Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
4193 if (Result == cmpLessThan || Result == cmpGreaterThan) {
4194 auto Against = Floats[0].isNegative() ^ Floats[1].isNegative();
4195 auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative();
4196 if (Against && !RHSAgainst)
4197 return cmpLessThan;
4198 if (!Against && RHSAgainst)
4199 return cmpGreaterThan;
4200 if (!Against && !RHSAgainst)
4201 return Result;
4202 if (Against && RHSAgainst)
4203 return (cmpResult)(cmpLessThan + cmpGreaterThan - Result);
4205 return Result;
4208 APFloat::fltCategory DoubleAPFloat::getCategory() const {
4209 return Floats[0].getCategory();
4212 bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
4214 void DoubleAPFloat::makeInf(bool Neg) {
4215 Floats[0].makeInf(Neg);
4216 Floats[1].makeZero(/* Neg = */ false);
4219 void DoubleAPFloat::makeZero(bool Neg) {
4220 Floats[0].makeZero(Neg);
4221 Floats[1].makeZero(/* Neg = */ false);
4224 void DoubleAPFloat::makeLargest(bool Neg) {
4225 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4226 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x7fefffffffffffffull));
4227 Floats[1] = APFloat(semIEEEdouble, APInt(64, 0x7c8ffffffffffffeull));
4228 if (Neg)
4229 changeSign();
4232 void DoubleAPFloat::makeSmallest(bool Neg) {
4233 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4234 Floats[0].makeSmallest(Neg);
4235 Floats[1].makeZero(/* Neg = */ false);
4238 void DoubleAPFloat::makeSmallestNormalized(bool Neg) {
4239 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4240 Floats[0] = APFloat(semIEEEdouble, APInt(64, 0x0360000000000000ull));
4241 if (Neg)
4242 Floats[0].changeSign();
4243 Floats[1].makeZero(/* Neg = */ false);
4246 void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
4247 Floats[0].makeNaN(SNaN, Neg, fill);
4248 Floats[1].makeZero(/* Neg = */ false);
4251 APFloat::cmpResult DoubleAPFloat::compare(const DoubleAPFloat &RHS) const {
4252 auto Result = Floats[0].compare(RHS.Floats[0]);
4253 // |Float[0]| > |Float[1]|
4254 if (Result == APFloat::cmpEqual)
4255 return Floats[1].compare(RHS.Floats[1]);
4256 return Result;
4259 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat &RHS) const {
4260 return Floats[0].bitwiseIsEqual(RHS.Floats[0]) &&
4261 Floats[1].bitwiseIsEqual(RHS.Floats[1]);
4264 hash_code hash_value(const DoubleAPFloat &Arg) {
4265 if (Arg.Floats)
4266 return hash_combine(hash_value(Arg.Floats[0]), hash_value(Arg.Floats[1]));
4267 return hash_combine(Arg.Semantics);
4270 APInt DoubleAPFloat::bitcastToAPInt() const {
4271 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4272 uint64_t Data[] = {
4273 Floats[0].bitcastToAPInt().getRawData()[0],
4274 Floats[1].bitcastToAPInt().getRawData()[0],
4276 return APInt(128, 2, Data);
4279 APFloat::opStatus DoubleAPFloat::convertFromString(StringRef S,
4280 roundingMode RM) {
4281 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4282 APFloat Tmp(semPPCDoubleDoubleLegacy);
4283 auto Ret = Tmp.convertFromString(S, RM);
4284 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4285 return Ret;
4288 APFloat::opStatus DoubleAPFloat::next(bool nextDown) {
4289 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4290 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4291 auto Ret = Tmp.next(nextDown);
4292 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4293 return Ret;
4296 APFloat::opStatus
4297 DoubleAPFloat::convertToInteger(MutableArrayRef<integerPart> Input,
4298 unsigned int Width, bool IsSigned,
4299 roundingMode RM, bool *IsExact) const {
4300 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4301 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4302 .convertToInteger(Input, Width, IsSigned, RM, IsExact);
4305 APFloat::opStatus DoubleAPFloat::convertFromAPInt(const APInt &Input,
4306 bool IsSigned,
4307 roundingMode RM) {
4308 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4309 APFloat Tmp(semPPCDoubleDoubleLegacy);
4310 auto Ret = Tmp.convertFromAPInt(Input, IsSigned, RM);
4311 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4312 return Ret;
4315 APFloat::opStatus
4316 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart *Input,
4317 unsigned int InputSize,
4318 bool IsSigned, roundingMode RM) {
4319 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4320 APFloat Tmp(semPPCDoubleDoubleLegacy);
4321 auto Ret = Tmp.convertFromSignExtendedInteger(Input, InputSize, IsSigned, RM);
4322 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4323 return Ret;
4326 APFloat::opStatus
4327 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart *Input,
4328 unsigned int InputSize,
4329 bool IsSigned, roundingMode RM) {
4330 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4331 APFloat Tmp(semPPCDoubleDoubleLegacy);
4332 auto Ret = Tmp.convertFromZeroExtendedInteger(Input, InputSize, IsSigned, RM);
4333 *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt());
4334 return Ret;
4337 unsigned int DoubleAPFloat::convertToHexString(char *DST,
4338 unsigned int HexDigits,
4339 bool UpperCase,
4340 roundingMode RM) const {
4341 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4342 return APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4343 .convertToHexString(DST, HexDigits, UpperCase, RM);
4346 bool DoubleAPFloat::isDenormal() const {
4347 return getCategory() == fcNormal &&
4348 (Floats[0].isDenormal() || Floats[1].isDenormal() ||
4349 // (double)(Hi + Lo) == Hi defines a normal number.
4350 Floats[0].compare(Floats[0] + Floats[1]) != cmpEqual);
4353 bool DoubleAPFloat::isSmallest() const {
4354 if (getCategory() != fcNormal)
4355 return false;
4356 DoubleAPFloat Tmp(*this);
4357 Tmp.makeSmallest(this->isNegative());
4358 return Tmp.compare(*this) == cmpEqual;
4361 bool DoubleAPFloat::isLargest() const {
4362 if (getCategory() != fcNormal)
4363 return false;
4364 DoubleAPFloat Tmp(*this);
4365 Tmp.makeLargest(this->isNegative());
4366 return Tmp.compare(*this) == cmpEqual;
4369 bool DoubleAPFloat::isInteger() const {
4370 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4371 return Floats[0].isInteger() && Floats[1].isInteger();
4374 void DoubleAPFloat::toString(SmallVectorImpl<char> &Str,
4375 unsigned FormatPrecision,
4376 unsigned FormatMaxPadding,
4377 bool TruncateZero) const {
4378 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4379 APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt())
4380 .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero);
4383 bool DoubleAPFloat::getExactInverse(APFloat *inv) const {
4384 assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4385 APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt());
4386 if (!inv)
4387 return Tmp.getExactInverse(nullptr);
4388 APFloat Inv(semPPCDoubleDoubleLegacy);
4389 auto Ret = Tmp.getExactInverse(&Inv);
4390 *inv = APFloat(semPPCDoubleDouble, Inv.bitcastToAPInt());
4391 return Ret;
4394 DoubleAPFloat scalbn(DoubleAPFloat Arg, int Exp, APFloat::roundingMode RM) {
4395 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4396 return DoubleAPFloat(semPPCDoubleDouble, scalbn(Arg.Floats[0], Exp, RM),
4397 scalbn(Arg.Floats[1], Exp, RM));
4400 DoubleAPFloat frexp(const DoubleAPFloat &Arg, int &Exp,
4401 APFloat::roundingMode RM) {
4402 assert(Arg.Semantics == &semPPCDoubleDouble && "Unexpected Semantics");
4403 APFloat First = frexp(Arg.Floats[0], Exp, RM);
4404 APFloat Second = Arg.Floats[1];
4405 if (Arg.getCategory() == APFloat::fcNormal)
4406 Second = scalbn(Second, -Exp, RM);
4407 return DoubleAPFloat(semPPCDoubleDouble, std::move(First), std::move(Second));
4410 } // End detail namespace
4412 APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
4413 if (usesLayout<IEEEFloat>(Semantics)) {
4414 new (&IEEE) IEEEFloat(std::move(F));
4415 return;
4417 if (usesLayout<DoubleAPFloat>(Semantics)) {
4418 new (&Double)
4419 DoubleAPFloat(Semantics, APFloat(std::move(F), F.getSemantics()),
4420 APFloat(semIEEEdouble));
4421 return;
4423 llvm_unreachable("Unexpected semantics");
4426 APFloat::opStatus APFloat::convertFromString(StringRef Str, roundingMode RM) {
4427 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str, RM));
4430 hash_code hash_value(const APFloat &Arg) {
4431 if (APFloat::usesLayout<detail::IEEEFloat>(Arg.getSemantics()))
4432 return hash_value(Arg.U.IEEE);
4433 if (APFloat::usesLayout<detail::DoubleAPFloat>(Arg.getSemantics()))
4434 return hash_value(Arg.U.Double);
4435 llvm_unreachable("Unexpected semantics");
4438 APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
4439 : APFloat(Semantics) {
4440 convertFromString(S, rmNearestTiesToEven);
4443 APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
4444 roundingMode RM, bool *losesInfo) {
4445 if (&getSemantics() == &ToSemantics) {
4446 *losesInfo = false;
4447 return opOK;
4449 if (usesLayout<IEEEFloat>(getSemantics()) &&
4450 usesLayout<IEEEFloat>(ToSemantics))
4451 return U.IEEE.convert(ToSemantics, RM, losesInfo);
4452 if (usesLayout<IEEEFloat>(getSemantics()) &&
4453 usesLayout<DoubleAPFloat>(ToSemantics)) {
4454 assert(&ToSemantics == &semPPCDoubleDouble);
4455 auto Ret = U.IEEE.convert(semPPCDoubleDoubleLegacy, RM, losesInfo);
4456 *this = APFloat(ToSemantics, U.IEEE.bitcastToAPInt());
4457 return Ret;
4459 if (usesLayout<DoubleAPFloat>(getSemantics()) &&
4460 usesLayout<IEEEFloat>(ToSemantics)) {
4461 auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
4462 *this = APFloat(std::move(getIEEE()), ToSemantics);
4463 return Ret;
4465 llvm_unreachable("Unexpected semantics");
4468 APFloat APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE) {
4469 if (isIEEE) {
4470 switch (BitWidth) {
4471 case 16:
4472 return APFloat(semIEEEhalf, APInt::getAllOnesValue(BitWidth));
4473 case 32:
4474 return APFloat(semIEEEsingle, APInt::getAllOnesValue(BitWidth));
4475 case 64:
4476 return APFloat(semIEEEdouble, APInt::getAllOnesValue(BitWidth));
4477 case 80:
4478 return APFloat(semX87DoubleExtended, APInt::getAllOnesValue(BitWidth));
4479 case 128:
4480 return APFloat(semIEEEquad, APInt::getAllOnesValue(BitWidth));
4481 default:
4482 llvm_unreachable("Unknown floating bit width");
4484 } else {
4485 assert(BitWidth == 128);
4486 return APFloat(semPPCDoubleDouble, APInt::getAllOnesValue(BitWidth));
4490 void APFloat::print(raw_ostream &OS) const {
4491 SmallVector<char, 16> Buffer;
4492 toString(Buffer);
4493 OS << Buffer << "\n";
4496 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4497 LLVM_DUMP_METHOD void APFloat::dump() const { print(dbgs()); }
4498 #endif
4500 void APFloat::Profile(FoldingSetNodeID &NID) const {
4501 NID.Add(bitcastToAPInt());
4504 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
4505 an APSInt, whose initial bit-width and signed-ness are used to determine the
4506 precision of the conversion.
4508 APFloat::opStatus APFloat::convertToInteger(APSInt &result,
4509 roundingMode rounding_mode,
4510 bool *isExact) const {
4511 unsigned bitWidth = result.getBitWidth();
4512 SmallVector<uint64_t, 4> parts(result.getNumWords());
4513 opStatus status = convertToInteger(parts, bitWidth, result.isSigned(),
4514 rounding_mode, isExact);
4515 // Keeps the original signed-ness.
4516 result = APInt(bitWidth, parts);
4517 return status;
4520 } // End llvm namespace
4522 #undef APFLOAT_DISPATCH_ON_SEMANTICS