1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements a class to represent arbitrary precision floating
11 // point values and provide a variety of arithmetic operations on them.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/FoldingSet.h"
19 #include "llvm/ADT/Hashing.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringRef.h"
22 #include "llvm/Config/llvm-config.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/raw_ostream.h"
30 #define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \
32 if (usesLayout<IEEEFloat>(getSemantics())) \
33 return U.IEEE.METHOD_CALL; \
34 if (usesLayout<DoubleAPFloat>(getSemantics())) \
35 return U.Double.METHOD_CALL; \
36 llvm_unreachable("Unexpected semantics"); \
41 /// A macro used to combine two fcCategory enums into one key which can be used
42 /// in a switch statement to classify how the interaction of two APFloat's
43 /// categories affects an operation.
45 /// TODO: If clang source code is ever allowed to use constexpr in its own
46 /// codebase, change this into a static inline function.
47 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
49 /* Assumed in hexadecimal significand parsing, and conversion to
50 hexadecimal strings. */
51 static_assert(APFloatBase::integerPartWidth
% 4 == 0, "Part width must be divisible by 4!");
54 /* Represents floating point arithmetic semantics. */
56 /* The largest E such that 2^E is representable; this matches the
57 definition of IEEE 754. */
58 APFloatBase::ExponentType maxExponent
;
60 /* The smallest E such that 2^E is a normalized number; this
61 matches the definition of IEEE 754. */
62 APFloatBase::ExponentType minExponent
;
64 /* Number of bits in the significand. This includes the integer
66 unsigned int precision
;
68 /* Number of bits actually used in the semantics. */
69 unsigned int sizeInBits
;
72 static const fltSemantics semIEEEhalf
= {15, -14, 11, 16};
73 static const fltSemantics semIEEEsingle
= {127, -126, 24, 32};
74 static const fltSemantics semIEEEdouble
= {1023, -1022, 53, 64};
75 static const fltSemantics semIEEEquad
= {16383, -16382, 113, 128};
76 static const fltSemantics semX87DoubleExtended
= {16383, -16382, 64, 80};
77 static const fltSemantics semBogus
= {0, 0, 0, 0};
79 /* The IBM double-double semantics. Such a number consists of a pair of IEEE
80 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
81 (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
82 Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
83 to each other, and two 11-bit exponents.
85 Note: we need to make the value different from semBogus as otherwise
86 an unsafe optimization may collapse both values to a single address,
87 and we heavily rely on them having distinct addresses. */
88 static const fltSemantics semPPCDoubleDouble
= {-1, 0, 0, 0};
90 /* These are legacy semantics for the fallback, inaccrurate implementation of
91 IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
92 operation. It's equivalent to having an IEEE number with consecutive 106
93 bits of mantissa and 11 bits of exponent.
95 It's not equivalent to IBM double-double. For example, a legit IBM
96 double-double, 1 + epsilon:
98 1 + epsilon = 1 + (1 >> 1076)
100 is not representable by a consecutive 106 bits of mantissa.
102 Currently, these semantics are used in the following way:
104 semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
105 (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
106 semPPCDoubleDoubleLegacy -> IEEE operations
108 We use bitcastToAPInt() to get the bit representation (in APInt) of the
109 underlying IEEEdouble, then use the APInt constructor to construct the
112 TODO: Implement all operations in semPPCDoubleDouble, and delete these
114 static const fltSemantics semPPCDoubleDoubleLegacy
= {1023, -1022 + 53,
117 const fltSemantics
&APFloatBase::IEEEhalf() {
120 const fltSemantics
&APFloatBase::IEEEsingle() {
121 return semIEEEsingle
;
123 const fltSemantics
&APFloatBase::IEEEdouble() {
124 return semIEEEdouble
;
126 const fltSemantics
&APFloatBase::IEEEquad() {
129 const fltSemantics
&APFloatBase::x87DoubleExtended() {
130 return semX87DoubleExtended
;
132 const fltSemantics
&APFloatBase::Bogus() {
135 const fltSemantics
&APFloatBase::PPCDoubleDouble() {
136 return semPPCDoubleDouble
;
139 /* A tight upper bound on number of parts required to hold the value
142 power * 815 / (351 * integerPartWidth) + 1
144 However, whilst the result may require only this many parts,
145 because we are multiplying two values to get it, the
146 multiplication may require an extra part with the excess part
147 being zero (consider the trivial case of 1 * 1, tcFullMultiply
148 requires two parts to hold the single-part result). So we add an
149 extra one to guarantee enough space whilst multiplying. */
150 const unsigned int maxExponent
= 16383;
151 const unsigned int maxPrecision
= 113;
152 const unsigned int maxPowerOfFiveExponent
= maxExponent
+ maxPrecision
- 1;
153 const unsigned int maxPowerOfFiveParts
= 2 + ((maxPowerOfFiveExponent
* 815) / (351 * APFloatBase::integerPartWidth
));
155 unsigned int APFloatBase::semanticsPrecision(const fltSemantics
&semantics
) {
156 return semantics
.precision
;
158 APFloatBase::ExponentType
159 APFloatBase::semanticsMaxExponent(const fltSemantics
&semantics
) {
160 return semantics
.maxExponent
;
162 APFloatBase::ExponentType
163 APFloatBase::semanticsMinExponent(const fltSemantics
&semantics
) {
164 return semantics
.minExponent
;
166 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics
&semantics
) {
167 return semantics
.sizeInBits
;
170 unsigned APFloatBase::getSizeInBits(const fltSemantics
&Sem
) {
171 return Sem
.sizeInBits
;
174 /* A bunch of private, handy routines. */
176 static inline unsigned int
177 partCountForBits(unsigned int bits
)
179 return ((bits
) + APFloatBase::integerPartWidth
- 1) / APFloatBase::integerPartWidth
;
182 /* Returns 0U-9U. Return values >= 10U are not digits. */
183 static inline unsigned int
184 decDigitValue(unsigned int c
)
189 /* Return the value of a decimal exponent of the form
192 If the exponent overflows, returns a large exponent with the
195 readExponent(StringRef::iterator begin
, StringRef::iterator end
)
198 unsigned int absExponent
;
199 const unsigned int overlargeExponent
= 24000; /* FIXME. */
200 StringRef::iterator p
= begin
;
202 assert(p
!= end
&& "Exponent has no digits");
204 isNegative
= (*p
== '-');
205 if (*p
== '-' || *p
== '+') {
207 assert(p
!= end
&& "Exponent has no digits");
210 absExponent
= decDigitValue(*p
++);
211 assert(absExponent
< 10U && "Invalid character in exponent");
213 for (; p
!= end
; ++p
) {
216 value
= decDigitValue(*p
);
217 assert(value
< 10U && "Invalid character in exponent");
219 value
+= absExponent
* 10;
220 if (absExponent
>= overlargeExponent
) {
221 absExponent
= overlargeExponent
;
222 p
= end
; /* outwit assert below */
228 assert(p
== end
&& "Invalid exponent in exponent");
231 return -(int) absExponent
;
233 return (int) absExponent
;
236 /* This is ugly and needs cleaning up, but I don't immediately see
237 how whilst remaining safe. */
239 totalExponent(StringRef::iterator p
, StringRef::iterator end
,
240 int exponentAdjustment
)
242 int unsignedExponent
;
243 bool negative
, overflow
;
246 assert(p
!= end
&& "Exponent has no digits");
248 negative
= *p
== '-';
249 if (*p
== '-' || *p
== '+') {
251 assert(p
!= end
&& "Exponent has no digits");
254 unsignedExponent
= 0;
256 for (; p
!= end
; ++p
) {
259 value
= decDigitValue(*p
);
260 assert(value
< 10U && "Invalid character in exponent");
262 unsignedExponent
= unsignedExponent
* 10 + value
;
263 if (unsignedExponent
> 32767) {
269 if (exponentAdjustment
> 32767 || exponentAdjustment
< -32768)
273 exponent
= unsignedExponent
;
275 exponent
= -exponent
;
276 exponent
+= exponentAdjustment
;
277 if (exponent
> 32767 || exponent
< -32768)
282 exponent
= negative
? -32768: 32767;
287 static StringRef::iterator
288 skipLeadingZeroesAndAnyDot(StringRef::iterator begin
, StringRef::iterator end
,
289 StringRef::iterator
*dot
)
291 StringRef::iterator p
= begin
;
293 while (p
!= end
&& *p
== '0')
296 if (p
!= end
&& *p
== '.') {
299 assert(end
- begin
!= 1 && "Significand has no digits");
301 while (p
!= end
&& *p
== '0')
308 /* Given a normal decimal floating point number of the form
312 where the decimal point and exponent are optional, fill out the
313 structure D. Exponent is appropriate if the significand is
314 treated as an integer, and normalizedExponent if the significand
315 is taken to have the decimal point after a single leading
318 If the value is zero, V->firstSigDigit points to a non-digit, and
319 the return exponent is zero.
322 const char *firstSigDigit
;
323 const char *lastSigDigit
;
325 int normalizedExponent
;
329 interpretDecimal(StringRef::iterator begin
, StringRef::iterator end
,
332 StringRef::iterator dot
= end
;
333 StringRef::iterator p
= skipLeadingZeroesAndAnyDot (begin
, end
, &dot
);
335 D
->firstSigDigit
= p
;
337 D
->normalizedExponent
= 0;
339 for (; p
!= end
; ++p
) {
341 assert(dot
== end
&& "String contains multiple dots");
346 if (decDigitValue(*p
) >= 10U)
351 assert((*p
== 'e' || *p
== 'E') && "Invalid character in significand");
352 assert(p
!= begin
&& "Significand has no digits");
353 assert((dot
== end
|| p
- begin
!= 1) && "Significand has no digits");
355 /* p points to the first non-digit in the string */
356 D
->exponent
= readExponent(p
+ 1, end
);
358 /* Implied decimal point? */
363 /* If number is all zeroes accept any exponent. */
364 if (p
!= D
->firstSigDigit
) {
365 /* Drop insignificant trailing zeroes. */
370 while (p
!= begin
&& *p
== '0');
371 while (p
!= begin
&& *p
== '.');
374 /* Adjust the exponents for any decimal point. */
375 D
->exponent
+= static_cast<APFloat::ExponentType
>((dot
- p
) - (dot
> p
));
376 D
->normalizedExponent
= (D
->exponent
+
377 static_cast<APFloat::ExponentType
>((p
- D
->firstSigDigit
)
378 - (dot
> D
->firstSigDigit
&& dot
< p
)));
384 /* Return the trailing fraction of a hexadecimal number.
385 DIGITVALUE is the first hex digit of the fraction, P points to
388 trailingHexadecimalFraction(StringRef::iterator p
, StringRef::iterator end
,
389 unsigned int digitValue
)
391 unsigned int hexDigit
;
393 /* If the first trailing digit isn't 0 or 8 we can work out the
394 fraction immediately. */
396 return lfMoreThanHalf
;
397 else if (digitValue
< 8 && digitValue
> 0)
398 return lfLessThanHalf
;
400 // Otherwise we need to find the first non-zero digit.
401 while (p
!= end
&& (*p
== '0' || *p
== '.'))
404 assert(p
!= end
&& "Invalid trailing hexadecimal fraction!");
406 hexDigit
= hexDigitValue(*p
);
408 /* If we ran off the end it is exactly zero or one-half, otherwise
411 return digitValue
== 0 ? lfExactlyZero
: lfExactlyHalf
;
413 return digitValue
== 0 ? lfLessThanHalf
: lfMoreThanHalf
;
416 /* Return the fraction lost were a bignum truncated losing the least
417 significant BITS bits. */
419 lostFractionThroughTruncation(const APFloatBase::integerPart
*parts
,
420 unsigned int partCount
,
425 lsb
= APInt::tcLSB(parts
, partCount
);
427 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
429 return lfExactlyZero
;
431 return lfExactlyHalf
;
432 if (bits
<= partCount
* APFloatBase::integerPartWidth
&&
433 APInt::tcExtractBit(parts
, bits
- 1))
434 return lfMoreThanHalf
;
436 return lfLessThanHalf
;
439 /* Shift DST right BITS bits noting lost fraction. */
441 shiftRight(APFloatBase::integerPart
*dst
, unsigned int parts
, unsigned int bits
)
443 lostFraction lost_fraction
;
445 lost_fraction
= lostFractionThroughTruncation(dst
, parts
, bits
);
447 APInt::tcShiftRight(dst
, parts
, bits
);
449 return lost_fraction
;
452 /* Combine the effect of two lost fractions. */
454 combineLostFractions(lostFraction moreSignificant
,
455 lostFraction lessSignificant
)
457 if (lessSignificant
!= lfExactlyZero
) {
458 if (moreSignificant
== lfExactlyZero
)
459 moreSignificant
= lfLessThanHalf
;
460 else if (moreSignificant
== lfExactlyHalf
)
461 moreSignificant
= lfMoreThanHalf
;
464 return moreSignificant
;
467 /* The error from the true value, in half-ulps, on multiplying two
468 floating point numbers, which differ from the value they
469 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
470 than the returned value.
472 See "How to Read Floating Point Numbers Accurately" by William D
475 HUerrBound(bool inexactMultiply
, unsigned int HUerr1
, unsigned int HUerr2
)
477 assert(HUerr1
< 2 || HUerr2
< 2 || (HUerr1
+ HUerr2
< 8));
479 if (HUerr1
+ HUerr2
== 0)
480 return inexactMultiply
* 2; /* <= inexactMultiply half-ulps. */
482 return inexactMultiply
+ 2 * (HUerr1
+ HUerr2
);
485 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
486 when the least significant BITS are truncated. BITS cannot be
488 static APFloatBase::integerPart
489 ulpsFromBoundary(const APFloatBase::integerPart
*parts
, unsigned int bits
,
491 unsigned int count
, partBits
;
492 APFloatBase::integerPart part
, boundary
;
497 count
= bits
/ APFloatBase::integerPartWidth
;
498 partBits
= bits
% APFloatBase::integerPartWidth
+ 1;
500 part
= parts
[count
] & (~(APFloatBase::integerPart
) 0 >> (APFloatBase::integerPartWidth
- partBits
));
503 boundary
= (APFloatBase::integerPart
) 1 << (partBits
- 1);
508 if (part
- boundary
<= boundary
- part
)
509 return part
- boundary
;
511 return boundary
- part
;
514 if (part
== boundary
) {
517 return ~(APFloatBase::integerPart
) 0; /* A lot. */
520 } else if (part
== boundary
- 1) {
523 return ~(APFloatBase::integerPart
) 0; /* A lot. */
528 return ~(APFloatBase::integerPart
) 0; /* A lot. */
531 /* Place pow(5, power) in DST, and return the number of parts used.
532 DST must be at least one part larger than size of the answer. */
534 powerOf5(APFloatBase::integerPart
*dst
, unsigned int power
) {
535 static const APFloatBase::integerPart firstEightPowers
[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
536 APFloatBase::integerPart pow5s
[maxPowerOfFiveParts
* 2 + 5];
537 pow5s
[0] = 78125 * 5;
539 unsigned int partsCount
[16] = { 1 };
540 APFloatBase::integerPart scratch
[maxPowerOfFiveParts
], *p1
, *p2
, *pow5
;
542 assert(power
<= maxExponent
);
547 *p1
= firstEightPowers
[power
& 7];
553 for (unsigned int n
= 0; power
; power
>>= 1, n
++) {
558 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
560 pc
= partsCount
[n
- 1];
561 APInt::tcFullMultiply(pow5
, pow5
- pc
, pow5
- pc
, pc
, pc
);
563 if (pow5
[pc
- 1] == 0)
569 APFloatBase::integerPart
*tmp
;
571 APInt::tcFullMultiply(p2
, p1
, pow5
, result
, pc
);
573 if (p2
[result
- 1] == 0)
576 /* Now result is in p1 with partsCount parts and p2 is scratch
587 APInt::tcAssign(dst
, p1
, result
);
592 /* Zero at the end to avoid modular arithmetic when adding one; used
593 when rounding up during hexadecimal output. */
594 static const char hexDigitsLower
[] = "0123456789abcdef0";
595 static const char hexDigitsUpper
[] = "0123456789ABCDEF0";
596 static const char infinityL
[] = "infinity";
597 static const char infinityU
[] = "INFINITY";
598 static const char NaNL
[] = "nan";
599 static const char NaNU
[] = "NAN";
601 /* Write out an integerPart in hexadecimal, starting with the most
602 significant nibble. Write out exactly COUNT hexdigits, return
605 partAsHex (char *dst
, APFloatBase::integerPart part
, unsigned int count
,
606 const char *hexDigitChars
)
608 unsigned int result
= count
;
610 assert(count
!= 0 && count
<= APFloatBase::integerPartWidth
/ 4);
612 part
>>= (APFloatBase::integerPartWidth
- 4 * count
);
614 dst
[count
] = hexDigitChars
[part
& 0xf];
621 /* Write out an unsigned decimal integer. */
623 writeUnsignedDecimal (char *dst
, unsigned int n
)
639 /* Write out a signed decimal integer. */
641 writeSignedDecimal (char *dst
, int value
)
645 dst
= writeUnsignedDecimal(dst
, -(unsigned) value
);
647 dst
= writeUnsignedDecimal(dst
, value
);
654 void IEEEFloat::initialize(const fltSemantics
*ourSemantics
) {
657 semantics
= ourSemantics
;
660 significand
.parts
= new integerPart
[count
];
663 void IEEEFloat::freeSignificand() {
665 delete [] significand
.parts
;
668 void IEEEFloat::assign(const IEEEFloat
&rhs
) {
669 assert(semantics
== rhs
.semantics
);
672 category
= rhs
.category
;
673 exponent
= rhs
.exponent
;
674 if (isFiniteNonZero() || category
== fcNaN
)
675 copySignificand(rhs
);
678 void IEEEFloat::copySignificand(const IEEEFloat
&rhs
) {
679 assert(isFiniteNonZero() || category
== fcNaN
);
680 assert(rhs
.partCount() >= partCount());
682 APInt::tcAssign(significandParts(), rhs
.significandParts(),
686 /* Make this number a NaN, with an arbitrary but deterministic value
687 for the significand. If double or longer, this is a signalling NaN,
688 which may not be ideal. If float, this is QNaN(0). */
689 void IEEEFloat::makeNaN(bool SNaN
, bool Negative
, const APInt
*fill
) {
693 integerPart
*significand
= significandParts();
694 unsigned numParts
= partCount();
696 // Set the significand bits to the fill.
697 if (!fill
|| fill
->getNumWords() < numParts
)
698 APInt::tcSet(significand
, 0, numParts
);
700 APInt::tcAssign(significand
, fill
->getRawData(),
701 std::min(fill
->getNumWords(), numParts
));
703 // Zero out the excess bits of the significand.
704 unsigned bitsToPreserve
= semantics
->precision
- 1;
705 unsigned part
= bitsToPreserve
/ 64;
706 bitsToPreserve
%= 64;
707 significand
[part
] &= ((1ULL << bitsToPreserve
) - 1);
708 for (part
++; part
!= numParts
; ++part
)
709 significand
[part
] = 0;
712 unsigned QNaNBit
= semantics
->precision
- 2;
715 // We always have to clear the QNaN bit to make it an SNaN.
716 APInt::tcClearBit(significand
, QNaNBit
);
718 // If there are no bits set in the payload, we have to set
719 // *something* to make it a NaN instead of an infinity;
720 // conventionally, this is the next bit down from the QNaN bit.
721 if (APInt::tcIsZero(significand
, numParts
))
722 APInt::tcSetBit(significand
, QNaNBit
- 1);
724 // We always have to set the QNaN bit to make it a QNaN.
725 APInt::tcSetBit(significand
, QNaNBit
);
728 // For x87 extended precision, we want to make a NaN, not a
729 // pseudo-NaN. Maybe we should expose the ability to make
731 if (semantics
== &semX87DoubleExtended
)
732 APInt::tcSetBit(significand
, QNaNBit
+ 1);
735 IEEEFloat
&IEEEFloat::operator=(const IEEEFloat
&rhs
) {
737 if (semantics
!= rhs
.semantics
) {
739 initialize(rhs
.semantics
);
747 IEEEFloat
&IEEEFloat::operator=(IEEEFloat
&&rhs
) {
750 semantics
= rhs
.semantics
;
751 significand
= rhs
.significand
;
752 exponent
= rhs
.exponent
;
753 category
= rhs
.category
;
756 rhs
.semantics
= &semBogus
;
760 bool IEEEFloat::isDenormal() const {
761 return isFiniteNonZero() && (exponent
== semantics
->minExponent
) &&
762 (APInt::tcExtractBit(significandParts(),
763 semantics
->precision
- 1) == 0);
766 bool IEEEFloat::isSmallest() const {
767 // The smallest number by magnitude in our format will be the smallest
768 // denormal, i.e. the floating point number with exponent being minimum
769 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
770 return isFiniteNonZero() && exponent
== semantics
->minExponent
&&
771 significandMSB() == 0;
774 bool IEEEFloat::isSignificandAllOnes() const {
775 // Test if the significand excluding the integral bit is all ones. This allows
776 // us to test for binade boundaries.
777 const integerPart
*Parts
= significandParts();
778 const unsigned PartCount
= partCount();
779 for (unsigned i
= 0; i
< PartCount
- 1; i
++)
783 // Set the unused high bits to all ones when we compare.
784 const unsigned NumHighBits
=
785 PartCount
*integerPartWidth
- semantics
->precision
+ 1;
786 assert(NumHighBits
<= integerPartWidth
&& "Can not have more high bits to "
787 "fill than integerPartWidth");
788 const integerPart HighBitFill
=
789 ~integerPart(0) << (integerPartWidth
- NumHighBits
);
790 if (~(Parts
[PartCount
- 1] | HighBitFill
))
796 bool IEEEFloat::isSignificandAllZeros() const {
797 // Test if the significand excluding the integral bit is all zeros. This
798 // allows us to test for binade boundaries.
799 const integerPart
*Parts
= significandParts();
800 const unsigned PartCount
= partCount();
802 for (unsigned i
= 0; i
< PartCount
- 1; i
++)
806 const unsigned NumHighBits
=
807 PartCount
*integerPartWidth
- semantics
->precision
+ 1;
808 assert(NumHighBits
<= integerPartWidth
&& "Can not have more high bits to "
809 "clear than integerPartWidth");
810 const integerPart HighBitMask
= ~integerPart(0) >> NumHighBits
;
812 if (Parts
[PartCount
- 1] & HighBitMask
)
818 bool IEEEFloat::isLargest() const {
819 // The largest number by magnitude in our format will be the floating point
820 // number with maximum exponent and with significand that is all ones.
821 return isFiniteNonZero() && exponent
== semantics
->maxExponent
822 && isSignificandAllOnes();
825 bool IEEEFloat::isInteger() const {
826 // This could be made more efficient; I'm going for obviously correct.
827 if (!isFinite()) return false;
828 IEEEFloat truncated
= *this;
829 truncated
.roundToIntegral(rmTowardZero
);
830 return compare(truncated
) == cmpEqual
;
833 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat
&rhs
) const {
836 if (semantics
!= rhs
.semantics
||
837 category
!= rhs
.category
||
840 if (category
==fcZero
|| category
==fcInfinity
)
843 if (isFiniteNonZero() && exponent
!= rhs
.exponent
)
846 return std::equal(significandParts(), significandParts() + partCount(),
847 rhs
.significandParts());
850 IEEEFloat::IEEEFloat(const fltSemantics
&ourSemantics
, integerPart value
) {
851 initialize(&ourSemantics
);
855 exponent
= ourSemantics
.precision
- 1;
856 significandParts()[0] = value
;
857 normalize(rmNearestTiesToEven
, lfExactlyZero
);
860 IEEEFloat::IEEEFloat(const fltSemantics
&ourSemantics
) {
861 initialize(&ourSemantics
);
866 // Delegate to the previous constructor, because later copy constructor may
867 // actually inspects category, which can't be garbage.
868 IEEEFloat::IEEEFloat(const fltSemantics
&ourSemantics
, uninitializedTag tag
)
869 : IEEEFloat(ourSemantics
) {}
871 IEEEFloat::IEEEFloat(const IEEEFloat
&rhs
) {
872 initialize(rhs
.semantics
);
876 IEEEFloat::IEEEFloat(IEEEFloat
&&rhs
) : semantics(&semBogus
) {
877 *this = std::move(rhs
);
880 IEEEFloat::~IEEEFloat() { freeSignificand(); }
882 unsigned int IEEEFloat::partCount() const {
883 return partCountForBits(semantics
->precision
+ 1);
886 const IEEEFloat::integerPart
*IEEEFloat::significandParts() const {
887 return const_cast<IEEEFloat
*>(this)->significandParts();
890 IEEEFloat::integerPart
*IEEEFloat::significandParts() {
892 return significand
.parts
;
894 return &significand
.part
;
897 void IEEEFloat::zeroSignificand() {
898 APInt::tcSet(significandParts(), 0, partCount());
901 /* Increment an fcNormal floating point number's significand. */
902 void IEEEFloat::incrementSignificand() {
905 carry
= APInt::tcIncrement(significandParts(), partCount());
907 /* Our callers should never cause us to overflow. */
912 /* Add the significand of the RHS. Returns the carry flag. */
913 IEEEFloat::integerPart
IEEEFloat::addSignificand(const IEEEFloat
&rhs
) {
916 parts
= significandParts();
918 assert(semantics
== rhs
.semantics
);
919 assert(exponent
== rhs
.exponent
);
921 return APInt::tcAdd(parts
, rhs
.significandParts(), 0, partCount());
924 /* Subtract the significand of the RHS with a borrow flag. Returns
926 IEEEFloat::integerPart
IEEEFloat::subtractSignificand(const IEEEFloat
&rhs
,
927 integerPart borrow
) {
930 parts
= significandParts();
932 assert(semantics
== rhs
.semantics
);
933 assert(exponent
== rhs
.exponent
);
935 return APInt::tcSubtract(parts
, rhs
.significandParts(), borrow
,
939 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
940 on to the full-precision result of the multiplication. Returns the
942 lostFraction
IEEEFloat::multiplySignificand(const IEEEFloat
&rhs
,
943 const IEEEFloat
*addend
) {
944 unsigned int omsb
; // One, not zero, based MSB.
945 unsigned int partsCount
, newPartsCount
, precision
;
946 integerPart
*lhsSignificand
;
947 integerPart scratch
[4];
948 integerPart
*fullSignificand
;
949 lostFraction lost_fraction
;
952 assert(semantics
== rhs
.semantics
);
954 precision
= semantics
->precision
;
956 // Allocate space for twice as many bits as the original significand, plus one
957 // extra bit for the addition to overflow into.
958 newPartsCount
= partCountForBits(precision
* 2 + 1);
960 if (newPartsCount
> 4)
961 fullSignificand
= new integerPart
[newPartsCount
];
963 fullSignificand
= scratch
;
965 lhsSignificand
= significandParts();
966 partsCount
= partCount();
968 APInt::tcFullMultiply(fullSignificand
, lhsSignificand
,
969 rhs
.significandParts(), partsCount
, partsCount
);
971 lost_fraction
= lfExactlyZero
;
972 omsb
= APInt::tcMSB(fullSignificand
, newPartsCount
) + 1;
973 exponent
+= rhs
.exponent
;
975 // Assume the operands involved in the multiplication are single-precision
976 // FP, and the two multiplicants are:
977 // *this = a23 . a22 ... a0 * 2^e1
978 // rhs = b23 . b22 ... b0 * 2^e2
979 // the result of multiplication is:
980 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
981 // Note that there are three significant bits at the left-hand side of the
982 // radix point: two for the multiplication, and an overflow bit for the
983 // addition (that will always be zero at this point). Move the radix point
984 // toward left by two bits, and adjust exponent accordingly.
987 if (addend
&& addend
->isNonZero()) {
988 // The intermediate result of the multiplication has "2 * precision"
989 // signicant bit; adjust the addend to be consistent with mul result.
991 Significand savedSignificand
= significand
;
992 const fltSemantics
*savedSemantics
= semantics
;
993 fltSemantics extendedSemantics
;
995 unsigned int extendedPrecision
;
997 // Normalize our MSB to one below the top bit to allow for overflow.
998 extendedPrecision
= 2 * precision
+ 1;
999 if (omsb
!= extendedPrecision
- 1) {
1000 assert(extendedPrecision
> omsb
);
1001 APInt::tcShiftLeft(fullSignificand
, newPartsCount
,
1002 (extendedPrecision
- 1) - omsb
);
1003 exponent
-= (extendedPrecision
- 1) - omsb
;
1006 /* Create new semantics. */
1007 extendedSemantics
= *semantics
;
1008 extendedSemantics
.precision
= extendedPrecision
;
1010 if (newPartsCount
== 1)
1011 significand
.part
= fullSignificand
[0];
1013 significand
.parts
= fullSignificand
;
1014 semantics
= &extendedSemantics
;
1016 IEEEFloat
extendedAddend(*addend
);
1017 status
= extendedAddend
.convert(extendedSemantics
, rmTowardZero
, &ignored
);
1018 assert(status
== opOK
);
1021 // Shift the significand of the addend right by one bit. This guarantees
1022 // that the high bit of the significand is zero (same as fullSignificand),
1023 // so the addition will overflow (if it does overflow at all) into the top bit.
1024 lost_fraction
= extendedAddend
.shiftSignificandRight(1);
1025 assert(lost_fraction
== lfExactlyZero
&&
1026 "Lost precision while shifting addend for fused-multiply-add.");
1028 lost_fraction
= addOrSubtractSignificand(extendedAddend
, false);
1030 /* Restore our state. */
1031 if (newPartsCount
== 1)
1032 fullSignificand
[0] = significand
.part
;
1033 significand
= savedSignificand
;
1034 semantics
= savedSemantics
;
1036 omsb
= APInt::tcMSB(fullSignificand
, newPartsCount
) + 1;
1039 // Convert the result having "2 * precision" significant-bits back to the one
1040 // having "precision" significant-bits. First, move the radix point from
1041 // poision "2*precision - 1" to "precision - 1". The exponent need to be
1042 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1043 exponent
-= precision
+ 1;
1045 // In case MSB resides at the left-hand side of radix point, shift the
1046 // mantissa right by some amount to make sure the MSB reside right before
1047 // the radix point (i.e. "MSB . rest-significant-bits").
1049 // Note that the result is not normalized when "omsb < precision". So, the
1050 // caller needs to call IEEEFloat::normalize() if normalized value is
1052 if (omsb
> precision
) {
1053 unsigned int bits
, significantParts
;
1056 bits
= omsb
- precision
;
1057 significantParts
= partCountForBits(omsb
);
1058 lf
= shiftRight(fullSignificand
, significantParts
, bits
);
1059 lost_fraction
= combineLostFractions(lf
, lost_fraction
);
1063 APInt::tcAssign(lhsSignificand
, fullSignificand
, partsCount
);
1065 if (newPartsCount
> 4)
1066 delete [] fullSignificand
;
1068 return lost_fraction
;
1071 /* Multiply the significands of LHS and RHS to DST. */
1072 lostFraction
IEEEFloat::divideSignificand(const IEEEFloat
&rhs
) {
1073 unsigned int bit
, i
, partsCount
;
1074 const integerPart
*rhsSignificand
;
1075 integerPart
*lhsSignificand
, *dividend
, *divisor
;
1076 integerPart scratch
[4];
1077 lostFraction lost_fraction
;
1079 assert(semantics
== rhs
.semantics
);
1081 lhsSignificand
= significandParts();
1082 rhsSignificand
= rhs
.significandParts();
1083 partsCount
= partCount();
1086 dividend
= new integerPart
[partsCount
* 2];
1090 divisor
= dividend
+ partsCount
;
1092 /* Copy the dividend and divisor as they will be modified in-place. */
1093 for (i
= 0; i
< partsCount
; i
++) {
1094 dividend
[i
] = lhsSignificand
[i
];
1095 divisor
[i
] = rhsSignificand
[i
];
1096 lhsSignificand
[i
] = 0;
1099 exponent
-= rhs
.exponent
;
1101 unsigned int precision
= semantics
->precision
;
1103 /* Normalize the divisor. */
1104 bit
= precision
- APInt::tcMSB(divisor
, partsCount
) - 1;
1107 APInt::tcShiftLeft(divisor
, partsCount
, bit
);
1110 /* Normalize the dividend. */
1111 bit
= precision
- APInt::tcMSB(dividend
, partsCount
) - 1;
1114 APInt::tcShiftLeft(dividend
, partsCount
, bit
);
1117 /* Ensure the dividend >= divisor initially for the loop below.
1118 Incidentally, this means that the division loop below is
1119 guaranteed to set the integer bit to one. */
1120 if (APInt::tcCompare(dividend
, divisor
, partsCount
) < 0) {
1122 APInt::tcShiftLeft(dividend
, partsCount
, 1);
1123 assert(APInt::tcCompare(dividend
, divisor
, partsCount
) >= 0);
1126 /* Long division. */
1127 for (bit
= precision
; bit
; bit
-= 1) {
1128 if (APInt::tcCompare(dividend
, divisor
, partsCount
) >= 0) {
1129 APInt::tcSubtract(dividend
, divisor
, 0, partsCount
);
1130 APInt::tcSetBit(lhsSignificand
, bit
- 1);
1133 APInt::tcShiftLeft(dividend
, partsCount
, 1);
1136 /* Figure out the lost fraction. */
1137 int cmp
= APInt::tcCompare(dividend
, divisor
, partsCount
);
1140 lost_fraction
= lfMoreThanHalf
;
1142 lost_fraction
= lfExactlyHalf
;
1143 else if (APInt::tcIsZero(dividend
, partsCount
))
1144 lost_fraction
= lfExactlyZero
;
1146 lost_fraction
= lfLessThanHalf
;
1151 return lost_fraction
;
1154 unsigned int IEEEFloat::significandMSB() const {
1155 return APInt::tcMSB(significandParts(), partCount());
1158 unsigned int IEEEFloat::significandLSB() const {
1159 return APInt::tcLSB(significandParts(), partCount());
1162 /* Note that a zero result is NOT normalized to fcZero. */
1163 lostFraction
IEEEFloat::shiftSignificandRight(unsigned int bits
) {
1164 /* Our exponent should not overflow. */
1165 assert((ExponentType
) (exponent
+ bits
) >= exponent
);
1169 return shiftRight(significandParts(), partCount(), bits
);
1172 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1173 void IEEEFloat::shiftSignificandLeft(unsigned int bits
) {
1174 assert(bits
< semantics
->precision
);
1177 unsigned int partsCount
= partCount();
1179 APInt::tcShiftLeft(significandParts(), partsCount
, bits
);
1182 assert(!APInt::tcIsZero(significandParts(), partsCount
));
1186 IEEEFloat::cmpResult
1187 IEEEFloat::compareAbsoluteValue(const IEEEFloat
&rhs
) const {
1190 assert(semantics
== rhs
.semantics
);
1191 assert(isFiniteNonZero());
1192 assert(rhs
.isFiniteNonZero());
1194 compare
= exponent
- rhs
.exponent
;
1196 /* If exponents are equal, do an unsigned bignum comparison of the
1199 compare
= APInt::tcCompare(significandParts(), rhs
.significandParts(),
1203 return cmpGreaterThan
;
1204 else if (compare
< 0)
1210 /* Handle overflow. Sign is preserved. We either become infinity or
1211 the largest finite number. */
1212 IEEEFloat::opStatus
IEEEFloat::handleOverflow(roundingMode rounding_mode
) {
1214 if (rounding_mode
== rmNearestTiesToEven
||
1215 rounding_mode
== rmNearestTiesToAway
||
1216 (rounding_mode
== rmTowardPositive
&& !sign
) ||
1217 (rounding_mode
== rmTowardNegative
&& sign
)) {
1218 category
= fcInfinity
;
1219 return (opStatus
) (opOverflow
| opInexact
);
1222 /* Otherwise we become the largest finite number. */
1223 category
= fcNormal
;
1224 exponent
= semantics
->maxExponent
;
1225 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1226 semantics
->precision
);
1231 /* Returns TRUE if, when truncating the current number, with BIT the
1232 new LSB, with the given lost fraction and rounding mode, the result
1233 would need to be rounded away from zero (i.e., by increasing the
1234 signficand). This routine must work for fcZero of both signs, and
1235 fcNormal numbers. */
1236 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode
,
1237 lostFraction lost_fraction
,
1238 unsigned int bit
) const {
1239 /* NaNs and infinities should not have lost fractions. */
1240 assert(isFiniteNonZero() || category
== fcZero
);
1242 /* Current callers never pass this so we don't handle it. */
1243 assert(lost_fraction
!= lfExactlyZero
);
1245 switch (rounding_mode
) {
1246 case rmNearestTiesToAway
:
1247 return lost_fraction
== lfExactlyHalf
|| lost_fraction
== lfMoreThanHalf
;
1249 case rmNearestTiesToEven
:
1250 if (lost_fraction
== lfMoreThanHalf
)
1253 /* Our zeroes don't have a significand to test. */
1254 if (lost_fraction
== lfExactlyHalf
&& category
!= fcZero
)
1255 return APInt::tcExtractBit(significandParts(), bit
);
1262 case rmTowardPositive
:
1265 case rmTowardNegative
:
1268 llvm_unreachable("Invalid rounding mode found");
1271 IEEEFloat::opStatus
IEEEFloat::normalize(roundingMode rounding_mode
,
1272 lostFraction lost_fraction
) {
1273 unsigned int omsb
; /* One, not zero, based MSB. */
1276 if (!isFiniteNonZero())
1279 /* Before rounding normalize the exponent of fcNormal numbers. */
1280 omsb
= significandMSB() + 1;
1283 /* OMSB is numbered from 1. We want to place it in the integer
1284 bit numbered PRECISION if possible, with a compensating change in
1286 exponentChange
= omsb
- semantics
->precision
;
1288 /* If the resulting exponent is too high, overflow according to
1289 the rounding mode. */
1290 if (exponent
+ exponentChange
> semantics
->maxExponent
)
1291 return handleOverflow(rounding_mode
);
1293 /* Subnormal numbers have exponent minExponent, and their MSB
1294 is forced based on that. */
1295 if (exponent
+ exponentChange
< semantics
->minExponent
)
1296 exponentChange
= semantics
->minExponent
- exponent
;
1298 /* Shifting left is easy as we don't lose precision. */
1299 if (exponentChange
< 0) {
1300 assert(lost_fraction
== lfExactlyZero
);
1302 shiftSignificandLeft(-exponentChange
);
1307 if (exponentChange
> 0) {
1310 /* Shift right and capture any new lost fraction. */
1311 lf
= shiftSignificandRight(exponentChange
);
1313 lost_fraction
= combineLostFractions(lf
, lost_fraction
);
1315 /* Keep OMSB up-to-date. */
1316 if (omsb
> (unsigned) exponentChange
)
1317 omsb
-= exponentChange
;
1323 /* Now round the number according to rounding_mode given the lost
1326 /* As specified in IEEE 754, since we do not trap we do not report
1327 underflow for exact results. */
1328 if (lost_fraction
== lfExactlyZero
) {
1329 /* Canonicalize zeroes. */
1336 /* Increment the significand if we're rounding away from zero. */
1337 if (roundAwayFromZero(rounding_mode
, lost_fraction
, 0)) {
1339 exponent
= semantics
->minExponent
;
1341 incrementSignificand();
1342 omsb
= significandMSB() + 1;
1344 /* Did the significand increment overflow? */
1345 if (omsb
== (unsigned) semantics
->precision
+ 1) {
1346 /* Renormalize by incrementing the exponent and shifting our
1347 significand right one. However if we already have the
1348 maximum exponent we overflow to infinity. */
1349 if (exponent
== semantics
->maxExponent
) {
1350 category
= fcInfinity
;
1352 return (opStatus
) (opOverflow
| opInexact
);
1355 shiftSignificandRight(1);
1361 /* The normal case - we were and are not denormal, and any
1362 significand increment above didn't overflow. */
1363 if (omsb
== semantics
->precision
)
1366 /* We have a non-zero denormal. */
1367 assert(omsb
< semantics
->precision
);
1369 /* Canonicalize zeroes. */
1373 /* The fcZero case is a denormal that underflowed to zero. */
1374 return (opStatus
) (opUnderflow
| opInexact
);
1377 IEEEFloat::opStatus
IEEEFloat::addOrSubtractSpecials(const IEEEFloat
&rhs
,
1379 switch (PackCategoriesIntoKey(category
, rhs
.category
)) {
1381 llvm_unreachable(nullptr);
1383 case PackCategoriesIntoKey(fcNaN
, fcZero
):
1384 case PackCategoriesIntoKey(fcNaN
, fcNormal
):
1385 case PackCategoriesIntoKey(fcNaN
, fcInfinity
):
1386 case PackCategoriesIntoKey(fcNaN
, fcNaN
):
1387 case PackCategoriesIntoKey(fcNormal
, fcZero
):
1388 case PackCategoriesIntoKey(fcInfinity
, fcNormal
):
1389 case PackCategoriesIntoKey(fcInfinity
, fcZero
):
1392 case PackCategoriesIntoKey(fcZero
, fcNaN
):
1393 case PackCategoriesIntoKey(fcNormal
, fcNaN
):
1394 case PackCategoriesIntoKey(fcInfinity
, fcNaN
):
1395 // We need to be sure to flip the sign here for subtraction because we
1396 // don't have a separate negate operation so -NaN becomes 0 - NaN here.
1397 sign
= rhs
.sign
^ subtract
;
1399 copySignificand(rhs
);
1402 case PackCategoriesIntoKey(fcNormal
, fcInfinity
):
1403 case PackCategoriesIntoKey(fcZero
, fcInfinity
):
1404 category
= fcInfinity
;
1405 sign
= rhs
.sign
^ subtract
;
1408 case PackCategoriesIntoKey(fcZero
, fcNormal
):
1410 sign
= rhs
.sign
^ subtract
;
1413 case PackCategoriesIntoKey(fcZero
, fcZero
):
1414 /* Sign depends on rounding mode; handled by caller. */
1417 case PackCategoriesIntoKey(fcInfinity
, fcInfinity
):
1418 /* Differently signed infinities can only be validly
1420 if (((sign
^ rhs
.sign
)!=0) != subtract
) {
1427 case PackCategoriesIntoKey(fcNormal
, fcNormal
):
1432 /* Add or subtract two normal numbers. */
1433 lostFraction
IEEEFloat::addOrSubtractSignificand(const IEEEFloat
&rhs
,
1436 lostFraction lost_fraction
;
1439 /* Determine if the operation on the absolute values is effectively
1440 an addition or subtraction. */
1441 subtract
^= static_cast<bool>(sign
^ rhs
.sign
);
1443 /* Are we bigger exponent-wise than the RHS? */
1444 bits
= exponent
- rhs
.exponent
;
1446 /* Subtraction is more subtle than one might naively expect. */
1448 IEEEFloat
temp_rhs(rhs
);
1452 reverse
= compareAbsoluteValue(temp_rhs
) == cmpLessThan
;
1453 lost_fraction
= lfExactlyZero
;
1454 } else if (bits
> 0) {
1455 lost_fraction
= temp_rhs
.shiftSignificandRight(bits
- 1);
1456 shiftSignificandLeft(1);
1459 lost_fraction
= shiftSignificandRight(-bits
- 1);
1460 temp_rhs
.shiftSignificandLeft(1);
1465 carry
= temp_rhs
.subtractSignificand
1466 (*this, lost_fraction
!= lfExactlyZero
);
1467 copySignificand(temp_rhs
);
1470 carry
= subtractSignificand
1471 (temp_rhs
, lost_fraction
!= lfExactlyZero
);
1474 /* Invert the lost fraction - it was on the RHS and
1476 if (lost_fraction
== lfLessThanHalf
)
1477 lost_fraction
= lfMoreThanHalf
;
1478 else if (lost_fraction
== lfMoreThanHalf
)
1479 lost_fraction
= lfLessThanHalf
;
1481 /* The code above is intended to ensure that no borrow is
1487 IEEEFloat
temp_rhs(rhs
);
1489 lost_fraction
= temp_rhs
.shiftSignificandRight(bits
);
1490 carry
= addSignificand(temp_rhs
);
1492 lost_fraction
= shiftSignificandRight(-bits
);
1493 carry
= addSignificand(rhs
);
1496 /* We have a guard bit; generating a carry cannot happen. */
1501 return lost_fraction
;
1504 IEEEFloat::opStatus
IEEEFloat::multiplySpecials(const IEEEFloat
&rhs
) {
1505 switch (PackCategoriesIntoKey(category
, rhs
.category
)) {
1507 llvm_unreachable(nullptr);
1509 case PackCategoriesIntoKey(fcNaN
, fcZero
):
1510 case PackCategoriesIntoKey(fcNaN
, fcNormal
):
1511 case PackCategoriesIntoKey(fcNaN
, fcInfinity
):
1512 case PackCategoriesIntoKey(fcNaN
, fcNaN
):
1516 case PackCategoriesIntoKey(fcZero
, fcNaN
):
1517 case PackCategoriesIntoKey(fcNormal
, fcNaN
):
1518 case PackCategoriesIntoKey(fcInfinity
, fcNaN
):
1521 copySignificand(rhs
);
1524 case PackCategoriesIntoKey(fcNormal
, fcInfinity
):
1525 case PackCategoriesIntoKey(fcInfinity
, fcNormal
):
1526 case PackCategoriesIntoKey(fcInfinity
, fcInfinity
):
1527 category
= fcInfinity
;
1530 case PackCategoriesIntoKey(fcZero
, fcNormal
):
1531 case PackCategoriesIntoKey(fcNormal
, fcZero
):
1532 case PackCategoriesIntoKey(fcZero
, fcZero
):
1536 case PackCategoriesIntoKey(fcZero
, fcInfinity
):
1537 case PackCategoriesIntoKey(fcInfinity
, fcZero
):
1541 case PackCategoriesIntoKey(fcNormal
, fcNormal
):
1546 IEEEFloat::opStatus
IEEEFloat::divideSpecials(const IEEEFloat
&rhs
) {
1547 switch (PackCategoriesIntoKey(category
, rhs
.category
)) {
1549 llvm_unreachable(nullptr);
1551 case PackCategoriesIntoKey(fcZero
, fcNaN
):
1552 case PackCategoriesIntoKey(fcNormal
, fcNaN
):
1553 case PackCategoriesIntoKey(fcInfinity
, fcNaN
):
1555 copySignificand(rhs
);
1557 case PackCategoriesIntoKey(fcNaN
, fcZero
):
1558 case PackCategoriesIntoKey(fcNaN
, fcNormal
):
1559 case PackCategoriesIntoKey(fcNaN
, fcInfinity
):
1560 case PackCategoriesIntoKey(fcNaN
, fcNaN
):
1563 case PackCategoriesIntoKey(fcInfinity
, fcZero
):
1564 case PackCategoriesIntoKey(fcInfinity
, fcNormal
):
1565 case PackCategoriesIntoKey(fcZero
, fcInfinity
):
1566 case PackCategoriesIntoKey(fcZero
, fcNormal
):
1569 case PackCategoriesIntoKey(fcNormal
, fcInfinity
):
1573 case PackCategoriesIntoKey(fcNormal
, fcZero
):
1574 category
= fcInfinity
;
1577 case PackCategoriesIntoKey(fcInfinity
, fcInfinity
):
1578 case PackCategoriesIntoKey(fcZero
, fcZero
):
1582 case PackCategoriesIntoKey(fcNormal
, fcNormal
):
1587 IEEEFloat::opStatus
IEEEFloat::modSpecials(const IEEEFloat
&rhs
) {
1588 switch (PackCategoriesIntoKey(category
, rhs
.category
)) {
1590 llvm_unreachable(nullptr);
1592 case PackCategoriesIntoKey(fcNaN
, fcZero
):
1593 case PackCategoriesIntoKey(fcNaN
, fcNormal
):
1594 case PackCategoriesIntoKey(fcNaN
, fcInfinity
):
1595 case PackCategoriesIntoKey(fcNaN
, fcNaN
):
1596 case PackCategoriesIntoKey(fcZero
, fcInfinity
):
1597 case PackCategoriesIntoKey(fcZero
, fcNormal
):
1598 case PackCategoriesIntoKey(fcNormal
, fcInfinity
):
1601 case PackCategoriesIntoKey(fcZero
, fcNaN
):
1602 case PackCategoriesIntoKey(fcNormal
, fcNaN
):
1603 case PackCategoriesIntoKey(fcInfinity
, fcNaN
):
1606 copySignificand(rhs
);
1609 case PackCategoriesIntoKey(fcNormal
, fcZero
):
1610 case PackCategoriesIntoKey(fcInfinity
, fcZero
):
1611 case PackCategoriesIntoKey(fcInfinity
, fcNormal
):
1612 case PackCategoriesIntoKey(fcInfinity
, fcInfinity
):
1613 case PackCategoriesIntoKey(fcZero
, fcZero
):
1617 case PackCategoriesIntoKey(fcNormal
, fcNormal
):
1623 void IEEEFloat::changeSign() {
1624 /* Look mummy, this one's easy. */
1628 /* Normalized addition or subtraction. */
1629 IEEEFloat::opStatus
IEEEFloat::addOrSubtract(const IEEEFloat
&rhs
,
1630 roundingMode rounding_mode
,
1634 fs
= addOrSubtractSpecials(rhs
, subtract
);
1636 /* This return code means it was not a simple case. */
1637 if (fs
== opDivByZero
) {
1638 lostFraction lost_fraction
;
1640 lost_fraction
= addOrSubtractSignificand(rhs
, subtract
);
1641 fs
= normalize(rounding_mode
, lost_fraction
);
1643 /* Can only be zero if we lost no fraction. */
1644 assert(category
!= fcZero
|| lost_fraction
== lfExactlyZero
);
1647 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1648 positive zero unless rounding to minus infinity, except that
1649 adding two like-signed zeroes gives that zero. */
1650 if (category
== fcZero
) {
1651 if (rhs
.category
!= fcZero
|| (sign
== rhs
.sign
) == subtract
)
1652 sign
= (rounding_mode
== rmTowardNegative
);
1658 /* Normalized addition. */
1659 IEEEFloat::opStatus
IEEEFloat::add(const IEEEFloat
&rhs
,
1660 roundingMode rounding_mode
) {
1661 return addOrSubtract(rhs
, rounding_mode
, false);
1664 /* Normalized subtraction. */
1665 IEEEFloat::opStatus
IEEEFloat::subtract(const IEEEFloat
&rhs
,
1666 roundingMode rounding_mode
) {
1667 return addOrSubtract(rhs
, rounding_mode
, true);
1670 /* Normalized multiply. */
1671 IEEEFloat::opStatus
IEEEFloat::multiply(const IEEEFloat
&rhs
,
1672 roundingMode rounding_mode
) {
1676 fs
= multiplySpecials(rhs
);
1678 if (isFiniteNonZero()) {
1679 lostFraction lost_fraction
= multiplySignificand(rhs
, nullptr);
1680 fs
= normalize(rounding_mode
, lost_fraction
);
1681 if (lost_fraction
!= lfExactlyZero
)
1682 fs
= (opStatus
) (fs
| opInexact
);
1688 /* Normalized divide. */
1689 IEEEFloat::opStatus
IEEEFloat::divide(const IEEEFloat
&rhs
,
1690 roundingMode rounding_mode
) {
1694 fs
= divideSpecials(rhs
);
1696 if (isFiniteNonZero()) {
1697 lostFraction lost_fraction
= divideSignificand(rhs
);
1698 fs
= normalize(rounding_mode
, lost_fraction
);
1699 if (lost_fraction
!= lfExactlyZero
)
1700 fs
= (opStatus
) (fs
| opInexact
);
1706 /* Normalized remainder. This is not currently correct in all cases. */
1707 IEEEFloat::opStatus
IEEEFloat::remainder(const IEEEFloat
&rhs
) {
1709 IEEEFloat V
= *this;
1710 unsigned int origSign
= sign
;
1712 fs
= V
.divide(rhs
, rmNearestTiesToEven
);
1713 if (fs
== opDivByZero
)
1716 int parts
= partCount();
1717 integerPart
*x
= new integerPart
[parts
];
1719 fs
= V
.convertToInteger(makeMutableArrayRef(x
, parts
),
1720 parts
* integerPartWidth
, true, rmNearestTiesToEven
,
1722 if (fs
== opInvalidOp
) {
1727 fs
= V
.convertFromZeroExtendedInteger(x
, parts
* integerPartWidth
, true,
1728 rmNearestTiesToEven
);
1729 assert(fs
==opOK
); // should always work
1731 fs
= V
.multiply(rhs
, rmNearestTiesToEven
);
1732 assert(fs
==opOK
|| fs
==opInexact
); // should not overflow or underflow
1734 fs
= subtract(V
, rmNearestTiesToEven
);
1735 assert(fs
==opOK
|| fs
==opInexact
); // likewise
1738 sign
= origSign
; // IEEE754 requires this
1743 /* Normalized llvm frem (C fmod). */
1744 IEEEFloat::opStatus
IEEEFloat::mod(const IEEEFloat
&rhs
) {
1746 fs
= modSpecials(rhs
);
1747 unsigned int origSign
= sign
;
1749 while (isFiniteNonZero() && rhs
.isFiniteNonZero() &&
1750 compareAbsoluteValue(rhs
) != cmpLessThan
) {
1751 IEEEFloat V
= scalbn(rhs
, ilogb(*this) - ilogb(rhs
), rmNearestTiesToEven
);
1752 if (compareAbsoluteValue(V
) == cmpLessThan
)
1753 V
= scalbn(V
, -1, rmNearestTiesToEven
);
1756 fs
= subtract(V
, rmNearestTiesToEven
);
1760 sign
= origSign
; // fmod requires this
1764 /* Normalized fused-multiply-add. */
1765 IEEEFloat::opStatus
IEEEFloat::fusedMultiplyAdd(const IEEEFloat
&multiplicand
,
1766 const IEEEFloat
&addend
,
1767 roundingMode rounding_mode
) {
1770 /* Post-multiplication sign, before addition. */
1771 sign
^= multiplicand
.sign
;
1773 /* If and only if all arguments are normal do we need to do an
1774 extended-precision calculation. */
1775 if (isFiniteNonZero() &&
1776 multiplicand
.isFiniteNonZero() &&
1777 addend
.isFinite()) {
1778 lostFraction lost_fraction
;
1780 lost_fraction
= multiplySignificand(multiplicand
, &addend
);
1781 fs
= normalize(rounding_mode
, lost_fraction
);
1782 if (lost_fraction
!= lfExactlyZero
)
1783 fs
= (opStatus
) (fs
| opInexact
);
1785 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1786 positive zero unless rounding to minus infinity, except that
1787 adding two like-signed zeroes gives that zero. */
1788 if (category
== fcZero
&& !(fs
& opUnderflow
) && sign
!= addend
.sign
)
1789 sign
= (rounding_mode
== rmTowardNegative
);
1791 fs
= multiplySpecials(multiplicand
);
1793 /* FS can only be opOK or opInvalidOp. There is no more work
1794 to do in the latter case. The IEEE-754R standard says it is
1795 implementation-defined in this case whether, if ADDEND is a
1796 quiet NaN, we raise invalid op; this implementation does so.
1798 If we need to do the addition we can do so with normal
1801 fs
= addOrSubtract(addend
, rounding_mode
, false);
1807 /* Rounding-mode corrrect round to integral value. */
1808 IEEEFloat::opStatus
IEEEFloat::roundToIntegral(roundingMode rounding_mode
) {
1811 // If the exponent is large enough, we know that this value is already
1812 // integral, and the arithmetic below would potentially cause it to saturate
1813 // to +/-Inf. Bail out early instead.
1814 if (isFiniteNonZero() && exponent
+1 >= (int)semanticsPrecision(*semantics
))
1817 // The algorithm here is quite simple: we add 2^(p-1), where p is the
1818 // precision of our format, and then subtract it back off again. The choice
1819 // of rounding modes for the addition/subtraction determines the rounding mode
1820 // for our integral rounding as well.
1821 // NOTE: When the input value is negative, we do subtraction followed by
1822 // addition instead.
1823 APInt
IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics
)), 1);
1824 IntegerConstant
<<= semanticsPrecision(*semantics
)-1;
1825 IEEEFloat
MagicConstant(*semantics
);
1826 fs
= MagicConstant
.convertFromAPInt(IntegerConstant
, false,
1827 rmNearestTiesToEven
);
1828 MagicConstant
.sign
= sign
;
1833 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
1834 bool inputSign
= isNegative();
1836 fs
= add(MagicConstant
, rounding_mode
);
1837 if (fs
!= opOK
&& fs
!= opInexact
)
1840 fs
= subtract(MagicConstant
, rounding_mode
);
1842 // Restore the input sign.
1843 if (inputSign
!= isNegative())
1850 /* Comparison requires normalized numbers. */
1851 IEEEFloat::cmpResult
IEEEFloat::compare(const IEEEFloat
&rhs
) const {
1854 assert(semantics
== rhs
.semantics
);
1856 switch (PackCategoriesIntoKey(category
, rhs
.category
)) {
1858 llvm_unreachable(nullptr);
1860 case PackCategoriesIntoKey(fcNaN
, fcZero
):
1861 case PackCategoriesIntoKey(fcNaN
, fcNormal
):
1862 case PackCategoriesIntoKey(fcNaN
, fcInfinity
):
1863 case PackCategoriesIntoKey(fcNaN
, fcNaN
):
1864 case PackCategoriesIntoKey(fcZero
, fcNaN
):
1865 case PackCategoriesIntoKey(fcNormal
, fcNaN
):
1866 case PackCategoriesIntoKey(fcInfinity
, fcNaN
):
1867 return cmpUnordered
;
1869 case PackCategoriesIntoKey(fcInfinity
, fcNormal
):
1870 case PackCategoriesIntoKey(fcInfinity
, fcZero
):
1871 case PackCategoriesIntoKey(fcNormal
, fcZero
):
1875 return cmpGreaterThan
;
1877 case PackCategoriesIntoKey(fcNormal
, fcInfinity
):
1878 case PackCategoriesIntoKey(fcZero
, fcInfinity
):
1879 case PackCategoriesIntoKey(fcZero
, fcNormal
):
1881 return cmpGreaterThan
;
1885 case PackCategoriesIntoKey(fcInfinity
, fcInfinity
):
1886 if (sign
== rhs
.sign
)
1891 return cmpGreaterThan
;
1893 case PackCategoriesIntoKey(fcZero
, fcZero
):
1896 case PackCategoriesIntoKey(fcNormal
, fcNormal
):
1900 /* Two normal numbers. Do they have the same sign? */
1901 if (sign
!= rhs
.sign
) {
1903 result
= cmpLessThan
;
1905 result
= cmpGreaterThan
;
1907 /* Compare absolute values; invert result if negative. */
1908 result
= compareAbsoluteValue(rhs
);
1911 if (result
== cmpLessThan
)
1912 result
= cmpGreaterThan
;
1913 else if (result
== cmpGreaterThan
)
1914 result
= cmpLessThan
;
1921 /// IEEEFloat::convert - convert a value of one floating point type to another.
1922 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1923 /// records whether the transformation lost information, i.e. whether
1924 /// converting the result back to the original type will produce the
1925 /// original value (this is almost the same as return value==fsOK, but there
1926 /// are edge cases where this is not so).
1928 IEEEFloat::opStatus
IEEEFloat::convert(const fltSemantics
&toSemantics
,
1929 roundingMode rounding_mode
,
1931 lostFraction lostFraction
;
1932 unsigned int newPartCount
, oldPartCount
;
1935 const fltSemantics
&fromSemantics
= *semantics
;
1937 lostFraction
= lfExactlyZero
;
1938 newPartCount
= partCountForBits(toSemantics
.precision
+ 1);
1939 oldPartCount
= partCount();
1940 shift
= toSemantics
.precision
- fromSemantics
.precision
;
1942 bool X86SpecialNan
= false;
1943 if (&fromSemantics
== &semX87DoubleExtended
&&
1944 &toSemantics
!= &semX87DoubleExtended
&& category
== fcNaN
&&
1945 (!(*significandParts() & 0x8000000000000000ULL
) ||
1946 !(*significandParts() & 0x4000000000000000ULL
))) {
1947 // x86 has some unusual NaNs which cannot be represented in any other
1948 // format; note them here.
1949 X86SpecialNan
= true;
1952 // If this is a truncation of a denormal number, and the target semantics
1953 // has larger exponent range than the source semantics (this can happen
1954 // when truncating from PowerPC double-double to double format), the
1955 // right shift could lose result mantissa bits. Adjust exponent instead
1956 // of performing excessive shift.
1957 if (shift
< 0 && isFiniteNonZero()) {
1958 int exponentChange
= significandMSB() + 1 - fromSemantics
.precision
;
1959 if (exponent
+ exponentChange
< toSemantics
.minExponent
)
1960 exponentChange
= toSemantics
.minExponent
- exponent
;
1961 if (exponentChange
< shift
)
1962 exponentChange
= shift
;
1963 if (exponentChange
< 0) {
1964 shift
-= exponentChange
;
1965 exponent
+= exponentChange
;
1969 // If this is a truncation, perform the shift before we narrow the storage.
1970 if (shift
< 0 && (isFiniteNonZero() || category
==fcNaN
))
1971 lostFraction
= shiftRight(significandParts(), oldPartCount
, -shift
);
1973 // Fix the storage so it can hold to new value.
1974 if (newPartCount
> oldPartCount
) {
1975 // The new type requires more storage; make it available.
1976 integerPart
*newParts
;
1977 newParts
= new integerPart
[newPartCount
];
1978 APInt::tcSet(newParts
, 0, newPartCount
);
1979 if (isFiniteNonZero() || category
==fcNaN
)
1980 APInt::tcAssign(newParts
, significandParts(), oldPartCount
);
1982 significand
.parts
= newParts
;
1983 } else if (newPartCount
== 1 && oldPartCount
!= 1) {
1984 // Switch to built-in storage for a single part.
1985 integerPart newPart
= 0;
1986 if (isFiniteNonZero() || category
==fcNaN
)
1987 newPart
= significandParts()[0];
1989 significand
.part
= newPart
;
1992 // Now that we have the right storage, switch the semantics.
1993 semantics
= &toSemantics
;
1995 // If this is an extension, perform the shift now that the storage is
1997 if (shift
> 0 && (isFiniteNonZero() || category
==fcNaN
))
1998 APInt::tcShiftLeft(significandParts(), newPartCount
, shift
);
2000 if (isFiniteNonZero()) {
2001 fs
= normalize(rounding_mode
, lostFraction
);
2002 *losesInfo
= (fs
!= opOK
);
2003 } else if (category
== fcNaN
) {
2004 *losesInfo
= lostFraction
!= lfExactlyZero
|| X86SpecialNan
;
2006 // For x87 extended precision, we want to make a NaN, not a special NaN if
2007 // the input wasn't special either.
2008 if (!X86SpecialNan
&& semantics
== &semX87DoubleExtended
)
2009 APInt::tcSetBit(significandParts(), semantics
->precision
- 1);
2011 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
2012 // does not give you back the same bits. This is dubious, and we
2013 // don't currently do it. You're really supposed to get
2014 // an invalid operation signal at runtime, but nobody does that.
2024 /* Convert a floating point number to an integer according to the
2025 rounding mode. If the rounded integer value is out of range this
2026 returns an invalid operation exception and the contents of the
2027 destination parts are unspecified. If the rounded value is in
2028 range but the floating point number is not the exact integer, the C
2029 standard doesn't require an inexact exception to be raised. IEEE
2030 854 does require it so we do that.
2032 Note that for conversions to integer type the C standard requires
2033 round-to-zero to always be used. */
2034 IEEEFloat::opStatus
IEEEFloat::convertToSignExtendedInteger(
2035 MutableArrayRef
<integerPart
> parts
, unsigned int width
, bool isSigned
,
2036 roundingMode rounding_mode
, bool *isExact
) const {
2037 lostFraction lost_fraction
;
2038 const integerPart
*src
;
2039 unsigned int dstPartsCount
, truncatedBits
;
2043 /* Handle the three special cases first. */
2044 if (category
== fcInfinity
|| category
== fcNaN
)
2047 dstPartsCount
= partCountForBits(width
);
2048 assert(dstPartsCount
<= parts
.size() && "Integer too big");
2050 if (category
== fcZero
) {
2051 APInt::tcSet(parts
.data(), 0, dstPartsCount
);
2052 // Negative zero can't be represented as an int.
2057 src
= significandParts();
2059 /* Step 1: place our absolute value, with any fraction truncated, in
2062 /* Our absolute value is less than one; truncate everything. */
2063 APInt::tcSet(parts
.data(), 0, dstPartsCount
);
2064 /* For exponent -1 the integer bit represents .5, look at that.
2065 For smaller exponents leftmost truncated bit is 0. */
2066 truncatedBits
= semantics
->precision
-1U - exponent
;
2068 /* We want the most significant (exponent + 1) bits; the rest are
2070 unsigned int bits
= exponent
+ 1U;
2072 /* Hopelessly large in magnitude? */
2076 if (bits
< semantics
->precision
) {
2077 /* We truncate (semantics->precision - bits) bits. */
2078 truncatedBits
= semantics
->precision
- bits
;
2079 APInt::tcExtract(parts
.data(), dstPartsCount
, src
, bits
, truncatedBits
);
2081 /* We want at least as many bits as are available. */
2082 APInt::tcExtract(parts
.data(), dstPartsCount
, src
, semantics
->precision
,
2084 APInt::tcShiftLeft(parts
.data(), dstPartsCount
,
2085 bits
- semantics
->precision
);
2090 /* Step 2: work out any lost fraction, and increment the absolute
2091 value if we would round away from zero. */
2092 if (truncatedBits
) {
2093 lost_fraction
= lostFractionThroughTruncation(src
, partCount(),
2095 if (lost_fraction
!= lfExactlyZero
&&
2096 roundAwayFromZero(rounding_mode
, lost_fraction
, truncatedBits
)) {
2097 if (APInt::tcIncrement(parts
.data(), dstPartsCount
))
2098 return opInvalidOp
; /* Overflow. */
2101 lost_fraction
= lfExactlyZero
;
2104 /* Step 3: check if we fit in the destination. */
2105 unsigned int omsb
= APInt::tcMSB(parts
.data(), dstPartsCount
) + 1;
2109 /* Negative numbers cannot be represented as unsigned. */
2113 /* It takes omsb bits to represent the unsigned integer value.
2114 We lose a bit for the sign, but care is needed as the
2115 maximally negative integer is a special case. */
2116 if (omsb
== width
&&
2117 APInt::tcLSB(parts
.data(), dstPartsCount
) + 1 != omsb
)
2120 /* This case can happen because of rounding. */
2125 APInt::tcNegate (parts
.data(), dstPartsCount
);
2127 if (omsb
>= width
+ !isSigned
)
2131 if (lost_fraction
== lfExactlyZero
) {
2138 /* Same as convertToSignExtendedInteger, except we provide
2139 deterministic values in case of an invalid operation exception,
2140 namely zero for NaNs and the minimal or maximal value respectively
2141 for underflow or overflow.
2142 The *isExact output tells whether the result is exact, in the sense
2143 that converting it back to the original floating point type produces
2144 the original value. This is almost equivalent to result==opOK,
2145 except for negative zeroes.
2148 IEEEFloat::convertToInteger(MutableArrayRef
<integerPart
> parts
,
2149 unsigned int width
, bool isSigned
,
2150 roundingMode rounding_mode
, bool *isExact
) const {
2153 fs
= convertToSignExtendedInteger(parts
, width
, isSigned
, rounding_mode
,
2156 if (fs
== opInvalidOp
) {
2157 unsigned int bits
, dstPartsCount
;
2159 dstPartsCount
= partCountForBits(width
);
2160 assert(dstPartsCount
<= parts
.size() && "Integer too big");
2162 if (category
== fcNaN
)
2167 bits
= width
- isSigned
;
2169 APInt::tcSetLeastSignificantBits(parts
.data(), dstPartsCount
, bits
);
2170 if (sign
&& isSigned
)
2171 APInt::tcShiftLeft(parts
.data(), dstPartsCount
, width
- 1);
2177 /* Convert an unsigned integer SRC to a floating point number,
2178 rounding according to ROUNDING_MODE. The sign of the floating
2179 point number is not modified. */
2180 IEEEFloat::opStatus
IEEEFloat::convertFromUnsignedParts(
2181 const integerPart
*src
, unsigned int srcCount
, roundingMode rounding_mode
) {
2182 unsigned int omsb
, precision
, dstCount
;
2184 lostFraction lost_fraction
;
2186 category
= fcNormal
;
2187 omsb
= APInt::tcMSB(src
, srcCount
) + 1;
2188 dst
= significandParts();
2189 dstCount
= partCount();
2190 precision
= semantics
->precision
;
2192 /* We want the most significant PRECISION bits of SRC. There may not
2193 be that many; extract what we can. */
2194 if (precision
<= omsb
) {
2195 exponent
= omsb
- 1;
2196 lost_fraction
= lostFractionThroughTruncation(src
, srcCount
,
2198 APInt::tcExtract(dst
, dstCount
, src
, precision
, omsb
- precision
);
2200 exponent
= precision
- 1;
2201 lost_fraction
= lfExactlyZero
;
2202 APInt::tcExtract(dst
, dstCount
, src
, omsb
, 0);
2205 return normalize(rounding_mode
, lost_fraction
);
2208 IEEEFloat::opStatus
IEEEFloat::convertFromAPInt(const APInt
&Val
, bool isSigned
,
2209 roundingMode rounding_mode
) {
2210 unsigned int partCount
= Val
.getNumWords();
2214 if (isSigned
&& api
.isNegative()) {
2219 return convertFromUnsignedParts(api
.getRawData(), partCount
, rounding_mode
);
2222 /* Convert a two's complement integer SRC to a floating point number,
2223 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2224 integer is signed, in which case it must be sign-extended. */
2226 IEEEFloat::convertFromSignExtendedInteger(const integerPart
*src
,
2227 unsigned int srcCount
, bool isSigned
,
2228 roundingMode rounding_mode
) {
2232 APInt::tcExtractBit(src
, srcCount
* integerPartWidth
- 1)) {
2235 /* If we're signed and negative negate a copy. */
2237 copy
= new integerPart
[srcCount
];
2238 APInt::tcAssign(copy
, src
, srcCount
);
2239 APInt::tcNegate(copy
, srcCount
);
2240 status
= convertFromUnsignedParts(copy
, srcCount
, rounding_mode
);
2244 status
= convertFromUnsignedParts(src
, srcCount
, rounding_mode
);
2250 /* FIXME: should this just take a const APInt reference? */
2252 IEEEFloat::convertFromZeroExtendedInteger(const integerPart
*parts
,
2253 unsigned int width
, bool isSigned
,
2254 roundingMode rounding_mode
) {
2255 unsigned int partCount
= partCountForBits(width
);
2256 APInt api
= APInt(width
, makeArrayRef(parts
, partCount
));
2259 if (isSigned
&& APInt::tcExtractBit(parts
, width
- 1)) {
2264 return convertFromUnsignedParts(api
.getRawData(), partCount
, rounding_mode
);
2268 IEEEFloat::convertFromHexadecimalString(StringRef s
,
2269 roundingMode rounding_mode
) {
2270 lostFraction lost_fraction
= lfExactlyZero
;
2272 category
= fcNormal
;
2276 integerPart
*significand
= significandParts();
2277 unsigned partsCount
= partCount();
2278 unsigned bitPos
= partsCount
* integerPartWidth
;
2279 bool computedTrailingFraction
= false;
2281 // Skip leading zeroes and any (hexa)decimal point.
2282 StringRef::iterator begin
= s
.begin();
2283 StringRef::iterator end
= s
.end();
2284 StringRef::iterator dot
;
2285 StringRef::iterator p
= skipLeadingZeroesAndAnyDot(begin
, end
, &dot
);
2286 StringRef::iterator firstSignificantDigit
= p
;
2289 integerPart hex_value
;
2292 assert(dot
== end
&& "String contains multiple dots");
2297 hex_value
= hexDigitValue(*p
);
2298 if (hex_value
== -1U)
2303 // Store the number while we have space.
2306 hex_value
<<= bitPos
% integerPartWidth
;
2307 significand
[bitPos
/ integerPartWidth
] |= hex_value
;
2308 } else if (!computedTrailingFraction
) {
2309 lost_fraction
= trailingHexadecimalFraction(p
, end
, hex_value
);
2310 computedTrailingFraction
= true;
2314 /* Hex floats require an exponent but not a hexadecimal point. */
2315 assert(p
!= end
&& "Hex strings require an exponent");
2316 assert((*p
== 'p' || *p
== 'P') && "Invalid character in significand");
2317 assert(p
!= begin
&& "Significand has no digits");
2318 assert((dot
== end
|| p
- begin
!= 1) && "Significand has no digits");
2320 /* Ignore the exponent if we are zero. */
2321 if (p
!= firstSignificantDigit
) {
2324 /* Implicit hexadecimal point? */
2328 /* Calculate the exponent adjustment implicit in the number of
2329 significant digits. */
2330 expAdjustment
= static_cast<int>(dot
- firstSignificantDigit
);
2331 if (expAdjustment
< 0)
2333 expAdjustment
= expAdjustment
* 4 - 1;
2335 /* Adjust for writing the significand starting at the most
2336 significant nibble. */
2337 expAdjustment
+= semantics
->precision
;
2338 expAdjustment
-= partsCount
* integerPartWidth
;
2340 /* Adjust for the given exponent. */
2341 exponent
= totalExponent(p
+ 1, end
, expAdjustment
);
2344 return normalize(rounding_mode
, lost_fraction
);
2348 IEEEFloat::roundSignificandWithExponent(const integerPart
*decSigParts
,
2349 unsigned sigPartCount
, int exp
,
2350 roundingMode rounding_mode
) {
2351 unsigned int parts
, pow5PartCount
;
2352 fltSemantics calcSemantics
= { 32767, -32767, 0, 0 };
2353 integerPart pow5Parts
[maxPowerOfFiveParts
];
2356 isNearest
= (rounding_mode
== rmNearestTiesToEven
||
2357 rounding_mode
== rmNearestTiesToAway
);
2359 parts
= partCountForBits(semantics
->precision
+ 11);
2361 /* Calculate pow(5, abs(exp)). */
2362 pow5PartCount
= powerOf5(pow5Parts
, exp
>= 0 ? exp
: -exp
);
2364 for (;; parts
*= 2) {
2365 opStatus sigStatus
, powStatus
;
2366 unsigned int excessPrecision
, truncatedBits
;
2368 calcSemantics
.precision
= parts
* integerPartWidth
- 1;
2369 excessPrecision
= calcSemantics
.precision
- semantics
->precision
;
2370 truncatedBits
= excessPrecision
;
2372 IEEEFloat
decSig(calcSemantics
, uninitialized
);
2373 decSig
.makeZero(sign
);
2374 IEEEFloat
pow5(calcSemantics
);
2376 sigStatus
= decSig
.convertFromUnsignedParts(decSigParts
, sigPartCount
,
2377 rmNearestTiesToEven
);
2378 powStatus
= pow5
.convertFromUnsignedParts(pow5Parts
, pow5PartCount
,
2379 rmNearestTiesToEven
);
2380 /* Add exp, as 10^n = 5^n * 2^n. */
2381 decSig
.exponent
+= exp
;
2383 lostFraction calcLostFraction
;
2384 integerPart HUerr
, HUdistance
;
2385 unsigned int powHUerr
;
2388 /* multiplySignificand leaves the precision-th bit set to 1. */
2389 calcLostFraction
= decSig
.multiplySignificand(pow5
, nullptr);
2390 powHUerr
= powStatus
!= opOK
;
2392 calcLostFraction
= decSig
.divideSignificand(pow5
);
2393 /* Denormal numbers have less precision. */
2394 if (decSig
.exponent
< semantics
->minExponent
) {
2395 excessPrecision
+= (semantics
->minExponent
- decSig
.exponent
);
2396 truncatedBits
= excessPrecision
;
2397 if (excessPrecision
> calcSemantics
.precision
)
2398 excessPrecision
= calcSemantics
.precision
;
2400 /* Extra half-ulp lost in reciprocal of exponent. */
2401 powHUerr
= (powStatus
== opOK
&& calcLostFraction
== lfExactlyZero
) ? 0:2;
2404 /* Both multiplySignificand and divideSignificand return the
2405 result with the integer bit set. */
2406 assert(APInt::tcExtractBit
2407 (decSig
.significandParts(), calcSemantics
.precision
- 1) == 1);
2409 HUerr
= HUerrBound(calcLostFraction
!= lfExactlyZero
, sigStatus
!= opOK
,
2411 HUdistance
= 2 * ulpsFromBoundary(decSig
.significandParts(),
2412 excessPrecision
, isNearest
);
2414 /* Are we guaranteed to round correctly if we truncate? */
2415 if (HUdistance
>= HUerr
) {
2416 APInt::tcExtract(significandParts(), partCount(), decSig
.significandParts(),
2417 calcSemantics
.precision
- excessPrecision
,
2419 /* Take the exponent of decSig. If we tcExtract-ed less bits
2420 above we must adjust our exponent to compensate for the
2421 implicit right shift. */
2422 exponent
= (decSig
.exponent
+ semantics
->precision
2423 - (calcSemantics
.precision
- excessPrecision
));
2424 calcLostFraction
= lostFractionThroughTruncation(decSig
.significandParts(),
2427 return normalize(rounding_mode
, calcLostFraction
);
2433 IEEEFloat::convertFromDecimalString(StringRef str
, roundingMode rounding_mode
) {
2437 /* Scan the text. */
2438 StringRef::iterator p
= str
.begin();
2439 interpretDecimal(p
, str
.end(), &D
);
2441 /* Handle the quick cases. First the case of no significant digits,
2442 i.e. zero, and then exponents that are obviously too large or too
2443 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2444 definitely overflows if
2446 (exp - 1) * L >= maxExponent
2448 and definitely underflows to zero where
2450 (exp + 1) * L <= minExponent - precision
2452 With integer arithmetic the tightest bounds for L are
2454 93/28 < L < 196/59 [ numerator <= 256 ]
2455 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2458 // Test if we have a zero number allowing for strings with no null terminators
2459 // and zero decimals with non-zero exponents.
2461 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
2462 // D->firstSigDigit equals str.end(), every digit must be a zero and there can
2463 // be at most one dot. On the other hand, if we have a zero with a non-zero
2464 // exponent, then we know that D.firstSigDigit will be non-numeric.
2465 if (D
.firstSigDigit
== str
.end() || decDigitValue(*D
.firstSigDigit
) >= 10U) {
2469 /* Check whether the normalized exponent is high enough to overflow
2470 max during the log-rebasing in the max-exponent check below. */
2471 } else if (D
.normalizedExponent
- 1 > INT_MAX
/ 42039) {
2472 fs
= handleOverflow(rounding_mode
);
2474 /* If it wasn't, then it also wasn't high enough to overflow max
2475 during the log-rebasing in the min-exponent check. Check that it
2476 won't overflow min in either check, then perform the min-exponent
2478 } else if (D
.normalizedExponent
- 1 < INT_MIN
/ 42039 ||
2479 (D
.normalizedExponent
+ 1) * 28738 <=
2480 8651 * (semantics
->minExponent
- (int) semantics
->precision
)) {
2481 /* Underflow to zero and round. */
2482 category
= fcNormal
;
2484 fs
= normalize(rounding_mode
, lfLessThanHalf
);
2486 /* We can finally safely perform the max-exponent check. */
2487 } else if ((D
.normalizedExponent
- 1) * 42039
2488 >= 12655 * semantics
->maxExponent
) {
2489 /* Overflow and round. */
2490 fs
= handleOverflow(rounding_mode
);
2492 integerPart
*decSignificand
;
2493 unsigned int partCount
;
2495 /* A tight upper bound on number of bits required to hold an
2496 N-digit decimal integer is N * 196 / 59. Allocate enough space
2497 to hold the full significand, and an extra part required by
2499 partCount
= static_cast<unsigned int>(D
.lastSigDigit
- D
.firstSigDigit
) + 1;
2500 partCount
= partCountForBits(1 + 196 * partCount
/ 59);
2501 decSignificand
= new integerPart
[partCount
+ 1];
2504 /* Convert to binary efficiently - we do almost all multiplication
2505 in an integerPart. When this would overflow do we do a single
2506 bignum multiplication, and then revert again to multiplication
2507 in an integerPart. */
2509 integerPart decValue
, val
, multiplier
;
2517 if (p
== str
.end()) {
2521 decValue
= decDigitValue(*p
++);
2522 assert(decValue
< 10U && "Invalid character in significand");
2524 val
= val
* 10 + decValue
;
2525 /* The maximum number that can be multiplied by ten with any
2526 digit added without overflowing an integerPart. */
2527 } while (p
<= D
.lastSigDigit
&& multiplier
<= (~ (integerPart
) 0 - 9) / 10);
2529 /* Multiply out the current part. */
2530 APInt::tcMultiplyPart(decSignificand
, decSignificand
, multiplier
, val
,
2531 partCount
, partCount
+ 1, false);
2533 /* If we used another part (likely but not guaranteed), increase
2535 if (decSignificand
[partCount
])
2537 } while (p
<= D
.lastSigDigit
);
2539 category
= fcNormal
;
2540 fs
= roundSignificandWithExponent(decSignificand
, partCount
,
2541 D
.exponent
, rounding_mode
);
2543 delete [] decSignificand
;
2549 bool IEEEFloat::convertFromStringSpecials(StringRef str
) {
2550 if (str
.equals("inf") || str
.equals("INFINITY") || str
.equals("+Inf")) {
2555 if (str
.equals("-inf") || str
.equals("-INFINITY") || str
.equals("-Inf")) {
2560 if (str
.equals("nan") || str
.equals("NaN")) {
2561 makeNaN(false, false);
2565 if (str
.equals("-nan") || str
.equals("-NaN")) {
2566 makeNaN(false, true);
2573 IEEEFloat::opStatus
IEEEFloat::convertFromString(StringRef str
,
2574 roundingMode rounding_mode
) {
2575 assert(!str
.empty() && "Invalid string length");
2577 // Handle special cases.
2578 if (convertFromStringSpecials(str
))
2581 /* Handle a leading minus sign. */
2582 StringRef::iterator p
= str
.begin();
2583 size_t slen
= str
.size();
2584 sign
= *p
== '-' ? 1 : 0;
2585 if (*p
== '-' || *p
== '+') {
2588 assert(slen
&& "String has no digits");
2591 if (slen
>= 2 && p
[0] == '0' && (p
[1] == 'x' || p
[1] == 'X')) {
2592 assert(slen
- 2 && "Invalid string");
2593 return convertFromHexadecimalString(StringRef(p
+ 2, slen
- 2),
2597 return convertFromDecimalString(StringRef(p
, slen
), rounding_mode
);
2600 /* Write out a hexadecimal representation of the floating point value
2601 to DST, which must be of sufficient size, in the C99 form
2602 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2603 excluding the terminating NUL.
2605 If UPPERCASE, the output is in upper case, otherwise in lower case.
2607 HEXDIGITS digits appear altogether, rounding the value if
2608 necessary. If HEXDIGITS is 0, the minimal precision to display the
2609 number precisely is used instead. If nothing would appear after
2610 the decimal point it is suppressed.
2612 The decimal exponent is always printed and has at least one digit.
2613 Zero values display an exponent of zero. Infinities and NaNs
2614 appear as "infinity" or "nan" respectively.
2616 The above rules are as specified by C99. There is ambiguity about
2617 what the leading hexadecimal digit should be. This implementation
2618 uses whatever is necessary so that the exponent is displayed as
2619 stored. This implies the exponent will fall within the IEEE format
2620 range, and the leading hexadecimal digit will be 0 (for denormals),
2621 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2622 any other digits zero).
2624 unsigned int IEEEFloat::convertToHexString(char *dst
, unsigned int hexDigits
,
2626 roundingMode rounding_mode
) const {
2635 memcpy (dst
, upperCase
? infinityU
: infinityL
, sizeof infinityU
- 1);
2636 dst
+= sizeof infinityL
- 1;
2640 memcpy (dst
, upperCase
? NaNU
: NaNL
, sizeof NaNU
- 1);
2641 dst
+= sizeof NaNU
- 1;
2646 *dst
++ = upperCase
? 'X': 'x';
2648 if (hexDigits
> 1) {
2650 memset (dst
, '0', hexDigits
- 1);
2651 dst
+= hexDigits
- 1;
2653 *dst
++ = upperCase
? 'P': 'p';
2658 dst
= convertNormalToHexString (dst
, hexDigits
, upperCase
, rounding_mode
);
2664 return static_cast<unsigned int>(dst
- p
);
2667 /* Does the hard work of outputting the correctly rounded hexadecimal
2668 form of a normal floating point number with the specified number of
2669 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2670 digits necessary to print the value precisely is output. */
2671 char *IEEEFloat::convertNormalToHexString(char *dst
, unsigned int hexDigits
,
2673 roundingMode rounding_mode
) const {
2674 unsigned int count
, valueBits
, shift
, partsCount
, outputDigits
;
2675 const char *hexDigitChars
;
2676 const integerPart
*significand
;
2681 *dst
++ = upperCase
? 'X': 'x';
2684 hexDigitChars
= upperCase
? hexDigitsUpper
: hexDigitsLower
;
2686 significand
= significandParts();
2687 partsCount
= partCount();
2689 /* +3 because the first digit only uses the single integer bit, so
2690 we have 3 virtual zero most-significant-bits. */
2691 valueBits
= semantics
->precision
+ 3;
2692 shift
= integerPartWidth
- valueBits
% integerPartWidth
;
2694 /* The natural number of digits required ignoring trailing
2695 insignificant zeroes. */
2696 outputDigits
= (valueBits
- significandLSB () + 3) / 4;
2698 /* hexDigits of zero means use the required number for the
2699 precision. Otherwise, see if we are truncating. If we are,
2700 find out if we need to round away from zero. */
2702 if (hexDigits
< outputDigits
) {
2703 /* We are dropping non-zero bits, so need to check how to round.
2704 "bits" is the number of dropped bits. */
2706 lostFraction fraction
;
2708 bits
= valueBits
- hexDigits
* 4;
2709 fraction
= lostFractionThroughTruncation (significand
, partsCount
, bits
);
2710 roundUp
= roundAwayFromZero(rounding_mode
, fraction
, bits
);
2712 outputDigits
= hexDigits
;
2715 /* Write the digits consecutively, and start writing in the location
2716 of the hexadecimal point. We move the most significant digit
2717 left and add the hexadecimal point later. */
2720 count
= (valueBits
+ integerPartWidth
- 1) / integerPartWidth
;
2722 while (outputDigits
&& count
) {
2725 /* Put the most significant integerPartWidth bits in "part". */
2726 if (--count
== partsCount
)
2727 part
= 0; /* An imaginary higher zero part. */
2729 part
= significand
[count
] << shift
;
2732 part
|= significand
[count
- 1] >> (integerPartWidth
- shift
);
2734 /* Convert as much of "part" to hexdigits as we can. */
2735 unsigned int curDigits
= integerPartWidth
/ 4;
2737 if (curDigits
> outputDigits
)
2738 curDigits
= outputDigits
;
2739 dst
+= partAsHex (dst
, part
, curDigits
, hexDigitChars
);
2740 outputDigits
-= curDigits
;
2746 /* Note that hexDigitChars has a trailing '0'. */
2749 *q
= hexDigitChars
[hexDigitValue (*q
) + 1];
2750 } while (*q
== '0');
2753 /* Add trailing zeroes. */
2754 memset (dst
, '0', outputDigits
);
2755 dst
+= outputDigits
;
2758 /* Move the most significant digit to before the point, and if there
2759 is something after the decimal point add it. This must come
2760 after rounding above. */
2767 /* Finally output the exponent. */
2768 *dst
++ = upperCase
? 'P': 'p';
2770 return writeSignedDecimal (dst
, exponent
);
2773 hash_code
hash_value(const IEEEFloat
&Arg
) {
2774 if (!Arg
.isFiniteNonZero())
2775 return hash_combine((uint8_t)Arg
.category
,
2776 // NaN has no sign, fix it at zero.
2777 Arg
.isNaN() ? (uint8_t)0 : (uint8_t)Arg
.sign
,
2778 Arg
.semantics
->precision
);
2780 // Normal floats need their exponent and significand hashed.
2781 return hash_combine((uint8_t)Arg
.category
, (uint8_t)Arg
.sign
,
2782 Arg
.semantics
->precision
, Arg
.exponent
,
2784 Arg
.significandParts(),
2785 Arg
.significandParts() + Arg
.partCount()));
2788 // Conversion from APFloat to/from host float/double. It may eventually be
2789 // possible to eliminate these and have everybody deal with APFloats, but that
2790 // will take a while. This approach will not easily extend to long double.
2791 // Current implementation requires integerPartWidth==64, which is correct at
2792 // the moment but could be made more general.
2794 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2795 // the actual IEEE respresentations. We compensate for that here.
2797 APInt
IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
2798 assert(semantics
== (const llvm::fltSemantics
*)&semX87DoubleExtended
);
2799 assert(partCount()==2);
2801 uint64_t myexponent
, mysignificand
;
2803 if (isFiniteNonZero()) {
2804 myexponent
= exponent
+16383; //bias
2805 mysignificand
= significandParts()[0];
2806 if (myexponent
==1 && !(mysignificand
& 0x8000000000000000ULL
))
2807 myexponent
= 0; // denormal
2808 } else if (category
==fcZero
) {
2811 } else if (category
==fcInfinity
) {
2812 myexponent
= 0x7fff;
2813 mysignificand
= 0x8000000000000000ULL
;
2815 assert(category
== fcNaN
&& "Unknown category");
2816 myexponent
= 0x7fff;
2817 mysignificand
= significandParts()[0];
2821 words
[0] = mysignificand
;
2822 words
[1] = ((uint64_t)(sign
& 1) << 15) |
2823 (myexponent
& 0x7fffLL
);
2824 return APInt(80, words
);
2827 APInt
IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
2828 assert(semantics
== (const llvm::fltSemantics
*)&semPPCDoubleDoubleLegacy
);
2829 assert(partCount()==2);
2835 // Convert number to double. To avoid spurious underflows, we re-
2836 // normalize against the "double" minExponent first, and only *then*
2837 // truncate the mantissa. The result of that second conversion
2838 // may be inexact, but should never underflow.
2839 // Declare fltSemantics before APFloat that uses it (and
2840 // saves pointer to it) to ensure correct destruction order.
2841 fltSemantics extendedSemantics
= *semantics
;
2842 extendedSemantics
.minExponent
= semIEEEdouble
.minExponent
;
2843 IEEEFloat
extended(*this);
2844 fs
= extended
.convert(extendedSemantics
, rmNearestTiesToEven
, &losesInfo
);
2845 assert(fs
== opOK
&& !losesInfo
);
2848 IEEEFloat
u(extended
);
2849 fs
= u
.convert(semIEEEdouble
, rmNearestTiesToEven
, &losesInfo
);
2850 assert(fs
== opOK
|| fs
== opInexact
);
2852 words
[0] = *u
.convertDoubleAPFloatToAPInt().getRawData();
2854 // If conversion was exact or resulted in a special case, we're done;
2855 // just set the second double to zero. Otherwise, re-convert back to
2856 // the extended format and compute the difference. This now should
2857 // convert exactly to double.
2858 if (u
.isFiniteNonZero() && losesInfo
) {
2859 fs
= u
.convert(extendedSemantics
, rmNearestTiesToEven
, &losesInfo
);
2860 assert(fs
== opOK
&& !losesInfo
);
2863 IEEEFloat
v(extended
);
2864 v
.subtract(u
, rmNearestTiesToEven
);
2865 fs
= v
.convert(semIEEEdouble
, rmNearestTiesToEven
, &losesInfo
);
2866 assert(fs
== opOK
&& !losesInfo
);
2868 words
[1] = *v
.convertDoubleAPFloatToAPInt().getRawData();
2873 return APInt(128, words
);
2876 APInt
IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
2877 assert(semantics
== (const llvm::fltSemantics
*)&semIEEEquad
);
2878 assert(partCount()==2);
2880 uint64_t myexponent
, mysignificand
, mysignificand2
;
2882 if (isFiniteNonZero()) {
2883 myexponent
= exponent
+16383; //bias
2884 mysignificand
= significandParts()[0];
2885 mysignificand2
= significandParts()[1];
2886 if (myexponent
==1 && !(mysignificand2
& 0x1000000000000LL
))
2887 myexponent
= 0; // denormal
2888 } else if (category
==fcZero
) {
2890 mysignificand
= mysignificand2
= 0;
2891 } else if (category
==fcInfinity
) {
2892 myexponent
= 0x7fff;
2893 mysignificand
= mysignificand2
= 0;
2895 assert(category
== fcNaN
&& "Unknown category!");
2896 myexponent
= 0x7fff;
2897 mysignificand
= significandParts()[0];
2898 mysignificand2
= significandParts()[1];
2902 words
[0] = mysignificand
;
2903 words
[1] = ((uint64_t)(sign
& 1) << 63) |
2904 ((myexponent
& 0x7fff) << 48) |
2905 (mysignificand2
& 0xffffffffffffLL
);
2907 return APInt(128, words
);
2910 APInt
IEEEFloat::convertDoubleAPFloatToAPInt() const {
2911 assert(semantics
== (const llvm::fltSemantics
*)&semIEEEdouble
);
2912 assert(partCount()==1);
2914 uint64_t myexponent
, mysignificand
;
2916 if (isFiniteNonZero()) {
2917 myexponent
= exponent
+1023; //bias
2918 mysignificand
= *significandParts();
2919 if (myexponent
==1 && !(mysignificand
& 0x10000000000000LL
))
2920 myexponent
= 0; // denormal
2921 } else if (category
==fcZero
) {
2924 } else if (category
==fcInfinity
) {
2928 assert(category
== fcNaN
&& "Unknown category!");
2930 mysignificand
= *significandParts();
2933 return APInt(64, ((((uint64_t)(sign
& 1) << 63) |
2934 ((myexponent
& 0x7ff) << 52) |
2935 (mysignificand
& 0xfffffffffffffLL
))));
2938 APInt
IEEEFloat::convertFloatAPFloatToAPInt() const {
2939 assert(semantics
== (const llvm::fltSemantics
*)&semIEEEsingle
);
2940 assert(partCount()==1);
2942 uint32_t myexponent
, mysignificand
;
2944 if (isFiniteNonZero()) {
2945 myexponent
= exponent
+127; //bias
2946 mysignificand
= (uint32_t)*significandParts();
2947 if (myexponent
== 1 && !(mysignificand
& 0x800000))
2948 myexponent
= 0; // denormal
2949 } else if (category
==fcZero
) {
2952 } else if (category
==fcInfinity
) {
2956 assert(category
== fcNaN
&& "Unknown category!");
2958 mysignificand
= (uint32_t)*significandParts();
2961 return APInt(32, (((sign
&1) << 31) | ((myexponent
&0xff) << 23) |
2962 (mysignificand
& 0x7fffff)));
2965 APInt
IEEEFloat::convertHalfAPFloatToAPInt() const {
2966 assert(semantics
== (const llvm::fltSemantics
*)&semIEEEhalf
);
2967 assert(partCount()==1);
2969 uint32_t myexponent
, mysignificand
;
2971 if (isFiniteNonZero()) {
2972 myexponent
= exponent
+15; //bias
2973 mysignificand
= (uint32_t)*significandParts();
2974 if (myexponent
== 1 && !(mysignificand
& 0x400))
2975 myexponent
= 0; // denormal
2976 } else if (category
==fcZero
) {
2979 } else if (category
==fcInfinity
) {
2983 assert(category
== fcNaN
&& "Unknown category!");
2985 mysignificand
= (uint32_t)*significandParts();
2988 return APInt(16, (((sign
&1) << 15) | ((myexponent
&0x1f) << 10) |
2989 (mysignificand
& 0x3ff)));
2992 // This function creates an APInt that is just a bit map of the floating
2993 // point constant as it would appear in memory. It is not a conversion,
2994 // and treating the result as a normal integer is unlikely to be useful.
2996 APInt
IEEEFloat::bitcastToAPInt() const {
2997 if (semantics
== (const llvm::fltSemantics
*)&semIEEEhalf
)
2998 return convertHalfAPFloatToAPInt();
3000 if (semantics
== (const llvm::fltSemantics
*)&semIEEEsingle
)
3001 return convertFloatAPFloatToAPInt();
3003 if (semantics
== (const llvm::fltSemantics
*)&semIEEEdouble
)
3004 return convertDoubleAPFloatToAPInt();
3006 if (semantics
== (const llvm::fltSemantics
*)&semIEEEquad
)
3007 return convertQuadrupleAPFloatToAPInt();
3009 if (semantics
== (const llvm::fltSemantics
*)&semPPCDoubleDoubleLegacy
)
3010 return convertPPCDoubleDoubleAPFloatToAPInt();
3012 assert(semantics
== (const llvm::fltSemantics
*)&semX87DoubleExtended
&&
3014 return convertF80LongDoubleAPFloatToAPInt();
3017 float IEEEFloat::convertToFloat() const {
3018 assert(semantics
== (const llvm::fltSemantics
*)&semIEEEsingle
&&
3019 "Float semantics are not IEEEsingle");
3020 APInt api
= bitcastToAPInt();
3021 return api
.bitsToFloat();
3024 double IEEEFloat::convertToDouble() const {
3025 assert(semantics
== (const llvm::fltSemantics
*)&semIEEEdouble
&&
3026 "Float semantics are not IEEEdouble");
3027 APInt api
= bitcastToAPInt();
3028 return api
.bitsToDouble();
3031 /// Integer bit is explicit in this format. Intel hardware (387 and later)
3032 /// does not support these bit patterns:
3033 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3034 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3035 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3036 /// exponent = 0, integer bit 1 ("pseudodenormal")
3037 /// At the moment, the first three are treated as NaNs, the last one as Normal.
3038 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt
&api
) {
3039 assert(api
.getBitWidth()==80);
3040 uint64_t i1
= api
.getRawData()[0];
3041 uint64_t i2
= api
.getRawData()[1];
3042 uint64_t myexponent
= (i2
& 0x7fff);
3043 uint64_t mysignificand
= i1
;
3044 uint8_t myintegerbit
= mysignificand
>> 63;
3046 initialize(&semX87DoubleExtended
);
3047 assert(partCount()==2);
3049 sign
= static_cast<unsigned int>(i2
>>15);
3050 if (myexponent
== 0 && mysignificand
== 0) {
3051 // exponent, significand meaningless
3053 } else if (myexponent
==0x7fff && mysignificand
==0x8000000000000000ULL
) {
3054 // exponent, significand meaningless
3055 category
= fcInfinity
;
3056 } else if ((myexponent
== 0x7fff && mysignificand
!= 0x8000000000000000ULL
) ||
3057 (myexponent
!= 0x7fff && myexponent
!= 0 && myintegerbit
== 0)) {
3058 // exponent meaningless
3060 significandParts()[0] = mysignificand
;
3061 significandParts()[1] = 0;
3063 category
= fcNormal
;
3064 exponent
= myexponent
- 16383;
3065 significandParts()[0] = mysignificand
;
3066 significandParts()[1] = 0;
3067 if (myexponent
==0) // denormal
3072 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt
&api
) {
3073 assert(api
.getBitWidth()==128);
3074 uint64_t i1
= api
.getRawData()[0];
3075 uint64_t i2
= api
.getRawData()[1];
3079 // Get the first double and convert to our format.
3080 initFromDoubleAPInt(APInt(64, i1
));
3081 fs
= convert(semPPCDoubleDoubleLegacy
, rmNearestTiesToEven
, &losesInfo
);
3082 assert(fs
== opOK
&& !losesInfo
);
3085 // Unless we have a special case, add in second double.
3086 if (isFiniteNonZero()) {
3087 IEEEFloat
v(semIEEEdouble
, APInt(64, i2
));
3088 fs
= v
.convert(semPPCDoubleDoubleLegacy
, rmNearestTiesToEven
, &losesInfo
);
3089 assert(fs
== opOK
&& !losesInfo
);
3092 add(v
, rmNearestTiesToEven
);
3096 void IEEEFloat::initFromQuadrupleAPInt(const APInt
&api
) {
3097 assert(api
.getBitWidth()==128);
3098 uint64_t i1
= api
.getRawData()[0];
3099 uint64_t i2
= api
.getRawData()[1];
3100 uint64_t myexponent
= (i2
>> 48) & 0x7fff;
3101 uint64_t mysignificand
= i1
;
3102 uint64_t mysignificand2
= i2
& 0xffffffffffffLL
;
3104 initialize(&semIEEEquad
);
3105 assert(partCount()==2);
3107 sign
= static_cast<unsigned int>(i2
>>63);
3108 if (myexponent
==0 &&
3109 (mysignificand
==0 && mysignificand2
==0)) {
3110 // exponent, significand meaningless
3112 } else if (myexponent
==0x7fff &&
3113 (mysignificand
==0 && mysignificand2
==0)) {
3114 // exponent, significand meaningless
3115 category
= fcInfinity
;
3116 } else if (myexponent
==0x7fff &&
3117 (mysignificand
!=0 || mysignificand2
!=0)) {
3118 // exponent meaningless
3120 significandParts()[0] = mysignificand
;
3121 significandParts()[1] = mysignificand2
;
3123 category
= fcNormal
;
3124 exponent
= myexponent
- 16383;
3125 significandParts()[0] = mysignificand
;
3126 significandParts()[1] = mysignificand2
;
3127 if (myexponent
==0) // denormal
3130 significandParts()[1] |= 0x1000000000000LL
; // integer bit
3134 void IEEEFloat::initFromDoubleAPInt(const APInt
&api
) {
3135 assert(api
.getBitWidth()==64);
3136 uint64_t i
= *api
.getRawData();
3137 uint64_t myexponent
= (i
>> 52) & 0x7ff;
3138 uint64_t mysignificand
= i
& 0xfffffffffffffLL
;
3140 initialize(&semIEEEdouble
);
3141 assert(partCount()==1);
3143 sign
= static_cast<unsigned int>(i
>>63);
3144 if (myexponent
==0 && mysignificand
==0) {
3145 // exponent, significand meaningless
3147 } else if (myexponent
==0x7ff && mysignificand
==0) {
3148 // exponent, significand meaningless
3149 category
= fcInfinity
;
3150 } else if (myexponent
==0x7ff && mysignificand
!=0) {
3151 // exponent meaningless
3153 *significandParts() = mysignificand
;
3155 category
= fcNormal
;
3156 exponent
= myexponent
- 1023;
3157 *significandParts() = mysignificand
;
3158 if (myexponent
==0) // denormal
3161 *significandParts() |= 0x10000000000000LL
; // integer bit
3165 void IEEEFloat::initFromFloatAPInt(const APInt
&api
) {
3166 assert(api
.getBitWidth()==32);
3167 uint32_t i
= (uint32_t)*api
.getRawData();
3168 uint32_t myexponent
= (i
>> 23) & 0xff;
3169 uint32_t mysignificand
= i
& 0x7fffff;
3171 initialize(&semIEEEsingle
);
3172 assert(partCount()==1);
3175 if (myexponent
==0 && mysignificand
==0) {
3176 // exponent, significand meaningless
3178 } else if (myexponent
==0xff && mysignificand
==0) {
3179 // exponent, significand meaningless
3180 category
= fcInfinity
;
3181 } else if (myexponent
==0xff && mysignificand
!=0) {
3182 // sign, exponent, significand meaningless
3184 *significandParts() = mysignificand
;
3186 category
= fcNormal
;
3187 exponent
= myexponent
- 127; //bias
3188 *significandParts() = mysignificand
;
3189 if (myexponent
==0) // denormal
3192 *significandParts() |= 0x800000; // integer bit
3196 void IEEEFloat::initFromHalfAPInt(const APInt
&api
) {
3197 assert(api
.getBitWidth()==16);
3198 uint32_t i
= (uint32_t)*api
.getRawData();
3199 uint32_t myexponent
= (i
>> 10) & 0x1f;
3200 uint32_t mysignificand
= i
& 0x3ff;
3202 initialize(&semIEEEhalf
);
3203 assert(partCount()==1);
3206 if (myexponent
==0 && mysignificand
==0) {
3207 // exponent, significand meaningless
3209 } else if (myexponent
==0x1f && mysignificand
==0) {
3210 // exponent, significand meaningless
3211 category
= fcInfinity
;
3212 } else if (myexponent
==0x1f && mysignificand
!=0) {
3213 // sign, exponent, significand meaningless
3215 *significandParts() = mysignificand
;
3217 category
= fcNormal
;
3218 exponent
= myexponent
- 15; //bias
3219 *significandParts() = mysignificand
;
3220 if (myexponent
==0) // denormal
3223 *significandParts() |= 0x400; // integer bit
3227 /// Treat api as containing the bits of a floating point number. Currently
3228 /// we infer the floating point type from the size of the APInt. The
3229 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3230 /// when the size is anything else).
3231 void IEEEFloat::initFromAPInt(const fltSemantics
*Sem
, const APInt
&api
) {
3232 if (Sem
== &semIEEEhalf
)
3233 return initFromHalfAPInt(api
);
3234 if (Sem
== &semIEEEsingle
)
3235 return initFromFloatAPInt(api
);
3236 if (Sem
== &semIEEEdouble
)
3237 return initFromDoubleAPInt(api
);
3238 if (Sem
== &semX87DoubleExtended
)
3239 return initFromF80LongDoubleAPInt(api
);
3240 if (Sem
== &semIEEEquad
)
3241 return initFromQuadrupleAPInt(api
);
3242 if (Sem
== &semPPCDoubleDoubleLegacy
)
3243 return initFromPPCDoubleDoubleAPInt(api
);
3245 llvm_unreachable(nullptr);
3248 /// Make this number the largest magnitude normal number in the given
3250 void IEEEFloat::makeLargest(bool Negative
) {
3251 // We want (in interchange format):
3252 // sign = {Negative}
3254 // significand = 1..1
3255 category
= fcNormal
;
3257 exponent
= semantics
->maxExponent
;
3259 // Use memset to set all but the highest integerPart to all ones.
3260 integerPart
*significand
= significandParts();
3261 unsigned PartCount
= partCount();
3262 memset(significand
, 0xFF, sizeof(integerPart
)*(PartCount
- 1));
3264 // Set the high integerPart especially setting all unused top bits for
3265 // internal consistency.
3266 const unsigned NumUnusedHighBits
=
3267 PartCount
*integerPartWidth
- semantics
->precision
;
3268 significand
[PartCount
- 1] = (NumUnusedHighBits
< integerPartWidth
)
3269 ? (~integerPart(0) >> NumUnusedHighBits
)
3273 /// Make this number the smallest magnitude denormal number in the given
3275 void IEEEFloat::makeSmallest(bool Negative
) {
3276 // We want (in interchange format):
3277 // sign = {Negative}
3279 // significand = 0..01
3280 category
= fcNormal
;
3282 exponent
= semantics
->minExponent
;
3283 APInt::tcSet(significandParts(), 1, partCount());
3286 void IEEEFloat::makeSmallestNormalized(bool Negative
) {
3287 // We want (in interchange format):
3288 // sign = {Negative}
3290 // significand = 10..0
3292 category
= fcNormal
;
3295 exponent
= semantics
->minExponent
;
3296 significandParts()[partCountForBits(semantics
->precision
) - 1] |=
3297 (((integerPart
)1) << ((semantics
->precision
- 1) % integerPartWidth
));
3300 IEEEFloat::IEEEFloat(const fltSemantics
&Sem
, const APInt
&API
) {
3301 initFromAPInt(&Sem
, API
);
3304 IEEEFloat::IEEEFloat(float f
) {
3305 initFromAPInt(&semIEEEsingle
, APInt::floatToBits(f
));
3308 IEEEFloat::IEEEFloat(double d
) {
3309 initFromAPInt(&semIEEEdouble
, APInt::doubleToBits(d
));
3313 void append(SmallVectorImpl
<char> &Buffer
, StringRef Str
) {
3314 Buffer
.append(Str
.begin(), Str
.end());
3317 /// Removes data from the given significand until it is no more
3318 /// precise than is required for the desired precision.
3319 void AdjustToPrecision(APInt
&significand
,
3320 int &exp
, unsigned FormatPrecision
) {
3321 unsigned bits
= significand
.getActiveBits();
3323 // 196/59 is a very slight overestimate of lg_2(10).
3324 unsigned bitsRequired
= (FormatPrecision
* 196 + 58) / 59;
3326 if (bits
<= bitsRequired
) return;
3328 unsigned tensRemovable
= (bits
- bitsRequired
) * 59 / 196;
3329 if (!tensRemovable
) return;
3331 exp
+= tensRemovable
;
3333 APInt
divisor(significand
.getBitWidth(), 1);
3334 APInt
powten(significand
.getBitWidth(), 10);
3336 if (tensRemovable
& 1)
3338 tensRemovable
>>= 1;
3339 if (!tensRemovable
) break;
3343 significand
= significand
.udiv(divisor
);
3345 // Truncate the significand down to its active bit count.
3346 significand
= significand
.trunc(significand
.getActiveBits());
3350 void AdjustToPrecision(SmallVectorImpl
<char> &buffer
,
3351 int &exp
, unsigned FormatPrecision
) {
3352 unsigned N
= buffer
.size();
3353 if (N
<= FormatPrecision
) return;
3355 // The most significant figures are the last ones in the buffer.
3356 unsigned FirstSignificant
= N
- FormatPrecision
;
3359 // FIXME: this probably shouldn't use 'round half up'.
3361 // Rounding down is just a truncation, except we also want to drop
3362 // trailing zeros from the new result.
3363 if (buffer
[FirstSignificant
- 1] < '5') {
3364 while (FirstSignificant
< N
&& buffer
[FirstSignificant
] == '0')
3367 exp
+= FirstSignificant
;
3368 buffer
.erase(&buffer
[0], &buffer
[FirstSignificant
]);
3372 // Rounding up requires a decimal add-with-carry. If we continue
3373 // the carry, the newly-introduced zeros will just be truncated.
3374 for (unsigned I
= FirstSignificant
; I
!= N
; ++I
) {
3375 if (buffer
[I
] == '9') {
3383 // If we carried through, we have exactly one digit of precision.
3384 if (FirstSignificant
== N
) {
3385 exp
+= FirstSignificant
;
3387 buffer
.push_back('1');
3391 exp
+= FirstSignificant
;
3392 buffer
.erase(&buffer
[0], &buffer
[FirstSignificant
]);
3396 void IEEEFloat::toString(SmallVectorImpl
<char> &Str
, unsigned FormatPrecision
,
3397 unsigned FormatMaxPadding
, bool TruncateZero
) const {
3401 return append(Str
, "-Inf");
3403 return append(Str
, "+Inf");
3405 case fcNaN
: return append(Str
, "NaN");
3411 if (!FormatMaxPadding
) {
3413 append(Str
, "0.0E+0");
3416 if (FormatPrecision
> 1)
3417 Str
.append(FormatPrecision
- 1, '0');
3418 append(Str
, "e+00");
3431 // Decompose the number into an APInt and an exponent.
3432 int exp
= exponent
- ((int) semantics
->precision
- 1);
3433 APInt
significand(semantics
->precision
,
3434 makeArrayRef(significandParts(),
3435 partCountForBits(semantics
->precision
)));
3437 // Set FormatPrecision if zero. We want to do this before we
3438 // truncate trailing zeros, as those are part of the precision.
3439 if (!FormatPrecision
) {
3440 // We use enough digits so the number can be round-tripped back to an
3441 // APFloat. The formula comes from "How to Print Floating-Point Numbers
3442 // Accurately" by Steele and White.
3443 // FIXME: Using a formula based purely on the precision is conservative;
3444 // we can print fewer digits depending on the actual value being printed.
3446 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
3447 FormatPrecision
= 2 + semantics
->precision
* 59 / 196;
3450 // Ignore trailing binary zeros.
3451 int trailingZeros
= significand
.countTrailingZeros();
3452 exp
+= trailingZeros
;
3453 significand
.lshrInPlace(trailingZeros
);
3455 // Change the exponent from 2^e to 10^e.
3458 } else if (exp
> 0) {
3460 significand
= significand
.zext(semantics
->precision
+ exp
);
3461 significand
<<= exp
;
3463 } else { /* exp < 0 */
3466 // We transform this using the identity:
3467 // (N)(2^-e) == (N)(5^e)(10^-e)
3468 // This means we have to multiply N (the significand) by 5^e.
3469 // To avoid overflow, we have to operate on numbers large
3470 // enough to store N * 5^e:
3471 // log2(N * 5^e) == log2(N) + e * log2(5)
3472 // <= semantics->precision + e * 137 / 59
3473 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3475 unsigned precision
= semantics
->precision
+ (137 * texp
+ 136) / 59;
3477 // Multiply significand by 5^e.
3478 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3479 significand
= significand
.zext(precision
);
3480 APInt
five_to_the_i(precision
, 5);
3482 if (texp
& 1) significand
*= five_to_the_i
;
3486 five_to_the_i
*= five_to_the_i
;
3490 AdjustToPrecision(significand
, exp
, FormatPrecision
);
3492 SmallVector
<char, 256> buffer
;
3495 unsigned precision
= significand
.getBitWidth();
3496 APInt
ten(precision
, 10);
3497 APInt
digit(precision
, 0);
3499 bool inTrail
= true;
3500 while (significand
!= 0) {
3501 // digit <- significand % 10
3502 // significand <- significand / 10
3503 APInt::udivrem(significand
, ten
, significand
, digit
);
3505 unsigned d
= digit
.getZExtValue();
3507 // Drop trailing zeros.
3508 if (inTrail
&& !d
) exp
++;
3510 buffer
.push_back((char) ('0' + d
));
3515 assert(!buffer
.empty() && "no characters in buffer!");
3517 // Drop down to FormatPrecision.
3518 // TODO: don't do more precise calculations above than are required.
3519 AdjustToPrecision(buffer
, exp
, FormatPrecision
);
3521 unsigned NDigits
= buffer
.size();
3523 // Check whether we should use scientific notation.
3524 bool FormatScientific
;
3525 if (!FormatMaxPadding
)
3526 FormatScientific
= true;
3531 // But we shouldn't make the number look more precise than it is.
3532 FormatScientific
= ((unsigned) exp
> FormatMaxPadding
||
3533 NDigits
+ (unsigned) exp
> FormatPrecision
);
3535 // Power of the most significant digit.
3536 int MSD
= exp
+ (int) (NDigits
- 1);
3539 FormatScientific
= false;
3541 // 765e-5 == 0.00765
3543 FormatScientific
= ((unsigned) -MSD
) > FormatMaxPadding
;
3548 // Scientific formatting is pretty straightforward.
3549 if (FormatScientific
) {
3550 exp
+= (NDigits
- 1);
3552 Str
.push_back(buffer
[NDigits
-1]);
3554 if (NDigits
== 1 && TruncateZero
)
3557 for (unsigned I
= 1; I
!= NDigits
; ++I
)
3558 Str
.push_back(buffer
[NDigits
-1-I
]);
3559 // Fill with zeros up to FormatPrecision.
3560 if (!TruncateZero
&& FormatPrecision
> NDigits
- 1)
3561 Str
.append(FormatPrecision
- NDigits
+ 1, '0');
3562 // For !TruncateZero we use lower 'e'.
3563 Str
.push_back(TruncateZero
? 'E' : 'e');
3565 Str
.push_back(exp
>= 0 ? '+' : '-');
3566 if (exp
< 0) exp
= -exp
;
3567 SmallVector
<char, 6> expbuf
;
3569 expbuf
.push_back((char) ('0' + (exp
% 10)));
3572 // Exponent always at least two digits if we do not truncate zeros.
3573 if (!TruncateZero
&& expbuf
.size() < 2)
3574 expbuf
.push_back('0');
3575 for (unsigned I
= 0, E
= expbuf
.size(); I
!= E
; ++I
)
3576 Str
.push_back(expbuf
[E
-1-I
]);
3580 // Non-scientific, positive exponents.
3582 for (unsigned I
= 0; I
!= NDigits
; ++I
)
3583 Str
.push_back(buffer
[NDigits
-1-I
]);
3584 for (unsigned I
= 0; I
!= (unsigned) exp
; ++I
)
3589 // Non-scientific, negative exponents.
3591 // The number of digits to the left of the decimal point.
3592 int NWholeDigits
= exp
+ (int) NDigits
;
3595 if (NWholeDigits
> 0) {
3596 for (; I
!= (unsigned) NWholeDigits
; ++I
)
3597 Str
.push_back(buffer
[NDigits
-I
-1]);
3600 unsigned NZeros
= 1 + (unsigned) -NWholeDigits
;
3604 for (unsigned Z
= 1; Z
!= NZeros
; ++Z
)
3608 for (; I
!= NDigits
; ++I
)
3609 Str
.push_back(buffer
[NDigits
-I
-1]);
3612 bool IEEEFloat::getExactInverse(APFloat
*inv
) const {
3613 // Special floats and denormals have no exact inverse.
3614 if (!isFiniteNonZero())
3617 // Check that the number is a power of two by making sure that only the
3618 // integer bit is set in the significand.
3619 if (significandLSB() != semantics
->precision
- 1)
3623 IEEEFloat
reciprocal(*semantics
, 1ULL);
3624 if (reciprocal
.divide(*this, rmNearestTiesToEven
) != opOK
)
3627 // Avoid multiplication with a denormal, it is not safe on all platforms and
3628 // may be slower than a normal division.
3629 if (reciprocal
.isDenormal())
3632 assert(reciprocal
.isFiniteNonZero() &&
3633 reciprocal
.significandLSB() == reciprocal
.semantics
->precision
- 1);
3636 *inv
= APFloat(reciprocal
, *semantics
);
3641 bool IEEEFloat::isSignaling() const {
3645 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
3646 // first bit of the trailing significand being 0.
3647 return !APInt::tcExtractBit(significandParts(), semantics
->precision
- 2);
3650 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
3652 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
3653 /// appropriate sign switching before/after the computation.
3654 IEEEFloat::opStatus
IEEEFloat::next(bool nextDown
) {
3655 // If we are performing nextDown, swap sign so we have -x.
3659 // Compute nextUp(x)
3660 opStatus result
= opOK
;
3662 // Handle each float category separately.
3665 // nextUp(+inf) = +inf
3668 // nextUp(-inf) = -getLargest()
3672 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
3673 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
3674 // change the payload.
3675 if (isSignaling()) {
3676 result
= opInvalidOp
;
3677 // For consistency, propagate the sign of the sNaN to the qNaN.
3678 makeNaN(false, isNegative(), nullptr);
3682 // nextUp(pm 0) = +getSmallest()
3683 makeSmallest(false);
3686 // nextUp(-getSmallest()) = -0
3687 if (isSmallest() && isNegative()) {
3688 APInt::tcSet(significandParts(), 0, partCount());
3694 // nextUp(getLargest()) == INFINITY
3695 if (isLargest() && !isNegative()) {
3696 APInt::tcSet(significandParts(), 0, partCount());
3697 category
= fcInfinity
;
3698 exponent
= semantics
->maxExponent
+ 1;
3702 // nextUp(normal) == normal + inc.
3704 // If we are negative, we need to decrement the significand.
3706 // We only cross a binade boundary that requires adjusting the exponent
3708 // 1. exponent != semantics->minExponent. This implies we are not in the
3709 // smallest binade or are dealing with denormals.
3710 // 2. Our significand excluding the integral bit is all zeros.
3711 bool WillCrossBinadeBoundary
=
3712 exponent
!= semantics
->minExponent
&& isSignificandAllZeros();
3714 // Decrement the significand.
3716 // We always do this since:
3717 // 1. If we are dealing with a non-binade decrement, by definition we
3718 // just decrement the significand.
3719 // 2. If we are dealing with a normal -> normal binade decrement, since
3720 // we have an explicit integral bit the fact that all bits but the
3721 // integral bit are zero implies that subtracting one will yield a
3722 // significand with 0 integral bit and 1 in all other spots. Thus we
3723 // must just adjust the exponent and set the integral bit to 1.
3724 // 3. If we are dealing with a normal -> denormal binade decrement,
3725 // since we set the integral bit to 0 when we represent denormals, we
3726 // just decrement the significand.
3727 integerPart
*Parts
= significandParts();
3728 APInt::tcDecrement(Parts
, partCount());
3730 if (WillCrossBinadeBoundary
) {
3731 // Our result is a normal number. Do the following:
3732 // 1. Set the integral bit to 1.
3733 // 2. Decrement the exponent.
3734 APInt::tcSetBit(Parts
, semantics
->precision
- 1);
3738 // If we are positive, we need to increment the significand.
3740 // We only cross a binade boundary that requires adjusting the exponent if
3741 // the input is not a denormal and all of said input's significand bits
3742 // are set. If all of said conditions are true: clear the significand, set
3743 // the integral bit to 1, and increment the exponent. If we have a
3744 // denormal always increment since moving denormals and the numbers in the
3745 // smallest normal binade have the same exponent in our representation.
3746 bool WillCrossBinadeBoundary
= !isDenormal() && isSignificandAllOnes();
3748 if (WillCrossBinadeBoundary
) {
3749 integerPart
*Parts
= significandParts();
3750 APInt::tcSet(Parts
, 0, partCount());
3751 APInt::tcSetBit(Parts
, semantics
->precision
- 1);
3752 assert(exponent
!= semantics
->maxExponent
&&
3753 "We can not increment an exponent beyond the maxExponent allowed"
3754 " by the given floating point semantics.");
3757 incrementSignificand();
3763 // If we are performing nextDown, swap sign so we have -nextUp(-x)
3770 void IEEEFloat::makeInf(bool Negative
) {
3771 category
= fcInfinity
;
3773 exponent
= semantics
->maxExponent
+ 1;
3774 APInt::tcSet(significandParts(), 0, partCount());
3777 void IEEEFloat::makeZero(bool Negative
) {
3780 exponent
= semantics
->minExponent
-1;
3781 APInt::tcSet(significandParts(), 0, partCount());
3784 void IEEEFloat::makeQuiet() {
3786 APInt::tcSetBit(significandParts(), semantics
->precision
- 2);
3789 int ilogb(const IEEEFloat
&Arg
) {
3791 return IEEEFloat::IEK_NaN
;
3793 return IEEEFloat::IEK_Zero
;
3794 if (Arg
.isInfinity())
3795 return IEEEFloat::IEK_Inf
;
3796 if (!Arg
.isDenormal())
3797 return Arg
.exponent
;
3799 IEEEFloat
Normalized(Arg
);
3800 int SignificandBits
= Arg
.getSemantics().precision
- 1;
3802 Normalized
.exponent
+= SignificandBits
;
3803 Normalized
.normalize(IEEEFloat::rmNearestTiesToEven
, lfExactlyZero
);
3804 return Normalized
.exponent
- SignificandBits
;
3807 IEEEFloat
scalbn(IEEEFloat X
, int Exp
, IEEEFloat::roundingMode RoundingMode
) {
3808 auto MaxExp
= X
.getSemantics().maxExponent
;
3809 auto MinExp
= X
.getSemantics().minExponent
;
3811 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
3812 // overflow; clamp it to a safe range before adding, but ensure that the range
3813 // is large enough that the clamp does not change the result. The range we
3814 // need to support is the difference between the largest possible exponent and
3815 // the normalized exponent of half the smallest denormal.
3817 int SignificandBits
= X
.getSemantics().precision
- 1;
3818 int MaxIncrement
= MaxExp
- (MinExp
- SignificandBits
) + 1;
3820 // Clamp to one past the range ends to let normalize handle overlflow.
3821 X
.exponent
+= std::min(std::max(Exp
, -MaxIncrement
- 1), MaxIncrement
);
3822 X
.normalize(RoundingMode
, lfExactlyZero
);
3828 IEEEFloat
frexp(const IEEEFloat
&Val
, int &Exp
, IEEEFloat::roundingMode RM
) {
3831 // Quiet signalling nans.
3832 if (Exp
== IEEEFloat::IEK_NaN
) {
3833 IEEEFloat
Quiet(Val
);
3838 if (Exp
== IEEEFloat::IEK_Inf
)
3841 // 1 is added because frexp is defined to return a normalized fraction in
3842 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
3843 Exp
= Exp
== IEEEFloat::IEK_Zero
? 0 : Exp
+ 1;
3844 return scalbn(Val
, -Exp
, RM
);
3847 DoubleAPFloat::DoubleAPFloat(const fltSemantics
&S
)
3849 Floats(new APFloat
[2]{APFloat(semIEEEdouble
), APFloat(semIEEEdouble
)}) {
3850 assert(Semantics
== &semPPCDoubleDouble
);
3853 DoubleAPFloat::DoubleAPFloat(const fltSemantics
&S
, uninitializedTag
)
3855 Floats(new APFloat
[2]{APFloat(semIEEEdouble
, uninitialized
),
3856 APFloat(semIEEEdouble
, uninitialized
)}) {
3857 assert(Semantics
== &semPPCDoubleDouble
);
3860 DoubleAPFloat::DoubleAPFloat(const fltSemantics
&S
, integerPart I
)
3861 : Semantics(&S
), Floats(new APFloat
[2]{APFloat(semIEEEdouble
, I
),
3862 APFloat(semIEEEdouble
)}) {
3863 assert(Semantics
== &semPPCDoubleDouble
);
3866 DoubleAPFloat::DoubleAPFloat(const fltSemantics
&S
, const APInt
&I
)
3868 Floats(new APFloat
[2]{
3869 APFloat(semIEEEdouble
, APInt(64, I
.getRawData()[0])),
3870 APFloat(semIEEEdouble
, APInt(64, I
.getRawData()[1]))}) {
3871 assert(Semantics
== &semPPCDoubleDouble
);
3874 DoubleAPFloat::DoubleAPFloat(const fltSemantics
&S
, APFloat
&&First
,
3877 Floats(new APFloat
[2]{std::move(First
), std::move(Second
)}) {
3878 assert(Semantics
== &semPPCDoubleDouble
);
3879 assert(&Floats
[0].getSemantics() == &semIEEEdouble
);
3880 assert(&Floats
[1].getSemantics() == &semIEEEdouble
);
3883 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat
&RHS
)
3884 : Semantics(RHS
.Semantics
),
3885 Floats(RHS
.Floats
? new APFloat
[2]{APFloat(RHS
.Floats
[0]),
3886 APFloat(RHS
.Floats
[1])}
3888 assert(Semantics
== &semPPCDoubleDouble
);
3891 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat
&&RHS
)
3892 : Semantics(RHS
.Semantics
), Floats(std::move(RHS
.Floats
)) {
3893 RHS
.Semantics
= &semBogus
;
3894 assert(Semantics
== &semPPCDoubleDouble
);
3897 DoubleAPFloat
&DoubleAPFloat::operator=(const DoubleAPFloat
&RHS
) {
3898 if (Semantics
== RHS
.Semantics
&& RHS
.Floats
) {
3899 Floats
[0] = RHS
.Floats
[0];
3900 Floats
[1] = RHS
.Floats
[1];
3901 } else if (this != &RHS
) {
3902 this->~DoubleAPFloat();
3903 new (this) DoubleAPFloat(RHS
);
3908 // Implement addition, subtraction, multiplication and division based on:
3909 // "Software for Doubled-Precision Floating-Point Computations",
3910 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
3911 APFloat::opStatus
DoubleAPFloat::addImpl(const APFloat
&a
, const APFloat
&aa
,
3912 const APFloat
&c
, const APFloat
&cc
,
3916 Status
|= z
.add(c
, RM
);
3917 if (!z
.isFinite()) {
3918 if (!z
.isInfinity()) {
3919 Floats
[0] = std::move(z
);
3920 Floats
[1].makeZero(/* Neg = */ false);
3921 return (opStatus
)Status
;
3924 auto AComparedToC
= a
.compareAbsoluteValue(c
);
3926 Status
|= z
.add(aa
, RM
);
3927 if (AComparedToC
== APFloat::cmpGreaterThan
) {
3928 // z = cc + aa + c + a;
3929 Status
|= z
.add(c
, RM
);
3930 Status
|= z
.add(a
, RM
);
3932 // z = cc + aa + a + c;
3933 Status
|= z
.add(a
, RM
);
3934 Status
|= z
.add(c
, RM
);
3936 if (!z
.isFinite()) {
3937 Floats
[0] = std::move(z
);
3938 Floats
[1].makeZero(/* Neg = */ false);
3939 return (opStatus
)Status
;
3943 Status
|= zz
.add(cc
, RM
);
3944 if (AComparedToC
== APFloat::cmpGreaterThan
) {
3945 // Floats[1] = a - z + c + zz;
3947 Status
|= Floats
[1].subtract(z
, RM
);
3948 Status
|= Floats
[1].add(c
, RM
);
3949 Status
|= Floats
[1].add(zz
, RM
);
3951 // Floats[1] = c - z + a + zz;
3953 Status
|= Floats
[1].subtract(z
, RM
);
3954 Status
|= Floats
[1].add(a
, RM
);
3955 Status
|= Floats
[1].add(zz
, RM
);
3960 Status
|= q
.subtract(z
, RM
);
3962 // zz = q + c + (a - (q + z)) + aa + cc;
3963 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
3965 Status
|= zz
.add(c
, RM
);
3966 Status
|= q
.add(z
, RM
);
3967 Status
|= q
.subtract(a
, RM
);
3969 Status
|= zz
.add(q
, RM
);
3970 Status
|= zz
.add(aa
, RM
);
3971 Status
|= zz
.add(cc
, RM
);
3972 if (zz
.isZero() && !zz
.isNegative()) {
3973 Floats
[0] = std::move(z
);
3974 Floats
[1].makeZero(/* Neg = */ false);
3978 Status
|= Floats
[0].add(zz
, RM
);
3979 if (!Floats
[0].isFinite()) {
3980 Floats
[1].makeZero(/* Neg = */ false);
3981 return (opStatus
)Status
;
3983 Floats
[1] = std::move(z
);
3984 Status
|= Floats
[1].subtract(Floats
[0], RM
);
3985 Status
|= Floats
[1].add(zz
, RM
);
3987 return (opStatus
)Status
;
3990 APFloat::opStatus
DoubleAPFloat::addWithSpecial(const DoubleAPFloat
&LHS
,
3991 const DoubleAPFloat
&RHS
,
3994 if (LHS
.getCategory() == fcNaN
) {
3998 if (RHS
.getCategory() == fcNaN
) {
4002 if (LHS
.getCategory() == fcZero
) {
4006 if (RHS
.getCategory() == fcZero
) {
4010 if (LHS
.getCategory() == fcInfinity
&& RHS
.getCategory() == fcInfinity
&&
4011 LHS
.isNegative() != RHS
.isNegative()) {
4012 Out
.makeNaN(false, Out
.isNegative(), nullptr);
4015 if (LHS
.getCategory() == fcInfinity
) {
4019 if (RHS
.getCategory() == fcInfinity
) {
4023 assert(LHS
.getCategory() == fcNormal
&& RHS
.getCategory() == fcNormal
);
4025 APFloat
A(LHS
.Floats
[0]), AA(LHS
.Floats
[1]), C(RHS
.Floats
[0]),
4027 assert(&A
.getSemantics() == &semIEEEdouble
);
4028 assert(&AA
.getSemantics() == &semIEEEdouble
);
4029 assert(&C
.getSemantics() == &semIEEEdouble
);
4030 assert(&CC
.getSemantics() == &semIEEEdouble
);
4031 assert(&Out
.Floats
[0].getSemantics() == &semIEEEdouble
);
4032 assert(&Out
.Floats
[1].getSemantics() == &semIEEEdouble
);
4033 return Out
.addImpl(A
, AA
, C
, CC
, RM
);
4036 APFloat::opStatus
DoubleAPFloat::add(const DoubleAPFloat
&RHS
,
4038 return addWithSpecial(*this, RHS
, *this, RM
);
4041 APFloat::opStatus
DoubleAPFloat::subtract(const DoubleAPFloat
&RHS
,
4044 auto Ret
= add(RHS
, RM
);
4049 APFloat::opStatus
DoubleAPFloat::multiply(const DoubleAPFloat
&RHS
,
4050 APFloat::roundingMode RM
) {
4051 const auto &LHS
= *this;
4053 /* Interesting observation: For special categories, finding the lowest
4054 common ancestor of the following layered graph gives the correct
4063 e.g. NaN * NaN = NaN
4065 Normal * Zero = Zero
4068 if (LHS
.getCategory() == fcNaN
) {
4072 if (RHS
.getCategory() == fcNaN
) {
4076 if ((LHS
.getCategory() == fcZero
&& RHS
.getCategory() == fcInfinity
) ||
4077 (LHS
.getCategory() == fcInfinity
&& RHS
.getCategory() == fcZero
)) {
4078 Out
.makeNaN(false, false, nullptr);
4081 if (LHS
.getCategory() == fcZero
|| LHS
.getCategory() == fcInfinity
) {
4085 if (RHS
.getCategory() == fcZero
|| RHS
.getCategory() == fcInfinity
) {
4089 assert(LHS
.getCategory() == fcNormal
&& RHS
.getCategory() == fcNormal
&&
4090 "Special cases not handled exhaustively");
4093 APFloat A
= Floats
[0], B
= Floats
[1], C
= RHS
.Floats
[0], D
= RHS
.Floats
[1];
4096 Status
|= T
.multiply(C
, RM
);
4097 if (!T
.isFiniteNonZero()) {
4099 Floats
[1].makeZero(/* Neg = */ false);
4100 return (opStatus
)Status
;
4103 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4106 Status
|= Tau
.fusedMultiplyAdd(C
, T
, RM
);
4111 Status
|= V
.multiply(D
, RM
);
4114 Status
|= W
.multiply(C
, RM
);
4115 Status
|= V
.add(W
, RM
);
4117 Status
|= Tau
.add(V
, RM
);
4121 Status
|= U
.add(Tau
, RM
);
4124 if (!U
.isFinite()) {
4125 Floats
[1].makeZero(/* Neg = */ false);
4127 // Floats[1] = (t - u) + tau
4128 Status
|= T
.subtract(U
, RM
);
4129 Status
|= T
.add(Tau
, RM
);
4132 return (opStatus
)Status
;
4135 APFloat::opStatus
DoubleAPFloat::divide(const DoubleAPFloat
&RHS
,
4136 APFloat::roundingMode RM
) {
4137 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4138 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
4140 Tmp
.divide(APFloat(semPPCDoubleDoubleLegacy
, RHS
.bitcastToAPInt()), RM
);
4141 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
4145 APFloat::opStatus
DoubleAPFloat::remainder(const DoubleAPFloat
&RHS
) {
4146 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4147 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
4149 Tmp
.remainder(APFloat(semPPCDoubleDoubleLegacy
, RHS
.bitcastToAPInt()));
4150 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
4154 APFloat::opStatus
DoubleAPFloat::mod(const DoubleAPFloat
&RHS
) {
4155 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4156 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
4157 auto Ret
= Tmp
.mod(APFloat(semPPCDoubleDoubleLegacy
, RHS
.bitcastToAPInt()));
4158 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
4163 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat
&Multiplicand
,
4164 const DoubleAPFloat
&Addend
,
4165 APFloat::roundingMode RM
) {
4166 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4167 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
4168 auto Ret
= Tmp
.fusedMultiplyAdd(
4169 APFloat(semPPCDoubleDoubleLegacy
, Multiplicand
.bitcastToAPInt()),
4170 APFloat(semPPCDoubleDoubleLegacy
, Addend
.bitcastToAPInt()), RM
);
4171 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
4175 APFloat::opStatus
DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM
) {
4176 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4177 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
4178 auto Ret
= Tmp
.roundToIntegral(RM
);
4179 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
4183 void DoubleAPFloat::changeSign() {
4184 Floats
[0].changeSign();
4185 Floats
[1].changeSign();
4189 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat
&RHS
) const {
4190 auto Result
= Floats
[0].compareAbsoluteValue(RHS
.Floats
[0]);
4191 if (Result
!= cmpEqual
)
4193 Result
= Floats
[1].compareAbsoluteValue(RHS
.Floats
[1]);
4194 if (Result
== cmpLessThan
|| Result
== cmpGreaterThan
) {
4195 auto Against
= Floats
[0].isNegative() ^ Floats
[1].isNegative();
4196 auto RHSAgainst
= RHS
.Floats
[0].isNegative() ^ RHS
.Floats
[1].isNegative();
4197 if (Against
&& !RHSAgainst
)
4199 if (!Against
&& RHSAgainst
)
4200 return cmpGreaterThan
;
4201 if (!Against
&& !RHSAgainst
)
4203 if (Against
&& RHSAgainst
)
4204 return (cmpResult
)(cmpLessThan
+ cmpGreaterThan
- Result
);
4209 APFloat::fltCategory
DoubleAPFloat::getCategory() const {
4210 return Floats
[0].getCategory();
4213 bool DoubleAPFloat::isNegative() const { return Floats
[0].isNegative(); }
4215 void DoubleAPFloat::makeInf(bool Neg
) {
4216 Floats
[0].makeInf(Neg
);
4217 Floats
[1].makeZero(/* Neg = */ false);
4220 void DoubleAPFloat::makeZero(bool Neg
) {
4221 Floats
[0].makeZero(Neg
);
4222 Floats
[1].makeZero(/* Neg = */ false);
4225 void DoubleAPFloat::makeLargest(bool Neg
) {
4226 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4227 Floats
[0] = APFloat(semIEEEdouble
, APInt(64, 0x7fefffffffffffffull
));
4228 Floats
[1] = APFloat(semIEEEdouble
, APInt(64, 0x7c8ffffffffffffeull
));
4233 void DoubleAPFloat::makeSmallest(bool Neg
) {
4234 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4235 Floats
[0].makeSmallest(Neg
);
4236 Floats
[1].makeZero(/* Neg = */ false);
4239 void DoubleAPFloat::makeSmallestNormalized(bool Neg
) {
4240 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4241 Floats
[0] = APFloat(semIEEEdouble
, APInt(64, 0x0360000000000000ull
));
4243 Floats
[0].changeSign();
4244 Floats
[1].makeZero(/* Neg = */ false);
4247 void DoubleAPFloat::makeNaN(bool SNaN
, bool Neg
, const APInt
*fill
) {
4248 Floats
[0].makeNaN(SNaN
, Neg
, fill
);
4249 Floats
[1].makeZero(/* Neg = */ false);
4252 APFloat::cmpResult
DoubleAPFloat::compare(const DoubleAPFloat
&RHS
) const {
4253 auto Result
= Floats
[0].compare(RHS
.Floats
[0]);
4254 // |Float[0]| > |Float[1]|
4255 if (Result
== APFloat::cmpEqual
)
4256 return Floats
[1].compare(RHS
.Floats
[1]);
4260 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat
&RHS
) const {
4261 return Floats
[0].bitwiseIsEqual(RHS
.Floats
[0]) &&
4262 Floats
[1].bitwiseIsEqual(RHS
.Floats
[1]);
4265 hash_code
hash_value(const DoubleAPFloat
&Arg
) {
4267 return hash_combine(hash_value(Arg
.Floats
[0]), hash_value(Arg
.Floats
[1]));
4268 return hash_combine(Arg
.Semantics
);
4271 APInt
DoubleAPFloat::bitcastToAPInt() const {
4272 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4274 Floats
[0].bitcastToAPInt().getRawData()[0],
4275 Floats
[1].bitcastToAPInt().getRawData()[0],
4277 return APInt(128, 2, Data
);
4280 APFloat::opStatus
DoubleAPFloat::convertFromString(StringRef S
,
4282 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4283 APFloat
Tmp(semPPCDoubleDoubleLegacy
);
4284 auto Ret
= Tmp
.convertFromString(S
, RM
);
4285 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
4289 APFloat::opStatus
DoubleAPFloat::next(bool nextDown
) {
4290 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4291 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
4292 auto Ret
= Tmp
.next(nextDown
);
4293 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
4298 DoubleAPFloat::convertToInteger(MutableArrayRef
<integerPart
> Input
,
4299 unsigned int Width
, bool IsSigned
,
4300 roundingMode RM
, bool *IsExact
) const {
4301 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4302 return APFloat(semPPCDoubleDoubleLegacy
, bitcastToAPInt())
4303 .convertToInteger(Input
, Width
, IsSigned
, RM
, IsExact
);
4306 APFloat::opStatus
DoubleAPFloat::convertFromAPInt(const APInt
&Input
,
4309 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4310 APFloat
Tmp(semPPCDoubleDoubleLegacy
);
4311 auto Ret
= Tmp
.convertFromAPInt(Input
, IsSigned
, RM
);
4312 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
4317 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart
*Input
,
4318 unsigned int InputSize
,
4319 bool IsSigned
, roundingMode RM
) {
4320 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4321 APFloat
Tmp(semPPCDoubleDoubleLegacy
);
4322 auto Ret
= Tmp
.convertFromSignExtendedInteger(Input
, InputSize
, IsSigned
, RM
);
4323 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
4328 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart
*Input
,
4329 unsigned int InputSize
,
4330 bool IsSigned
, roundingMode RM
) {
4331 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4332 APFloat
Tmp(semPPCDoubleDoubleLegacy
);
4333 auto Ret
= Tmp
.convertFromZeroExtendedInteger(Input
, InputSize
, IsSigned
, RM
);
4334 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
4338 unsigned int DoubleAPFloat::convertToHexString(char *DST
,
4339 unsigned int HexDigits
,
4341 roundingMode RM
) const {
4342 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4343 return APFloat(semPPCDoubleDoubleLegacy
, bitcastToAPInt())
4344 .convertToHexString(DST
, HexDigits
, UpperCase
, RM
);
4347 bool DoubleAPFloat::isDenormal() const {
4348 return getCategory() == fcNormal
&&
4349 (Floats
[0].isDenormal() || Floats
[1].isDenormal() ||
4350 // (double)(Hi + Lo) == Hi defines a normal number.
4351 Floats
[0].compare(Floats
[0] + Floats
[1]) != cmpEqual
);
4354 bool DoubleAPFloat::isSmallest() const {
4355 if (getCategory() != fcNormal
)
4357 DoubleAPFloat
Tmp(*this);
4358 Tmp
.makeSmallest(this->isNegative());
4359 return Tmp
.compare(*this) == cmpEqual
;
4362 bool DoubleAPFloat::isLargest() const {
4363 if (getCategory() != fcNormal
)
4365 DoubleAPFloat
Tmp(*this);
4366 Tmp
.makeLargest(this->isNegative());
4367 return Tmp
.compare(*this) == cmpEqual
;
4370 bool DoubleAPFloat::isInteger() const {
4371 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4372 return Floats
[0].isInteger() && Floats
[1].isInteger();
4375 void DoubleAPFloat::toString(SmallVectorImpl
<char> &Str
,
4376 unsigned FormatPrecision
,
4377 unsigned FormatMaxPadding
,
4378 bool TruncateZero
) const {
4379 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4380 APFloat(semPPCDoubleDoubleLegacy
, bitcastToAPInt())
4381 .toString(Str
, FormatPrecision
, FormatMaxPadding
, TruncateZero
);
4384 bool DoubleAPFloat::getExactInverse(APFloat
*inv
) const {
4385 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4386 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
4388 return Tmp
.getExactInverse(nullptr);
4389 APFloat
Inv(semPPCDoubleDoubleLegacy
);
4390 auto Ret
= Tmp
.getExactInverse(&Inv
);
4391 *inv
= APFloat(semPPCDoubleDouble
, Inv
.bitcastToAPInt());
4395 DoubleAPFloat
scalbn(DoubleAPFloat Arg
, int Exp
, APFloat::roundingMode RM
) {
4396 assert(Arg
.Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4397 return DoubleAPFloat(semPPCDoubleDouble
, scalbn(Arg
.Floats
[0], Exp
, RM
),
4398 scalbn(Arg
.Floats
[1], Exp
, RM
));
4401 DoubleAPFloat
frexp(const DoubleAPFloat
&Arg
, int &Exp
,
4402 APFloat::roundingMode RM
) {
4403 assert(Arg
.Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4404 APFloat First
= frexp(Arg
.Floats
[0], Exp
, RM
);
4405 APFloat Second
= Arg
.Floats
[1];
4406 if (Arg
.getCategory() == APFloat::fcNormal
)
4407 Second
= scalbn(Second
, -Exp
, RM
);
4408 return DoubleAPFloat(semPPCDoubleDouble
, std::move(First
), std::move(Second
));
4411 } // End detail namespace
4413 APFloat::Storage::Storage(IEEEFloat F
, const fltSemantics
&Semantics
) {
4414 if (usesLayout
<IEEEFloat
>(Semantics
)) {
4415 new (&IEEE
) IEEEFloat(std::move(F
));
4418 if (usesLayout
<DoubleAPFloat
>(Semantics
)) {
4420 DoubleAPFloat(Semantics
, APFloat(std::move(F
), F
.getSemantics()),
4421 APFloat(semIEEEdouble
));
4424 llvm_unreachable("Unexpected semantics");
4427 APFloat::opStatus
APFloat::convertFromString(StringRef Str
, roundingMode RM
) {
4428 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str
, RM
));
4431 hash_code
hash_value(const APFloat
&Arg
) {
4432 if (APFloat::usesLayout
<detail::IEEEFloat
>(Arg
.getSemantics()))
4433 return hash_value(Arg
.U
.IEEE
);
4434 if (APFloat::usesLayout
<detail::DoubleAPFloat
>(Arg
.getSemantics()))
4435 return hash_value(Arg
.U
.Double
);
4436 llvm_unreachable("Unexpected semantics");
4439 APFloat::APFloat(const fltSemantics
&Semantics
, StringRef S
)
4440 : APFloat(Semantics
) {
4441 convertFromString(S
, rmNearestTiesToEven
);
4444 APFloat::opStatus
APFloat::convert(const fltSemantics
&ToSemantics
,
4445 roundingMode RM
, bool *losesInfo
) {
4446 if (&getSemantics() == &ToSemantics
) {
4450 if (usesLayout
<IEEEFloat
>(getSemantics()) &&
4451 usesLayout
<IEEEFloat
>(ToSemantics
))
4452 return U
.IEEE
.convert(ToSemantics
, RM
, losesInfo
);
4453 if (usesLayout
<IEEEFloat
>(getSemantics()) &&
4454 usesLayout
<DoubleAPFloat
>(ToSemantics
)) {
4455 assert(&ToSemantics
== &semPPCDoubleDouble
);
4456 auto Ret
= U
.IEEE
.convert(semPPCDoubleDoubleLegacy
, RM
, losesInfo
);
4457 *this = APFloat(ToSemantics
, U
.IEEE
.bitcastToAPInt());
4460 if (usesLayout
<DoubleAPFloat
>(getSemantics()) &&
4461 usesLayout
<IEEEFloat
>(ToSemantics
)) {
4462 auto Ret
= getIEEE().convert(ToSemantics
, RM
, losesInfo
);
4463 *this = APFloat(std::move(getIEEE()), ToSemantics
);
4466 llvm_unreachable("Unexpected semantics");
4469 APFloat
APFloat::getAllOnesValue(unsigned BitWidth
, bool isIEEE
) {
4473 return APFloat(semIEEEhalf
, APInt::getAllOnesValue(BitWidth
));
4475 return APFloat(semIEEEsingle
, APInt::getAllOnesValue(BitWidth
));
4477 return APFloat(semIEEEdouble
, APInt::getAllOnesValue(BitWidth
));
4479 return APFloat(semX87DoubleExtended
, APInt::getAllOnesValue(BitWidth
));
4481 return APFloat(semIEEEquad
, APInt::getAllOnesValue(BitWidth
));
4483 llvm_unreachable("Unknown floating bit width");
4486 assert(BitWidth
== 128);
4487 return APFloat(semPPCDoubleDouble
, APInt::getAllOnesValue(BitWidth
));
4491 void APFloat::print(raw_ostream
&OS
) const {
4492 SmallVector
<char, 16> Buffer
;
4494 OS
<< Buffer
<< "\n";
4497 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4498 LLVM_DUMP_METHOD
void APFloat::dump() const { print(dbgs()); }
4501 void APFloat::Profile(FoldingSetNodeID
&NID
) const {
4502 NID
.Add(bitcastToAPInt());
4505 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
4506 an APSInt, whose initial bit-width and signed-ness are used to determine the
4507 precision of the conversion.
4509 APFloat::opStatus
APFloat::convertToInteger(APSInt
&result
,
4510 roundingMode rounding_mode
,
4511 bool *isExact
) const {
4512 unsigned bitWidth
= result
.getBitWidth();
4513 SmallVector
<uint64_t, 4> parts(result
.getNumWords());
4514 opStatus status
= convertToInteger(parts
, bitWidth
, result
.isSigned(),
4515 rounding_mode
, isExact
);
4516 // Keeps the original signed-ness.
4517 result
= APInt(bitWidth
, parts
);
4521 } // End llvm namespace
4523 #undef APFLOAT_DISPATCH_ON_SEMANTICS