1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements a class to represent arbitrary precision floating
10 // point values and provide a variety of arithmetic operations on them.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/APSInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Config/llvm-config.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/ErrorHandling.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
29 #define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \
31 if (usesLayout<IEEEFloat>(getSemantics())) \
32 return U.IEEE.METHOD_CALL; \
33 if (usesLayout<DoubleAPFloat>(getSemantics())) \
34 return U.Double.METHOD_CALL; \
35 llvm_unreachable("Unexpected semantics"); \
40 /// A macro used to combine two fcCategory enums into one key which can be used
41 /// in a switch statement to classify how the interaction of two APFloat's
42 /// categories affects an operation.
44 /// TODO: If clang source code is ever allowed to use constexpr in its own
45 /// codebase, change this into a static inline function.
46 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
48 /* Assumed in hexadecimal significand parsing, and conversion to
49 hexadecimal strings. */
50 static_assert(APFloatBase::integerPartWidth
% 4 == 0, "Part width must be divisible by 4!");
53 /* Represents floating point arithmetic semantics. */
55 /* The largest E such that 2^E is representable; this matches the
56 definition of IEEE 754. */
57 APFloatBase::ExponentType maxExponent
;
59 /* The smallest E such that 2^E is a normalized number; this
60 matches the definition of IEEE 754. */
61 APFloatBase::ExponentType minExponent
;
63 /* Number of bits in the significand. This includes the integer
65 unsigned int precision
;
67 /* Number of bits actually used in the semantics. */
68 unsigned int sizeInBits
;
71 static const fltSemantics semIEEEhalf
= {15, -14, 11, 16};
72 static const fltSemantics semIEEEsingle
= {127, -126, 24, 32};
73 static const fltSemantics semIEEEdouble
= {1023, -1022, 53, 64};
74 static const fltSemantics semIEEEquad
= {16383, -16382, 113, 128};
75 static const fltSemantics semX87DoubleExtended
= {16383, -16382, 64, 80};
76 static const fltSemantics semBogus
= {0, 0, 0, 0};
78 /* The IBM double-double semantics. Such a number consists of a pair of IEEE
79 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
80 (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
81 Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
82 to each other, and two 11-bit exponents.
84 Note: we need to make the value different from semBogus as otherwise
85 an unsafe optimization may collapse both values to a single address,
86 and we heavily rely on them having distinct addresses. */
87 static const fltSemantics semPPCDoubleDouble
= {-1, 0, 0, 0};
89 /* These are legacy semantics for the fallback, inaccrurate implementation of
90 IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
91 operation. It's equivalent to having an IEEE number with consecutive 106
92 bits of mantissa and 11 bits of exponent.
94 It's not equivalent to IBM double-double. For example, a legit IBM
95 double-double, 1 + epsilon:
97 1 + epsilon = 1 + (1 >> 1076)
99 is not representable by a consecutive 106 bits of mantissa.
101 Currently, these semantics are used in the following way:
103 semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
104 (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
105 semPPCDoubleDoubleLegacy -> IEEE operations
107 We use bitcastToAPInt() to get the bit representation (in APInt) of the
108 underlying IEEEdouble, then use the APInt constructor to construct the
111 TODO: Implement all operations in semPPCDoubleDouble, and delete these
113 static const fltSemantics semPPCDoubleDoubleLegacy
= {1023, -1022 + 53,
116 const llvm::fltSemantics
&APFloatBase::EnumToSemantics(Semantics S
) {
124 case S_x87DoubleExtended
:
125 return x87DoubleExtended();
128 case S_PPCDoubleDouble
:
129 return PPCDoubleDouble();
131 llvm_unreachable("Unrecognised floating semantics");
134 APFloatBase::Semantics
135 APFloatBase::SemanticsToEnum(const llvm::fltSemantics
&Sem
) {
136 if (&Sem
== &llvm::APFloat::IEEEhalf())
138 else if (&Sem
== &llvm::APFloat::IEEEsingle())
140 else if (&Sem
== &llvm::APFloat::IEEEdouble())
142 else if (&Sem
== &llvm::APFloat::x87DoubleExtended())
143 return S_x87DoubleExtended
;
144 else if (&Sem
== &llvm::APFloat::IEEEquad())
146 else if (&Sem
== &llvm::APFloat::PPCDoubleDouble())
147 return S_PPCDoubleDouble
;
149 llvm_unreachable("Unknown floating semantics");
152 const fltSemantics
&APFloatBase::IEEEhalf() {
155 const fltSemantics
&APFloatBase::IEEEsingle() {
156 return semIEEEsingle
;
158 const fltSemantics
&APFloatBase::IEEEdouble() {
159 return semIEEEdouble
;
161 const fltSemantics
&APFloatBase::IEEEquad() {
164 const fltSemantics
&APFloatBase::x87DoubleExtended() {
165 return semX87DoubleExtended
;
167 const fltSemantics
&APFloatBase::Bogus() {
170 const fltSemantics
&APFloatBase::PPCDoubleDouble() {
171 return semPPCDoubleDouble
;
174 /* A tight upper bound on number of parts required to hold the value
177 power * 815 / (351 * integerPartWidth) + 1
179 However, whilst the result may require only this many parts,
180 because we are multiplying two values to get it, the
181 multiplication may require an extra part with the excess part
182 being zero (consider the trivial case of 1 * 1, tcFullMultiply
183 requires two parts to hold the single-part result). So we add an
184 extra one to guarantee enough space whilst multiplying. */
185 const unsigned int maxExponent
= 16383;
186 const unsigned int maxPrecision
= 113;
187 const unsigned int maxPowerOfFiveExponent
= maxExponent
+ maxPrecision
- 1;
188 const unsigned int maxPowerOfFiveParts
= 2 + ((maxPowerOfFiveExponent
* 815) / (351 * APFloatBase::integerPartWidth
));
190 unsigned int APFloatBase::semanticsPrecision(const fltSemantics
&semantics
) {
191 return semantics
.precision
;
193 APFloatBase::ExponentType
194 APFloatBase::semanticsMaxExponent(const fltSemantics
&semantics
) {
195 return semantics
.maxExponent
;
197 APFloatBase::ExponentType
198 APFloatBase::semanticsMinExponent(const fltSemantics
&semantics
) {
199 return semantics
.minExponent
;
201 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics
&semantics
) {
202 return semantics
.sizeInBits
;
205 unsigned APFloatBase::getSizeInBits(const fltSemantics
&Sem
) {
206 return Sem
.sizeInBits
;
209 /* A bunch of private, handy routines. */
211 static inline unsigned int
212 partCountForBits(unsigned int bits
)
214 return ((bits
) + APFloatBase::integerPartWidth
- 1) / APFloatBase::integerPartWidth
;
217 /* Returns 0U-9U. Return values >= 10U are not digits. */
218 static inline unsigned int
219 decDigitValue(unsigned int c
)
224 /* Return the value of a decimal exponent of the form
227 If the exponent overflows, returns a large exponent with the
230 readExponent(StringRef::iterator begin
, StringRef::iterator end
)
233 unsigned int absExponent
;
234 const unsigned int overlargeExponent
= 24000; /* FIXME. */
235 StringRef::iterator p
= begin
;
237 // Treat no exponent as 0 to match binutils
238 if (p
== end
|| ((*p
== '-' || *p
== '+') && (p
+ 1) == end
)) {
242 isNegative
= (*p
== '-');
243 if (*p
== '-' || *p
== '+') {
245 assert(p
!= end
&& "Exponent has no digits");
248 absExponent
= decDigitValue(*p
++);
249 assert(absExponent
< 10U && "Invalid character in exponent");
251 for (; p
!= end
; ++p
) {
254 value
= decDigitValue(*p
);
255 assert(value
< 10U && "Invalid character in exponent");
257 value
+= absExponent
* 10;
258 if (absExponent
>= overlargeExponent
) {
259 absExponent
= overlargeExponent
;
260 p
= end
; /* outwit assert below */
266 assert(p
== end
&& "Invalid exponent in exponent");
269 return -(int) absExponent
;
271 return (int) absExponent
;
274 /* This is ugly and needs cleaning up, but I don't immediately see
275 how whilst remaining safe. */
277 totalExponent(StringRef::iterator p
, StringRef::iterator end
,
278 int exponentAdjustment
)
280 int unsignedExponent
;
281 bool negative
, overflow
;
284 assert(p
!= end
&& "Exponent has no digits");
286 negative
= *p
== '-';
287 if (*p
== '-' || *p
== '+') {
289 assert(p
!= end
&& "Exponent has no digits");
292 unsignedExponent
= 0;
294 for (; p
!= end
; ++p
) {
297 value
= decDigitValue(*p
);
298 assert(value
< 10U && "Invalid character in exponent");
300 unsignedExponent
= unsignedExponent
* 10 + value
;
301 if (unsignedExponent
> 32767) {
307 if (exponentAdjustment
> 32767 || exponentAdjustment
< -32768)
311 exponent
= unsignedExponent
;
313 exponent
= -exponent
;
314 exponent
+= exponentAdjustment
;
315 if (exponent
> 32767 || exponent
< -32768)
320 exponent
= negative
? -32768: 32767;
325 static StringRef::iterator
326 skipLeadingZeroesAndAnyDot(StringRef::iterator begin
, StringRef::iterator end
,
327 StringRef::iterator
*dot
)
329 StringRef::iterator p
= begin
;
331 while (p
!= end
&& *p
== '0')
334 if (p
!= end
&& *p
== '.') {
337 assert(end
- begin
!= 1 && "Significand has no digits");
339 while (p
!= end
&& *p
== '0')
346 /* Given a normal decimal floating point number of the form
350 where the decimal point and exponent are optional, fill out the
351 structure D. Exponent is appropriate if the significand is
352 treated as an integer, and normalizedExponent if the significand
353 is taken to have the decimal point after a single leading
356 If the value is zero, V->firstSigDigit points to a non-digit, and
357 the return exponent is zero.
360 const char *firstSigDigit
;
361 const char *lastSigDigit
;
363 int normalizedExponent
;
367 interpretDecimal(StringRef::iterator begin
, StringRef::iterator end
,
370 StringRef::iterator dot
= end
;
371 StringRef::iterator p
= skipLeadingZeroesAndAnyDot (begin
, end
, &dot
);
373 D
->firstSigDigit
= p
;
375 D
->normalizedExponent
= 0;
377 for (; p
!= end
; ++p
) {
379 assert(dot
== end
&& "String contains multiple dots");
384 if (decDigitValue(*p
) >= 10U)
389 assert((*p
== 'e' || *p
== 'E') && "Invalid character in significand");
390 assert(p
!= begin
&& "Significand has no digits");
391 assert((dot
== end
|| p
- begin
!= 1) && "Significand has no digits");
393 /* p points to the first non-digit in the string */
394 D
->exponent
= readExponent(p
+ 1, end
);
396 /* Implied decimal point? */
401 /* If number is all zeroes accept any exponent. */
402 if (p
!= D
->firstSigDigit
) {
403 /* Drop insignificant trailing zeroes. */
408 while (p
!= begin
&& *p
== '0');
409 while (p
!= begin
&& *p
== '.');
412 /* Adjust the exponents for any decimal point. */
413 D
->exponent
+= static_cast<APFloat::ExponentType
>((dot
- p
) - (dot
> p
));
414 D
->normalizedExponent
= (D
->exponent
+
415 static_cast<APFloat::ExponentType
>((p
- D
->firstSigDigit
)
416 - (dot
> D
->firstSigDigit
&& dot
< p
)));
422 /* Return the trailing fraction of a hexadecimal number.
423 DIGITVALUE is the first hex digit of the fraction, P points to
426 trailingHexadecimalFraction(StringRef::iterator p
, StringRef::iterator end
,
427 unsigned int digitValue
)
429 unsigned int hexDigit
;
431 /* If the first trailing digit isn't 0 or 8 we can work out the
432 fraction immediately. */
434 return lfMoreThanHalf
;
435 else if (digitValue
< 8 && digitValue
> 0)
436 return lfLessThanHalf
;
438 // Otherwise we need to find the first non-zero digit.
439 while (p
!= end
&& (*p
== '0' || *p
== '.'))
442 assert(p
!= end
&& "Invalid trailing hexadecimal fraction!");
444 hexDigit
= hexDigitValue(*p
);
446 /* If we ran off the end it is exactly zero or one-half, otherwise
449 return digitValue
== 0 ? lfExactlyZero
: lfExactlyHalf
;
451 return digitValue
== 0 ? lfLessThanHalf
: lfMoreThanHalf
;
454 /* Return the fraction lost were a bignum truncated losing the least
455 significant BITS bits. */
457 lostFractionThroughTruncation(const APFloatBase::integerPart
*parts
,
458 unsigned int partCount
,
463 lsb
= APInt::tcLSB(parts
, partCount
);
465 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
467 return lfExactlyZero
;
469 return lfExactlyHalf
;
470 if (bits
<= partCount
* APFloatBase::integerPartWidth
&&
471 APInt::tcExtractBit(parts
, bits
- 1))
472 return lfMoreThanHalf
;
474 return lfLessThanHalf
;
477 /* Shift DST right BITS bits noting lost fraction. */
479 shiftRight(APFloatBase::integerPart
*dst
, unsigned int parts
, unsigned int bits
)
481 lostFraction lost_fraction
;
483 lost_fraction
= lostFractionThroughTruncation(dst
, parts
, bits
);
485 APInt::tcShiftRight(dst
, parts
, bits
);
487 return lost_fraction
;
490 /* Combine the effect of two lost fractions. */
492 combineLostFractions(lostFraction moreSignificant
,
493 lostFraction lessSignificant
)
495 if (lessSignificant
!= lfExactlyZero
) {
496 if (moreSignificant
== lfExactlyZero
)
497 moreSignificant
= lfLessThanHalf
;
498 else if (moreSignificant
== lfExactlyHalf
)
499 moreSignificant
= lfMoreThanHalf
;
502 return moreSignificant
;
505 /* The error from the true value, in half-ulps, on multiplying two
506 floating point numbers, which differ from the value they
507 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
508 than the returned value.
510 See "How to Read Floating Point Numbers Accurately" by William D
513 HUerrBound(bool inexactMultiply
, unsigned int HUerr1
, unsigned int HUerr2
)
515 assert(HUerr1
< 2 || HUerr2
< 2 || (HUerr1
+ HUerr2
< 8));
517 if (HUerr1
+ HUerr2
== 0)
518 return inexactMultiply
* 2; /* <= inexactMultiply half-ulps. */
520 return inexactMultiply
+ 2 * (HUerr1
+ HUerr2
);
523 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
524 when the least significant BITS are truncated. BITS cannot be
526 static APFloatBase::integerPart
527 ulpsFromBoundary(const APFloatBase::integerPart
*parts
, unsigned int bits
,
529 unsigned int count
, partBits
;
530 APFloatBase::integerPart part
, boundary
;
535 count
= bits
/ APFloatBase::integerPartWidth
;
536 partBits
= bits
% APFloatBase::integerPartWidth
+ 1;
538 part
= parts
[count
] & (~(APFloatBase::integerPart
) 0 >> (APFloatBase::integerPartWidth
- partBits
));
541 boundary
= (APFloatBase::integerPart
) 1 << (partBits
- 1);
546 if (part
- boundary
<= boundary
- part
)
547 return part
- boundary
;
549 return boundary
- part
;
552 if (part
== boundary
) {
555 return ~(APFloatBase::integerPart
) 0; /* A lot. */
558 } else if (part
== boundary
- 1) {
561 return ~(APFloatBase::integerPart
) 0; /* A lot. */
566 return ~(APFloatBase::integerPart
) 0; /* A lot. */
569 /* Place pow(5, power) in DST, and return the number of parts used.
570 DST must be at least one part larger than size of the answer. */
572 powerOf5(APFloatBase::integerPart
*dst
, unsigned int power
) {
573 static const APFloatBase::integerPart firstEightPowers
[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
574 APFloatBase::integerPart pow5s
[maxPowerOfFiveParts
* 2 + 5];
575 pow5s
[0] = 78125 * 5;
577 unsigned int partsCount
[16] = { 1 };
578 APFloatBase::integerPart scratch
[maxPowerOfFiveParts
], *p1
, *p2
, *pow5
;
580 assert(power
<= maxExponent
);
585 *p1
= firstEightPowers
[power
& 7];
591 for (unsigned int n
= 0; power
; power
>>= 1, n
++) {
596 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
598 pc
= partsCount
[n
- 1];
599 APInt::tcFullMultiply(pow5
, pow5
- pc
, pow5
- pc
, pc
, pc
);
601 if (pow5
[pc
- 1] == 0)
607 APFloatBase::integerPart
*tmp
;
609 APInt::tcFullMultiply(p2
, p1
, pow5
, result
, pc
);
611 if (p2
[result
- 1] == 0)
614 /* Now result is in p1 with partsCount parts and p2 is scratch
625 APInt::tcAssign(dst
, p1
, result
);
630 /* Zero at the end to avoid modular arithmetic when adding one; used
631 when rounding up during hexadecimal output. */
632 static const char hexDigitsLower
[] = "0123456789abcdef0";
633 static const char hexDigitsUpper
[] = "0123456789ABCDEF0";
634 static const char infinityL
[] = "infinity";
635 static const char infinityU
[] = "INFINITY";
636 static const char NaNL
[] = "nan";
637 static const char NaNU
[] = "NAN";
639 /* Write out an integerPart in hexadecimal, starting with the most
640 significant nibble. Write out exactly COUNT hexdigits, return
643 partAsHex (char *dst
, APFloatBase::integerPart part
, unsigned int count
,
644 const char *hexDigitChars
)
646 unsigned int result
= count
;
648 assert(count
!= 0 && count
<= APFloatBase::integerPartWidth
/ 4);
650 part
>>= (APFloatBase::integerPartWidth
- 4 * count
);
652 dst
[count
] = hexDigitChars
[part
& 0xf];
659 /* Write out an unsigned decimal integer. */
661 writeUnsignedDecimal (char *dst
, unsigned int n
)
677 /* Write out a signed decimal integer. */
679 writeSignedDecimal (char *dst
, int value
)
683 dst
= writeUnsignedDecimal(dst
, -(unsigned) value
);
685 dst
= writeUnsignedDecimal(dst
, value
);
692 void IEEEFloat::initialize(const fltSemantics
*ourSemantics
) {
695 semantics
= ourSemantics
;
698 significand
.parts
= new integerPart
[count
];
701 void IEEEFloat::freeSignificand() {
703 delete [] significand
.parts
;
706 void IEEEFloat::assign(const IEEEFloat
&rhs
) {
707 assert(semantics
== rhs
.semantics
);
710 category
= rhs
.category
;
711 exponent
= rhs
.exponent
;
712 if (isFiniteNonZero() || category
== fcNaN
)
713 copySignificand(rhs
);
716 void IEEEFloat::copySignificand(const IEEEFloat
&rhs
) {
717 assert(isFiniteNonZero() || category
== fcNaN
);
718 assert(rhs
.partCount() >= partCount());
720 APInt::tcAssign(significandParts(), rhs
.significandParts(),
724 /* Make this number a NaN, with an arbitrary but deterministic value
725 for the significand. If double or longer, this is a signalling NaN,
726 which may not be ideal. If float, this is QNaN(0). */
727 void IEEEFloat::makeNaN(bool SNaN
, bool Negative
, const APInt
*fill
) {
731 integerPart
*significand
= significandParts();
732 unsigned numParts
= partCount();
734 // Set the significand bits to the fill.
735 if (!fill
|| fill
->getNumWords() < numParts
)
736 APInt::tcSet(significand
, 0, numParts
);
738 APInt::tcAssign(significand
, fill
->getRawData(),
739 std::min(fill
->getNumWords(), numParts
));
741 // Zero out the excess bits of the significand.
742 unsigned bitsToPreserve
= semantics
->precision
- 1;
743 unsigned part
= bitsToPreserve
/ 64;
744 bitsToPreserve
%= 64;
745 significand
[part
] &= ((1ULL << bitsToPreserve
) - 1);
746 for (part
++; part
!= numParts
; ++part
)
747 significand
[part
] = 0;
750 unsigned QNaNBit
= semantics
->precision
- 2;
753 // We always have to clear the QNaN bit to make it an SNaN.
754 APInt::tcClearBit(significand
, QNaNBit
);
756 // If there are no bits set in the payload, we have to set
757 // *something* to make it a NaN instead of an infinity;
758 // conventionally, this is the next bit down from the QNaN bit.
759 if (APInt::tcIsZero(significand
, numParts
))
760 APInt::tcSetBit(significand
, QNaNBit
- 1);
762 // We always have to set the QNaN bit to make it a QNaN.
763 APInt::tcSetBit(significand
, QNaNBit
);
766 // For x87 extended precision, we want to make a NaN, not a
767 // pseudo-NaN. Maybe we should expose the ability to make
769 if (semantics
== &semX87DoubleExtended
)
770 APInt::tcSetBit(significand
, QNaNBit
+ 1);
773 IEEEFloat
&IEEEFloat::operator=(const IEEEFloat
&rhs
) {
775 if (semantics
!= rhs
.semantics
) {
777 initialize(rhs
.semantics
);
785 IEEEFloat
&IEEEFloat::operator=(IEEEFloat
&&rhs
) {
788 semantics
= rhs
.semantics
;
789 significand
= rhs
.significand
;
790 exponent
= rhs
.exponent
;
791 category
= rhs
.category
;
794 rhs
.semantics
= &semBogus
;
798 bool IEEEFloat::isDenormal() const {
799 return isFiniteNonZero() && (exponent
== semantics
->minExponent
) &&
800 (APInt::tcExtractBit(significandParts(),
801 semantics
->precision
- 1) == 0);
804 bool IEEEFloat::isSmallest() const {
805 // The smallest number by magnitude in our format will be the smallest
806 // denormal, i.e. the floating point number with exponent being minimum
807 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
808 return isFiniteNonZero() && exponent
== semantics
->minExponent
&&
809 significandMSB() == 0;
812 bool IEEEFloat::isSignificandAllOnes() const {
813 // Test if the significand excluding the integral bit is all ones. This allows
814 // us to test for binade boundaries.
815 const integerPart
*Parts
= significandParts();
816 const unsigned PartCount
= partCount();
817 for (unsigned i
= 0; i
< PartCount
- 1; i
++)
821 // Set the unused high bits to all ones when we compare.
822 const unsigned NumHighBits
=
823 PartCount
*integerPartWidth
- semantics
->precision
+ 1;
824 assert(NumHighBits
<= integerPartWidth
&& "Can not have more high bits to "
825 "fill than integerPartWidth");
826 const integerPart HighBitFill
=
827 ~integerPart(0) << (integerPartWidth
- NumHighBits
);
828 if (~(Parts
[PartCount
- 1] | HighBitFill
))
834 bool IEEEFloat::isSignificandAllZeros() const {
835 // Test if the significand excluding the integral bit is all zeros. This
836 // allows us to test for binade boundaries.
837 const integerPart
*Parts
= significandParts();
838 const unsigned PartCount
= partCount();
840 for (unsigned i
= 0; i
< PartCount
- 1; i
++)
844 const unsigned NumHighBits
=
845 PartCount
*integerPartWidth
- semantics
->precision
+ 1;
846 assert(NumHighBits
<= integerPartWidth
&& "Can not have more high bits to "
847 "clear than integerPartWidth");
848 const integerPart HighBitMask
= ~integerPart(0) >> NumHighBits
;
850 if (Parts
[PartCount
- 1] & HighBitMask
)
856 bool IEEEFloat::isLargest() const {
857 // The largest number by magnitude in our format will be the floating point
858 // number with maximum exponent and with significand that is all ones.
859 return isFiniteNonZero() && exponent
== semantics
->maxExponent
860 && isSignificandAllOnes();
863 bool IEEEFloat::isInteger() const {
864 // This could be made more efficient; I'm going for obviously correct.
865 if (!isFinite()) return false;
866 IEEEFloat truncated
= *this;
867 truncated
.roundToIntegral(rmTowardZero
);
868 return compare(truncated
) == cmpEqual
;
871 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat
&rhs
) const {
874 if (semantics
!= rhs
.semantics
||
875 category
!= rhs
.category
||
878 if (category
==fcZero
|| category
==fcInfinity
)
881 if (isFiniteNonZero() && exponent
!= rhs
.exponent
)
884 return std::equal(significandParts(), significandParts() + partCount(),
885 rhs
.significandParts());
888 IEEEFloat::IEEEFloat(const fltSemantics
&ourSemantics
, integerPart value
) {
889 initialize(&ourSemantics
);
893 exponent
= ourSemantics
.precision
- 1;
894 significandParts()[0] = value
;
895 normalize(rmNearestTiesToEven
, lfExactlyZero
);
898 IEEEFloat::IEEEFloat(const fltSemantics
&ourSemantics
) {
899 initialize(&ourSemantics
);
904 // Delegate to the previous constructor, because later copy constructor may
905 // actually inspects category, which can't be garbage.
906 IEEEFloat::IEEEFloat(const fltSemantics
&ourSemantics
, uninitializedTag tag
)
907 : IEEEFloat(ourSemantics
) {}
909 IEEEFloat::IEEEFloat(const IEEEFloat
&rhs
) {
910 initialize(rhs
.semantics
);
914 IEEEFloat::IEEEFloat(IEEEFloat
&&rhs
) : semantics(&semBogus
) {
915 *this = std::move(rhs
);
918 IEEEFloat::~IEEEFloat() { freeSignificand(); }
920 unsigned int IEEEFloat::partCount() const {
921 return partCountForBits(semantics
->precision
+ 1);
924 const IEEEFloat::integerPart
*IEEEFloat::significandParts() const {
925 return const_cast<IEEEFloat
*>(this)->significandParts();
928 IEEEFloat::integerPart
*IEEEFloat::significandParts() {
930 return significand
.parts
;
932 return &significand
.part
;
935 void IEEEFloat::zeroSignificand() {
936 APInt::tcSet(significandParts(), 0, partCount());
939 /* Increment an fcNormal floating point number's significand. */
940 void IEEEFloat::incrementSignificand() {
943 carry
= APInt::tcIncrement(significandParts(), partCount());
945 /* Our callers should never cause us to overflow. */
950 /* Add the significand of the RHS. Returns the carry flag. */
951 IEEEFloat::integerPart
IEEEFloat::addSignificand(const IEEEFloat
&rhs
) {
954 parts
= significandParts();
956 assert(semantics
== rhs
.semantics
);
957 assert(exponent
== rhs
.exponent
);
959 return APInt::tcAdd(parts
, rhs
.significandParts(), 0, partCount());
962 /* Subtract the significand of the RHS with a borrow flag. Returns
964 IEEEFloat::integerPart
IEEEFloat::subtractSignificand(const IEEEFloat
&rhs
,
965 integerPart borrow
) {
968 parts
= significandParts();
970 assert(semantics
== rhs
.semantics
);
971 assert(exponent
== rhs
.exponent
);
973 return APInt::tcSubtract(parts
, rhs
.significandParts(), borrow
,
977 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
978 on to the full-precision result of the multiplication. Returns the
980 lostFraction
IEEEFloat::multiplySignificand(const IEEEFloat
&rhs
,
981 const IEEEFloat
*addend
) {
982 unsigned int omsb
; // One, not zero, based MSB.
983 unsigned int partsCount
, newPartsCount
, precision
;
984 integerPart
*lhsSignificand
;
985 integerPart scratch
[4];
986 integerPart
*fullSignificand
;
987 lostFraction lost_fraction
;
990 assert(semantics
== rhs
.semantics
);
992 precision
= semantics
->precision
;
994 // Allocate space for twice as many bits as the original significand, plus one
995 // extra bit for the addition to overflow into.
996 newPartsCount
= partCountForBits(precision
* 2 + 1);
998 if (newPartsCount
> 4)
999 fullSignificand
= new integerPart
[newPartsCount
];
1001 fullSignificand
= scratch
;
1003 lhsSignificand
= significandParts();
1004 partsCount
= partCount();
1006 APInt::tcFullMultiply(fullSignificand
, lhsSignificand
,
1007 rhs
.significandParts(), partsCount
, partsCount
);
1009 lost_fraction
= lfExactlyZero
;
1010 omsb
= APInt::tcMSB(fullSignificand
, newPartsCount
) + 1;
1011 exponent
+= rhs
.exponent
;
1013 // Assume the operands involved in the multiplication are single-precision
1014 // FP, and the two multiplicants are:
1015 // *this = a23 . a22 ... a0 * 2^e1
1016 // rhs = b23 . b22 ... b0 * 2^e2
1017 // the result of multiplication is:
1018 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
1019 // Note that there are three significant bits at the left-hand side of the
1020 // radix point: two for the multiplication, and an overflow bit for the
1021 // addition (that will always be zero at this point). Move the radix point
1022 // toward left by two bits, and adjust exponent accordingly.
1025 if (addend
&& addend
->isNonZero()) {
1026 // The intermediate result of the multiplication has "2 * precision"
1027 // signicant bit; adjust the addend to be consistent with mul result.
1029 Significand savedSignificand
= significand
;
1030 const fltSemantics
*savedSemantics
= semantics
;
1031 fltSemantics extendedSemantics
;
1033 unsigned int extendedPrecision
;
1035 // Normalize our MSB to one below the top bit to allow for overflow.
1036 extendedPrecision
= 2 * precision
+ 1;
1037 if (omsb
!= extendedPrecision
- 1) {
1038 assert(extendedPrecision
> omsb
);
1039 APInt::tcShiftLeft(fullSignificand
, newPartsCount
,
1040 (extendedPrecision
- 1) - omsb
);
1041 exponent
-= (extendedPrecision
- 1) - omsb
;
1044 /* Create new semantics. */
1045 extendedSemantics
= *semantics
;
1046 extendedSemantics
.precision
= extendedPrecision
;
1048 if (newPartsCount
== 1)
1049 significand
.part
= fullSignificand
[0];
1051 significand
.parts
= fullSignificand
;
1052 semantics
= &extendedSemantics
;
1054 IEEEFloat
extendedAddend(*addend
);
1055 status
= extendedAddend
.convert(extendedSemantics
, rmTowardZero
, &ignored
);
1056 assert(status
== opOK
);
1059 // Shift the significand of the addend right by one bit. This guarantees
1060 // that the high bit of the significand is zero (same as fullSignificand),
1061 // so the addition will overflow (if it does overflow at all) into the top bit.
1062 lost_fraction
= extendedAddend
.shiftSignificandRight(1);
1063 assert(lost_fraction
== lfExactlyZero
&&
1064 "Lost precision while shifting addend for fused-multiply-add.");
1066 lost_fraction
= addOrSubtractSignificand(extendedAddend
, false);
1068 /* Restore our state. */
1069 if (newPartsCount
== 1)
1070 fullSignificand
[0] = significand
.part
;
1071 significand
= savedSignificand
;
1072 semantics
= savedSemantics
;
1074 omsb
= APInt::tcMSB(fullSignificand
, newPartsCount
) + 1;
1077 // Convert the result having "2 * precision" significant-bits back to the one
1078 // having "precision" significant-bits. First, move the radix point from
1079 // poision "2*precision - 1" to "precision - 1". The exponent need to be
1080 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1081 exponent
-= precision
+ 1;
1083 // In case MSB resides at the left-hand side of radix point, shift the
1084 // mantissa right by some amount to make sure the MSB reside right before
1085 // the radix point (i.e. "MSB . rest-significant-bits").
1087 // Note that the result is not normalized when "omsb < precision". So, the
1088 // caller needs to call IEEEFloat::normalize() if normalized value is
1090 if (omsb
> precision
) {
1091 unsigned int bits
, significantParts
;
1094 bits
= omsb
- precision
;
1095 significantParts
= partCountForBits(omsb
);
1096 lf
= shiftRight(fullSignificand
, significantParts
, bits
);
1097 lost_fraction
= combineLostFractions(lf
, lost_fraction
);
1101 APInt::tcAssign(lhsSignificand
, fullSignificand
, partsCount
);
1103 if (newPartsCount
> 4)
1104 delete [] fullSignificand
;
1106 return lost_fraction
;
1109 /* Multiply the significands of LHS and RHS to DST. */
1110 lostFraction
IEEEFloat::divideSignificand(const IEEEFloat
&rhs
) {
1111 unsigned int bit
, i
, partsCount
;
1112 const integerPart
*rhsSignificand
;
1113 integerPart
*lhsSignificand
, *dividend
, *divisor
;
1114 integerPart scratch
[4];
1115 lostFraction lost_fraction
;
1117 assert(semantics
== rhs
.semantics
);
1119 lhsSignificand
= significandParts();
1120 rhsSignificand
= rhs
.significandParts();
1121 partsCount
= partCount();
1124 dividend
= new integerPart
[partsCount
* 2];
1128 divisor
= dividend
+ partsCount
;
1130 /* Copy the dividend and divisor as they will be modified in-place. */
1131 for (i
= 0; i
< partsCount
; i
++) {
1132 dividend
[i
] = lhsSignificand
[i
];
1133 divisor
[i
] = rhsSignificand
[i
];
1134 lhsSignificand
[i
] = 0;
1137 exponent
-= rhs
.exponent
;
1139 unsigned int precision
= semantics
->precision
;
1141 /* Normalize the divisor. */
1142 bit
= precision
- APInt::tcMSB(divisor
, partsCount
) - 1;
1145 APInt::tcShiftLeft(divisor
, partsCount
, bit
);
1148 /* Normalize the dividend. */
1149 bit
= precision
- APInt::tcMSB(dividend
, partsCount
) - 1;
1152 APInt::tcShiftLeft(dividend
, partsCount
, bit
);
1155 /* Ensure the dividend >= divisor initially for the loop below.
1156 Incidentally, this means that the division loop below is
1157 guaranteed to set the integer bit to one. */
1158 if (APInt::tcCompare(dividend
, divisor
, partsCount
) < 0) {
1160 APInt::tcShiftLeft(dividend
, partsCount
, 1);
1161 assert(APInt::tcCompare(dividend
, divisor
, partsCount
) >= 0);
1164 /* Long division. */
1165 for (bit
= precision
; bit
; bit
-= 1) {
1166 if (APInt::tcCompare(dividend
, divisor
, partsCount
) >= 0) {
1167 APInt::tcSubtract(dividend
, divisor
, 0, partsCount
);
1168 APInt::tcSetBit(lhsSignificand
, bit
- 1);
1171 APInt::tcShiftLeft(dividend
, partsCount
, 1);
1174 /* Figure out the lost fraction. */
1175 int cmp
= APInt::tcCompare(dividend
, divisor
, partsCount
);
1178 lost_fraction
= lfMoreThanHalf
;
1180 lost_fraction
= lfExactlyHalf
;
1181 else if (APInt::tcIsZero(dividend
, partsCount
))
1182 lost_fraction
= lfExactlyZero
;
1184 lost_fraction
= lfLessThanHalf
;
1189 return lost_fraction
;
1192 unsigned int IEEEFloat::significandMSB() const {
1193 return APInt::tcMSB(significandParts(), partCount());
1196 unsigned int IEEEFloat::significandLSB() const {
1197 return APInt::tcLSB(significandParts(), partCount());
1200 /* Note that a zero result is NOT normalized to fcZero. */
1201 lostFraction
IEEEFloat::shiftSignificandRight(unsigned int bits
) {
1202 /* Our exponent should not overflow. */
1203 assert((ExponentType
) (exponent
+ bits
) >= exponent
);
1207 return shiftRight(significandParts(), partCount(), bits
);
1210 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1211 void IEEEFloat::shiftSignificandLeft(unsigned int bits
) {
1212 assert(bits
< semantics
->precision
);
1215 unsigned int partsCount
= partCount();
1217 APInt::tcShiftLeft(significandParts(), partsCount
, bits
);
1220 assert(!APInt::tcIsZero(significandParts(), partsCount
));
1224 IEEEFloat::cmpResult
1225 IEEEFloat::compareAbsoluteValue(const IEEEFloat
&rhs
) const {
1228 assert(semantics
== rhs
.semantics
);
1229 assert(isFiniteNonZero());
1230 assert(rhs
.isFiniteNonZero());
1232 compare
= exponent
- rhs
.exponent
;
1234 /* If exponents are equal, do an unsigned bignum comparison of the
1237 compare
= APInt::tcCompare(significandParts(), rhs
.significandParts(),
1241 return cmpGreaterThan
;
1242 else if (compare
< 0)
1248 /* Handle overflow. Sign is preserved. We either become infinity or
1249 the largest finite number. */
1250 IEEEFloat::opStatus
IEEEFloat::handleOverflow(roundingMode rounding_mode
) {
1252 if (rounding_mode
== rmNearestTiesToEven
||
1253 rounding_mode
== rmNearestTiesToAway
||
1254 (rounding_mode
== rmTowardPositive
&& !sign
) ||
1255 (rounding_mode
== rmTowardNegative
&& sign
)) {
1256 category
= fcInfinity
;
1257 return (opStatus
) (opOverflow
| opInexact
);
1260 /* Otherwise we become the largest finite number. */
1261 category
= fcNormal
;
1262 exponent
= semantics
->maxExponent
;
1263 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1264 semantics
->precision
);
1269 /* Returns TRUE if, when truncating the current number, with BIT the
1270 new LSB, with the given lost fraction and rounding mode, the result
1271 would need to be rounded away from zero (i.e., by increasing the
1272 signficand). This routine must work for fcZero of both signs, and
1273 fcNormal numbers. */
1274 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode
,
1275 lostFraction lost_fraction
,
1276 unsigned int bit
) const {
1277 /* NaNs and infinities should not have lost fractions. */
1278 assert(isFiniteNonZero() || category
== fcZero
);
1280 /* Current callers never pass this so we don't handle it. */
1281 assert(lost_fraction
!= lfExactlyZero
);
1283 switch (rounding_mode
) {
1284 case rmNearestTiesToAway
:
1285 return lost_fraction
== lfExactlyHalf
|| lost_fraction
== lfMoreThanHalf
;
1287 case rmNearestTiesToEven
:
1288 if (lost_fraction
== lfMoreThanHalf
)
1291 /* Our zeroes don't have a significand to test. */
1292 if (lost_fraction
== lfExactlyHalf
&& category
!= fcZero
)
1293 return APInt::tcExtractBit(significandParts(), bit
);
1300 case rmTowardPositive
:
1303 case rmTowardNegative
:
1306 llvm_unreachable("Invalid rounding mode found");
1309 IEEEFloat::opStatus
IEEEFloat::normalize(roundingMode rounding_mode
,
1310 lostFraction lost_fraction
) {
1311 unsigned int omsb
; /* One, not zero, based MSB. */
1314 if (!isFiniteNonZero())
1317 /* Before rounding normalize the exponent of fcNormal numbers. */
1318 omsb
= significandMSB() + 1;
1321 /* OMSB is numbered from 1. We want to place it in the integer
1322 bit numbered PRECISION if possible, with a compensating change in
1324 exponentChange
= omsb
- semantics
->precision
;
1326 /* If the resulting exponent is too high, overflow according to
1327 the rounding mode. */
1328 if (exponent
+ exponentChange
> semantics
->maxExponent
)
1329 return handleOverflow(rounding_mode
);
1331 /* Subnormal numbers have exponent minExponent, and their MSB
1332 is forced based on that. */
1333 if (exponent
+ exponentChange
< semantics
->minExponent
)
1334 exponentChange
= semantics
->minExponent
- exponent
;
1336 /* Shifting left is easy as we don't lose precision. */
1337 if (exponentChange
< 0) {
1338 assert(lost_fraction
== lfExactlyZero
);
1340 shiftSignificandLeft(-exponentChange
);
1345 if (exponentChange
> 0) {
1348 /* Shift right and capture any new lost fraction. */
1349 lf
= shiftSignificandRight(exponentChange
);
1351 lost_fraction
= combineLostFractions(lf
, lost_fraction
);
1353 /* Keep OMSB up-to-date. */
1354 if (omsb
> (unsigned) exponentChange
)
1355 omsb
-= exponentChange
;
1361 /* Now round the number according to rounding_mode given the lost
1364 /* As specified in IEEE 754, since we do not trap we do not report
1365 underflow for exact results. */
1366 if (lost_fraction
== lfExactlyZero
) {
1367 /* Canonicalize zeroes. */
1374 /* Increment the significand if we're rounding away from zero. */
1375 if (roundAwayFromZero(rounding_mode
, lost_fraction
, 0)) {
1377 exponent
= semantics
->minExponent
;
1379 incrementSignificand();
1380 omsb
= significandMSB() + 1;
1382 /* Did the significand increment overflow? */
1383 if (omsb
== (unsigned) semantics
->precision
+ 1) {
1384 /* Renormalize by incrementing the exponent and shifting our
1385 significand right one. However if we already have the
1386 maximum exponent we overflow to infinity. */
1387 if (exponent
== semantics
->maxExponent
) {
1388 category
= fcInfinity
;
1390 return (opStatus
) (opOverflow
| opInexact
);
1393 shiftSignificandRight(1);
1399 /* The normal case - we were and are not denormal, and any
1400 significand increment above didn't overflow. */
1401 if (omsb
== semantics
->precision
)
1404 /* We have a non-zero denormal. */
1405 assert(omsb
< semantics
->precision
);
1407 /* Canonicalize zeroes. */
1411 /* The fcZero case is a denormal that underflowed to zero. */
1412 return (opStatus
) (opUnderflow
| opInexact
);
1415 IEEEFloat::opStatus
IEEEFloat::addOrSubtractSpecials(const IEEEFloat
&rhs
,
1417 switch (PackCategoriesIntoKey(category
, rhs
.category
)) {
1419 llvm_unreachable(nullptr);
1421 case PackCategoriesIntoKey(fcNaN
, fcZero
):
1422 case PackCategoriesIntoKey(fcNaN
, fcNormal
):
1423 case PackCategoriesIntoKey(fcNaN
, fcInfinity
):
1424 case PackCategoriesIntoKey(fcNaN
, fcNaN
):
1425 case PackCategoriesIntoKey(fcNormal
, fcZero
):
1426 case PackCategoriesIntoKey(fcInfinity
, fcNormal
):
1427 case PackCategoriesIntoKey(fcInfinity
, fcZero
):
1430 case PackCategoriesIntoKey(fcZero
, fcNaN
):
1431 case PackCategoriesIntoKey(fcNormal
, fcNaN
):
1432 case PackCategoriesIntoKey(fcInfinity
, fcNaN
):
1433 // We need to be sure to flip the sign here for subtraction because we
1434 // don't have a separate negate operation so -NaN becomes 0 - NaN here.
1435 sign
= rhs
.sign
^ subtract
;
1437 copySignificand(rhs
);
1440 case PackCategoriesIntoKey(fcNormal
, fcInfinity
):
1441 case PackCategoriesIntoKey(fcZero
, fcInfinity
):
1442 category
= fcInfinity
;
1443 sign
= rhs
.sign
^ subtract
;
1446 case PackCategoriesIntoKey(fcZero
, fcNormal
):
1448 sign
= rhs
.sign
^ subtract
;
1451 case PackCategoriesIntoKey(fcZero
, fcZero
):
1452 /* Sign depends on rounding mode; handled by caller. */
1455 case PackCategoriesIntoKey(fcInfinity
, fcInfinity
):
1456 /* Differently signed infinities can only be validly
1458 if (((sign
^ rhs
.sign
)!=0) != subtract
) {
1465 case PackCategoriesIntoKey(fcNormal
, fcNormal
):
1470 /* Add or subtract two normal numbers. */
1471 lostFraction
IEEEFloat::addOrSubtractSignificand(const IEEEFloat
&rhs
,
1474 lostFraction lost_fraction
;
1477 /* Determine if the operation on the absolute values is effectively
1478 an addition or subtraction. */
1479 subtract
^= static_cast<bool>(sign
^ rhs
.sign
);
1481 /* Are we bigger exponent-wise than the RHS? */
1482 bits
= exponent
- rhs
.exponent
;
1484 /* Subtraction is more subtle than one might naively expect. */
1486 IEEEFloat
temp_rhs(rhs
);
1490 reverse
= compareAbsoluteValue(temp_rhs
) == cmpLessThan
;
1491 lost_fraction
= lfExactlyZero
;
1492 } else if (bits
> 0) {
1493 lost_fraction
= temp_rhs
.shiftSignificandRight(bits
- 1);
1494 shiftSignificandLeft(1);
1497 lost_fraction
= shiftSignificandRight(-bits
- 1);
1498 temp_rhs
.shiftSignificandLeft(1);
1503 carry
= temp_rhs
.subtractSignificand
1504 (*this, lost_fraction
!= lfExactlyZero
);
1505 copySignificand(temp_rhs
);
1508 carry
= subtractSignificand
1509 (temp_rhs
, lost_fraction
!= lfExactlyZero
);
1512 /* Invert the lost fraction - it was on the RHS and
1514 if (lost_fraction
== lfLessThanHalf
)
1515 lost_fraction
= lfMoreThanHalf
;
1516 else if (lost_fraction
== lfMoreThanHalf
)
1517 lost_fraction
= lfLessThanHalf
;
1519 /* The code above is intended to ensure that no borrow is
1525 IEEEFloat
temp_rhs(rhs
);
1527 lost_fraction
= temp_rhs
.shiftSignificandRight(bits
);
1528 carry
= addSignificand(temp_rhs
);
1530 lost_fraction
= shiftSignificandRight(-bits
);
1531 carry
= addSignificand(rhs
);
1534 /* We have a guard bit; generating a carry cannot happen. */
1539 return lost_fraction
;
1542 IEEEFloat::opStatus
IEEEFloat::multiplySpecials(const IEEEFloat
&rhs
) {
1543 switch (PackCategoriesIntoKey(category
, rhs
.category
)) {
1545 llvm_unreachable(nullptr);
1547 case PackCategoriesIntoKey(fcNaN
, fcZero
):
1548 case PackCategoriesIntoKey(fcNaN
, fcNormal
):
1549 case PackCategoriesIntoKey(fcNaN
, fcInfinity
):
1550 case PackCategoriesIntoKey(fcNaN
, fcNaN
):
1554 case PackCategoriesIntoKey(fcZero
, fcNaN
):
1555 case PackCategoriesIntoKey(fcNormal
, fcNaN
):
1556 case PackCategoriesIntoKey(fcInfinity
, fcNaN
):
1559 copySignificand(rhs
);
1562 case PackCategoriesIntoKey(fcNormal
, fcInfinity
):
1563 case PackCategoriesIntoKey(fcInfinity
, fcNormal
):
1564 case PackCategoriesIntoKey(fcInfinity
, fcInfinity
):
1565 category
= fcInfinity
;
1568 case PackCategoriesIntoKey(fcZero
, fcNormal
):
1569 case PackCategoriesIntoKey(fcNormal
, fcZero
):
1570 case PackCategoriesIntoKey(fcZero
, fcZero
):
1574 case PackCategoriesIntoKey(fcZero
, fcInfinity
):
1575 case PackCategoriesIntoKey(fcInfinity
, fcZero
):
1579 case PackCategoriesIntoKey(fcNormal
, fcNormal
):
1584 IEEEFloat::opStatus
IEEEFloat::divideSpecials(const IEEEFloat
&rhs
) {
1585 switch (PackCategoriesIntoKey(category
, rhs
.category
)) {
1587 llvm_unreachable(nullptr);
1589 case PackCategoriesIntoKey(fcZero
, fcNaN
):
1590 case PackCategoriesIntoKey(fcNormal
, fcNaN
):
1591 case PackCategoriesIntoKey(fcInfinity
, fcNaN
):
1593 copySignificand(rhs
);
1595 case PackCategoriesIntoKey(fcNaN
, fcZero
):
1596 case PackCategoriesIntoKey(fcNaN
, fcNormal
):
1597 case PackCategoriesIntoKey(fcNaN
, fcInfinity
):
1598 case PackCategoriesIntoKey(fcNaN
, fcNaN
):
1601 case PackCategoriesIntoKey(fcInfinity
, fcZero
):
1602 case PackCategoriesIntoKey(fcInfinity
, fcNormal
):
1603 case PackCategoriesIntoKey(fcZero
, fcInfinity
):
1604 case PackCategoriesIntoKey(fcZero
, fcNormal
):
1607 case PackCategoriesIntoKey(fcNormal
, fcInfinity
):
1611 case PackCategoriesIntoKey(fcNormal
, fcZero
):
1612 category
= fcInfinity
;
1615 case PackCategoriesIntoKey(fcInfinity
, fcInfinity
):
1616 case PackCategoriesIntoKey(fcZero
, fcZero
):
1620 case PackCategoriesIntoKey(fcNormal
, fcNormal
):
1625 IEEEFloat::opStatus
IEEEFloat::modSpecials(const IEEEFloat
&rhs
) {
1626 switch (PackCategoriesIntoKey(category
, rhs
.category
)) {
1628 llvm_unreachable(nullptr);
1630 case PackCategoriesIntoKey(fcNaN
, fcZero
):
1631 case PackCategoriesIntoKey(fcNaN
, fcNormal
):
1632 case PackCategoriesIntoKey(fcNaN
, fcInfinity
):
1633 case PackCategoriesIntoKey(fcNaN
, fcNaN
):
1634 case PackCategoriesIntoKey(fcZero
, fcInfinity
):
1635 case PackCategoriesIntoKey(fcZero
, fcNormal
):
1636 case PackCategoriesIntoKey(fcNormal
, fcInfinity
):
1639 case PackCategoriesIntoKey(fcZero
, fcNaN
):
1640 case PackCategoriesIntoKey(fcNormal
, fcNaN
):
1641 case PackCategoriesIntoKey(fcInfinity
, fcNaN
):
1644 copySignificand(rhs
);
1647 case PackCategoriesIntoKey(fcNormal
, fcZero
):
1648 case PackCategoriesIntoKey(fcInfinity
, fcZero
):
1649 case PackCategoriesIntoKey(fcInfinity
, fcNormal
):
1650 case PackCategoriesIntoKey(fcInfinity
, fcInfinity
):
1651 case PackCategoriesIntoKey(fcZero
, fcZero
):
1655 case PackCategoriesIntoKey(fcNormal
, fcNormal
):
1661 void IEEEFloat::changeSign() {
1662 /* Look mummy, this one's easy. */
1666 /* Normalized addition or subtraction. */
1667 IEEEFloat::opStatus
IEEEFloat::addOrSubtract(const IEEEFloat
&rhs
,
1668 roundingMode rounding_mode
,
1672 fs
= addOrSubtractSpecials(rhs
, subtract
);
1674 /* This return code means it was not a simple case. */
1675 if (fs
== opDivByZero
) {
1676 lostFraction lost_fraction
;
1678 lost_fraction
= addOrSubtractSignificand(rhs
, subtract
);
1679 fs
= normalize(rounding_mode
, lost_fraction
);
1681 /* Can only be zero if we lost no fraction. */
1682 assert(category
!= fcZero
|| lost_fraction
== lfExactlyZero
);
1685 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1686 positive zero unless rounding to minus infinity, except that
1687 adding two like-signed zeroes gives that zero. */
1688 if (category
== fcZero
) {
1689 if (rhs
.category
!= fcZero
|| (sign
== rhs
.sign
) == subtract
)
1690 sign
= (rounding_mode
== rmTowardNegative
);
1696 /* Normalized addition. */
1697 IEEEFloat::opStatus
IEEEFloat::add(const IEEEFloat
&rhs
,
1698 roundingMode rounding_mode
) {
1699 return addOrSubtract(rhs
, rounding_mode
, false);
1702 /* Normalized subtraction. */
1703 IEEEFloat::opStatus
IEEEFloat::subtract(const IEEEFloat
&rhs
,
1704 roundingMode rounding_mode
) {
1705 return addOrSubtract(rhs
, rounding_mode
, true);
1708 /* Normalized multiply. */
1709 IEEEFloat::opStatus
IEEEFloat::multiply(const IEEEFloat
&rhs
,
1710 roundingMode rounding_mode
) {
1714 fs
= multiplySpecials(rhs
);
1716 if (isFiniteNonZero()) {
1717 lostFraction lost_fraction
= multiplySignificand(rhs
, nullptr);
1718 fs
= normalize(rounding_mode
, lost_fraction
);
1719 if (lost_fraction
!= lfExactlyZero
)
1720 fs
= (opStatus
) (fs
| opInexact
);
1726 /* Normalized divide. */
1727 IEEEFloat::opStatus
IEEEFloat::divide(const IEEEFloat
&rhs
,
1728 roundingMode rounding_mode
) {
1732 fs
= divideSpecials(rhs
);
1734 if (isFiniteNonZero()) {
1735 lostFraction lost_fraction
= divideSignificand(rhs
);
1736 fs
= normalize(rounding_mode
, lost_fraction
);
1737 if (lost_fraction
!= lfExactlyZero
)
1738 fs
= (opStatus
) (fs
| opInexact
);
1744 /* Normalized remainder. This is not currently correct in all cases. */
1745 IEEEFloat::opStatus
IEEEFloat::remainder(const IEEEFloat
&rhs
) {
1747 IEEEFloat V
= *this;
1748 unsigned int origSign
= sign
;
1750 fs
= V
.divide(rhs
, rmNearestTiesToEven
);
1751 if (fs
== opDivByZero
)
1754 int parts
= partCount();
1755 integerPart
*x
= new integerPart
[parts
];
1757 fs
= V
.convertToInteger(makeMutableArrayRef(x
, parts
),
1758 parts
* integerPartWidth
, true, rmNearestTiesToEven
,
1760 if (fs
== opInvalidOp
) {
1765 fs
= V
.convertFromZeroExtendedInteger(x
, parts
* integerPartWidth
, true,
1766 rmNearestTiesToEven
);
1767 assert(fs
==opOK
); // should always work
1769 fs
= V
.multiply(rhs
, rmNearestTiesToEven
);
1770 assert(fs
==opOK
|| fs
==opInexact
); // should not overflow or underflow
1772 fs
= subtract(V
, rmNearestTiesToEven
);
1773 assert(fs
==opOK
|| fs
==opInexact
); // likewise
1776 sign
= origSign
; // IEEE754 requires this
1781 /* Normalized llvm frem (C fmod). */
1782 IEEEFloat::opStatus
IEEEFloat::mod(const IEEEFloat
&rhs
) {
1784 fs
= modSpecials(rhs
);
1785 unsigned int origSign
= sign
;
1787 while (isFiniteNonZero() && rhs
.isFiniteNonZero() &&
1788 compareAbsoluteValue(rhs
) != cmpLessThan
) {
1789 IEEEFloat V
= scalbn(rhs
, ilogb(*this) - ilogb(rhs
), rmNearestTiesToEven
);
1790 if (compareAbsoluteValue(V
) == cmpLessThan
)
1791 V
= scalbn(V
, -1, rmNearestTiesToEven
);
1794 fs
= subtract(V
, rmNearestTiesToEven
);
1798 sign
= origSign
; // fmod requires this
1802 /* Normalized fused-multiply-add. */
1803 IEEEFloat::opStatus
IEEEFloat::fusedMultiplyAdd(const IEEEFloat
&multiplicand
,
1804 const IEEEFloat
&addend
,
1805 roundingMode rounding_mode
) {
1808 /* Post-multiplication sign, before addition. */
1809 sign
^= multiplicand
.sign
;
1811 /* If and only if all arguments are normal do we need to do an
1812 extended-precision calculation. */
1813 if (isFiniteNonZero() &&
1814 multiplicand
.isFiniteNonZero() &&
1815 addend
.isFinite()) {
1816 lostFraction lost_fraction
;
1818 lost_fraction
= multiplySignificand(multiplicand
, &addend
);
1819 fs
= normalize(rounding_mode
, lost_fraction
);
1820 if (lost_fraction
!= lfExactlyZero
)
1821 fs
= (opStatus
) (fs
| opInexact
);
1823 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1824 positive zero unless rounding to minus infinity, except that
1825 adding two like-signed zeroes gives that zero. */
1826 if (category
== fcZero
&& !(fs
& opUnderflow
) && sign
!= addend
.sign
)
1827 sign
= (rounding_mode
== rmTowardNegative
);
1829 fs
= multiplySpecials(multiplicand
);
1831 /* FS can only be opOK or opInvalidOp. There is no more work
1832 to do in the latter case. The IEEE-754R standard says it is
1833 implementation-defined in this case whether, if ADDEND is a
1834 quiet NaN, we raise invalid op; this implementation does so.
1836 If we need to do the addition we can do so with normal
1839 fs
= addOrSubtract(addend
, rounding_mode
, false);
1845 /* Rounding-mode corrrect round to integral value. */
1846 IEEEFloat::opStatus
IEEEFloat::roundToIntegral(roundingMode rounding_mode
) {
1849 // If the exponent is large enough, we know that this value is already
1850 // integral, and the arithmetic below would potentially cause it to saturate
1851 // to +/-Inf. Bail out early instead.
1852 if (isFiniteNonZero() && exponent
+1 >= (int)semanticsPrecision(*semantics
))
1855 // The algorithm here is quite simple: we add 2^(p-1), where p is the
1856 // precision of our format, and then subtract it back off again. The choice
1857 // of rounding modes for the addition/subtraction determines the rounding mode
1858 // for our integral rounding as well.
1859 // NOTE: When the input value is negative, we do subtraction followed by
1860 // addition instead.
1861 APInt
IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics
)), 1);
1862 IntegerConstant
<<= semanticsPrecision(*semantics
)-1;
1863 IEEEFloat
MagicConstant(*semantics
);
1864 fs
= MagicConstant
.convertFromAPInt(IntegerConstant
, false,
1865 rmNearestTiesToEven
);
1866 MagicConstant
.sign
= sign
;
1871 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
1872 bool inputSign
= isNegative();
1874 fs
= add(MagicConstant
, rounding_mode
);
1875 if (fs
!= opOK
&& fs
!= opInexact
)
1878 fs
= subtract(MagicConstant
, rounding_mode
);
1880 // Restore the input sign.
1881 if (inputSign
!= isNegative())
1888 /* Comparison requires normalized numbers. */
1889 IEEEFloat::cmpResult
IEEEFloat::compare(const IEEEFloat
&rhs
) const {
1892 assert(semantics
== rhs
.semantics
);
1894 switch (PackCategoriesIntoKey(category
, rhs
.category
)) {
1896 llvm_unreachable(nullptr);
1898 case PackCategoriesIntoKey(fcNaN
, fcZero
):
1899 case PackCategoriesIntoKey(fcNaN
, fcNormal
):
1900 case PackCategoriesIntoKey(fcNaN
, fcInfinity
):
1901 case PackCategoriesIntoKey(fcNaN
, fcNaN
):
1902 case PackCategoriesIntoKey(fcZero
, fcNaN
):
1903 case PackCategoriesIntoKey(fcNormal
, fcNaN
):
1904 case PackCategoriesIntoKey(fcInfinity
, fcNaN
):
1905 return cmpUnordered
;
1907 case PackCategoriesIntoKey(fcInfinity
, fcNormal
):
1908 case PackCategoriesIntoKey(fcInfinity
, fcZero
):
1909 case PackCategoriesIntoKey(fcNormal
, fcZero
):
1913 return cmpGreaterThan
;
1915 case PackCategoriesIntoKey(fcNormal
, fcInfinity
):
1916 case PackCategoriesIntoKey(fcZero
, fcInfinity
):
1917 case PackCategoriesIntoKey(fcZero
, fcNormal
):
1919 return cmpGreaterThan
;
1923 case PackCategoriesIntoKey(fcInfinity
, fcInfinity
):
1924 if (sign
== rhs
.sign
)
1929 return cmpGreaterThan
;
1931 case PackCategoriesIntoKey(fcZero
, fcZero
):
1934 case PackCategoriesIntoKey(fcNormal
, fcNormal
):
1938 /* Two normal numbers. Do they have the same sign? */
1939 if (sign
!= rhs
.sign
) {
1941 result
= cmpLessThan
;
1943 result
= cmpGreaterThan
;
1945 /* Compare absolute values; invert result if negative. */
1946 result
= compareAbsoluteValue(rhs
);
1949 if (result
== cmpLessThan
)
1950 result
= cmpGreaterThan
;
1951 else if (result
== cmpGreaterThan
)
1952 result
= cmpLessThan
;
1959 /// IEEEFloat::convert - convert a value of one floating point type to another.
1960 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1961 /// records whether the transformation lost information, i.e. whether
1962 /// converting the result back to the original type will produce the
1963 /// original value (this is almost the same as return value==fsOK, but there
1964 /// are edge cases where this is not so).
1966 IEEEFloat::opStatus
IEEEFloat::convert(const fltSemantics
&toSemantics
,
1967 roundingMode rounding_mode
,
1969 lostFraction lostFraction
;
1970 unsigned int newPartCount
, oldPartCount
;
1973 const fltSemantics
&fromSemantics
= *semantics
;
1975 lostFraction
= lfExactlyZero
;
1976 newPartCount
= partCountForBits(toSemantics
.precision
+ 1);
1977 oldPartCount
= partCount();
1978 shift
= toSemantics
.precision
- fromSemantics
.precision
;
1980 bool X86SpecialNan
= false;
1981 if (&fromSemantics
== &semX87DoubleExtended
&&
1982 &toSemantics
!= &semX87DoubleExtended
&& category
== fcNaN
&&
1983 (!(*significandParts() & 0x8000000000000000ULL
) ||
1984 !(*significandParts() & 0x4000000000000000ULL
))) {
1985 // x86 has some unusual NaNs which cannot be represented in any other
1986 // format; note them here.
1987 X86SpecialNan
= true;
1990 // If this is a truncation of a denormal number, and the target semantics
1991 // has larger exponent range than the source semantics (this can happen
1992 // when truncating from PowerPC double-double to double format), the
1993 // right shift could lose result mantissa bits. Adjust exponent instead
1994 // of performing excessive shift.
1995 if (shift
< 0 && isFiniteNonZero()) {
1996 int exponentChange
= significandMSB() + 1 - fromSemantics
.precision
;
1997 if (exponent
+ exponentChange
< toSemantics
.minExponent
)
1998 exponentChange
= toSemantics
.minExponent
- exponent
;
1999 if (exponentChange
< shift
)
2000 exponentChange
= shift
;
2001 if (exponentChange
< 0) {
2002 shift
-= exponentChange
;
2003 exponent
+= exponentChange
;
2007 // If this is a truncation, perform the shift before we narrow the storage.
2008 if (shift
< 0 && (isFiniteNonZero() || category
==fcNaN
))
2009 lostFraction
= shiftRight(significandParts(), oldPartCount
, -shift
);
2011 // Fix the storage so it can hold to new value.
2012 if (newPartCount
> oldPartCount
) {
2013 // The new type requires more storage; make it available.
2014 integerPart
*newParts
;
2015 newParts
= new integerPart
[newPartCount
];
2016 APInt::tcSet(newParts
, 0, newPartCount
);
2017 if (isFiniteNonZero() || category
==fcNaN
)
2018 APInt::tcAssign(newParts
, significandParts(), oldPartCount
);
2020 significand
.parts
= newParts
;
2021 } else if (newPartCount
== 1 && oldPartCount
!= 1) {
2022 // Switch to built-in storage for a single part.
2023 integerPart newPart
= 0;
2024 if (isFiniteNonZero() || category
==fcNaN
)
2025 newPart
= significandParts()[0];
2027 significand
.part
= newPart
;
2030 // Now that we have the right storage, switch the semantics.
2031 semantics
= &toSemantics
;
2033 // If this is an extension, perform the shift now that the storage is
2035 if (shift
> 0 && (isFiniteNonZero() || category
==fcNaN
))
2036 APInt::tcShiftLeft(significandParts(), newPartCount
, shift
);
2038 if (isFiniteNonZero()) {
2039 fs
= normalize(rounding_mode
, lostFraction
);
2040 *losesInfo
= (fs
!= opOK
);
2041 } else if (category
== fcNaN
) {
2042 *losesInfo
= lostFraction
!= lfExactlyZero
|| X86SpecialNan
;
2044 // For x87 extended precision, we want to make a NaN, not a special NaN if
2045 // the input wasn't special either.
2046 if (!X86SpecialNan
&& semantics
== &semX87DoubleExtended
)
2047 APInt::tcSetBit(significandParts(), semantics
->precision
- 1);
2049 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
2050 // does not give you back the same bits. This is dubious, and we
2051 // don't currently do it. You're really supposed to get
2052 // an invalid operation signal at runtime, but nobody does that.
2062 /* Convert a floating point number to an integer according to the
2063 rounding mode. If the rounded integer value is out of range this
2064 returns an invalid operation exception and the contents of the
2065 destination parts are unspecified. If the rounded value is in
2066 range but the floating point number is not the exact integer, the C
2067 standard doesn't require an inexact exception to be raised. IEEE
2068 854 does require it so we do that.
2070 Note that for conversions to integer type the C standard requires
2071 round-to-zero to always be used. */
2072 IEEEFloat::opStatus
IEEEFloat::convertToSignExtendedInteger(
2073 MutableArrayRef
<integerPart
> parts
, unsigned int width
, bool isSigned
,
2074 roundingMode rounding_mode
, bool *isExact
) const {
2075 lostFraction lost_fraction
;
2076 const integerPart
*src
;
2077 unsigned int dstPartsCount
, truncatedBits
;
2081 /* Handle the three special cases first. */
2082 if (category
== fcInfinity
|| category
== fcNaN
)
2085 dstPartsCount
= partCountForBits(width
);
2086 assert(dstPartsCount
<= parts
.size() && "Integer too big");
2088 if (category
== fcZero
) {
2089 APInt::tcSet(parts
.data(), 0, dstPartsCount
);
2090 // Negative zero can't be represented as an int.
2095 src
= significandParts();
2097 /* Step 1: place our absolute value, with any fraction truncated, in
2100 /* Our absolute value is less than one; truncate everything. */
2101 APInt::tcSet(parts
.data(), 0, dstPartsCount
);
2102 /* For exponent -1 the integer bit represents .5, look at that.
2103 For smaller exponents leftmost truncated bit is 0. */
2104 truncatedBits
= semantics
->precision
-1U - exponent
;
2106 /* We want the most significant (exponent + 1) bits; the rest are
2108 unsigned int bits
= exponent
+ 1U;
2110 /* Hopelessly large in magnitude? */
2114 if (bits
< semantics
->precision
) {
2115 /* We truncate (semantics->precision - bits) bits. */
2116 truncatedBits
= semantics
->precision
- bits
;
2117 APInt::tcExtract(parts
.data(), dstPartsCount
, src
, bits
, truncatedBits
);
2119 /* We want at least as many bits as are available. */
2120 APInt::tcExtract(parts
.data(), dstPartsCount
, src
, semantics
->precision
,
2122 APInt::tcShiftLeft(parts
.data(), dstPartsCount
,
2123 bits
- semantics
->precision
);
2128 /* Step 2: work out any lost fraction, and increment the absolute
2129 value if we would round away from zero. */
2130 if (truncatedBits
) {
2131 lost_fraction
= lostFractionThroughTruncation(src
, partCount(),
2133 if (lost_fraction
!= lfExactlyZero
&&
2134 roundAwayFromZero(rounding_mode
, lost_fraction
, truncatedBits
)) {
2135 if (APInt::tcIncrement(parts
.data(), dstPartsCount
))
2136 return opInvalidOp
; /* Overflow. */
2139 lost_fraction
= lfExactlyZero
;
2142 /* Step 3: check if we fit in the destination. */
2143 unsigned int omsb
= APInt::tcMSB(parts
.data(), dstPartsCount
) + 1;
2147 /* Negative numbers cannot be represented as unsigned. */
2151 /* It takes omsb bits to represent the unsigned integer value.
2152 We lose a bit for the sign, but care is needed as the
2153 maximally negative integer is a special case. */
2154 if (omsb
== width
&&
2155 APInt::tcLSB(parts
.data(), dstPartsCount
) + 1 != omsb
)
2158 /* This case can happen because of rounding. */
2163 APInt::tcNegate (parts
.data(), dstPartsCount
);
2165 if (omsb
>= width
+ !isSigned
)
2169 if (lost_fraction
== lfExactlyZero
) {
2176 /* Same as convertToSignExtendedInteger, except we provide
2177 deterministic values in case of an invalid operation exception,
2178 namely zero for NaNs and the minimal or maximal value respectively
2179 for underflow or overflow.
2180 The *isExact output tells whether the result is exact, in the sense
2181 that converting it back to the original floating point type produces
2182 the original value. This is almost equivalent to result==opOK,
2183 except for negative zeroes.
2186 IEEEFloat::convertToInteger(MutableArrayRef
<integerPart
> parts
,
2187 unsigned int width
, bool isSigned
,
2188 roundingMode rounding_mode
, bool *isExact
) const {
2191 fs
= convertToSignExtendedInteger(parts
, width
, isSigned
, rounding_mode
,
2194 if (fs
== opInvalidOp
) {
2195 unsigned int bits
, dstPartsCount
;
2197 dstPartsCount
= partCountForBits(width
);
2198 assert(dstPartsCount
<= parts
.size() && "Integer too big");
2200 if (category
== fcNaN
)
2205 bits
= width
- isSigned
;
2207 APInt::tcSetLeastSignificantBits(parts
.data(), dstPartsCount
, bits
);
2208 if (sign
&& isSigned
)
2209 APInt::tcShiftLeft(parts
.data(), dstPartsCount
, width
- 1);
2215 /* Convert an unsigned integer SRC to a floating point number,
2216 rounding according to ROUNDING_MODE. The sign of the floating
2217 point number is not modified. */
2218 IEEEFloat::opStatus
IEEEFloat::convertFromUnsignedParts(
2219 const integerPart
*src
, unsigned int srcCount
, roundingMode rounding_mode
) {
2220 unsigned int omsb
, precision
, dstCount
;
2222 lostFraction lost_fraction
;
2224 category
= fcNormal
;
2225 omsb
= APInt::tcMSB(src
, srcCount
) + 1;
2226 dst
= significandParts();
2227 dstCount
= partCount();
2228 precision
= semantics
->precision
;
2230 /* We want the most significant PRECISION bits of SRC. There may not
2231 be that many; extract what we can. */
2232 if (precision
<= omsb
) {
2233 exponent
= omsb
- 1;
2234 lost_fraction
= lostFractionThroughTruncation(src
, srcCount
,
2236 APInt::tcExtract(dst
, dstCount
, src
, precision
, omsb
- precision
);
2238 exponent
= precision
- 1;
2239 lost_fraction
= lfExactlyZero
;
2240 APInt::tcExtract(dst
, dstCount
, src
, omsb
, 0);
2243 return normalize(rounding_mode
, lost_fraction
);
2246 IEEEFloat::opStatus
IEEEFloat::convertFromAPInt(const APInt
&Val
, bool isSigned
,
2247 roundingMode rounding_mode
) {
2248 unsigned int partCount
= Val
.getNumWords();
2252 if (isSigned
&& api
.isNegative()) {
2257 return convertFromUnsignedParts(api
.getRawData(), partCount
, rounding_mode
);
2260 /* Convert a two's complement integer SRC to a floating point number,
2261 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2262 integer is signed, in which case it must be sign-extended. */
2264 IEEEFloat::convertFromSignExtendedInteger(const integerPart
*src
,
2265 unsigned int srcCount
, bool isSigned
,
2266 roundingMode rounding_mode
) {
2270 APInt::tcExtractBit(src
, srcCount
* integerPartWidth
- 1)) {
2273 /* If we're signed and negative negate a copy. */
2275 copy
= new integerPart
[srcCount
];
2276 APInt::tcAssign(copy
, src
, srcCount
);
2277 APInt::tcNegate(copy
, srcCount
);
2278 status
= convertFromUnsignedParts(copy
, srcCount
, rounding_mode
);
2282 status
= convertFromUnsignedParts(src
, srcCount
, rounding_mode
);
2288 /* FIXME: should this just take a const APInt reference? */
2290 IEEEFloat::convertFromZeroExtendedInteger(const integerPart
*parts
,
2291 unsigned int width
, bool isSigned
,
2292 roundingMode rounding_mode
) {
2293 unsigned int partCount
= partCountForBits(width
);
2294 APInt api
= APInt(width
, makeArrayRef(parts
, partCount
));
2297 if (isSigned
&& APInt::tcExtractBit(parts
, width
- 1)) {
2302 return convertFromUnsignedParts(api
.getRawData(), partCount
, rounding_mode
);
2306 IEEEFloat::convertFromHexadecimalString(StringRef s
,
2307 roundingMode rounding_mode
) {
2308 lostFraction lost_fraction
= lfExactlyZero
;
2310 category
= fcNormal
;
2314 integerPart
*significand
= significandParts();
2315 unsigned partsCount
= partCount();
2316 unsigned bitPos
= partsCount
* integerPartWidth
;
2317 bool computedTrailingFraction
= false;
2319 // Skip leading zeroes and any (hexa)decimal point.
2320 StringRef::iterator begin
= s
.begin();
2321 StringRef::iterator end
= s
.end();
2322 StringRef::iterator dot
;
2323 StringRef::iterator p
= skipLeadingZeroesAndAnyDot(begin
, end
, &dot
);
2324 StringRef::iterator firstSignificantDigit
= p
;
2327 integerPart hex_value
;
2330 assert(dot
== end
&& "String contains multiple dots");
2335 hex_value
= hexDigitValue(*p
);
2336 if (hex_value
== -1U)
2341 // Store the number while we have space.
2344 hex_value
<<= bitPos
% integerPartWidth
;
2345 significand
[bitPos
/ integerPartWidth
] |= hex_value
;
2346 } else if (!computedTrailingFraction
) {
2347 lost_fraction
= trailingHexadecimalFraction(p
, end
, hex_value
);
2348 computedTrailingFraction
= true;
2352 /* Hex floats require an exponent but not a hexadecimal point. */
2353 assert(p
!= end
&& "Hex strings require an exponent");
2354 assert((*p
== 'p' || *p
== 'P') && "Invalid character in significand");
2355 assert(p
!= begin
&& "Significand has no digits");
2356 assert((dot
== end
|| p
- begin
!= 1) && "Significand has no digits");
2358 /* Ignore the exponent if we are zero. */
2359 if (p
!= firstSignificantDigit
) {
2362 /* Implicit hexadecimal point? */
2366 /* Calculate the exponent adjustment implicit in the number of
2367 significant digits. */
2368 expAdjustment
= static_cast<int>(dot
- firstSignificantDigit
);
2369 if (expAdjustment
< 0)
2371 expAdjustment
= expAdjustment
* 4 - 1;
2373 /* Adjust for writing the significand starting at the most
2374 significant nibble. */
2375 expAdjustment
+= semantics
->precision
;
2376 expAdjustment
-= partsCount
* integerPartWidth
;
2378 /* Adjust for the given exponent. */
2379 exponent
= totalExponent(p
+ 1, end
, expAdjustment
);
2382 return normalize(rounding_mode
, lost_fraction
);
2386 IEEEFloat::roundSignificandWithExponent(const integerPart
*decSigParts
,
2387 unsigned sigPartCount
, int exp
,
2388 roundingMode rounding_mode
) {
2389 unsigned int parts
, pow5PartCount
;
2390 fltSemantics calcSemantics
= { 32767, -32767, 0, 0 };
2391 integerPart pow5Parts
[maxPowerOfFiveParts
];
2394 isNearest
= (rounding_mode
== rmNearestTiesToEven
||
2395 rounding_mode
== rmNearestTiesToAway
);
2397 parts
= partCountForBits(semantics
->precision
+ 11);
2399 /* Calculate pow(5, abs(exp)). */
2400 pow5PartCount
= powerOf5(pow5Parts
, exp
>= 0 ? exp
: -exp
);
2402 for (;; parts
*= 2) {
2403 opStatus sigStatus
, powStatus
;
2404 unsigned int excessPrecision
, truncatedBits
;
2406 calcSemantics
.precision
= parts
* integerPartWidth
- 1;
2407 excessPrecision
= calcSemantics
.precision
- semantics
->precision
;
2408 truncatedBits
= excessPrecision
;
2410 IEEEFloat
decSig(calcSemantics
, uninitialized
);
2411 decSig
.makeZero(sign
);
2412 IEEEFloat
pow5(calcSemantics
);
2414 sigStatus
= decSig
.convertFromUnsignedParts(decSigParts
, sigPartCount
,
2415 rmNearestTiesToEven
);
2416 powStatus
= pow5
.convertFromUnsignedParts(pow5Parts
, pow5PartCount
,
2417 rmNearestTiesToEven
);
2418 /* Add exp, as 10^n = 5^n * 2^n. */
2419 decSig
.exponent
+= exp
;
2421 lostFraction calcLostFraction
;
2422 integerPart HUerr
, HUdistance
;
2423 unsigned int powHUerr
;
2426 /* multiplySignificand leaves the precision-th bit set to 1. */
2427 calcLostFraction
= decSig
.multiplySignificand(pow5
, nullptr);
2428 powHUerr
= powStatus
!= opOK
;
2430 calcLostFraction
= decSig
.divideSignificand(pow5
);
2431 /* Denormal numbers have less precision. */
2432 if (decSig
.exponent
< semantics
->minExponent
) {
2433 excessPrecision
+= (semantics
->minExponent
- decSig
.exponent
);
2434 truncatedBits
= excessPrecision
;
2435 if (excessPrecision
> calcSemantics
.precision
)
2436 excessPrecision
= calcSemantics
.precision
;
2438 /* Extra half-ulp lost in reciprocal of exponent. */
2439 powHUerr
= (powStatus
== opOK
&& calcLostFraction
== lfExactlyZero
) ? 0:2;
2442 /* Both multiplySignificand and divideSignificand return the
2443 result with the integer bit set. */
2444 assert(APInt::tcExtractBit
2445 (decSig
.significandParts(), calcSemantics
.precision
- 1) == 1);
2447 HUerr
= HUerrBound(calcLostFraction
!= lfExactlyZero
, sigStatus
!= opOK
,
2449 HUdistance
= 2 * ulpsFromBoundary(decSig
.significandParts(),
2450 excessPrecision
, isNearest
);
2452 /* Are we guaranteed to round correctly if we truncate? */
2453 if (HUdistance
>= HUerr
) {
2454 APInt::tcExtract(significandParts(), partCount(), decSig
.significandParts(),
2455 calcSemantics
.precision
- excessPrecision
,
2457 /* Take the exponent of decSig. If we tcExtract-ed less bits
2458 above we must adjust our exponent to compensate for the
2459 implicit right shift. */
2460 exponent
= (decSig
.exponent
+ semantics
->precision
2461 - (calcSemantics
.precision
- excessPrecision
));
2462 calcLostFraction
= lostFractionThroughTruncation(decSig
.significandParts(),
2465 return normalize(rounding_mode
, calcLostFraction
);
2471 IEEEFloat::convertFromDecimalString(StringRef str
, roundingMode rounding_mode
) {
2475 /* Scan the text. */
2476 StringRef::iterator p
= str
.begin();
2477 interpretDecimal(p
, str
.end(), &D
);
2479 /* Handle the quick cases. First the case of no significant digits,
2480 i.e. zero, and then exponents that are obviously too large or too
2481 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2482 definitely overflows if
2484 (exp - 1) * L >= maxExponent
2486 and definitely underflows to zero where
2488 (exp + 1) * L <= minExponent - precision
2490 With integer arithmetic the tightest bounds for L are
2492 93/28 < L < 196/59 [ numerator <= 256 ]
2493 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2496 // Test if we have a zero number allowing for strings with no null terminators
2497 // and zero decimals with non-zero exponents.
2499 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
2500 // D->firstSigDigit equals str.end(), every digit must be a zero and there can
2501 // be at most one dot. On the other hand, if we have a zero with a non-zero
2502 // exponent, then we know that D.firstSigDigit will be non-numeric.
2503 if (D
.firstSigDigit
== str
.end() || decDigitValue(*D
.firstSigDigit
) >= 10U) {
2507 /* Check whether the normalized exponent is high enough to overflow
2508 max during the log-rebasing in the max-exponent check below. */
2509 } else if (D
.normalizedExponent
- 1 > INT_MAX
/ 42039) {
2510 fs
= handleOverflow(rounding_mode
);
2512 /* If it wasn't, then it also wasn't high enough to overflow max
2513 during the log-rebasing in the min-exponent check. Check that it
2514 won't overflow min in either check, then perform the min-exponent
2516 } else if (D
.normalizedExponent
- 1 < INT_MIN
/ 42039 ||
2517 (D
.normalizedExponent
+ 1) * 28738 <=
2518 8651 * (semantics
->minExponent
- (int) semantics
->precision
)) {
2519 /* Underflow to zero and round. */
2520 category
= fcNormal
;
2522 fs
= normalize(rounding_mode
, lfLessThanHalf
);
2524 /* We can finally safely perform the max-exponent check. */
2525 } else if ((D
.normalizedExponent
- 1) * 42039
2526 >= 12655 * semantics
->maxExponent
) {
2527 /* Overflow and round. */
2528 fs
= handleOverflow(rounding_mode
);
2530 integerPart
*decSignificand
;
2531 unsigned int partCount
;
2533 /* A tight upper bound on number of bits required to hold an
2534 N-digit decimal integer is N * 196 / 59. Allocate enough space
2535 to hold the full significand, and an extra part required by
2537 partCount
= static_cast<unsigned int>(D
.lastSigDigit
- D
.firstSigDigit
) + 1;
2538 partCount
= partCountForBits(1 + 196 * partCount
/ 59);
2539 decSignificand
= new integerPart
[partCount
+ 1];
2542 /* Convert to binary efficiently - we do almost all multiplication
2543 in an integerPart. When this would overflow do we do a single
2544 bignum multiplication, and then revert again to multiplication
2545 in an integerPart. */
2547 integerPart decValue
, val
, multiplier
;
2555 if (p
== str
.end()) {
2559 decValue
= decDigitValue(*p
++);
2560 assert(decValue
< 10U && "Invalid character in significand");
2562 val
= val
* 10 + decValue
;
2563 /* The maximum number that can be multiplied by ten with any
2564 digit added without overflowing an integerPart. */
2565 } while (p
<= D
.lastSigDigit
&& multiplier
<= (~ (integerPart
) 0 - 9) / 10);
2567 /* Multiply out the current part. */
2568 APInt::tcMultiplyPart(decSignificand
, decSignificand
, multiplier
, val
,
2569 partCount
, partCount
+ 1, false);
2571 /* If we used another part (likely but not guaranteed), increase
2573 if (decSignificand
[partCount
])
2575 } while (p
<= D
.lastSigDigit
);
2577 category
= fcNormal
;
2578 fs
= roundSignificandWithExponent(decSignificand
, partCount
,
2579 D
.exponent
, rounding_mode
);
2581 delete [] decSignificand
;
2587 bool IEEEFloat::convertFromStringSpecials(StringRef str
) {
2588 if (str
.equals("inf") || str
.equals("INFINITY") || str
.equals("+Inf")) {
2593 if (str
.equals("-inf") || str
.equals("-INFINITY") || str
.equals("-Inf")) {
2598 if (str
.equals("nan") || str
.equals("NaN")) {
2599 makeNaN(false, false);
2603 if (str
.equals("-nan") || str
.equals("-NaN")) {
2604 makeNaN(false, true);
2611 IEEEFloat::opStatus
IEEEFloat::convertFromString(StringRef str
,
2612 roundingMode rounding_mode
) {
2613 assert(!str
.empty() && "Invalid string length");
2615 // Handle special cases.
2616 if (convertFromStringSpecials(str
))
2619 /* Handle a leading minus sign. */
2620 StringRef::iterator p
= str
.begin();
2621 size_t slen
= str
.size();
2622 sign
= *p
== '-' ? 1 : 0;
2623 if (*p
== '-' || *p
== '+') {
2626 assert(slen
&& "String has no digits");
2629 if (slen
>= 2 && p
[0] == '0' && (p
[1] == 'x' || p
[1] == 'X')) {
2630 assert(slen
- 2 && "Invalid string");
2631 return convertFromHexadecimalString(StringRef(p
+ 2, slen
- 2),
2635 return convertFromDecimalString(StringRef(p
, slen
), rounding_mode
);
2638 /* Write out a hexadecimal representation of the floating point value
2639 to DST, which must be of sufficient size, in the C99 form
2640 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2641 excluding the terminating NUL.
2643 If UPPERCASE, the output is in upper case, otherwise in lower case.
2645 HEXDIGITS digits appear altogether, rounding the value if
2646 necessary. If HEXDIGITS is 0, the minimal precision to display the
2647 number precisely is used instead. If nothing would appear after
2648 the decimal point it is suppressed.
2650 The decimal exponent is always printed and has at least one digit.
2651 Zero values display an exponent of zero. Infinities and NaNs
2652 appear as "infinity" or "nan" respectively.
2654 The above rules are as specified by C99. There is ambiguity about
2655 what the leading hexadecimal digit should be. This implementation
2656 uses whatever is necessary so that the exponent is displayed as
2657 stored. This implies the exponent will fall within the IEEE format
2658 range, and the leading hexadecimal digit will be 0 (for denormals),
2659 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2660 any other digits zero).
2662 unsigned int IEEEFloat::convertToHexString(char *dst
, unsigned int hexDigits
,
2664 roundingMode rounding_mode
) const {
2673 memcpy (dst
, upperCase
? infinityU
: infinityL
, sizeof infinityU
- 1);
2674 dst
+= sizeof infinityL
- 1;
2678 memcpy (dst
, upperCase
? NaNU
: NaNL
, sizeof NaNU
- 1);
2679 dst
+= sizeof NaNU
- 1;
2684 *dst
++ = upperCase
? 'X': 'x';
2686 if (hexDigits
> 1) {
2688 memset (dst
, '0', hexDigits
- 1);
2689 dst
+= hexDigits
- 1;
2691 *dst
++ = upperCase
? 'P': 'p';
2696 dst
= convertNormalToHexString (dst
, hexDigits
, upperCase
, rounding_mode
);
2702 return static_cast<unsigned int>(dst
- p
);
2705 /* Does the hard work of outputting the correctly rounded hexadecimal
2706 form of a normal floating point number with the specified number of
2707 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2708 digits necessary to print the value precisely is output. */
2709 char *IEEEFloat::convertNormalToHexString(char *dst
, unsigned int hexDigits
,
2711 roundingMode rounding_mode
) const {
2712 unsigned int count
, valueBits
, shift
, partsCount
, outputDigits
;
2713 const char *hexDigitChars
;
2714 const integerPart
*significand
;
2719 *dst
++ = upperCase
? 'X': 'x';
2722 hexDigitChars
= upperCase
? hexDigitsUpper
: hexDigitsLower
;
2724 significand
= significandParts();
2725 partsCount
= partCount();
2727 /* +3 because the first digit only uses the single integer bit, so
2728 we have 3 virtual zero most-significant-bits. */
2729 valueBits
= semantics
->precision
+ 3;
2730 shift
= integerPartWidth
- valueBits
% integerPartWidth
;
2732 /* The natural number of digits required ignoring trailing
2733 insignificant zeroes. */
2734 outputDigits
= (valueBits
- significandLSB () + 3) / 4;
2736 /* hexDigits of zero means use the required number for the
2737 precision. Otherwise, see if we are truncating. If we are,
2738 find out if we need to round away from zero. */
2740 if (hexDigits
< outputDigits
) {
2741 /* We are dropping non-zero bits, so need to check how to round.
2742 "bits" is the number of dropped bits. */
2744 lostFraction fraction
;
2746 bits
= valueBits
- hexDigits
* 4;
2747 fraction
= lostFractionThroughTruncation (significand
, partsCount
, bits
);
2748 roundUp
= roundAwayFromZero(rounding_mode
, fraction
, bits
);
2750 outputDigits
= hexDigits
;
2753 /* Write the digits consecutively, and start writing in the location
2754 of the hexadecimal point. We move the most significant digit
2755 left and add the hexadecimal point later. */
2758 count
= (valueBits
+ integerPartWidth
- 1) / integerPartWidth
;
2760 while (outputDigits
&& count
) {
2763 /* Put the most significant integerPartWidth bits in "part". */
2764 if (--count
== partsCount
)
2765 part
= 0; /* An imaginary higher zero part. */
2767 part
= significand
[count
] << shift
;
2770 part
|= significand
[count
- 1] >> (integerPartWidth
- shift
);
2772 /* Convert as much of "part" to hexdigits as we can. */
2773 unsigned int curDigits
= integerPartWidth
/ 4;
2775 if (curDigits
> outputDigits
)
2776 curDigits
= outputDigits
;
2777 dst
+= partAsHex (dst
, part
, curDigits
, hexDigitChars
);
2778 outputDigits
-= curDigits
;
2784 /* Note that hexDigitChars has a trailing '0'. */
2787 *q
= hexDigitChars
[hexDigitValue (*q
) + 1];
2788 } while (*q
== '0');
2791 /* Add trailing zeroes. */
2792 memset (dst
, '0', outputDigits
);
2793 dst
+= outputDigits
;
2796 /* Move the most significant digit to before the point, and if there
2797 is something after the decimal point add it. This must come
2798 after rounding above. */
2805 /* Finally output the exponent. */
2806 *dst
++ = upperCase
? 'P': 'p';
2808 return writeSignedDecimal (dst
, exponent
);
2811 hash_code
hash_value(const IEEEFloat
&Arg
) {
2812 if (!Arg
.isFiniteNonZero())
2813 return hash_combine((uint8_t)Arg
.category
,
2814 // NaN has no sign, fix it at zero.
2815 Arg
.isNaN() ? (uint8_t)0 : (uint8_t)Arg
.sign
,
2816 Arg
.semantics
->precision
);
2818 // Normal floats need their exponent and significand hashed.
2819 return hash_combine((uint8_t)Arg
.category
, (uint8_t)Arg
.sign
,
2820 Arg
.semantics
->precision
, Arg
.exponent
,
2822 Arg
.significandParts(),
2823 Arg
.significandParts() + Arg
.partCount()));
2826 // Conversion from APFloat to/from host float/double. It may eventually be
2827 // possible to eliminate these and have everybody deal with APFloats, but that
2828 // will take a while. This approach will not easily extend to long double.
2829 // Current implementation requires integerPartWidth==64, which is correct at
2830 // the moment but could be made more general.
2832 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
2833 // the actual IEEE respresentations. We compensate for that here.
2835 APInt
IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
2836 assert(semantics
== (const llvm::fltSemantics
*)&semX87DoubleExtended
);
2837 assert(partCount()==2);
2839 uint64_t myexponent
, mysignificand
;
2841 if (isFiniteNonZero()) {
2842 myexponent
= exponent
+16383; //bias
2843 mysignificand
= significandParts()[0];
2844 if (myexponent
==1 && !(mysignificand
& 0x8000000000000000ULL
))
2845 myexponent
= 0; // denormal
2846 } else if (category
==fcZero
) {
2849 } else if (category
==fcInfinity
) {
2850 myexponent
= 0x7fff;
2851 mysignificand
= 0x8000000000000000ULL
;
2853 assert(category
== fcNaN
&& "Unknown category");
2854 myexponent
= 0x7fff;
2855 mysignificand
= significandParts()[0];
2859 words
[0] = mysignificand
;
2860 words
[1] = ((uint64_t)(sign
& 1) << 15) |
2861 (myexponent
& 0x7fffLL
);
2862 return APInt(80, words
);
2865 APInt
IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
2866 assert(semantics
== (const llvm::fltSemantics
*)&semPPCDoubleDoubleLegacy
);
2867 assert(partCount()==2);
2873 // Convert number to double. To avoid spurious underflows, we re-
2874 // normalize against the "double" minExponent first, and only *then*
2875 // truncate the mantissa. The result of that second conversion
2876 // may be inexact, but should never underflow.
2877 // Declare fltSemantics before APFloat that uses it (and
2878 // saves pointer to it) to ensure correct destruction order.
2879 fltSemantics extendedSemantics
= *semantics
;
2880 extendedSemantics
.minExponent
= semIEEEdouble
.minExponent
;
2881 IEEEFloat
extended(*this);
2882 fs
= extended
.convert(extendedSemantics
, rmNearestTiesToEven
, &losesInfo
);
2883 assert(fs
== opOK
&& !losesInfo
);
2886 IEEEFloat
u(extended
);
2887 fs
= u
.convert(semIEEEdouble
, rmNearestTiesToEven
, &losesInfo
);
2888 assert(fs
== opOK
|| fs
== opInexact
);
2890 words
[0] = *u
.convertDoubleAPFloatToAPInt().getRawData();
2892 // If conversion was exact or resulted in a special case, we're done;
2893 // just set the second double to zero. Otherwise, re-convert back to
2894 // the extended format and compute the difference. This now should
2895 // convert exactly to double.
2896 if (u
.isFiniteNonZero() && losesInfo
) {
2897 fs
= u
.convert(extendedSemantics
, rmNearestTiesToEven
, &losesInfo
);
2898 assert(fs
== opOK
&& !losesInfo
);
2901 IEEEFloat
v(extended
);
2902 v
.subtract(u
, rmNearestTiesToEven
);
2903 fs
= v
.convert(semIEEEdouble
, rmNearestTiesToEven
, &losesInfo
);
2904 assert(fs
== opOK
&& !losesInfo
);
2906 words
[1] = *v
.convertDoubleAPFloatToAPInt().getRawData();
2911 return APInt(128, words
);
2914 APInt
IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
2915 assert(semantics
== (const llvm::fltSemantics
*)&semIEEEquad
);
2916 assert(partCount()==2);
2918 uint64_t myexponent
, mysignificand
, mysignificand2
;
2920 if (isFiniteNonZero()) {
2921 myexponent
= exponent
+16383; //bias
2922 mysignificand
= significandParts()[0];
2923 mysignificand2
= significandParts()[1];
2924 if (myexponent
==1 && !(mysignificand2
& 0x1000000000000LL
))
2925 myexponent
= 0; // denormal
2926 } else if (category
==fcZero
) {
2928 mysignificand
= mysignificand2
= 0;
2929 } else if (category
==fcInfinity
) {
2930 myexponent
= 0x7fff;
2931 mysignificand
= mysignificand2
= 0;
2933 assert(category
== fcNaN
&& "Unknown category!");
2934 myexponent
= 0x7fff;
2935 mysignificand
= significandParts()[0];
2936 mysignificand2
= significandParts()[1];
2940 words
[0] = mysignificand
;
2941 words
[1] = ((uint64_t)(sign
& 1) << 63) |
2942 ((myexponent
& 0x7fff) << 48) |
2943 (mysignificand2
& 0xffffffffffffLL
);
2945 return APInt(128, words
);
2948 APInt
IEEEFloat::convertDoubleAPFloatToAPInt() const {
2949 assert(semantics
== (const llvm::fltSemantics
*)&semIEEEdouble
);
2950 assert(partCount()==1);
2952 uint64_t myexponent
, mysignificand
;
2954 if (isFiniteNonZero()) {
2955 myexponent
= exponent
+1023; //bias
2956 mysignificand
= *significandParts();
2957 if (myexponent
==1 && !(mysignificand
& 0x10000000000000LL
))
2958 myexponent
= 0; // denormal
2959 } else if (category
==fcZero
) {
2962 } else if (category
==fcInfinity
) {
2966 assert(category
== fcNaN
&& "Unknown category!");
2968 mysignificand
= *significandParts();
2971 return APInt(64, ((((uint64_t)(sign
& 1) << 63) |
2972 ((myexponent
& 0x7ff) << 52) |
2973 (mysignificand
& 0xfffffffffffffLL
))));
2976 APInt
IEEEFloat::convertFloatAPFloatToAPInt() const {
2977 assert(semantics
== (const llvm::fltSemantics
*)&semIEEEsingle
);
2978 assert(partCount()==1);
2980 uint32_t myexponent
, mysignificand
;
2982 if (isFiniteNonZero()) {
2983 myexponent
= exponent
+127; //bias
2984 mysignificand
= (uint32_t)*significandParts();
2985 if (myexponent
== 1 && !(mysignificand
& 0x800000))
2986 myexponent
= 0; // denormal
2987 } else if (category
==fcZero
) {
2990 } else if (category
==fcInfinity
) {
2994 assert(category
== fcNaN
&& "Unknown category!");
2996 mysignificand
= (uint32_t)*significandParts();
2999 return APInt(32, (((sign
&1) << 31) | ((myexponent
&0xff) << 23) |
3000 (mysignificand
& 0x7fffff)));
3003 APInt
IEEEFloat::convertHalfAPFloatToAPInt() const {
3004 assert(semantics
== (const llvm::fltSemantics
*)&semIEEEhalf
);
3005 assert(partCount()==1);
3007 uint32_t myexponent
, mysignificand
;
3009 if (isFiniteNonZero()) {
3010 myexponent
= exponent
+15; //bias
3011 mysignificand
= (uint32_t)*significandParts();
3012 if (myexponent
== 1 && !(mysignificand
& 0x400))
3013 myexponent
= 0; // denormal
3014 } else if (category
==fcZero
) {
3017 } else if (category
==fcInfinity
) {
3021 assert(category
== fcNaN
&& "Unknown category!");
3023 mysignificand
= (uint32_t)*significandParts();
3026 return APInt(16, (((sign
&1) << 15) | ((myexponent
&0x1f) << 10) |
3027 (mysignificand
& 0x3ff)));
3030 // This function creates an APInt that is just a bit map of the floating
3031 // point constant as it would appear in memory. It is not a conversion,
3032 // and treating the result as a normal integer is unlikely to be useful.
3034 APInt
IEEEFloat::bitcastToAPInt() const {
3035 if (semantics
== (const llvm::fltSemantics
*)&semIEEEhalf
)
3036 return convertHalfAPFloatToAPInt();
3038 if (semantics
== (const llvm::fltSemantics
*)&semIEEEsingle
)
3039 return convertFloatAPFloatToAPInt();
3041 if (semantics
== (const llvm::fltSemantics
*)&semIEEEdouble
)
3042 return convertDoubleAPFloatToAPInt();
3044 if (semantics
== (const llvm::fltSemantics
*)&semIEEEquad
)
3045 return convertQuadrupleAPFloatToAPInt();
3047 if (semantics
== (const llvm::fltSemantics
*)&semPPCDoubleDoubleLegacy
)
3048 return convertPPCDoubleDoubleAPFloatToAPInt();
3050 assert(semantics
== (const llvm::fltSemantics
*)&semX87DoubleExtended
&&
3052 return convertF80LongDoubleAPFloatToAPInt();
3055 float IEEEFloat::convertToFloat() const {
3056 assert(semantics
== (const llvm::fltSemantics
*)&semIEEEsingle
&&
3057 "Float semantics are not IEEEsingle");
3058 APInt api
= bitcastToAPInt();
3059 return api
.bitsToFloat();
3062 double IEEEFloat::convertToDouble() const {
3063 assert(semantics
== (const llvm::fltSemantics
*)&semIEEEdouble
&&
3064 "Float semantics are not IEEEdouble");
3065 APInt api
= bitcastToAPInt();
3066 return api
.bitsToDouble();
3069 /// Integer bit is explicit in this format. Intel hardware (387 and later)
3070 /// does not support these bit patterns:
3071 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3072 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3073 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3074 /// exponent = 0, integer bit 1 ("pseudodenormal")
3075 /// At the moment, the first three are treated as NaNs, the last one as Normal.
3076 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt
&api
) {
3077 assert(api
.getBitWidth()==80);
3078 uint64_t i1
= api
.getRawData()[0];
3079 uint64_t i2
= api
.getRawData()[1];
3080 uint64_t myexponent
= (i2
& 0x7fff);
3081 uint64_t mysignificand
= i1
;
3082 uint8_t myintegerbit
= mysignificand
>> 63;
3084 initialize(&semX87DoubleExtended
);
3085 assert(partCount()==2);
3087 sign
= static_cast<unsigned int>(i2
>>15);
3088 if (myexponent
== 0 && mysignificand
== 0) {
3089 // exponent, significand meaningless
3091 } else if (myexponent
==0x7fff && mysignificand
==0x8000000000000000ULL
) {
3092 // exponent, significand meaningless
3093 category
= fcInfinity
;
3094 } else if ((myexponent
== 0x7fff && mysignificand
!= 0x8000000000000000ULL
) ||
3095 (myexponent
!= 0x7fff && myexponent
!= 0 && myintegerbit
== 0)) {
3096 // exponent meaningless
3098 significandParts()[0] = mysignificand
;
3099 significandParts()[1] = 0;
3101 category
= fcNormal
;
3102 exponent
= myexponent
- 16383;
3103 significandParts()[0] = mysignificand
;
3104 significandParts()[1] = 0;
3105 if (myexponent
==0) // denormal
3110 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt
&api
) {
3111 assert(api
.getBitWidth()==128);
3112 uint64_t i1
= api
.getRawData()[0];
3113 uint64_t i2
= api
.getRawData()[1];
3117 // Get the first double and convert to our format.
3118 initFromDoubleAPInt(APInt(64, i1
));
3119 fs
= convert(semPPCDoubleDoubleLegacy
, rmNearestTiesToEven
, &losesInfo
);
3120 assert(fs
== opOK
&& !losesInfo
);
3123 // Unless we have a special case, add in second double.
3124 if (isFiniteNonZero()) {
3125 IEEEFloat
v(semIEEEdouble
, APInt(64, i2
));
3126 fs
= v
.convert(semPPCDoubleDoubleLegacy
, rmNearestTiesToEven
, &losesInfo
);
3127 assert(fs
== opOK
&& !losesInfo
);
3130 add(v
, rmNearestTiesToEven
);
3134 void IEEEFloat::initFromQuadrupleAPInt(const APInt
&api
) {
3135 assert(api
.getBitWidth()==128);
3136 uint64_t i1
= api
.getRawData()[0];
3137 uint64_t i2
= api
.getRawData()[1];
3138 uint64_t myexponent
= (i2
>> 48) & 0x7fff;
3139 uint64_t mysignificand
= i1
;
3140 uint64_t mysignificand2
= i2
& 0xffffffffffffLL
;
3142 initialize(&semIEEEquad
);
3143 assert(partCount()==2);
3145 sign
= static_cast<unsigned int>(i2
>>63);
3146 if (myexponent
==0 &&
3147 (mysignificand
==0 && mysignificand2
==0)) {
3148 // exponent, significand meaningless
3150 } else if (myexponent
==0x7fff &&
3151 (mysignificand
==0 && mysignificand2
==0)) {
3152 // exponent, significand meaningless
3153 category
= fcInfinity
;
3154 } else if (myexponent
==0x7fff &&
3155 (mysignificand
!=0 || mysignificand2
!=0)) {
3156 // exponent meaningless
3158 significandParts()[0] = mysignificand
;
3159 significandParts()[1] = mysignificand2
;
3161 category
= fcNormal
;
3162 exponent
= myexponent
- 16383;
3163 significandParts()[0] = mysignificand
;
3164 significandParts()[1] = mysignificand2
;
3165 if (myexponent
==0) // denormal
3168 significandParts()[1] |= 0x1000000000000LL
; // integer bit
3172 void IEEEFloat::initFromDoubleAPInt(const APInt
&api
) {
3173 assert(api
.getBitWidth()==64);
3174 uint64_t i
= *api
.getRawData();
3175 uint64_t myexponent
= (i
>> 52) & 0x7ff;
3176 uint64_t mysignificand
= i
& 0xfffffffffffffLL
;
3178 initialize(&semIEEEdouble
);
3179 assert(partCount()==1);
3181 sign
= static_cast<unsigned int>(i
>>63);
3182 if (myexponent
==0 && mysignificand
==0) {
3183 // exponent, significand meaningless
3185 } else if (myexponent
==0x7ff && mysignificand
==0) {
3186 // exponent, significand meaningless
3187 category
= fcInfinity
;
3188 } else if (myexponent
==0x7ff && mysignificand
!=0) {
3189 // exponent meaningless
3191 *significandParts() = mysignificand
;
3193 category
= fcNormal
;
3194 exponent
= myexponent
- 1023;
3195 *significandParts() = mysignificand
;
3196 if (myexponent
==0) // denormal
3199 *significandParts() |= 0x10000000000000LL
; // integer bit
3203 void IEEEFloat::initFromFloatAPInt(const APInt
&api
) {
3204 assert(api
.getBitWidth()==32);
3205 uint32_t i
= (uint32_t)*api
.getRawData();
3206 uint32_t myexponent
= (i
>> 23) & 0xff;
3207 uint32_t mysignificand
= i
& 0x7fffff;
3209 initialize(&semIEEEsingle
);
3210 assert(partCount()==1);
3213 if (myexponent
==0 && mysignificand
==0) {
3214 // exponent, significand meaningless
3216 } else if (myexponent
==0xff && mysignificand
==0) {
3217 // exponent, significand meaningless
3218 category
= fcInfinity
;
3219 } else if (myexponent
==0xff && mysignificand
!=0) {
3220 // sign, exponent, significand meaningless
3222 *significandParts() = mysignificand
;
3224 category
= fcNormal
;
3225 exponent
= myexponent
- 127; //bias
3226 *significandParts() = mysignificand
;
3227 if (myexponent
==0) // denormal
3230 *significandParts() |= 0x800000; // integer bit
3234 void IEEEFloat::initFromHalfAPInt(const APInt
&api
) {
3235 assert(api
.getBitWidth()==16);
3236 uint32_t i
= (uint32_t)*api
.getRawData();
3237 uint32_t myexponent
= (i
>> 10) & 0x1f;
3238 uint32_t mysignificand
= i
& 0x3ff;
3240 initialize(&semIEEEhalf
);
3241 assert(partCount()==1);
3244 if (myexponent
==0 && mysignificand
==0) {
3245 // exponent, significand meaningless
3247 } else if (myexponent
==0x1f && mysignificand
==0) {
3248 // exponent, significand meaningless
3249 category
= fcInfinity
;
3250 } else if (myexponent
==0x1f && mysignificand
!=0) {
3251 // sign, exponent, significand meaningless
3253 *significandParts() = mysignificand
;
3255 category
= fcNormal
;
3256 exponent
= myexponent
- 15; //bias
3257 *significandParts() = mysignificand
;
3258 if (myexponent
==0) // denormal
3261 *significandParts() |= 0x400; // integer bit
3265 /// Treat api as containing the bits of a floating point number. Currently
3266 /// we infer the floating point type from the size of the APInt. The
3267 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3268 /// when the size is anything else).
3269 void IEEEFloat::initFromAPInt(const fltSemantics
*Sem
, const APInt
&api
) {
3270 if (Sem
== &semIEEEhalf
)
3271 return initFromHalfAPInt(api
);
3272 if (Sem
== &semIEEEsingle
)
3273 return initFromFloatAPInt(api
);
3274 if (Sem
== &semIEEEdouble
)
3275 return initFromDoubleAPInt(api
);
3276 if (Sem
== &semX87DoubleExtended
)
3277 return initFromF80LongDoubleAPInt(api
);
3278 if (Sem
== &semIEEEquad
)
3279 return initFromQuadrupleAPInt(api
);
3280 if (Sem
== &semPPCDoubleDoubleLegacy
)
3281 return initFromPPCDoubleDoubleAPInt(api
);
3283 llvm_unreachable(nullptr);
3286 /// Make this number the largest magnitude normal number in the given
3288 void IEEEFloat::makeLargest(bool Negative
) {
3289 // We want (in interchange format):
3290 // sign = {Negative}
3292 // significand = 1..1
3293 category
= fcNormal
;
3295 exponent
= semantics
->maxExponent
;
3297 // Use memset to set all but the highest integerPart to all ones.
3298 integerPart
*significand
= significandParts();
3299 unsigned PartCount
= partCount();
3300 memset(significand
, 0xFF, sizeof(integerPart
)*(PartCount
- 1));
3302 // Set the high integerPart especially setting all unused top bits for
3303 // internal consistency.
3304 const unsigned NumUnusedHighBits
=
3305 PartCount
*integerPartWidth
- semantics
->precision
;
3306 significand
[PartCount
- 1] = (NumUnusedHighBits
< integerPartWidth
)
3307 ? (~integerPart(0) >> NumUnusedHighBits
)
3311 /// Make this number the smallest magnitude denormal number in the given
3313 void IEEEFloat::makeSmallest(bool Negative
) {
3314 // We want (in interchange format):
3315 // sign = {Negative}
3317 // significand = 0..01
3318 category
= fcNormal
;
3320 exponent
= semantics
->minExponent
;
3321 APInt::tcSet(significandParts(), 1, partCount());
3324 void IEEEFloat::makeSmallestNormalized(bool Negative
) {
3325 // We want (in interchange format):
3326 // sign = {Negative}
3328 // significand = 10..0
3330 category
= fcNormal
;
3333 exponent
= semantics
->minExponent
;
3334 significandParts()[partCountForBits(semantics
->precision
) - 1] |=
3335 (((integerPart
)1) << ((semantics
->precision
- 1) % integerPartWidth
));
3338 IEEEFloat::IEEEFloat(const fltSemantics
&Sem
, const APInt
&API
) {
3339 initFromAPInt(&Sem
, API
);
3342 IEEEFloat::IEEEFloat(float f
) {
3343 initFromAPInt(&semIEEEsingle
, APInt::floatToBits(f
));
3346 IEEEFloat::IEEEFloat(double d
) {
3347 initFromAPInt(&semIEEEdouble
, APInt::doubleToBits(d
));
3351 void append(SmallVectorImpl
<char> &Buffer
, StringRef Str
) {
3352 Buffer
.append(Str
.begin(), Str
.end());
3355 /// Removes data from the given significand until it is no more
3356 /// precise than is required for the desired precision.
3357 void AdjustToPrecision(APInt
&significand
,
3358 int &exp
, unsigned FormatPrecision
) {
3359 unsigned bits
= significand
.getActiveBits();
3361 // 196/59 is a very slight overestimate of lg_2(10).
3362 unsigned bitsRequired
= (FormatPrecision
* 196 + 58) / 59;
3364 if (bits
<= bitsRequired
) return;
3366 unsigned tensRemovable
= (bits
- bitsRequired
) * 59 / 196;
3367 if (!tensRemovable
) return;
3369 exp
+= tensRemovable
;
3371 APInt
divisor(significand
.getBitWidth(), 1);
3372 APInt
powten(significand
.getBitWidth(), 10);
3374 if (tensRemovable
& 1)
3376 tensRemovable
>>= 1;
3377 if (!tensRemovable
) break;
3381 significand
= significand
.udiv(divisor
);
3383 // Truncate the significand down to its active bit count.
3384 significand
= significand
.trunc(significand
.getActiveBits());
3388 void AdjustToPrecision(SmallVectorImpl
<char> &buffer
,
3389 int &exp
, unsigned FormatPrecision
) {
3390 unsigned N
= buffer
.size();
3391 if (N
<= FormatPrecision
) return;
3393 // The most significant figures are the last ones in the buffer.
3394 unsigned FirstSignificant
= N
- FormatPrecision
;
3397 // FIXME: this probably shouldn't use 'round half up'.
3399 // Rounding down is just a truncation, except we also want to drop
3400 // trailing zeros from the new result.
3401 if (buffer
[FirstSignificant
- 1] < '5') {
3402 while (FirstSignificant
< N
&& buffer
[FirstSignificant
] == '0')
3405 exp
+= FirstSignificant
;
3406 buffer
.erase(&buffer
[0], &buffer
[FirstSignificant
]);
3410 // Rounding up requires a decimal add-with-carry. If we continue
3411 // the carry, the newly-introduced zeros will just be truncated.
3412 for (unsigned I
= FirstSignificant
; I
!= N
; ++I
) {
3413 if (buffer
[I
] == '9') {
3421 // If we carried through, we have exactly one digit of precision.
3422 if (FirstSignificant
== N
) {
3423 exp
+= FirstSignificant
;
3425 buffer
.push_back('1');
3429 exp
+= FirstSignificant
;
3430 buffer
.erase(&buffer
[0], &buffer
[FirstSignificant
]);
3434 void IEEEFloat::toString(SmallVectorImpl
<char> &Str
, unsigned FormatPrecision
,
3435 unsigned FormatMaxPadding
, bool TruncateZero
) const {
3439 return append(Str
, "-Inf");
3441 return append(Str
, "+Inf");
3443 case fcNaN
: return append(Str
, "NaN");
3449 if (!FormatMaxPadding
) {
3451 append(Str
, "0.0E+0");
3454 if (FormatPrecision
> 1)
3455 Str
.append(FormatPrecision
- 1, '0');
3456 append(Str
, "e+00");
3469 // Decompose the number into an APInt and an exponent.
3470 int exp
= exponent
- ((int) semantics
->precision
- 1);
3471 APInt
significand(semantics
->precision
,
3472 makeArrayRef(significandParts(),
3473 partCountForBits(semantics
->precision
)));
3475 // Set FormatPrecision if zero. We want to do this before we
3476 // truncate trailing zeros, as those are part of the precision.
3477 if (!FormatPrecision
) {
3478 // We use enough digits so the number can be round-tripped back to an
3479 // APFloat. The formula comes from "How to Print Floating-Point Numbers
3480 // Accurately" by Steele and White.
3481 // FIXME: Using a formula based purely on the precision is conservative;
3482 // we can print fewer digits depending on the actual value being printed.
3484 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
3485 FormatPrecision
= 2 + semantics
->precision
* 59 / 196;
3488 // Ignore trailing binary zeros.
3489 int trailingZeros
= significand
.countTrailingZeros();
3490 exp
+= trailingZeros
;
3491 significand
.lshrInPlace(trailingZeros
);
3493 // Change the exponent from 2^e to 10^e.
3496 } else if (exp
> 0) {
3498 significand
= significand
.zext(semantics
->precision
+ exp
);
3499 significand
<<= exp
;
3501 } else { /* exp < 0 */
3504 // We transform this using the identity:
3505 // (N)(2^-e) == (N)(5^e)(10^-e)
3506 // This means we have to multiply N (the significand) by 5^e.
3507 // To avoid overflow, we have to operate on numbers large
3508 // enough to store N * 5^e:
3509 // log2(N * 5^e) == log2(N) + e * log2(5)
3510 // <= semantics->precision + e * 137 / 59
3511 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3513 unsigned precision
= semantics
->precision
+ (137 * texp
+ 136) / 59;
3515 // Multiply significand by 5^e.
3516 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3517 significand
= significand
.zext(precision
);
3518 APInt
five_to_the_i(precision
, 5);
3520 if (texp
& 1) significand
*= five_to_the_i
;
3524 five_to_the_i
*= five_to_the_i
;
3528 AdjustToPrecision(significand
, exp
, FormatPrecision
);
3530 SmallVector
<char, 256> buffer
;
3533 unsigned precision
= significand
.getBitWidth();
3534 APInt
ten(precision
, 10);
3535 APInt
digit(precision
, 0);
3537 bool inTrail
= true;
3538 while (significand
!= 0) {
3539 // digit <- significand % 10
3540 // significand <- significand / 10
3541 APInt::udivrem(significand
, ten
, significand
, digit
);
3543 unsigned d
= digit
.getZExtValue();
3545 // Drop trailing zeros.
3546 if (inTrail
&& !d
) exp
++;
3548 buffer
.push_back((char) ('0' + d
));
3553 assert(!buffer
.empty() && "no characters in buffer!");
3555 // Drop down to FormatPrecision.
3556 // TODO: don't do more precise calculations above than are required.
3557 AdjustToPrecision(buffer
, exp
, FormatPrecision
);
3559 unsigned NDigits
= buffer
.size();
3561 // Check whether we should use scientific notation.
3562 bool FormatScientific
;
3563 if (!FormatMaxPadding
)
3564 FormatScientific
= true;
3569 // But we shouldn't make the number look more precise than it is.
3570 FormatScientific
= ((unsigned) exp
> FormatMaxPadding
||
3571 NDigits
+ (unsigned) exp
> FormatPrecision
);
3573 // Power of the most significant digit.
3574 int MSD
= exp
+ (int) (NDigits
- 1);
3577 FormatScientific
= false;
3579 // 765e-5 == 0.00765
3581 FormatScientific
= ((unsigned) -MSD
) > FormatMaxPadding
;
3586 // Scientific formatting is pretty straightforward.
3587 if (FormatScientific
) {
3588 exp
+= (NDigits
- 1);
3590 Str
.push_back(buffer
[NDigits
-1]);
3592 if (NDigits
== 1 && TruncateZero
)
3595 for (unsigned I
= 1; I
!= NDigits
; ++I
)
3596 Str
.push_back(buffer
[NDigits
-1-I
]);
3597 // Fill with zeros up to FormatPrecision.
3598 if (!TruncateZero
&& FormatPrecision
> NDigits
- 1)
3599 Str
.append(FormatPrecision
- NDigits
+ 1, '0');
3600 // For !TruncateZero we use lower 'e'.
3601 Str
.push_back(TruncateZero
? 'E' : 'e');
3603 Str
.push_back(exp
>= 0 ? '+' : '-');
3604 if (exp
< 0) exp
= -exp
;
3605 SmallVector
<char, 6> expbuf
;
3607 expbuf
.push_back((char) ('0' + (exp
% 10)));
3610 // Exponent always at least two digits if we do not truncate zeros.
3611 if (!TruncateZero
&& expbuf
.size() < 2)
3612 expbuf
.push_back('0');
3613 for (unsigned I
= 0, E
= expbuf
.size(); I
!= E
; ++I
)
3614 Str
.push_back(expbuf
[E
-1-I
]);
3618 // Non-scientific, positive exponents.
3620 for (unsigned I
= 0; I
!= NDigits
; ++I
)
3621 Str
.push_back(buffer
[NDigits
-1-I
]);
3622 for (unsigned I
= 0; I
!= (unsigned) exp
; ++I
)
3627 // Non-scientific, negative exponents.
3629 // The number of digits to the left of the decimal point.
3630 int NWholeDigits
= exp
+ (int) NDigits
;
3633 if (NWholeDigits
> 0) {
3634 for (; I
!= (unsigned) NWholeDigits
; ++I
)
3635 Str
.push_back(buffer
[NDigits
-I
-1]);
3638 unsigned NZeros
= 1 + (unsigned) -NWholeDigits
;
3642 for (unsigned Z
= 1; Z
!= NZeros
; ++Z
)
3646 for (; I
!= NDigits
; ++I
)
3647 Str
.push_back(buffer
[NDigits
-I
-1]);
3650 bool IEEEFloat::getExactInverse(APFloat
*inv
) const {
3651 // Special floats and denormals have no exact inverse.
3652 if (!isFiniteNonZero())
3655 // Check that the number is a power of two by making sure that only the
3656 // integer bit is set in the significand.
3657 if (significandLSB() != semantics
->precision
- 1)
3661 IEEEFloat
reciprocal(*semantics
, 1ULL);
3662 if (reciprocal
.divide(*this, rmNearestTiesToEven
) != opOK
)
3665 // Avoid multiplication with a denormal, it is not safe on all platforms and
3666 // may be slower than a normal division.
3667 if (reciprocal
.isDenormal())
3670 assert(reciprocal
.isFiniteNonZero() &&
3671 reciprocal
.significandLSB() == reciprocal
.semantics
->precision
- 1);
3674 *inv
= APFloat(reciprocal
, *semantics
);
3679 bool IEEEFloat::isSignaling() const {
3683 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
3684 // first bit of the trailing significand being 0.
3685 return !APInt::tcExtractBit(significandParts(), semantics
->precision
- 2);
3688 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
3690 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
3691 /// appropriate sign switching before/after the computation.
3692 IEEEFloat::opStatus
IEEEFloat::next(bool nextDown
) {
3693 // If we are performing nextDown, swap sign so we have -x.
3697 // Compute nextUp(x)
3698 opStatus result
= opOK
;
3700 // Handle each float category separately.
3703 // nextUp(+inf) = +inf
3706 // nextUp(-inf) = -getLargest()
3710 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
3711 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
3712 // change the payload.
3713 if (isSignaling()) {
3714 result
= opInvalidOp
;
3715 // For consistency, propagate the sign of the sNaN to the qNaN.
3716 makeNaN(false, isNegative(), nullptr);
3720 // nextUp(pm 0) = +getSmallest()
3721 makeSmallest(false);
3724 // nextUp(-getSmallest()) = -0
3725 if (isSmallest() && isNegative()) {
3726 APInt::tcSet(significandParts(), 0, partCount());
3732 // nextUp(getLargest()) == INFINITY
3733 if (isLargest() && !isNegative()) {
3734 APInt::tcSet(significandParts(), 0, partCount());
3735 category
= fcInfinity
;
3736 exponent
= semantics
->maxExponent
+ 1;
3740 // nextUp(normal) == normal + inc.
3742 // If we are negative, we need to decrement the significand.
3744 // We only cross a binade boundary that requires adjusting the exponent
3746 // 1. exponent != semantics->minExponent. This implies we are not in the
3747 // smallest binade or are dealing with denormals.
3748 // 2. Our significand excluding the integral bit is all zeros.
3749 bool WillCrossBinadeBoundary
=
3750 exponent
!= semantics
->minExponent
&& isSignificandAllZeros();
3752 // Decrement the significand.
3754 // We always do this since:
3755 // 1. If we are dealing with a non-binade decrement, by definition we
3756 // just decrement the significand.
3757 // 2. If we are dealing with a normal -> normal binade decrement, since
3758 // we have an explicit integral bit the fact that all bits but the
3759 // integral bit are zero implies that subtracting one will yield a
3760 // significand with 0 integral bit and 1 in all other spots. Thus we
3761 // must just adjust the exponent and set the integral bit to 1.
3762 // 3. If we are dealing with a normal -> denormal binade decrement,
3763 // since we set the integral bit to 0 when we represent denormals, we
3764 // just decrement the significand.
3765 integerPart
*Parts
= significandParts();
3766 APInt::tcDecrement(Parts
, partCount());
3768 if (WillCrossBinadeBoundary
) {
3769 // Our result is a normal number. Do the following:
3770 // 1. Set the integral bit to 1.
3771 // 2. Decrement the exponent.
3772 APInt::tcSetBit(Parts
, semantics
->precision
- 1);
3776 // If we are positive, we need to increment the significand.
3778 // We only cross a binade boundary that requires adjusting the exponent if
3779 // the input is not a denormal and all of said input's significand bits
3780 // are set. If all of said conditions are true: clear the significand, set
3781 // the integral bit to 1, and increment the exponent. If we have a
3782 // denormal always increment since moving denormals and the numbers in the
3783 // smallest normal binade have the same exponent in our representation.
3784 bool WillCrossBinadeBoundary
= !isDenormal() && isSignificandAllOnes();
3786 if (WillCrossBinadeBoundary
) {
3787 integerPart
*Parts
= significandParts();
3788 APInt::tcSet(Parts
, 0, partCount());
3789 APInt::tcSetBit(Parts
, semantics
->precision
- 1);
3790 assert(exponent
!= semantics
->maxExponent
&&
3791 "We can not increment an exponent beyond the maxExponent allowed"
3792 " by the given floating point semantics.");
3795 incrementSignificand();
3801 // If we are performing nextDown, swap sign so we have -nextUp(-x)
3808 void IEEEFloat::makeInf(bool Negative
) {
3809 category
= fcInfinity
;
3811 exponent
= semantics
->maxExponent
+ 1;
3812 APInt::tcSet(significandParts(), 0, partCount());
3815 void IEEEFloat::makeZero(bool Negative
) {
3818 exponent
= semantics
->minExponent
-1;
3819 APInt::tcSet(significandParts(), 0, partCount());
3822 void IEEEFloat::makeQuiet() {
3824 APInt::tcSetBit(significandParts(), semantics
->precision
- 2);
3827 int ilogb(const IEEEFloat
&Arg
) {
3829 return IEEEFloat::IEK_NaN
;
3831 return IEEEFloat::IEK_Zero
;
3832 if (Arg
.isInfinity())
3833 return IEEEFloat::IEK_Inf
;
3834 if (!Arg
.isDenormal())
3835 return Arg
.exponent
;
3837 IEEEFloat
Normalized(Arg
);
3838 int SignificandBits
= Arg
.getSemantics().precision
- 1;
3840 Normalized
.exponent
+= SignificandBits
;
3841 Normalized
.normalize(IEEEFloat::rmNearestTiesToEven
, lfExactlyZero
);
3842 return Normalized
.exponent
- SignificandBits
;
3845 IEEEFloat
scalbn(IEEEFloat X
, int Exp
, IEEEFloat::roundingMode RoundingMode
) {
3846 auto MaxExp
= X
.getSemantics().maxExponent
;
3847 auto MinExp
= X
.getSemantics().minExponent
;
3849 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
3850 // overflow; clamp it to a safe range before adding, but ensure that the range
3851 // is large enough that the clamp does not change the result. The range we
3852 // need to support is the difference between the largest possible exponent and
3853 // the normalized exponent of half the smallest denormal.
3855 int SignificandBits
= X
.getSemantics().precision
- 1;
3856 int MaxIncrement
= MaxExp
- (MinExp
- SignificandBits
) + 1;
3858 // Clamp to one past the range ends to let normalize handle overlflow.
3859 X
.exponent
+= std::min(std::max(Exp
, -MaxIncrement
- 1), MaxIncrement
);
3860 X
.normalize(RoundingMode
, lfExactlyZero
);
3866 IEEEFloat
frexp(const IEEEFloat
&Val
, int &Exp
, IEEEFloat::roundingMode RM
) {
3869 // Quiet signalling nans.
3870 if (Exp
== IEEEFloat::IEK_NaN
) {
3871 IEEEFloat
Quiet(Val
);
3876 if (Exp
== IEEEFloat::IEK_Inf
)
3879 // 1 is added because frexp is defined to return a normalized fraction in
3880 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
3881 Exp
= Exp
== IEEEFloat::IEK_Zero
? 0 : Exp
+ 1;
3882 return scalbn(Val
, -Exp
, RM
);
3885 DoubleAPFloat::DoubleAPFloat(const fltSemantics
&S
)
3887 Floats(new APFloat
[2]{APFloat(semIEEEdouble
), APFloat(semIEEEdouble
)}) {
3888 assert(Semantics
== &semPPCDoubleDouble
);
3891 DoubleAPFloat::DoubleAPFloat(const fltSemantics
&S
, uninitializedTag
)
3893 Floats(new APFloat
[2]{APFloat(semIEEEdouble
, uninitialized
),
3894 APFloat(semIEEEdouble
, uninitialized
)}) {
3895 assert(Semantics
== &semPPCDoubleDouble
);
3898 DoubleAPFloat::DoubleAPFloat(const fltSemantics
&S
, integerPart I
)
3899 : Semantics(&S
), Floats(new APFloat
[2]{APFloat(semIEEEdouble
, I
),
3900 APFloat(semIEEEdouble
)}) {
3901 assert(Semantics
== &semPPCDoubleDouble
);
3904 DoubleAPFloat::DoubleAPFloat(const fltSemantics
&S
, const APInt
&I
)
3906 Floats(new APFloat
[2]{
3907 APFloat(semIEEEdouble
, APInt(64, I
.getRawData()[0])),
3908 APFloat(semIEEEdouble
, APInt(64, I
.getRawData()[1]))}) {
3909 assert(Semantics
== &semPPCDoubleDouble
);
3912 DoubleAPFloat::DoubleAPFloat(const fltSemantics
&S
, APFloat
&&First
,
3915 Floats(new APFloat
[2]{std::move(First
), std::move(Second
)}) {
3916 assert(Semantics
== &semPPCDoubleDouble
);
3917 assert(&Floats
[0].getSemantics() == &semIEEEdouble
);
3918 assert(&Floats
[1].getSemantics() == &semIEEEdouble
);
3921 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat
&RHS
)
3922 : Semantics(RHS
.Semantics
),
3923 Floats(RHS
.Floats
? new APFloat
[2]{APFloat(RHS
.Floats
[0]),
3924 APFloat(RHS
.Floats
[1])}
3926 assert(Semantics
== &semPPCDoubleDouble
);
3929 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat
&&RHS
)
3930 : Semantics(RHS
.Semantics
), Floats(std::move(RHS
.Floats
)) {
3931 RHS
.Semantics
= &semBogus
;
3932 assert(Semantics
== &semPPCDoubleDouble
);
3935 DoubleAPFloat
&DoubleAPFloat::operator=(const DoubleAPFloat
&RHS
) {
3936 if (Semantics
== RHS
.Semantics
&& RHS
.Floats
) {
3937 Floats
[0] = RHS
.Floats
[0];
3938 Floats
[1] = RHS
.Floats
[1];
3939 } else if (this != &RHS
) {
3940 this->~DoubleAPFloat();
3941 new (this) DoubleAPFloat(RHS
);
3946 // Implement addition, subtraction, multiplication and division based on:
3947 // "Software for Doubled-Precision Floating-Point Computations",
3948 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
3949 APFloat::opStatus
DoubleAPFloat::addImpl(const APFloat
&a
, const APFloat
&aa
,
3950 const APFloat
&c
, const APFloat
&cc
,
3954 Status
|= z
.add(c
, RM
);
3955 if (!z
.isFinite()) {
3956 if (!z
.isInfinity()) {
3957 Floats
[0] = std::move(z
);
3958 Floats
[1].makeZero(/* Neg = */ false);
3959 return (opStatus
)Status
;
3962 auto AComparedToC
= a
.compareAbsoluteValue(c
);
3964 Status
|= z
.add(aa
, RM
);
3965 if (AComparedToC
== APFloat::cmpGreaterThan
) {
3966 // z = cc + aa + c + a;
3967 Status
|= z
.add(c
, RM
);
3968 Status
|= z
.add(a
, RM
);
3970 // z = cc + aa + a + c;
3971 Status
|= z
.add(a
, RM
);
3972 Status
|= z
.add(c
, RM
);
3974 if (!z
.isFinite()) {
3975 Floats
[0] = std::move(z
);
3976 Floats
[1].makeZero(/* Neg = */ false);
3977 return (opStatus
)Status
;
3981 Status
|= zz
.add(cc
, RM
);
3982 if (AComparedToC
== APFloat::cmpGreaterThan
) {
3983 // Floats[1] = a - z + c + zz;
3985 Status
|= Floats
[1].subtract(z
, RM
);
3986 Status
|= Floats
[1].add(c
, RM
);
3987 Status
|= Floats
[1].add(zz
, RM
);
3989 // Floats[1] = c - z + a + zz;
3991 Status
|= Floats
[1].subtract(z
, RM
);
3992 Status
|= Floats
[1].add(a
, RM
);
3993 Status
|= Floats
[1].add(zz
, RM
);
3998 Status
|= q
.subtract(z
, RM
);
4000 // zz = q + c + (a - (q + z)) + aa + cc;
4001 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4003 Status
|= zz
.add(c
, RM
);
4004 Status
|= q
.add(z
, RM
);
4005 Status
|= q
.subtract(a
, RM
);
4007 Status
|= zz
.add(q
, RM
);
4008 Status
|= zz
.add(aa
, RM
);
4009 Status
|= zz
.add(cc
, RM
);
4010 if (zz
.isZero() && !zz
.isNegative()) {
4011 Floats
[0] = std::move(z
);
4012 Floats
[1].makeZero(/* Neg = */ false);
4016 Status
|= Floats
[0].add(zz
, RM
);
4017 if (!Floats
[0].isFinite()) {
4018 Floats
[1].makeZero(/* Neg = */ false);
4019 return (opStatus
)Status
;
4021 Floats
[1] = std::move(z
);
4022 Status
|= Floats
[1].subtract(Floats
[0], RM
);
4023 Status
|= Floats
[1].add(zz
, RM
);
4025 return (opStatus
)Status
;
4028 APFloat::opStatus
DoubleAPFloat::addWithSpecial(const DoubleAPFloat
&LHS
,
4029 const DoubleAPFloat
&RHS
,
4032 if (LHS
.getCategory() == fcNaN
) {
4036 if (RHS
.getCategory() == fcNaN
) {
4040 if (LHS
.getCategory() == fcZero
) {
4044 if (RHS
.getCategory() == fcZero
) {
4048 if (LHS
.getCategory() == fcInfinity
&& RHS
.getCategory() == fcInfinity
&&
4049 LHS
.isNegative() != RHS
.isNegative()) {
4050 Out
.makeNaN(false, Out
.isNegative(), nullptr);
4053 if (LHS
.getCategory() == fcInfinity
) {
4057 if (RHS
.getCategory() == fcInfinity
) {
4061 assert(LHS
.getCategory() == fcNormal
&& RHS
.getCategory() == fcNormal
);
4063 APFloat
A(LHS
.Floats
[0]), AA(LHS
.Floats
[1]), C(RHS
.Floats
[0]),
4065 assert(&A
.getSemantics() == &semIEEEdouble
);
4066 assert(&AA
.getSemantics() == &semIEEEdouble
);
4067 assert(&C
.getSemantics() == &semIEEEdouble
);
4068 assert(&CC
.getSemantics() == &semIEEEdouble
);
4069 assert(&Out
.Floats
[0].getSemantics() == &semIEEEdouble
);
4070 assert(&Out
.Floats
[1].getSemantics() == &semIEEEdouble
);
4071 return Out
.addImpl(A
, AA
, C
, CC
, RM
);
4074 APFloat::opStatus
DoubleAPFloat::add(const DoubleAPFloat
&RHS
,
4076 return addWithSpecial(*this, RHS
, *this, RM
);
4079 APFloat::opStatus
DoubleAPFloat::subtract(const DoubleAPFloat
&RHS
,
4082 auto Ret
= add(RHS
, RM
);
4087 APFloat::opStatus
DoubleAPFloat::multiply(const DoubleAPFloat
&RHS
,
4088 APFloat::roundingMode RM
) {
4089 const auto &LHS
= *this;
4091 /* Interesting observation: For special categories, finding the lowest
4092 common ancestor of the following layered graph gives the correct
4101 e.g. NaN * NaN = NaN
4103 Normal * Zero = Zero
4106 if (LHS
.getCategory() == fcNaN
) {
4110 if (RHS
.getCategory() == fcNaN
) {
4114 if ((LHS
.getCategory() == fcZero
&& RHS
.getCategory() == fcInfinity
) ||
4115 (LHS
.getCategory() == fcInfinity
&& RHS
.getCategory() == fcZero
)) {
4116 Out
.makeNaN(false, false, nullptr);
4119 if (LHS
.getCategory() == fcZero
|| LHS
.getCategory() == fcInfinity
) {
4123 if (RHS
.getCategory() == fcZero
|| RHS
.getCategory() == fcInfinity
) {
4127 assert(LHS
.getCategory() == fcNormal
&& RHS
.getCategory() == fcNormal
&&
4128 "Special cases not handled exhaustively");
4131 APFloat A
= Floats
[0], B
= Floats
[1], C
= RHS
.Floats
[0], D
= RHS
.Floats
[1];
4134 Status
|= T
.multiply(C
, RM
);
4135 if (!T
.isFiniteNonZero()) {
4137 Floats
[1].makeZero(/* Neg = */ false);
4138 return (opStatus
)Status
;
4141 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4144 Status
|= Tau
.fusedMultiplyAdd(C
, T
, RM
);
4149 Status
|= V
.multiply(D
, RM
);
4152 Status
|= W
.multiply(C
, RM
);
4153 Status
|= V
.add(W
, RM
);
4155 Status
|= Tau
.add(V
, RM
);
4159 Status
|= U
.add(Tau
, RM
);
4162 if (!U
.isFinite()) {
4163 Floats
[1].makeZero(/* Neg = */ false);
4165 // Floats[1] = (t - u) + tau
4166 Status
|= T
.subtract(U
, RM
);
4167 Status
|= T
.add(Tau
, RM
);
4170 return (opStatus
)Status
;
4173 APFloat::opStatus
DoubleAPFloat::divide(const DoubleAPFloat
&RHS
,
4174 APFloat::roundingMode RM
) {
4175 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4176 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
4178 Tmp
.divide(APFloat(semPPCDoubleDoubleLegacy
, RHS
.bitcastToAPInt()), RM
);
4179 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
4183 APFloat::opStatus
DoubleAPFloat::remainder(const DoubleAPFloat
&RHS
) {
4184 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4185 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
4187 Tmp
.remainder(APFloat(semPPCDoubleDoubleLegacy
, RHS
.bitcastToAPInt()));
4188 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
4192 APFloat::opStatus
DoubleAPFloat::mod(const DoubleAPFloat
&RHS
) {
4193 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4194 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
4195 auto Ret
= Tmp
.mod(APFloat(semPPCDoubleDoubleLegacy
, RHS
.bitcastToAPInt()));
4196 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
4201 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat
&Multiplicand
,
4202 const DoubleAPFloat
&Addend
,
4203 APFloat::roundingMode RM
) {
4204 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4205 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
4206 auto Ret
= Tmp
.fusedMultiplyAdd(
4207 APFloat(semPPCDoubleDoubleLegacy
, Multiplicand
.bitcastToAPInt()),
4208 APFloat(semPPCDoubleDoubleLegacy
, Addend
.bitcastToAPInt()), RM
);
4209 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
4213 APFloat::opStatus
DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM
) {
4214 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4215 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
4216 auto Ret
= Tmp
.roundToIntegral(RM
);
4217 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
4221 void DoubleAPFloat::changeSign() {
4222 Floats
[0].changeSign();
4223 Floats
[1].changeSign();
4227 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat
&RHS
) const {
4228 auto Result
= Floats
[0].compareAbsoluteValue(RHS
.Floats
[0]);
4229 if (Result
!= cmpEqual
)
4231 Result
= Floats
[1].compareAbsoluteValue(RHS
.Floats
[1]);
4232 if (Result
== cmpLessThan
|| Result
== cmpGreaterThan
) {
4233 auto Against
= Floats
[0].isNegative() ^ Floats
[1].isNegative();
4234 auto RHSAgainst
= RHS
.Floats
[0].isNegative() ^ RHS
.Floats
[1].isNegative();
4235 if (Against
&& !RHSAgainst
)
4237 if (!Against
&& RHSAgainst
)
4238 return cmpGreaterThan
;
4239 if (!Against
&& !RHSAgainst
)
4241 if (Against
&& RHSAgainst
)
4242 return (cmpResult
)(cmpLessThan
+ cmpGreaterThan
- Result
);
4247 APFloat::fltCategory
DoubleAPFloat::getCategory() const {
4248 return Floats
[0].getCategory();
4251 bool DoubleAPFloat::isNegative() const { return Floats
[0].isNegative(); }
4253 void DoubleAPFloat::makeInf(bool Neg
) {
4254 Floats
[0].makeInf(Neg
);
4255 Floats
[1].makeZero(/* Neg = */ false);
4258 void DoubleAPFloat::makeZero(bool Neg
) {
4259 Floats
[0].makeZero(Neg
);
4260 Floats
[1].makeZero(/* Neg = */ false);
4263 void DoubleAPFloat::makeLargest(bool Neg
) {
4264 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4265 Floats
[0] = APFloat(semIEEEdouble
, APInt(64, 0x7fefffffffffffffull
));
4266 Floats
[1] = APFloat(semIEEEdouble
, APInt(64, 0x7c8ffffffffffffeull
));
4271 void DoubleAPFloat::makeSmallest(bool Neg
) {
4272 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4273 Floats
[0].makeSmallest(Neg
);
4274 Floats
[1].makeZero(/* Neg = */ false);
4277 void DoubleAPFloat::makeSmallestNormalized(bool Neg
) {
4278 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4279 Floats
[0] = APFloat(semIEEEdouble
, APInt(64, 0x0360000000000000ull
));
4281 Floats
[0].changeSign();
4282 Floats
[1].makeZero(/* Neg = */ false);
4285 void DoubleAPFloat::makeNaN(bool SNaN
, bool Neg
, const APInt
*fill
) {
4286 Floats
[0].makeNaN(SNaN
, Neg
, fill
);
4287 Floats
[1].makeZero(/* Neg = */ false);
4290 APFloat::cmpResult
DoubleAPFloat::compare(const DoubleAPFloat
&RHS
) const {
4291 auto Result
= Floats
[0].compare(RHS
.Floats
[0]);
4292 // |Float[0]| > |Float[1]|
4293 if (Result
== APFloat::cmpEqual
)
4294 return Floats
[1].compare(RHS
.Floats
[1]);
4298 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat
&RHS
) const {
4299 return Floats
[0].bitwiseIsEqual(RHS
.Floats
[0]) &&
4300 Floats
[1].bitwiseIsEqual(RHS
.Floats
[1]);
4303 hash_code
hash_value(const DoubleAPFloat
&Arg
) {
4305 return hash_combine(hash_value(Arg
.Floats
[0]), hash_value(Arg
.Floats
[1]));
4306 return hash_combine(Arg
.Semantics
);
4309 APInt
DoubleAPFloat::bitcastToAPInt() const {
4310 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4312 Floats
[0].bitcastToAPInt().getRawData()[0],
4313 Floats
[1].bitcastToAPInt().getRawData()[0],
4315 return APInt(128, 2, Data
);
4318 APFloat::opStatus
DoubleAPFloat::convertFromString(StringRef S
,
4320 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4321 APFloat
Tmp(semPPCDoubleDoubleLegacy
);
4322 auto Ret
= Tmp
.convertFromString(S
, RM
);
4323 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
4327 APFloat::opStatus
DoubleAPFloat::next(bool nextDown
) {
4328 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4329 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
4330 auto Ret
= Tmp
.next(nextDown
);
4331 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
4336 DoubleAPFloat::convertToInteger(MutableArrayRef
<integerPart
> Input
,
4337 unsigned int Width
, bool IsSigned
,
4338 roundingMode RM
, bool *IsExact
) const {
4339 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4340 return APFloat(semPPCDoubleDoubleLegacy
, bitcastToAPInt())
4341 .convertToInteger(Input
, Width
, IsSigned
, RM
, IsExact
);
4344 APFloat::opStatus
DoubleAPFloat::convertFromAPInt(const APInt
&Input
,
4347 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4348 APFloat
Tmp(semPPCDoubleDoubleLegacy
);
4349 auto Ret
= Tmp
.convertFromAPInt(Input
, IsSigned
, RM
);
4350 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
4355 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart
*Input
,
4356 unsigned int InputSize
,
4357 bool IsSigned
, roundingMode RM
) {
4358 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4359 APFloat
Tmp(semPPCDoubleDoubleLegacy
);
4360 auto Ret
= Tmp
.convertFromSignExtendedInteger(Input
, InputSize
, IsSigned
, RM
);
4361 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
4366 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart
*Input
,
4367 unsigned int InputSize
,
4368 bool IsSigned
, roundingMode RM
) {
4369 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4370 APFloat
Tmp(semPPCDoubleDoubleLegacy
);
4371 auto Ret
= Tmp
.convertFromZeroExtendedInteger(Input
, InputSize
, IsSigned
, RM
);
4372 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
4376 unsigned int DoubleAPFloat::convertToHexString(char *DST
,
4377 unsigned int HexDigits
,
4379 roundingMode RM
) const {
4380 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4381 return APFloat(semPPCDoubleDoubleLegacy
, bitcastToAPInt())
4382 .convertToHexString(DST
, HexDigits
, UpperCase
, RM
);
4385 bool DoubleAPFloat::isDenormal() const {
4386 return getCategory() == fcNormal
&&
4387 (Floats
[0].isDenormal() || Floats
[1].isDenormal() ||
4388 // (double)(Hi + Lo) == Hi defines a normal number.
4389 Floats
[0].compare(Floats
[0] + Floats
[1]) != cmpEqual
);
4392 bool DoubleAPFloat::isSmallest() const {
4393 if (getCategory() != fcNormal
)
4395 DoubleAPFloat
Tmp(*this);
4396 Tmp
.makeSmallest(this->isNegative());
4397 return Tmp
.compare(*this) == cmpEqual
;
4400 bool DoubleAPFloat::isLargest() const {
4401 if (getCategory() != fcNormal
)
4403 DoubleAPFloat
Tmp(*this);
4404 Tmp
.makeLargest(this->isNegative());
4405 return Tmp
.compare(*this) == cmpEqual
;
4408 bool DoubleAPFloat::isInteger() const {
4409 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4410 return Floats
[0].isInteger() && Floats
[1].isInteger();
4413 void DoubleAPFloat::toString(SmallVectorImpl
<char> &Str
,
4414 unsigned FormatPrecision
,
4415 unsigned FormatMaxPadding
,
4416 bool TruncateZero
) const {
4417 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4418 APFloat(semPPCDoubleDoubleLegacy
, bitcastToAPInt())
4419 .toString(Str
, FormatPrecision
, FormatMaxPadding
, TruncateZero
);
4422 bool DoubleAPFloat::getExactInverse(APFloat
*inv
) const {
4423 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4424 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
4426 return Tmp
.getExactInverse(nullptr);
4427 APFloat
Inv(semPPCDoubleDoubleLegacy
);
4428 auto Ret
= Tmp
.getExactInverse(&Inv
);
4429 *inv
= APFloat(semPPCDoubleDouble
, Inv
.bitcastToAPInt());
4433 DoubleAPFloat
scalbn(DoubleAPFloat Arg
, int Exp
, APFloat::roundingMode RM
) {
4434 assert(Arg
.Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4435 return DoubleAPFloat(semPPCDoubleDouble
, scalbn(Arg
.Floats
[0], Exp
, RM
),
4436 scalbn(Arg
.Floats
[1], Exp
, RM
));
4439 DoubleAPFloat
frexp(const DoubleAPFloat
&Arg
, int &Exp
,
4440 APFloat::roundingMode RM
) {
4441 assert(Arg
.Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4442 APFloat First
= frexp(Arg
.Floats
[0], Exp
, RM
);
4443 APFloat Second
= Arg
.Floats
[1];
4444 if (Arg
.getCategory() == APFloat::fcNormal
)
4445 Second
= scalbn(Second
, -Exp
, RM
);
4446 return DoubleAPFloat(semPPCDoubleDouble
, std::move(First
), std::move(Second
));
4449 } // End detail namespace
4451 APFloat::Storage::Storage(IEEEFloat F
, const fltSemantics
&Semantics
) {
4452 if (usesLayout
<IEEEFloat
>(Semantics
)) {
4453 new (&IEEE
) IEEEFloat(std::move(F
));
4456 if (usesLayout
<DoubleAPFloat
>(Semantics
)) {
4457 const fltSemantics
& S
= F
.getSemantics();
4459 DoubleAPFloat(Semantics
, APFloat(std::move(F
), S
),
4460 APFloat(semIEEEdouble
));
4463 llvm_unreachable("Unexpected semantics");
4466 APFloat::opStatus
APFloat::convertFromString(StringRef Str
, roundingMode RM
) {
4467 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str
, RM
));
4470 hash_code
hash_value(const APFloat
&Arg
) {
4471 if (APFloat::usesLayout
<detail::IEEEFloat
>(Arg
.getSemantics()))
4472 return hash_value(Arg
.U
.IEEE
);
4473 if (APFloat::usesLayout
<detail::DoubleAPFloat
>(Arg
.getSemantics()))
4474 return hash_value(Arg
.U
.Double
);
4475 llvm_unreachable("Unexpected semantics");
4478 APFloat::APFloat(const fltSemantics
&Semantics
, StringRef S
)
4479 : APFloat(Semantics
) {
4480 convertFromString(S
, rmNearestTiesToEven
);
4483 APFloat::opStatus
APFloat::convert(const fltSemantics
&ToSemantics
,
4484 roundingMode RM
, bool *losesInfo
) {
4485 if (&getSemantics() == &ToSemantics
) {
4489 if (usesLayout
<IEEEFloat
>(getSemantics()) &&
4490 usesLayout
<IEEEFloat
>(ToSemantics
))
4491 return U
.IEEE
.convert(ToSemantics
, RM
, losesInfo
);
4492 if (usesLayout
<IEEEFloat
>(getSemantics()) &&
4493 usesLayout
<DoubleAPFloat
>(ToSemantics
)) {
4494 assert(&ToSemantics
== &semPPCDoubleDouble
);
4495 auto Ret
= U
.IEEE
.convert(semPPCDoubleDoubleLegacy
, RM
, losesInfo
);
4496 *this = APFloat(ToSemantics
, U
.IEEE
.bitcastToAPInt());
4499 if (usesLayout
<DoubleAPFloat
>(getSemantics()) &&
4500 usesLayout
<IEEEFloat
>(ToSemantics
)) {
4501 auto Ret
= getIEEE().convert(ToSemantics
, RM
, losesInfo
);
4502 *this = APFloat(std::move(getIEEE()), ToSemantics
);
4505 llvm_unreachable("Unexpected semantics");
4508 APFloat
APFloat::getAllOnesValue(unsigned BitWidth
, bool isIEEE
) {
4512 return APFloat(semIEEEhalf
, APInt::getAllOnesValue(BitWidth
));
4514 return APFloat(semIEEEsingle
, APInt::getAllOnesValue(BitWidth
));
4516 return APFloat(semIEEEdouble
, APInt::getAllOnesValue(BitWidth
));
4518 return APFloat(semX87DoubleExtended
, APInt::getAllOnesValue(BitWidth
));
4520 return APFloat(semIEEEquad
, APInt::getAllOnesValue(BitWidth
));
4522 llvm_unreachable("Unknown floating bit width");
4525 assert(BitWidth
== 128);
4526 return APFloat(semPPCDoubleDouble
, APInt::getAllOnesValue(BitWidth
));
4530 void APFloat::print(raw_ostream
&OS
) const {
4531 SmallVector
<char, 16> Buffer
;
4533 OS
<< Buffer
<< "\n";
4536 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4537 LLVM_DUMP_METHOD
void APFloat::dump() const { print(dbgs()); }
4540 void APFloat::Profile(FoldingSetNodeID
&NID
) const {
4541 NID
.Add(bitcastToAPInt());
4544 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
4545 an APSInt, whose initial bit-width and signed-ness are used to determine the
4546 precision of the conversion.
4548 APFloat::opStatus
APFloat::convertToInteger(APSInt
&result
,
4549 roundingMode rounding_mode
,
4550 bool *isExact
) const {
4551 unsigned bitWidth
= result
.getBitWidth();
4552 SmallVector
<uint64_t, 4> parts(result
.getNumWords());
4553 opStatus status
= convertToInteger(parts
, bitWidth
, result
.isSigned(),
4554 rounding_mode
, isExact
);
4555 // Keeps the original signed-ness.
4556 result
= APInt(bitWidth
, parts
);
4560 } // End llvm namespace
4562 #undef APFLOAT_DISPATCH_ON_SEMANTICS