1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements a class to represent arbitrary precision floating
10 // point values and provide a variety of arithmetic operations on them.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/APSInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Config/llvm-config.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/Error.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
29 #define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \
31 if (usesLayout<IEEEFloat>(getSemantics())) \
32 return U.IEEE.METHOD_CALL; \
33 if (usesLayout<DoubleAPFloat>(getSemantics())) \
34 return U.Double.METHOD_CALL; \
35 llvm_unreachable("Unexpected semantics"); \
40 /// A macro used to combine two fcCategory enums into one key which can be used
41 /// in a switch statement to classify how the interaction of two APFloat's
42 /// categories affects an operation.
44 /// TODO: If clang source code is ever allowed to use constexpr in its own
45 /// codebase, change this into a static inline function.
46 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
48 /* Assumed in hexadecimal significand parsing, and conversion to
49 hexadecimal strings. */
50 static_assert(APFloatBase::integerPartWidth
% 4 == 0, "Part width must be divisible by 4!");
53 /* Represents floating point arithmetic semantics. */
55 /* The largest E such that 2^E is representable; this matches the
56 definition of IEEE 754. */
57 APFloatBase::ExponentType maxExponent
;
59 /* The smallest E such that 2^E is a normalized number; this
60 matches the definition of IEEE 754. */
61 APFloatBase::ExponentType minExponent
;
63 /* Number of bits in the significand. This includes the integer
65 unsigned int precision
;
67 /* Number of bits actually used in the semantics. */
68 unsigned int sizeInBits
;
70 // Returns true if any number described by this semantics can be precisely
71 // represented by the specified semantics.
72 bool isRepresentableBy(const fltSemantics
&S
) const {
73 return maxExponent
<= S
.maxExponent
&& minExponent
>= S
.minExponent
&&
74 precision
<= S
.precision
;
78 static const fltSemantics semIEEEhalf
= {15, -14, 11, 16};
79 static const fltSemantics semBFloat
= {127, -126, 8, 16};
80 static const fltSemantics semIEEEsingle
= {127, -126, 24, 32};
81 static const fltSemantics semIEEEdouble
= {1023, -1022, 53, 64};
82 static const fltSemantics semIEEEquad
= {16383, -16382, 113, 128};
83 static const fltSemantics semX87DoubleExtended
= {16383, -16382, 64, 80};
84 static const fltSemantics semBogus
= {0, 0, 0, 0};
86 /* The IBM double-double semantics. Such a number consists of a pair of IEEE
87 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
88 (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
89 Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
90 to each other, and two 11-bit exponents.
92 Note: we need to make the value different from semBogus as otherwise
93 an unsafe optimization may collapse both values to a single address,
94 and we heavily rely on them having distinct addresses. */
95 static const fltSemantics semPPCDoubleDouble
= {-1, 0, 0, 0};
97 /* These are legacy semantics for the fallback, inaccrurate implementation of
98 IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
99 operation. It's equivalent to having an IEEE number with consecutive 106
100 bits of mantissa and 11 bits of exponent.
102 It's not equivalent to IBM double-double. For example, a legit IBM
103 double-double, 1 + epsilon:
105 1 + epsilon = 1 + (1 >> 1076)
107 is not representable by a consecutive 106 bits of mantissa.
109 Currently, these semantics are used in the following way:
111 semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
112 (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
113 semPPCDoubleDoubleLegacy -> IEEE operations
115 We use bitcastToAPInt() to get the bit representation (in APInt) of the
116 underlying IEEEdouble, then use the APInt constructor to construct the
119 TODO: Implement all operations in semPPCDoubleDouble, and delete these
121 static const fltSemantics semPPCDoubleDoubleLegacy
= {1023, -1022 + 53,
124 const llvm::fltSemantics
&APFloatBase::EnumToSemantics(Semantics S
) {
134 case S_x87DoubleExtended
:
135 return x87DoubleExtended();
138 case S_PPCDoubleDouble
:
139 return PPCDoubleDouble();
141 llvm_unreachable("Unrecognised floating semantics");
144 APFloatBase::Semantics
145 APFloatBase::SemanticsToEnum(const llvm::fltSemantics
&Sem
) {
146 if (&Sem
== &llvm::APFloat::IEEEhalf())
148 else if (&Sem
== &llvm::APFloat::BFloat())
150 else if (&Sem
== &llvm::APFloat::IEEEsingle())
152 else if (&Sem
== &llvm::APFloat::IEEEdouble())
154 else if (&Sem
== &llvm::APFloat::x87DoubleExtended())
155 return S_x87DoubleExtended
;
156 else if (&Sem
== &llvm::APFloat::IEEEquad())
158 else if (&Sem
== &llvm::APFloat::PPCDoubleDouble())
159 return S_PPCDoubleDouble
;
161 llvm_unreachable("Unknown floating semantics");
164 const fltSemantics
&APFloatBase::IEEEhalf() {
167 const fltSemantics
&APFloatBase::BFloat() {
170 const fltSemantics
&APFloatBase::IEEEsingle() {
171 return semIEEEsingle
;
173 const fltSemantics
&APFloatBase::IEEEdouble() {
174 return semIEEEdouble
;
176 const fltSemantics
&APFloatBase::IEEEquad() {
179 const fltSemantics
&APFloatBase::x87DoubleExtended() {
180 return semX87DoubleExtended
;
182 const fltSemantics
&APFloatBase::Bogus() {
185 const fltSemantics
&APFloatBase::PPCDoubleDouble() {
186 return semPPCDoubleDouble
;
189 constexpr RoundingMode
APFloatBase::rmNearestTiesToEven
;
190 constexpr RoundingMode
APFloatBase::rmTowardPositive
;
191 constexpr RoundingMode
APFloatBase::rmTowardNegative
;
192 constexpr RoundingMode
APFloatBase::rmTowardZero
;
193 constexpr RoundingMode
APFloatBase::rmNearestTiesToAway
;
195 /* A tight upper bound on number of parts required to hold the value
198 power * 815 / (351 * integerPartWidth) + 1
200 However, whilst the result may require only this many parts,
201 because we are multiplying two values to get it, the
202 multiplication may require an extra part with the excess part
203 being zero (consider the trivial case of 1 * 1, tcFullMultiply
204 requires two parts to hold the single-part result). So we add an
205 extra one to guarantee enough space whilst multiplying. */
206 const unsigned int maxExponent
= 16383;
207 const unsigned int maxPrecision
= 113;
208 const unsigned int maxPowerOfFiveExponent
= maxExponent
+ maxPrecision
- 1;
209 const unsigned int maxPowerOfFiveParts
= 2 + ((maxPowerOfFiveExponent
* 815) / (351 * APFloatBase::integerPartWidth
));
211 unsigned int APFloatBase::semanticsPrecision(const fltSemantics
&semantics
) {
212 return semantics
.precision
;
214 APFloatBase::ExponentType
215 APFloatBase::semanticsMaxExponent(const fltSemantics
&semantics
) {
216 return semantics
.maxExponent
;
218 APFloatBase::ExponentType
219 APFloatBase::semanticsMinExponent(const fltSemantics
&semantics
) {
220 return semantics
.minExponent
;
222 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics
&semantics
) {
223 return semantics
.sizeInBits
;
226 unsigned APFloatBase::getSizeInBits(const fltSemantics
&Sem
) {
227 return Sem
.sizeInBits
;
230 /* A bunch of private, handy routines. */
232 static inline Error
createError(const Twine
&Err
) {
233 return make_error
<StringError
>(Err
, inconvertibleErrorCode());
236 static inline unsigned int
237 partCountForBits(unsigned int bits
)
239 return ((bits
) + APFloatBase::integerPartWidth
- 1) / APFloatBase::integerPartWidth
;
242 /* Returns 0U-9U. Return values >= 10U are not digits. */
243 static inline unsigned int
244 decDigitValue(unsigned int c
)
249 /* Return the value of a decimal exponent of the form
252 If the exponent overflows, returns a large exponent with the
254 static Expected
<int> readExponent(StringRef::iterator begin
,
255 StringRef::iterator end
) {
257 unsigned int absExponent
;
258 const unsigned int overlargeExponent
= 24000; /* FIXME. */
259 StringRef::iterator p
= begin
;
261 // Treat no exponent as 0 to match binutils
262 if (p
== end
|| ((*p
== '-' || *p
== '+') && (p
+ 1) == end
)) {
266 isNegative
= (*p
== '-');
267 if (*p
== '-' || *p
== '+') {
270 return createError("Exponent has no digits");
273 absExponent
= decDigitValue(*p
++);
274 if (absExponent
>= 10U)
275 return createError("Invalid character in exponent");
277 for (; p
!= end
; ++p
) {
280 value
= decDigitValue(*p
);
282 return createError("Invalid character in exponent");
284 absExponent
= absExponent
* 10U + value
;
285 if (absExponent
>= overlargeExponent
) {
286 absExponent
= overlargeExponent
;
292 return -(int) absExponent
;
294 return (int) absExponent
;
297 /* This is ugly and needs cleaning up, but I don't immediately see
298 how whilst remaining safe. */
299 static Expected
<int> totalExponent(StringRef::iterator p
,
300 StringRef::iterator end
,
301 int exponentAdjustment
) {
302 int unsignedExponent
;
303 bool negative
, overflow
;
307 return createError("Exponent has no digits");
309 negative
= *p
== '-';
310 if (*p
== '-' || *p
== '+') {
313 return createError("Exponent has no digits");
316 unsignedExponent
= 0;
318 for (; p
!= end
; ++p
) {
321 value
= decDigitValue(*p
);
323 return createError("Invalid character in exponent");
325 unsignedExponent
= unsignedExponent
* 10 + value
;
326 if (unsignedExponent
> 32767) {
332 if (exponentAdjustment
> 32767 || exponentAdjustment
< -32768)
336 exponent
= unsignedExponent
;
338 exponent
= -exponent
;
339 exponent
+= exponentAdjustment
;
340 if (exponent
> 32767 || exponent
< -32768)
345 exponent
= negative
? -32768: 32767;
350 static Expected
<StringRef::iterator
>
351 skipLeadingZeroesAndAnyDot(StringRef::iterator begin
, StringRef::iterator end
,
352 StringRef::iterator
*dot
) {
353 StringRef::iterator p
= begin
;
355 while (p
!= end
&& *p
== '0')
358 if (p
!= end
&& *p
== '.') {
361 if (end
- begin
== 1)
362 return createError("Significand has no digits");
364 while (p
!= end
&& *p
== '0')
371 /* Given a normal decimal floating point number of the form
375 where the decimal point and exponent are optional, fill out the
376 structure D. Exponent is appropriate if the significand is
377 treated as an integer, and normalizedExponent if the significand
378 is taken to have the decimal point after a single leading
381 If the value is zero, V->firstSigDigit points to a non-digit, and
382 the return exponent is zero.
385 const char *firstSigDigit
;
386 const char *lastSigDigit
;
388 int normalizedExponent
;
391 static Error
interpretDecimal(StringRef::iterator begin
,
392 StringRef::iterator end
, decimalInfo
*D
) {
393 StringRef::iterator dot
= end
;
395 auto PtrOrErr
= skipLeadingZeroesAndAnyDot(begin
, end
, &dot
);
397 return PtrOrErr
.takeError();
398 StringRef::iterator p
= *PtrOrErr
;
400 D
->firstSigDigit
= p
;
402 D
->normalizedExponent
= 0;
404 for (; p
!= end
; ++p
) {
407 return createError("String contains multiple dots");
412 if (decDigitValue(*p
) >= 10U)
417 if (*p
!= 'e' && *p
!= 'E')
418 return createError("Invalid character in significand");
420 return createError("Significand has no digits");
421 if (dot
!= end
&& p
- begin
== 1)
422 return createError("Significand has no digits");
424 /* p points to the first non-digit in the string */
425 auto ExpOrErr
= readExponent(p
+ 1, end
);
427 return ExpOrErr
.takeError();
428 D
->exponent
= *ExpOrErr
;
430 /* Implied decimal point? */
435 /* If number is all zeroes accept any exponent. */
436 if (p
!= D
->firstSigDigit
) {
437 /* Drop insignificant trailing zeroes. */
442 while (p
!= begin
&& *p
== '0');
443 while (p
!= begin
&& *p
== '.');
446 /* Adjust the exponents for any decimal point. */
447 D
->exponent
+= static_cast<APFloat::ExponentType
>((dot
- p
) - (dot
> p
));
448 D
->normalizedExponent
= (D
->exponent
+
449 static_cast<APFloat::ExponentType
>((p
- D
->firstSigDigit
)
450 - (dot
> D
->firstSigDigit
&& dot
< p
)));
454 return Error::success();
457 /* Return the trailing fraction of a hexadecimal number.
458 DIGITVALUE is the first hex digit of the fraction, P points to
460 static Expected
<lostFraction
>
461 trailingHexadecimalFraction(StringRef::iterator p
, StringRef::iterator end
,
462 unsigned int digitValue
) {
463 unsigned int hexDigit
;
465 /* If the first trailing digit isn't 0 or 8 we can work out the
466 fraction immediately. */
468 return lfMoreThanHalf
;
469 else if (digitValue
< 8 && digitValue
> 0)
470 return lfLessThanHalf
;
472 // Otherwise we need to find the first non-zero digit.
473 while (p
!= end
&& (*p
== '0' || *p
== '.'))
477 return createError("Invalid trailing hexadecimal fraction!");
479 hexDigit
= hexDigitValue(*p
);
481 /* If we ran off the end it is exactly zero or one-half, otherwise
484 return digitValue
== 0 ? lfExactlyZero
: lfExactlyHalf
;
486 return digitValue
== 0 ? lfLessThanHalf
: lfMoreThanHalf
;
489 /* Return the fraction lost were a bignum truncated losing the least
490 significant BITS bits. */
492 lostFractionThroughTruncation(const APFloatBase::integerPart
*parts
,
493 unsigned int partCount
,
498 lsb
= APInt::tcLSB(parts
, partCount
);
500 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
502 return lfExactlyZero
;
504 return lfExactlyHalf
;
505 if (bits
<= partCount
* APFloatBase::integerPartWidth
&&
506 APInt::tcExtractBit(parts
, bits
- 1))
507 return lfMoreThanHalf
;
509 return lfLessThanHalf
;
512 /* Shift DST right BITS bits noting lost fraction. */
514 shiftRight(APFloatBase::integerPart
*dst
, unsigned int parts
, unsigned int bits
)
516 lostFraction lost_fraction
;
518 lost_fraction
= lostFractionThroughTruncation(dst
, parts
, bits
);
520 APInt::tcShiftRight(dst
, parts
, bits
);
522 return lost_fraction
;
525 /* Combine the effect of two lost fractions. */
527 combineLostFractions(lostFraction moreSignificant
,
528 lostFraction lessSignificant
)
530 if (lessSignificant
!= lfExactlyZero
) {
531 if (moreSignificant
== lfExactlyZero
)
532 moreSignificant
= lfLessThanHalf
;
533 else if (moreSignificant
== lfExactlyHalf
)
534 moreSignificant
= lfMoreThanHalf
;
537 return moreSignificant
;
540 /* The error from the true value, in half-ulps, on multiplying two
541 floating point numbers, which differ from the value they
542 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
543 than the returned value.
545 See "How to Read Floating Point Numbers Accurately" by William D
548 HUerrBound(bool inexactMultiply
, unsigned int HUerr1
, unsigned int HUerr2
)
550 assert(HUerr1
< 2 || HUerr2
< 2 || (HUerr1
+ HUerr2
< 8));
552 if (HUerr1
+ HUerr2
== 0)
553 return inexactMultiply
* 2; /* <= inexactMultiply half-ulps. */
555 return inexactMultiply
+ 2 * (HUerr1
+ HUerr2
);
558 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
559 when the least significant BITS are truncated. BITS cannot be
561 static APFloatBase::integerPart
562 ulpsFromBoundary(const APFloatBase::integerPart
*parts
, unsigned int bits
,
564 unsigned int count
, partBits
;
565 APFloatBase::integerPart part
, boundary
;
570 count
= bits
/ APFloatBase::integerPartWidth
;
571 partBits
= bits
% APFloatBase::integerPartWidth
+ 1;
573 part
= parts
[count
] & (~(APFloatBase::integerPart
) 0 >> (APFloatBase::integerPartWidth
- partBits
));
576 boundary
= (APFloatBase::integerPart
) 1 << (partBits
- 1);
581 if (part
- boundary
<= boundary
- part
)
582 return part
- boundary
;
584 return boundary
- part
;
587 if (part
== boundary
) {
590 return ~(APFloatBase::integerPart
) 0; /* A lot. */
593 } else if (part
== boundary
- 1) {
596 return ~(APFloatBase::integerPart
) 0; /* A lot. */
601 return ~(APFloatBase::integerPart
) 0; /* A lot. */
604 /* Place pow(5, power) in DST, and return the number of parts used.
605 DST must be at least one part larger than size of the answer. */
607 powerOf5(APFloatBase::integerPart
*dst
, unsigned int power
) {
608 static const APFloatBase::integerPart firstEightPowers
[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
609 APFloatBase::integerPart pow5s
[maxPowerOfFiveParts
* 2 + 5];
610 pow5s
[0] = 78125 * 5;
612 unsigned int partsCount
[16] = { 1 };
613 APFloatBase::integerPart scratch
[maxPowerOfFiveParts
], *p1
, *p2
, *pow5
;
615 assert(power
<= maxExponent
);
620 *p1
= firstEightPowers
[power
& 7];
626 for (unsigned int n
= 0; power
; power
>>= 1, n
++) {
631 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
633 pc
= partsCount
[n
- 1];
634 APInt::tcFullMultiply(pow5
, pow5
- pc
, pow5
- pc
, pc
, pc
);
636 if (pow5
[pc
- 1] == 0)
642 APFloatBase::integerPart
*tmp
;
644 APInt::tcFullMultiply(p2
, p1
, pow5
, result
, pc
);
646 if (p2
[result
- 1] == 0)
649 /* Now result is in p1 with partsCount parts and p2 is scratch
660 APInt::tcAssign(dst
, p1
, result
);
665 /* Zero at the end to avoid modular arithmetic when adding one; used
666 when rounding up during hexadecimal output. */
667 static const char hexDigitsLower
[] = "0123456789abcdef0";
668 static const char hexDigitsUpper
[] = "0123456789ABCDEF0";
669 static const char infinityL
[] = "infinity";
670 static const char infinityU
[] = "INFINITY";
671 static const char NaNL
[] = "nan";
672 static const char NaNU
[] = "NAN";
674 /* Write out an integerPart in hexadecimal, starting with the most
675 significant nibble. Write out exactly COUNT hexdigits, return
678 partAsHex (char *dst
, APFloatBase::integerPart part
, unsigned int count
,
679 const char *hexDigitChars
)
681 unsigned int result
= count
;
683 assert(count
!= 0 && count
<= APFloatBase::integerPartWidth
/ 4);
685 part
>>= (APFloatBase::integerPartWidth
- 4 * count
);
687 dst
[count
] = hexDigitChars
[part
& 0xf];
694 /* Write out an unsigned decimal integer. */
696 writeUnsignedDecimal (char *dst
, unsigned int n
)
712 /* Write out a signed decimal integer. */
714 writeSignedDecimal (char *dst
, int value
)
718 dst
= writeUnsignedDecimal(dst
, -(unsigned) value
);
720 dst
= writeUnsignedDecimal(dst
, value
);
727 void IEEEFloat::initialize(const fltSemantics
*ourSemantics
) {
730 semantics
= ourSemantics
;
733 significand
.parts
= new integerPart
[count
];
736 void IEEEFloat::freeSignificand() {
738 delete [] significand
.parts
;
741 void IEEEFloat::assign(const IEEEFloat
&rhs
) {
742 assert(semantics
== rhs
.semantics
);
745 category
= rhs
.category
;
746 exponent
= rhs
.exponent
;
747 if (isFiniteNonZero() || category
== fcNaN
)
748 copySignificand(rhs
);
751 void IEEEFloat::copySignificand(const IEEEFloat
&rhs
) {
752 assert(isFiniteNonZero() || category
== fcNaN
);
753 assert(rhs
.partCount() >= partCount());
755 APInt::tcAssign(significandParts(), rhs
.significandParts(),
759 /* Make this number a NaN, with an arbitrary but deterministic value
760 for the significand. If double or longer, this is a signalling NaN,
761 which may not be ideal. If float, this is QNaN(0). */
762 void IEEEFloat::makeNaN(bool SNaN
, bool Negative
, const APInt
*fill
) {
765 exponent
= exponentNaN();
767 integerPart
*significand
= significandParts();
768 unsigned numParts
= partCount();
770 // Set the significand bits to the fill.
771 if (!fill
|| fill
->getNumWords() < numParts
)
772 APInt::tcSet(significand
, 0, numParts
);
774 APInt::tcAssign(significand
, fill
->getRawData(),
775 std::min(fill
->getNumWords(), numParts
));
777 // Zero out the excess bits of the significand.
778 unsigned bitsToPreserve
= semantics
->precision
- 1;
779 unsigned part
= bitsToPreserve
/ 64;
780 bitsToPreserve
%= 64;
781 significand
[part
] &= ((1ULL << bitsToPreserve
) - 1);
782 for (part
++; part
!= numParts
; ++part
)
783 significand
[part
] = 0;
786 unsigned QNaNBit
= semantics
->precision
- 2;
789 // We always have to clear the QNaN bit to make it an SNaN.
790 APInt::tcClearBit(significand
, QNaNBit
);
792 // If there are no bits set in the payload, we have to set
793 // *something* to make it a NaN instead of an infinity;
794 // conventionally, this is the next bit down from the QNaN bit.
795 if (APInt::tcIsZero(significand
, numParts
))
796 APInt::tcSetBit(significand
, QNaNBit
- 1);
798 // We always have to set the QNaN bit to make it a QNaN.
799 APInt::tcSetBit(significand
, QNaNBit
);
802 // For x87 extended precision, we want to make a NaN, not a
803 // pseudo-NaN. Maybe we should expose the ability to make
805 if (semantics
== &semX87DoubleExtended
)
806 APInt::tcSetBit(significand
, QNaNBit
+ 1);
809 IEEEFloat
&IEEEFloat::operator=(const IEEEFloat
&rhs
) {
811 if (semantics
!= rhs
.semantics
) {
813 initialize(rhs
.semantics
);
821 IEEEFloat
&IEEEFloat::operator=(IEEEFloat
&&rhs
) {
824 semantics
= rhs
.semantics
;
825 significand
= rhs
.significand
;
826 exponent
= rhs
.exponent
;
827 category
= rhs
.category
;
830 rhs
.semantics
= &semBogus
;
834 bool IEEEFloat::isDenormal() const {
835 return isFiniteNonZero() && (exponent
== semantics
->minExponent
) &&
836 (APInt::tcExtractBit(significandParts(),
837 semantics
->precision
- 1) == 0);
840 bool IEEEFloat::isSmallest() const {
841 // The smallest number by magnitude in our format will be the smallest
842 // denormal, i.e. the floating point number with exponent being minimum
843 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
844 return isFiniteNonZero() && exponent
== semantics
->minExponent
&&
845 significandMSB() == 0;
848 bool IEEEFloat::isSignificandAllOnes() const {
849 // Test if the significand excluding the integral bit is all ones. This allows
850 // us to test for binade boundaries.
851 const integerPart
*Parts
= significandParts();
852 const unsigned PartCount
= partCountForBits(semantics
->precision
);
853 for (unsigned i
= 0; i
< PartCount
- 1; i
++)
857 // Set the unused high bits to all ones when we compare.
858 const unsigned NumHighBits
=
859 PartCount
*integerPartWidth
- semantics
->precision
+ 1;
860 assert(NumHighBits
<= integerPartWidth
&& NumHighBits
> 0 &&
861 "Can not have more high bits to fill than integerPartWidth");
862 const integerPart HighBitFill
=
863 ~integerPart(0) << (integerPartWidth
- NumHighBits
);
864 if (~(Parts
[PartCount
- 1] | HighBitFill
))
870 bool IEEEFloat::isSignificandAllZeros() const {
871 // Test if the significand excluding the integral bit is all zeros. This
872 // allows us to test for binade boundaries.
873 const integerPart
*Parts
= significandParts();
874 const unsigned PartCount
= partCountForBits(semantics
->precision
);
876 for (unsigned i
= 0; i
< PartCount
- 1; i
++)
880 // Compute how many bits are used in the final word.
881 const unsigned NumHighBits
=
882 PartCount
*integerPartWidth
- semantics
->precision
+ 1;
883 assert(NumHighBits
< integerPartWidth
&& "Can not have more high bits to "
884 "clear than integerPartWidth");
885 const integerPart HighBitMask
= ~integerPart(0) >> NumHighBits
;
887 if (Parts
[PartCount
- 1] & HighBitMask
)
893 bool IEEEFloat::isLargest() const {
894 // The largest number by magnitude in our format will be the floating point
895 // number with maximum exponent and with significand that is all ones.
896 return isFiniteNonZero() && exponent
== semantics
->maxExponent
897 && isSignificandAllOnes();
900 bool IEEEFloat::isInteger() const {
901 // This could be made more efficient; I'm going for obviously correct.
902 if (!isFinite()) return false;
903 IEEEFloat truncated
= *this;
904 truncated
.roundToIntegral(rmTowardZero
);
905 return compare(truncated
) == cmpEqual
;
908 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat
&rhs
) const {
911 if (semantics
!= rhs
.semantics
||
912 category
!= rhs
.category
||
915 if (category
==fcZero
|| category
==fcInfinity
)
918 if (isFiniteNonZero() && exponent
!= rhs
.exponent
)
921 return std::equal(significandParts(), significandParts() + partCount(),
922 rhs
.significandParts());
925 IEEEFloat::IEEEFloat(const fltSemantics
&ourSemantics
, integerPart value
) {
926 initialize(&ourSemantics
);
930 exponent
= ourSemantics
.precision
- 1;
931 significandParts()[0] = value
;
932 normalize(rmNearestTiesToEven
, lfExactlyZero
);
935 IEEEFloat::IEEEFloat(const fltSemantics
&ourSemantics
) {
936 initialize(&ourSemantics
);
940 // Delegate to the previous constructor, because later copy constructor may
941 // actually inspects category, which can't be garbage.
942 IEEEFloat::IEEEFloat(const fltSemantics
&ourSemantics
, uninitializedTag tag
)
943 : IEEEFloat(ourSemantics
) {}
945 IEEEFloat::IEEEFloat(const IEEEFloat
&rhs
) {
946 initialize(rhs
.semantics
);
950 IEEEFloat::IEEEFloat(IEEEFloat
&&rhs
) : semantics(&semBogus
) {
951 *this = std::move(rhs
);
954 IEEEFloat::~IEEEFloat() { freeSignificand(); }
956 unsigned int IEEEFloat::partCount() const {
957 return partCountForBits(semantics
->precision
+ 1);
960 const IEEEFloat::integerPart
*IEEEFloat::significandParts() const {
961 return const_cast<IEEEFloat
*>(this)->significandParts();
964 IEEEFloat::integerPart
*IEEEFloat::significandParts() {
966 return significand
.parts
;
968 return &significand
.part
;
971 void IEEEFloat::zeroSignificand() {
972 APInt::tcSet(significandParts(), 0, partCount());
975 /* Increment an fcNormal floating point number's significand. */
976 void IEEEFloat::incrementSignificand() {
979 carry
= APInt::tcIncrement(significandParts(), partCount());
981 /* Our callers should never cause us to overflow. */
986 /* Add the significand of the RHS. Returns the carry flag. */
987 IEEEFloat::integerPart
IEEEFloat::addSignificand(const IEEEFloat
&rhs
) {
990 parts
= significandParts();
992 assert(semantics
== rhs
.semantics
);
993 assert(exponent
== rhs
.exponent
);
995 return APInt::tcAdd(parts
, rhs
.significandParts(), 0, partCount());
998 /* Subtract the significand of the RHS with a borrow flag. Returns
1000 IEEEFloat::integerPart
IEEEFloat::subtractSignificand(const IEEEFloat
&rhs
,
1001 integerPart borrow
) {
1004 parts
= significandParts();
1006 assert(semantics
== rhs
.semantics
);
1007 assert(exponent
== rhs
.exponent
);
1009 return APInt::tcSubtract(parts
, rhs
.significandParts(), borrow
,
1013 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
1014 on to the full-precision result of the multiplication. Returns the
1016 lostFraction
IEEEFloat::multiplySignificand(const IEEEFloat
&rhs
,
1018 unsigned int omsb
; // One, not zero, based MSB.
1019 unsigned int partsCount
, newPartsCount
, precision
;
1020 integerPart
*lhsSignificand
;
1021 integerPart scratch
[4];
1022 integerPart
*fullSignificand
;
1023 lostFraction lost_fraction
;
1026 assert(semantics
== rhs
.semantics
);
1028 precision
= semantics
->precision
;
1030 // Allocate space for twice as many bits as the original significand, plus one
1031 // extra bit for the addition to overflow into.
1032 newPartsCount
= partCountForBits(precision
* 2 + 1);
1034 if (newPartsCount
> 4)
1035 fullSignificand
= new integerPart
[newPartsCount
];
1037 fullSignificand
= scratch
;
1039 lhsSignificand
= significandParts();
1040 partsCount
= partCount();
1042 APInt::tcFullMultiply(fullSignificand
, lhsSignificand
,
1043 rhs
.significandParts(), partsCount
, partsCount
);
1045 lost_fraction
= lfExactlyZero
;
1046 omsb
= APInt::tcMSB(fullSignificand
, newPartsCount
) + 1;
1047 exponent
+= rhs
.exponent
;
1049 // Assume the operands involved in the multiplication are single-precision
1050 // FP, and the two multiplicants are:
1051 // *this = a23 . a22 ... a0 * 2^e1
1052 // rhs = b23 . b22 ... b0 * 2^e2
1053 // the result of multiplication is:
1054 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
1055 // Note that there are three significant bits at the left-hand side of the
1056 // radix point: two for the multiplication, and an overflow bit for the
1057 // addition (that will always be zero at this point). Move the radix point
1058 // toward left by two bits, and adjust exponent accordingly.
1061 if (addend
.isNonZero()) {
1062 // The intermediate result of the multiplication has "2 * precision"
1063 // signicant bit; adjust the addend to be consistent with mul result.
1065 Significand savedSignificand
= significand
;
1066 const fltSemantics
*savedSemantics
= semantics
;
1067 fltSemantics extendedSemantics
;
1069 unsigned int extendedPrecision
;
1071 // Normalize our MSB to one below the top bit to allow for overflow.
1072 extendedPrecision
= 2 * precision
+ 1;
1073 if (omsb
!= extendedPrecision
- 1) {
1074 assert(extendedPrecision
> omsb
);
1075 APInt::tcShiftLeft(fullSignificand
, newPartsCount
,
1076 (extendedPrecision
- 1) - omsb
);
1077 exponent
-= (extendedPrecision
- 1) - omsb
;
1080 /* Create new semantics. */
1081 extendedSemantics
= *semantics
;
1082 extendedSemantics
.precision
= extendedPrecision
;
1084 if (newPartsCount
== 1)
1085 significand
.part
= fullSignificand
[0];
1087 significand
.parts
= fullSignificand
;
1088 semantics
= &extendedSemantics
;
1090 // Make a copy so we can convert it to the extended semantics.
1091 // Note that we cannot convert the addend directly, as the extendedSemantics
1092 // is a local variable (which we take a reference to).
1093 IEEEFloat
extendedAddend(addend
);
1094 status
= extendedAddend
.convert(extendedSemantics
, rmTowardZero
, &ignored
);
1095 assert(status
== opOK
);
1098 // Shift the significand of the addend right by one bit. This guarantees
1099 // that the high bit of the significand is zero (same as fullSignificand),
1100 // so the addition will overflow (if it does overflow at all) into the top bit.
1101 lost_fraction
= extendedAddend
.shiftSignificandRight(1);
1102 assert(lost_fraction
== lfExactlyZero
&&
1103 "Lost precision while shifting addend for fused-multiply-add.");
1105 lost_fraction
= addOrSubtractSignificand(extendedAddend
, false);
1107 /* Restore our state. */
1108 if (newPartsCount
== 1)
1109 fullSignificand
[0] = significand
.part
;
1110 significand
= savedSignificand
;
1111 semantics
= savedSemantics
;
1113 omsb
= APInt::tcMSB(fullSignificand
, newPartsCount
) + 1;
1116 // Convert the result having "2 * precision" significant-bits back to the one
1117 // having "precision" significant-bits. First, move the radix point from
1118 // poision "2*precision - 1" to "precision - 1". The exponent need to be
1119 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1120 exponent
-= precision
+ 1;
1122 // In case MSB resides at the left-hand side of radix point, shift the
1123 // mantissa right by some amount to make sure the MSB reside right before
1124 // the radix point (i.e. "MSB . rest-significant-bits").
1126 // Note that the result is not normalized when "omsb < precision". So, the
1127 // caller needs to call IEEEFloat::normalize() if normalized value is
1129 if (omsb
> precision
) {
1130 unsigned int bits
, significantParts
;
1133 bits
= omsb
- precision
;
1134 significantParts
= partCountForBits(omsb
);
1135 lf
= shiftRight(fullSignificand
, significantParts
, bits
);
1136 lost_fraction
= combineLostFractions(lf
, lost_fraction
);
1140 APInt::tcAssign(lhsSignificand
, fullSignificand
, partsCount
);
1142 if (newPartsCount
> 4)
1143 delete [] fullSignificand
;
1145 return lost_fraction
;
1148 lostFraction
IEEEFloat::multiplySignificand(const IEEEFloat
&rhs
) {
1149 return multiplySignificand(rhs
, IEEEFloat(*semantics
));
1152 /* Multiply the significands of LHS and RHS to DST. */
1153 lostFraction
IEEEFloat::divideSignificand(const IEEEFloat
&rhs
) {
1154 unsigned int bit
, i
, partsCount
;
1155 const integerPart
*rhsSignificand
;
1156 integerPart
*lhsSignificand
, *dividend
, *divisor
;
1157 integerPart scratch
[4];
1158 lostFraction lost_fraction
;
1160 assert(semantics
== rhs
.semantics
);
1162 lhsSignificand
= significandParts();
1163 rhsSignificand
= rhs
.significandParts();
1164 partsCount
= partCount();
1167 dividend
= new integerPart
[partsCount
* 2];
1171 divisor
= dividend
+ partsCount
;
1173 /* Copy the dividend and divisor as they will be modified in-place. */
1174 for (i
= 0; i
< partsCount
; i
++) {
1175 dividend
[i
] = lhsSignificand
[i
];
1176 divisor
[i
] = rhsSignificand
[i
];
1177 lhsSignificand
[i
] = 0;
1180 exponent
-= rhs
.exponent
;
1182 unsigned int precision
= semantics
->precision
;
1184 /* Normalize the divisor. */
1185 bit
= precision
- APInt::tcMSB(divisor
, partsCount
) - 1;
1188 APInt::tcShiftLeft(divisor
, partsCount
, bit
);
1191 /* Normalize the dividend. */
1192 bit
= precision
- APInt::tcMSB(dividend
, partsCount
) - 1;
1195 APInt::tcShiftLeft(dividend
, partsCount
, bit
);
1198 /* Ensure the dividend >= divisor initially for the loop below.
1199 Incidentally, this means that the division loop below is
1200 guaranteed to set the integer bit to one. */
1201 if (APInt::tcCompare(dividend
, divisor
, partsCount
) < 0) {
1203 APInt::tcShiftLeft(dividend
, partsCount
, 1);
1204 assert(APInt::tcCompare(dividend
, divisor
, partsCount
) >= 0);
1207 /* Long division. */
1208 for (bit
= precision
; bit
; bit
-= 1) {
1209 if (APInt::tcCompare(dividend
, divisor
, partsCount
) >= 0) {
1210 APInt::tcSubtract(dividend
, divisor
, 0, partsCount
);
1211 APInt::tcSetBit(lhsSignificand
, bit
- 1);
1214 APInt::tcShiftLeft(dividend
, partsCount
, 1);
1217 /* Figure out the lost fraction. */
1218 int cmp
= APInt::tcCompare(dividend
, divisor
, partsCount
);
1221 lost_fraction
= lfMoreThanHalf
;
1223 lost_fraction
= lfExactlyHalf
;
1224 else if (APInt::tcIsZero(dividend
, partsCount
))
1225 lost_fraction
= lfExactlyZero
;
1227 lost_fraction
= lfLessThanHalf
;
1232 return lost_fraction
;
1235 unsigned int IEEEFloat::significandMSB() const {
1236 return APInt::tcMSB(significandParts(), partCount());
1239 unsigned int IEEEFloat::significandLSB() const {
1240 return APInt::tcLSB(significandParts(), partCount());
1243 /* Note that a zero result is NOT normalized to fcZero. */
1244 lostFraction
IEEEFloat::shiftSignificandRight(unsigned int bits
) {
1245 /* Our exponent should not overflow. */
1246 assert((ExponentType
) (exponent
+ bits
) >= exponent
);
1250 return shiftRight(significandParts(), partCount(), bits
);
1253 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1254 void IEEEFloat::shiftSignificandLeft(unsigned int bits
) {
1255 assert(bits
< semantics
->precision
);
1258 unsigned int partsCount
= partCount();
1260 APInt::tcShiftLeft(significandParts(), partsCount
, bits
);
1263 assert(!APInt::tcIsZero(significandParts(), partsCount
));
1267 IEEEFloat::cmpResult
1268 IEEEFloat::compareAbsoluteValue(const IEEEFloat
&rhs
) const {
1271 assert(semantics
== rhs
.semantics
);
1272 assert(isFiniteNonZero());
1273 assert(rhs
.isFiniteNonZero());
1275 compare
= exponent
- rhs
.exponent
;
1277 /* If exponents are equal, do an unsigned bignum comparison of the
1280 compare
= APInt::tcCompare(significandParts(), rhs
.significandParts(),
1284 return cmpGreaterThan
;
1285 else if (compare
< 0)
1291 /* Handle overflow. Sign is preserved. We either become infinity or
1292 the largest finite number. */
1293 IEEEFloat::opStatus
IEEEFloat::handleOverflow(roundingMode rounding_mode
) {
1295 if (rounding_mode
== rmNearestTiesToEven
||
1296 rounding_mode
== rmNearestTiesToAway
||
1297 (rounding_mode
== rmTowardPositive
&& !sign
) ||
1298 (rounding_mode
== rmTowardNegative
&& sign
)) {
1299 category
= fcInfinity
;
1300 return (opStatus
) (opOverflow
| opInexact
);
1303 /* Otherwise we become the largest finite number. */
1304 category
= fcNormal
;
1305 exponent
= semantics
->maxExponent
;
1306 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1307 semantics
->precision
);
1312 /* Returns TRUE if, when truncating the current number, with BIT the
1313 new LSB, with the given lost fraction and rounding mode, the result
1314 would need to be rounded away from zero (i.e., by increasing the
1315 signficand). This routine must work for fcZero of both signs, and
1316 fcNormal numbers. */
1317 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode
,
1318 lostFraction lost_fraction
,
1319 unsigned int bit
) const {
1320 /* NaNs and infinities should not have lost fractions. */
1321 assert(isFiniteNonZero() || category
== fcZero
);
1323 /* Current callers never pass this so we don't handle it. */
1324 assert(lost_fraction
!= lfExactlyZero
);
1326 switch (rounding_mode
) {
1327 case rmNearestTiesToAway
:
1328 return lost_fraction
== lfExactlyHalf
|| lost_fraction
== lfMoreThanHalf
;
1330 case rmNearestTiesToEven
:
1331 if (lost_fraction
== lfMoreThanHalf
)
1334 /* Our zeroes don't have a significand to test. */
1335 if (lost_fraction
== lfExactlyHalf
&& category
!= fcZero
)
1336 return APInt::tcExtractBit(significandParts(), bit
);
1343 case rmTowardPositive
:
1346 case rmTowardNegative
:
1352 llvm_unreachable("Invalid rounding mode found");
1355 IEEEFloat::opStatus
IEEEFloat::normalize(roundingMode rounding_mode
,
1356 lostFraction lost_fraction
) {
1357 unsigned int omsb
; /* One, not zero, based MSB. */
1360 if (!isFiniteNonZero())
1363 /* Before rounding normalize the exponent of fcNormal numbers. */
1364 omsb
= significandMSB() + 1;
1367 /* OMSB is numbered from 1. We want to place it in the integer
1368 bit numbered PRECISION if possible, with a compensating change in
1370 exponentChange
= omsb
- semantics
->precision
;
1372 /* If the resulting exponent is too high, overflow according to
1373 the rounding mode. */
1374 if (exponent
+ exponentChange
> semantics
->maxExponent
)
1375 return handleOverflow(rounding_mode
);
1377 /* Subnormal numbers have exponent minExponent, and their MSB
1378 is forced based on that. */
1379 if (exponent
+ exponentChange
< semantics
->minExponent
)
1380 exponentChange
= semantics
->minExponent
- exponent
;
1382 /* Shifting left is easy as we don't lose precision. */
1383 if (exponentChange
< 0) {
1384 assert(lost_fraction
== lfExactlyZero
);
1386 shiftSignificandLeft(-exponentChange
);
1391 if (exponentChange
> 0) {
1394 /* Shift right and capture any new lost fraction. */
1395 lf
= shiftSignificandRight(exponentChange
);
1397 lost_fraction
= combineLostFractions(lf
, lost_fraction
);
1399 /* Keep OMSB up-to-date. */
1400 if (omsb
> (unsigned) exponentChange
)
1401 omsb
-= exponentChange
;
1407 /* Now round the number according to rounding_mode given the lost
1410 /* As specified in IEEE 754, since we do not trap we do not report
1411 underflow for exact results. */
1412 if (lost_fraction
== lfExactlyZero
) {
1413 /* Canonicalize zeroes. */
1420 /* Increment the significand if we're rounding away from zero. */
1421 if (roundAwayFromZero(rounding_mode
, lost_fraction
, 0)) {
1423 exponent
= semantics
->minExponent
;
1425 incrementSignificand();
1426 omsb
= significandMSB() + 1;
1428 /* Did the significand increment overflow? */
1429 if (omsb
== (unsigned) semantics
->precision
+ 1) {
1430 /* Renormalize by incrementing the exponent and shifting our
1431 significand right one. However if we already have the
1432 maximum exponent we overflow to infinity. */
1433 if (exponent
== semantics
->maxExponent
) {
1434 category
= fcInfinity
;
1436 return (opStatus
) (opOverflow
| opInexact
);
1439 shiftSignificandRight(1);
1445 /* The normal case - we were and are not denormal, and any
1446 significand increment above didn't overflow. */
1447 if (omsb
== semantics
->precision
)
1450 /* We have a non-zero denormal. */
1451 assert(omsb
< semantics
->precision
);
1453 /* Canonicalize zeroes. */
1457 /* The fcZero case is a denormal that underflowed to zero. */
1458 return (opStatus
) (opUnderflow
| opInexact
);
1461 IEEEFloat::opStatus
IEEEFloat::addOrSubtractSpecials(const IEEEFloat
&rhs
,
1463 switch (PackCategoriesIntoKey(category
, rhs
.category
)) {
1465 llvm_unreachable(nullptr);
1467 case PackCategoriesIntoKey(fcZero
, fcNaN
):
1468 case PackCategoriesIntoKey(fcNormal
, fcNaN
):
1469 case PackCategoriesIntoKey(fcInfinity
, fcNaN
):
1472 case PackCategoriesIntoKey(fcNaN
, fcZero
):
1473 case PackCategoriesIntoKey(fcNaN
, fcNormal
):
1474 case PackCategoriesIntoKey(fcNaN
, fcInfinity
):
1475 case PackCategoriesIntoKey(fcNaN
, fcNaN
):
1476 if (isSignaling()) {
1480 return rhs
.isSignaling() ? opInvalidOp
: opOK
;
1482 case PackCategoriesIntoKey(fcNormal
, fcZero
):
1483 case PackCategoriesIntoKey(fcInfinity
, fcNormal
):
1484 case PackCategoriesIntoKey(fcInfinity
, fcZero
):
1487 case PackCategoriesIntoKey(fcNormal
, fcInfinity
):
1488 case PackCategoriesIntoKey(fcZero
, fcInfinity
):
1489 category
= fcInfinity
;
1490 sign
= rhs
.sign
^ subtract
;
1493 case PackCategoriesIntoKey(fcZero
, fcNormal
):
1495 sign
= rhs
.sign
^ subtract
;
1498 case PackCategoriesIntoKey(fcZero
, fcZero
):
1499 /* Sign depends on rounding mode; handled by caller. */
1502 case PackCategoriesIntoKey(fcInfinity
, fcInfinity
):
1503 /* Differently signed infinities can only be validly
1505 if (((sign
^ rhs
.sign
)!=0) != subtract
) {
1512 case PackCategoriesIntoKey(fcNormal
, fcNormal
):
1517 /* Add or subtract two normal numbers. */
1518 lostFraction
IEEEFloat::addOrSubtractSignificand(const IEEEFloat
&rhs
,
1521 lostFraction lost_fraction
;
1524 /* Determine if the operation on the absolute values is effectively
1525 an addition or subtraction. */
1526 subtract
^= static_cast<bool>(sign
^ rhs
.sign
);
1528 /* Are we bigger exponent-wise than the RHS? */
1529 bits
= exponent
- rhs
.exponent
;
1531 /* Subtraction is more subtle than one might naively expect. */
1533 IEEEFloat
temp_rhs(rhs
);
1536 lost_fraction
= lfExactlyZero
;
1537 else if (bits
> 0) {
1538 lost_fraction
= temp_rhs
.shiftSignificandRight(bits
- 1);
1539 shiftSignificandLeft(1);
1541 lost_fraction
= shiftSignificandRight(-bits
- 1);
1542 temp_rhs
.shiftSignificandLeft(1);
1545 // Should we reverse the subtraction.
1546 if (compareAbsoluteValue(temp_rhs
) == cmpLessThan
) {
1547 carry
= temp_rhs
.subtractSignificand
1548 (*this, lost_fraction
!= lfExactlyZero
);
1549 copySignificand(temp_rhs
);
1552 carry
= subtractSignificand
1553 (temp_rhs
, lost_fraction
!= lfExactlyZero
);
1556 /* Invert the lost fraction - it was on the RHS and
1558 if (lost_fraction
== lfLessThanHalf
)
1559 lost_fraction
= lfMoreThanHalf
;
1560 else if (lost_fraction
== lfMoreThanHalf
)
1561 lost_fraction
= lfLessThanHalf
;
1563 /* The code above is intended to ensure that no borrow is
1569 IEEEFloat
temp_rhs(rhs
);
1571 lost_fraction
= temp_rhs
.shiftSignificandRight(bits
);
1572 carry
= addSignificand(temp_rhs
);
1574 lost_fraction
= shiftSignificandRight(-bits
);
1575 carry
= addSignificand(rhs
);
1578 /* We have a guard bit; generating a carry cannot happen. */
1583 return lost_fraction
;
1586 IEEEFloat::opStatus
IEEEFloat::multiplySpecials(const IEEEFloat
&rhs
) {
1587 switch (PackCategoriesIntoKey(category
, rhs
.category
)) {
1589 llvm_unreachable(nullptr);
1591 case PackCategoriesIntoKey(fcZero
, fcNaN
):
1592 case PackCategoriesIntoKey(fcNormal
, fcNaN
):
1593 case PackCategoriesIntoKey(fcInfinity
, fcNaN
):
1597 case PackCategoriesIntoKey(fcNaN
, fcZero
):
1598 case PackCategoriesIntoKey(fcNaN
, fcNormal
):
1599 case PackCategoriesIntoKey(fcNaN
, fcInfinity
):
1600 case PackCategoriesIntoKey(fcNaN
, fcNaN
):
1601 sign
^= rhs
.sign
; // restore the original sign
1602 if (isSignaling()) {
1606 return rhs
.isSignaling() ? opInvalidOp
: opOK
;
1608 case PackCategoriesIntoKey(fcNormal
, fcInfinity
):
1609 case PackCategoriesIntoKey(fcInfinity
, fcNormal
):
1610 case PackCategoriesIntoKey(fcInfinity
, fcInfinity
):
1611 category
= fcInfinity
;
1614 case PackCategoriesIntoKey(fcZero
, fcNormal
):
1615 case PackCategoriesIntoKey(fcNormal
, fcZero
):
1616 case PackCategoriesIntoKey(fcZero
, fcZero
):
1620 case PackCategoriesIntoKey(fcZero
, fcInfinity
):
1621 case PackCategoriesIntoKey(fcInfinity
, fcZero
):
1625 case PackCategoriesIntoKey(fcNormal
, fcNormal
):
1630 IEEEFloat::opStatus
IEEEFloat::divideSpecials(const IEEEFloat
&rhs
) {
1631 switch (PackCategoriesIntoKey(category
, rhs
.category
)) {
1633 llvm_unreachable(nullptr);
1635 case PackCategoriesIntoKey(fcZero
, fcNaN
):
1636 case PackCategoriesIntoKey(fcNormal
, fcNaN
):
1637 case PackCategoriesIntoKey(fcInfinity
, fcNaN
):
1641 case PackCategoriesIntoKey(fcNaN
, fcZero
):
1642 case PackCategoriesIntoKey(fcNaN
, fcNormal
):
1643 case PackCategoriesIntoKey(fcNaN
, fcInfinity
):
1644 case PackCategoriesIntoKey(fcNaN
, fcNaN
):
1645 sign
^= rhs
.sign
; // restore the original sign
1646 if (isSignaling()) {
1650 return rhs
.isSignaling() ? opInvalidOp
: opOK
;
1652 case PackCategoriesIntoKey(fcInfinity
, fcZero
):
1653 case PackCategoriesIntoKey(fcInfinity
, fcNormal
):
1654 case PackCategoriesIntoKey(fcZero
, fcInfinity
):
1655 case PackCategoriesIntoKey(fcZero
, fcNormal
):
1658 case PackCategoriesIntoKey(fcNormal
, fcInfinity
):
1662 case PackCategoriesIntoKey(fcNormal
, fcZero
):
1663 category
= fcInfinity
;
1666 case PackCategoriesIntoKey(fcInfinity
, fcInfinity
):
1667 case PackCategoriesIntoKey(fcZero
, fcZero
):
1671 case PackCategoriesIntoKey(fcNormal
, fcNormal
):
1676 IEEEFloat::opStatus
IEEEFloat::modSpecials(const IEEEFloat
&rhs
) {
1677 switch (PackCategoriesIntoKey(category
, rhs
.category
)) {
1679 llvm_unreachable(nullptr);
1681 case PackCategoriesIntoKey(fcZero
, fcNaN
):
1682 case PackCategoriesIntoKey(fcNormal
, fcNaN
):
1683 case PackCategoriesIntoKey(fcInfinity
, fcNaN
):
1686 case PackCategoriesIntoKey(fcNaN
, fcZero
):
1687 case PackCategoriesIntoKey(fcNaN
, fcNormal
):
1688 case PackCategoriesIntoKey(fcNaN
, fcInfinity
):
1689 case PackCategoriesIntoKey(fcNaN
, fcNaN
):
1690 if (isSignaling()) {
1694 return rhs
.isSignaling() ? opInvalidOp
: opOK
;
1696 case PackCategoriesIntoKey(fcZero
, fcInfinity
):
1697 case PackCategoriesIntoKey(fcZero
, fcNormal
):
1698 case PackCategoriesIntoKey(fcNormal
, fcInfinity
):
1701 case PackCategoriesIntoKey(fcNormal
, fcZero
):
1702 case PackCategoriesIntoKey(fcInfinity
, fcZero
):
1703 case PackCategoriesIntoKey(fcInfinity
, fcNormal
):
1704 case PackCategoriesIntoKey(fcInfinity
, fcInfinity
):
1705 case PackCategoriesIntoKey(fcZero
, fcZero
):
1709 case PackCategoriesIntoKey(fcNormal
, fcNormal
):
1714 IEEEFloat::opStatus
IEEEFloat::remainderSpecials(const IEEEFloat
&rhs
) {
1715 switch (PackCategoriesIntoKey(category
, rhs
.category
)) {
1717 llvm_unreachable(nullptr);
1719 case PackCategoriesIntoKey(fcZero
, fcNaN
):
1720 case PackCategoriesIntoKey(fcNormal
, fcNaN
):
1721 case PackCategoriesIntoKey(fcInfinity
, fcNaN
):
1724 case PackCategoriesIntoKey(fcNaN
, fcZero
):
1725 case PackCategoriesIntoKey(fcNaN
, fcNormal
):
1726 case PackCategoriesIntoKey(fcNaN
, fcInfinity
):
1727 case PackCategoriesIntoKey(fcNaN
, fcNaN
):
1728 if (isSignaling()) {
1732 return rhs
.isSignaling() ? opInvalidOp
: opOK
;
1734 case PackCategoriesIntoKey(fcZero
, fcInfinity
):
1735 case PackCategoriesIntoKey(fcZero
, fcNormal
):
1736 case PackCategoriesIntoKey(fcNormal
, fcInfinity
):
1739 case PackCategoriesIntoKey(fcNormal
, fcZero
):
1740 case PackCategoriesIntoKey(fcInfinity
, fcZero
):
1741 case PackCategoriesIntoKey(fcInfinity
, fcNormal
):
1742 case PackCategoriesIntoKey(fcInfinity
, fcInfinity
):
1743 case PackCategoriesIntoKey(fcZero
, fcZero
):
1747 case PackCategoriesIntoKey(fcNormal
, fcNormal
):
1748 return opDivByZero
; // fake status, indicating this is not a special case
1753 void IEEEFloat::changeSign() {
1754 /* Look mummy, this one's easy. */
1758 /* Normalized addition or subtraction. */
1759 IEEEFloat::opStatus
IEEEFloat::addOrSubtract(const IEEEFloat
&rhs
,
1760 roundingMode rounding_mode
,
1764 fs
= addOrSubtractSpecials(rhs
, subtract
);
1766 /* This return code means it was not a simple case. */
1767 if (fs
== opDivByZero
) {
1768 lostFraction lost_fraction
;
1770 lost_fraction
= addOrSubtractSignificand(rhs
, subtract
);
1771 fs
= normalize(rounding_mode
, lost_fraction
);
1773 /* Can only be zero if we lost no fraction. */
1774 assert(category
!= fcZero
|| lost_fraction
== lfExactlyZero
);
1777 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1778 positive zero unless rounding to minus infinity, except that
1779 adding two like-signed zeroes gives that zero. */
1780 if (category
== fcZero
) {
1781 if (rhs
.category
!= fcZero
|| (sign
== rhs
.sign
) == subtract
)
1782 sign
= (rounding_mode
== rmTowardNegative
);
1788 /* Normalized addition. */
1789 IEEEFloat::opStatus
IEEEFloat::add(const IEEEFloat
&rhs
,
1790 roundingMode rounding_mode
) {
1791 return addOrSubtract(rhs
, rounding_mode
, false);
1794 /* Normalized subtraction. */
1795 IEEEFloat::opStatus
IEEEFloat::subtract(const IEEEFloat
&rhs
,
1796 roundingMode rounding_mode
) {
1797 return addOrSubtract(rhs
, rounding_mode
, true);
1800 /* Normalized multiply. */
1801 IEEEFloat::opStatus
IEEEFloat::multiply(const IEEEFloat
&rhs
,
1802 roundingMode rounding_mode
) {
1806 fs
= multiplySpecials(rhs
);
1808 if (isFiniteNonZero()) {
1809 lostFraction lost_fraction
= multiplySignificand(rhs
);
1810 fs
= normalize(rounding_mode
, lost_fraction
);
1811 if (lost_fraction
!= lfExactlyZero
)
1812 fs
= (opStatus
) (fs
| opInexact
);
1818 /* Normalized divide. */
1819 IEEEFloat::opStatus
IEEEFloat::divide(const IEEEFloat
&rhs
,
1820 roundingMode rounding_mode
) {
1824 fs
= divideSpecials(rhs
);
1826 if (isFiniteNonZero()) {
1827 lostFraction lost_fraction
= divideSignificand(rhs
);
1828 fs
= normalize(rounding_mode
, lost_fraction
);
1829 if (lost_fraction
!= lfExactlyZero
)
1830 fs
= (opStatus
) (fs
| opInexact
);
1836 /* Normalized remainder. */
1837 IEEEFloat::opStatus
IEEEFloat::remainder(const IEEEFloat
&rhs
) {
1839 unsigned int origSign
= sign
;
1841 // First handle the special cases.
1842 fs
= remainderSpecials(rhs
);
1843 if (fs
!= opDivByZero
)
1848 // Make sure the current value is less than twice the denom. If the addition
1849 // did not succeed (an overflow has happened), which means that the finite
1850 // value we currently posses must be less than twice the denom (as we are
1851 // using the same semantics).
1853 if (P2
.add(rhs
, rmNearestTiesToEven
) == opOK
) {
1858 // Lets work with absolute numbers.
1864 // To calculate the remainder we use the following scheme.
1866 // The remainder is defained as follows:
1868 // remainder = numer - rquot * denom = x - r * p
1870 // Where r is the result of: x/p, rounded toward the nearest integral value
1871 // (with halfway cases rounded toward the even number).
1873 // Currently, (after x mod 2p):
1874 // r is the number of 2p's present inside x, which is inherently, an even
1877 // We may split the remaining calculation into 4 options:
1878 // - if x < 0.5p then we round to the nearest number with is 0, and are done.
1879 // - if x == 0.5p then we round to the nearest even number which is 0, and we
1880 // are done as well.
1881 // - if 0.5p < x < p then we round to nearest number which is 1, and we have
1882 // to subtract 1p at least once.
1883 // - if x >= p then we must subtract p at least once, as x must be a
1886 // By now, we were done, or we added 1 to r, which in turn, now an odd number.
1888 // We can now split the remaining calculation to the following 3 options:
1889 // - if x < 0.5p then we round to the nearest number with is 0, and are done.
1890 // - if x == 0.5p then we round to the nearest even number. As r is odd, we
1891 // must round up to the next even number. so we must subtract p once more.
1892 // - if x > 0.5p (and inherently x < p) then we must round r up to the next
1893 // integral, and subtract p once more.
1896 // Extend the semantics to prevent an overflow/underflow or inexact result.
1898 fltSemantics extendedSemantics
= *semantics
;
1899 extendedSemantics
.maxExponent
++;
1900 extendedSemantics
.minExponent
--;
1901 extendedSemantics
.precision
+= 2;
1903 IEEEFloat VEx
= *this;
1904 fs
= VEx
.convert(extendedSemantics
, rmNearestTiesToEven
, &losesInfo
);
1905 assert(fs
== opOK
&& !losesInfo
);
1907 fs
= PEx
.convert(extendedSemantics
, rmNearestTiesToEven
, &losesInfo
);
1908 assert(fs
== opOK
&& !losesInfo
);
1910 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
1912 fs
= VEx
.add(VEx
, rmNearestTiesToEven
);
1915 if (VEx
.compare(PEx
) == cmpGreaterThan
) {
1916 fs
= subtract(P
, rmNearestTiesToEven
);
1919 // Make VEx = this.add(this), but because we have different semantics, we do
1920 // not want to `convert` again, so we just subtract PEx twice (which equals
1921 // to the desired value).
1922 fs
= VEx
.subtract(PEx
, rmNearestTiesToEven
);
1924 fs
= VEx
.subtract(PEx
, rmNearestTiesToEven
);
1927 cmpResult result
= VEx
.compare(PEx
);
1928 if (result
== cmpGreaterThan
|| result
== cmpEqual
) {
1929 fs
= subtract(P
, rmNearestTiesToEven
);
1935 sign
= origSign
; // IEEE754 requires this
1941 /* Normalized llvm frem (C fmod). */
1942 IEEEFloat::opStatus
IEEEFloat::mod(const IEEEFloat
&rhs
) {
1944 fs
= modSpecials(rhs
);
1945 unsigned int origSign
= sign
;
1947 while (isFiniteNonZero() && rhs
.isFiniteNonZero() &&
1948 compareAbsoluteValue(rhs
) != cmpLessThan
) {
1949 IEEEFloat V
= scalbn(rhs
, ilogb(*this) - ilogb(rhs
), rmNearestTiesToEven
);
1950 if (compareAbsoluteValue(V
) == cmpLessThan
)
1951 V
= scalbn(V
, -1, rmNearestTiesToEven
);
1954 fs
= subtract(V
, rmNearestTiesToEven
);
1958 sign
= origSign
; // fmod requires this
1962 /* Normalized fused-multiply-add. */
1963 IEEEFloat::opStatus
IEEEFloat::fusedMultiplyAdd(const IEEEFloat
&multiplicand
,
1964 const IEEEFloat
&addend
,
1965 roundingMode rounding_mode
) {
1968 /* Post-multiplication sign, before addition. */
1969 sign
^= multiplicand
.sign
;
1971 /* If and only if all arguments are normal do we need to do an
1972 extended-precision calculation. */
1973 if (isFiniteNonZero() &&
1974 multiplicand
.isFiniteNonZero() &&
1975 addend
.isFinite()) {
1976 lostFraction lost_fraction
;
1978 lost_fraction
= multiplySignificand(multiplicand
, addend
);
1979 fs
= normalize(rounding_mode
, lost_fraction
);
1980 if (lost_fraction
!= lfExactlyZero
)
1981 fs
= (opStatus
) (fs
| opInexact
);
1983 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1984 positive zero unless rounding to minus infinity, except that
1985 adding two like-signed zeroes gives that zero. */
1986 if (category
== fcZero
&& !(fs
& opUnderflow
) && sign
!= addend
.sign
)
1987 sign
= (rounding_mode
== rmTowardNegative
);
1989 fs
= multiplySpecials(multiplicand
);
1991 /* FS can only be opOK or opInvalidOp. There is no more work
1992 to do in the latter case. The IEEE-754R standard says it is
1993 implementation-defined in this case whether, if ADDEND is a
1994 quiet NaN, we raise invalid op; this implementation does so.
1996 If we need to do the addition we can do so with normal
1999 fs
= addOrSubtract(addend
, rounding_mode
, false);
2005 /* Rounding-mode correct round to integral value. */
2006 IEEEFloat::opStatus
IEEEFloat::roundToIntegral(roundingMode rounding_mode
) {
2010 // [IEEE Std 754-2008 6.1]:
2011 // The behavior of infinity in floating-point arithmetic is derived from the
2012 // limiting cases of real arithmetic with operands of arbitrarily
2013 // large magnitude, when such a limit exists.
2015 // Operations on infinite operands are usually exact and therefore signal no
2020 if (isSignaling()) {
2021 // [IEEE Std 754-2008 6.2]:
2022 // Under default exception handling, any operation signaling an invalid
2023 // operation exception and for which a floating-point result is to be
2024 // delivered shall deliver a quiet NaN.
2026 // [IEEE Std 754-2008 6.2]:
2027 // Signaling NaNs shall be reserved operands that, under default exception
2028 // handling, signal the invalid operation exception(see 7.2) for every
2029 // general-computational and signaling-computational operation except for
2030 // the conversions described in 5.12.
2033 // [IEEE Std 754-2008 6.2]:
2034 // For an operation with quiet NaN inputs, other than maximum and minimum
2035 // operations, if a floating-point result is to be delivered the result
2036 // shall be a quiet NaN which should be one of the input NaNs.
2038 // Every general-computational and quiet-computational operation involving
2039 // one or more input NaNs, none of them signaling, shall signal no
2040 // exception, except fusedMultiplyAdd might signal the invalid operation
2041 // exception(see 7.2).
2047 // [IEEE Std 754-2008 6.3]:
2048 // ... the sign of the result of conversions, the quantize operation, the
2049 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
2050 // the sign of the first or only operand.
2054 // If the exponent is large enough, we know that this value is already
2055 // integral, and the arithmetic below would potentially cause it to saturate
2056 // to +/-Inf. Bail out early instead.
2057 if (exponent
+1 >= (int)semanticsPrecision(*semantics
))
2060 // The algorithm here is quite simple: we add 2^(p-1), where p is the
2061 // precision of our format, and then subtract it back off again. The choice
2062 // of rounding modes for the addition/subtraction determines the rounding mode
2063 // for our integral rounding as well.
2064 // NOTE: When the input value is negative, we do subtraction followed by
2065 // addition instead.
2066 APInt
IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics
)), 1);
2067 IntegerConstant
<<= semanticsPrecision(*semantics
)-1;
2068 IEEEFloat
MagicConstant(*semantics
);
2069 fs
= MagicConstant
.convertFromAPInt(IntegerConstant
, false,
2070 rmNearestTiesToEven
);
2072 MagicConstant
.sign
= sign
;
2074 // Preserve the input sign so that we can handle the case of zero result
2076 bool inputSign
= isNegative();
2078 fs
= add(MagicConstant
, rounding_mode
);
2080 // Current value and 'MagicConstant' are both integers, so the result of the
2081 // subtraction is always exact according to Sterbenz' lemma.
2082 subtract(MagicConstant
, rounding_mode
);
2084 // Restore the input sign.
2085 if (inputSign
!= isNegative())
2092 /* Comparison requires normalized numbers. */
2093 IEEEFloat::cmpResult
IEEEFloat::compare(const IEEEFloat
&rhs
) const {
2096 assert(semantics
== rhs
.semantics
);
2098 switch (PackCategoriesIntoKey(category
, rhs
.category
)) {
2100 llvm_unreachable(nullptr);
2102 case PackCategoriesIntoKey(fcNaN
, fcZero
):
2103 case PackCategoriesIntoKey(fcNaN
, fcNormal
):
2104 case PackCategoriesIntoKey(fcNaN
, fcInfinity
):
2105 case PackCategoriesIntoKey(fcNaN
, fcNaN
):
2106 case PackCategoriesIntoKey(fcZero
, fcNaN
):
2107 case PackCategoriesIntoKey(fcNormal
, fcNaN
):
2108 case PackCategoriesIntoKey(fcInfinity
, fcNaN
):
2109 return cmpUnordered
;
2111 case PackCategoriesIntoKey(fcInfinity
, fcNormal
):
2112 case PackCategoriesIntoKey(fcInfinity
, fcZero
):
2113 case PackCategoriesIntoKey(fcNormal
, fcZero
):
2117 return cmpGreaterThan
;
2119 case PackCategoriesIntoKey(fcNormal
, fcInfinity
):
2120 case PackCategoriesIntoKey(fcZero
, fcInfinity
):
2121 case PackCategoriesIntoKey(fcZero
, fcNormal
):
2123 return cmpGreaterThan
;
2127 case PackCategoriesIntoKey(fcInfinity
, fcInfinity
):
2128 if (sign
== rhs
.sign
)
2133 return cmpGreaterThan
;
2135 case PackCategoriesIntoKey(fcZero
, fcZero
):
2138 case PackCategoriesIntoKey(fcNormal
, fcNormal
):
2142 /* Two normal numbers. Do they have the same sign? */
2143 if (sign
!= rhs
.sign
) {
2145 result
= cmpLessThan
;
2147 result
= cmpGreaterThan
;
2149 /* Compare absolute values; invert result if negative. */
2150 result
= compareAbsoluteValue(rhs
);
2153 if (result
== cmpLessThan
)
2154 result
= cmpGreaterThan
;
2155 else if (result
== cmpGreaterThan
)
2156 result
= cmpLessThan
;
2163 /// IEEEFloat::convert - convert a value of one floating point type to another.
2164 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
2165 /// records whether the transformation lost information, i.e. whether
2166 /// converting the result back to the original type will produce the
2167 /// original value (this is almost the same as return value==fsOK, but there
2168 /// are edge cases where this is not so).
2170 IEEEFloat::opStatus
IEEEFloat::convert(const fltSemantics
&toSemantics
,
2171 roundingMode rounding_mode
,
2173 lostFraction lostFraction
;
2174 unsigned int newPartCount
, oldPartCount
;
2177 const fltSemantics
&fromSemantics
= *semantics
;
2179 lostFraction
= lfExactlyZero
;
2180 newPartCount
= partCountForBits(toSemantics
.precision
+ 1);
2181 oldPartCount
= partCount();
2182 shift
= toSemantics
.precision
- fromSemantics
.precision
;
2184 bool X86SpecialNan
= false;
2185 if (&fromSemantics
== &semX87DoubleExtended
&&
2186 &toSemantics
!= &semX87DoubleExtended
&& category
== fcNaN
&&
2187 (!(*significandParts() & 0x8000000000000000ULL
) ||
2188 !(*significandParts() & 0x4000000000000000ULL
))) {
2189 // x86 has some unusual NaNs which cannot be represented in any other
2190 // format; note them here.
2191 X86SpecialNan
= true;
2194 // If this is a truncation of a denormal number, and the target semantics
2195 // has larger exponent range than the source semantics (this can happen
2196 // when truncating from PowerPC double-double to double format), the
2197 // right shift could lose result mantissa bits. Adjust exponent instead
2198 // of performing excessive shift.
2199 if (shift
< 0 && isFiniteNonZero()) {
2200 int exponentChange
= significandMSB() + 1 - fromSemantics
.precision
;
2201 if (exponent
+ exponentChange
< toSemantics
.minExponent
)
2202 exponentChange
= toSemantics
.minExponent
- exponent
;
2203 if (exponentChange
< shift
)
2204 exponentChange
= shift
;
2205 if (exponentChange
< 0) {
2206 shift
-= exponentChange
;
2207 exponent
+= exponentChange
;
2211 // If this is a truncation, perform the shift before we narrow the storage.
2212 if (shift
< 0 && (isFiniteNonZero() || category
==fcNaN
))
2213 lostFraction
= shiftRight(significandParts(), oldPartCount
, -shift
);
2215 // Fix the storage so it can hold to new value.
2216 if (newPartCount
> oldPartCount
) {
2217 // The new type requires more storage; make it available.
2218 integerPart
*newParts
;
2219 newParts
= new integerPart
[newPartCount
];
2220 APInt::tcSet(newParts
, 0, newPartCount
);
2221 if (isFiniteNonZero() || category
==fcNaN
)
2222 APInt::tcAssign(newParts
, significandParts(), oldPartCount
);
2224 significand
.parts
= newParts
;
2225 } else if (newPartCount
== 1 && oldPartCount
!= 1) {
2226 // Switch to built-in storage for a single part.
2227 integerPart newPart
= 0;
2228 if (isFiniteNonZero() || category
==fcNaN
)
2229 newPart
= significandParts()[0];
2231 significand
.part
= newPart
;
2234 // Now that we have the right storage, switch the semantics.
2235 semantics
= &toSemantics
;
2237 // If this is an extension, perform the shift now that the storage is
2239 if (shift
> 0 && (isFiniteNonZero() || category
==fcNaN
))
2240 APInt::tcShiftLeft(significandParts(), newPartCount
, shift
);
2242 if (isFiniteNonZero()) {
2243 fs
= normalize(rounding_mode
, lostFraction
);
2244 *losesInfo
= (fs
!= opOK
);
2245 } else if (category
== fcNaN
) {
2246 *losesInfo
= lostFraction
!= lfExactlyZero
|| X86SpecialNan
;
2248 // For x87 extended precision, we want to make a NaN, not a special NaN if
2249 // the input wasn't special either.
2250 if (!X86SpecialNan
&& semantics
== &semX87DoubleExtended
)
2251 APInt::tcSetBit(significandParts(), semantics
->precision
- 1);
2253 // Convert of sNaN creates qNaN and raises an exception (invalid op).
2254 // This also guarantees that a sNaN does not become Inf on a truncation
2255 // that loses all payload bits.
2256 if (isSignaling()) {
2270 /* Convert a floating point number to an integer according to the
2271 rounding mode. If the rounded integer value is out of range this
2272 returns an invalid operation exception and the contents of the
2273 destination parts are unspecified. If the rounded value is in
2274 range but the floating point number is not the exact integer, the C
2275 standard doesn't require an inexact exception to be raised. IEEE
2276 854 does require it so we do that.
2278 Note that for conversions to integer type the C standard requires
2279 round-to-zero to always be used. */
2280 IEEEFloat::opStatus
IEEEFloat::convertToSignExtendedInteger(
2281 MutableArrayRef
<integerPart
> parts
, unsigned int width
, bool isSigned
,
2282 roundingMode rounding_mode
, bool *isExact
) const {
2283 lostFraction lost_fraction
;
2284 const integerPart
*src
;
2285 unsigned int dstPartsCount
, truncatedBits
;
2289 /* Handle the three special cases first. */
2290 if (category
== fcInfinity
|| category
== fcNaN
)
2293 dstPartsCount
= partCountForBits(width
);
2294 assert(dstPartsCount
<= parts
.size() && "Integer too big");
2296 if (category
== fcZero
) {
2297 APInt::tcSet(parts
.data(), 0, dstPartsCount
);
2298 // Negative zero can't be represented as an int.
2303 src
= significandParts();
2305 /* Step 1: place our absolute value, with any fraction truncated, in
2308 /* Our absolute value is less than one; truncate everything. */
2309 APInt::tcSet(parts
.data(), 0, dstPartsCount
);
2310 /* For exponent -1 the integer bit represents .5, look at that.
2311 For smaller exponents leftmost truncated bit is 0. */
2312 truncatedBits
= semantics
->precision
-1U - exponent
;
2314 /* We want the most significant (exponent + 1) bits; the rest are
2316 unsigned int bits
= exponent
+ 1U;
2318 /* Hopelessly large in magnitude? */
2322 if (bits
< semantics
->precision
) {
2323 /* We truncate (semantics->precision - bits) bits. */
2324 truncatedBits
= semantics
->precision
- bits
;
2325 APInt::tcExtract(parts
.data(), dstPartsCount
, src
, bits
, truncatedBits
);
2327 /* We want at least as many bits as are available. */
2328 APInt::tcExtract(parts
.data(), dstPartsCount
, src
, semantics
->precision
,
2330 APInt::tcShiftLeft(parts
.data(), dstPartsCount
,
2331 bits
- semantics
->precision
);
2336 /* Step 2: work out any lost fraction, and increment the absolute
2337 value if we would round away from zero. */
2338 if (truncatedBits
) {
2339 lost_fraction
= lostFractionThroughTruncation(src
, partCount(),
2341 if (lost_fraction
!= lfExactlyZero
&&
2342 roundAwayFromZero(rounding_mode
, lost_fraction
, truncatedBits
)) {
2343 if (APInt::tcIncrement(parts
.data(), dstPartsCount
))
2344 return opInvalidOp
; /* Overflow. */
2347 lost_fraction
= lfExactlyZero
;
2350 /* Step 3: check if we fit in the destination. */
2351 unsigned int omsb
= APInt::tcMSB(parts
.data(), dstPartsCount
) + 1;
2355 /* Negative numbers cannot be represented as unsigned. */
2359 /* It takes omsb bits to represent the unsigned integer value.
2360 We lose a bit for the sign, but care is needed as the
2361 maximally negative integer is a special case. */
2362 if (omsb
== width
&&
2363 APInt::tcLSB(parts
.data(), dstPartsCount
) + 1 != omsb
)
2366 /* This case can happen because of rounding. */
2371 APInt::tcNegate (parts
.data(), dstPartsCount
);
2373 if (omsb
>= width
+ !isSigned
)
2377 if (lost_fraction
== lfExactlyZero
) {
2384 /* Same as convertToSignExtendedInteger, except we provide
2385 deterministic values in case of an invalid operation exception,
2386 namely zero for NaNs and the minimal or maximal value respectively
2387 for underflow or overflow.
2388 The *isExact output tells whether the result is exact, in the sense
2389 that converting it back to the original floating point type produces
2390 the original value. This is almost equivalent to result==opOK,
2391 except for negative zeroes.
2394 IEEEFloat::convertToInteger(MutableArrayRef
<integerPart
> parts
,
2395 unsigned int width
, bool isSigned
,
2396 roundingMode rounding_mode
, bool *isExact
) const {
2399 fs
= convertToSignExtendedInteger(parts
, width
, isSigned
, rounding_mode
,
2402 if (fs
== opInvalidOp
) {
2403 unsigned int bits
, dstPartsCount
;
2405 dstPartsCount
= partCountForBits(width
);
2406 assert(dstPartsCount
<= parts
.size() && "Integer too big");
2408 if (category
== fcNaN
)
2413 bits
= width
- isSigned
;
2415 APInt::tcSetLeastSignificantBits(parts
.data(), dstPartsCount
, bits
);
2416 if (sign
&& isSigned
)
2417 APInt::tcShiftLeft(parts
.data(), dstPartsCount
, width
- 1);
2423 /* Convert an unsigned integer SRC to a floating point number,
2424 rounding according to ROUNDING_MODE. The sign of the floating
2425 point number is not modified. */
2426 IEEEFloat::opStatus
IEEEFloat::convertFromUnsignedParts(
2427 const integerPart
*src
, unsigned int srcCount
, roundingMode rounding_mode
) {
2428 unsigned int omsb
, precision
, dstCount
;
2430 lostFraction lost_fraction
;
2432 category
= fcNormal
;
2433 omsb
= APInt::tcMSB(src
, srcCount
) + 1;
2434 dst
= significandParts();
2435 dstCount
= partCount();
2436 precision
= semantics
->precision
;
2438 /* We want the most significant PRECISION bits of SRC. There may not
2439 be that many; extract what we can. */
2440 if (precision
<= omsb
) {
2441 exponent
= omsb
- 1;
2442 lost_fraction
= lostFractionThroughTruncation(src
, srcCount
,
2444 APInt::tcExtract(dst
, dstCount
, src
, precision
, omsb
- precision
);
2446 exponent
= precision
- 1;
2447 lost_fraction
= lfExactlyZero
;
2448 APInt::tcExtract(dst
, dstCount
, src
, omsb
, 0);
2451 return normalize(rounding_mode
, lost_fraction
);
2454 IEEEFloat::opStatus
IEEEFloat::convertFromAPInt(const APInt
&Val
, bool isSigned
,
2455 roundingMode rounding_mode
) {
2456 unsigned int partCount
= Val
.getNumWords();
2460 if (isSigned
&& api
.isNegative()) {
2465 return convertFromUnsignedParts(api
.getRawData(), partCount
, rounding_mode
);
2468 /* Convert a two's complement integer SRC to a floating point number,
2469 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2470 integer is signed, in which case it must be sign-extended. */
2472 IEEEFloat::convertFromSignExtendedInteger(const integerPart
*src
,
2473 unsigned int srcCount
, bool isSigned
,
2474 roundingMode rounding_mode
) {
2478 APInt::tcExtractBit(src
, srcCount
* integerPartWidth
- 1)) {
2481 /* If we're signed and negative negate a copy. */
2483 copy
= new integerPart
[srcCount
];
2484 APInt::tcAssign(copy
, src
, srcCount
);
2485 APInt::tcNegate(copy
, srcCount
);
2486 status
= convertFromUnsignedParts(copy
, srcCount
, rounding_mode
);
2490 status
= convertFromUnsignedParts(src
, srcCount
, rounding_mode
);
2496 /* FIXME: should this just take a const APInt reference? */
2498 IEEEFloat::convertFromZeroExtendedInteger(const integerPart
*parts
,
2499 unsigned int width
, bool isSigned
,
2500 roundingMode rounding_mode
) {
2501 unsigned int partCount
= partCountForBits(width
);
2502 APInt api
= APInt(width
, makeArrayRef(parts
, partCount
));
2505 if (isSigned
&& APInt::tcExtractBit(parts
, width
- 1)) {
2510 return convertFromUnsignedParts(api
.getRawData(), partCount
, rounding_mode
);
2513 Expected
<IEEEFloat::opStatus
>
2514 IEEEFloat::convertFromHexadecimalString(StringRef s
,
2515 roundingMode rounding_mode
) {
2516 lostFraction lost_fraction
= lfExactlyZero
;
2518 category
= fcNormal
;
2522 integerPart
*significand
= significandParts();
2523 unsigned partsCount
= partCount();
2524 unsigned bitPos
= partsCount
* integerPartWidth
;
2525 bool computedTrailingFraction
= false;
2527 // Skip leading zeroes and any (hexa)decimal point.
2528 StringRef::iterator begin
= s
.begin();
2529 StringRef::iterator end
= s
.end();
2530 StringRef::iterator dot
;
2531 auto PtrOrErr
= skipLeadingZeroesAndAnyDot(begin
, end
, &dot
);
2533 return PtrOrErr
.takeError();
2534 StringRef::iterator p
= *PtrOrErr
;
2535 StringRef::iterator firstSignificantDigit
= p
;
2538 integerPart hex_value
;
2542 return createError("String contains multiple dots");
2547 hex_value
= hexDigitValue(*p
);
2548 if (hex_value
== -1U)
2553 // Store the number while we have space.
2556 hex_value
<<= bitPos
% integerPartWidth
;
2557 significand
[bitPos
/ integerPartWidth
] |= hex_value
;
2558 } else if (!computedTrailingFraction
) {
2559 auto FractOrErr
= trailingHexadecimalFraction(p
, end
, hex_value
);
2561 return FractOrErr
.takeError();
2562 lost_fraction
= *FractOrErr
;
2563 computedTrailingFraction
= true;
2567 /* Hex floats require an exponent but not a hexadecimal point. */
2569 return createError("Hex strings require an exponent");
2570 if (*p
!= 'p' && *p
!= 'P')
2571 return createError("Invalid character in significand");
2573 return createError("Significand has no digits");
2574 if (dot
!= end
&& p
- begin
== 1)
2575 return createError("Significand has no digits");
2577 /* Ignore the exponent if we are zero. */
2578 if (p
!= firstSignificantDigit
) {
2581 /* Implicit hexadecimal point? */
2585 /* Calculate the exponent adjustment implicit in the number of
2586 significant digits. */
2587 expAdjustment
= static_cast<int>(dot
- firstSignificantDigit
);
2588 if (expAdjustment
< 0)
2590 expAdjustment
= expAdjustment
* 4 - 1;
2592 /* Adjust for writing the significand starting at the most
2593 significant nibble. */
2594 expAdjustment
+= semantics
->precision
;
2595 expAdjustment
-= partsCount
* integerPartWidth
;
2597 /* Adjust for the given exponent. */
2598 auto ExpOrErr
= totalExponent(p
+ 1, end
, expAdjustment
);
2600 return ExpOrErr
.takeError();
2601 exponent
= *ExpOrErr
;
2604 return normalize(rounding_mode
, lost_fraction
);
2608 IEEEFloat::roundSignificandWithExponent(const integerPart
*decSigParts
,
2609 unsigned sigPartCount
, int exp
,
2610 roundingMode rounding_mode
) {
2611 unsigned int parts
, pow5PartCount
;
2612 fltSemantics calcSemantics
= { 32767, -32767, 0, 0 };
2613 integerPart pow5Parts
[maxPowerOfFiveParts
];
2616 isNearest
= (rounding_mode
== rmNearestTiesToEven
||
2617 rounding_mode
== rmNearestTiesToAway
);
2619 parts
= partCountForBits(semantics
->precision
+ 11);
2621 /* Calculate pow(5, abs(exp)). */
2622 pow5PartCount
= powerOf5(pow5Parts
, exp
>= 0 ? exp
: -exp
);
2624 for (;; parts
*= 2) {
2625 opStatus sigStatus
, powStatus
;
2626 unsigned int excessPrecision
, truncatedBits
;
2628 calcSemantics
.precision
= parts
* integerPartWidth
- 1;
2629 excessPrecision
= calcSemantics
.precision
- semantics
->precision
;
2630 truncatedBits
= excessPrecision
;
2632 IEEEFloat
decSig(calcSemantics
, uninitialized
);
2633 decSig
.makeZero(sign
);
2634 IEEEFloat
pow5(calcSemantics
);
2636 sigStatus
= decSig
.convertFromUnsignedParts(decSigParts
, sigPartCount
,
2637 rmNearestTiesToEven
);
2638 powStatus
= pow5
.convertFromUnsignedParts(pow5Parts
, pow5PartCount
,
2639 rmNearestTiesToEven
);
2640 /* Add exp, as 10^n = 5^n * 2^n. */
2641 decSig
.exponent
+= exp
;
2643 lostFraction calcLostFraction
;
2644 integerPart HUerr
, HUdistance
;
2645 unsigned int powHUerr
;
2648 /* multiplySignificand leaves the precision-th bit set to 1. */
2649 calcLostFraction
= decSig
.multiplySignificand(pow5
);
2650 powHUerr
= powStatus
!= opOK
;
2652 calcLostFraction
= decSig
.divideSignificand(pow5
);
2653 /* Denormal numbers have less precision. */
2654 if (decSig
.exponent
< semantics
->minExponent
) {
2655 excessPrecision
+= (semantics
->minExponent
- decSig
.exponent
);
2656 truncatedBits
= excessPrecision
;
2657 if (excessPrecision
> calcSemantics
.precision
)
2658 excessPrecision
= calcSemantics
.precision
;
2660 /* Extra half-ulp lost in reciprocal of exponent. */
2661 powHUerr
= (powStatus
== opOK
&& calcLostFraction
== lfExactlyZero
) ? 0:2;
2664 /* Both multiplySignificand and divideSignificand return the
2665 result with the integer bit set. */
2666 assert(APInt::tcExtractBit
2667 (decSig
.significandParts(), calcSemantics
.precision
- 1) == 1);
2669 HUerr
= HUerrBound(calcLostFraction
!= lfExactlyZero
, sigStatus
!= opOK
,
2671 HUdistance
= 2 * ulpsFromBoundary(decSig
.significandParts(),
2672 excessPrecision
, isNearest
);
2674 /* Are we guaranteed to round correctly if we truncate? */
2675 if (HUdistance
>= HUerr
) {
2676 APInt::tcExtract(significandParts(), partCount(), decSig
.significandParts(),
2677 calcSemantics
.precision
- excessPrecision
,
2679 /* Take the exponent of decSig. If we tcExtract-ed less bits
2680 above we must adjust our exponent to compensate for the
2681 implicit right shift. */
2682 exponent
= (decSig
.exponent
+ semantics
->precision
2683 - (calcSemantics
.precision
- excessPrecision
));
2684 calcLostFraction
= lostFractionThroughTruncation(decSig
.significandParts(),
2687 return normalize(rounding_mode
, calcLostFraction
);
2692 Expected
<IEEEFloat::opStatus
>
2693 IEEEFloat::convertFromDecimalString(StringRef str
, roundingMode rounding_mode
) {
2697 /* Scan the text. */
2698 StringRef::iterator p
= str
.begin();
2699 if (Error Err
= interpretDecimal(p
, str
.end(), &D
))
2700 return std::move(Err
);
2702 /* Handle the quick cases. First the case of no significant digits,
2703 i.e. zero, and then exponents that are obviously too large or too
2704 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2705 definitely overflows if
2707 (exp - 1) * L >= maxExponent
2709 and definitely underflows to zero where
2711 (exp + 1) * L <= minExponent - precision
2713 With integer arithmetic the tightest bounds for L are
2715 93/28 < L < 196/59 [ numerator <= 256 ]
2716 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2719 // Test if we have a zero number allowing for strings with no null terminators
2720 // and zero decimals with non-zero exponents.
2722 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
2723 // D->firstSigDigit equals str.end(), every digit must be a zero and there can
2724 // be at most one dot. On the other hand, if we have a zero with a non-zero
2725 // exponent, then we know that D.firstSigDigit will be non-numeric.
2726 if (D
.firstSigDigit
== str
.end() || decDigitValue(*D
.firstSigDigit
) >= 10U) {
2730 /* Check whether the normalized exponent is high enough to overflow
2731 max during the log-rebasing in the max-exponent check below. */
2732 } else if (D
.normalizedExponent
- 1 > INT_MAX
/ 42039) {
2733 fs
= handleOverflow(rounding_mode
);
2735 /* If it wasn't, then it also wasn't high enough to overflow max
2736 during the log-rebasing in the min-exponent check. Check that it
2737 won't overflow min in either check, then perform the min-exponent
2739 } else if (D
.normalizedExponent
- 1 < INT_MIN
/ 42039 ||
2740 (D
.normalizedExponent
+ 1) * 28738 <=
2741 8651 * (semantics
->minExponent
- (int) semantics
->precision
)) {
2742 /* Underflow to zero and round. */
2743 category
= fcNormal
;
2745 fs
= normalize(rounding_mode
, lfLessThanHalf
);
2747 /* We can finally safely perform the max-exponent check. */
2748 } else if ((D
.normalizedExponent
- 1) * 42039
2749 >= 12655 * semantics
->maxExponent
) {
2750 /* Overflow and round. */
2751 fs
= handleOverflow(rounding_mode
);
2753 integerPart
*decSignificand
;
2754 unsigned int partCount
;
2756 /* A tight upper bound on number of bits required to hold an
2757 N-digit decimal integer is N * 196 / 59. Allocate enough space
2758 to hold the full significand, and an extra part required by
2760 partCount
= static_cast<unsigned int>(D
.lastSigDigit
- D
.firstSigDigit
) + 1;
2761 partCount
= partCountForBits(1 + 196 * partCount
/ 59);
2762 decSignificand
= new integerPart
[partCount
+ 1];
2765 /* Convert to binary efficiently - we do almost all multiplication
2766 in an integerPart. When this would overflow do we do a single
2767 bignum multiplication, and then revert again to multiplication
2768 in an integerPart. */
2770 integerPart decValue
, val
, multiplier
;
2778 if (p
== str
.end()) {
2782 decValue
= decDigitValue(*p
++);
2783 if (decValue
>= 10U) {
2784 delete[] decSignificand
;
2785 return createError("Invalid character in significand");
2788 val
= val
* 10 + decValue
;
2789 /* The maximum number that can be multiplied by ten with any
2790 digit added without overflowing an integerPart. */
2791 } while (p
<= D
.lastSigDigit
&& multiplier
<= (~ (integerPart
) 0 - 9) / 10);
2793 /* Multiply out the current part. */
2794 APInt::tcMultiplyPart(decSignificand
, decSignificand
, multiplier
, val
,
2795 partCount
, partCount
+ 1, false);
2797 /* If we used another part (likely but not guaranteed), increase
2799 if (decSignificand
[partCount
])
2801 } while (p
<= D
.lastSigDigit
);
2803 category
= fcNormal
;
2804 fs
= roundSignificandWithExponent(decSignificand
, partCount
,
2805 D
.exponent
, rounding_mode
);
2807 delete [] decSignificand
;
2813 bool IEEEFloat::convertFromStringSpecials(StringRef str
) {
2814 const size_t MIN_NAME_SIZE
= 3;
2816 if (str
.size() < MIN_NAME_SIZE
)
2819 if (str
.equals("inf") || str
.equals("INFINITY") || str
.equals("+Inf")) {
2824 bool IsNegative
= str
.front() == '-';
2826 str
= str
.drop_front();
2827 if (str
.size() < MIN_NAME_SIZE
)
2830 if (str
.equals("inf") || str
.equals("INFINITY") || str
.equals("Inf")) {
2836 // If we have a 's' (or 'S') prefix, then this is a Signaling NaN.
2837 bool IsSignaling
= str
.front() == 's' || str
.front() == 'S';
2839 str
= str
.drop_front();
2840 if (str
.size() < MIN_NAME_SIZE
)
2844 if (str
.startswith("nan") || str
.startswith("NaN")) {
2845 str
= str
.drop_front(3);
2847 // A NaN without payload.
2849 makeNaN(IsSignaling
, IsNegative
);
2853 // Allow the payload to be inside parentheses.
2854 if (str
.front() == '(') {
2855 // Parentheses should be balanced (and not empty).
2856 if (str
.size() <= 2 || str
.back() != ')')
2859 str
= str
.slice(1, str
.size() - 1);
2862 // Determine the payload number's radix.
2863 unsigned Radix
= 10;
2864 if (str
[0] == '0') {
2865 if (str
.size() > 1 && tolower(str
[1]) == 'x') {
2866 str
= str
.drop_front(2);
2872 // Parse the payload and make the NaN.
2874 if (!str
.getAsInteger(Radix
, Payload
)) {
2875 makeNaN(IsSignaling
, IsNegative
, &Payload
);
2883 Expected
<IEEEFloat::opStatus
>
2884 IEEEFloat::convertFromString(StringRef str
, roundingMode rounding_mode
) {
2886 return createError("Invalid string length");
2888 // Handle special cases.
2889 if (convertFromStringSpecials(str
))
2892 /* Handle a leading minus sign. */
2893 StringRef::iterator p
= str
.begin();
2894 size_t slen
= str
.size();
2895 sign
= *p
== '-' ? 1 : 0;
2896 if (*p
== '-' || *p
== '+') {
2900 return createError("String has no digits");
2903 if (slen
>= 2 && p
[0] == '0' && (p
[1] == 'x' || p
[1] == 'X')) {
2905 return createError("Invalid string");
2906 return convertFromHexadecimalString(StringRef(p
+ 2, slen
- 2),
2910 return convertFromDecimalString(StringRef(p
, slen
), rounding_mode
);
2913 /* Write out a hexadecimal representation of the floating point value
2914 to DST, which must be of sufficient size, in the C99 form
2915 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2916 excluding the terminating NUL.
2918 If UPPERCASE, the output is in upper case, otherwise in lower case.
2920 HEXDIGITS digits appear altogether, rounding the value if
2921 necessary. If HEXDIGITS is 0, the minimal precision to display the
2922 number precisely is used instead. If nothing would appear after
2923 the decimal point it is suppressed.
2925 The decimal exponent is always printed and has at least one digit.
2926 Zero values display an exponent of zero. Infinities and NaNs
2927 appear as "infinity" or "nan" respectively.
2929 The above rules are as specified by C99. There is ambiguity about
2930 what the leading hexadecimal digit should be. This implementation
2931 uses whatever is necessary so that the exponent is displayed as
2932 stored. This implies the exponent will fall within the IEEE format
2933 range, and the leading hexadecimal digit will be 0 (for denormals),
2934 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2935 any other digits zero).
2937 unsigned int IEEEFloat::convertToHexString(char *dst
, unsigned int hexDigits
,
2939 roundingMode rounding_mode
) const {
2948 memcpy (dst
, upperCase
? infinityU
: infinityL
, sizeof infinityU
- 1);
2949 dst
+= sizeof infinityL
- 1;
2953 memcpy (dst
, upperCase
? NaNU
: NaNL
, sizeof NaNU
- 1);
2954 dst
+= sizeof NaNU
- 1;
2959 *dst
++ = upperCase
? 'X': 'x';
2961 if (hexDigits
> 1) {
2963 memset (dst
, '0', hexDigits
- 1);
2964 dst
+= hexDigits
- 1;
2966 *dst
++ = upperCase
? 'P': 'p';
2971 dst
= convertNormalToHexString (dst
, hexDigits
, upperCase
, rounding_mode
);
2977 return static_cast<unsigned int>(dst
- p
);
2980 /* Does the hard work of outputting the correctly rounded hexadecimal
2981 form of a normal floating point number with the specified number of
2982 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2983 digits necessary to print the value precisely is output. */
2984 char *IEEEFloat::convertNormalToHexString(char *dst
, unsigned int hexDigits
,
2986 roundingMode rounding_mode
) const {
2987 unsigned int count
, valueBits
, shift
, partsCount
, outputDigits
;
2988 const char *hexDigitChars
;
2989 const integerPart
*significand
;
2994 *dst
++ = upperCase
? 'X': 'x';
2997 hexDigitChars
= upperCase
? hexDigitsUpper
: hexDigitsLower
;
2999 significand
= significandParts();
3000 partsCount
= partCount();
3002 /* +3 because the first digit only uses the single integer bit, so
3003 we have 3 virtual zero most-significant-bits. */
3004 valueBits
= semantics
->precision
+ 3;
3005 shift
= integerPartWidth
- valueBits
% integerPartWidth
;
3007 /* The natural number of digits required ignoring trailing
3008 insignificant zeroes. */
3009 outputDigits
= (valueBits
- significandLSB () + 3) / 4;
3011 /* hexDigits of zero means use the required number for the
3012 precision. Otherwise, see if we are truncating. If we are,
3013 find out if we need to round away from zero. */
3015 if (hexDigits
< outputDigits
) {
3016 /* We are dropping non-zero bits, so need to check how to round.
3017 "bits" is the number of dropped bits. */
3019 lostFraction fraction
;
3021 bits
= valueBits
- hexDigits
* 4;
3022 fraction
= lostFractionThroughTruncation (significand
, partsCount
, bits
);
3023 roundUp
= roundAwayFromZero(rounding_mode
, fraction
, bits
);
3025 outputDigits
= hexDigits
;
3028 /* Write the digits consecutively, and start writing in the location
3029 of the hexadecimal point. We move the most significant digit
3030 left and add the hexadecimal point later. */
3033 count
= (valueBits
+ integerPartWidth
- 1) / integerPartWidth
;
3035 while (outputDigits
&& count
) {
3038 /* Put the most significant integerPartWidth bits in "part". */
3039 if (--count
== partsCount
)
3040 part
= 0; /* An imaginary higher zero part. */
3042 part
= significand
[count
] << shift
;
3045 part
|= significand
[count
- 1] >> (integerPartWidth
- shift
);
3047 /* Convert as much of "part" to hexdigits as we can. */
3048 unsigned int curDigits
= integerPartWidth
/ 4;
3050 if (curDigits
> outputDigits
)
3051 curDigits
= outputDigits
;
3052 dst
+= partAsHex (dst
, part
, curDigits
, hexDigitChars
);
3053 outputDigits
-= curDigits
;
3059 /* Note that hexDigitChars has a trailing '0'. */
3062 *q
= hexDigitChars
[hexDigitValue (*q
) + 1];
3063 } while (*q
== '0');
3066 /* Add trailing zeroes. */
3067 memset (dst
, '0', outputDigits
);
3068 dst
+= outputDigits
;
3071 /* Move the most significant digit to before the point, and if there
3072 is something after the decimal point add it. This must come
3073 after rounding above. */
3080 /* Finally output the exponent. */
3081 *dst
++ = upperCase
? 'P': 'p';
3083 return writeSignedDecimal (dst
, exponent
);
3086 hash_code
hash_value(const IEEEFloat
&Arg
) {
3087 if (!Arg
.isFiniteNonZero())
3088 return hash_combine((uint8_t)Arg
.category
,
3089 // NaN has no sign, fix it at zero.
3090 Arg
.isNaN() ? (uint8_t)0 : (uint8_t)Arg
.sign
,
3091 Arg
.semantics
->precision
);
3093 // Normal floats need their exponent and significand hashed.
3094 return hash_combine((uint8_t)Arg
.category
, (uint8_t)Arg
.sign
,
3095 Arg
.semantics
->precision
, Arg
.exponent
,
3097 Arg
.significandParts(),
3098 Arg
.significandParts() + Arg
.partCount()));
3101 // Conversion from APFloat to/from host float/double. It may eventually be
3102 // possible to eliminate these and have everybody deal with APFloats, but that
3103 // will take a while. This approach will not easily extend to long double.
3104 // Current implementation requires integerPartWidth==64, which is correct at
3105 // the moment but could be made more general.
3107 // Denormals have exponent minExponent in APFloat, but minExponent-1 in
3108 // the actual IEEE respresentations. We compensate for that here.
3110 APInt
IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
3111 assert(semantics
== (const llvm::fltSemantics
*)&semX87DoubleExtended
);
3112 assert(partCount()==2);
3114 uint64_t myexponent
, mysignificand
;
3116 if (isFiniteNonZero()) {
3117 myexponent
= exponent
+16383; //bias
3118 mysignificand
= significandParts()[0];
3119 if (myexponent
==1 && !(mysignificand
& 0x8000000000000000ULL
))
3120 myexponent
= 0; // denormal
3121 } else if (category
==fcZero
) {
3124 } else if (category
==fcInfinity
) {
3125 myexponent
= 0x7fff;
3126 mysignificand
= 0x8000000000000000ULL
;
3128 assert(category
== fcNaN
&& "Unknown category");
3129 myexponent
= 0x7fff;
3130 mysignificand
= significandParts()[0];
3134 words
[0] = mysignificand
;
3135 words
[1] = ((uint64_t)(sign
& 1) << 15) |
3136 (myexponent
& 0x7fffLL
);
3137 return APInt(80, words
);
3140 APInt
IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
3141 assert(semantics
== (const llvm::fltSemantics
*)&semPPCDoubleDoubleLegacy
);
3142 assert(partCount()==2);
3148 // Convert number to double. To avoid spurious underflows, we re-
3149 // normalize against the "double" minExponent first, and only *then*
3150 // truncate the mantissa. The result of that second conversion
3151 // may be inexact, but should never underflow.
3152 // Declare fltSemantics before APFloat that uses it (and
3153 // saves pointer to it) to ensure correct destruction order.
3154 fltSemantics extendedSemantics
= *semantics
;
3155 extendedSemantics
.minExponent
= semIEEEdouble
.minExponent
;
3156 IEEEFloat
extended(*this);
3157 fs
= extended
.convert(extendedSemantics
, rmNearestTiesToEven
, &losesInfo
);
3158 assert(fs
== opOK
&& !losesInfo
);
3161 IEEEFloat
u(extended
);
3162 fs
= u
.convert(semIEEEdouble
, rmNearestTiesToEven
, &losesInfo
);
3163 assert(fs
== opOK
|| fs
== opInexact
);
3165 words
[0] = *u
.convertDoubleAPFloatToAPInt().getRawData();
3167 // If conversion was exact or resulted in a special case, we're done;
3168 // just set the second double to zero. Otherwise, re-convert back to
3169 // the extended format and compute the difference. This now should
3170 // convert exactly to double.
3171 if (u
.isFiniteNonZero() && losesInfo
) {
3172 fs
= u
.convert(extendedSemantics
, rmNearestTiesToEven
, &losesInfo
);
3173 assert(fs
== opOK
&& !losesInfo
);
3176 IEEEFloat
v(extended
);
3177 v
.subtract(u
, rmNearestTiesToEven
);
3178 fs
= v
.convert(semIEEEdouble
, rmNearestTiesToEven
, &losesInfo
);
3179 assert(fs
== opOK
&& !losesInfo
);
3181 words
[1] = *v
.convertDoubleAPFloatToAPInt().getRawData();
3186 return APInt(128, words
);
3189 APInt
IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
3190 assert(semantics
== (const llvm::fltSemantics
*)&semIEEEquad
);
3191 assert(partCount()==2);
3193 uint64_t myexponent
, mysignificand
, mysignificand2
;
3195 if (isFiniteNonZero()) {
3196 myexponent
= exponent
+16383; //bias
3197 mysignificand
= significandParts()[0];
3198 mysignificand2
= significandParts()[1];
3199 if (myexponent
==1 && !(mysignificand2
& 0x1000000000000LL
))
3200 myexponent
= 0; // denormal
3201 } else if (category
==fcZero
) {
3203 mysignificand
= mysignificand2
= 0;
3204 } else if (category
==fcInfinity
) {
3205 myexponent
= 0x7fff;
3206 mysignificand
= mysignificand2
= 0;
3208 assert(category
== fcNaN
&& "Unknown category!");
3209 myexponent
= 0x7fff;
3210 mysignificand
= significandParts()[0];
3211 mysignificand2
= significandParts()[1];
3215 words
[0] = mysignificand
;
3216 words
[1] = ((uint64_t)(sign
& 1) << 63) |
3217 ((myexponent
& 0x7fff) << 48) |
3218 (mysignificand2
& 0xffffffffffffLL
);
3220 return APInt(128, words
);
3223 APInt
IEEEFloat::convertDoubleAPFloatToAPInt() const {
3224 assert(semantics
== (const llvm::fltSemantics
*)&semIEEEdouble
);
3225 assert(partCount()==1);
3227 uint64_t myexponent
, mysignificand
;
3229 if (isFiniteNonZero()) {
3230 myexponent
= exponent
+1023; //bias
3231 mysignificand
= *significandParts();
3232 if (myexponent
==1 && !(mysignificand
& 0x10000000000000LL
))
3233 myexponent
= 0; // denormal
3234 } else if (category
==fcZero
) {
3237 } else if (category
==fcInfinity
) {
3241 assert(category
== fcNaN
&& "Unknown category!");
3243 mysignificand
= *significandParts();
3246 return APInt(64, ((((uint64_t)(sign
& 1) << 63) |
3247 ((myexponent
& 0x7ff) << 52) |
3248 (mysignificand
& 0xfffffffffffffLL
))));
3251 APInt
IEEEFloat::convertFloatAPFloatToAPInt() const {
3252 assert(semantics
== (const llvm::fltSemantics
*)&semIEEEsingle
);
3253 assert(partCount()==1);
3255 uint32_t myexponent
, mysignificand
;
3257 if (isFiniteNonZero()) {
3258 myexponent
= exponent
+127; //bias
3259 mysignificand
= (uint32_t)*significandParts();
3260 if (myexponent
== 1 && !(mysignificand
& 0x800000))
3261 myexponent
= 0; // denormal
3262 } else if (category
==fcZero
) {
3265 } else if (category
==fcInfinity
) {
3269 assert(category
== fcNaN
&& "Unknown category!");
3271 mysignificand
= (uint32_t)*significandParts();
3274 return APInt(32, (((sign
&1) << 31) | ((myexponent
&0xff) << 23) |
3275 (mysignificand
& 0x7fffff)));
3278 APInt
IEEEFloat::convertBFloatAPFloatToAPInt() const {
3279 assert(semantics
== (const llvm::fltSemantics
*)&semBFloat
);
3280 assert(partCount() == 1);
3282 uint32_t myexponent
, mysignificand
;
3284 if (isFiniteNonZero()) {
3285 myexponent
= exponent
+ 127; // bias
3286 mysignificand
= (uint32_t)*significandParts();
3287 if (myexponent
== 1 && !(mysignificand
& 0x80))
3288 myexponent
= 0; // denormal
3289 } else if (category
== fcZero
) {
3292 } else if (category
== fcInfinity
) {
3296 assert(category
== fcNaN
&& "Unknown category!");
3298 mysignificand
= (uint32_t)*significandParts();
3301 return APInt(16, (((sign
& 1) << 15) | ((myexponent
& 0xff) << 7) |
3302 (mysignificand
& 0x7f)));
3305 APInt
IEEEFloat::convertHalfAPFloatToAPInt() const {
3306 assert(semantics
== (const llvm::fltSemantics
*)&semIEEEhalf
);
3307 assert(partCount()==1);
3309 uint32_t myexponent
, mysignificand
;
3311 if (isFiniteNonZero()) {
3312 myexponent
= exponent
+15; //bias
3313 mysignificand
= (uint32_t)*significandParts();
3314 if (myexponent
== 1 && !(mysignificand
& 0x400))
3315 myexponent
= 0; // denormal
3316 } else if (category
==fcZero
) {
3319 } else if (category
==fcInfinity
) {
3323 assert(category
== fcNaN
&& "Unknown category!");
3325 mysignificand
= (uint32_t)*significandParts();
3328 return APInt(16, (((sign
&1) << 15) | ((myexponent
&0x1f) << 10) |
3329 (mysignificand
& 0x3ff)));
3332 // This function creates an APInt that is just a bit map of the floating
3333 // point constant as it would appear in memory. It is not a conversion,
3334 // and treating the result as a normal integer is unlikely to be useful.
3336 APInt
IEEEFloat::bitcastToAPInt() const {
3337 if (semantics
== (const llvm::fltSemantics
*)&semIEEEhalf
)
3338 return convertHalfAPFloatToAPInt();
3340 if (semantics
== (const llvm::fltSemantics
*)&semBFloat
)
3341 return convertBFloatAPFloatToAPInt();
3343 if (semantics
== (const llvm::fltSemantics
*)&semIEEEsingle
)
3344 return convertFloatAPFloatToAPInt();
3346 if (semantics
== (const llvm::fltSemantics
*)&semIEEEdouble
)
3347 return convertDoubleAPFloatToAPInt();
3349 if (semantics
== (const llvm::fltSemantics
*)&semIEEEquad
)
3350 return convertQuadrupleAPFloatToAPInt();
3352 if (semantics
== (const llvm::fltSemantics
*)&semPPCDoubleDoubleLegacy
)
3353 return convertPPCDoubleDoubleAPFloatToAPInt();
3355 assert(semantics
== (const llvm::fltSemantics
*)&semX87DoubleExtended
&&
3357 return convertF80LongDoubleAPFloatToAPInt();
3360 float IEEEFloat::convertToFloat() const {
3361 assert(semantics
== (const llvm::fltSemantics
*)&semIEEEsingle
&&
3362 "Float semantics are not IEEEsingle");
3363 APInt api
= bitcastToAPInt();
3364 return api
.bitsToFloat();
3367 double IEEEFloat::convertToDouble() const {
3368 assert(semantics
== (const llvm::fltSemantics
*)&semIEEEdouble
&&
3369 "Float semantics are not IEEEdouble");
3370 APInt api
= bitcastToAPInt();
3371 return api
.bitsToDouble();
3374 /// Integer bit is explicit in this format. Intel hardware (387 and later)
3375 /// does not support these bit patterns:
3376 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3377 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3378 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3379 /// exponent = 0, integer bit 1 ("pseudodenormal")
3380 /// At the moment, the first three are treated as NaNs, the last one as Normal.
3381 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt
&api
) {
3382 assert(api
.getBitWidth()==80);
3383 uint64_t i1
= api
.getRawData()[0];
3384 uint64_t i2
= api
.getRawData()[1];
3385 uint64_t myexponent
= (i2
& 0x7fff);
3386 uint64_t mysignificand
= i1
;
3387 uint8_t myintegerbit
= mysignificand
>> 63;
3389 initialize(&semX87DoubleExtended
);
3390 assert(partCount()==2);
3392 sign
= static_cast<unsigned int>(i2
>>15);
3393 if (myexponent
== 0 && mysignificand
== 0) {
3395 } else if (myexponent
==0x7fff && mysignificand
==0x8000000000000000ULL
) {
3397 } else if ((myexponent
== 0x7fff && mysignificand
!= 0x8000000000000000ULL
) ||
3398 (myexponent
!= 0x7fff && myexponent
!= 0 && myintegerbit
== 0)) {
3400 exponent
= exponentNaN();
3401 significandParts()[0] = mysignificand
;
3402 significandParts()[1] = 0;
3404 category
= fcNormal
;
3405 exponent
= myexponent
- 16383;
3406 significandParts()[0] = mysignificand
;
3407 significandParts()[1] = 0;
3408 if (myexponent
==0) // denormal
3413 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt
&api
) {
3414 assert(api
.getBitWidth()==128);
3415 uint64_t i1
= api
.getRawData()[0];
3416 uint64_t i2
= api
.getRawData()[1];
3420 // Get the first double and convert to our format.
3421 initFromDoubleAPInt(APInt(64, i1
));
3422 fs
= convert(semPPCDoubleDoubleLegacy
, rmNearestTiesToEven
, &losesInfo
);
3423 assert(fs
== opOK
&& !losesInfo
);
3426 // Unless we have a special case, add in second double.
3427 if (isFiniteNonZero()) {
3428 IEEEFloat
v(semIEEEdouble
, APInt(64, i2
));
3429 fs
= v
.convert(semPPCDoubleDoubleLegacy
, rmNearestTiesToEven
, &losesInfo
);
3430 assert(fs
== opOK
&& !losesInfo
);
3433 add(v
, rmNearestTiesToEven
);
3437 void IEEEFloat::initFromQuadrupleAPInt(const APInt
&api
) {
3438 assert(api
.getBitWidth()==128);
3439 uint64_t i1
= api
.getRawData()[0];
3440 uint64_t i2
= api
.getRawData()[1];
3441 uint64_t myexponent
= (i2
>> 48) & 0x7fff;
3442 uint64_t mysignificand
= i1
;
3443 uint64_t mysignificand2
= i2
& 0xffffffffffffLL
;
3445 initialize(&semIEEEquad
);
3446 assert(partCount()==2);
3448 sign
= static_cast<unsigned int>(i2
>>63);
3449 if (myexponent
==0 &&
3450 (mysignificand
==0 && mysignificand2
==0)) {
3452 } else if (myexponent
==0x7fff &&
3453 (mysignificand
==0 && mysignificand2
==0)) {
3455 } else if (myexponent
==0x7fff &&
3456 (mysignificand
!=0 || mysignificand2
!=0)) {
3458 exponent
= exponentNaN();
3459 significandParts()[0] = mysignificand
;
3460 significandParts()[1] = mysignificand2
;
3462 category
= fcNormal
;
3463 exponent
= myexponent
- 16383;
3464 significandParts()[0] = mysignificand
;
3465 significandParts()[1] = mysignificand2
;
3466 if (myexponent
==0) // denormal
3469 significandParts()[1] |= 0x1000000000000LL
; // integer bit
3473 void IEEEFloat::initFromDoubleAPInt(const APInt
&api
) {
3474 assert(api
.getBitWidth()==64);
3475 uint64_t i
= *api
.getRawData();
3476 uint64_t myexponent
= (i
>> 52) & 0x7ff;
3477 uint64_t mysignificand
= i
& 0xfffffffffffffLL
;
3479 initialize(&semIEEEdouble
);
3480 assert(partCount()==1);
3482 sign
= static_cast<unsigned int>(i
>>63);
3483 if (myexponent
==0 && mysignificand
==0) {
3485 } else if (myexponent
==0x7ff && mysignificand
==0) {
3487 } else if (myexponent
==0x7ff && mysignificand
!=0) {
3489 exponent
= exponentNaN();
3490 *significandParts() = mysignificand
;
3492 category
= fcNormal
;
3493 exponent
= myexponent
- 1023;
3494 *significandParts() = mysignificand
;
3495 if (myexponent
==0) // denormal
3498 *significandParts() |= 0x10000000000000LL
; // integer bit
3502 void IEEEFloat::initFromFloatAPInt(const APInt
&api
) {
3503 assert(api
.getBitWidth()==32);
3504 uint32_t i
= (uint32_t)*api
.getRawData();
3505 uint32_t myexponent
= (i
>> 23) & 0xff;
3506 uint32_t mysignificand
= i
& 0x7fffff;
3508 initialize(&semIEEEsingle
);
3509 assert(partCount()==1);
3512 if (myexponent
==0 && mysignificand
==0) {
3514 } else if (myexponent
==0xff && mysignificand
==0) {
3516 } else if (myexponent
==0xff && mysignificand
!=0) {
3518 exponent
= exponentNaN();
3519 *significandParts() = mysignificand
;
3521 category
= fcNormal
;
3522 exponent
= myexponent
- 127; //bias
3523 *significandParts() = mysignificand
;
3524 if (myexponent
==0) // denormal
3527 *significandParts() |= 0x800000; // integer bit
3531 void IEEEFloat::initFromBFloatAPInt(const APInt
&api
) {
3532 assert(api
.getBitWidth() == 16);
3533 uint32_t i
= (uint32_t)*api
.getRawData();
3534 uint32_t myexponent
= (i
>> 7) & 0xff;
3535 uint32_t mysignificand
= i
& 0x7f;
3537 initialize(&semBFloat
);
3538 assert(partCount() == 1);
3541 if (myexponent
== 0 && mysignificand
== 0) {
3543 } else if (myexponent
== 0xff && mysignificand
== 0) {
3545 } else if (myexponent
== 0xff && mysignificand
!= 0) {
3547 exponent
= exponentNaN();
3548 *significandParts() = mysignificand
;
3550 category
= fcNormal
;
3551 exponent
= myexponent
- 127; // bias
3552 *significandParts() = mysignificand
;
3553 if (myexponent
== 0) // denormal
3556 *significandParts() |= 0x80; // integer bit
3560 void IEEEFloat::initFromHalfAPInt(const APInt
&api
) {
3561 assert(api
.getBitWidth()==16);
3562 uint32_t i
= (uint32_t)*api
.getRawData();
3563 uint32_t myexponent
= (i
>> 10) & 0x1f;
3564 uint32_t mysignificand
= i
& 0x3ff;
3566 initialize(&semIEEEhalf
);
3567 assert(partCount()==1);
3570 if (myexponent
==0 && mysignificand
==0) {
3572 } else if (myexponent
==0x1f && mysignificand
==0) {
3574 } else if (myexponent
==0x1f && mysignificand
!=0) {
3576 exponent
= exponentNaN();
3577 *significandParts() = mysignificand
;
3579 category
= fcNormal
;
3580 exponent
= myexponent
- 15; //bias
3581 *significandParts() = mysignificand
;
3582 if (myexponent
==0) // denormal
3585 *significandParts() |= 0x400; // integer bit
3589 /// Treat api as containing the bits of a floating point number. Currently
3590 /// we infer the floating point type from the size of the APInt. The
3591 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3592 /// when the size is anything else).
3593 void IEEEFloat::initFromAPInt(const fltSemantics
*Sem
, const APInt
&api
) {
3594 if (Sem
== &semIEEEhalf
)
3595 return initFromHalfAPInt(api
);
3596 if (Sem
== &semBFloat
)
3597 return initFromBFloatAPInt(api
);
3598 if (Sem
== &semIEEEsingle
)
3599 return initFromFloatAPInt(api
);
3600 if (Sem
== &semIEEEdouble
)
3601 return initFromDoubleAPInt(api
);
3602 if (Sem
== &semX87DoubleExtended
)
3603 return initFromF80LongDoubleAPInt(api
);
3604 if (Sem
== &semIEEEquad
)
3605 return initFromQuadrupleAPInt(api
);
3606 if (Sem
== &semPPCDoubleDoubleLegacy
)
3607 return initFromPPCDoubleDoubleAPInt(api
);
3609 llvm_unreachable(nullptr);
3612 /// Make this number the largest magnitude normal number in the given
3614 void IEEEFloat::makeLargest(bool Negative
) {
3615 // We want (in interchange format):
3616 // sign = {Negative}
3618 // significand = 1..1
3619 category
= fcNormal
;
3621 exponent
= semantics
->maxExponent
;
3623 // Use memset to set all but the highest integerPart to all ones.
3624 integerPart
*significand
= significandParts();
3625 unsigned PartCount
= partCount();
3626 memset(significand
, 0xFF, sizeof(integerPart
)*(PartCount
- 1));
3628 // Set the high integerPart especially setting all unused top bits for
3629 // internal consistency.
3630 const unsigned NumUnusedHighBits
=
3631 PartCount
*integerPartWidth
- semantics
->precision
;
3632 significand
[PartCount
- 1] = (NumUnusedHighBits
< integerPartWidth
)
3633 ? (~integerPart(0) >> NumUnusedHighBits
)
3637 /// Make this number the smallest magnitude denormal number in the given
3639 void IEEEFloat::makeSmallest(bool Negative
) {
3640 // We want (in interchange format):
3641 // sign = {Negative}
3643 // significand = 0..01
3644 category
= fcNormal
;
3646 exponent
= semantics
->minExponent
;
3647 APInt::tcSet(significandParts(), 1, partCount());
3650 void IEEEFloat::makeSmallestNormalized(bool Negative
) {
3651 // We want (in interchange format):
3652 // sign = {Negative}
3654 // significand = 10..0
3656 category
= fcNormal
;
3659 exponent
= semantics
->minExponent
;
3660 significandParts()[partCountForBits(semantics
->precision
) - 1] |=
3661 (((integerPart
)1) << ((semantics
->precision
- 1) % integerPartWidth
));
3664 IEEEFloat::IEEEFloat(const fltSemantics
&Sem
, const APInt
&API
) {
3665 initFromAPInt(&Sem
, API
);
3668 IEEEFloat::IEEEFloat(float f
) {
3669 initFromAPInt(&semIEEEsingle
, APInt::floatToBits(f
));
3672 IEEEFloat::IEEEFloat(double d
) {
3673 initFromAPInt(&semIEEEdouble
, APInt::doubleToBits(d
));
3677 void append(SmallVectorImpl
<char> &Buffer
, StringRef Str
) {
3678 Buffer
.append(Str
.begin(), Str
.end());
3681 /// Removes data from the given significand until it is no more
3682 /// precise than is required for the desired precision.
3683 void AdjustToPrecision(APInt
&significand
,
3684 int &exp
, unsigned FormatPrecision
) {
3685 unsigned bits
= significand
.getActiveBits();
3687 // 196/59 is a very slight overestimate of lg_2(10).
3688 unsigned bitsRequired
= (FormatPrecision
* 196 + 58) / 59;
3690 if (bits
<= bitsRequired
) return;
3692 unsigned tensRemovable
= (bits
- bitsRequired
) * 59 / 196;
3693 if (!tensRemovable
) return;
3695 exp
+= tensRemovable
;
3697 APInt
divisor(significand
.getBitWidth(), 1);
3698 APInt
powten(significand
.getBitWidth(), 10);
3700 if (tensRemovable
& 1)
3702 tensRemovable
>>= 1;
3703 if (!tensRemovable
) break;
3707 significand
= significand
.udiv(divisor
);
3709 // Truncate the significand down to its active bit count.
3710 significand
= significand
.trunc(significand
.getActiveBits());
3714 void AdjustToPrecision(SmallVectorImpl
<char> &buffer
,
3715 int &exp
, unsigned FormatPrecision
) {
3716 unsigned N
= buffer
.size();
3717 if (N
<= FormatPrecision
) return;
3719 // The most significant figures are the last ones in the buffer.
3720 unsigned FirstSignificant
= N
- FormatPrecision
;
3723 // FIXME: this probably shouldn't use 'round half up'.
3725 // Rounding down is just a truncation, except we also want to drop
3726 // trailing zeros from the new result.
3727 if (buffer
[FirstSignificant
- 1] < '5') {
3728 while (FirstSignificant
< N
&& buffer
[FirstSignificant
] == '0')
3731 exp
+= FirstSignificant
;
3732 buffer
.erase(&buffer
[0], &buffer
[FirstSignificant
]);
3736 // Rounding up requires a decimal add-with-carry. If we continue
3737 // the carry, the newly-introduced zeros will just be truncated.
3738 for (unsigned I
= FirstSignificant
; I
!= N
; ++I
) {
3739 if (buffer
[I
] == '9') {
3747 // If we carried through, we have exactly one digit of precision.
3748 if (FirstSignificant
== N
) {
3749 exp
+= FirstSignificant
;
3751 buffer
.push_back('1');
3755 exp
+= FirstSignificant
;
3756 buffer
.erase(&buffer
[0], &buffer
[FirstSignificant
]);
3760 void IEEEFloat::toString(SmallVectorImpl
<char> &Str
, unsigned FormatPrecision
,
3761 unsigned FormatMaxPadding
, bool TruncateZero
) const {
3765 return append(Str
, "-Inf");
3767 return append(Str
, "+Inf");
3769 case fcNaN
: return append(Str
, "NaN");
3775 if (!FormatMaxPadding
) {
3777 append(Str
, "0.0E+0");
3780 if (FormatPrecision
> 1)
3781 Str
.append(FormatPrecision
- 1, '0');
3782 append(Str
, "e+00");
3795 // Decompose the number into an APInt and an exponent.
3796 int exp
= exponent
- ((int) semantics
->precision
- 1);
3797 APInt
significand(semantics
->precision
,
3798 makeArrayRef(significandParts(),
3799 partCountForBits(semantics
->precision
)));
3801 // Set FormatPrecision if zero. We want to do this before we
3802 // truncate trailing zeros, as those are part of the precision.
3803 if (!FormatPrecision
) {
3804 // We use enough digits so the number can be round-tripped back to an
3805 // APFloat. The formula comes from "How to Print Floating-Point Numbers
3806 // Accurately" by Steele and White.
3807 // FIXME: Using a formula based purely on the precision is conservative;
3808 // we can print fewer digits depending on the actual value being printed.
3810 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
3811 FormatPrecision
= 2 + semantics
->precision
* 59 / 196;
3814 // Ignore trailing binary zeros.
3815 int trailingZeros
= significand
.countTrailingZeros();
3816 exp
+= trailingZeros
;
3817 significand
.lshrInPlace(trailingZeros
);
3819 // Change the exponent from 2^e to 10^e.
3822 } else if (exp
> 0) {
3824 significand
= significand
.zext(semantics
->precision
+ exp
);
3825 significand
<<= exp
;
3827 } else { /* exp < 0 */
3830 // We transform this using the identity:
3831 // (N)(2^-e) == (N)(5^e)(10^-e)
3832 // This means we have to multiply N (the significand) by 5^e.
3833 // To avoid overflow, we have to operate on numbers large
3834 // enough to store N * 5^e:
3835 // log2(N * 5^e) == log2(N) + e * log2(5)
3836 // <= semantics->precision + e * 137 / 59
3837 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3839 unsigned precision
= semantics
->precision
+ (137 * texp
+ 136) / 59;
3841 // Multiply significand by 5^e.
3842 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3843 significand
= significand
.zext(precision
);
3844 APInt
five_to_the_i(precision
, 5);
3846 if (texp
& 1) significand
*= five_to_the_i
;
3850 five_to_the_i
*= five_to_the_i
;
3854 AdjustToPrecision(significand
, exp
, FormatPrecision
);
3856 SmallVector
<char, 256> buffer
;
3859 unsigned precision
= significand
.getBitWidth();
3860 APInt
ten(precision
, 10);
3861 APInt
digit(precision
, 0);
3863 bool inTrail
= true;
3864 while (significand
!= 0) {
3865 // digit <- significand % 10
3866 // significand <- significand / 10
3867 APInt::udivrem(significand
, ten
, significand
, digit
);
3869 unsigned d
= digit
.getZExtValue();
3871 // Drop trailing zeros.
3872 if (inTrail
&& !d
) exp
++;
3874 buffer
.push_back((char) ('0' + d
));
3879 assert(!buffer
.empty() && "no characters in buffer!");
3881 // Drop down to FormatPrecision.
3882 // TODO: don't do more precise calculations above than are required.
3883 AdjustToPrecision(buffer
, exp
, FormatPrecision
);
3885 unsigned NDigits
= buffer
.size();
3887 // Check whether we should use scientific notation.
3888 bool FormatScientific
;
3889 if (!FormatMaxPadding
)
3890 FormatScientific
= true;
3895 // But we shouldn't make the number look more precise than it is.
3896 FormatScientific
= ((unsigned) exp
> FormatMaxPadding
||
3897 NDigits
+ (unsigned) exp
> FormatPrecision
);
3899 // Power of the most significant digit.
3900 int MSD
= exp
+ (int) (NDigits
- 1);
3903 FormatScientific
= false;
3905 // 765e-5 == 0.00765
3907 FormatScientific
= ((unsigned) -MSD
) > FormatMaxPadding
;
3912 // Scientific formatting is pretty straightforward.
3913 if (FormatScientific
) {
3914 exp
+= (NDigits
- 1);
3916 Str
.push_back(buffer
[NDigits
-1]);
3918 if (NDigits
== 1 && TruncateZero
)
3921 for (unsigned I
= 1; I
!= NDigits
; ++I
)
3922 Str
.push_back(buffer
[NDigits
-1-I
]);
3923 // Fill with zeros up to FormatPrecision.
3924 if (!TruncateZero
&& FormatPrecision
> NDigits
- 1)
3925 Str
.append(FormatPrecision
- NDigits
+ 1, '0');
3926 // For !TruncateZero we use lower 'e'.
3927 Str
.push_back(TruncateZero
? 'E' : 'e');
3929 Str
.push_back(exp
>= 0 ? '+' : '-');
3930 if (exp
< 0) exp
= -exp
;
3931 SmallVector
<char, 6> expbuf
;
3933 expbuf
.push_back((char) ('0' + (exp
% 10)));
3936 // Exponent always at least two digits if we do not truncate zeros.
3937 if (!TruncateZero
&& expbuf
.size() < 2)
3938 expbuf
.push_back('0');
3939 for (unsigned I
= 0, E
= expbuf
.size(); I
!= E
; ++I
)
3940 Str
.push_back(expbuf
[E
-1-I
]);
3944 // Non-scientific, positive exponents.
3946 for (unsigned I
= 0; I
!= NDigits
; ++I
)
3947 Str
.push_back(buffer
[NDigits
-1-I
]);
3948 for (unsigned I
= 0; I
!= (unsigned) exp
; ++I
)
3953 // Non-scientific, negative exponents.
3955 // The number of digits to the left of the decimal point.
3956 int NWholeDigits
= exp
+ (int) NDigits
;
3959 if (NWholeDigits
> 0) {
3960 for (; I
!= (unsigned) NWholeDigits
; ++I
)
3961 Str
.push_back(buffer
[NDigits
-I
-1]);
3964 unsigned NZeros
= 1 + (unsigned) -NWholeDigits
;
3968 for (unsigned Z
= 1; Z
!= NZeros
; ++Z
)
3972 for (; I
!= NDigits
; ++I
)
3973 Str
.push_back(buffer
[NDigits
-I
-1]);
3976 bool IEEEFloat::getExactInverse(APFloat
*inv
) const {
3977 // Special floats and denormals have no exact inverse.
3978 if (!isFiniteNonZero())
3981 // Check that the number is a power of two by making sure that only the
3982 // integer bit is set in the significand.
3983 if (significandLSB() != semantics
->precision
- 1)
3987 IEEEFloat
reciprocal(*semantics
, 1ULL);
3988 if (reciprocal
.divide(*this, rmNearestTiesToEven
) != opOK
)
3991 // Avoid multiplication with a denormal, it is not safe on all platforms and
3992 // may be slower than a normal division.
3993 if (reciprocal
.isDenormal())
3996 assert(reciprocal
.isFiniteNonZero() &&
3997 reciprocal
.significandLSB() == reciprocal
.semantics
->precision
- 1);
4000 *inv
= APFloat(reciprocal
, *semantics
);
4005 bool IEEEFloat::isSignaling() const {
4009 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
4010 // first bit of the trailing significand being 0.
4011 return !APInt::tcExtractBit(significandParts(), semantics
->precision
- 2);
4014 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
4016 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
4017 /// appropriate sign switching before/after the computation.
4018 IEEEFloat::opStatus
IEEEFloat::next(bool nextDown
) {
4019 // If we are performing nextDown, swap sign so we have -x.
4023 // Compute nextUp(x)
4024 opStatus result
= opOK
;
4026 // Handle each float category separately.
4029 // nextUp(+inf) = +inf
4032 // nextUp(-inf) = -getLargest()
4036 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
4037 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
4038 // change the payload.
4039 if (isSignaling()) {
4040 result
= opInvalidOp
;
4041 // For consistency, propagate the sign of the sNaN to the qNaN.
4042 makeNaN(false, isNegative(), nullptr);
4046 // nextUp(pm 0) = +getSmallest()
4047 makeSmallest(false);
4050 // nextUp(-getSmallest()) = -0
4051 if (isSmallest() && isNegative()) {
4052 APInt::tcSet(significandParts(), 0, partCount());
4058 // nextUp(getLargest()) == INFINITY
4059 if (isLargest() && !isNegative()) {
4060 APInt::tcSet(significandParts(), 0, partCount());
4061 category
= fcInfinity
;
4062 exponent
= semantics
->maxExponent
+ 1;
4066 // nextUp(normal) == normal + inc.
4068 // If we are negative, we need to decrement the significand.
4070 // We only cross a binade boundary that requires adjusting the exponent
4072 // 1. exponent != semantics->minExponent. This implies we are not in the
4073 // smallest binade or are dealing with denormals.
4074 // 2. Our significand excluding the integral bit is all zeros.
4075 bool WillCrossBinadeBoundary
=
4076 exponent
!= semantics
->minExponent
&& isSignificandAllZeros();
4078 // Decrement the significand.
4080 // We always do this since:
4081 // 1. If we are dealing with a non-binade decrement, by definition we
4082 // just decrement the significand.
4083 // 2. If we are dealing with a normal -> normal binade decrement, since
4084 // we have an explicit integral bit the fact that all bits but the
4085 // integral bit are zero implies that subtracting one will yield a
4086 // significand with 0 integral bit and 1 in all other spots. Thus we
4087 // must just adjust the exponent and set the integral bit to 1.
4088 // 3. If we are dealing with a normal -> denormal binade decrement,
4089 // since we set the integral bit to 0 when we represent denormals, we
4090 // just decrement the significand.
4091 integerPart
*Parts
= significandParts();
4092 APInt::tcDecrement(Parts
, partCount());
4094 if (WillCrossBinadeBoundary
) {
4095 // Our result is a normal number. Do the following:
4096 // 1. Set the integral bit to 1.
4097 // 2. Decrement the exponent.
4098 APInt::tcSetBit(Parts
, semantics
->precision
- 1);
4102 // If we are positive, we need to increment the significand.
4104 // We only cross a binade boundary that requires adjusting the exponent if
4105 // the input is not a denormal and all of said input's significand bits
4106 // are set. If all of said conditions are true: clear the significand, set
4107 // the integral bit to 1, and increment the exponent. If we have a
4108 // denormal always increment since moving denormals and the numbers in the
4109 // smallest normal binade have the same exponent in our representation.
4110 bool WillCrossBinadeBoundary
= !isDenormal() && isSignificandAllOnes();
4112 if (WillCrossBinadeBoundary
) {
4113 integerPart
*Parts
= significandParts();
4114 APInt::tcSet(Parts
, 0, partCount());
4115 APInt::tcSetBit(Parts
, semantics
->precision
- 1);
4116 assert(exponent
!= semantics
->maxExponent
&&
4117 "We can not increment an exponent beyond the maxExponent allowed"
4118 " by the given floating point semantics.");
4121 incrementSignificand();
4127 // If we are performing nextDown, swap sign so we have -nextUp(-x)
4134 APFloatBase::ExponentType
IEEEFloat::exponentNaN() const {
4135 return semantics
->maxExponent
+ 1;
4138 APFloatBase::ExponentType
IEEEFloat::exponentInf() const {
4139 return semantics
->maxExponent
+ 1;
4142 APFloatBase::ExponentType
IEEEFloat::exponentZero() const {
4143 return semantics
->minExponent
- 1;
4146 void IEEEFloat::makeInf(bool Negative
) {
4147 category
= fcInfinity
;
4149 exponent
= exponentInf();
4150 APInt::tcSet(significandParts(), 0, partCount());
4153 void IEEEFloat::makeZero(bool Negative
) {
4156 exponent
= exponentZero();
4157 APInt::tcSet(significandParts(), 0, partCount());
4160 void IEEEFloat::makeQuiet() {
4162 APInt::tcSetBit(significandParts(), semantics
->precision
- 2);
4165 int ilogb(const IEEEFloat
&Arg
) {
4167 return IEEEFloat::IEK_NaN
;
4169 return IEEEFloat::IEK_Zero
;
4170 if (Arg
.isInfinity())
4171 return IEEEFloat::IEK_Inf
;
4172 if (!Arg
.isDenormal())
4173 return Arg
.exponent
;
4175 IEEEFloat
Normalized(Arg
);
4176 int SignificandBits
= Arg
.getSemantics().precision
- 1;
4178 Normalized
.exponent
+= SignificandBits
;
4179 Normalized
.normalize(IEEEFloat::rmNearestTiesToEven
, lfExactlyZero
);
4180 return Normalized
.exponent
- SignificandBits
;
4183 IEEEFloat
scalbn(IEEEFloat X
, int Exp
, IEEEFloat::roundingMode RoundingMode
) {
4184 auto MaxExp
= X
.getSemantics().maxExponent
;
4185 auto MinExp
= X
.getSemantics().minExponent
;
4187 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
4188 // overflow; clamp it to a safe range before adding, but ensure that the range
4189 // is large enough that the clamp does not change the result. The range we
4190 // need to support is the difference between the largest possible exponent and
4191 // the normalized exponent of half the smallest denormal.
4193 int SignificandBits
= X
.getSemantics().precision
- 1;
4194 int MaxIncrement
= MaxExp
- (MinExp
- SignificandBits
) + 1;
4196 // Clamp to one past the range ends to let normalize handle overlflow.
4197 X
.exponent
+= std::min(std::max(Exp
, -MaxIncrement
- 1), MaxIncrement
);
4198 X
.normalize(RoundingMode
, lfExactlyZero
);
4204 IEEEFloat
frexp(const IEEEFloat
&Val
, int &Exp
, IEEEFloat::roundingMode RM
) {
4207 // Quiet signalling nans.
4208 if (Exp
== IEEEFloat::IEK_NaN
) {
4209 IEEEFloat
Quiet(Val
);
4214 if (Exp
== IEEEFloat::IEK_Inf
)
4217 // 1 is added because frexp is defined to return a normalized fraction in
4218 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
4219 Exp
= Exp
== IEEEFloat::IEK_Zero
? 0 : Exp
+ 1;
4220 return scalbn(Val
, -Exp
, RM
);
4223 DoubleAPFloat::DoubleAPFloat(const fltSemantics
&S
)
4225 Floats(new APFloat
[2]{APFloat(semIEEEdouble
), APFloat(semIEEEdouble
)}) {
4226 assert(Semantics
== &semPPCDoubleDouble
);
4229 DoubleAPFloat::DoubleAPFloat(const fltSemantics
&S
, uninitializedTag
)
4231 Floats(new APFloat
[2]{APFloat(semIEEEdouble
, uninitialized
),
4232 APFloat(semIEEEdouble
, uninitialized
)}) {
4233 assert(Semantics
== &semPPCDoubleDouble
);
4236 DoubleAPFloat::DoubleAPFloat(const fltSemantics
&S
, integerPart I
)
4237 : Semantics(&S
), Floats(new APFloat
[2]{APFloat(semIEEEdouble
, I
),
4238 APFloat(semIEEEdouble
)}) {
4239 assert(Semantics
== &semPPCDoubleDouble
);
4242 DoubleAPFloat::DoubleAPFloat(const fltSemantics
&S
, const APInt
&I
)
4244 Floats(new APFloat
[2]{
4245 APFloat(semIEEEdouble
, APInt(64, I
.getRawData()[0])),
4246 APFloat(semIEEEdouble
, APInt(64, I
.getRawData()[1]))}) {
4247 assert(Semantics
== &semPPCDoubleDouble
);
4250 DoubleAPFloat::DoubleAPFloat(const fltSemantics
&S
, APFloat
&&First
,
4253 Floats(new APFloat
[2]{std::move(First
), std::move(Second
)}) {
4254 assert(Semantics
== &semPPCDoubleDouble
);
4255 assert(&Floats
[0].getSemantics() == &semIEEEdouble
);
4256 assert(&Floats
[1].getSemantics() == &semIEEEdouble
);
4259 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat
&RHS
)
4260 : Semantics(RHS
.Semantics
),
4261 Floats(RHS
.Floats
? new APFloat
[2]{APFloat(RHS
.Floats
[0]),
4262 APFloat(RHS
.Floats
[1])}
4264 assert(Semantics
== &semPPCDoubleDouble
);
4267 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat
&&RHS
)
4268 : Semantics(RHS
.Semantics
), Floats(std::move(RHS
.Floats
)) {
4269 RHS
.Semantics
= &semBogus
;
4270 assert(Semantics
== &semPPCDoubleDouble
);
4273 DoubleAPFloat
&DoubleAPFloat::operator=(const DoubleAPFloat
&RHS
) {
4274 if (Semantics
== RHS
.Semantics
&& RHS
.Floats
) {
4275 Floats
[0] = RHS
.Floats
[0];
4276 Floats
[1] = RHS
.Floats
[1];
4277 } else if (this != &RHS
) {
4278 this->~DoubleAPFloat();
4279 new (this) DoubleAPFloat(RHS
);
4284 // Implement addition, subtraction, multiplication and division based on:
4285 // "Software for Doubled-Precision Floating-Point Computations",
4286 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
4287 APFloat::opStatus
DoubleAPFloat::addImpl(const APFloat
&a
, const APFloat
&aa
,
4288 const APFloat
&c
, const APFloat
&cc
,
4292 Status
|= z
.add(c
, RM
);
4293 if (!z
.isFinite()) {
4294 if (!z
.isInfinity()) {
4295 Floats
[0] = std::move(z
);
4296 Floats
[1].makeZero(/* Neg = */ false);
4297 return (opStatus
)Status
;
4300 auto AComparedToC
= a
.compareAbsoluteValue(c
);
4302 Status
|= z
.add(aa
, RM
);
4303 if (AComparedToC
== APFloat::cmpGreaterThan
) {
4304 // z = cc + aa + c + a;
4305 Status
|= z
.add(c
, RM
);
4306 Status
|= z
.add(a
, RM
);
4308 // z = cc + aa + a + c;
4309 Status
|= z
.add(a
, RM
);
4310 Status
|= z
.add(c
, RM
);
4312 if (!z
.isFinite()) {
4313 Floats
[0] = std::move(z
);
4314 Floats
[1].makeZero(/* Neg = */ false);
4315 return (opStatus
)Status
;
4319 Status
|= zz
.add(cc
, RM
);
4320 if (AComparedToC
== APFloat::cmpGreaterThan
) {
4321 // Floats[1] = a - z + c + zz;
4323 Status
|= Floats
[1].subtract(z
, RM
);
4324 Status
|= Floats
[1].add(c
, RM
);
4325 Status
|= Floats
[1].add(zz
, RM
);
4327 // Floats[1] = c - z + a + zz;
4329 Status
|= Floats
[1].subtract(z
, RM
);
4330 Status
|= Floats
[1].add(a
, RM
);
4331 Status
|= Floats
[1].add(zz
, RM
);
4336 Status
|= q
.subtract(z
, RM
);
4338 // zz = q + c + (a - (q + z)) + aa + cc;
4339 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4341 Status
|= zz
.add(c
, RM
);
4342 Status
|= q
.add(z
, RM
);
4343 Status
|= q
.subtract(a
, RM
);
4345 Status
|= zz
.add(q
, RM
);
4346 Status
|= zz
.add(aa
, RM
);
4347 Status
|= zz
.add(cc
, RM
);
4348 if (zz
.isZero() && !zz
.isNegative()) {
4349 Floats
[0] = std::move(z
);
4350 Floats
[1].makeZero(/* Neg = */ false);
4354 Status
|= Floats
[0].add(zz
, RM
);
4355 if (!Floats
[0].isFinite()) {
4356 Floats
[1].makeZero(/* Neg = */ false);
4357 return (opStatus
)Status
;
4359 Floats
[1] = std::move(z
);
4360 Status
|= Floats
[1].subtract(Floats
[0], RM
);
4361 Status
|= Floats
[1].add(zz
, RM
);
4363 return (opStatus
)Status
;
4366 APFloat::opStatus
DoubleAPFloat::addWithSpecial(const DoubleAPFloat
&LHS
,
4367 const DoubleAPFloat
&RHS
,
4370 if (LHS
.getCategory() == fcNaN
) {
4374 if (RHS
.getCategory() == fcNaN
) {
4378 if (LHS
.getCategory() == fcZero
) {
4382 if (RHS
.getCategory() == fcZero
) {
4386 if (LHS
.getCategory() == fcInfinity
&& RHS
.getCategory() == fcInfinity
&&
4387 LHS
.isNegative() != RHS
.isNegative()) {
4388 Out
.makeNaN(false, Out
.isNegative(), nullptr);
4391 if (LHS
.getCategory() == fcInfinity
) {
4395 if (RHS
.getCategory() == fcInfinity
) {
4399 assert(LHS
.getCategory() == fcNormal
&& RHS
.getCategory() == fcNormal
);
4401 APFloat
A(LHS
.Floats
[0]), AA(LHS
.Floats
[1]), C(RHS
.Floats
[0]),
4403 assert(&A
.getSemantics() == &semIEEEdouble
);
4404 assert(&AA
.getSemantics() == &semIEEEdouble
);
4405 assert(&C
.getSemantics() == &semIEEEdouble
);
4406 assert(&CC
.getSemantics() == &semIEEEdouble
);
4407 assert(&Out
.Floats
[0].getSemantics() == &semIEEEdouble
);
4408 assert(&Out
.Floats
[1].getSemantics() == &semIEEEdouble
);
4409 return Out
.addImpl(A
, AA
, C
, CC
, RM
);
4412 APFloat::opStatus
DoubleAPFloat::add(const DoubleAPFloat
&RHS
,
4414 return addWithSpecial(*this, RHS
, *this, RM
);
4417 APFloat::opStatus
DoubleAPFloat::subtract(const DoubleAPFloat
&RHS
,
4420 auto Ret
= add(RHS
, RM
);
4425 APFloat::opStatus
DoubleAPFloat::multiply(const DoubleAPFloat
&RHS
,
4426 APFloat::roundingMode RM
) {
4427 const auto &LHS
= *this;
4429 /* Interesting observation: For special categories, finding the lowest
4430 common ancestor of the following layered graph gives the correct
4439 e.g. NaN * NaN = NaN
4441 Normal * Zero = Zero
4444 if (LHS
.getCategory() == fcNaN
) {
4448 if (RHS
.getCategory() == fcNaN
) {
4452 if ((LHS
.getCategory() == fcZero
&& RHS
.getCategory() == fcInfinity
) ||
4453 (LHS
.getCategory() == fcInfinity
&& RHS
.getCategory() == fcZero
)) {
4454 Out
.makeNaN(false, false, nullptr);
4457 if (LHS
.getCategory() == fcZero
|| LHS
.getCategory() == fcInfinity
) {
4461 if (RHS
.getCategory() == fcZero
|| RHS
.getCategory() == fcInfinity
) {
4465 assert(LHS
.getCategory() == fcNormal
&& RHS
.getCategory() == fcNormal
&&
4466 "Special cases not handled exhaustively");
4469 APFloat A
= Floats
[0], B
= Floats
[1], C
= RHS
.Floats
[0], D
= RHS
.Floats
[1];
4472 Status
|= T
.multiply(C
, RM
);
4473 if (!T
.isFiniteNonZero()) {
4475 Floats
[1].makeZero(/* Neg = */ false);
4476 return (opStatus
)Status
;
4479 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4482 Status
|= Tau
.fusedMultiplyAdd(C
, T
, RM
);
4487 Status
|= V
.multiply(D
, RM
);
4490 Status
|= W
.multiply(C
, RM
);
4491 Status
|= V
.add(W
, RM
);
4493 Status
|= Tau
.add(V
, RM
);
4497 Status
|= U
.add(Tau
, RM
);
4500 if (!U
.isFinite()) {
4501 Floats
[1].makeZero(/* Neg = */ false);
4503 // Floats[1] = (t - u) + tau
4504 Status
|= T
.subtract(U
, RM
);
4505 Status
|= T
.add(Tau
, RM
);
4508 return (opStatus
)Status
;
4511 APFloat::opStatus
DoubleAPFloat::divide(const DoubleAPFloat
&RHS
,
4512 APFloat::roundingMode RM
) {
4513 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4514 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
4516 Tmp
.divide(APFloat(semPPCDoubleDoubleLegacy
, RHS
.bitcastToAPInt()), RM
);
4517 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
4521 APFloat::opStatus
DoubleAPFloat::remainder(const DoubleAPFloat
&RHS
) {
4522 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4523 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
4525 Tmp
.remainder(APFloat(semPPCDoubleDoubleLegacy
, RHS
.bitcastToAPInt()));
4526 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
4530 APFloat::opStatus
DoubleAPFloat::mod(const DoubleAPFloat
&RHS
) {
4531 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4532 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
4533 auto Ret
= Tmp
.mod(APFloat(semPPCDoubleDoubleLegacy
, RHS
.bitcastToAPInt()));
4534 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
4539 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat
&Multiplicand
,
4540 const DoubleAPFloat
&Addend
,
4541 APFloat::roundingMode RM
) {
4542 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4543 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
4544 auto Ret
= Tmp
.fusedMultiplyAdd(
4545 APFloat(semPPCDoubleDoubleLegacy
, Multiplicand
.bitcastToAPInt()),
4546 APFloat(semPPCDoubleDoubleLegacy
, Addend
.bitcastToAPInt()), RM
);
4547 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
4551 APFloat::opStatus
DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM
) {
4552 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4553 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
4554 auto Ret
= Tmp
.roundToIntegral(RM
);
4555 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
4559 void DoubleAPFloat::changeSign() {
4560 Floats
[0].changeSign();
4561 Floats
[1].changeSign();
4565 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat
&RHS
) const {
4566 auto Result
= Floats
[0].compareAbsoluteValue(RHS
.Floats
[0]);
4567 if (Result
!= cmpEqual
)
4569 Result
= Floats
[1].compareAbsoluteValue(RHS
.Floats
[1]);
4570 if (Result
== cmpLessThan
|| Result
== cmpGreaterThan
) {
4571 auto Against
= Floats
[0].isNegative() ^ Floats
[1].isNegative();
4572 auto RHSAgainst
= RHS
.Floats
[0].isNegative() ^ RHS
.Floats
[1].isNegative();
4573 if (Against
&& !RHSAgainst
)
4575 if (!Against
&& RHSAgainst
)
4576 return cmpGreaterThan
;
4577 if (!Against
&& !RHSAgainst
)
4579 if (Against
&& RHSAgainst
)
4580 return (cmpResult
)(cmpLessThan
+ cmpGreaterThan
- Result
);
4585 APFloat::fltCategory
DoubleAPFloat::getCategory() const {
4586 return Floats
[0].getCategory();
4589 bool DoubleAPFloat::isNegative() const { return Floats
[0].isNegative(); }
4591 void DoubleAPFloat::makeInf(bool Neg
) {
4592 Floats
[0].makeInf(Neg
);
4593 Floats
[1].makeZero(/* Neg = */ false);
4596 void DoubleAPFloat::makeZero(bool Neg
) {
4597 Floats
[0].makeZero(Neg
);
4598 Floats
[1].makeZero(/* Neg = */ false);
4601 void DoubleAPFloat::makeLargest(bool Neg
) {
4602 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4603 Floats
[0] = APFloat(semIEEEdouble
, APInt(64, 0x7fefffffffffffffull
));
4604 Floats
[1] = APFloat(semIEEEdouble
, APInt(64, 0x7c8ffffffffffffeull
));
4609 void DoubleAPFloat::makeSmallest(bool Neg
) {
4610 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4611 Floats
[0].makeSmallest(Neg
);
4612 Floats
[1].makeZero(/* Neg = */ false);
4615 void DoubleAPFloat::makeSmallestNormalized(bool Neg
) {
4616 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4617 Floats
[0] = APFloat(semIEEEdouble
, APInt(64, 0x0360000000000000ull
));
4619 Floats
[0].changeSign();
4620 Floats
[1].makeZero(/* Neg = */ false);
4623 void DoubleAPFloat::makeNaN(bool SNaN
, bool Neg
, const APInt
*fill
) {
4624 Floats
[0].makeNaN(SNaN
, Neg
, fill
);
4625 Floats
[1].makeZero(/* Neg = */ false);
4628 APFloat::cmpResult
DoubleAPFloat::compare(const DoubleAPFloat
&RHS
) const {
4629 auto Result
= Floats
[0].compare(RHS
.Floats
[0]);
4630 // |Float[0]| > |Float[1]|
4631 if (Result
== APFloat::cmpEqual
)
4632 return Floats
[1].compare(RHS
.Floats
[1]);
4636 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat
&RHS
) const {
4637 return Floats
[0].bitwiseIsEqual(RHS
.Floats
[0]) &&
4638 Floats
[1].bitwiseIsEqual(RHS
.Floats
[1]);
4641 hash_code
hash_value(const DoubleAPFloat
&Arg
) {
4643 return hash_combine(hash_value(Arg
.Floats
[0]), hash_value(Arg
.Floats
[1]));
4644 return hash_combine(Arg
.Semantics
);
4647 APInt
DoubleAPFloat::bitcastToAPInt() const {
4648 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4650 Floats
[0].bitcastToAPInt().getRawData()[0],
4651 Floats
[1].bitcastToAPInt().getRawData()[0],
4653 return APInt(128, 2, Data
);
4656 Expected
<APFloat::opStatus
> DoubleAPFloat::convertFromString(StringRef S
,
4658 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4659 APFloat
Tmp(semPPCDoubleDoubleLegacy
);
4660 auto Ret
= Tmp
.convertFromString(S
, RM
);
4661 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
4665 APFloat::opStatus
DoubleAPFloat::next(bool nextDown
) {
4666 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4667 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
4668 auto Ret
= Tmp
.next(nextDown
);
4669 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
4674 DoubleAPFloat::convertToInteger(MutableArrayRef
<integerPart
> Input
,
4675 unsigned int Width
, bool IsSigned
,
4676 roundingMode RM
, bool *IsExact
) const {
4677 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4678 return APFloat(semPPCDoubleDoubleLegacy
, bitcastToAPInt())
4679 .convertToInteger(Input
, Width
, IsSigned
, RM
, IsExact
);
4682 APFloat::opStatus
DoubleAPFloat::convertFromAPInt(const APInt
&Input
,
4685 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4686 APFloat
Tmp(semPPCDoubleDoubleLegacy
);
4687 auto Ret
= Tmp
.convertFromAPInt(Input
, IsSigned
, RM
);
4688 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
4693 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart
*Input
,
4694 unsigned int InputSize
,
4695 bool IsSigned
, roundingMode RM
) {
4696 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4697 APFloat
Tmp(semPPCDoubleDoubleLegacy
);
4698 auto Ret
= Tmp
.convertFromSignExtendedInteger(Input
, InputSize
, IsSigned
, RM
);
4699 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
4704 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart
*Input
,
4705 unsigned int InputSize
,
4706 bool IsSigned
, roundingMode RM
) {
4707 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4708 APFloat
Tmp(semPPCDoubleDoubleLegacy
);
4709 auto Ret
= Tmp
.convertFromZeroExtendedInteger(Input
, InputSize
, IsSigned
, RM
);
4710 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
4714 unsigned int DoubleAPFloat::convertToHexString(char *DST
,
4715 unsigned int HexDigits
,
4717 roundingMode RM
) const {
4718 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4719 return APFloat(semPPCDoubleDoubleLegacy
, bitcastToAPInt())
4720 .convertToHexString(DST
, HexDigits
, UpperCase
, RM
);
4723 bool DoubleAPFloat::isDenormal() const {
4724 return getCategory() == fcNormal
&&
4725 (Floats
[0].isDenormal() || Floats
[1].isDenormal() ||
4726 // (double)(Hi + Lo) == Hi defines a normal number.
4727 Floats
[0] != Floats
[0] + Floats
[1]);
4730 bool DoubleAPFloat::isSmallest() const {
4731 if (getCategory() != fcNormal
)
4733 DoubleAPFloat
Tmp(*this);
4734 Tmp
.makeSmallest(this->isNegative());
4735 return Tmp
.compare(*this) == cmpEqual
;
4738 bool DoubleAPFloat::isLargest() const {
4739 if (getCategory() != fcNormal
)
4741 DoubleAPFloat
Tmp(*this);
4742 Tmp
.makeLargest(this->isNegative());
4743 return Tmp
.compare(*this) == cmpEqual
;
4746 bool DoubleAPFloat::isInteger() const {
4747 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4748 return Floats
[0].isInteger() && Floats
[1].isInteger();
4751 void DoubleAPFloat::toString(SmallVectorImpl
<char> &Str
,
4752 unsigned FormatPrecision
,
4753 unsigned FormatMaxPadding
,
4754 bool TruncateZero
) const {
4755 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4756 APFloat(semPPCDoubleDoubleLegacy
, bitcastToAPInt())
4757 .toString(Str
, FormatPrecision
, FormatMaxPadding
, TruncateZero
);
4760 bool DoubleAPFloat::getExactInverse(APFloat
*inv
) const {
4761 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4762 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
4764 return Tmp
.getExactInverse(nullptr);
4765 APFloat
Inv(semPPCDoubleDoubleLegacy
);
4766 auto Ret
= Tmp
.getExactInverse(&Inv
);
4767 *inv
= APFloat(semPPCDoubleDouble
, Inv
.bitcastToAPInt());
4771 DoubleAPFloat
scalbn(const DoubleAPFloat
&Arg
, int Exp
,
4772 APFloat::roundingMode RM
) {
4773 assert(Arg
.Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4774 return DoubleAPFloat(semPPCDoubleDouble
, scalbn(Arg
.Floats
[0], Exp
, RM
),
4775 scalbn(Arg
.Floats
[1], Exp
, RM
));
4778 DoubleAPFloat
frexp(const DoubleAPFloat
&Arg
, int &Exp
,
4779 APFloat::roundingMode RM
) {
4780 assert(Arg
.Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4781 APFloat First
= frexp(Arg
.Floats
[0], Exp
, RM
);
4782 APFloat Second
= Arg
.Floats
[1];
4783 if (Arg
.getCategory() == APFloat::fcNormal
)
4784 Second
= scalbn(Second
, -Exp
, RM
);
4785 return DoubleAPFloat(semPPCDoubleDouble
, std::move(First
), std::move(Second
));
4788 } // namespace detail
4790 APFloat::Storage::Storage(IEEEFloat F
, const fltSemantics
&Semantics
) {
4791 if (usesLayout
<IEEEFloat
>(Semantics
)) {
4792 new (&IEEE
) IEEEFloat(std::move(F
));
4795 if (usesLayout
<DoubleAPFloat
>(Semantics
)) {
4796 const fltSemantics
& S
= F
.getSemantics();
4798 DoubleAPFloat(Semantics
, APFloat(std::move(F
), S
),
4799 APFloat(semIEEEdouble
));
4802 llvm_unreachable("Unexpected semantics");
4805 Expected
<APFloat::opStatus
> APFloat::convertFromString(StringRef Str
,
4807 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str
, RM
));
4810 hash_code
hash_value(const APFloat
&Arg
) {
4811 if (APFloat::usesLayout
<detail::IEEEFloat
>(Arg
.getSemantics()))
4812 return hash_value(Arg
.U
.IEEE
);
4813 if (APFloat::usesLayout
<detail::DoubleAPFloat
>(Arg
.getSemantics()))
4814 return hash_value(Arg
.U
.Double
);
4815 llvm_unreachable("Unexpected semantics");
4818 APFloat::APFloat(const fltSemantics
&Semantics
, StringRef S
)
4819 : APFloat(Semantics
) {
4820 auto StatusOrErr
= convertFromString(S
, rmNearestTiesToEven
);
4821 assert(StatusOrErr
&& "Invalid floating point representation");
4822 consumeError(StatusOrErr
.takeError());
4825 APFloat::opStatus
APFloat::convert(const fltSemantics
&ToSemantics
,
4826 roundingMode RM
, bool *losesInfo
) {
4827 if (&getSemantics() == &ToSemantics
) {
4831 if (usesLayout
<IEEEFloat
>(getSemantics()) &&
4832 usesLayout
<IEEEFloat
>(ToSemantics
))
4833 return U
.IEEE
.convert(ToSemantics
, RM
, losesInfo
);
4834 if (usesLayout
<IEEEFloat
>(getSemantics()) &&
4835 usesLayout
<DoubleAPFloat
>(ToSemantics
)) {
4836 assert(&ToSemantics
== &semPPCDoubleDouble
);
4837 auto Ret
= U
.IEEE
.convert(semPPCDoubleDoubleLegacy
, RM
, losesInfo
);
4838 *this = APFloat(ToSemantics
, U
.IEEE
.bitcastToAPInt());
4841 if (usesLayout
<DoubleAPFloat
>(getSemantics()) &&
4842 usesLayout
<IEEEFloat
>(ToSemantics
)) {
4843 auto Ret
= getIEEE().convert(ToSemantics
, RM
, losesInfo
);
4844 *this = APFloat(std::move(getIEEE()), ToSemantics
);
4847 llvm_unreachable("Unexpected semantics");
4850 APFloat
APFloat::getAllOnesValue(const fltSemantics
&Semantics
,
4851 unsigned BitWidth
) {
4852 return APFloat(Semantics
, APInt::getAllOnesValue(BitWidth
));
4855 void APFloat::print(raw_ostream
&OS
) const {
4856 SmallVector
<char, 16> Buffer
;
4858 OS
<< Buffer
<< "\n";
4861 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4862 LLVM_DUMP_METHOD
void APFloat::dump() const { print(dbgs()); }
4865 void APFloat::Profile(FoldingSetNodeID
&NID
) const {
4866 NID
.Add(bitcastToAPInt());
4869 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
4870 an APSInt, whose initial bit-width and signed-ness are used to determine the
4871 precision of the conversion.
4873 APFloat::opStatus
APFloat::convertToInteger(APSInt
&result
,
4874 roundingMode rounding_mode
,
4875 bool *isExact
) const {
4876 unsigned bitWidth
= result
.getBitWidth();
4877 SmallVector
<uint64_t, 4> parts(result
.getNumWords());
4878 opStatus status
= convertToInteger(parts
, bitWidth
, result
.isSigned(),
4879 rounding_mode
, isExact
);
4880 // Keeps the original signed-ness.
4881 result
= APInt(bitWidth
, parts
);
4885 double APFloat::convertToDouble() const {
4886 if (&getSemantics() == (const llvm::fltSemantics
*)&semIEEEdouble
)
4887 return getIEEE().convertToDouble();
4888 assert(getSemantics().isRepresentableBy(semIEEEdouble
) &&
4889 "Float semantics is not representable by IEEEdouble");
4890 APFloat Temp
= *this;
4892 opStatus St
= Temp
.convert(semIEEEdouble
, rmNearestTiesToEven
, &LosesInfo
);
4893 assert(!(St
& opInexact
) && !LosesInfo
&& "Unexpected imprecision");
4895 return Temp
.getIEEE().convertToDouble();
4898 float APFloat::convertToFloat() const {
4899 if (&getSemantics() == (const llvm::fltSemantics
*)&semIEEEsingle
)
4900 return getIEEE().convertToFloat();
4901 assert(getSemantics().isRepresentableBy(semIEEEsingle
) &&
4902 "Float semantics is not representable by IEEEsingle");
4903 APFloat Temp
= *this;
4905 opStatus St
= Temp
.convert(semIEEEsingle
, rmNearestTiesToEven
, &LosesInfo
);
4906 assert(!(St
& opInexact
) && !LosesInfo
&& "Unexpected imprecision");
4908 return Temp
.getIEEE().convertToFloat();
4913 #undef APFLOAT_DISPATCH_ON_SEMANTICS