1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements a class to represent arbitrary precision floating
10 // point values and provide a variety of arithmetic operations on them.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/APSInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/FoldingSet.h"
18 #include "llvm/ADT/Hashing.h"
19 #include "llvm/ADT/StringExtras.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Config/llvm-config.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/ErrorHandling.h"
24 #include "llvm/Support/MathExtras.h"
25 #include "llvm/Support/raw_ostream.h"
29 #define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \
31 if (usesLayout<IEEEFloat>(getSemantics())) \
32 return U.IEEE.METHOD_CALL; \
33 if (usesLayout<DoubleAPFloat>(getSemantics())) \
34 return U.Double.METHOD_CALL; \
35 llvm_unreachable("Unexpected semantics"); \
40 /// A macro used to combine two fcCategory enums into one key which can be used
41 /// in a switch statement to classify how the interaction of two APFloat's
42 /// categories affects an operation.
44 /// TODO: If clang source code is ever allowed to use constexpr in its own
45 /// codebase, change this into a static inline function.
46 #define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs))
48 /* Assumed in hexadecimal significand parsing, and conversion to
49 hexadecimal strings. */
50 static_assert(APFloatBase::integerPartWidth
% 4 == 0, "Part width must be divisible by 4!");
53 /* Represents floating point arithmetic semantics. */
55 /* The largest E such that 2^E is representable; this matches the
56 definition of IEEE 754. */
57 APFloatBase::ExponentType maxExponent
;
59 /* The smallest E such that 2^E is a normalized number; this
60 matches the definition of IEEE 754. */
61 APFloatBase::ExponentType minExponent
;
63 /* Number of bits in the significand. This includes the integer
65 unsigned int precision
;
67 /* Number of bits actually used in the semantics. */
68 unsigned int sizeInBits
;
71 static const fltSemantics semIEEEhalf
= {15, -14, 11, 16};
72 static const fltSemantics semIEEEsingle
= {127, -126, 24, 32};
73 static const fltSemantics semIEEEdouble
= {1023, -1022, 53, 64};
74 static const fltSemantics semIEEEquad
= {16383, -16382, 113, 128};
75 static const fltSemantics semX87DoubleExtended
= {16383, -16382, 64, 80};
76 static const fltSemantics semBogus
= {0, 0, 0, 0};
78 /* The IBM double-double semantics. Such a number consists of a pair of IEEE
79 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
80 (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
81 Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
82 to each other, and two 11-bit exponents.
84 Note: we need to make the value different from semBogus as otherwise
85 an unsafe optimization may collapse both values to a single address,
86 and we heavily rely on them having distinct addresses. */
87 static const fltSemantics semPPCDoubleDouble
= {-1, 0, 0, 0};
89 /* These are legacy semantics for the fallback, inaccurate implementation of
90 IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
91 operation. It's equivalent to having an IEEE number with consecutive 106
92 bits of mantissa and 11 bits of exponent.
94 It's not equivalent to IBM double-double. For example, a legit IBM
95 double-double, 1 + epsilon:
97 1 + epsilon = 1 + (1 >> 1076)
99 is not representable by a consecutive 106 bits of mantissa.
101 Currently, these semantics are used in the following way:
103 semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
104 (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
105 semPPCDoubleDoubleLegacy -> IEEE operations
107 We use bitcastToAPInt() to get the bit representation (in APInt) of the
108 underlying IEEEdouble, then use the APInt constructor to construct the
111 TODO: Implement all operations in semPPCDoubleDouble, and delete these
113 static const fltSemantics semPPCDoubleDoubleLegacy
= {1023, -1022 + 53,
116 const fltSemantics
&APFloatBase::IEEEhalf() {
119 const fltSemantics
&APFloatBase::IEEEsingle() {
120 return semIEEEsingle
;
122 const fltSemantics
&APFloatBase::IEEEdouble() {
123 return semIEEEdouble
;
125 const fltSemantics
&APFloatBase::IEEEquad() {
128 const fltSemantics
&APFloatBase::x87DoubleExtended() {
129 return semX87DoubleExtended
;
131 const fltSemantics
&APFloatBase::Bogus() {
134 const fltSemantics
&APFloatBase::PPCDoubleDouble() {
135 return semPPCDoubleDouble
;
138 /* A tight upper bound on number of parts required to hold the value
141 power * 815 / (351 * integerPartWidth) + 1
143 However, whilst the result may require only this many parts,
144 because we are multiplying two values to get it, the
145 multiplication may require an extra part with the excess part
146 being zero (consider the trivial case of 1 * 1, tcFullMultiply
147 requires two parts to hold the single-part result). So we add an
148 extra one to guarantee enough space whilst multiplying. */
149 const unsigned int maxExponent
= 16383;
150 const unsigned int maxPrecision
= 113;
151 const unsigned int maxPowerOfFiveExponent
= maxExponent
+ maxPrecision
- 1;
152 const unsigned int maxPowerOfFiveParts
= 2 + ((maxPowerOfFiveExponent
* 815) / (351 * APFloatBase::integerPartWidth
));
154 unsigned int APFloatBase::semanticsPrecision(const fltSemantics
&semantics
) {
155 return semantics
.precision
;
157 APFloatBase::ExponentType
158 APFloatBase::semanticsMaxExponent(const fltSemantics
&semantics
) {
159 return semantics
.maxExponent
;
161 APFloatBase::ExponentType
162 APFloatBase::semanticsMinExponent(const fltSemantics
&semantics
) {
163 return semantics
.minExponent
;
165 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics
&semantics
) {
166 return semantics
.sizeInBits
;
169 unsigned APFloatBase::getSizeInBits(const fltSemantics
&Sem
) {
170 return Sem
.sizeInBits
;
173 /* A bunch of private, handy routines. */
175 static inline unsigned int
176 partCountForBits(unsigned int bits
)
178 return ((bits
) + APFloatBase::integerPartWidth
- 1) / APFloatBase::integerPartWidth
;
181 /* Returns 0U-9U. Return values >= 10U are not digits. */
182 static inline unsigned int
183 decDigitValue(unsigned int c
)
188 /* Return the value of a decimal exponent of the form
191 If the exponent overflows, returns a large exponent with the
194 readExponent(StringRef::iterator begin
, StringRef::iterator end
)
197 unsigned int absExponent
;
198 const unsigned int overlargeExponent
= 24000; /* FIXME. */
199 StringRef::iterator p
= begin
;
201 assert(p
!= end
&& "Exponent has no digits");
203 isNegative
= (*p
== '-');
204 if (*p
== '-' || *p
== '+') {
206 assert(p
!= end
&& "Exponent has no digits");
209 absExponent
= decDigitValue(*p
++);
210 assert(absExponent
< 10U && "Invalid character in exponent");
212 for (; p
!= end
; ++p
) {
215 value
= decDigitValue(*p
);
216 assert(value
< 10U && "Invalid character in exponent");
218 value
+= absExponent
* 10;
219 if (absExponent
>= overlargeExponent
) {
220 absExponent
= overlargeExponent
;
221 p
= end
; /* outwit assert below */
227 assert(p
== end
&& "Invalid exponent in exponent");
230 return -(int) absExponent
;
232 return (int) absExponent
;
235 /* This is ugly and needs cleaning up, but I don't immediately see
236 how whilst remaining safe. */
238 totalExponent(StringRef::iterator p
, StringRef::iterator end
,
239 int exponentAdjustment
)
241 int unsignedExponent
;
242 bool negative
, overflow
;
245 assert(p
!= end
&& "Exponent has no digits");
247 negative
= *p
== '-';
248 if (*p
== '-' || *p
== '+') {
250 assert(p
!= end
&& "Exponent has no digits");
253 unsignedExponent
= 0;
255 for (; p
!= end
; ++p
) {
258 value
= decDigitValue(*p
);
259 assert(value
< 10U && "Invalid character in exponent");
261 unsignedExponent
= unsignedExponent
* 10 + value
;
262 if (unsignedExponent
> 32767) {
268 if (exponentAdjustment
> 32767 || exponentAdjustment
< -32768)
272 exponent
= unsignedExponent
;
274 exponent
= -exponent
;
275 exponent
+= exponentAdjustment
;
276 if (exponent
> 32767 || exponent
< -32768)
281 exponent
= negative
? -32768: 32767;
286 static StringRef::iterator
287 skipLeadingZeroesAndAnyDot(StringRef::iterator begin
, StringRef::iterator end
,
288 StringRef::iterator
*dot
)
290 StringRef::iterator p
= begin
;
292 while (p
!= end
&& *p
== '0')
295 if (p
!= end
&& *p
== '.') {
298 assert(end
- begin
!= 1 && "Significand has no digits");
300 while (p
!= end
&& *p
== '0')
307 /* Given a normal decimal floating point number of the form
311 where the decimal point and exponent are optional, fill out the
312 structure D. Exponent is appropriate if the significand is
313 treated as an integer, and normalizedExponent if the significand
314 is taken to have the decimal point after a single leading
317 If the value is zero, V->firstSigDigit points to a non-digit, and
318 the return exponent is zero.
321 const char *firstSigDigit
;
322 const char *lastSigDigit
;
324 int normalizedExponent
;
328 interpretDecimal(StringRef::iterator begin
, StringRef::iterator end
,
331 StringRef::iterator dot
= end
;
332 StringRef::iterator p
= skipLeadingZeroesAndAnyDot (begin
, end
, &dot
);
334 D
->firstSigDigit
= p
;
336 D
->normalizedExponent
= 0;
338 for (; p
!= end
; ++p
) {
340 assert(dot
== end
&& "String contains multiple dots");
345 if (decDigitValue(*p
) >= 10U)
350 assert((*p
== 'e' || *p
== 'E') && "Invalid character in significand");
351 assert(p
!= begin
&& "Significand has no digits");
352 assert((dot
== end
|| p
- begin
!= 1) && "Significand has no digits");
354 /* p points to the first non-digit in the string */
355 D
->exponent
= readExponent(p
+ 1, end
);
357 /* Implied decimal point? */
362 /* If number is all zeroes accept any exponent. */
363 if (p
!= D
->firstSigDigit
) {
364 /* Drop insignificant trailing zeroes. */
369 while (p
!= begin
&& *p
== '0');
370 while (p
!= begin
&& *p
== '.');
373 /* Adjust the exponents for any decimal point. */
374 D
->exponent
+= static_cast<APFloat::ExponentType
>((dot
- p
) - (dot
> p
));
375 D
->normalizedExponent
= (D
->exponent
+
376 static_cast<APFloat::ExponentType
>((p
- D
->firstSigDigit
)
377 - (dot
> D
->firstSigDigit
&& dot
< p
)));
383 /* Return the trailing fraction of a hexadecimal number.
384 DIGITVALUE is the first hex digit of the fraction, P points to
387 trailingHexadecimalFraction(StringRef::iterator p
, StringRef::iterator end
,
388 unsigned int digitValue
)
390 unsigned int hexDigit
;
392 /* If the first trailing digit isn't 0 or 8 we can work out the
393 fraction immediately. */
395 return lfMoreThanHalf
;
396 else if (digitValue
< 8 && digitValue
> 0)
397 return lfLessThanHalf
;
399 // Otherwise we need to find the first non-zero digit.
400 while (p
!= end
&& (*p
== '0' || *p
== '.'))
403 assert(p
!= end
&& "Invalid trailing hexadecimal fraction!");
405 hexDigit
= hexDigitValue(*p
);
407 /* If we ran off the end it is exactly zero or one-half, otherwise
410 return digitValue
== 0 ? lfExactlyZero
: lfExactlyHalf
;
412 return digitValue
== 0 ? lfLessThanHalf
: lfMoreThanHalf
;
415 /* Return the fraction lost were a bignum truncated losing the least
416 significant BITS bits. */
418 lostFractionThroughTruncation(const APFloatBase::integerPart
*parts
,
419 unsigned int partCount
,
424 lsb
= APInt::tcLSB(parts
, partCount
);
426 /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
428 return lfExactlyZero
;
430 return lfExactlyHalf
;
431 if (bits
<= partCount
* APFloatBase::integerPartWidth
&&
432 APInt::tcExtractBit(parts
, bits
- 1))
433 return lfMoreThanHalf
;
435 return lfLessThanHalf
;
438 /* Shift DST right BITS bits noting lost fraction. */
440 shiftRight(APFloatBase::integerPart
*dst
, unsigned int parts
, unsigned int bits
)
442 lostFraction lost_fraction
;
444 lost_fraction
= lostFractionThroughTruncation(dst
, parts
, bits
);
446 APInt::tcShiftRight(dst
, parts
, bits
);
448 return lost_fraction
;
451 /* Combine the effect of two lost fractions. */
453 combineLostFractions(lostFraction moreSignificant
,
454 lostFraction lessSignificant
)
456 if (lessSignificant
!= lfExactlyZero
) {
457 if (moreSignificant
== lfExactlyZero
)
458 moreSignificant
= lfLessThanHalf
;
459 else if (moreSignificant
== lfExactlyHalf
)
460 moreSignificant
= lfMoreThanHalf
;
463 return moreSignificant
;
/* The error from the true value, in half-ulps, on multiplying two
   floating point numbers, which differ from the value they
   approximate by at most HUERR1 and HUERR2 half-ulps, is strictly less
   than the returned value.  INEXACTMULTIPLY says whether the
   multiplication itself was inexact.

   See "How to Read Floating Point Numbers Accurately" by William D
   Clinger.  */
static unsigned int
HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2)
{
  assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8));

  const unsigned int InputError = HUerr1 + HUerr2;
  const unsigned int Inexact = inexactMultiply;

  /* With exact inputs the bound is <= inexactMultiply half-ulps.  */
  return InputError == 0 ? Inexact * 2 : Inexact + 2 * InputError;
}
484 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
485 when the least significant BITS are truncated. BITS cannot be
487 static APFloatBase::integerPart
488 ulpsFromBoundary(const APFloatBase::integerPart
*parts
, unsigned int bits
,
490 unsigned int count
, partBits
;
491 APFloatBase::integerPart part
, boundary
;
496 count
= bits
/ APFloatBase::integerPartWidth
;
497 partBits
= bits
% APFloatBase::integerPartWidth
+ 1;
499 part
= parts
[count
] & (~(APFloatBase::integerPart
) 0 >> (APFloatBase::integerPartWidth
- partBits
));
502 boundary
= (APFloatBase::integerPart
) 1 << (partBits
- 1);
507 if (part
- boundary
<= boundary
- part
)
508 return part
- boundary
;
510 return boundary
- part
;
513 if (part
== boundary
) {
516 return ~(APFloatBase::integerPart
) 0; /* A lot. */
519 } else if (part
== boundary
- 1) {
522 return ~(APFloatBase::integerPart
) 0; /* A lot. */
527 return ~(APFloatBase::integerPart
) 0; /* A lot. */
530 /* Place pow(5, power) in DST, and return the number of parts used.
531 DST must be at least one part larger than size of the answer. */
533 powerOf5(APFloatBase::integerPart
*dst
, unsigned int power
) {
534 static const APFloatBase::integerPart firstEightPowers
[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
535 APFloatBase::integerPart pow5s
[maxPowerOfFiveParts
* 2 + 5];
536 pow5s
[0] = 78125 * 5;
538 unsigned int partsCount
[16] = { 1 };
539 APFloatBase::integerPart scratch
[maxPowerOfFiveParts
], *p1
, *p2
, *pow5
;
541 assert(power
<= maxExponent
);
546 *p1
= firstEightPowers
[power
& 7];
552 for (unsigned int n
= 0; power
; power
>>= 1, n
++) {
557 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
559 pc
= partsCount
[n
- 1];
560 APInt::tcFullMultiply(pow5
, pow5
- pc
, pow5
- pc
, pc
, pc
);
562 if (pow5
[pc
- 1] == 0)
568 APFloatBase::integerPart
*tmp
;
570 APInt::tcFullMultiply(p2
, p1
, pow5
, result
, pc
);
572 if (p2
[result
- 1] == 0)
575 /* Now result is in p1 with partsCount parts and p2 is scratch
586 APInt::tcAssign(dst
, p1
, result
);
591 /* Zero at the end to avoid modular arithmetic when adding one; used
592 when rounding up during hexadecimal output. */
593 static const char hexDigitsLower
[] = "0123456789abcdef0";
594 static const char hexDigitsUpper
[] = "0123456789ABCDEF0";
595 static const char infinityL
[] = "infinity";
596 static const char infinityU
[] = "INFINITY";
597 static const char NaNL
[] = "nan";
598 static const char NaNU
[] = "NAN";
600 /* Write out an integerPart in hexadecimal, starting with the most
601 significant nibble. Write out exactly COUNT hexdigits, return
604 partAsHex (char *dst
, APFloatBase::integerPart part
, unsigned int count
,
605 const char *hexDigitChars
)
607 unsigned int result
= count
;
609 assert(count
!= 0 && count
<= APFloatBase::integerPartWidth
/ 4);
611 part
>>= (APFloatBase::integerPartWidth
- 4 * count
);
613 dst
[count
] = hexDigitChars
[part
& 0xf];
620 /* Write out an unsigned decimal integer. */
622 writeUnsignedDecimal (char *dst
, unsigned int n
)
638 /* Write out a signed decimal integer. */
640 writeSignedDecimal (char *dst
, int value
)
644 dst
= writeUnsignedDecimal(dst
, -(unsigned) value
);
646 dst
= writeUnsignedDecimal(dst
, value
);
653 void IEEEFloat::initialize(const fltSemantics
*ourSemantics
) {
656 semantics
= ourSemantics
;
659 significand
.parts
= new integerPart
[count
];
662 void IEEEFloat::freeSignificand() {
664 delete [] significand
.parts
;
667 void IEEEFloat::assign(const IEEEFloat
&rhs
) {
668 assert(semantics
== rhs
.semantics
);
671 category
= rhs
.category
;
672 exponent
= rhs
.exponent
;
673 if (isFiniteNonZero() || category
== fcNaN
)
674 copySignificand(rhs
);
677 void IEEEFloat::copySignificand(const IEEEFloat
&rhs
) {
678 assert(isFiniteNonZero() || category
== fcNaN
);
679 assert(rhs
.partCount() >= partCount());
681 APInt::tcAssign(significandParts(), rhs
.significandParts(),
685 /* Make this number a NaN, with an arbitrary but deterministic value
686 for the significand. If double or longer, this is a signalling NaN,
687 which may not be ideal. If float, this is QNaN(0). */
688 void IEEEFloat::makeNaN(bool SNaN
, bool Negative
, const APInt
*fill
) {
692 integerPart
*significand
= significandParts();
693 unsigned numParts
= partCount();
695 // Set the significand bits to the fill.
696 if (!fill
|| fill
->getNumWords() < numParts
)
697 APInt::tcSet(significand
, 0, numParts
);
699 APInt::tcAssign(significand
, fill
->getRawData(),
700 std::min(fill
->getNumWords(), numParts
));
702 // Zero out the excess bits of the significand.
703 unsigned bitsToPreserve
= semantics
->precision
- 1;
704 unsigned part
= bitsToPreserve
/ 64;
705 bitsToPreserve
%= 64;
706 significand
[part
] &= ((1ULL << bitsToPreserve
) - 1);
707 for (part
++; part
!= numParts
; ++part
)
708 significand
[part
] = 0;
711 unsigned QNaNBit
= semantics
->precision
- 2;
714 // We always have to clear the QNaN bit to make it an SNaN.
715 APInt::tcClearBit(significand
, QNaNBit
);
717 // If there are no bits set in the payload, we have to set
718 // *something* to make it a NaN instead of an infinity;
719 // conventionally, this is the next bit down from the QNaN bit.
720 if (APInt::tcIsZero(significand
, numParts
))
721 APInt::tcSetBit(significand
, QNaNBit
- 1);
723 // We always have to set the QNaN bit to make it a QNaN.
724 APInt::tcSetBit(significand
, QNaNBit
);
727 // For x87 extended precision, we want to make a NaN, not a
728 // pseudo-NaN. Maybe we should expose the ability to make
730 if (semantics
== &semX87DoubleExtended
)
731 APInt::tcSetBit(significand
, QNaNBit
+ 1);
734 IEEEFloat
&IEEEFloat::operator=(const IEEEFloat
&rhs
) {
736 if (semantics
!= rhs
.semantics
) {
738 initialize(rhs
.semantics
);
746 IEEEFloat
&IEEEFloat::operator=(IEEEFloat
&&rhs
) {
749 semantics
= rhs
.semantics
;
750 significand
= rhs
.significand
;
751 exponent
= rhs
.exponent
;
752 category
= rhs
.category
;
755 rhs
.semantics
= &semBogus
;
759 bool IEEEFloat::isDenormal() const {
760 return isFiniteNonZero() && (exponent
== semantics
->minExponent
) &&
761 (APInt::tcExtractBit(significandParts(),
762 semantics
->precision
- 1) == 0);
765 bool IEEEFloat::isSmallest() const {
766 // The smallest number by magnitude in our format will be the smallest
767 // denormal, i.e. the floating point number with exponent being minimum
768 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
769 return isFiniteNonZero() && exponent
== semantics
->minExponent
&&
770 significandMSB() == 0;
773 bool IEEEFloat::isSignificandAllOnes() const {
774 // Test if the significand excluding the integral bit is all ones. This allows
775 // us to test for binade boundaries.
776 const integerPart
*Parts
= significandParts();
777 const unsigned PartCount
= partCount();
778 for (unsigned i
= 0; i
< PartCount
- 1; i
++)
782 // Set the unused high bits to all ones when we compare.
783 const unsigned NumHighBits
=
784 PartCount
*integerPartWidth
- semantics
->precision
+ 1;
785 assert(NumHighBits
<= integerPartWidth
&& "Can not have more high bits to "
786 "fill than integerPartWidth");
787 const integerPart HighBitFill
=
788 ~integerPart(0) << (integerPartWidth
- NumHighBits
);
789 if (~(Parts
[PartCount
- 1] | HighBitFill
))
795 bool IEEEFloat::isSignificandAllZeros() const {
796 // Test if the significand excluding the integral bit is all zeros. This
797 // allows us to test for binade boundaries.
798 const integerPart
*Parts
= significandParts();
799 const unsigned PartCount
= partCount();
801 for (unsigned i
= 0; i
< PartCount
- 1; i
++)
805 const unsigned NumHighBits
=
806 PartCount
*integerPartWidth
- semantics
->precision
+ 1;
807 assert(NumHighBits
<= integerPartWidth
&& "Can not have more high bits to "
808 "clear than integerPartWidth");
809 const integerPart HighBitMask
= ~integerPart(0) >> NumHighBits
;
811 if (Parts
[PartCount
- 1] & HighBitMask
)
817 bool IEEEFloat::isLargest() const {
818 // The largest number by magnitude in our format will be the floating point
819 // number with maximum exponent and with significand that is all ones.
820 return isFiniteNonZero() && exponent
== semantics
->maxExponent
821 && isSignificandAllOnes();
824 bool IEEEFloat::isInteger() const {
825 // This could be made more efficient; I'm going for obviously correct.
826 if (!isFinite()) return false;
827 IEEEFloat truncated
= *this;
828 truncated
.roundToIntegral(rmTowardZero
);
829 return compare(truncated
) == cmpEqual
;
832 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat
&rhs
) const {
835 if (semantics
!= rhs
.semantics
||
836 category
!= rhs
.category
||
839 if (category
==fcZero
|| category
==fcInfinity
)
842 if (isFiniteNonZero() && exponent
!= rhs
.exponent
)
845 return std::equal(significandParts(), significandParts() + partCount(),
846 rhs
.significandParts());
849 IEEEFloat::IEEEFloat(const fltSemantics
&ourSemantics
, integerPart value
) {
850 initialize(&ourSemantics
);
854 exponent
= ourSemantics
.precision
- 1;
855 significandParts()[0] = value
;
856 normalize(rmNearestTiesToEven
, lfExactlyZero
);
859 IEEEFloat::IEEEFloat(const fltSemantics
&ourSemantics
) {
860 initialize(&ourSemantics
);
865 // Delegate to the previous constructor, because later copy constructor may
866 // actually inspects category, which can't be garbage.
867 IEEEFloat::IEEEFloat(const fltSemantics
&ourSemantics
, uninitializedTag tag
)
868 : IEEEFloat(ourSemantics
) {}
870 IEEEFloat::IEEEFloat(const IEEEFloat
&rhs
) {
871 initialize(rhs
.semantics
);
875 IEEEFloat::IEEEFloat(IEEEFloat
&&rhs
) : semantics(&semBogus
) {
876 *this = std::move(rhs
);
879 IEEEFloat::~IEEEFloat() { freeSignificand(); }
881 unsigned int IEEEFloat::partCount() const {
882 return partCountForBits(semantics
->precision
+ 1);
885 const IEEEFloat::integerPart
*IEEEFloat::significandParts() const {
886 return const_cast<IEEEFloat
*>(this)->significandParts();
889 IEEEFloat::integerPart
*IEEEFloat::significandParts() {
891 return significand
.parts
;
893 return &significand
.part
;
896 void IEEEFloat::zeroSignificand() {
897 APInt::tcSet(significandParts(), 0, partCount());
900 /* Increment an fcNormal floating point number's significand. */
901 void IEEEFloat::incrementSignificand() {
904 carry
= APInt::tcIncrement(significandParts(), partCount());
906 /* Our callers should never cause us to overflow. */
911 /* Add the significand of the RHS. Returns the carry flag. */
912 IEEEFloat::integerPart
IEEEFloat::addSignificand(const IEEEFloat
&rhs
) {
915 parts
= significandParts();
917 assert(semantics
== rhs
.semantics
);
918 assert(exponent
== rhs
.exponent
);
920 return APInt::tcAdd(parts
, rhs
.significandParts(), 0, partCount());
923 /* Subtract the significand of the RHS with a borrow flag. Returns
925 IEEEFloat::integerPart
IEEEFloat::subtractSignificand(const IEEEFloat
&rhs
,
926 integerPart borrow
) {
929 parts
= significandParts();
931 assert(semantics
== rhs
.semantics
);
932 assert(exponent
== rhs
.exponent
);
934 return APInt::tcSubtract(parts
, rhs
.significandParts(), borrow
,
938 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
939 on to the full-precision result of the multiplication. Returns the
941 lostFraction
IEEEFloat::multiplySignificand(const IEEEFloat
&rhs
,
942 const IEEEFloat
*addend
) {
943 unsigned int omsb
; // One, not zero, based MSB.
944 unsigned int partsCount
, newPartsCount
, precision
;
945 integerPart
*lhsSignificand
;
946 integerPart scratch
[4];
947 integerPart
*fullSignificand
;
948 lostFraction lost_fraction
;
951 assert(semantics
== rhs
.semantics
);
953 precision
= semantics
->precision
;
955 // Allocate space for twice as many bits as the original significand, plus one
956 // extra bit for the addition to overflow into.
957 newPartsCount
= partCountForBits(precision
* 2 + 1);
959 if (newPartsCount
> 4)
960 fullSignificand
= new integerPart
[newPartsCount
];
962 fullSignificand
= scratch
;
964 lhsSignificand
= significandParts();
965 partsCount
= partCount();
967 APInt::tcFullMultiply(fullSignificand
, lhsSignificand
,
968 rhs
.significandParts(), partsCount
, partsCount
);
970 lost_fraction
= lfExactlyZero
;
971 omsb
= APInt::tcMSB(fullSignificand
, newPartsCount
) + 1;
972 exponent
+= rhs
.exponent
;
974 // Assume the operands involved in the multiplication are single-precision
975 // FP, and the two multiplicands are:
976 // *this = a23 . a22 ... a0 * 2^e1
977 // rhs = b23 . b22 ... b0 * 2^e2
978 // the result of multiplication is:
979 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
980 // Note that there are three significant bits at the left-hand side of the
981 // radix point: two for the multiplication, and an overflow bit for the
982 // addition (that will always be zero at this point). Move the radix point
983 // toward left by two bits, and adjust exponent accordingly.
986 if (addend
&& addend
->isNonZero()) {
987 // The intermediate result of the multiplication has "2 * precision"
988 // significant bits; adjust the addend to be consistent with mul result.
990 Significand savedSignificand
= significand
;
991 const fltSemantics
*savedSemantics
= semantics
;
992 fltSemantics extendedSemantics
;
994 unsigned int extendedPrecision
;
996 // Normalize our MSB to one below the top bit to allow for overflow.
997 extendedPrecision
= 2 * precision
+ 1;
998 if (omsb
!= extendedPrecision
- 1) {
999 assert(extendedPrecision
> omsb
);
1000 APInt::tcShiftLeft(fullSignificand
, newPartsCount
,
1001 (extendedPrecision
- 1) - omsb
);
1002 exponent
-= (extendedPrecision
- 1) - omsb
;
1005 /* Create new semantics. */
1006 extendedSemantics
= *semantics
;
1007 extendedSemantics
.precision
= extendedPrecision
;
1009 if (newPartsCount
== 1)
1010 significand
.part
= fullSignificand
[0];
1012 significand
.parts
= fullSignificand
;
1013 semantics
= &extendedSemantics
;
1015 IEEEFloat
extendedAddend(*addend
);
1016 status
= extendedAddend
.convert(extendedSemantics
, rmTowardZero
, &ignored
);
1017 assert(status
== opOK
);
1020 // Shift the significand of the addend right by one bit. This guarantees
1021 // that the high bit of the significand is zero (same as fullSignificand),
1022 // so the addition will overflow (if it does overflow at all) into the top bit.
1023 lost_fraction
= extendedAddend
.shiftSignificandRight(1);
1024 assert(lost_fraction
== lfExactlyZero
&&
1025 "Lost precision while shifting addend for fused-multiply-add.");
1027 lost_fraction
= addOrSubtractSignificand(extendedAddend
, false);
1029 /* Restore our state. */
1030 if (newPartsCount
== 1)
1031 fullSignificand
[0] = significand
.part
;
1032 significand
= savedSignificand
;
1033 semantics
= savedSemantics
;
1035 omsb
= APInt::tcMSB(fullSignificand
, newPartsCount
) + 1;
1038 // Convert the result having "2 * precision" significant-bits back to the one
1039 // having "precision" significant-bits. First, move the radix point from
1040 // position "2*precision - 1" to "precision - 1". The exponent needs to be
1041 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1042 exponent
-= precision
+ 1;
1044 // In case MSB resides at the left-hand side of radix point, shift the
1045 // mantissa right by some amount to make sure the MSB reside right before
1046 // the radix point (i.e. "MSB . rest-significant-bits").
1048 // Note that the result is not normalized when "omsb < precision". So, the
1049 // caller needs to call IEEEFloat::normalize() if normalized value is
1051 if (omsb
> precision
) {
1052 unsigned int bits
, significantParts
;
1055 bits
= omsb
- precision
;
1056 significantParts
= partCountForBits(omsb
);
1057 lf
= shiftRight(fullSignificand
, significantParts
, bits
);
1058 lost_fraction
= combineLostFractions(lf
, lost_fraction
);
1062 APInt::tcAssign(lhsSignificand
, fullSignificand
, partsCount
);
1064 if (newPartsCount
> 4)
1065 delete [] fullSignificand
;
1067 return lost_fraction
;
1070 /* Divide the significand of this value by that of RHS; the quotient replaces this significand. */
1071 lostFraction
IEEEFloat::divideSignificand(const IEEEFloat
&rhs
) {
1072 unsigned int bit
, i
, partsCount
;
1073 const integerPart
*rhsSignificand
;
1074 integerPart
*lhsSignificand
, *dividend
, *divisor
;
1075 integerPart scratch
[4];
1076 lostFraction lost_fraction
;
1078 assert(semantics
== rhs
.semantics
);
1080 lhsSignificand
= significandParts();
1081 rhsSignificand
= rhs
.significandParts();
1082 partsCount
= partCount();
1085 dividend
= new integerPart
[partsCount
* 2];
1089 divisor
= dividend
+ partsCount
;
1091 /* Copy the dividend and divisor as they will be modified in-place. */
1092 for (i
= 0; i
< partsCount
; i
++) {
1093 dividend
[i
] = lhsSignificand
[i
];
1094 divisor
[i
] = rhsSignificand
[i
];
1095 lhsSignificand
[i
] = 0;
1098 exponent
-= rhs
.exponent
;
1100 unsigned int precision
= semantics
->precision
;
1102 /* Normalize the divisor. */
1103 bit
= precision
- APInt::tcMSB(divisor
, partsCount
) - 1;
1106 APInt::tcShiftLeft(divisor
, partsCount
, bit
);
1109 /* Normalize the dividend. */
1110 bit
= precision
- APInt::tcMSB(dividend
, partsCount
) - 1;
1113 APInt::tcShiftLeft(dividend
, partsCount
, bit
);
1116 /* Ensure the dividend >= divisor initially for the loop below.
1117 Incidentally, this means that the division loop below is
1118 guaranteed to set the integer bit to one. */
1119 if (APInt::tcCompare(dividend
, divisor
, partsCount
) < 0) {
1121 APInt::tcShiftLeft(dividend
, partsCount
, 1);
1122 assert(APInt::tcCompare(dividend
, divisor
, partsCount
) >= 0);
1125 /* Long division. */
1126 for (bit
= precision
; bit
; bit
-= 1) {
1127 if (APInt::tcCompare(dividend
, divisor
, partsCount
) >= 0) {
1128 APInt::tcSubtract(dividend
, divisor
, 0, partsCount
);
1129 APInt::tcSetBit(lhsSignificand
, bit
- 1);
1132 APInt::tcShiftLeft(dividend
, partsCount
, 1);
1135 /* Figure out the lost fraction. */
1136 int cmp
= APInt::tcCompare(dividend
, divisor
, partsCount
);
1139 lost_fraction
= lfMoreThanHalf
;
1141 lost_fraction
= lfExactlyHalf
;
1142 else if (APInt::tcIsZero(dividend
, partsCount
))
1143 lost_fraction
= lfExactlyZero
;
1145 lost_fraction
= lfLessThanHalf
;
1150 return lost_fraction
;
1153 unsigned int IEEEFloat::significandMSB() const {
1154 return APInt::tcMSB(significandParts(), partCount());
1157 unsigned int IEEEFloat::significandLSB() const {
1158 return APInt::tcLSB(significandParts(), partCount());
1161 /* Note that a zero result is NOT normalized to fcZero. */
1162 lostFraction
IEEEFloat::shiftSignificandRight(unsigned int bits
) {
1163 /* Our exponent should not overflow. */
1164 assert((ExponentType
) (exponent
+ bits
) >= exponent
);
1168 return shiftRight(significandParts(), partCount(), bits
);
1171 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1172 void IEEEFloat::shiftSignificandLeft(unsigned int bits
) {
1173 assert(bits
< semantics
->precision
);
1176 unsigned int partsCount
= partCount();
1178 APInt::tcShiftLeft(significandParts(), partsCount
, bits
);
1181 assert(!APInt::tcIsZero(significandParts(), partsCount
));
1185 IEEEFloat::cmpResult
1186 IEEEFloat::compareAbsoluteValue(const IEEEFloat
&rhs
) const {
1189 assert(semantics
== rhs
.semantics
);
1190 assert(isFiniteNonZero());
1191 assert(rhs
.isFiniteNonZero());
1193 compare
= exponent
- rhs
.exponent
;
1195 /* If exponents are equal, do an unsigned bignum comparison of the
1198 compare
= APInt::tcCompare(significandParts(), rhs
.significandParts(),
1202 return cmpGreaterThan
;
1203 else if (compare
< 0)
1209 /* Handle overflow. Sign is preserved. We either become infinity or
1210 the largest finite number. */
1211 IEEEFloat::opStatus
IEEEFloat::handleOverflow(roundingMode rounding_mode
) {
1213 if (rounding_mode
== rmNearestTiesToEven
||
1214 rounding_mode
== rmNearestTiesToAway
||
1215 (rounding_mode
== rmTowardPositive
&& !sign
) ||
1216 (rounding_mode
== rmTowardNegative
&& sign
)) {
1217 category
= fcInfinity
;
1218 return (opStatus
) (opOverflow
| opInexact
);
1221 /* Otherwise we become the largest finite number. */
1222 category
= fcNormal
;
1223 exponent
= semantics
->maxExponent
;
1224 APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
1225 semantics
->precision
);
1230 /* Returns TRUE if, when truncating the current number, with BIT the
1231 new LSB, with the given lost fraction and rounding mode, the result
1232 would need to be rounded away from zero (i.e., by increasing the
1233 signficand). This routine must work for fcZero of both signs, and
1234 fcNormal numbers. */
1235 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode
,
1236 lostFraction lost_fraction
,
1237 unsigned int bit
) const {
1238 /* NaNs and infinities should not have lost fractions. */
1239 assert(isFiniteNonZero() || category
== fcZero
);
1241 /* Current callers never pass this so we don't handle it. */
1242 assert(lost_fraction
!= lfExactlyZero
);
1244 switch (rounding_mode
) {
1245 case rmNearestTiesToAway
:
1246 return lost_fraction
== lfExactlyHalf
|| lost_fraction
== lfMoreThanHalf
;
1248 case rmNearestTiesToEven
:
1249 if (lost_fraction
== lfMoreThanHalf
)
1252 /* Our zeroes don't have a significand to test. */
1253 if (lost_fraction
== lfExactlyHalf
&& category
!= fcZero
)
1254 return APInt::tcExtractBit(significandParts(), bit
);
1261 case rmTowardPositive
:
1264 case rmTowardNegative
:
1267 llvm_unreachable("Invalid rounding mode found");
1270 IEEEFloat::opStatus
IEEEFloat::normalize(roundingMode rounding_mode
,
1271 lostFraction lost_fraction
) {
1272 unsigned int omsb
; /* One, not zero, based MSB. */
1275 if (!isFiniteNonZero())
1278 /* Before rounding normalize the exponent of fcNormal numbers. */
1279 omsb
= significandMSB() + 1;
1282 /* OMSB is numbered from 1. We want to place it in the integer
1283 bit numbered PRECISION if possible, with a compensating change in
1285 exponentChange
= omsb
- semantics
->precision
;
1287 /* If the resulting exponent is too high, overflow according to
1288 the rounding mode. */
1289 if (exponent
+ exponentChange
> semantics
->maxExponent
)
1290 return handleOverflow(rounding_mode
);
1292 /* Subnormal numbers have exponent minExponent, and their MSB
1293 is forced based on that. */
1294 if (exponent
+ exponentChange
< semantics
->minExponent
)
1295 exponentChange
= semantics
->minExponent
- exponent
;
1297 /* Shifting left is easy as we don't lose precision. */
1298 if (exponentChange
< 0) {
1299 assert(lost_fraction
== lfExactlyZero
);
1301 shiftSignificandLeft(-exponentChange
);
1306 if (exponentChange
> 0) {
1309 /* Shift right and capture any new lost fraction. */
1310 lf
= shiftSignificandRight(exponentChange
);
1312 lost_fraction
= combineLostFractions(lf
, lost_fraction
);
1314 /* Keep OMSB up-to-date. */
1315 if (omsb
> (unsigned) exponentChange
)
1316 omsb
-= exponentChange
;
1322 /* Now round the number according to rounding_mode given the lost
1325 /* As specified in IEEE 754, since we do not trap we do not report
1326 underflow for exact results. */
1327 if (lost_fraction
== lfExactlyZero
) {
1328 /* Canonicalize zeroes. */
1335 /* Increment the significand if we're rounding away from zero. */
1336 if (roundAwayFromZero(rounding_mode
, lost_fraction
, 0)) {
1338 exponent
= semantics
->minExponent
;
1340 incrementSignificand();
1341 omsb
= significandMSB() + 1;
1343 /* Did the significand increment overflow? */
1344 if (omsb
== (unsigned) semantics
->precision
+ 1) {
1345 /* Renormalize by incrementing the exponent and shifting our
1346 significand right one. However if we already have the
1347 maximum exponent we overflow to infinity. */
1348 if (exponent
== semantics
->maxExponent
) {
1349 category
= fcInfinity
;
1351 return (opStatus
) (opOverflow
| opInexact
);
1354 shiftSignificandRight(1);
1360 /* The normal case - we were and are not denormal, and any
1361 significand increment above didn't overflow. */
1362 if (omsb
== semantics
->precision
)
1365 /* We have a non-zero denormal. */
1366 assert(omsb
< semantics
->precision
);
1368 /* Canonicalize zeroes. */
1372 /* The fcZero case is a denormal that underflowed to zero. */
1373 return (opStatus
) (opUnderflow
| opInexact
);
1376 IEEEFloat::opStatus
IEEEFloat::addOrSubtractSpecials(const IEEEFloat
&rhs
,
1378 switch (PackCategoriesIntoKey(category
, rhs
.category
)) {
1380 llvm_unreachable(nullptr);
1382 case PackCategoriesIntoKey(fcNaN
, fcZero
):
1383 case PackCategoriesIntoKey(fcNaN
, fcNormal
):
1384 case PackCategoriesIntoKey(fcNaN
, fcInfinity
):
1385 case PackCategoriesIntoKey(fcNaN
, fcNaN
):
1386 case PackCategoriesIntoKey(fcNormal
, fcZero
):
1387 case PackCategoriesIntoKey(fcInfinity
, fcNormal
):
1388 case PackCategoriesIntoKey(fcInfinity
, fcZero
):
1391 case PackCategoriesIntoKey(fcZero
, fcNaN
):
1392 case PackCategoriesIntoKey(fcNormal
, fcNaN
):
1393 case PackCategoriesIntoKey(fcInfinity
, fcNaN
):
1394 // We need to be sure to flip the sign here for subtraction because we
1395 // don't have a separate negate operation so -NaN becomes 0 - NaN here.
1396 sign
= rhs
.sign
^ subtract
;
1398 copySignificand(rhs
);
1401 case PackCategoriesIntoKey(fcNormal
, fcInfinity
):
1402 case PackCategoriesIntoKey(fcZero
, fcInfinity
):
1403 category
= fcInfinity
;
1404 sign
= rhs
.sign
^ subtract
;
1407 case PackCategoriesIntoKey(fcZero
, fcNormal
):
1409 sign
= rhs
.sign
^ subtract
;
1412 case PackCategoriesIntoKey(fcZero
, fcZero
):
1413 /* Sign depends on rounding mode; handled by caller. */
1416 case PackCategoriesIntoKey(fcInfinity
, fcInfinity
):
1417 /* Differently signed infinities can only be validly
1419 if (((sign
^ rhs
.sign
)!=0) != subtract
) {
1426 case PackCategoriesIntoKey(fcNormal
, fcNormal
):
1431 /* Add or subtract two normal numbers. */
1432 lostFraction
IEEEFloat::addOrSubtractSignificand(const IEEEFloat
&rhs
,
1435 lostFraction lost_fraction
;
1438 /* Determine if the operation on the absolute values is effectively
1439 an addition or subtraction. */
1440 subtract
^= static_cast<bool>(sign
^ rhs
.sign
);
1442 /* Are we bigger exponent-wise than the RHS? */
1443 bits
= exponent
- rhs
.exponent
;
1445 /* Subtraction is more subtle than one might naively expect. */
1447 IEEEFloat
temp_rhs(rhs
);
1451 reverse
= compareAbsoluteValue(temp_rhs
) == cmpLessThan
;
1452 lost_fraction
= lfExactlyZero
;
1453 } else if (bits
> 0) {
1454 lost_fraction
= temp_rhs
.shiftSignificandRight(bits
- 1);
1455 shiftSignificandLeft(1);
1458 lost_fraction
= shiftSignificandRight(-bits
- 1);
1459 temp_rhs
.shiftSignificandLeft(1);
1464 carry
= temp_rhs
.subtractSignificand
1465 (*this, lost_fraction
!= lfExactlyZero
);
1466 copySignificand(temp_rhs
);
1469 carry
= subtractSignificand
1470 (temp_rhs
, lost_fraction
!= lfExactlyZero
);
1473 /* Invert the lost fraction - it was on the RHS and
1475 if (lost_fraction
== lfLessThanHalf
)
1476 lost_fraction
= lfMoreThanHalf
;
1477 else if (lost_fraction
== lfMoreThanHalf
)
1478 lost_fraction
= lfLessThanHalf
;
1480 /* The code above is intended to ensure that no borrow is
1486 IEEEFloat
temp_rhs(rhs
);
1488 lost_fraction
= temp_rhs
.shiftSignificandRight(bits
);
1489 carry
= addSignificand(temp_rhs
);
1491 lost_fraction
= shiftSignificandRight(-bits
);
1492 carry
= addSignificand(rhs
);
1495 /* We have a guard bit; generating a carry cannot happen. */
1500 return lost_fraction
;
1503 IEEEFloat::opStatus
IEEEFloat::multiplySpecials(const IEEEFloat
&rhs
) {
1504 switch (PackCategoriesIntoKey(category
, rhs
.category
)) {
1506 llvm_unreachable(nullptr);
1508 case PackCategoriesIntoKey(fcNaN
, fcZero
):
1509 case PackCategoriesIntoKey(fcNaN
, fcNormal
):
1510 case PackCategoriesIntoKey(fcNaN
, fcInfinity
):
1511 case PackCategoriesIntoKey(fcNaN
, fcNaN
):
1515 case PackCategoriesIntoKey(fcZero
, fcNaN
):
1516 case PackCategoriesIntoKey(fcNormal
, fcNaN
):
1517 case PackCategoriesIntoKey(fcInfinity
, fcNaN
):
1520 copySignificand(rhs
);
1523 case PackCategoriesIntoKey(fcNormal
, fcInfinity
):
1524 case PackCategoriesIntoKey(fcInfinity
, fcNormal
):
1525 case PackCategoriesIntoKey(fcInfinity
, fcInfinity
):
1526 category
= fcInfinity
;
1529 case PackCategoriesIntoKey(fcZero
, fcNormal
):
1530 case PackCategoriesIntoKey(fcNormal
, fcZero
):
1531 case PackCategoriesIntoKey(fcZero
, fcZero
):
1535 case PackCategoriesIntoKey(fcZero
, fcInfinity
):
1536 case PackCategoriesIntoKey(fcInfinity
, fcZero
):
1540 case PackCategoriesIntoKey(fcNormal
, fcNormal
):
1545 IEEEFloat::opStatus
IEEEFloat::divideSpecials(const IEEEFloat
&rhs
) {
1546 switch (PackCategoriesIntoKey(category
, rhs
.category
)) {
1548 llvm_unreachable(nullptr);
1550 case PackCategoriesIntoKey(fcZero
, fcNaN
):
1551 case PackCategoriesIntoKey(fcNormal
, fcNaN
):
1552 case PackCategoriesIntoKey(fcInfinity
, fcNaN
):
1554 copySignificand(rhs
);
1556 case PackCategoriesIntoKey(fcNaN
, fcZero
):
1557 case PackCategoriesIntoKey(fcNaN
, fcNormal
):
1558 case PackCategoriesIntoKey(fcNaN
, fcInfinity
):
1559 case PackCategoriesIntoKey(fcNaN
, fcNaN
):
1562 case PackCategoriesIntoKey(fcInfinity
, fcZero
):
1563 case PackCategoriesIntoKey(fcInfinity
, fcNormal
):
1564 case PackCategoriesIntoKey(fcZero
, fcInfinity
):
1565 case PackCategoriesIntoKey(fcZero
, fcNormal
):
1568 case PackCategoriesIntoKey(fcNormal
, fcInfinity
):
1572 case PackCategoriesIntoKey(fcNormal
, fcZero
):
1573 category
= fcInfinity
;
1576 case PackCategoriesIntoKey(fcInfinity
, fcInfinity
):
1577 case PackCategoriesIntoKey(fcZero
, fcZero
):
1581 case PackCategoriesIntoKey(fcNormal
, fcNormal
):
1586 IEEEFloat::opStatus
IEEEFloat::modSpecials(const IEEEFloat
&rhs
) {
1587 switch (PackCategoriesIntoKey(category
, rhs
.category
)) {
1589 llvm_unreachable(nullptr);
1591 case PackCategoriesIntoKey(fcNaN
, fcZero
):
1592 case PackCategoriesIntoKey(fcNaN
, fcNormal
):
1593 case PackCategoriesIntoKey(fcNaN
, fcInfinity
):
1594 case PackCategoriesIntoKey(fcNaN
, fcNaN
):
1595 case PackCategoriesIntoKey(fcZero
, fcInfinity
):
1596 case PackCategoriesIntoKey(fcZero
, fcNormal
):
1597 case PackCategoriesIntoKey(fcNormal
, fcInfinity
):
1600 case PackCategoriesIntoKey(fcZero
, fcNaN
):
1601 case PackCategoriesIntoKey(fcNormal
, fcNaN
):
1602 case PackCategoriesIntoKey(fcInfinity
, fcNaN
):
1605 copySignificand(rhs
);
1608 case PackCategoriesIntoKey(fcNormal
, fcZero
):
1609 case PackCategoriesIntoKey(fcInfinity
, fcZero
):
1610 case PackCategoriesIntoKey(fcInfinity
, fcNormal
):
1611 case PackCategoriesIntoKey(fcInfinity
, fcInfinity
):
1612 case PackCategoriesIntoKey(fcZero
, fcZero
):
1616 case PackCategoriesIntoKey(fcNormal
, fcNormal
):
1622 void IEEEFloat::changeSign() {
1623 /* Look mummy, this one's easy. */
1627 /* Normalized addition or subtraction. */
1628 IEEEFloat::opStatus
IEEEFloat::addOrSubtract(const IEEEFloat
&rhs
,
1629 roundingMode rounding_mode
,
1633 fs
= addOrSubtractSpecials(rhs
, subtract
);
1635 /* This return code means it was not a simple case. */
1636 if (fs
== opDivByZero
) {
1637 lostFraction lost_fraction
;
1639 lost_fraction
= addOrSubtractSignificand(rhs
, subtract
);
1640 fs
= normalize(rounding_mode
, lost_fraction
);
1642 /* Can only be zero if we lost no fraction. */
1643 assert(category
!= fcZero
|| lost_fraction
== lfExactlyZero
);
1646 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1647 positive zero unless rounding to minus infinity, except that
1648 adding two like-signed zeroes gives that zero. */
1649 if (category
== fcZero
) {
1650 if (rhs
.category
!= fcZero
|| (sign
== rhs
.sign
) == subtract
)
1651 sign
= (rounding_mode
== rmTowardNegative
);
1657 /* Normalized addition. */
1658 IEEEFloat::opStatus
IEEEFloat::add(const IEEEFloat
&rhs
,
1659 roundingMode rounding_mode
) {
1660 return addOrSubtract(rhs
, rounding_mode
, false);
1663 /* Normalized subtraction. */
1664 IEEEFloat::opStatus
IEEEFloat::subtract(const IEEEFloat
&rhs
,
1665 roundingMode rounding_mode
) {
1666 return addOrSubtract(rhs
, rounding_mode
, true);
1669 /* Normalized multiply. */
1670 IEEEFloat::opStatus
IEEEFloat::multiply(const IEEEFloat
&rhs
,
1671 roundingMode rounding_mode
) {
1675 fs
= multiplySpecials(rhs
);
1677 if (isFiniteNonZero()) {
1678 lostFraction lost_fraction
= multiplySignificand(rhs
, nullptr);
1679 fs
= normalize(rounding_mode
, lost_fraction
);
1680 if (lost_fraction
!= lfExactlyZero
)
1681 fs
= (opStatus
) (fs
| opInexact
);
1687 /* Normalized divide. */
1688 IEEEFloat::opStatus
IEEEFloat::divide(const IEEEFloat
&rhs
,
1689 roundingMode rounding_mode
) {
1693 fs
= divideSpecials(rhs
);
1695 if (isFiniteNonZero()) {
1696 lostFraction lost_fraction
= divideSignificand(rhs
);
1697 fs
= normalize(rounding_mode
, lost_fraction
);
1698 if (lost_fraction
!= lfExactlyZero
)
1699 fs
= (opStatus
) (fs
| opInexact
);
1705 /* Normalized remainder. This is not currently correct in all cases. */
1706 IEEEFloat::opStatus
IEEEFloat::remainder(const IEEEFloat
&rhs
) {
1708 IEEEFloat V
= *this;
1709 unsigned int origSign
= sign
;
1711 fs
= V
.divide(rhs
, rmNearestTiesToEven
);
1712 if (fs
== opDivByZero
)
1715 int parts
= partCount();
1716 integerPart
*x
= new integerPart
[parts
];
1718 fs
= V
.convertToInteger(makeMutableArrayRef(x
, parts
),
1719 parts
* integerPartWidth
, true, rmNearestTiesToEven
,
1721 if (fs
== opInvalidOp
) {
1726 fs
= V
.convertFromZeroExtendedInteger(x
, parts
* integerPartWidth
, true,
1727 rmNearestTiesToEven
);
1728 assert(fs
==opOK
); // should always work
1730 fs
= V
.multiply(rhs
, rmNearestTiesToEven
);
1731 assert(fs
==opOK
|| fs
==opInexact
); // should not overflow or underflow
1733 fs
= subtract(V
, rmNearestTiesToEven
);
1734 assert(fs
==opOK
|| fs
==opInexact
); // likewise
1737 sign
= origSign
; // IEEE754 requires this
1742 /* Normalized llvm frem (C fmod). */
1743 IEEEFloat::opStatus
IEEEFloat::mod(const IEEEFloat
&rhs
) {
1745 fs
= modSpecials(rhs
);
1746 unsigned int origSign
= sign
;
1748 while (isFiniteNonZero() && rhs
.isFiniteNonZero() &&
1749 compareAbsoluteValue(rhs
) != cmpLessThan
) {
1750 IEEEFloat V
= scalbn(rhs
, ilogb(*this) - ilogb(rhs
), rmNearestTiesToEven
);
1751 if (compareAbsoluteValue(V
) == cmpLessThan
)
1752 V
= scalbn(V
, -1, rmNearestTiesToEven
);
1755 fs
= subtract(V
, rmNearestTiesToEven
);
1759 sign
= origSign
; // fmod requires this
1763 /* Normalized fused-multiply-add. */
1764 IEEEFloat::opStatus
IEEEFloat::fusedMultiplyAdd(const IEEEFloat
&multiplicand
,
1765 const IEEEFloat
&addend
,
1766 roundingMode rounding_mode
) {
1769 /* Post-multiplication sign, before addition. */
1770 sign
^= multiplicand
.sign
;
1772 /* If and only if all arguments are normal do we need to do an
1773 extended-precision calculation. */
1774 if (isFiniteNonZero() &&
1775 multiplicand
.isFiniteNonZero() &&
1776 addend
.isFinite()) {
1777 lostFraction lost_fraction
;
1779 lost_fraction
= multiplySignificand(multiplicand
, &addend
);
1780 fs
= normalize(rounding_mode
, lost_fraction
);
1781 if (lost_fraction
!= lfExactlyZero
)
1782 fs
= (opStatus
) (fs
| opInexact
);
1784 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
1785 positive zero unless rounding to minus infinity, except that
1786 adding two like-signed zeroes gives that zero. */
1787 if (category
== fcZero
&& !(fs
& opUnderflow
) && sign
!= addend
.sign
)
1788 sign
= (rounding_mode
== rmTowardNegative
);
1790 fs
= multiplySpecials(multiplicand
);
1792 /* FS can only be opOK or opInvalidOp. There is no more work
1793 to do in the latter case. The IEEE-754R standard says it is
1794 implementation-defined in this case whether, if ADDEND is a
1795 quiet NaN, we raise invalid op; this implementation does so.
1797 If we need to do the addition we can do so with normal
1800 fs
= addOrSubtract(addend
, rounding_mode
, false);
1806 /* Rounding-mode corrrect round to integral value. */
1807 IEEEFloat::opStatus
IEEEFloat::roundToIntegral(roundingMode rounding_mode
) {
1810 // If the exponent is large enough, we know that this value is already
1811 // integral, and the arithmetic below would potentially cause it to saturate
1812 // to +/-Inf. Bail out early instead.
1813 if (isFiniteNonZero() && exponent
+1 >= (int)semanticsPrecision(*semantics
))
1816 // The algorithm here is quite simple: we add 2^(p-1), where p is the
1817 // precision of our format, and then subtract it back off again. The choice
1818 // of rounding modes for the addition/subtraction determines the rounding mode
1819 // for our integral rounding as well.
1820 // NOTE: When the input value is negative, we do subtraction followed by
1821 // addition instead.
1822 APInt
IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics
)), 1);
1823 IntegerConstant
<<= semanticsPrecision(*semantics
)-1;
1824 IEEEFloat
MagicConstant(*semantics
);
1825 fs
= MagicConstant
.convertFromAPInt(IntegerConstant
, false,
1826 rmNearestTiesToEven
);
1827 MagicConstant
.sign
= sign
;
1832 // Preserve the input sign so that we can handle 0.0/-0.0 cases correctly.
1833 bool inputSign
= isNegative();
1835 fs
= add(MagicConstant
, rounding_mode
);
1836 if (fs
!= opOK
&& fs
!= opInexact
)
1839 fs
= subtract(MagicConstant
, rounding_mode
);
1841 // Restore the input sign.
1842 if (inputSign
!= isNegative())
1849 /* Comparison requires normalized numbers. */
1850 IEEEFloat::cmpResult
IEEEFloat::compare(const IEEEFloat
&rhs
) const {
1853 assert(semantics
== rhs
.semantics
);
1855 switch (PackCategoriesIntoKey(category
, rhs
.category
)) {
1857 llvm_unreachable(nullptr);
1859 case PackCategoriesIntoKey(fcNaN
, fcZero
):
1860 case PackCategoriesIntoKey(fcNaN
, fcNormal
):
1861 case PackCategoriesIntoKey(fcNaN
, fcInfinity
):
1862 case PackCategoriesIntoKey(fcNaN
, fcNaN
):
1863 case PackCategoriesIntoKey(fcZero
, fcNaN
):
1864 case PackCategoriesIntoKey(fcNormal
, fcNaN
):
1865 case PackCategoriesIntoKey(fcInfinity
, fcNaN
):
1866 return cmpUnordered
;
1868 case PackCategoriesIntoKey(fcInfinity
, fcNormal
):
1869 case PackCategoriesIntoKey(fcInfinity
, fcZero
):
1870 case PackCategoriesIntoKey(fcNormal
, fcZero
):
1874 return cmpGreaterThan
;
1876 case PackCategoriesIntoKey(fcNormal
, fcInfinity
):
1877 case PackCategoriesIntoKey(fcZero
, fcInfinity
):
1878 case PackCategoriesIntoKey(fcZero
, fcNormal
):
1880 return cmpGreaterThan
;
1884 case PackCategoriesIntoKey(fcInfinity
, fcInfinity
):
1885 if (sign
== rhs
.sign
)
1890 return cmpGreaterThan
;
1892 case PackCategoriesIntoKey(fcZero
, fcZero
):
1895 case PackCategoriesIntoKey(fcNormal
, fcNormal
):
1899 /* Two normal numbers. Do they have the same sign? */
1900 if (sign
!= rhs
.sign
) {
1902 result
= cmpLessThan
;
1904 result
= cmpGreaterThan
;
1906 /* Compare absolute values; invert result if negative. */
1907 result
= compareAbsoluteValue(rhs
);
1910 if (result
== cmpLessThan
)
1911 result
= cmpGreaterThan
;
1912 else if (result
== cmpGreaterThan
)
1913 result
= cmpLessThan
;
1920 /// IEEEFloat::convert - convert a value of one floating point type to another.
1921 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
1922 /// records whether the transformation lost information, i.e. whether
1923 /// converting the result back to the original type will produce the
1924 /// original value (this is almost the same as return value==fsOK, but there
1925 /// are edge cases where this is not so).
1927 IEEEFloat::opStatus
IEEEFloat::convert(const fltSemantics
&toSemantics
,
1928 roundingMode rounding_mode
,
1930 lostFraction lostFraction
;
1931 unsigned int newPartCount
, oldPartCount
;
1934 const fltSemantics
&fromSemantics
= *semantics
;
1936 lostFraction
= lfExactlyZero
;
1937 newPartCount
= partCountForBits(toSemantics
.precision
+ 1);
1938 oldPartCount
= partCount();
1939 shift
= toSemantics
.precision
- fromSemantics
.precision
;
1941 bool X86SpecialNan
= false;
1942 if (&fromSemantics
== &semX87DoubleExtended
&&
1943 &toSemantics
!= &semX87DoubleExtended
&& category
== fcNaN
&&
1944 (!(*significandParts() & 0x8000000000000000ULL
) ||
1945 !(*significandParts() & 0x4000000000000000ULL
))) {
1946 // x86 has some unusual NaNs which cannot be represented in any other
1947 // format; note them here.
1948 X86SpecialNan
= true;
1951 // If this is a truncation of a denormal number, and the target semantics
1952 // has larger exponent range than the source semantics (this can happen
1953 // when truncating from PowerPC double-double to double format), the
1954 // right shift could lose result mantissa bits. Adjust exponent instead
1955 // of performing excessive shift.
1956 if (shift
< 0 && isFiniteNonZero()) {
1957 int exponentChange
= significandMSB() + 1 - fromSemantics
.precision
;
1958 if (exponent
+ exponentChange
< toSemantics
.minExponent
)
1959 exponentChange
= toSemantics
.minExponent
- exponent
;
1960 if (exponentChange
< shift
)
1961 exponentChange
= shift
;
1962 if (exponentChange
< 0) {
1963 shift
-= exponentChange
;
1964 exponent
+= exponentChange
;
1968 // If this is a truncation, perform the shift before we narrow the storage.
1969 if (shift
< 0 && (isFiniteNonZero() || category
==fcNaN
))
1970 lostFraction
= shiftRight(significandParts(), oldPartCount
, -shift
);
1972 // Fix the storage so it can hold to new value.
1973 if (newPartCount
> oldPartCount
) {
1974 // The new type requires more storage; make it available.
1975 integerPart
*newParts
;
1976 newParts
= new integerPart
[newPartCount
];
1977 APInt::tcSet(newParts
, 0, newPartCount
);
1978 if (isFiniteNonZero() || category
==fcNaN
)
1979 APInt::tcAssign(newParts
, significandParts(), oldPartCount
);
1981 significand
.parts
= newParts
;
1982 } else if (newPartCount
== 1 && oldPartCount
!= 1) {
1983 // Switch to built-in storage for a single part.
1984 integerPart newPart
= 0;
1985 if (isFiniteNonZero() || category
==fcNaN
)
1986 newPart
= significandParts()[0];
1988 significand
.part
= newPart
;
1991 // Now that we have the right storage, switch the semantics.
1992 semantics
= &toSemantics
;
1994 // If this is an extension, perform the shift now that the storage is
1996 if (shift
> 0 && (isFiniteNonZero() || category
==fcNaN
))
1997 APInt::tcShiftLeft(significandParts(), newPartCount
, shift
);
1999 if (isFiniteNonZero()) {
2000 fs
= normalize(rounding_mode
, lostFraction
);
2001 *losesInfo
= (fs
!= opOK
);
2002 } else if (category
== fcNaN
) {
2003 *losesInfo
= lostFraction
!= lfExactlyZero
|| X86SpecialNan
;
2005 // For x87 extended precision, we want to make a NaN, not a special NaN if
2006 // the input wasn't special either.
2007 if (!X86SpecialNan
&& semantics
== &semX87DoubleExtended
)
2008 APInt::tcSetBit(significandParts(), semantics
->precision
- 1);
2010 // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
2011 // does not give you back the same bits. This is dubious, and we
2012 // don't currently do it. You're really supposed to get
2013 // an invalid operation signal at runtime, but nobody does that.
2023 /* Convert a floating point number to an integer according to the
2024 rounding mode. If the rounded integer value is out of range this
2025 returns an invalid operation exception and the contents of the
2026 destination parts are unspecified. If the rounded value is in
2027 range but the floating point number is not the exact integer, the C
2028 standard doesn't require an inexact exception to be raised. IEEE
2029 854 does require it so we do that.
2031 Note that for conversions to integer type the C standard requires
2032 round-to-zero to always be used. */
2033 IEEEFloat::opStatus
IEEEFloat::convertToSignExtendedInteger(
2034 MutableArrayRef
<integerPart
> parts
, unsigned int width
, bool isSigned
,
2035 roundingMode rounding_mode
, bool *isExact
) const {
2036 lostFraction lost_fraction
;
2037 const integerPart
*src
;
2038 unsigned int dstPartsCount
, truncatedBits
;
2042 /* Handle the three special cases first. */
2043 if (category
== fcInfinity
|| category
== fcNaN
)
2046 dstPartsCount
= partCountForBits(width
);
2047 assert(dstPartsCount
<= parts
.size() && "Integer too big");
2049 if (category
== fcZero
) {
2050 APInt::tcSet(parts
.data(), 0, dstPartsCount
);
2051 // Negative zero can't be represented as an int.
2056 src
= significandParts();
2058 /* Step 1: place our absolute value, with any fraction truncated, in
2061 /* Our absolute value is less than one; truncate everything. */
2062 APInt::tcSet(parts
.data(), 0, dstPartsCount
);
2063 /* For exponent -1 the integer bit represents .5, look at that.
2064 For smaller exponents leftmost truncated bit is 0. */
2065 truncatedBits
= semantics
->precision
-1U - exponent
;
2067 /* We want the most significant (exponent + 1) bits; the rest are
2069 unsigned int bits
= exponent
+ 1U;
2071 /* Hopelessly large in magnitude? */
2075 if (bits
< semantics
->precision
) {
2076 /* We truncate (semantics->precision - bits) bits. */
2077 truncatedBits
= semantics
->precision
- bits
;
2078 APInt::tcExtract(parts
.data(), dstPartsCount
, src
, bits
, truncatedBits
);
2080 /* We want at least as many bits as are available. */
2081 APInt::tcExtract(parts
.data(), dstPartsCount
, src
, semantics
->precision
,
2083 APInt::tcShiftLeft(parts
.data(), dstPartsCount
,
2084 bits
- semantics
->precision
);
2089 /* Step 2: work out any lost fraction, and increment the absolute
2090 value if we would round away from zero. */
2091 if (truncatedBits
) {
2092 lost_fraction
= lostFractionThroughTruncation(src
, partCount(),
2094 if (lost_fraction
!= lfExactlyZero
&&
2095 roundAwayFromZero(rounding_mode
, lost_fraction
, truncatedBits
)) {
2096 if (APInt::tcIncrement(parts
.data(), dstPartsCount
))
2097 return opInvalidOp
; /* Overflow. */
2100 lost_fraction
= lfExactlyZero
;
2103 /* Step 3: check if we fit in the destination. */
2104 unsigned int omsb
= APInt::tcMSB(parts
.data(), dstPartsCount
) + 1;
2108 /* Negative numbers cannot be represented as unsigned. */
2112 /* It takes omsb bits to represent the unsigned integer value.
2113 We lose a bit for the sign, but care is needed as the
2114 maximally negative integer is a special case. */
2115 if (omsb
== width
&&
2116 APInt::tcLSB(parts
.data(), dstPartsCount
) + 1 != omsb
)
2119 /* This case can happen because of rounding. */
2124 APInt::tcNegate (parts
.data(), dstPartsCount
);
2126 if (omsb
>= width
+ !isSigned
)
2130 if (lost_fraction
== lfExactlyZero
) {
2137 /* Same as convertToSignExtendedInteger, except we provide
2138 deterministic values in case of an invalid operation exception,
2139 namely zero for NaNs and the minimal or maximal value respectively
2140 for underflow or overflow.
2141 The *isExact output tells whether the result is exact, in the sense
2142 that converting it back to the original floating point type produces
2143 the original value. This is almost equivalent to result==opOK,
2144 except for negative zeroes.
2147 IEEEFloat::convertToInteger(MutableArrayRef
<integerPart
> parts
,
2148 unsigned int width
, bool isSigned
,
2149 roundingMode rounding_mode
, bool *isExact
) const {
2152 fs
= convertToSignExtendedInteger(parts
, width
, isSigned
, rounding_mode
,
2155 if (fs
== opInvalidOp
) {
2156 unsigned int bits
, dstPartsCount
;
2158 dstPartsCount
= partCountForBits(width
);
2159 assert(dstPartsCount
<= parts
.size() && "Integer too big");
2161 if (category
== fcNaN
)
2166 bits
= width
- isSigned
;
2168 APInt::tcSetLeastSignificantBits(parts
.data(), dstPartsCount
, bits
);
2169 if (sign
&& isSigned
)
2170 APInt::tcShiftLeft(parts
.data(), dstPartsCount
, width
- 1);
2176 /* Convert an unsigned integer SRC to a floating point number,
2177 rounding according to ROUNDING_MODE. The sign of the floating
2178 point number is not modified. */
2179 IEEEFloat::opStatus
IEEEFloat::convertFromUnsignedParts(
2180 const integerPart
*src
, unsigned int srcCount
, roundingMode rounding_mode
) {
2181 unsigned int omsb
, precision
, dstCount
;
2183 lostFraction lost_fraction
;
2185 category
= fcNormal
;
2186 omsb
= APInt::tcMSB(src
, srcCount
) + 1;
2187 dst
= significandParts();
2188 dstCount
= partCount();
2189 precision
= semantics
->precision
;
2191 /* We want the most significant PRECISION bits of SRC. There may not
2192 be that many; extract what we can. */
2193 if (precision
<= omsb
) {
2194 exponent
= omsb
- 1;
2195 lost_fraction
= lostFractionThroughTruncation(src
, srcCount
,
2197 APInt::tcExtract(dst
, dstCount
, src
, precision
, omsb
- precision
);
2199 exponent
= precision
- 1;
2200 lost_fraction
= lfExactlyZero
;
2201 APInt::tcExtract(dst
, dstCount
, src
, omsb
, 0);
2204 return normalize(rounding_mode
, lost_fraction
);
2207 IEEEFloat::opStatus
IEEEFloat::convertFromAPInt(const APInt
&Val
, bool isSigned
,
2208 roundingMode rounding_mode
) {
2209 unsigned int partCount
= Val
.getNumWords();
2213 if (isSigned
&& api
.isNegative()) {
2218 return convertFromUnsignedParts(api
.getRawData(), partCount
, rounding_mode
);
2221 /* Convert a two's complement integer SRC to a floating point number,
2222 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2223 integer is signed, in which case it must be sign-extended. */
2225 IEEEFloat::convertFromSignExtendedInteger(const integerPart
*src
,
2226 unsigned int srcCount
, bool isSigned
,
2227 roundingMode rounding_mode
) {
2231 APInt::tcExtractBit(src
, srcCount
* integerPartWidth
- 1)) {
2234 /* If we're signed and negative negate a copy. */
2236 copy
= new integerPart
[srcCount
];
2237 APInt::tcAssign(copy
, src
, srcCount
);
2238 APInt::tcNegate(copy
, srcCount
);
2239 status
= convertFromUnsignedParts(copy
, srcCount
, rounding_mode
);
2243 status
= convertFromUnsignedParts(src
, srcCount
, rounding_mode
);
2249 /* FIXME: should this just take a const APInt reference? */
2251 IEEEFloat::convertFromZeroExtendedInteger(const integerPart
*parts
,
2252 unsigned int width
, bool isSigned
,
2253 roundingMode rounding_mode
) {
2254 unsigned int partCount
= partCountForBits(width
);
2255 APInt api
= APInt(width
, makeArrayRef(parts
, partCount
));
2258 if (isSigned
&& APInt::tcExtractBit(parts
, width
- 1)) {
2263 return convertFromUnsignedParts(api
.getRawData(), partCount
, rounding_mode
);
2267 IEEEFloat::convertFromHexadecimalString(StringRef s
,
2268 roundingMode rounding_mode
) {
2269 lostFraction lost_fraction
= lfExactlyZero
;
2271 category
= fcNormal
;
2275 integerPart
*significand
= significandParts();
2276 unsigned partsCount
= partCount();
2277 unsigned bitPos
= partsCount
* integerPartWidth
;
2278 bool computedTrailingFraction
= false;
2280 // Skip leading zeroes and any (hexa)decimal point.
2281 StringRef::iterator begin
= s
.begin();
2282 StringRef::iterator end
= s
.end();
2283 StringRef::iterator dot
;
2284 StringRef::iterator p
= skipLeadingZeroesAndAnyDot(begin
, end
, &dot
);
2285 StringRef::iterator firstSignificantDigit
= p
;
2288 integerPart hex_value
;
2291 assert(dot
== end
&& "String contains multiple dots");
2296 hex_value
= hexDigitValue(*p
);
2297 if (hex_value
== -1U)
2302 // Store the number while we have space.
2305 hex_value
<<= bitPos
% integerPartWidth
;
2306 significand
[bitPos
/ integerPartWidth
] |= hex_value
;
2307 } else if (!computedTrailingFraction
) {
2308 lost_fraction
= trailingHexadecimalFraction(p
, end
, hex_value
);
2309 computedTrailingFraction
= true;
2313 /* Hex floats require an exponent but not a hexadecimal point. */
2314 assert(p
!= end
&& "Hex strings require an exponent");
2315 assert((*p
== 'p' || *p
== 'P') && "Invalid character in significand");
2316 assert(p
!= begin
&& "Significand has no digits");
2317 assert((dot
== end
|| p
- begin
!= 1) && "Significand has no digits");
2319 /* Ignore the exponent if we are zero. */
2320 if (p
!= firstSignificantDigit
) {
2323 /* Implicit hexadecimal point? */
2327 /* Calculate the exponent adjustment implicit in the number of
2328 significant digits. */
2329 expAdjustment
= static_cast<int>(dot
- firstSignificantDigit
);
2330 if (expAdjustment
< 0)
2332 expAdjustment
= expAdjustment
* 4 - 1;
2334 /* Adjust for writing the significand starting at the most
2335 significant nibble. */
2336 expAdjustment
+= semantics
->precision
;
2337 expAdjustment
-= partsCount
* integerPartWidth
;
2339 /* Adjust for the given exponent. */
2340 exponent
= totalExponent(p
+ 1, end
, expAdjustment
);
2343 return normalize(rounding_mode
, lost_fraction
);
2347 IEEEFloat::roundSignificandWithExponent(const integerPart
*decSigParts
,
2348 unsigned sigPartCount
, int exp
,
2349 roundingMode rounding_mode
) {
2350 unsigned int parts
, pow5PartCount
;
2351 fltSemantics calcSemantics
= { 32767, -32767, 0, 0 };
2352 integerPart pow5Parts
[maxPowerOfFiveParts
];
2355 isNearest
= (rounding_mode
== rmNearestTiesToEven
||
2356 rounding_mode
== rmNearestTiesToAway
);
2358 parts
= partCountForBits(semantics
->precision
+ 11);
2360 /* Calculate pow(5, abs(exp)). */
2361 pow5PartCount
= powerOf5(pow5Parts
, exp
>= 0 ? exp
: -exp
);
2363 for (;; parts
*= 2) {
2364 opStatus sigStatus
, powStatus
;
2365 unsigned int excessPrecision
, truncatedBits
;
2367 calcSemantics
.precision
= parts
* integerPartWidth
- 1;
2368 excessPrecision
= calcSemantics
.precision
- semantics
->precision
;
2369 truncatedBits
= excessPrecision
;
2371 IEEEFloat
decSig(calcSemantics
, uninitialized
);
2372 decSig
.makeZero(sign
);
2373 IEEEFloat
pow5(calcSemantics
);
2375 sigStatus
= decSig
.convertFromUnsignedParts(decSigParts
, sigPartCount
,
2376 rmNearestTiesToEven
);
2377 powStatus
= pow5
.convertFromUnsignedParts(pow5Parts
, pow5PartCount
,
2378 rmNearestTiesToEven
);
2379 /* Add exp, as 10^n = 5^n * 2^n. */
2380 decSig
.exponent
+= exp
;
2382 lostFraction calcLostFraction
;
2383 integerPart HUerr
, HUdistance
;
2384 unsigned int powHUerr
;
2387 /* multiplySignificand leaves the precision-th bit set to 1. */
2388 calcLostFraction
= decSig
.multiplySignificand(pow5
, nullptr);
2389 powHUerr
= powStatus
!= opOK
;
2391 calcLostFraction
= decSig
.divideSignificand(pow5
);
2392 /* Denormal numbers have less precision. */
2393 if (decSig
.exponent
< semantics
->minExponent
) {
2394 excessPrecision
+= (semantics
->minExponent
- decSig
.exponent
);
2395 truncatedBits
= excessPrecision
;
2396 if (excessPrecision
> calcSemantics
.precision
)
2397 excessPrecision
= calcSemantics
.precision
;
2399 /* Extra half-ulp lost in reciprocal of exponent. */
2400 powHUerr
= (powStatus
== opOK
&& calcLostFraction
== lfExactlyZero
) ? 0:2;
2403 /* Both multiplySignificand and divideSignificand return the
2404 result with the integer bit set. */
2405 assert(APInt::tcExtractBit
2406 (decSig
.significandParts(), calcSemantics
.precision
- 1) == 1);
2408 HUerr
= HUerrBound(calcLostFraction
!= lfExactlyZero
, sigStatus
!= opOK
,
2410 HUdistance
= 2 * ulpsFromBoundary(decSig
.significandParts(),
2411 excessPrecision
, isNearest
);
2413 /* Are we guaranteed to round correctly if we truncate? */
2414 if (HUdistance
>= HUerr
) {
2415 APInt::tcExtract(significandParts(), partCount(), decSig
.significandParts(),
2416 calcSemantics
.precision
- excessPrecision
,
2418 /* Take the exponent of decSig. If we tcExtract-ed less bits
2419 above we must adjust our exponent to compensate for the
2420 implicit right shift. */
2421 exponent
= (decSig
.exponent
+ semantics
->precision
2422 - (calcSemantics
.precision
- excessPrecision
));
2423 calcLostFraction
= lostFractionThroughTruncation(decSig
.significandParts(),
2426 return normalize(rounding_mode
, calcLostFraction
);
2432 IEEEFloat::convertFromDecimalString(StringRef str
, roundingMode rounding_mode
) {
2436 /* Scan the text. */
2437 StringRef::iterator p
= str
.begin();
2438 interpretDecimal(p
, str
.end(), &D
);
2440 /* Handle the quick cases. First the case of no significant digits,
2441 i.e. zero, and then exponents that are obviously too large or too
2442 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
2443 definitely overflows if
2445 (exp - 1) * L >= maxExponent
2447 and definitely underflows to zero where
2449 (exp + 1) * L <= minExponent - precision
2451 With integer arithmetic the tightest bounds for L are
2453 93/28 < L < 196/59 [ numerator <= 256 ]
2454 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
2457 // Test if we have a zero number allowing for strings with no null terminators
2458 // and zero decimals with non-zero exponents.
2460 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
2461 // D->firstSigDigit equals str.end(), every digit must be a zero and there can
2462 // be at most one dot. On the other hand, if we have a zero with a non-zero
2463 // exponent, then we know that D.firstSigDigit will be non-numeric.
2464 if (D
.firstSigDigit
== str
.end() || decDigitValue(*D
.firstSigDigit
) >= 10U) {
2468 /* Check whether the normalized exponent is high enough to overflow
2469 max during the log-rebasing in the max-exponent check below. */
2470 } else if (D
.normalizedExponent
- 1 > INT_MAX
/ 42039) {
2471 fs
= handleOverflow(rounding_mode
);
2473 /* If it wasn't, then it also wasn't high enough to overflow max
2474 during the log-rebasing in the min-exponent check. Check that it
2475 won't overflow min in either check, then perform the min-exponent
2477 } else if (D
.normalizedExponent
- 1 < INT_MIN
/ 42039 ||
2478 (D
.normalizedExponent
+ 1) * 28738 <=
2479 8651 * (semantics
->minExponent
- (int) semantics
->precision
)) {
2480 /* Underflow to zero and round. */
2481 category
= fcNormal
;
2483 fs
= normalize(rounding_mode
, lfLessThanHalf
);
2485 /* We can finally safely perform the max-exponent check. */
2486 } else if ((D
.normalizedExponent
- 1) * 42039
2487 >= 12655 * semantics
->maxExponent
) {
2488 /* Overflow and round. */
2489 fs
= handleOverflow(rounding_mode
);
2491 integerPart
*decSignificand
;
2492 unsigned int partCount
;
2494 /* A tight upper bound on number of bits required to hold an
2495 N-digit decimal integer is N * 196 / 59. Allocate enough space
2496 to hold the full significand, and an extra part required by
2498 partCount
= static_cast<unsigned int>(D
.lastSigDigit
- D
.firstSigDigit
) + 1;
2499 partCount
= partCountForBits(1 + 196 * partCount
/ 59);
2500 decSignificand
= new integerPart
[partCount
+ 1];
2503 /* Convert to binary efficiently - we do almost all multiplication
2504 in an integerPart. When this would overflow do we do a single
2505 bignum multiplication, and then revert again to multiplication
2506 in an integerPart. */
2508 integerPart decValue
, val
, multiplier
;
2516 if (p
== str
.end()) {
2520 decValue
= decDigitValue(*p
++);
2521 assert(decValue
< 10U && "Invalid character in significand");
2523 val
= val
* 10 + decValue
;
2524 /* The maximum number that can be multiplied by ten with any
2525 digit added without overflowing an integerPart. */
2526 } while (p
<= D
.lastSigDigit
&& multiplier
<= (~ (integerPart
) 0 - 9) / 10);
2528 /* Multiply out the current part. */
2529 APInt::tcMultiplyPart(decSignificand
, decSignificand
, multiplier
, val
,
2530 partCount
, partCount
+ 1, false);
2532 /* If we used another part (likely but not guaranteed), increase
2534 if (decSignificand
[partCount
])
2536 } while (p
<= D
.lastSigDigit
);
2538 category
= fcNormal
;
2539 fs
= roundSignificandWithExponent(decSignificand
, partCount
,
2540 D
.exponent
, rounding_mode
);
2542 delete [] decSignificand
;
2548 bool IEEEFloat::convertFromStringSpecials(StringRef str
) {
2549 if (str
.equals("inf") || str
.equals("INFINITY") || str
.equals("+Inf")) {
2554 if (str
.equals("-inf") || str
.equals("-INFINITY") || str
.equals("-Inf")) {
2559 if (str
.equals("nan") || str
.equals("NaN")) {
2560 makeNaN(false, false);
2564 if (str
.equals("-nan") || str
.equals("-NaN")) {
2565 makeNaN(false, true);
2572 IEEEFloat::opStatus
IEEEFloat::convertFromString(StringRef str
,
2573 roundingMode rounding_mode
) {
2574 assert(!str
.empty() && "Invalid string length");
2576 // Handle special cases.
2577 if (convertFromStringSpecials(str
))
2580 /* Handle a leading minus sign. */
2581 StringRef::iterator p
= str
.begin();
2582 size_t slen
= str
.size();
2583 sign
= *p
== '-' ? 1 : 0;
2584 if (*p
== '-' || *p
== '+') {
2587 assert(slen
&& "String has no digits");
2590 if (slen
>= 2 && p
[0] == '0' && (p
[1] == 'x' || p
[1] == 'X')) {
2591 assert(slen
- 2 && "Invalid string");
2592 return convertFromHexadecimalString(StringRef(p
+ 2, slen
- 2),
2596 return convertFromDecimalString(StringRef(p
, slen
), rounding_mode
);
2599 /* Write out a hexadecimal representation of the floating point value
2600 to DST, which must be of sufficient size, in the C99 form
2601 [-]0xh.hhhhp[+-]d. Return the number of characters written,
2602 excluding the terminating NUL.
2604 If UPPERCASE, the output is in upper case, otherwise in lower case.
2606 HEXDIGITS digits appear altogether, rounding the value if
2607 necessary. If HEXDIGITS is 0, the minimal precision to display the
2608 number precisely is used instead. If nothing would appear after
2609 the decimal point it is suppressed.
2611 The decimal exponent is always printed and has at least one digit.
2612 Zero values display an exponent of zero. Infinities and NaNs
2613 appear as "infinity" or "nan" respectively.
2615 The above rules are as specified by C99. There is ambiguity about
2616 what the leading hexadecimal digit should be. This implementation
2617 uses whatever is necessary so that the exponent is displayed as
2618 stored. This implies the exponent will fall within the IEEE format
2619 range, and the leading hexadecimal digit will be 0 (for denormals),
2620 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
2621 any other digits zero).
2623 unsigned int IEEEFloat::convertToHexString(char *dst
, unsigned int hexDigits
,
2625 roundingMode rounding_mode
) const {
2634 memcpy (dst
, upperCase
? infinityU
: infinityL
, sizeof infinityU
- 1);
2635 dst
+= sizeof infinityL
- 1;
2639 memcpy (dst
, upperCase
? NaNU
: NaNL
, sizeof NaNU
- 1);
2640 dst
+= sizeof NaNU
- 1;
2645 *dst
++ = upperCase
? 'X': 'x';
2647 if (hexDigits
> 1) {
2649 memset (dst
, '0', hexDigits
- 1);
2650 dst
+= hexDigits
- 1;
2652 *dst
++ = upperCase
? 'P': 'p';
2657 dst
= convertNormalToHexString (dst
, hexDigits
, upperCase
, rounding_mode
);
2663 return static_cast<unsigned int>(dst
- p
);
2666 /* Does the hard work of outputting the correctly rounded hexadecimal
2667 form of a normal floating point number with the specified number of
2668 hexadecimal digits. If HEXDIGITS is zero the minimum number of
2669 digits necessary to print the value precisely is output. */
2670 char *IEEEFloat::convertNormalToHexString(char *dst
, unsigned int hexDigits
,
2672 roundingMode rounding_mode
) const {
2673 unsigned int count
, valueBits
, shift
, partsCount
, outputDigits
;
2674 const char *hexDigitChars
;
2675 const integerPart
*significand
;
2680 *dst
++ = upperCase
? 'X': 'x';
2683 hexDigitChars
= upperCase
? hexDigitsUpper
: hexDigitsLower
;
2685 significand
= significandParts();
2686 partsCount
= partCount();
2688 /* +3 because the first digit only uses the single integer bit, so
2689 we have 3 virtual zero most-significant-bits. */
2690 valueBits
= semantics
->precision
+ 3;
2691 shift
= integerPartWidth
- valueBits
% integerPartWidth
;
2693 /* The natural number of digits required ignoring trailing
2694 insignificant zeroes. */
2695 outputDigits
= (valueBits
- significandLSB () + 3) / 4;
2697 /* hexDigits of zero means use the required number for the
2698 precision. Otherwise, see if we are truncating. If we are,
2699 find out if we need to round away from zero. */
2701 if (hexDigits
< outputDigits
) {
2702 /* We are dropping non-zero bits, so need to check how to round.
2703 "bits" is the number of dropped bits. */
2705 lostFraction fraction
;
2707 bits
= valueBits
- hexDigits
* 4;
2708 fraction
= lostFractionThroughTruncation (significand
, partsCount
, bits
);
2709 roundUp
= roundAwayFromZero(rounding_mode
, fraction
, bits
);
2711 outputDigits
= hexDigits
;
2714 /* Write the digits consecutively, and start writing in the location
2715 of the hexadecimal point. We move the most significant digit
2716 left and add the hexadecimal point later. */
2719 count
= (valueBits
+ integerPartWidth
- 1) / integerPartWidth
;
2721 while (outputDigits
&& count
) {
2724 /* Put the most significant integerPartWidth bits in "part". */
2725 if (--count
== partsCount
)
2726 part
= 0; /* An imaginary higher zero part. */
2728 part
= significand
[count
] << shift
;
2731 part
|= significand
[count
- 1] >> (integerPartWidth
- shift
);
2733 /* Convert as much of "part" to hexdigits as we can. */
2734 unsigned int curDigits
= integerPartWidth
/ 4;
2736 if (curDigits
> outputDigits
)
2737 curDigits
= outputDigits
;
2738 dst
+= partAsHex (dst
, part
, curDigits
, hexDigitChars
);
2739 outputDigits
-= curDigits
;
2745 /* Note that hexDigitChars has a trailing '0'. */
2748 *q
= hexDigitChars
[hexDigitValue (*q
) + 1];
2749 } while (*q
== '0');
2752 /* Add trailing zeroes. */
2753 memset (dst
, '0', outputDigits
);
2754 dst
+= outputDigits
;
2757 /* Move the most significant digit to before the point, and if there
2758 is something after the decimal point add it. This must come
2759 after rounding above. */
2766 /* Finally output the exponent. */
2767 *dst
++ = upperCase
? 'P': 'p';
2769 return writeSignedDecimal (dst
, exponent
);
2772 hash_code
hash_value(const IEEEFloat
&Arg
) {
2773 if (!Arg
.isFiniteNonZero())
2774 return hash_combine((uint8_t)Arg
.category
,
2775 // NaN has no sign, fix it at zero.
2776 Arg
.isNaN() ? (uint8_t)0 : (uint8_t)Arg
.sign
,
2777 Arg
.semantics
->precision
);
2779 // Normal floats need their exponent and significand hashed.
2780 return hash_combine((uint8_t)Arg
.category
, (uint8_t)Arg
.sign
,
2781 Arg
.semantics
->precision
, Arg
.exponent
,
2783 Arg
.significandParts(),
2784 Arg
.significandParts() + Arg
.partCount()));
// Conversion from APFloat to/from host float/double.  It may eventually be
// possible to eliminate these and have everybody deal with APFloats, but that
// will take a while.  This approach will not easily extend to long double.
// The current implementation requires integerPartWidth==64, which is correct
// at the moment but could be made more general.

// Denormals have exponent minExponent in APFloat, but minExponent-1 in
// the actual IEEE representations.  We compensate for that here.
2796 APInt
IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
2797 assert(semantics
== (const llvm::fltSemantics
*)&semX87DoubleExtended
);
2798 assert(partCount()==2);
2800 uint64_t myexponent
, mysignificand
;
2802 if (isFiniteNonZero()) {
2803 myexponent
= exponent
+16383; //bias
2804 mysignificand
= significandParts()[0];
2805 if (myexponent
==1 && !(mysignificand
& 0x8000000000000000ULL
))
2806 myexponent
= 0; // denormal
2807 } else if (category
==fcZero
) {
2810 } else if (category
==fcInfinity
) {
2811 myexponent
= 0x7fff;
2812 mysignificand
= 0x8000000000000000ULL
;
2814 assert(category
== fcNaN
&& "Unknown category");
2815 myexponent
= 0x7fff;
2816 mysignificand
= significandParts()[0];
2820 words
[0] = mysignificand
;
2821 words
[1] = ((uint64_t)(sign
& 1) << 15) |
2822 (myexponent
& 0x7fffLL
);
2823 return APInt(80, words
);
2826 APInt
IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
2827 assert(semantics
== (const llvm::fltSemantics
*)&semPPCDoubleDoubleLegacy
);
2828 assert(partCount()==2);
2834 // Convert number to double. To avoid spurious underflows, we re-
2835 // normalize against the "double" minExponent first, and only *then*
2836 // truncate the mantissa. The result of that second conversion
2837 // may be inexact, but should never underflow.
2838 // Declare fltSemantics before APFloat that uses it (and
2839 // saves pointer to it) to ensure correct destruction order.
2840 fltSemantics extendedSemantics
= *semantics
;
2841 extendedSemantics
.minExponent
= semIEEEdouble
.minExponent
;
2842 IEEEFloat
extended(*this);
2843 fs
= extended
.convert(extendedSemantics
, rmNearestTiesToEven
, &losesInfo
);
2844 assert(fs
== opOK
&& !losesInfo
);
2847 IEEEFloat
u(extended
);
2848 fs
= u
.convert(semIEEEdouble
, rmNearestTiesToEven
, &losesInfo
);
2849 assert(fs
== opOK
|| fs
== opInexact
);
2851 words
[0] = *u
.convertDoubleAPFloatToAPInt().getRawData();
2853 // If conversion was exact or resulted in a special case, we're done;
2854 // just set the second double to zero. Otherwise, re-convert back to
2855 // the extended format and compute the difference. This now should
2856 // convert exactly to double.
2857 if (u
.isFiniteNonZero() && losesInfo
) {
2858 fs
= u
.convert(extendedSemantics
, rmNearestTiesToEven
, &losesInfo
);
2859 assert(fs
== opOK
&& !losesInfo
);
2862 IEEEFloat
v(extended
);
2863 v
.subtract(u
, rmNearestTiesToEven
);
2864 fs
= v
.convert(semIEEEdouble
, rmNearestTiesToEven
, &losesInfo
);
2865 assert(fs
== opOK
&& !losesInfo
);
2867 words
[1] = *v
.convertDoubleAPFloatToAPInt().getRawData();
2872 return APInt(128, words
);
2875 APInt
IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
2876 assert(semantics
== (const llvm::fltSemantics
*)&semIEEEquad
);
2877 assert(partCount()==2);
2879 uint64_t myexponent
, mysignificand
, mysignificand2
;
2881 if (isFiniteNonZero()) {
2882 myexponent
= exponent
+16383; //bias
2883 mysignificand
= significandParts()[0];
2884 mysignificand2
= significandParts()[1];
2885 if (myexponent
==1 && !(mysignificand2
& 0x1000000000000LL
))
2886 myexponent
= 0; // denormal
2887 } else if (category
==fcZero
) {
2889 mysignificand
= mysignificand2
= 0;
2890 } else if (category
==fcInfinity
) {
2891 myexponent
= 0x7fff;
2892 mysignificand
= mysignificand2
= 0;
2894 assert(category
== fcNaN
&& "Unknown category!");
2895 myexponent
= 0x7fff;
2896 mysignificand
= significandParts()[0];
2897 mysignificand2
= significandParts()[1];
2901 words
[0] = mysignificand
;
2902 words
[1] = ((uint64_t)(sign
& 1) << 63) |
2903 ((myexponent
& 0x7fff) << 48) |
2904 (mysignificand2
& 0xffffffffffffLL
);
2906 return APInt(128, words
);
2909 APInt
IEEEFloat::convertDoubleAPFloatToAPInt() const {
2910 assert(semantics
== (const llvm::fltSemantics
*)&semIEEEdouble
);
2911 assert(partCount()==1);
2913 uint64_t myexponent
, mysignificand
;
2915 if (isFiniteNonZero()) {
2916 myexponent
= exponent
+1023; //bias
2917 mysignificand
= *significandParts();
2918 if (myexponent
==1 && !(mysignificand
& 0x10000000000000LL
))
2919 myexponent
= 0; // denormal
2920 } else if (category
==fcZero
) {
2923 } else if (category
==fcInfinity
) {
2927 assert(category
== fcNaN
&& "Unknown category!");
2929 mysignificand
= *significandParts();
2932 return APInt(64, ((((uint64_t)(sign
& 1) << 63) |
2933 ((myexponent
& 0x7ff) << 52) |
2934 (mysignificand
& 0xfffffffffffffLL
))));
2937 APInt
IEEEFloat::convertFloatAPFloatToAPInt() const {
2938 assert(semantics
== (const llvm::fltSemantics
*)&semIEEEsingle
);
2939 assert(partCount()==1);
2941 uint32_t myexponent
, mysignificand
;
2943 if (isFiniteNonZero()) {
2944 myexponent
= exponent
+127; //bias
2945 mysignificand
= (uint32_t)*significandParts();
2946 if (myexponent
== 1 && !(mysignificand
& 0x800000))
2947 myexponent
= 0; // denormal
2948 } else if (category
==fcZero
) {
2951 } else if (category
==fcInfinity
) {
2955 assert(category
== fcNaN
&& "Unknown category!");
2957 mysignificand
= (uint32_t)*significandParts();
2960 return APInt(32, (((sign
&1) << 31) | ((myexponent
&0xff) << 23) |
2961 (mysignificand
& 0x7fffff)));
2964 APInt
IEEEFloat::convertHalfAPFloatToAPInt() const {
2965 assert(semantics
== (const llvm::fltSemantics
*)&semIEEEhalf
);
2966 assert(partCount()==1);
2968 uint32_t myexponent
, mysignificand
;
2970 if (isFiniteNonZero()) {
2971 myexponent
= exponent
+15; //bias
2972 mysignificand
= (uint32_t)*significandParts();
2973 if (myexponent
== 1 && !(mysignificand
& 0x400))
2974 myexponent
= 0; // denormal
2975 } else if (category
==fcZero
) {
2978 } else if (category
==fcInfinity
) {
2982 assert(category
== fcNaN
&& "Unknown category!");
2984 mysignificand
= (uint32_t)*significandParts();
2987 return APInt(16, (((sign
&1) << 15) | ((myexponent
&0x1f) << 10) |
2988 (mysignificand
& 0x3ff)));
2991 // This function creates an APInt that is just a bit map of the floating
2992 // point constant as it would appear in memory. It is not a conversion,
2993 // and treating the result as a normal integer is unlikely to be useful.
2995 APInt
IEEEFloat::bitcastToAPInt() const {
2996 if (semantics
== (const llvm::fltSemantics
*)&semIEEEhalf
)
2997 return convertHalfAPFloatToAPInt();
2999 if (semantics
== (const llvm::fltSemantics
*)&semIEEEsingle
)
3000 return convertFloatAPFloatToAPInt();
3002 if (semantics
== (const llvm::fltSemantics
*)&semIEEEdouble
)
3003 return convertDoubleAPFloatToAPInt();
3005 if (semantics
== (const llvm::fltSemantics
*)&semIEEEquad
)
3006 return convertQuadrupleAPFloatToAPInt();
3008 if (semantics
== (const llvm::fltSemantics
*)&semPPCDoubleDoubleLegacy
)
3009 return convertPPCDoubleDoubleAPFloatToAPInt();
3011 assert(semantics
== (const llvm::fltSemantics
*)&semX87DoubleExtended
&&
3013 return convertF80LongDoubleAPFloatToAPInt();
3016 float IEEEFloat::convertToFloat() const {
3017 assert(semantics
== (const llvm::fltSemantics
*)&semIEEEsingle
&&
3018 "Float semantics are not IEEEsingle");
3019 APInt api
= bitcastToAPInt();
3020 return api
.bitsToFloat();
3023 double IEEEFloat::convertToDouble() const {
3024 assert(semantics
== (const llvm::fltSemantics
*)&semIEEEdouble
&&
3025 "Float semantics are not IEEEdouble");
3026 APInt api
= bitcastToAPInt();
3027 return api
.bitsToDouble();
3030 /// Integer bit is explicit in this format. Intel hardware (387 and later)
3031 /// does not support these bit patterns:
3032 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3033 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3034 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3035 /// exponent = 0, integer bit 1 ("pseudodenormal")
3036 /// At the moment, the first three are treated as NaNs, the last one as Normal.
3037 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt
&api
) {
3038 assert(api
.getBitWidth()==80);
3039 uint64_t i1
= api
.getRawData()[0];
3040 uint64_t i2
= api
.getRawData()[1];
3041 uint64_t myexponent
= (i2
& 0x7fff);
3042 uint64_t mysignificand
= i1
;
3043 uint8_t myintegerbit
= mysignificand
>> 63;
3045 initialize(&semX87DoubleExtended
);
3046 assert(partCount()==2);
3048 sign
= static_cast<unsigned int>(i2
>>15);
3049 if (myexponent
== 0 && mysignificand
== 0) {
3050 // exponent, significand meaningless
3052 } else if (myexponent
==0x7fff && mysignificand
==0x8000000000000000ULL
) {
3053 // exponent, significand meaningless
3054 category
= fcInfinity
;
3055 } else if ((myexponent
== 0x7fff && mysignificand
!= 0x8000000000000000ULL
) ||
3056 (myexponent
!= 0x7fff && myexponent
!= 0 && myintegerbit
== 0)) {
3057 // exponent meaningless
3059 significandParts()[0] = mysignificand
;
3060 significandParts()[1] = 0;
3062 category
= fcNormal
;
3063 exponent
= myexponent
- 16383;
3064 significandParts()[0] = mysignificand
;
3065 significandParts()[1] = 0;
3066 if (myexponent
==0) // denormal
3071 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt
&api
) {
3072 assert(api
.getBitWidth()==128);
3073 uint64_t i1
= api
.getRawData()[0];
3074 uint64_t i2
= api
.getRawData()[1];
3078 // Get the first double and convert to our format.
3079 initFromDoubleAPInt(APInt(64, i1
));
3080 fs
= convert(semPPCDoubleDoubleLegacy
, rmNearestTiesToEven
, &losesInfo
);
3081 assert(fs
== opOK
&& !losesInfo
);
3084 // Unless we have a special case, add in second double.
3085 if (isFiniteNonZero()) {
3086 IEEEFloat
v(semIEEEdouble
, APInt(64, i2
));
3087 fs
= v
.convert(semPPCDoubleDoubleLegacy
, rmNearestTiesToEven
, &losesInfo
);
3088 assert(fs
== opOK
&& !losesInfo
);
3091 add(v
, rmNearestTiesToEven
);
3095 void IEEEFloat::initFromQuadrupleAPInt(const APInt
&api
) {
3096 assert(api
.getBitWidth()==128);
3097 uint64_t i1
= api
.getRawData()[0];
3098 uint64_t i2
= api
.getRawData()[1];
3099 uint64_t myexponent
= (i2
>> 48) & 0x7fff;
3100 uint64_t mysignificand
= i1
;
3101 uint64_t mysignificand2
= i2
& 0xffffffffffffLL
;
3103 initialize(&semIEEEquad
);
3104 assert(partCount()==2);
3106 sign
= static_cast<unsigned int>(i2
>>63);
3107 if (myexponent
==0 &&
3108 (mysignificand
==0 && mysignificand2
==0)) {
3109 // exponent, significand meaningless
3111 } else if (myexponent
==0x7fff &&
3112 (mysignificand
==0 && mysignificand2
==0)) {
3113 // exponent, significand meaningless
3114 category
= fcInfinity
;
3115 } else if (myexponent
==0x7fff &&
3116 (mysignificand
!=0 || mysignificand2
!=0)) {
3117 // exponent meaningless
3119 significandParts()[0] = mysignificand
;
3120 significandParts()[1] = mysignificand2
;
3122 category
= fcNormal
;
3123 exponent
= myexponent
- 16383;
3124 significandParts()[0] = mysignificand
;
3125 significandParts()[1] = mysignificand2
;
3126 if (myexponent
==0) // denormal
3129 significandParts()[1] |= 0x1000000000000LL
; // integer bit
3133 void IEEEFloat::initFromDoubleAPInt(const APInt
&api
) {
3134 assert(api
.getBitWidth()==64);
3135 uint64_t i
= *api
.getRawData();
3136 uint64_t myexponent
= (i
>> 52) & 0x7ff;
3137 uint64_t mysignificand
= i
& 0xfffffffffffffLL
;
3139 initialize(&semIEEEdouble
);
3140 assert(partCount()==1);
3142 sign
= static_cast<unsigned int>(i
>>63);
3143 if (myexponent
==0 && mysignificand
==0) {
3144 // exponent, significand meaningless
3146 } else if (myexponent
==0x7ff && mysignificand
==0) {
3147 // exponent, significand meaningless
3148 category
= fcInfinity
;
3149 } else if (myexponent
==0x7ff && mysignificand
!=0) {
3150 // exponent meaningless
3152 *significandParts() = mysignificand
;
3154 category
= fcNormal
;
3155 exponent
= myexponent
- 1023;
3156 *significandParts() = mysignificand
;
3157 if (myexponent
==0) // denormal
3160 *significandParts() |= 0x10000000000000LL
; // integer bit
3164 void IEEEFloat::initFromFloatAPInt(const APInt
&api
) {
3165 assert(api
.getBitWidth()==32);
3166 uint32_t i
= (uint32_t)*api
.getRawData();
3167 uint32_t myexponent
= (i
>> 23) & 0xff;
3168 uint32_t mysignificand
= i
& 0x7fffff;
3170 initialize(&semIEEEsingle
);
3171 assert(partCount()==1);
3174 if (myexponent
==0 && mysignificand
==0) {
3175 // exponent, significand meaningless
3177 } else if (myexponent
==0xff && mysignificand
==0) {
3178 // exponent, significand meaningless
3179 category
= fcInfinity
;
3180 } else if (myexponent
==0xff && mysignificand
!=0) {
3181 // sign, exponent, significand meaningless
3183 *significandParts() = mysignificand
;
3185 category
= fcNormal
;
3186 exponent
= myexponent
- 127; //bias
3187 *significandParts() = mysignificand
;
3188 if (myexponent
==0) // denormal
3191 *significandParts() |= 0x800000; // integer bit
3195 void IEEEFloat::initFromHalfAPInt(const APInt
&api
) {
3196 assert(api
.getBitWidth()==16);
3197 uint32_t i
= (uint32_t)*api
.getRawData();
3198 uint32_t myexponent
= (i
>> 10) & 0x1f;
3199 uint32_t mysignificand
= i
& 0x3ff;
3201 initialize(&semIEEEhalf
);
3202 assert(partCount()==1);
3205 if (myexponent
==0 && mysignificand
==0) {
3206 // exponent, significand meaningless
3208 } else if (myexponent
==0x1f && mysignificand
==0) {
3209 // exponent, significand meaningless
3210 category
= fcInfinity
;
3211 } else if (myexponent
==0x1f && mysignificand
!=0) {
3212 // sign, exponent, significand meaningless
3214 *significandParts() = mysignificand
;
3216 category
= fcNormal
;
3217 exponent
= myexponent
- 15; //bias
3218 *significandParts() = mysignificand
;
3219 if (myexponent
==0) // denormal
3222 *significandParts() |= 0x400; // integer bit
3226 /// Treat api as containing the bits of a floating point number. Currently
3227 /// we infer the floating point type from the size of the APInt. The
3228 /// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
3229 /// when the size is anything else).
3230 void IEEEFloat::initFromAPInt(const fltSemantics
*Sem
, const APInt
&api
) {
3231 if (Sem
== &semIEEEhalf
)
3232 return initFromHalfAPInt(api
);
3233 if (Sem
== &semIEEEsingle
)
3234 return initFromFloatAPInt(api
);
3235 if (Sem
== &semIEEEdouble
)
3236 return initFromDoubleAPInt(api
);
3237 if (Sem
== &semX87DoubleExtended
)
3238 return initFromF80LongDoubleAPInt(api
);
3239 if (Sem
== &semIEEEquad
)
3240 return initFromQuadrupleAPInt(api
);
3241 if (Sem
== &semPPCDoubleDoubleLegacy
)
3242 return initFromPPCDoubleDoubleAPInt(api
);
3244 llvm_unreachable(nullptr);
3247 /// Make this number the largest magnitude normal number in the given
3249 void IEEEFloat::makeLargest(bool Negative
) {
3250 // We want (in interchange format):
3251 // sign = {Negative}
3253 // significand = 1..1
3254 category
= fcNormal
;
3256 exponent
= semantics
->maxExponent
;
3258 // Use memset to set all but the highest integerPart to all ones.
3259 integerPart
*significand
= significandParts();
3260 unsigned PartCount
= partCount();
3261 memset(significand
, 0xFF, sizeof(integerPart
)*(PartCount
- 1));
3263 // Set the high integerPart especially setting all unused top bits for
3264 // internal consistency.
3265 const unsigned NumUnusedHighBits
=
3266 PartCount
*integerPartWidth
- semantics
->precision
;
3267 significand
[PartCount
- 1] = (NumUnusedHighBits
< integerPartWidth
)
3268 ? (~integerPart(0) >> NumUnusedHighBits
)
3272 /// Make this number the smallest magnitude denormal number in the given
3274 void IEEEFloat::makeSmallest(bool Negative
) {
3275 // We want (in interchange format):
3276 // sign = {Negative}
3278 // significand = 0..01
3279 category
= fcNormal
;
3281 exponent
= semantics
->minExponent
;
3282 APInt::tcSet(significandParts(), 1, partCount());
3285 void IEEEFloat::makeSmallestNormalized(bool Negative
) {
3286 // We want (in interchange format):
3287 // sign = {Negative}
3289 // significand = 10..0
3291 category
= fcNormal
;
3294 exponent
= semantics
->minExponent
;
3295 significandParts()[partCountForBits(semantics
->precision
) - 1] |=
3296 (((integerPart
)1) << ((semantics
->precision
- 1) % integerPartWidth
));
3299 IEEEFloat::IEEEFloat(const fltSemantics
&Sem
, const APInt
&API
) {
3300 initFromAPInt(&Sem
, API
);
3303 IEEEFloat::IEEEFloat(float f
) {
3304 initFromAPInt(&semIEEEsingle
, APInt::floatToBits(f
));
3307 IEEEFloat::IEEEFloat(double d
) {
3308 initFromAPInt(&semIEEEdouble
, APInt::doubleToBits(d
));
3312 void append(SmallVectorImpl
<char> &Buffer
, StringRef Str
) {
3313 Buffer
.append(Str
.begin(), Str
.end());
3316 /// Removes data from the given significand until it is no more
3317 /// precise than is required for the desired precision.
///
/// Returns early when the significand's active bit count already fits the bit
/// budget derived from \p FormatPrecision, or when no decimal digit can be
/// removed. Otherwise \p exp is increased by the number of decimal digits
/// removed, \p significand is divided by the accumulated divisor and then
/// truncated to its active bit count.
3318 void AdjustToPrecision(APInt
&significand
,
3319 int &exp
, unsigned FormatPrecision
) {
3320 unsigned bits
= significand
.getActiveBits();
3322 // 196/59 is a very slight overestimate of lg_2(10).
3323 unsigned bitsRequired
= (FormatPrecision
* 196 + 58) / 59;
3325 if (bits
<= bitsRequired
) return;
3327 unsigned tensRemovable
= (bits
- bitsRequired
) * 59 / 196;
3328 if (!tensRemovable
) return;
3330 exp
+= tensRemovable
;
3332 APInt
divisor(significand
.getBitWidth(), 1);
3333 APInt
powten(significand
.getBitWidth(), 10);
3335 if (tensRemovable
& 1)
3337 tensRemovable
>>= 1;
3338 if (!tensRemovable
) break;
3342 significand
= significand
.udiv(divisor
);
3344 // Truncate the significand down to its active bit count.
3345 significand
= significand
.trunc(significand
.getActiveBits());
/// Reduce the decimal digit string in \p buffer to at most \p FormatPrecision
/// digits. The buffer holds digits least-significant first (the most
/// significant figures are the last entries). Nothing happens when the buffer
/// is already short enough; otherwise the low-order digits are erased and
/// \p exp is increased by the number of digits dropped. The first dropped
/// digit decides between truncation (below '5') and a decimal
/// add-with-carry round-up.
3349 void AdjustToPrecision(SmallVectorImpl
<char> &buffer
,
3350 int &exp
, unsigned FormatPrecision
) {
3351 unsigned N
= buffer
.size();
3352 if (N
<= FormatPrecision
) return;
3354 // The most significant figures are the last ones in the buffer.
3355 unsigned FirstSignificant
= N
- FormatPrecision
;
3358 // FIXME: this probably shouldn't use 'round half up'.
3360 // Rounding down is just a truncation, except we also want to drop
3361 // trailing zeros from the new result.
3362 if (buffer
[FirstSignificant
- 1] < '5') {
3363 while (FirstSignificant
< N
&& buffer
[FirstSignificant
] == '0')
3366 exp
+= FirstSignificant
;
3367 buffer
.erase(&buffer
[0], &buffer
[FirstSignificant
]);
3371 // Rounding up requires a decimal add-with-carry. If we continue
3372 // the carry, the newly-introduced zeros will just be truncated.
3373 for (unsigned I
= FirstSignificant
; I
!= N
; ++I
) {
3374 if (buffer
[I
] == '9') {
3382 // If we carried through, we have exactly one digit of precision.
3383 if (FirstSignificant
== N
) {
3384 exp
+= FirstSignificant
;
3386 buffer
.push_back('1');
3390 exp
+= FirstSignificant
;
3391 buffer
.erase(&buffer
[0], &buffer
[FirstSignificant
]);
/// Append a decimal rendering of this value to \p Str. Infinities are written
/// as "-Inf" / "+Inf" and NaN as "NaN". A \p FormatPrecision of zero is
/// replaced by 2 + precision * 59 / 196 digits, and a \p FormatMaxPadding of
/// zero forces scientific notation. With \p TruncateZero the exponent marker
/// is 'E'; without it the marker is 'e', the digits are zero-filled up to
/// \p FormatPrecision and the exponent is printed with at least two digits.
3395 void IEEEFloat::toString(SmallVectorImpl
<char> &Str
, unsigned FormatPrecision
,
3396 unsigned FormatMaxPadding
, bool TruncateZero
) const {
3400 return append(Str
, "-Inf");
3402 return append(Str
, "+Inf");
3404 case fcNaN
: return append(Str
, "NaN");
3410 if (!FormatMaxPadding
) {
3412 append(Str
, "0.0E+0");
3415 if (FormatPrecision
> 1)
3416 Str
.append(FormatPrecision
- 1, '0');
3417 append(Str
, "e+00");
3430 // Decompose the number into an APInt and an exponent.
3431 int exp
= exponent
- ((int) semantics
->precision
- 1);
3432 APInt
significand(semantics
->precision
,
3433 makeArrayRef(significandParts(),
3434 partCountForBits(semantics
->precision
)));
3436 // Set FormatPrecision if zero. We want to do this before we
3437 // truncate trailing zeros, as those are part of the precision.
3438 if (!FormatPrecision
) {
3439 // We use enough digits so the number can be round-tripped back to an
3440 // APFloat. The formula comes from "How to Print Floating-Point Numbers
3441 // Accurately" by Steele and White.
3442 // FIXME: Using a formula based purely on the precision is conservative;
3443 // we can print fewer digits depending on the actual value being printed.
3445 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
3446 FormatPrecision
= 2 + semantics
->precision
* 59 / 196;
3449 // Ignore trailing binary zeros.
3450 int trailingZeros
= significand
.countTrailingZeros();
3451 exp
+= trailingZeros
;
3452 significand
.lshrInPlace(trailingZeros
);
3454 // Change the exponent from 2^e to 10^e.
3457 } else if (exp
> 0) {
3459 significand
= significand
.zext(semantics
->precision
+ exp
);
3460 significand
<<= exp
;
3462 } else { /* exp < 0 */
3465 // We transform this using the identity:
3466 // (N)(2^-e) == (N)(5^e)(10^-e)
3467 // This means we have to multiply N (the significand) by 5^e.
3468 // To avoid overflow, we have to operate on numbers large
3469 // enough to store N * 5^e:
3470 // log2(N * 5^e) == log2(N) + e * log2(5)
3471 // <= semantics->precision + e * 137 / 59
3472 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
3474 unsigned precision
= semantics
->precision
+ (137 * texp
+ 136) / 59;
3476 // Multiply significand by 5^e.
3477 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
3478 significand
= significand
.zext(precision
);
3479 APInt
five_to_the_i(precision
, 5);
3481 if (texp
& 1) significand
*= five_to_the_i
;
3485 five_to_the_i
*= five_to_the_i
;
3489 AdjustToPrecision(significand
, exp
, FormatPrecision
);
3491 SmallVector
<char, 256> buffer
;
3494 unsigned precision
= significand
.getBitWidth();
3495 APInt
ten(precision
, 10);
3496 APInt
digit(precision
, 0);
3498 bool inTrail
= true;
3499 while (significand
!= 0) {
3500 // digit <- significand % 10
3501 // significand <- significand / 10
3502 APInt::udivrem(significand
, ten
, significand
, digit
);
3504 unsigned d
= digit
.getZExtValue();
3506 // Drop trailing zeros.
3507 if (inTrail
&& !d
) exp
++;
3509 buffer
.push_back((char) ('0' + d
));
3514 assert(!buffer
.empty() && "no characters in buffer!");
3516 // Drop down to FormatPrecision.
3517 // TODO: don't do more precise calculations above than are required.
3518 AdjustToPrecision(buffer
, exp
, FormatPrecision
);
3520 unsigned NDigits
= buffer
.size();
3522 // Check whether we should use scientific notation.
3523 bool FormatScientific
;
3524 if (!FormatMaxPadding
)
3525 FormatScientific
= true;
3530 // But we shouldn't make the number look more precise than it is.
3531 FormatScientific
= ((unsigned) exp
> FormatMaxPadding
||
3532 NDigits
+ (unsigned) exp
> FormatPrecision
);
3534 // Power of the most significant digit.
3535 int MSD
= exp
+ (int) (NDigits
- 1);
3538 FormatScientific
= false;
3540 // 765e-5 == 0.00765
3542 FormatScientific
= ((unsigned) -MSD
) > FormatMaxPadding
;
3547 // Scientific formatting is pretty straightforward.
3548 if (FormatScientific
) {
3549 exp
+= (NDigits
- 1);
3551 Str
.push_back(buffer
[NDigits
-1]);
3553 if (NDigits
== 1 && TruncateZero
)
3556 for (unsigned I
= 1; I
!= NDigits
; ++I
)
3557 Str
.push_back(buffer
[NDigits
-1-I
]);
3558 // Fill with zeros up to FormatPrecision.
3559 if (!TruncateZero
&& FormatPrecision
> NDigits
- 1)
3560 Str
.append(FormatPrecision
- NDigits
+ 1, '0');
3561 // For !TruncateZero we use lower 'e'.
3562 Str
.push_back(TruncateZero
? 'E' : 'e');
3564 Str
.push_back(exp
>= 0 ? '+' : '-');
3565 if (exp
< 0) exp
= -exp
;
3566 SmallVector
<char, 6> expbuf
;
3568 expbuf
.push_back((char) ('0' + (exp
% 10)));
3571 // Exponent always at least two digits if we do not truncate zeros.
3572 if (!TruncateZero
&& expbuf
.size() < 2)
3573 expbuf
.push_back('0');
3574 for (unsigned I
= 0, E
= expbuf
.size(); I
!= E
; ++I
)
3575 Str
.push_back(expbuf
[E
-1-I
]);
3579 // Non-scientific, positive exponents.
3581 for (unsigned I
= 0; I
!= NDigits
; ++I
)
3582 Str
.push_back(buffer
[NDigits
-1-I
]);
3583 for (unsigned I
= 0; I
!= (unsigned) exp
; ++I
)
3588 // Non-scientific, negative exponents.
3590 // The number of digits to the left of the decimal point.
3591 int NWholeDigits
= exp
+ (int) NDigits
;
3594 if (NWholeDigits
> 0) {
3595 for (; I
!= (unsigned) NWholeDigits
; ++I
)
3596 Str
.push_back(buffer
[NDigits
-I
-1]);
3599 unsigned NZeros
= 1 + (unsigned) -NWholeDigits
;
3603 for (unsigned Z
= 1; Z
!= NZeros
; ++Z
)
3607 for (; I
!= NDigits
; ++I
)
3608 Str
.push_back(buffer
[NDigits
-I
-1]);
/// Try to compute an exact reciprocal of this value. Only finite non-zero
/// values whose lowest set significand bit is the integer bit (powers of two)
/// are considered. The reciprocal is 1 / *this computed with
/// rmNearestTiesToEven; the division must report opOK and the quotient must
/// not be denormal. The quotient is assigned to \p inv as
/// APFloat(reciprocal, *semantics).
3611 bool IEEEFloat::getExactInverse(APFloat
*inv
) const {
3612 // Special floats and denormals have no exact inverse.
3613 if (!isFiniteNonZero())
3616 // Check that the number is a power of two by making sure that only the
3617 // integer bit is set in the significand.
3618 if (significandLSB() != semantics
->precision
- 1)
3622 IEEEFloat
reciprocal(*semantics
, 1ULL);
3623 if (reciprocal
.divide(*this, rmNearestTiesToEven
) != opOK
)
3626 // Avoid multiplication with a denormal, it is not safe on all platforms and
3627 // may be slower than a normal division.
3628 if (reciprocal
.isDenormal())
3631 assert(reciprocal
.isFiniteNonZero() &&
3632 reciprocal
.significandLSB() == reciprocal
.semantics
->precision
- 1);
3635 *inv
= APFloat(reciprocal
, *semantics
);
/// Signaling-NaN test: the result is true when significand bit
/// (precision - 2), the first bit of the trailing significand, is clear.
3640 bool IEEEFloat::isSignaling() const {
3644 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
3645 // first bit of the trailing significand being 0.
3646 return !APInt::tcExtractBit(significandParts(), semantics
->precision
- 2);
3649 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
3651 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
3652 /// appropriate sign switching before/after the computation.
///
/// \p nextDown selects nextDown instead of nextUp. The local status starts as
/// opOK and becomes opInvalidOp when the input is a signaling NaN, which is
/// replaced by a quiet NaN of the same sign.
3653 IEEEFloat::opStatus
IEEEFloat::next(bool nextDown
) {
3654 // If we are performing nextDown, swap sign so we have -x.
3658 // Compute nextUp(x)
3659 opStatus result
= opOK
;
3661 // Handle each float category separately.
3664 // nextUp(+inf) = +inf
3667 // nextUp(-inf) = -getLargest()
3671 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
3672 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
3673 // change the payload.
3674 if (isSignaling()) {
3675 result
= opInvalidOp
;
3676 // For consistency, propagate the sign of the sNaN to the qNaN.
3677 makeNaN(false, isNegative(), nullptr);
3681 // nextUp(pm 0) = +getSmallest()
3682 makeSmallest(false);
3685 // nextUp(-getSmallest()) = -0
3686 if (isSmallest() && isNegative()) {
3687 APInt::tcSet(significandParts(), 0, partCount());
3693 // nextUp(getLargest()) == INFINITY
3694 if (isLargest() && !isNegative()) {
3695 APInt::tcSet(significandParts(), 0, partCount());
3696 category
= fcInfinity
;
3697 exponent
= semantics
->maxExponent
+ 1;
3701 // nextUp(normal) == normal + inc.
3703 // If we are negative, we need to decrement the significand.
3705 // We only cross a binade boundary that requires adjusting the exponent
3707 // 1. exponent != semantics->minExponent. This implies we are not in the
3708 // smallest binade or are dealing with denormals.
3709 // 2. Our significand excluding the integral bit is all zeros.
3710 bool WillCrossBinadeBoundary
=
3711 exponent
!= semantics
->minExponent
&& isSignificandAllZeros();
3713 // Decrement the significand.
3715 // We always do this since:
3716 // 1. If we are dealing with a non-binade decrement, by definition we
3717 // just decrement the significand.
3718 // 2. If we are dealing with a normal -> normal binade decrement, since
3719 // we have an explicit integral bit the fact that all bits but the
3720 // integral bit are zero implies that subtracting one will yield a
3721 // significand with 0 integral bit and 1 in all other spots. Thus we
3722 // must just adjust the exponent and set the integral bit to 1.
3723 // 3. If we are dealing with a normal -> denormal binade decrement,
3724 // since we set the integral bit to 0 when we represent denormals, we
3725 // just decrement the significand.
3726 integerPart
*Parts
= significandParts();
3727 APInt::tcDecrement(Parts
, partCount());
3729 if (WillCrossBinadeBoundary
) {
3730 // Our result is a normal number. Do the following:
3731 // 1. Set the integral bit to 1.
3732 // 2. Decrement the exponent.
3733 APInt::tcSetBit(Parts
, semantics
->precision
- 1);
3737 // If we are positive, we need to increment the significand.
3739 // We only cross a binade boundary that requires adjusting the exponent if
3740 // the input is not a denormal and all of said input's significand bits
3741 // are set. If all of said conditions are true: clear the significand, set
3742 // the integral bit to 1, and increment the exponent. If we have a
3743 // denormal always increment since moving denormals and the numbers in the
3744 // smallest normal binade have the same exponent in our representation.
3745 bool WillCrossBinadeBoundary
= !isDenormal() && isSignificandAllOnes();
3747 if (WillCrossBinadeBoundary
) {
3748 integerPart
*Parts
= significandParts();
3749 APInt::tcSet(Parts
, 0, partCount());
3750 APInt::tcSetBit(Parts
, semantics
->precision
- 1);
3751 assert(exponent
!= semantics
->maxExponent
&&
3752 "We can not increment an exponent beyond the maxExponent allowed"
3753 " by the given floating point semantics.");
3756 incrementSignificand();
3762 // If we are performing nextDown, swap sign so we have -nextUp(-x)
/// Turn this value into an infinity: category fcInfinity, exponent
/// semantics->maxExponent + 1 and an all-zero significand.
3769 void IEEEFloat::makeInf(bool Negative
) {
3770 category
= fcInfinity
;
3772 exponent
= semantics
->maxExponent
+ 1;
3773 APInt::tcSet(significandParts(), 0, partCount());
/// Turn this value into a zero: the exponent becomes
/// semantics->minExponent - 1 and the significand is cleared.
3776 void IEEEFloat::makeZero(bool Negative
) {
3779 exponent
= semantics
->minExponent
-1;
3780 APInt::tcSet(significandParts(), 0, partCount());
/// Set significand bit (precision - 2), the same bit isSignaling() tests, so
/// that a NaN no longer reads as signaling.
3783 void IEEEFloat::makeQuiet() {
3785 APInt::tcSetBit(significandParts(), semantics
->precision
- 2);
/// Return the exponent of \p Arg. Special values yield the IEK_NaN, IEK_Zero
/// and IEK_Inf sentinels. A value that is not denormal returns its stored
/// exponent directly. A denormal is copied, has (precision - 1) added to the
/// copy's exponent, is normalized, and the same amount is subtracted from the
/// normalized exponent.
3788 int ilogb(const IEEEFloat
&Arg
) {
3790 return IEEEFloat::IEK_NaN
;
3792 return IEEEFloat::IEK_Zero
;
3793 if (Arg
.isInfinity())
3794 return IEEEFloat::IEK_Inf
;
3795 if (!Arg
.isDenormal())
3796 return Arg
.exponent
;
3798 IEEEFloat
Normalized(Arg
);
3799 int SignificandBits
= Arg
.getSemantics().precision
- 1;
3801 Normalized
.exponent
+= SignificandBits
;
3802 Normalized
.normalize(IEEEFloat::rmNearestTiesToEven
, lfExactlyZero
);
3803 return Normalized
.exponent
- SignificandBits
;
/// Scale \p X by a power of two: \p Exp, clamped to
/// [-MaxIncrement - 1, MaxIncrement], is added to X.exponent and X is then
/// normalized with \p RoundingMode.
3806 IEEEFloat
scalbn(IEEEFloat X
, int Exp
, IEEEFloat::roundingMode RoundingMode
) {
3807 auto MaxExp
= X
.getSemantics().maxExponent
;
3808 auto MinExp
= X
.getSemantics().minExponent
;
3810 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
3811 // overflow; clamp it to a safe range before adding, but ensure that the range
3812 // is large enough that the clamp does not change the result. The range we
3813 // need to support is the difference between the largest possible exponent and
3814 // the normalized exponent of half the smallest denormal.
3816 int SignificandBits
= X
.getSemantics().precision
- 1;
3817 int MaxIncrement
= MaxExp
- (MinExp
- SignificandBits
) + 1;
3819 // Clamp to one past the range ends to let normalize handle overflow.
3820 X
.exponent
+= std::min(std::max(Exp
, -MaxIncrement
- 1), MaxIncrement
);
3821 X
.normalize(RoundingMode
, lfExactlyZero
);
/// Split \p Val into a fraction and an exponent stored in \p Exp. The IEK_NaN
/// and IEK_Inf sentinels in \p Exp are handled separately (a NaN is copied
/// into a local named Quiet). Otherwise \p Exp becomes 0 for IEK_Zero or
/// Exp + 1 for other values, and the fraction is scalbn(Val, -Exp, RM).
3827 IEEEFloat
frexp(const IEEEFloat
&Val
, int &Exp
, IEEEFloat::roundingMode RM
) {
3830 // Quiet signalling nans.
3831 if (Exp
== IEEEFloat::IEK_NaN
) {
3832 IEEEFloat
Quiet(Val
);
3837 if (Exp
== IEEEFloat::IEK_Inf
)
3840 // 1 is added because frexp is defined to return a normalized fraction in
3841 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
3842 Exp
= Exp
== IEEEFloat::IEK_Zero
? 0 : Exp
+ 1;
3843 return scalbn(Val
, -Exp
, RM
);
/// Construct a double-double whose two components are both
/// APFloat(semIEEEdouble). Asserts that Semantics is semPPCDoubleDouble.
3846 DoubleAPFloat::DoubleAPFloat(const fltSemantics
&S
)
3848 Floats(new APFloat
[2]{APFloat(semIEEEdouble
), APFloat(semIEEEdouble
)}) {
3849 assert(Semantics
== &semPPCDoubleDouble
);
/// Construct a double-double whose two components are both built with
/// APFloat(semIEEEdouble, uninitialized). Asserts that Semantics is
/// semPPCDoubleDouble.
3852 DoubleAPFloat::DoubleAPFloat(const fltSemantics
&S
, uninitializedTag
)
3854 Floats(new APFloat
[2]{APFloat(semIEEEdouble
, uninitialized
),
3855 APFloat(semIEEEdouble
, uninitialized
)}) {
3856 assert(Semantics
== &semPPCDoubleDouble
);
/// Construct a double-double from the integer part \p I: the first component
/// is APFloat(semIEEEdouble, I), the second is APFloat(semIEEEdouble).
/// Asserts that \p S is semPPCDoubleDouble.
3859 DoubleAPFloat::DoubleAPFloat(const fltSemantics
&S
, integerPart I
)
3860 : Semantics(&S
), Floats(new APFloat
[2]{APFloat(semIEEEdouble
, I
),
3861 APFloat(semIEEEdouble
)}) {
3862 assert(Semantics
== &semPPCDoubleDouble
);
/// Construct a double-double from the APInt \p I: raw word 0 initializes the
/// first component and raw word 1 the second, each as a 64-bit
/// semIEEEdouble pattern. Asserts that Semantics is semPPCDoubleDouble.
3865 DoubleAPFloat::DoubleAPFloat(const fltSemantics
&S
, const APInt
&I
)
3867 Floats(new APFloat
[2]{
3868 APFloat(semIEEEdouble
, APInt(64, I
.getRawData()[0])),
3869 APFloat(semIEEEdouble
, APInt(64, I
.getRawData()[1]))}) {
3870 assert(Semantics
== &semPPCDoubleDouble
);
/// Construct a double-double by moving \p First and Second into the two
/// components. Asserts that Semantics is semPPCDoubleDouble and that both
/// components use semIEEEdouble.
3873 DoubleAPFloat::DoubleAPFloat(const fltSemantics
&S
, APFloat
&&First
,
3876 Floats(new APFloat
[2]{std::move(First
), std::move(Second
)}) {
3877 assert(Semantics
== &semPPCDoubleDouble
);
3878 assert(&Floats
[0].getSemantics() == &semIEEEdouble
);
3879 assert(&Floats
[1].getSemantics() == &semIEEEdouble
);
/// Copy constructor: takes RHS's semantics and, when RHS.Floats is non-null,
/// allocates a fresh pair holding copies of both components.
3882 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat
&RHS
)
3883 : Semantics(RHS
.Semantics
),
3884 Floats(RHS
.Floats
? new APFloat
[2]{APFloat(RHS
.Floats
[0]),
3885 APFloat(RHS
.Floats
[1])}
3887 assert(Semantics
== &semPPCDoubleDouble
);
/// Move constructor: takes over RHS's semantics and component storage, then
/// marks RHS by setting its Semantics to semBogus.
3890 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat
&&RHS
)
3891 : Semantics(RHS
.Semantics
), Floats(std::move(RHS
.Floats
)) {
3892 RHS
.Semantics
= &semBogus
;
3893 assert(Semantics
== &semPPCDoubleDouble
);
/// Copy assignment. When both sides share semantics and RHS has component
/// storage, the two components are assigned element-wise. Otherwise, unless
/// this is self-assignment, the object is destroyed and copy-constructed in
/// place from \p RHS.
3896 DoubleAPFloat
&DoubleAPFloat::operator=(const DoubleAPFloat
&RHS
) {
3897 if (Semantics
== RHS
.Semantics
&& RHS
.Floats
) {
3898 Floats
[0] = RHS
.Floats
[0];
3899 Floats
[1] = RHS
.Floats
[1];
3900 } else if (this != &RHS
) {
3901 this->~DoubleAPFloat();
3902 new (this) DoubleAPFloat(RHS
);
3907 // Implement addition, subtraction, multiplication and division based on:
3908 // "Software for Doubled-Precision Floating-Point Computations",
3909 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
//
// addImpl adds the pairs (a, aa) and (c, cc) and stores the result in
// Floats[0] and Floats[1]. The status of every intermediate operation is
// OR-ed into Status, which is returned as an opStatus. Whenever an
// intermediate sum is not finite, Floats[1] is set to positive zero.
3910 APFloat::opStatus
DoubleAPFloat::addImpl(const APFloat
&a
, const APFloat
&aa
,
3911 const APFloat
&c
, const APFloat
&cc
,
3915 Status
|= z
.add(c
, RM
);
3916 if (!z
.isFinite()) {
3917 if (!z
.isInfinity()) {
3918 Floats
[0] = std::move(z
);
3919 Floats
[1].makeZero(/* Neg = */ false);
3920 return (opStatus
)Status
;
3923 auto AComparedToC
= a
.compareAbsoluteValue(c
);
3925 Status
|= z
.add(aa
, RM
);
3926 if (AComparedToC
== APFloat::cmpGreaterThan
) {
3927 // z = cc + aa + c + a;
3928 Status
|= z
.add(c
, RM
);
3929 Status
|= z
.add(a
, RM
);
3931 // z = cc + aa + a + c;
3932 Status
|= z
.add(a
, RM
);
3933 Status
|= z
.add(c
, RM
);
3935 if (!z
.isFinite()) {
3936 Floats
[0] = std::move(z
);
3937 Floats
[1].makeZero(/* Neg = */ false);
3938 return (opStatus
)Status
;
3942 Status
|= zz
.add(cc
, RM
);
3943 if (AComparedToC
== APFloat::cmpGreaterThan
) {
3944 // Floats[1] = a - z + c + zz;
3946 Status
|= Floats
[1].subtract(z
, RM
);
3947 Status
|= Floats
[1].add(c
, RM
);
3948 Status
|= Floats
[1].add(zz
, RM
);
3950 // Floats[1] = c - z + a + zz;
3952 Status
|= Floats
[1].subtract(z
, RM
);
3953 Status
|= Floats
[1].add(a
, RM
);
3954 Status
|= Floats
[1].add(zz
, RM
);
3959 Status
|= q
.subtract(z
, RM
);
3961 // zz = q + c + (a - (q + z)) + aa + cc;
3962 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
3964 Status
|= zz
.add(c
, RM
);
3965 Status
|= q
.add(z
, RM
);
3966 Status
|= q
.subtract(a
, RM
);
3968 Status
|= zz
.add(q
, RM
);
3969 Status
|= zz
.add(aa
, RM
);
3970 Status
|= zz
.add(cc
, RM
);
3971 if (zz
.isZero() && !zz
.isNegative()) {
3972 Floats
[0] = std::move(z
);
3973 Floats
[1].makeZero(/* Neg = */ false);
3977 Status
|= Floats
[0].add(zz
, RM
);
3978 if (!Floats
[0].isFinite()) {
3979 Floats
[1].makeZero(/* Neg = */ false);
3980 return (opStatus
)Status
;
3982 Floats
[1] = std::move(z
);
3983 Status
|= Floats
[1].subtract(Floats
[0], RM
);
3984 Status
|= Floats
[1].add(zz
, RM
);
3986 return (opStatus
)Status
;
/// Add \p LHS and \p RHS into Out. NaN, zero and infinity categories are
/// examined first, in that order; infinities of opposite sign turn Out into
/// a NaN. Once both operands are known to be fcNormal, their components are
/// copied and the sum is computed by Out.addImpl().
3989 APFloat::opStatus
DoubleAPFloat::addWithSpecial(const DoubleAPFloat
&LHS
,
3990 const DoubleAPFloat
&RHS
,
3993 if (LHS
.getCategory() == fcNaN
) {
3997 if (RHS
.getCategory() == fcNaN
) {
4001 if (LHS
.getCategory() == fcZero
) {
4005 if (RHS
.getCategory() == fcZero
) {
4009 if (LHS
.getCategory() == fcInfinity
&& RHS
.getCategory() == fcInfinity
&&
4010 LHS
.isNegative() != RHS
.isNegative()) {
4011 Out
.makeNaN(false, Out
.isNegative(), nullptr);
4014 if (LHS
.getCategory() == fcInfinity
) {
4018 if (RHS
.getCategory() == fcInfinity
) {
4022 assert(LHS
.getCategory() == fcNormal
&& RHS
.getCategory() == fcNormal
);
4024 APFloat
A(LHS
.Floats
[0]), AA(LHS
.Floats
[1]), C(RHS
.Floats
[0]),
4026 assert(&A
.getSemantics() == &semIEEEdouble
);
4027 assert(&AA
.getSemantics() == &semIEEEdouble
);
4028 assert(&C
.getSemantics() == &semIEEEdouble
);
4029 assert(&CC
.getSemantics() == &semIEEEdouble
);
4030 assert(&Out
.Floats
[0].getSemantics() == &semIEEEdouble
);
4031 assert(&Out
.Floats
[1].getSemantics() == &semIEEEdouble
);
4032 return Out
.addImpl(A
, AA
, C
, CC
, RM
);
/// In-place addition: forwards to addWithSpecial() with *this as both the
/// left operand and the output.
4035 APFloat::opStatus
DoubleAPFloat::add(const DoubleAPFloat
&RHS
,
4037 return addWithSpecial(*this, RHS
, *this, RM
);
/// In-place subtraction implemented on top of add(); the status of that
/// add() call is kept in Ret.
4040 APFloat::opStatus
DoubleAPFloat::subtract(const DoubleAPFloat
&RHS
,
4043 auto Ret
= add(RHS
, RM
);
/// Multiply this double-double by \p RHS. NaN, zero and infinity categories
/// are resolved first (zero times infinity produces a NaN through makeNaN).
/// For two fcNormal operands the product is assembled from the four
/// components A, B, C and D, with every step's status OR-ed into Status and
/// returned as an opStatus. Floats[1] is set to positive zero when an
/// intermediate result is not finite and non-zero.
4048 APFloat::opStatus
DoubleAPFloat::multiply(const DoubleAPFloat
&RHS
,
4049 APFloat::roundingMode RM
) {
4050 const auto &LHS
= *this;
4052 /* Interesting observation: For special categories, finding the lowest
4053 common ancestor of the following layered graph gives the correct
4062 e.g. NaN * NaN = NaN
4064 Normal * Zero = Zero
4067 if (LHS
.getCategory() == fcNaN
) {
4071 if (RHS
.getCategory() == fcNaN
) {
4075 if ((LHS
.getCategory() == fcZero
&& RHS
.getCategory() == fcInfinity
) ||
4076 (LHS
.getCategory() == fcInfinity
&& RHS
.getCategory() == fcZero
)) {
4077 Out
.makeNaN(false, false, nullptr);
4080 if (LHS
.getCategory() == fcZero
|| LHS
.getCategory() == fcInfinity
) {
4084 if (RHS
.getCategory() == fcZero
|| RHS
.getCategory() == fcInfinity
) {
4088 assert(LHS
.getCategory() == fcNormal
&& RHS
.getCategory() == fcNormal
&&
4089 "Special cases not handled exhaustively");
4092 APFloat A
= Floats
[0], B
= Floats
[1], C
= RHS
.Floats
[0], D
= RHS
.Floats
[1];
4095 Status
|= T
.multiply(C
, RM
);
4096 if (!T
.isFiniteNonZero()) {
4098 Floats
[1].makeZero(/* Neg = */ false);
4099 return (opStatus
)Status
;
4102 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4105 Status
|= Tau
.fusedMultiplyAdd(C
, T
, RM
);
4110 Status
|= V
.multiply(D
, RM
);
4113 Status
|= W
.multiply(C
, RM
);
4114 Status
|= V
.add(W
, RM
);
4116 Status
|= Tau
.add(V
, RM
);
4120 Status
|= U
.add(Tau
, RM
);
4123 if (!U
.isFinite()) {
4124 Floats
[1].makeZero(/* Neg = */ false);
4126 // Floats[1] = (t - u) + tau
4127 Status
|= T
.subtract(U
, RM
);
4128 Status
|= T
.add(Tau
, RM
);
4131 return (opStatus
)Status
;
/// Division carried out in semPPCDoubleDoubleLegacy: both operands are
/// rebuilt from their bitcastToAPInt() bits as legacy APFloats, divided, and
/// the quotient's bits are used to reconstruct *this.
4134 APFloat::opStatus
DoubleAPFloat::divide(const DoubleAPFloat
&RHS
,
4135 APFloat::roundingMode RM
) {
4136 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4137 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
4139 Tmp
.divide(APFloat(semPPCDoubleDoubleLegacy
, RHS
.bitcastToAPInt()), RM
);
4140 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
/// Remainder carried out in semPPCDoubleDoubleLegacy: both operands are
/// rebuilt from their bits as legacy APFloats, and the result's bits are used
/// to reconstruct *this.
4144 APFloat::opStatus
DoubleAPFloat::remainder(const DoubleAPFloat
&RHS
) {
4145 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4146 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
4148 Tmp
.remainder(APFloat(semPPCDoubleDoubleLegacy
, RHS
.bitcastToAPInt()));
4149 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
/// mod carried out in semPPCDoubleDoubleLegacy: both operands are rebuilt
/// from their bits as legacy APFloats, and the result's bits are used to
/// reconstruct *this. The legacy call's status is kept in Ret.
4153 APFloat::opStatus
DoubleAPFloat::mod(const DoubleAPFloat
&RHS
) {
4154 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4155 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
4156 auto Ret
= Tmp
.mod(APFloat(semPPCDoubleDoubleLegacy
, RHS
.bitcastToAPInt()));
4157 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
/// Fused multiply-add carried out in semPPCDoubleDoubleLegacy: *this,
/// \p Multiplicand and \p Addend are rebuilt from their bits as legacy
/// APFloats, and the result's bits are used to reconstruct *this. The legacy
/// call's status is kept in Ret.
4162 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat
&Multiplicand
,
4163 const DoubleAPFloat
&Addend
,
4164 APFloat::roundingMode RM
) {
4165 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4166 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
4167 auto Ret
= Tmp
.fusedMultiplyAdd(
4168 APFloat(semPPCDoubleDoubleLegacy
, Multiplicand
.bitcastToAPInt()),
4169 APFloat(semPPCDoubleDoubleLegacy
, Addend
.bitcastToAPInt()), RM
);
4170 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
/// Round to an integral value in semPPCDoubleDoubleLegacy: *this is rebuilt
/// from its bits as a legacy APFloat, rounded with \p RM, and the result's
/// bits are used to reconstruct *this. The legacy call's status is kept in
/// Ret.
4174 APFloat::opStatus
DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM
) {
4175 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4176 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
4177 auto Ret
= Tmp
.roundToIntegral(RM
);
4178 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
/// Flip the sign of both components.
4182 void DoubleAPFloat::changeSign() {
4183 Floats
[0].changeSign();
4184 Floats
[1].changeSign();
/// Compare magnitudes. The first components are compared by absolute value;
/// only when they are cmpEqual are the second components compared. If that
/// second comparison is cmpLessThan or cmpGreaterThan, the outcome also
/// depends on whether each operand's two components have opposite signs
/// (Against / RHSAgainst). When both do, the second comparison is mirrored.
4188 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat
&RHS
) const {
4189 auto Result
= Floats
[0].compareAbsoluteValue(RHS
.Floats
[0]);
4190 if (Result
!= cmpEqual
)
4192 Result
= Floats
[1].compareAbsoluteValue(RHS
.Floats
[1]);
4193 if (Result
== cmpLessThan
|| Result
== cmpGreaterThan
) {
4194 auto Against
= Floats
[0].isNegative() ^ Floats
[1].isNegative();
4195 auto RHSAgainst
= RHS
.Floats
[0].isNegative() ^ RHS
.Floats
[1].isNegative();
4196 if (Against
&& !RHSAgainst
)
4198 if (!Against
&& RHSAgainst
)
4199 return cmpGreaterThan
;
4200 if (!Against
&& !RHSAgainst
)
4202 if (Against
&& RHSAgainst
)
4203 return (cmpResult
)(cmpLessThan
+ cmpGreaterThan
- Result
);
/// The category of a double-double is the category of its first component.
4208 APFloat::fltCategory
DoubleAPFloat::getCategory() const {
4209 return Floats
[0].getCategory();
/// The sign of a double-double is the sign of its first component.
4212 bool DoubleAPFloat::isNegative() const { return Floats
[0].isNegative(); }
/// Make the first component an infinity with sign \p Neg and the second
/// component a positive zero.
4214 void DoubleAPFloat::makeInf(bool Neg
) {
4215 Floats
[0].makeInf(Neg
);
4216 Floats
[1].makeZero(/* Neg = */ false);
/// Make the first component a zero with sign \p Neg and the second component
/// a positive zero.
4219 void DoubleAPFloat::makeZero(bool Neg
) {
4220 Floats
[0].makeZero(Neg
);
4221 Floats
[1].makeZero(/* Neg = */ false);
/// Set both components from fixed semIEEEdouble bit patterns:
/// 0x7fefffffffffffff for the first and 0x7c8ffffffffffffe for the second.
4224 void DoubleAPFloat::makeLargest(bool Neg
) {
4225 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4226 Floats
[0] = APFloat(semIEEEdouble
, APInt(64, 0x7fefffffffffffffull
));
4227 Floats
[1] = APFloat(semIEEEdouble
, APInt(64, 0x7c8ffffffffffffeull
));
/// Make the first component the smallest value (makeSmallest with sign
/// \p Neg) and the second component a positive zero.
4232 void DoubleAPFloat::makeSmallest(bool Neg
) {
4233 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4234 Floats
[0].makeSmallest(Neg
);
4235 Floats
[1].makeZero(/* Neg = */ false);
/// Set the first component from the fixed semIEEEdouble bit pattern
/// 0x0360000000000000 and the second component to positive zero.
4238 void DoubleAPFloat::makeSmallestNormalized(bool Neg
) {
4239 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4240 Floats
[0] = APFloat(semIEEEdouble
, APInt(64, 0x0360000000000000ull
));
4242 Floats
[0].changeSign();
4243 Floats
[1].makeZero(/* Neg = */ false);
/// Make the first component a NaN, forwarding \p SNaN, \p Neg and \p fill,
/// and make the second component a positive zero.
4246 void DoubleAPFloat::makeNaN(bool SNaN
, bool Neg
, const APInt
*fill
) {
4247 Floats
[0].makeNaN(SNaN
, Neg
, fill
);
4248 Floats
[1].makeZero(/* Neg = */ false);
/// Compare with \p RHS. The first components are compared; when they are
/// cmpEqual the result is the comparison of the second components.
4251 APFloat::cmpResult
DoubleAPFloat::compare(const DoubleAPFloat
&RHS
) const {
4252 auto Result
= Floats
[0].compare(RHS
.Floats
[0]);
4253 // |Float[0]| > |Float[1]|
4254 if (Result
== APFloat::cmpEqual
)
4255 return Floats
[1].compare(RHS
.Floats
[1]);
/// True when both components are bitwise equal to the matching components of
/// \p RHS.
4259 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat
&RHS
) const {
4260 return Floats
[0].bitwiseIsEqual(RHS
.Floats
[0]) &&
4261 Floats
[1].bitwiseIsEqual(RHS
.Floats
[1]);
/// Hash a DoubleAPFloat by combining the hashes of its two components. The
/// trailing return hashes only the Semantics pointer.
/// NOTE(review): that fallback is presumably for an object without component
/// storage - confirm against the guarding condition.
4264 hash_code
hash_value(const DoubleAPFloat
&Arg
) {
4266 return hash_combine(hash_value(Arg
.Floats
[0]), hash_value(Arg
.Floats
[1]));
4267 return hash_combine(Arg
.Semantics
);
/// Pack the value into a 128-bit APInt built from two words: raw word 0 of
/// Floats[0]'s bits followed by raw word 0 of Floats[1]'s bits.
4270 APInt
DoubleAPFloat::bitcastToAPInt() const {
4271 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4273 Floats
[0].bitcastToAPInt().getRawData()[0],
4274 Floats
[1].bitcastToAPInt().getRawData()[0],
4276 return APInt(128, 2, Data
);
/// Parse \p S into a temporary semPPCDoubleDoubleLegacy APFloat and rebuild
/// *this from the temporary's bits. The parse status is kept in Ret.
4279 APFloat::opStatus
DoubleAPFloat::convertFromString(StringRef S
,
4281 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4282 APFloat
Tmp(semPPCDoubleDoubleLegacy
);
4283 auto Ret
= Tmp
.convertFromString(S
, RM
);
4284 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
/// nextUp/nextDown carried out in semPPCDoubleDoubleLegacy: *this is rebuilt
/// from its bits as a legacy APFloat, stepped with next(nextDown), and the
/// result's bits are used to reconstruct *this. The status is kept in Ret.
4288 APFloat::opStatus
DoubleAPFloat::next(bool nextDown
) {
4289 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4290 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
4291 auto Ret
= Tmp
.next(nextDown
);
4292 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
/// Convert to an integer by rebuilding the value as a
/// semPPCDoubleDoubleLegacy APFloat and forwarding all arguments to its
/// convertToInteger().
4297 DoubleAPFloat::convertToInteger(MutableArrayRef
<integerPart
> Input
,
4298 unsigned int Width
, bool IsSigned
,
4299 roundingMode RM
, bool *IsExact
) const {
4300 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4301 return APFloat(semPPCDoubleDoubleLegacy
, bitcastToAPInt())
4302 .convertToInteger(Input
, Width
, IsSigned
, RM
, IsExact
);
/// Convert \p Input into a temporary semPPCDoubleDoubleLegacy APFloat and
/// rebuild *this from the temporary's bits. The status is kept in Ret.
4305 APFloat::opStatus
DoubleAPFloat::convertFromAPInt(const APInt
&Input
,
4308 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4309 APFloat
Tmp(semPPCDoubleDoubleLegacy
);
4310 auto Ret
= Tmp
.convertFromAPInt(Input
, IsSigned
, RM
);
4311 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
/// Convert a sign-extended integer into a temporary semPPCDoubleDoubleLegacy
/// APFloat and rebuild *this from the temporary's bits. The status is kept
/// in Ret.
4316 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart
*Input
,
4317 unsigned int InputSize
,
4318 bool IsSigned
, roundingMode RM
) {
4319 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4320 APFloat
Tmp(semPPCDoubleDoubleLegacy
);
4321 auto Ret
= Tmp
.convertFromSignExtendedInteger(Input
, InputSize
, IsSigned
, RM
);
4322 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
/// Convert a zero-extended integer into a temporary semPPCDoubleDoubleLegacy
/// APFloat and rebuild *this from the temporary's bits. The status is kept
/// in Ret.
4327 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart
*Input
,
4328 unsigned int InputSize
,
4329 bool IsSigned
, roundingMode RM
) {
4330 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4331 APFloat
Tmp(semPPCDoubleDoubleLegacy
);
4332 auto Ret
= Tmp
.convertFromZeroExtendedInteger(Input
, InputSize
, IsSigned
, RM
);
4333 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
/// Write a hexadecimal rendering into \p DST by rebuilding the value as a
/// semPPCDoubleDoubleLegacy APFloat and forwarding to its
/// convertToHexString().
4337 unsigned int DoubleAPFloat::convertToHexString(char *DST
,
4338 unsigned int HexDigits
,
4340 roundingMode RM
) const {
4341 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4342 return APFloat(semPPCDoubleDoubleLegacy
, bitcastToAPInt())
4343 .convertToHexString(DST
, HexDigits
, UpperCase
, RM
);
/// A fcNormal double-double is reported as denormal when either component is
/// denormal, or when Floats[0] does not compare equal to
/// Floats[0] + Floats[1].
4346 bool DoubleAPFloat::isDenormal() const {
4347 return getCategory() == fcNormal
&&
4348 (Floats
[0].isDenormal() || Floats
[1].isDenormal() ||
4349 // (double)(Hi + Lo) == Hi defines a normal number.
4350 Floats
[0].compare(Floats
[0] + Floats
[1]) != cmpEqual
);
/// For a fcNormal value: build a copy, turn the copy into the smallest value
/// of the same sign, and report whether it compares equal to *this.
4353 bool DoubleAPFloat::isSmallest() const {
4354 if (getCategory() != fcNormal
)
4356 DoubleAPFloat
Tmp(*this);
4357 Tmp
.makeSmallest(this->isNegative());
4358 return Tmp
.compare(*this) == cmpEqual
;
/// For a fcNormal value: build a copy, turn the copy into the largest value
/// of the same sign, and report whether it compares equal to *this.
4361 bool DoubleAPFloat::isLargest() const {
4362 if (getCategory() != fcNormal
)
4364 DoubleAPFloat
Tmp(*this);
4365 Tmp
.makeLargest(this->isNegative());
4366 return Tmp
.compare(*this) == cmpEqual
;
/// True when both components are themselves integers.
4369 bool DoubleAPFloat::isInteger() const {
4370 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4371 return Floats
[0].isInteger() && Floats
[1].isInteger();
/// Format the value by rebuilding it as a semPPCDoubleDoubleLegacy APFloat
/// and forwarding all formatting arguments to its toString().
4374 void DoubleAPFloat::toString(SmallVectorImpl
<char> &Str
,
4375 unsigned FormatPrecision
,
4376 unsigned FormatMaxPadding
,
4377 bool TruncateZero
) const {
4378 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4379 APFloat(semPPCDoubleDoubleLegacy
, bitcastToAPInt())
4380 .toString(Str
, FormatPrecision
, FormatMaxPadding
, TruncateZero
);
/// Exact-inverse query carried out on a semPPCDoubleDoubleLegacy copy of the
/// value. One path only asks the legacy value (passing nullptr); the other
/// collects the inverse in a legacy temporary and stores it through \p inv as
/// a semPPCDoubleDouble APFloat built from the temporary's bits.
4383 bool DoubleAPFloat::getExactInverse(APFloat
*inv
) const {
4384 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4385 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
4387 return Tmp
.getExactInverse(nullptr);
4388 APFloat
Inv(semPPCDoubleDoubleLegacy
);
4389 auto Ret
= Tmp
.getExactInverse(&Inv
);
4390 *inv
= APFloat(semPPCDoubleDouble
, Inv
.bitcastToAPInt());
/// Scale a double-double by applying scalbn() with the same \p Exp and
/// \p RM to each component and pairing the results.
4394 DoubleAPFloat
scalbn(DoubleAPFloat Arg
, int Exp
, APFloat::roundingMode RM
) {
4395 assert(Arg
.Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4396 return DoubleAPFloat(semPPCDoubleDouble
, scalbn(Arg
.Floats
[0], Exp
, RM
),
4397 scalbn(Arg
.Floats
[1], Exp
, RM
));
/// frexp for a double-double: frexp() of the first component produces the
/// new first component and writes \p Exp. When the argument is fcNormal the
/// second component is scaled by -Exp; otherwise it is carried over
/// unchanged.
4400 DoubleAPFloat
frexp(const DoubleAPFloat
&Arg
, int &Exp
,
4401 APFloat::roundingMode RM
) {
4402 assert(Arg
.Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4403 APFloat First
= frexp(Arg
.Floats
[0], Exp
, RM
);
4404 APFloat Second
= Arg
.Floats
[1];
4405 if (Arg
.getCategory() == APFloat::fcNormal
)
4406 Second
= scalbn(Second
, -Exp
, RM
);
4407 return DoubleAPFloat(semPPCDoubleDouble
, std::move(First
), std::move(Second
));
4410 } // End detail namespace
4412 APFloat::Storage::Storage(IEEEFloat F
, const fltSemantics
&Semantics
) {
4413 if (usesLayout
<IEEEFloat
>(Semantics
)) {
4414 new (&IEEE
) IEEEFloat(std::move(F
));
4417 if (usesLayout
<DoubleAPFloat
>(Semantics
)) {
4419 DoubleAPFloat(Semantics
, APFloat(std::move(F
), F
.getSemantics()),
4420 APFloat(semIEEEdouble
));
4423 llvm_unreachable("Unexpected semantics");
/// Parse \p Str with rounding mode \p RM, dispatching to the IEEE or Double
/// member through APFLOAT_DISPATCH_ON_SEMANTICS.
4426 APFloat::opStatus
APFloat::convertFromString(StringRef Str
, roundingMode RM
) {
4427 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str
, RM
));
/// Hash an APFloat by hashing whichever union member (IEEE or Double) its
/// semantics select; any other layout is unreachable.
4430 hash_code
hash_value(const APFloat
&Arg
) {
4431 if (APFloat::usesLayout
<detail::IEEEFloat
>(Arg
.getSemantics()))
4432 return hash_value(Arg
.U
.IEEE
);
4433 if (APFloat::usesLayout
<detail::DoubleAPFloat
>(Arg
.getSemantics()))
4434 return hash_value(Arg
.U
.Double
);
4435 llvm_unreachable("Unexpected semantics");
/// Construct an APFloat with \p Semantics and then parse \p S into it using
/// rmNearestTiesToEven. The status returned by convertFromString() is not
/// examined here.
4438 APFloat::APFloat(const fltSemantics
&Semantics
, StringRef S
)
4439 : APFloat(Semantics
) {
4440 convertFromString(S
, rmNearestTiesToEven
);
/// Convert this value to \p ToSemantics in place. IEEE-to-IEEE conversions
/// go straight to IEEEFloat::convert(). IEEE-to-DoubleAPFloat converts the
/// IEEE member to semPPCDoubleDoubleLegacy and rebuilds *this from the
/// resulting bits. DoubleAPFloat-to-IEEE converts getIEEE() and rebuilds
/// *this from it. Any other layout combination is unreachable.
4443 APFloat::opStatus
APFloat::convert(const fltSemantics
&ToSemantics
,
4444 roundingMode RM
, bool *losesInfo
) {
4445 if (&getSemantics() == &ToSemantics
) {
4449 if (usesLayout
<IEEEFloat
>(getSemantics()) &&
4450 usesLayout
<IEEEFloat
>(ToSemantics
))
4451 return U
.IEEE
.convert(ToSemantics
, RM
, losesInfo
);
4452 if (usesLayout
<IEEEFloat
>(getSemantics()) &&
4453 usesLayout
<DoubleAPFloat
>(ToSemantics
)) {
4454 assert(&ToSemantics
== &semPPCDoubleDouble
);
4455 auto Ret
= U
.IEEE
.convert(semPPCDoubleDoubleLegacy
, RM
, losesInfo
);
4456 *this = APFloat(ToSemantics
, U
.IEEE
.bitcastToAPInt());
4459 if (usesLayout
<DoubleAPFloat
>(getSemantics()) &&
4460 usesLayout
<IEEEFloat
>(ToSemantics
)) {
4461 auto Ret
= getIEEE().convert(ToSemantics
, RM
, losesInfo
);
4462 *this = APFloat(std::move(getIEEE()), ToSemantics
);
4465 llvm_unreachable("Unexpected semantics");
/// Build an APFloat whose bit pattern is APInt::getAllOnesValue(BitWidth).
/// The returns cover semIEEEhalf, semIEEEsingle, semIEEEdouble,
/// semX87DoubleExtended and semIEEEquad, with an unknown width being
/// unreachable. A separate path asserts BitWidth == 128 and returns a
/// semPPCDoubleDouble value.
4468 APFloat
APFloat::getAllOnesValue(unsigned BitWidth
, bool isIEEE
) {
4472 return APFloat(semIEEEhalf
, APInt::getAllOnesValue(BitWidth
));
4474 return APFloat(semIEEEsingle
, APInt::getAllOnesValue(BitWidth
));
4476 return APFloat(semIEEEdouble
, APInt::getAllOnesValue(BitWidth
));
4478 return APFloat(semX87DoubleExtended
, APInt::getAllOnesValue(BitWidth
));
4480 return APFloat(semIEEEquad
, APInt::getAllOnesValue(BitWidth
));
4482 llvm_unreachable("Unknown floating bit width");
4485 assert(BitWidth
== 128);
4486 return APFloat(semPPCDoubleDouble
, APInt::getAllOnesValue(BitWidth
));
/// Write the contents of a local character buffer followed by a newline to
/// \p OS.
4490 void APFloat::print(raw_ostream
&OS
) const {
4491 SmallVector
<char, 16> Buffer
;
4493 OS
<< Buffer
<< "\n";
4496 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Debug helper: print this value to dbgs(). Only compiled when NDEBUG is
/// not defined or LLVM_ENABLE_DUMP is defined.
4497 LLVM_DUMP_METHOD
void APFloat::dump() const { print(dbgs()); }
/// Add this value's bit pattern (bitcastToAPInt()) to the folding-set ID
/// \p NID.
4500 void APFloat::Profile(FoldingSetNodeID
&NID
) const {
4501 NID
.Add(bitcastToAPInt());
4504 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
4505 an APSInt, whose initial bit-width and signed-ness are used to determine the
4506 precision of the conversion. */
/// Convert into \p result: a word buffer sized by result.getNumWords() is
/// filled by the array-based convertToInteger() using result's bit width and
/// signedness, and \p result is then reassigned from that buffer as an APInt
/// of the same width.
4508 APFloat::opStatus
APFloat::convertToInteger(APSInt
&result
,
4509 roundingMode rounding_mode
,
4510 bool *isExact
) const {
4511 unsigned bitWidth
= result
.getBitWidth();
4512 SmallVector
<uint64_t, 4> parts(result
.getNumWords());
4513 opStatus status
= convertToInteger(parts
, bitWidth
, result
.isSigned(),
4514 rounding_mode
, isExact
);
4515 // Keeps the original signed-ness.
4516 result
= APInt(bitWidth
, parts
);
4520 } // End llvm namespace
4522 #undef APFLOAT_DISPATCH_ON_SEMANTICS