1 //===-- APFloat.cpp - Implement APFloat class -----------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements a class to represent arbitrary precision floating
10 // point values and provide a variety of arithmetic operations on them.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/APSInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/FloatingPointMode.h"
18 #include "llvm/ADT/FoldingSet.h"
19 #include "llvm/ADT/Hashing.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/StringExtras.h"
22 #include "llvm/ADT/StringRef.h"
23 #include "llvm/Config/llvm-config.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/Error.h"
26 #include "llvm/Support/MathExtras.h"
27 #include "llvm/Support/raw_ostream.h"
31 #define APFLOAT_DISPATCH_ON_SEMANTICS(METHOD_CALL) \
33 if (usesLayout<IEEEFloat>(getSemantics())) \
34 return U.IEEE.METHOD_CALL; \
35 if (usesLayout<DoubleAPFloat>(getSemantics())) \
36 return U.Double.METHOD_CALL; \
37 llvm_unreachable("Unexpected semantics"); \
/// Packs two fcCategory values into one integer key so that a single switch
/// statement can dispatch on the categories of both operands of an operation.
/// The key is the left-hand category scaled by four plus the right-hand
/// category, so distinct pairs yield distinct keys as long as every category
/// value lies in [0, 3].
///
/// TODO: If clang source code is ever allowed to use constexpr in its own
/// codebase, change this into a static inline function.
#define PackCategoriesIntoKey(_lhs, _rhs) (4 * (_lhs) + (_rhs))
50 /* Assumed in hexadecimal significand parsing, and conversion to
51 hexadecimal strings. */
52 static_assert(APFloatBase::integerPartWidth
% 4 == 0, "Part width must be divisible by 4!");
56 // How the nonfinite values Inf and NaN are represented.
57 enum class fltNonfiniteBehavior
{
58 // Represents standard IEEE 754 behavior. A value is nonfinite if the
59 // exponent field is all 1s. In such cases, a value is Inf if the
60 // significand bits are all zero, and NaN otherwise
63 // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN,
64 // Float8E5M2FNUZ, Float8E4M3FNUZ, and Float8E4M3B11FNUZ). There is no
65 // representation for Inf, and operations that would ordinarily produce Inf
66 // produce NaN instead.
67 // The details of the NaN representation(s) in this form are determined by the
68 // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available
69 // encodings do not distinguish between signalling and quiet NaN.
73 // How NaN values are represented. This is currently only used in combination
74 // with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE
75 // while having IEEE non-finite behavior is liable to lead to unexpected
77 enum class fltNanEncoding
{
78 // Represents the standard IEEE behavior where a value is NaN if its
79 // exponent is all 1s and the significand is non-zero.
82 // Represents the behavior in the Float8E4M3 floating point type where NaN is
83 // represented by having the exponent and mantissa set to all 1s.
84 // This behavior matches the FP8 E4M3 type described in
85 // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
86 // as non-signalling, although the paper does not state whether the NaN
87 // values are signalling or not.
90 // Represents the behavior in Float8E{5,4}E{2,3}FNUZ floating point types
91 // where NaN is represented by a sign bit of 1 and all 0s in the exponent
92 // and mantissa (i.e. the negative zero encoding in an IEEE float). Since
93 // there is only one NaN value, it is treated as quiet NaN. This matches the
94 // behavior described in https://arxiv.org/abs/2206.02915 .
98 /* Represents floating point arithmetic semantics. */
100 /* The largest E such that 2^E is representable; this matches the
101 definition of IEEE 754. */
102 APFloatBase::ExponentType maxExponent
;
104 /* The smallest E such that 2^E is a normalized number; this
105 matches the definition of IEEE 754. */
106 APFloatBase::ExponentType minExponent
;
108 /* Number of bits in the significand. This includes the integer
110 unsigned int precision
;
112 /* Number of bits actually used in the semantics. */
113 unsigned int sizeInBits
;
115 fltNonfiniteBehavior nonFiniteBehavior
= fltNonfiniteBehavior::IEEE754
;
117 fltNanEncoding nanEncoding
= fltNanEncoding::IEEE
;
118 // Returns true if any number described by this semantics can be precisely
119 // represented by the specified semantics. Does not take into account
120 // the value of fltNonfiniteBehavior.
121 bool isRepresentableBy(const fltSemantics
&S
) const {
122 return maxExponent
<= S
.maxExponent
&& minExponent
>= S
.minExponent
&&
123 precision
<= S
.precision
;
127 static constexpr fltSemantics semIEEEhalf
= {15, -14, 11, 16};
128 static constexpr fltSemantics semBFloat
= {127, -126, 8, 16};
129 static constexpr fltSemantics semIEEEsingle
= {127, -126, 24, 32};
130 static constexpr fltSemantics semIEEEdouble
= {1023, -1022, 53, 64};
131 static constexpr fltSemantics semIEEEquad
= {16383, -16382, 113, 128};
132 static constexpr fltSemantics semFloat8E5M2
= {15, -14, 3, 8};
133 static constexpr fltSemantics semFloat8E5M2FNUZ
= {
134 15, -15, 3, 8, fltNonfiniteBehavior::NanOnly
, fltNanEncoding::NegativeZero
};
135 static constexpr fltSemantics semFloat8E4M3FN
= {
136 8, -6, 4, 8, fltNonfiniteBehavior::NanOnly
, fltNanEncoding::AllOnes
};
137 static constexpr fltSemantics semFloat8E4M3FNUZ
= {
138 7, -7, 4, 8, fltNonfiniteBehavior::NanOnly
, fltNanEncoding::NegativeZero
};
139 static constexpr fltSemantics semFloat8E4M3B11FNUZ
= {
140 4, -10, 4, 8, fltNonfiniteBehavior::NanOnly
, fltNanEncoding::NegativeZero
};
141 static constexpr fltSemantics semFloatTF32
= {127, -126, 11, 19};
142 static constexpr fltSemantics semX87DoubleExtended
= {16383, -16382, 64, 80};
143 static constexpr fltSemantics semBogus
= {0, 0, 0, 0};
145 /* The IBM double-double semantics. Such a number consists of a pair of IEEE
146 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
147 (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
148 Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
149 to each other, and two 11-bit exponents.
151 Note: we need to make the value different from semBogus as otherwise
152 an unsafe optimization may collapse both values to a single address,
153 and we heavily rely on them having distinct addresses. */
154 static constexpr fltSemantics semPPCDoubleDouble
= {-1, 0, 0, 128};
156 /* These are legacy semantics for the fallback, inaccurate implementation of
157 IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
158 operation. It's equivalent to having an IEEE number with consecutive 106
159 bits of mantissa and 11 bits of exponent.
161 It's not equivalent to IBM double-double. For example, a legit IBM
162 double-double, 1 + epsilon:
164 1 + epsilon = 1 + (1 >> 1076)
166 is not representable by a consecutive 106 bits of mantissa.
168 Currently, these semantics are used in the following way:
170 semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
171 (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
172 semPPCDoubleDoubleLegacy -> IEEE operations
174 We use bitcastToAPInt() to get the bit representation (in APInt) of the
175 underlying IEEEdouble, then use the APInt constructor to construct the
178 TODO: Implement all operations in semPPCDoubleDouble, and delete these
180 static constexpr fltSemantics semPPCDoubleDoubleLegacy
= {1023, -1022 + 53,
183 const llvm::fltSemantics
&APFloatBase::EnumToSemantics(Semantics S
) {
195 case S_PPCDoubleDouble
:
196 return PPCDoubleDouble();
199 case S_Float8E5M2FNUZ
:
200 return Float8E5M2FNUZ();
202 return Float8E4M3FN();
203 case S_Float8E4M3FNUZ
:
204 return Float8E4M3FNUZ();
205 case S_Float8E4M3B11FNUZ
:
206 return Float8E4M3B11FNUZ();
209 case S_x87DoubleExtended
:
210 return x87DoubleExtended();
212 llvm_unreachable("Unrecognised floating semantics");
215 APFloatBase::Semantics
216 APFloatBase::SemanticsToEnum(const llvm::fltSemantics
&Sem
) {
217 if (&Sem
== &llvm::APFloat::IEEEhalf())
219 else if (&Sem
== &llvm::APFloat::BFloat())
221 else if (&Sem
== &llvm::APFloat::IEEEsingle())
223 else if (&Sem
== &llvm::APFloat::IEEEdouble())
225 else if (&Sem
== &llvm::APFloat::IEEEquad())
227 else if (&Sem
== &llvm::APFloat::PPCDoubleDouble())
228 return S_PPCDoubleDouble
;
229 else if (&Sem
== &llvm::APFloat::Float8E5M2())
231 else if (&Sem
== &llvm::APFloat::Float8E5M2FNUZ())
232 return S_Float8E5M2FNUZ
;
233 else if (&Sem
== &llvm::APFloat::Float8E4M3FN())
234 return S_Float8E4M3FN
;
235 else if (&Sem
== &llvm::APFloat::Float8E4M3FNUZ())
236 return S_Float8E4M3FNUZ
;
237 else if (&Sem
== &llvm::APFloat::Float8E4M3B11FNUZ())
238 return S_Float8E4M3B11FNUZ
;
239 else if (&Sem
== &llvm::APFloat::FloatTF32())
241 else if (&Sem
== &llvm::APFloat::x87DoubleExtended())
242 return S_x87DoubleExtended
;
244 llvm_unreachable("Unknown floating semantics");
247 const fltSemantics
&APFloatBase::IEEEhalf() { return semIEEEhalf
; }
248 const fltSemantics
&APFloatBase::BFloat() { return semBFloat
; }
249 const fltSemantics
&APFloatBase::IEEEsingle() { return semIEEEsingle
; }
250 const fltSemantics
&APFloatBase::IEEEdouble() { return semIEEEdouble
; }
251 const fltSemantics
&APFloatBase::IEEEquad() { return semIEEEquad
; }
252 const fltSemantics
&APFloatBase::PPCDoubleDouble() {
253 return semPPCDoubleDouble
;
255 const fltSemantics
&APFloatBase::Float8E5M2() { return semFloat8E5M2
; }
256 const fltSemantics
&APFloatBase::Float8E5M2FNUZ() { return semFloat8E5M2FNUZ
; }
257 const fltSemantics
&APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN
; }
258 const fltSemantics
&APFloatBase::Float8E4M3FNUZ() { return semFloat8E4M3FNUZ
; }
259 const fltSemantics
&APFloatBase::Float8E4M3B11FNUZ() {
260 return semFloat8E4M3B11FNUZ
;
262 const fltSemantics
&APFloatBase::FloatTF32() { return semFloatTF32
; }
263 const fltSemantics
&APFloatBase::x87DoubleExtended() {
264 return semX87DoubleExtended
;
266 const fltSemantics
&APFloatBase::Bogus() { return semBogus
; }
268 constexpr RoundingMode
APFloatBase::rmNearestTiesToEven
;
269 constexpr RoundingMode
APFloatBase::rmTowardPositive
;
270 constexpr RoundingMode
APFloatBase::rmTowardNegative
;
271 constexpr RoundingMode
APFloatBase::rmTowardZero
;
272 constexpr RoundingMode
APFloatBase::rmNearestTiesToAway
;
274 /* A tight upper bound on number of parts required to hold the value
277 power * 815 / (351 * integerPartWidth) + 1
279 However, whilst the result may require only this many parts,
280 because we are multiplying two values to get it, the
281 multiplication may require an extra part with the excess part
282 being zero (consider the trivial case of 1 * 1, tcFullMultiply
283 requires two parts to hold the single-part result). So we add an
284 extra one to guarantee enough space whilst multiplying. */
285 const unsigned int maxExponent
= 16383;
286 const unsigned int maxPrecision
= 113;
287 const unsigned int maxPowerOfFiveExponent
= maxExponent
+ maxPrecision
- 1;
288 const unsigned int maxPowerOfFiveParts
=
290 ((maxPowerOfFiveExponent
* 815) / (351 * APFloatBase::integerPartWidth
));
292 unsigned int APFloatBase::semanticsPrecision(const fltSemantics
&semantics
) {
293 return semantics
.precision
;
295 APFloatBase::ExponentType
296 APFloatBase::semanticsMaxExponent(const fltSemantics
&semantics
) {
297 return semantics
.maxExponent
;
299 APFloatBase::ExponentType
300 APFloatBase::semanticsMinExponent(const fltSemantics
&semantics
) {
301 return semantics
.minExponent
;
303 unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics
&semantics
) {
304 return semantics
.sizeInBits
;
306 unsigned int APFloatBase::semanticsIntSizeInBits(const fltSemantics
&semantics
,
308 // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
309 // at least one more bit than the MaxExponent to hold the max FP value.
310 unsigned int MinBitWidth
= semanticsMaxExponent(semantics
) + 1;
311 // Extra sign bit needed.
317 bool APFloatBase::isRepresentableAsNormalIn(const fltSemantics
&Src
,
318 const fltSemantics
&Dst
) {
319 // Exponent range must be larger.
320 if (Src
.maxExponent
>= Dst
.maxExponent
|| Src
.minExponent
<= Dst
.minExponent
)
323 // If the mantissa is long enough, the result value could still be denormal
324 // with a larger exponent range.
326 // FIXME: This condition is probably not accurate but also shouldn't be a
327 // practical concern with existing types.
328 return Dst
.precision
>= Src
.precision
;
331 unsigned APFloatBase::getSizeInBits(const fltSemantics
&Sem
) {
332 return Sem
.sizeInBits
;
335 static constexpr APFloatBase::ExponentType
336 exponentZero(const fltSemantics
&semantics
) {
337 return semantics
.minExponent
- 1;
340 static constexpr APFloatBase::ExponentType
341 exponentInf(const fltSemantics
&semantics
) {
342 return semantics
.maxExponent
+ 1;
345 static constexpr APFloatBase::ExponentType
346 exponentNaN(const fltSemantics
&semantics
) {
347 if (semantics
.nonFiniteBehavior
== fltNonfiniteBehavior::NanOnly
) {
348 if (semantics
.nanEncoding
== fltNanEncoding::NegativeZero
)
349 return exponentZero(semantics
);
350 return semantics
.maxExponent
;
352 return semantics
.maxExponent
+ 1;
355 /* A bunch of private, handy routines. */
357 static inline Error
createError(const Twine
&Err
) {
358 return make_error
<StringError
>(Err
, inconvertibleErrorCode());
361 static constexpr inline unsigned int partCountForBits(unsigned int bits
) {
362 return ((bits
) + APFloatBase::integerPartWidth
- 1) / APFloatBase::integerPartWidth
;
365 /* Returns 0U-9U. Return values >= 10U are not digits. */
366 static inline unsigned int
367 decDigitValue(unsigned int c
)
372 /* Return the value of a decimal exponent of the form
375 If the exponent overflows, returns a large exponent with the
377 static Expected
<int> readExponent(StringRef::iterator begin
,
378 StringRef::iterator end
) {
380 unsigned int absExponent
;
381 const unsigned int overlargeExponent
= 24000; /* FIXME. */
382 StringRef::iterator p
= begin
;
384 // Treat no exponent as 0 to match binutils
385 if (p
== end
|| ((*p
== '-' || *p
== '+') && (p
+ 1) == end
)) {
389 isNegative
= (*p
== '-');
390 if (*p
== '-' || *p
== '+') {
393 return createError("Exponent has no digits");
396 absExponent
= decDigitValue(*p
++);
397 if (absExponent
>= 10U)
398 return createError("Invalid character in exponent");
400 for (; p
!= end
; ++p
) {
403 value
= decDigitValue(*p
);
405 return createError("Invalid character in exponent");
407 absExponent
= absExponent
* 10U + value
;
408 if (absExponent
>= overlargeExponent
) {
409 absExponent
= overlargeExponent
;
415 return -(int) absExponent
;
417 return (int) absExponent
;
420 /* This is ugly and needs cleaning up, but I don't immediately see
421 how whilst remaining safe. */
422 static Expected
<int> totalExponent(StringRef::iterator p
,
423 StringRef::iterator end
,
424 int exponentAdjustment
) {
425 int unsignedExponent
;
426 bool negative
, overflow
;
430 return createError("Exponent has no digits");
432 negative
= *p
== '-';
433 if (*p
== '-' || *p
== '+') {
436 return createError("Exponent has no digits");
439 unsignedExponent
= 0;
441 for (; p
!= end
; ++p
) {
444 value
= decDigitValue(*p
);
446 return createError("Invalid character in exponent");
448 unsignedExponent
= unsignedExponent
* 10 + value
;
449 if (unsignedExponent
> 32767) {
455 if (exponentAdjustment
> 32767 || exponentAdjustment
< -32768)
459 exponent
= unsignedExponent
;
461 exponent
= -exponent
;
462 exponent
+= exponentAdjustment
;
463 if (exponent
> 32767 || exponent
< -32768)
468 exponent
= negative
? -32768: 32767;
473 static Expected
<StringRef::iterator
>
474 skipLeadingZeroesAndAnyDot(StringRef::iterator begin
, StringRef::iterator end
,
475 StringRef::iterator
*dot
) {
476 StringRef::iterator p
= begin
;
478 while (p
!= end
&& *p
== '0')
481 if (p
!= end
&& *p
== '.') {
484 if (end
- begin
== 1)
485 return createError("Significand has no digits");
487 while (p
!= end
&& *p
== '0')
494 /* Given a normal decimal floating point number of the form
498 where the decimal point and exponent are optional, fill out the
499 structure D. Exponent is appropriate if the significand is
500 treated as an integer, and normalizedExponent if the significand
501 is taken to have the decimal point after a single leading
504 If the value is zero, V->firstSigDigit points to a non-digit, and
505 the return exponent is zero.
508 const char *firstSigDigit
;
509 const char *lastSigDigit
;
511 int normalizedExponent
;
514 static Error
interpretDecimal(StringRef::iterator begin
,
515 StringRef::iterator end
, decimalInfo
*D
) {
516 StringRef::iterator dot
= end
;
518 auto PtrOrErr
= skipLeadingZeroesAndAnyDot(begin
, end
, &dot
);
520 return PtrOrErr
.takeError();
521 StringRef::iterator p
= *PtrOrErr
;
523 D
->firstSigDigit
= p
;
525 D
->normalizedExponent
= 0;
527 for (; p
!= end
; ++p
) {
530 return createError("String contains multiple dots");
535 if (decDigitValue(*p
) >= 10U)
540 if (*p
!= 'e' && *p
!= 'E')
541 return createError("Invalid character in significand");
543 return createError("Significand has no digits");
544 if (dot
!= end
&& p
- begin
== 1)
545 return createError("Significand has no digits");
547 /* p points to the first non-digit in the string */
548 auto ExpOrErr
= readExponent(p
+ 1, end
);
550 return ExpOrErr
.takeError();
551 D
->exponent
= *ExpOrErr
;
553 /* Implied decimal point? */
558 /* If number is all zeroes accept any exponent. */
559 if (p
!= D
->firstSigDigit
) {
560 /* Drop insignificant trailing zeroes. */
565 while (p
!= begin
&& *p
== '0');
566 while (p
!= begin
&& *p
== '.');
569 /* Adjust the exponents for any decimal point. */
570 D
->exponent
+= static_cast<APFloat::ExponentType
>((dot
- p
) - (dot
> p
));
571 D
->normalizedExponent
= (D
->exponent
+
572 static_cast<APFloat::ExponentType
>((p
- D
->firstSigDigit
)
573 - (dot
> D
->firstSigDigit
&& dot
< p
)));
577 return Error::success();
580 /* Return the trailing fraction of a hexadecimal number.
581 DIGITVALUE is the first hex digit of the fraction, P points to
583 static Expected
<lostFraction
>
584 trailingHexadecimalFraction(StringRef::iterator p
, StringRef::iterator end
,
585 unsigned int digitValue
) {
586 unsigned int hexDigit
;
588 /* If the first trailing digit isn't 0 or 8 we can work out the
589 fraction immediately. */
591 return lfMoreThanHalf
;
592 else if (digitValue
< 8 && digitValue
> 0)
593 return lfLessThanHalf
;
595 // Otherwise we need to find the first non-zero digit.
596 while (p
!= end
&& (*p
== '0' || *p
== '.'))
600 return createError("Invalid trailing hexadecimal fraction!");
602 hexDigit
= hexDigitValue(*p
);
604 /* If we ran off the end it is exactly zero or one-half, otherwise
606 if (hexDigit
== UINT_MAX
)
607 return digitValue
== 0 ? lfExactlyZero
: lfExactlyHalf
;
609 return digitValue
== 0 ? lfLessThanHalf
: lfMoreThanHalf
;
612 /* Return the fraction lost were a bignum truncated losing the least
613 significant BITS bits. */
615 lostFractionThroughTruncation(const APFloatBase::integerPart
*parts
,
616 unsigned int partCount
,
621 lsb
= APInt::tcLSB(parts
, partCount
);
623 /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX. */
625 return lfExactlyZero
;
627 return lfExactlyHalf
;
628 if (bits
<= partCount
* APFloatBase::integerPartWidth
&&
629 APInt::tcExtractBit(parts
, bits
- 1))
630 return lfMoreThanHalf
;
632 return lfLessThanHalf
;
635 /* Shift DST right BITS bits noting lost fraction. */
637 shiftRight(APFloatBase::integerPart
*dst
, unsigned int parts
, unsigned int bits
)
639 lostFraction lost_fraction
;
641 lost_fraction
= lostFractionThroughTruncation(dst
, parts
, bits
);
643 APInt::tcShiftRight(dst
, parts
, bits
);
645 return lost_fraction
;
648 /* Combine the effect of two lost fractions. */
650 combineLostFractions(lostFraction moreSignificant
,
651 lostFraction lessSignificant
)
653 if (lessSignificant
!= lfExactlyZero
) {
654 if (moreSignificant
== lfExactlyZero
)
655 moreSignificant
= lfLessThanHalf
;
656 else if (moreSignificant
== lfExactlyHalf
)
657 moreSignificant
= lfMoreThanHalf
;
660 return moreSignificant
;
663 /* The error from the true value, in half-ulps, on multiplying two
664 floating point numbers, which differ from the value they
665 approximate by at most HUE1 and HUE2 half-ulps, is strictly less
666 than the returned value.
668 See "How to Read Floating Point Numbers Accurately" by William D
671 HUerrBound(bool inexactMultiply
, unsigned int HUerr1
, unsigned int HUerr2
)
673 assert(HUerr1
< 2 || HUerr2
< 2 || (HUerr1
+ HUerr2
< 8));
675 if (HUerr1
+ HUerr2
== 0)
676 return inexactMultiply
* 2; /* <= inexactMultiply half-ulps. */
678 return inexactMultiply
+ 2 * (HUerr1
+ HUerr2
);
681 /* The number of ulps from the boundary (zero, or half if ISNEAREST)
682 when the least significant BITS are truncated. BITS cannot be
684 static APFloatBase::integerPart
685 ulpsFromBoundary(const APFloatBase::integerPart
*parts
, unsigned int bits
,
687 unsigned int count
, partBits
;
688 APFloatBase::integerPart part
, boundary
;
693 count
= bits
/ APFloatBase::integerPartWidth
;
694 partBits
= bits
% APFloatBase::integerPartWidth
+ 1;
696 part
= parts
[count
] & (~(APFloatBase::integerPart
) 0 >> (APFloatBase::integerPartWidth
- partBits
));
699 boundary
= (APFloatBase::integerPart
) 1 << (partBits
- 1);
704 if (part
- boundary
<= boundary
- part
)
705 return part
- boundary
;
707 return boundary
- part
;
710 if (part
== boundary
) {
713 return ~(APFloatBase::integerPart
) 0; /* A lot. */
716 } else if (part
== boundary
- 1) {
719 return ~(APFloatBase::integerPart
) 0; /* A lot. */
724 return ~(APFloatBase::integerPart
) 0; /* A lot. */
727 /* Place pow(5, power) in DST, and return the number of parts used.
728 DST must be at least one part larger than size of the answer. */
730 powerOf5(APFloatBase::integerPart
*dst
, unsigned int power
) {
731 static const APFloatBase::integerPart firstEightPowers
[] = { 1, 5, 25, 125, 625, 3125, 15625, 78125 };
732 APFloatBase::integerPart pow5s
[maxPowerOfFiveParts
* 2 + 5];
733 pow5s
[0] = 78125 * 5;
735 unsigned int partsCount
[16] = { 1 };
736 APFloatBase::integerPart scratch
[maxPowerOfFiveParts
], *p1
, *p2
, *pow5
;
738 assert(power
<= maxExponent
);
743 *p1
= firstEightPowers
[power
& 7];
749 for (unsigned int n
= 0; power
; power
>>= 1, n
++) {
754 /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */
756 pc
= partsCount
[n
- 1];
757 APInt::tcFullMultiply(pow5
, pow5
- pc
, pow5
- pc
, pc
, pc
);
759 if (pow5
[pc
- 1] == 0)
765 APFloatBase::integerPart
*tmp
;
767 APInt::tcFullMultiply(p2
, p1
, pow5
, result
, pc
);
769 if (p2
[result
- 1] == 0)
772 /* Now result is in p1 with partsCount parts and p2 is scratch
783 APInt::tcAssign(dst
, p1
, result
);
/* Hex digit tables. Each has an extra trailing '0' so that adding one to a
   digit index needs no modular arithmetic; this is used when rounding up
   during hexadecimal output. */
static const char hexDigitsLower[] = "0123456789abcdef0";
static const char hexDigitsUpper[] = "0123456789ABCDEF0";

/* Spellings of the non-finite values, in lower and upper case. */
static const char infinityL[] = "infinity";
static const char infinityU[] = "INFINITY";
static const char NaNL[] = "nan";
static const char NaNU[] = "NAN";
797 /* Write out an integerPart in hexadecimal, starting with the most
798 significant nibble. Write out exactly COUNT hexdigits, return
801 partAsHex (char *dst
, APFloatBase::integerPart part
, unsigned int count
,
802 const char *hexDigitChars
)
804 unsigned int result
= count
;
806 assert(count
!= 0 && count
<= APFloatBase::integerPartWidth
/ 4);
808 part
>>= (APFloatBase::integerPartWidth
- 4 * count
);
810 dst
[count
] = hexDigitChars
[part
& 0xf];
817 /* Write out an unsigned decimal integer. */
819 writeUnsignedDecimal (char *dst
, unsigned int n
)
835 /* Write out a signed decimal integer. */
837 writeSignedDecimal (char *dst
, int value
)
841 dst
= writeUnsignedDecimal(dst
, -(unsigned) value
);
843 dst
= writeUnsignedDecimal(dst
, value
);
850 void IEEEFloat::initialize(const fltSemantics
*ourSemantics
) {
853 semantics
= ourSemantics
;
856 significand
.parts
= new integerPart
[count
];
859 void IEEEFloat::freeSignificand() {
861 delete [] significand
.parts
;
864 void IEEEFloat::assign(const IEEEFloat
&rhs
) {
865 assert(semantics
== rhs
.semantics
);
868 category
= rhs
.category
;
869 exponent
= rhs
.exponent
;
870 if (isFiniteNonZero() || category
== fcNaN
)
871 copySignificand(rhs
);
874 void IEEEFloat::copySignificand(const IEEEFloat
&rhs
) {
875 assert(isFiniteNonZero() || category
== fcNaN
);
876 assert(rhs
.partCount() >= partCount());
878 APInt::tcAssign(significandParts(), rhs
.significandParts(),
882 /* Make this number a NaN, with an arbitrary but deterministic value
883 for the significand. If double or longer, this is a signalling NaN,
884 which may not be ideal. If float, this is QNaN(0). */
885 void IEEEFloat::makeNaN(bool SNaN
, bool Negative
, const APInt
*fill
) {
888 exponent
= exponentNaN();
890 integerPart
*significand
= significandParts();
891 unsigned numParts
= partCount();
894 if (semantics
->nonFiniteBehavior
== fltNonfiniteBehavior::NanOnly
) {
895 // Finite-only types do not distinguish signalling and quiet NaN, so
896 // make them all signalling.
898 if (semantics
->nanEncoding
== fltNanEncoding::NegativeZero
) {
900 fill_storage
= APInt::getZero(semantics
->precision
- 1);
902 fill_storage
= APInt::getAllOnes(semantics
->precision
- 1);
904 fill
= &fill_storage
;
907 // Set the significand bits to the fill.
908 if (!fill
|| fill
->getNumWords() < numParts
)
909 APInt::tcSet(significand
, 0, numParts
);
911 APInt::tcAssign(significand
, fill
->getRawData(),
912 std::min(fill
->getNumWords(), numParts
));
914 // Zero out the excess bits of the significand.
915 unsigned bitsToPreserve
= semantics
->precision
- 1;
916 unsigned part
= bitsToPreserve
/ 64;
917 bitsToPreserve
%= 64;
918 significand
[part
] &= ((1ULL << bitsToPreserve
) - 1);
919 for (part
++; part
!= numParts
; ++part
)
920 significand
[part
] = 0;
923 unsigned QNaNBit
= semantics
->precision
- 2;
926 // We always have to clear the QNaN bit to make it an SNaN.
927 APInt::tcClearBit(significand
, QNaNBit
);
929 // If there are no bits set in the payload, we have to set
930 // *something* to make it a NaN instead of an infinity;
931 // conventionally, this is the next bit down from the QNaN bit.
932 if (APInt::tcIsZero(significand
, numParts
))
933 APInt::tcSetBit(significand
, QNaNBit
- 1);
934 } else if (semantics
->nanEncoding
== fltNanEncoding::NegativeZero
) {
935 // The only NaN is a quiet NaN, and it has no bits set in the significand.
938 // We always have to set the QNaN bit to make it a QNaN.
939 APInt::tcSetBit(significand
, QNaNBit
);
942 // For x87 extended precision, we want to make a NaN, not a
943 // pseudo-NaN. Maybe we should expose the ability to make
945 if (semantics
== &semX87DoubleExtended
)
946 APInt::tcSetBit(significand
, QNaNBit
+ 1);
949 IEEEFloat
&IEEEFloat::operator=(const IEEEFloat
&rhs
) {
951 if (semantics
!= rhs
.semantics
) {
953 initialize(rhs
.semantics
);
961 IEEEFloat
&IEEEFloat::operator=(IEEEFloat
&&rhs
) {
964 semantics
= rhs
.semantics
;
965 significand
= rhs
.significand
;
966 exponent
= rhs
.exponent
;
967 category
= rhs
.category
;
970 rhs
.semantics
= &semBogus
;
974 bool IEEEFloat::isDenormal() const {
975 return isFiniteNonZero() && (exponent
== semantics
->minExponent
) &&
976 (APInt::tcExtractBit(significandParts(),
977 semantics
->precision
- 1) == 0);
980 bool IEEEFloat::isSmallest() const {
981 // The smallest number by magnitude in our format will be the smallest
982 // denormal, i.e. the floating point number with exponent being minimum
983 // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
984 return isFiniteNonZero() && exponent
== semantics
->minExponent
&&
985 significandMSB() == 0;
988 bool IEEEFloat::isSmallestNormalized() const {
989 return getCategory() == fcNormal
&& exponent
== semantics
->minExponent
&&
990 isSignificandAllZerosExceptMSB();
993 bool IEEEFloat::isSignificandAllOnes() const {
994 // Test if the significand excluding the integral bit is all ones. This allows
995 // us to test for binade boundaries.
996 const integerPart
*Parts
= significandParts();
997 const unsigned PartCount
= partCountForBits(semantics
->precision
);
998 for (unsigned i
= 0; i
< PartCount
- 1; i
++)
1002 // Set the unused high bits to all ones when we compare.
1003 const unsigned NumHighBits
=
1004 PartCount
*integerPartWidth
- semantics
->precision
+ 1;
1005 assert(NumHighBits
<= integerPartWidth
&& NumHighBits
> 0 &&
1006 "Can not have more high bits to fill than integerPartWidth");
1007 const integerPart HighBitFill
=
1008 ~integerPart(0) << (integerPartWidth
- NumHighBits
);
1009 if (~(Parts
[PartCount
- 1] | HighBitFill
))
1015 bool IEEEFloat::isSignificandAllOnesExceptLSB() const {
1016 // Test if the significand excluding the integral bit is all ones except for
1017 // the least significant bit.
1018 const integerPart
*Parts
= significandParts();
1023 const unsigned PartCount
= partCountForBits(semantics
->precision
);
1024 for (unsigned i
= 0; i
< PartCount
- 1; i
++) {
1025 if (~Parts
[i
] & ~unsigned{!i
})
1029 // Set the unused high bits to all ones when we compare.
1030 const unsigned NumHighBits
=
1031 PartCount
* integerPartWidth
- semantics
->precision
+ 1;
1032 assert(NumHighBits
<= integerPartWidth
&& NumHighBits
> 0 &&
1033 "Can not have more high bits to fill than integerPartWidth");
1034 const integerPart HighBitFill
= ~integerPart(0)
1035 << (integerPartWidth
- NumHighBits
);
1036 if (~(Parts
[PartCount
- 1] | HighBitFill
| 0x1))
1042 bool IEEEFloat::isSignificandAllZeros() const {
1043 // Test if the significand excluding the integral bit is all zeros. This
1044 // allows us to test for binade boundaries.
1045 const integerPart
*Parts
= significandParts();
1046 const unsigned PartCount
= partCountForBits(semantics
->precision
);
1048 for (unsigned i
= 0; i
< PartCount
- 1; i
++)
1052 // Compute how many bits are used in the final word.
1053 const unsigned NumHighBits
=
1054 PartCount
*integerPartWidth
- semantics
->precision
+ 1;
1055 assert(NumHighBits
< integerPartWidth
&& "Can not have more high bits to "
1056 "clear than integerPartWidth");
1057 const integerPart HighBitMask
= ~integerPart(0) >> NumHighBits
;
1059 if (Parts
[PartCount
- 1] & HighBitMask
)
1065 bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
1066 const integerPart
*Parts
= significandParts();
1067 const unsigned PartCount
= partCountForBits(semantics
->precision
);
1069 for (unsigned i
= 0; i
< PartCount
- 1; i
++) {
1074 const unsigned NumHighBits
=
1075 PartCount
* integerPartWidth
- semantics
->precision
+ 1;
1076 return Parts
[PartCount
- 1] == integerPart(1)
1077 << (integerPartWidth
- NumHighBits
);
1080 bool IEEEFloat::isLargest() const {
1081 if (semantics
->nonFiniteBehavior
== fltNonfiniteBehavior::NanOnly
&&
1082 semantics
->nanEncoding
== fltNanEncoding::AllOnes
) {
1083 // The largest number by magnitude in our format will be the floating point
1084 // number with maximum exponent and with significand that is all ones except
1086 return isFiniteNonZero() && exponent
== semantics
->maxExponent
&&
1087 isSignificandAllOnesExceptLSB();
1089 // The largest number by magnitude in our format will be the floating point
1090 // number with maximum exponent and with significand that is all ones.
1091 return isFiniteNonZero() && exponent
== semantics
->maxExponent
&&
1092 isSignificandAllOnes();
1096 bool IEEEFloat::isInteger() const {
1097 // This could be made more efficient; I'm going for obviously correct.
1098 if (!isFinite()) return false;
1099 IEEEFloat truncated
= *this;
1100 truncated
.roundToIntegral(rmTowardZero
);
1101 return compare(truncated
) == cmpEqual
;
1104 bool IEEEFloat::bitwiseIsEqual(const IEEEFloat
&rhs
) const {
1107 if (semantics
!= rhs
.semantics
||
1108 category
!= rhs
.category
||
1111 if (category
==fcZero
|| category
==fcInfinity
)
1114 if (isFiniteNonZero() && exponent
!= rhs
.exponent
)
1117 return std::equal(significandParts(), significandParts() + partCount(),
1118 rhs
.significandParts());
1121 IEEEFloat::IEEEFloat(const fltSemantics
&ourSemantics
, integerPart value
) {
1122 initialize(&ourSemantics
);
1124 category
= fcNormal
;
1126 exponent
= ourSemantics
.precision
- 1;
1127 significandParts()[0] = value
;
1128 normalize(rmNearestTiesToEven
, lfExactlyZero
);
1131 IEEEFloat::IEEEFloat(const fltSemantics
&ourSemantics
) {
1132 initialize(&ourSemantics
);
1136 // Delegate to the previous constructor, because later copy constructor may
1137 // actually inspects category, which can't be garbage.
1138 IEEEFloat::IEEEFloat(const fltSemantics
&ourSemantics
, uninitializedTag tag
)
1139 : IEEEFloat(ourSemantics
) {}
1141 IEEEFloat::IEEEFloat(const IEEEFloat
&rhs
) {
1142 initialize(rhs
.semantics
);
1146 IEEEFloat::IEEEFloat(IEEEFloat
&&rhs
) : semantics(&semBogus
) {
1147 *this = std::move(rhs
);
1150 IEEEFloat::~IEEEFloat() { freeSignificand(); }
1152 unsigned int IEEEFloat::partCount() const {
1153 return partCountForBits(semantics
->precision
+ 1);
1156 const IEEEFloat::integerPart
*IEEEFloat::significandParts() const {
1157 return const_cast<IEEEFloat
*>(this)->significandParts();
1160 IEEEFloat::integerPart
*IEEEFloat::significandParts() {
1161 if (partCount() > 1)
1162 return significand
.parts
;
1164 return &significand
.part
;
1167 void IEEEFloat::zeroSignificand() {
1168 APInt::tcSet(significandParts(), 0, partCount());
1171 /* Increment an fcNormal floating point number's significand. */
1172 void IEEEFloat::incrementSignificand() {
1175 carry
= APInt::tcIncrement(significandParts(), partCount());
1177 /* Our callers should never cause us to overflow. */
1182 /* Add the significand of the RHS. Returns the carry flag. */
1183 IEEEFloat::integerPart
IEEEFloat::addSignificand(const IEEEFloat
&rhs
) {
1186 parts
= significandParts();
1188 assert(semantics
== rhs
.semantics
);
1189 assert(exponent
== rhs
.exponent
);
1191 return APInt::tcAdd(parts
, rhs
.significandParts(), 0, partCount());
1194 /* Subtract the significand of the RHS with a borrow flag. Returns
1196 IEEEFloat::integerPart
IEEEFloat::subtractSignificand(const IEEEFloat
&rhs
,
1197 integerPart borrow
) {
1200 parts
= significandParts();
1202 assert(semantics
== rhs
.semantics
);
1203 assert(exponent
== rhs
.exponent
);
1205 return APInt::tcSubtract(parts
, rhs
.significandParts(), borrow
,
1209 /* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
1210 on to the full-precision result of the multiplication. Returns the
1212 lostFraction
IEEEFloat::multiplySignificand(const IEEEFloat
&rhs
,
1214 unsigned int omsb
; // One, not zero, based MSB.
1215 unsigned int partsCount
, newPartsCount
, precision
;
1216 integerPart
*lhsSignificand
;
1217 integerPart scratch
[4];
1218 integerPart
*fullSignificand
;
1219 lostFraction lost_fraction
;
1222 assert(semantics
== rhs
.semantics
);
1224 precision
= semantics
->precision
;
1226 // Allocate space for twice as many bits as the original significand, plus one
1227 // extra bit for the addition to overflow into.
1228 newPartsCount
= partCountForBits(precision
* 2 + 1);
1230 if (newPartsCount
> 4)
1231 fullSignificand
= new integerPart
[newPartsCount
];
1233 fullSignificand
= scratch
;
1235 lhsSignificand
= significandParts();
1236 partsCount
= partCount();
1238 APInt::tcFullMultiply(fullSignificand
, lhsSignificand
,
1239 rhs
.significandParts(), partsCount
, partsCount
);
1241 lost_fraction
= lfExactlyZero
;
1242 omsb
= APInt::tcMSB(fullSignificand
, newPartsCount
) + 1;
1243 exponent
+= rhs
.exponent
;
1245 // Assume the operands involved in the multiplication are single-precision
1246 // FP, and the two multiplicands are:
1247 // *this = a23 . a22 ... a0 * 2^e1
1248 // rhs = b23 . b22 ... b0 * 2^e2
1249 // the result of multiplication is:
1250 // *this = c48 c47 c46 . c45 ... c0 * 2^(e1+e2)
1251 // Note that there are three significant bits at the left-hand side of the
1252 // radix point: two for the multiplication, and an overflow bit for the
1253 // addition (that will always be zero at this point). Move the radix point
1254 // toward left by two bits, and adjust exponent accordingly.
1257 if (addend
.isNonZero()) {
1258 // The intermediate result of the multiplication has "2 * precision"
1259 // significant bits; adjust the addend to be consistent with mul result.
1261 Significand savedSignificand
= significand
;
1262 const fltSemantics
*savedSemantics
= semantics
;
1263 fltSemantics extendedSemantics
;
1265 unsigned int extendedPrecision
;
1267 // Normalize our MSB to one below the top bit to allow for overflow.
1268 extendedPrecision
= 2 * precision
+ 1;
1269 if (omsb
!= extendedPrecision
- 1) {
1270 assert(extendedPrecision
> omsb
);
1271 APInt::tcShiftLeft(fullSignificand
, newPartsCount
,
1272 (extendedPrecision
- 1) - omsb
);
1273 exponent
-= (extendedPrecision
- 1) - omsb
;
1276 /* Create new semantics. */
1277 extendedSemantics
= *semantics
;
1278 extendedSemantics
.precision
= extendedPrecision
;
1280 if (newPartsCount
== 1)
1281 significand
.part
= fullSignificand
[0];
1283 significand
.parts
= fullSignificand
;
1284 semantics
= &extendedSemantics
;
1286 // Make a copy so we can convert it to the extended semantics.
1287 // Note that we cannot convert the addend directly, as the extendedSemantics
1288 // is a local variable (which we take a reference to).
1289 IEEEFloat
extendedAddend(addend
);
1290 status
= extendedAddend
.convert(extendedSemantics
, rmTowardZero
, &ignored
);
1291 assert(status
== opOK
);
1294 // Shift the significand of the addend right by one bit. This guarantees
1295 // that the high bit of the significand is zero (same as fullSignificand),
1296 // so the addition will overflow (if it does overflow at all) into the top bit.
1297 lost_fraction
= extendedAddend
.shiftSignificandRight(1);
1298 assert(lost_fraction
== lfExactlyZero
&&
1299 "Lost precision while shifting addend for fused-multiply-add.");
1301 lost_fraction
= addOrSubtractSignificand(extendedAddend
, false);
1303 /* Restore our state. */
1304 if (newPartsCount
== 1)
1305 fullSignificand
[0] = significand
.part
;
1306 significand
= savedSignificand
;
1307 semantics
= savedSemantics
;
1309 omsb
= APInt::tcMSB(fullSignificand
, newPartsCount
) + 1;
1312 // Convert the result having "2 * precision" significant-bits back to the one
1313 // having "precision" significant-bits. First, move the radix point from
1314 // position "2*precision - 1" to "precision - 1". The exponent needs to be
1315 // adjusted by "2*precision - 1" - "precision - 1" = "precision".
1316 exponent
-= precision
+ 1;
1318 // In case MSB resides at the left-hand side of radix point, shift the
1319 // mantissa right by some amount to make sure the MSB reside right before
1320 // the radix point (i.e. "MSB . rest-significant-bits").
1322 // Note that the result is not normalized when "omsb < precision". So, the
1323 // caller needs to call IEEEFloat::normalize() if normalized value is
1325 if (omsb
> precision
) {
1326 unsigned int bits
, significantParts
;
1329 bits
= omsb
- precision
;
1330 significantParts
= partCountForBits(omsb
);
1331 lf
= shiftRight(fullSignificand
, significantParts
, bits
);
1332 lost_fraction
= combineLostFractions(lf
, lost_fraction
);
1336 APInt::tcAssign(lhsSignificand
, fullSignificand
, partsCount
);
1338 if (newPartsCount
> 4)
1339 delete [] fullSignificand
;
1341 return lost_fraction
;
1344 lostFraction
IEEEFloat::multiplySignificand(const IEEEFloat
&rhs
) {
1345 return multiplySignificand(rhs
, IEEEFloat(*semantics
));
1348 /* Divide our significand by the significand of RHS, leaving the quotient in our significand. Returns the lost fraction. */
1349 lostFraction
IEEEFloat::divideSignificand(const IEEEFloat
&rhs
) {
1350 unsigned int bit
, i
, partsCount
;
1351 const integerPart
*rhsSignificand
;
1352 integerPart
*lhsSignificand
, *dividend
, *divisor
;
1353 integerPart scratch
[4];
1354 lostFraction lost_fraction
;
1356 assert(semantics
== rhs
.semantics
);
1358 lhsSignificand
= significandParts();
1359 rhsSignificand
= rhs
.significandParts();
1360 partsCount
= partCount();
1363 dividend
= new integerPart
[partsCount
* 2];
1367 divisor
= dividend
+ partsCount
;
1369 /* Copy the dividend and divisor as they will be modified in-place. */
1370 for (i
= 0; i
< partsCount
; i
++) {
1371 dividend
[i
] = lhsSignificand
[i
];
1372 divisor
[i
] = rhsSignificand
[i
];
1373 lhsSignificand
[i
] = 0;
1376 exponent
-= rhs
.exponent
;
1378 unsigned int precision
= semantics
->precision
;
1380 /* Normalize the divisor. */
1381 bit
= precision
- APInt::tcMSB(divisor
, partsCount
) - 1;
1384 APInt::tcShiftLeft(divisor
, partsCount
, bit
);
1387 /* Normalize the dividend. */
1388 bit
= precision
- APInt::tcMSB(dividend
, partsCount
) - 1;
1391 APInt::tcShiftLeft(dividend
, partsCount
, bit
);
1394 /* Ensure the dividend >= divisor initially for the loop below.
1395 Incidentally, this means that the division loop below is
1396 guaranteed to set the integer bit to one. */
1397 if (APInt::tcCompare(dividend
, divisor
, partsCount
) < 0) {
1399 APInt::tcShiftLeft(dividend
, partsCount
, 1);
1400 assert(APInt::tcCompare(dividend
, divisor
, partsCount
) >= 0);
1403 /* Long division. */
1404 for (bit
= precision
; bit
; bit
-= 1) {
1405 if (APInt::tcCompare(dividend
, divisor
, partsCount
) >= 0) {
1406 APInt::tcSubtract(dividend
, divisor
, 0, partsCount
);
1407 APInt::tcSetBit(lhsSignificand
, bit
- 1);
1410 APInt::tcShiftLeft(dividend
, partsCount
, 1);
1413 /* Figure out the lost fraction. */
1414 int cmp
= APInt::tcCompare(dividend
, divisor
, partsCount
);
1417 lost_fraction
= lfMoreThanHalf
;
1419 lost_fraction
= lfExactlyHalf
;
1420 else if (APInt::tcIsZero(dividend
, partsCount
))
1421 lost_fraction
= lfExactlyZero
;
1423 lost_fraction
= lfLessThanHalf
;
1428 return lost_fraction
;
1431 unsigned int IEEEFloat::significandMSB() const {
1432 return APInt::tcMSB(significandParts(), partCount());
1435 unsigned int IEEEFloat::significandLSB() const {
1436 return APInt::tcLSB(significandParts(), partCount());
1439 /* Note that a zero result is NOT normalized to fcZero. */
1440 lostFraction
IEEEFloat::shiftSignificandRight(unsigned int bits
) {
1441 /* Our exponent should not overflow. */
1442 assert((ExponentType
) (exponent
+ bits
) >= exponent
);
1446 return shiftRight(significandParts(), partCount(), bits
);
1449 /* Shift the significand left BITS bits, subtract BITS from its exponent. */
1450 void IEEEFloat::shiftSignificandLeft(unsigned int bits
) {
1451 assert(bits
< semantics
->precision
);
1454 unsigned int partsCount
= partCount();
1456 APInt::tcShiftLeft(significandParts(), partsCount
, bits
);
1459 assert(!APInt::tcIsZero(significandParts(), partsCount
));
1463 IEEEFloat::cmpResult
1464 IEEEFloat::compareAbsoluteValue(const IEEEFloat
&rhs
) const {
1467 assert(semantics
== rhs
.semantics
);
1468 assert(isFiniteNonZero());
1469 assert(rhs
.isFiniteNonZero());
1471 compare
= exponent
- rhs
.exponent
;
1473 /* If exponents are equal, do an unsigned bignum comparison of the
1476 compare
= APInt::tcCompare(significandParts(), rhs
.significandParts(),
1480 return cmpGreaterThan
;
1481 else if (compare
< 0)
1487 /* Set the least significant BITS bits of a bignum, clear the
1489 static void tcSetLeastSignificantBits(APInt::WordType
*dst
, unsigned parts
,
1492 while (bits
> APInt::APINT_BITS_PER_WORD
) {
1493 dst
[i
++] = ~(APInt::WordType
)0;
1494 bits
-= APInt::APINT_BITS_PER_WORD
;
1498 dst
[i
++] = ~(APInt::WordType
)0 >> (APInt::APINT_BITS_PER_WORD
- bits
);
1504 /* Handle overflow. Sign is preserved. We either become infinity or
1505 the largest finite number. */
1506 IEEEFloat::opStatus
IEEEFloat::handleOverflow(roundingMode rounding_mode
) {
1508 if (rounding_mode
== rmNearestTiesToEven
||
1509 rounding_mode
== rmNearestTiesToAway
||
1510 (rounding_mode
== rmTowardPositive
&& !sign
) ||
1511 (rounding_mode
== rmTowardNegative
&& sign
)) {
1512 if (semantics
->nonFiniteBehavior
== fltNonfiniteBehavior::NanOnly
)
1513 makeNaN(false, sign
);
1515 category
= fcInfinity
;
1516 return (opStatus
) (opOverflow
| opInexact
);
1519 /* Otherwise we become the largest finite number. */
1520 category
= fcNormal
;
1521 exponent
= semantics
->maxExponent
;
1522 tcSetLeastSignificantBits(significandParts(), partCount(),
1523 semantics
->precision
);
1524 if (semantics
->nonFiniteBehavior
== fltNonfiniteBehavior::NanOnly
&&
1525 semantics
->nanEncoding
== fltNanEncoding::AllOnes
)
1526 APInt::tcClearBit(significandParts(), 0);
1531 /* Returns TRUE if, when truncating the current number, with BIT the
1532 new LSB, with the given lost fraction and rounding mode, the result
1533 would need to be rounded away from zero (i.e., by increasing the
1534 signficand). This routine must work for fcZero of both signs, and
1535 fcNormal numbers. */
1536 bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode
,
1537 lostFraction lost_fraction
,
1538 unsigned int bit
) const {
1539 /* NaNs and infinities should not have lost fractions. */
1540 assert(isFiniteNonZero() || category
== fcZero
);
1542 /* Current callers never pass this so we don't handle it. */
1543 assert(lost_fraction
!= lfExactlyZero
);
1545 switch (rounding_mode
) {
1546 case rmNearestTiesToAway
:
1547 return lost_fraction
== lfExactlyHalf
|| lost_fraction
== lfMoreThanHalf
;
1549 case rmNearestTiesToEven
:
1550 if (lost_fraction
== lfMoreThanHalf
)
1553 /* Our zeroes don't have a significand to test. */
1554 if (lost_fraction
== lfExactlyHalf
&& category
!= fcZero
)
1555 return APInt::tcExtractBit(significandParts(), bit
);
1562 case rmTowardPositive
:
1565 case rmTowardNegative
:
1571 llvm_unreachable("Invalid rounding mode found");
1574 IEEEFloat::opStatus
IEEEFloat::normalize(roundingMode rounding_mode
,
1575 lostFraction lost_fraction
) {
1576 unsigned int omsb
; /* One, not zero, based MSB. */
1579 if (!isFiniteNonZero())
1582 /* Before rounding normalize the exponent of fcNormal numbers. */
1583 omsb
= significandMSB() + 1;
1586 /* OMSB is numbered from 1. We want to place it in the integer
1587 bit numbered PRECISION if possible, with a compensating change in
1589 exponentChange
= omsb
- semantics
->precision
;
1591 /* If the resulting exponent is too high, overflow according to
1592 the rounding mode. */
1593 if (exponent
+ exponentChange
> semantics
->maxExponent
)
1594 return handleOverflow(rounding_mode
);
1596 /* Subnormal numbers have exponent minExponent, and their MSB
1597 is forced based on that. */
1598 if (exponent
+ exponentChange
< semantics
->minExponent
)
1599 exponentChange
= semantics
->minExponent
- exponent
;
1601 /* Shifting left is easy as we don't lose precision. */
1602 if (exponentChange
< 0) {
1603 assert(lost_fraction
== lfExactlyZero
);
1605 shiftSignificandLeft(-exponentChange
);
1610 if (exponentChange
> 0) {
1613 /* Shift right and capture any new lost fraction. */
1614 lf
= shiftSignificandRight(exponentChange
);
1616 lost_fraction
= combineLostFractions(lf
, lost_fraction
);
1618 /* Keep OMSB up-to-date. */
1619 if (omsb
> (unsigned) exponentChange
)
1620 omsb
-= exponentChange
;
1626 // The all-ones values is an overflow if NaN is all ones. If NaN is
1627 // represented by negative zero, then it is a valid finite value.
1628 if (semantics
->nonFiniteBehavior
== fltNonfiniteBehavior::NanOnly
&&
1629 semantics
->nanEncoding
== fltNanEncoding::AllOnes
&&
1630 exponent
== semantics
->maxExponent
&& isSignificandAllOnes())
1631 return handleOverflow(rounding_mode
);
1633 /* Now round the number according to rounding_mode given the lost
1636 /* As specified in IEEE 754, since we do not trap we do not report
1637 underflow for exact results. */
1638 if (lost_fraction
== lfExactlyZero
) {
1639 /* Canonicalize zeroes. */
1642 if (semantics
->nanEncoding
== fltNanEncoding::NegativeZero
)
1649 /* Increment the significand if we're rounding away from zero. */
1650 if (roundAwayFromZero(rounding_mode
, lost_fraction
, 0)) {
1652 exponent
= semantics
->minExponent
;
1654 incrementSignificand();
1655 omsb
= significandMSB() + 1;
1657 /* Did the significand increment overflow? */
1658 if (omsb
== (unsigned) semantics
->precision
+ 1) {
1659 /* Renormalize by incrementing the exponent and shifting our
1660 significand right one. However if we already have the
1661 maximum exponent we overflow to infinity. */
1662 if (exponent
== semantics
->maxExponent
)
1663 // Invoke overflow handling with a rounding mode that will guarantee
1664 // that the result gets turned into the correct infinity representation.
1665 // This is needed instead of just setting the category to infinity to
1666 // account for 8-bit floating point types that have no inf, only NaN.
1667 return handleOverflow(sign
? rmTowardNegative
: rmTowardPositive
);
1669 shiftSignificandRight(1);
1674 // The all-ones values is an overflow if NaN is all ones. If NaN is
1675 // represented by negative zero, then it is a valid finite value.
1676 if (semantics
->nonFiniteBehavior
== fltNonfiniteBehavior::NanOnly
&&
1677 semantics
->nanEncoding
== fltNanEncoding::AllOnes
&&
1678 exponent
== semantics
->maxExponent
&& isSignificandAllOnes())
1679 return handleOverflow(rounding_mode
);
1682 /* The normal case - we were and are not denormal, and any
1683 significand increment above didn't overflow. */
1684 if (omsb
== semantics
->precision
)
1687 /* We have a non-zero denormal. */
1688 assert(omsb
< semantics
->precision
);
1690 /* Canonicalize zeroes. */
1693 if (semantics
->nanEncoding
== fltNanEncoding::NegativeZero
)
1697 /* The fcZero case is a denormal that underflowed to zero. */
1698 return (opStatus
) (opUnderflow
| opInexact
);
1701 IEEEFloat::opStatus
IEEEFloat::addOrSubtractSpecials(const IEEEFloat
&rhs
,
1703 switch (PackCategoriesIntoKey(category
, rhs
.category
)) {
1705 llvm_unreachable(nullptr);
1707 case PackCategoriesIntoKey(fcZero
, fcNaN
):
1708 case PackCategoriesIntoKey(fcNormal
, fcNaN
):
1709 case PackCategoriesIntoKey(fcInfinity
, fcNaN
):
1712 case PackCategoriesIntoKey(fcNaN
, fcZero
):
1713 case PackCategoriesIntoKey(fcNaN
, fcNormal
):
1714 case PackCategoriesIntoKey(fcNaN
, fcInfinity
):
1715 case PackCategoriesIntoKey(fcNaN
, fcNaN
):
1716 if (isSignaling()) {
1720 return rhs
.isSignaling() ? opInvalidOp
: opOK
;
1722 case PackCategoriesIntoKey(fcNormal
, fcZero
):
1723 case PackCategoriesIntoKey(fcInfinity
, fcNormal
):
1724 case PackCategoriesIntoKey(fcInfinity
, fcZero
):
1727 case PackCategoriesIntoKey(fcNormal
, fcInfinity
):
1728 case PackCategoriesIntoKey(fcZero
, fcInfinity
):
1729 category
= fcInfinity
;
1730 sign
= rhs
.sign
^ subtract
;
1733 case PackCategoriesIntoKey(fcZero
, fcNormal
):
1735 sign
= rhs
.sign
^ subtract
;
1738 case PackCategoriesIntoKey(fcZero
, fcZero
):
1739 /* Sign depends on rounding mode; handled by caller. */
1742 case PackCategoriesIntoKey(fcInfinity
, fcInfinity
):
1743 /* Differently signed infinities can only be validly
1745 if (((sign
^ rhs
.sign
)!=0) != subtract
) {
1752 case PackCategoriesIntoKey(fcNormal
, fcNormal
):
1757 /* Add or subtract two normal numbers. */
1758 lostFraction
IEEEFloat::addOrSubtractSignificand(const IEEEFloat
&rhs
,
1761 lostFraction lost_fraction
;
1764 /* Determine if the operation on the absolute values is effectively
1765 an addition or subtraction. */
1766 subtract
^= static_cast<bool>(sign
^ rhs
.sign
);
1768 /* Are we bigger exponent-wise than the RHS? */
1769 bits
= exponent
- rhs
.exponent
;
1771 /* Subtraction is more subtle than one might naively expect. */
1773 IEEEFloat
temp_rhs(rhs
);
1776 lost_fraction
= lfExactlyZero
;
1777 else if (bits
> 0) {
1778 lost_fraction
= temp_rhs
.shiftSignificandRight(bits
- 1);
1779 shiftSignificandLeft(1);
1781 lost_fraction
= shiftSignificandRight(-bits
- 1);
1782 temp_rhs
.shiftSignificandLeft(1);
1785 // Should we reverse the subtraction.
1786 if (compareAbsoluteValue(temp_rhs
) == cmpLessThan
) {
1787 carry
= temp_rhs
.subtractSignificand
1788 (*this, lost_fraction
!= lfExactlyZero
);
1789 copySignificand(temp_rhs
);
1792 carry
= subtractSignificand
1793 (temp_rhs
, lost_fraction
!= lfExactlyZero
);
1796 /* Invert the lost fraction - it was on the RHS and
1798 if (lost_fraction
== lfLessThanHalf
)
1799 lost_fraction
= lfMoreThanHalf
;
1800 else if (lost_fraction
== lfMoreThanHalf
)
1801 lost_fraction
= lfLessThanHalf
;
1803 /* The code above is intended to ensure that no borrow is
1809 IEEEFloat
temp_rhs(rhs
);
1811 lost_fraction
= temp_rhs
.shiftSignificandRight(bits
);
1812 carry
= addSignificand(temp_rhs
);
1814 lost_fraction
= shiftSignificandRight(-bits
);
1815 carry
= addSignificand(rhs
);
1818 /* We have a guard bit; generating a carry cannot happen. */
1823 return lost_fraction
;
1826 IEEEFloat::opStatus
IEEEFloat::multiplySpecials(const IEEEFloat
&rhs
) {
1827 switch (PackCategoriesIntoKey(category
, rhs
.category
)) {
1829 llvm_unreachable(nullptr);
1831 case PackCategoriesIntoKey(fcZero
, fcNaN
):
1832 case PackCategoriesIntoKey(fcNormal
, fcNaN
):
1833 case PackCategoriesIntoKey(fcInfinity
, fcNaN
):
1837 case PackCategoriesIntoKey(fcNaN
, fcZero
):
1838 case PackCategoriesIntoKey(fcNaN
, fcNormal
):
1839 case PackCategoriesIntoKey(fcNaN
, fcInfinity
):
1840 case PackCategoriesIntoKey(fcNaN
, fcNaN
):
1841 sign
^= rhs
.sign
; // restore the original sign
1842 if (isSignaling()) {
1846 return rhs
.isSignaling() ? opInvalidOp
: opOK
;
1848 case PackCategoriesIntoKey(fcNormal
, fcInfinity
):
1849 case PackCategoriesIntoKey(fcInfinity
, fcNormal
):
1850 case PackCategoriesIntoKey(fcInfinity
, fcInfinity
):
1851 category
= fcInfinity
;
1854 case PackCategoriesIntoKey(fcZero
, fcNormal
):
1855 case PackCategoriesIntoKey(fcNormal
, fcZero
):
1856 case PackCategoriesIntoKey(fcZero
, fcZero
):
1860 case PackCategoriesIntoKey(fcZero
, fcInfinity
):
1861 case PackCategoriesIntoKey(fcInfinity
, fcZero
):
1865 case PackCategoriesIntoKey(fcNormal
, fcNormal
):
1870 IEEEFloat::opStatus
IEEEFloat::divideSpecials(const IEEEFloat
&rhs
) {
1871 switch (PackCategoriesIntoKey(category
, rhs
.category
)) {
1873 llvm_unreachable(nullptr);
1875 case PackCategoriesIntoKey(fcZero
, fcNaN
):
1876 case PackCategoriesIntoKey(fcNormal
, fcNaN
):
1877 case PackCategoriesIntoKey(fcInfinity
, fcNaN
):
1881 case PackCategoriesIntoKey(fcNaN
, fcZero
):
1882 case PackCategoriesIntoKey(fcNaN
, fcNormal
):
1883 case PackCategoriesIntoKey(fcNaN
, fcInfinity
):
1884 case PackCategoriesIntoKey(fcNaN
, fcNaN
):
1885 sign
^= rhs
.sign
; // restore the original sign
1886 if (isSignaling()) {
1890 return rhs
.isSignaling() ? opInvalidOp
: opOK
;
1892 case PackCategoriesIntoKey(fcInfinity
, fcZero
):
1893 case PackCategoriesIntoKey(fcInfinity
, fcNormal
):
1894 case PackCategoriesIntoKey(fcZero
, fcInfinity
):
1895 case PackCategoriesIntoKey(fcZero
, fcNormal
):
1898 case PackCategoriesIntoKey(fcNormal
, fcInfinity
):
1902 case PackCategoriesIntoKey(fcNormal
, fcZero
):
1903 if (semantics
->nonFiniteBehavior
== fltNonfiniteBehavior::NanOnly
)
1904 makeNaN(false, sign
);
1906 category
= fcInfinity
;
1909 case PackCategoriesIntoKey(fcInfinity
, fcInfinity
):
1910 case PackCategoriesIntoKey(fcZero
, fcZero
):
1914 case PackCategoriesIntoKey(fcNormal
, fcNormal
):
1919 IEEEFloat::opStatus
IEEEFloat::modSpecials(const IEEEFloat
&rhs
) {
1920 switch (PackCategoriesIntoKey(category
, rhs
.category
)) {
1922 llvm_unreachable(nullptr);
1924 case PackCategoriesIntoKey(fcZero
, fcNaN
):
1925 case PackCategoriesIntoKey(fcNormal
, fcNaN
):
1926 case PackCategoriesIntoKey(fcInfinity
, fcNaN
):
1929 case PackCategoriesIntoKey(fcNaN
, fcZero
):
1930 case PackCategoriesIntoKey(fcNaN
, fcNormal
):
1931 case PackCategoriesIntoKey(fcNaN
, fcInfinity
):
1932 case PackCategoriesIntoKey(fcNaN
, fcNaN
):
1933 if (isSignaling()) {
1937 return rhs
.isSignaling() ? opInvalidOp
: opOK
;
1939 case PackCategoriesIntoKey(fcZero
, fcInfinity
):
1940 case PackCategoriesIntoKey(fcZero
, fcNormal
):
1941 case PackCategoriesIntoKey(fcNormal
, fcInfinity
):
1944 case PackCategoriesIntoKey(fcNormal
, fcZero
):
1945 case PackCategoriesIntoKey(fcInfinity
, fcZero
):
1946 case PackCategoriesIntoKey(fcInfinity
, fcNormal
):
1947 case PackCategoriesIntoKey(fcInfinity
, fcInfinity
):
1948 case PackCategoriesIntoKey(fcZero
, fcZero
):
1952 case PackCategoriesIntoKey(fcNormal
, fcNormal
):
1957 IEEEFloat::opStatus
IEEEFloat::remainderSpecials(const IEEEFloat
&rhs
) {
1958 switch (PackCategoriesIntoKey(category
, rhs
.category
)) {
1960 llvm_unreachable(nullptr);
1962 case PackCategoriesIntoKey(fcZero
, fcNaN
):
1963 case PackCategoriesIntoKey(fcNormal
, fcNaN
):
1964 case PackCategoriesIntoKey(fcInfinity
, fcNaN
):
1967 case PackCategoriesIntoKey(fcNaN
, fcZero
):
1968 case PackCategoriesIntoKey(fcNaN
, fcNormal
):
1969 case PackCategoriesIntoKey(fcNaN
, fcInfinity
):
1970 case PackCategoriesIntoKey(fcNaN
, fcNaN
):
1971 if (isSignaling()) {
1975 return rhs
.isSignaling() ? opInvalidOp
: opOK
;
1977 case PackCategoriesIntoKey(fcZero
, fcInfinity
):
1978 case PackCategoriesIntoKey(fcZero
, fcNormal
):
1979 case PackCategoriesIntoKey(fcNormal
, fcInfinity
):
1982 case PackCategoriesIntoKey(fcNormal
, fcZero
):
1983 case PackCategoriesIntoKey(fcInfinity
, fcZero
):
1984 case PackCategoriesIntoKey(fcInfinity
, fcNormal
):
1985 case PackCategoriesIntoKey(fcInfinity
, fcInfinity
):
1986 case PackCategoriesIntoKey(fcZero
, fcZero
):
1990 case PackCategoriesIntoKey(fcNormal
, fcNormal
):
1991 return opDivByZero
; // fake status, indicating this is not a special case
1996 void IEEEFloat::changeSign() {
1997 // With NaN-as-negative-zero, neither NaN or negative zero can change
1999 if (semantics
->nanEncoding
== fltNanEncoding::NegativeZero
&&
2000 (isZero() || isNaN()))
2002 /* Look mummy, this one's easy. */
2006 /* Normalized addition or subtraction. */
2007 IEEEFloat::opStatus
IEEEFloat::addOrSubtract(const IEEEFloat
&rhs
,
2008 roundingMode rounding_mode
,
2012 fs
= addOrSubtractSpecials(rhs
, subtract
);
2014 /* This return code means it was not a simple case. */
2015 if (fs
== opDivByZero
) {
2016 lostFraction lost_fraction
;
2018 lost_fraction
= addOrSubtractSignificand(rhs
, subtract
);
2019 fs
= normalize(rounding_mode
, lost_fraction
);
2021 /* Can only be zero if we lost no fraction. */
2022 assert(category
!= fcZero
|| lost_fraction
== lfExactlyZero
);
2025 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2026 positive zero unless rounding to minus infinity, except that
2027 adding two like-signed zeroes gives that zero. */
2028 if (category
== fcZero
) {
2029 if (rhs
.category
!= fcZero
|| (sign
== rhs
.sign
) == subtract
)
2030 sign
= (rounding_mode
== rmTowardNegative
);
2031 // NaN-in-negative-zero means zeros need to be normalized to +0.
2032 if (semantics
->nanEncoding
== fltNanEncoding::NegativeZero
)
2039 /* Normalized addition. */
2040 IEEEFloat::opStatus
IEEEFloat::add(const IEEEFloat
&rhs
,
2041 roundingMode rounding_mode
) {
2042 return addOrSubtract(rhs
, rounding_mode
, false);
2045 /* Normalized subtraction. */
2046 IEEEFloat::opStatus
IEEEFloat::subtract(const IEEEFloat
&rhs
,
2047 roundingMode rounding_mode
) {
2048 return addOrSubtract(rhs
, rounding_mode
, true);
2051 /* Normalized multiply. */
2052 IEEEFloat::opStatus
IEEEFloat::multiply(const IEEEFloat
&rhs
,
2053 roundingMode rounding_mode
) {
2057 fs
= multiplySpecials(rhs
);
2059 if (isZero() && semantics
->nanEncoding
== fltNanEncoding::NegativeZero
)
2061 if (isFiniteNonZero()) {
2062 lostFraction lost_fraction
= multiplySignificand(rhs
);
2063 fs
= normalize(rounding_mode
, lost_fraction
);
2064 if (lost_fraction
!= lfExactlyZero
)
2065 fs
= (opStatus
) (fs
| opInexact
);
2071 /* Normalized divide. */
2072 IEEEFloat::opStatus
IEEEFloat::divide(const IEEEFloat
&rhs
,
2073 roundingMode rounding_mode
) {
2077 fs
= divideSpecials(rhs
);
2079 if (isZero() && semantics
->nanEncoding
== fltNanEncoding::NegativeZero
)
2081 if (isFiniteNonZero()) {
2082 lostFraction lost_fraction
= divideSignificand(rhs
);
2083 fs
= normalize(rounding_mode
, lost_fraction
);
2084 if (lost_fraction
!= lfExactlyZero
)
2085 fs
= (opStatus
) (fs
| opInexact
);
2091 /* Normalized remainder. */
2092 IEEEFloat::opStatus
IEEEFloat::remainder(const IEEEFloat
&rhs
) {
2094 unsigned int origSign
= sign
;
2096 // First handle the special cases.
2097 fs
= remainderSpecials(rhs
);
2098 if (fs
!= opDivByZero
)
2103 // Make sure the current value is less than twice the denom. If the addition
2104 // did not succeed (an overflow has happened), which means that the finite
2105 // value we currently possess must be less than twice the denom (as we are
2106 // using the same semantics).
2108 if (P2
.add(rhs
, rmNearestTiesToEven
) == opOK
) {
2113 // Lets work with absolute numbers.
2119 // To calculate the remainder we use the following scheme.
2121 // The remainder is defined as follows:
2123 // remainder = numer - rquot * denom = x - r * p
2125 // Where r is the result of: x/p, rounded toward the nearest integral value
2126 // (with halfway cases rounded toward the even number).
2128 // Currently, (after x mod 2p):
2129 // r is the number of 2p's present inside x, which is inherently, an even
2132 // We may split the remaining calculation into 4 options:
2133 // - if x < 0.5p then we round to the nearest number which is 0, and are done.
2134 // - if x == 0.5p then we round to the nearest even number which is 0, and we
2135 // are done as well.
2136 // - if 0.5p < x < p then we round to nearest number which is 1, and we have
2137 // to subtract 1p at least once.
2138 // - if x >= p then we must subtract p at least once, as x must be a
2141 // By now, we were done, or we added 1 to r, which in turn, now an odd number.
2143 // We can now split the remaining calculation to the following 3 options:
2144 // - if x < 0.5p then we round to the nearest number which is 0, and are done.
2145 // - if x == 0.5p then we round to the nearest even number. As r is odd, we
2146 // must round up to the next even number. so we must subtract p once more.
2147 // - if x > 0.5p (and inherently x < p) then we must round r up to the next
2148 // integral, and subtract p once more.
2151 // Extend the semantics to prevent an overflow/underflow or inexact result.
2153 fltSemantics extendedSemantics
= *semantics
;
2154 extendedSemantics
.maxExponent
++;
2155 extendedSemantics
.minExponent
--;
2156 extendedSemantics
.precision
+= 2;
2158 IEEEFloat VEx
= *this;
2159 fs
= VEx
.convert(extendedSemantics
, rmNearestTiesToEven
, &losesInfo
);
2160 assert(fs
== opOK
&& !losesInfo
);
2162 fs
= PEx
.convert(extendedSemantics
, rmNearestTiesToEven
, &losesInfo
);
2163 assert(fs
== opOK
&& !losesInfo
);
2165 // It is simpler to work with 2x instead of 0.5p, and we do not need to lose
2167 fs
= VEx
.add(VEx
, rmNearestTiesToEven
);
2170 if (VEx
.compare(PEx
) == cmpGreaterThan
) {
2171 fs
= subtract(P
, rmNearestTiesToEven
);
2174 // Make VEx = this.add(this), but because we have different semantics, we do
2175 // not want to `convert` again, so we just subtract PEx twice (which equals
2176 // to the desired value).
2177 fs
= VEx
.subtract(PEx
, rmNearestTiesToEven
);
2179 fs
= VEx
.subtract(PEx
, rmNearestTiesToEven
);
2182 cmpResult result
= VEx
.compare(PEx
);
2183 if (result
== cmpGreaterThan
|| result
== cmpEqual
) {
2184 fs
= subtract(P
, rmNearestTiesToEven
);
2190 sign
= origSign
; // IEEE754 requires this
2191 if (semantics
->nanEncoding
== fltNanEncoding::NegativeZero
)
2192 // But some 8-bit floats only have positive 0.
2201 /* Normalized llvm frem (C fmod). */
// Reduces *this in place against rhs. fs is first set from modSpecials(rhs);
// then, while both values are finite non-zero and |*this| is not less than
// |rhs|, a rescaled copy of rhs is subtracted from *this.
// NOTE(review): the embedded numbering skips lines in this excerpt, so the
// declaration of fs, the closing braces and the return are not visible here.
2202 IEEEFloat::opStatus
IEEEFloat::mod(const IEEEFloat
&rhs
) {
2204 fs
= modSpecials(rhs
);
// The sign on entry is saved so that it can be written back after the loop.
2205 unsigned int origSign
= sign
;
// Loop while both operands are finite non-zero and |*this| >= |rhs|.
2207 while (isFiniteNonZero() && rhs
.isFiniteNonZero() &&
2208 compareAbsoluteValue(rhs
) != cmpLessThan
) {
// Exp is the difference of the two ilogb results; V is rhs passed through
// scalbn with that exponent.
2209 int Exp
= ilogb(*this) - ilogb(rhs
);
2210 IEEEFloat V
= scalbn(rhs
, Exp
, rmNearestTiesToEven
);
// The test below retries with Exp - 1 when V is NaN or |*this| < |V|.
2211 // V can overflow to NaN with fltNonfiniteBehavior::NanOnly, so explicitly
2213 if (V
.isNaN() || compareAbsoluteValue(V
) == cmpLessThan
)
2214 V
= scalbn(rhs
, Exp
- 1, rmNearestTiesToEven
);
2217 fs
= subtract(V
, rmNearestTiesToEven
);
2221 sign
= origSign
; // fmod requires this
// NOTE(review): the statement guarded by the NegativeZero check below is not
// visible in this excerpt.
2222 if (semantics
->nanEncoding
== fltNanEncoding::NegativeZero
)
2228 /* Normalized fused-multiply-add. */
// Computes (*this times multiplicand) plus addend into *this under
// rounding_mode. Two paths are visible below:
//  - *this and multiplicand finite non-zero and addend finite:
//    multiplySignificand is given the addend as well, followed by a single
//    normalize call;
//  - otherwise: multiplySpecials(multiplicand), then
//    addOrSubtract(addend, rounding_mode, false).
// NOTE(review): the declaration of fs, the else line, the guard in front of
// addOrSubtract, the closing braces and the return are missing from this
// excerpt.
2229 IEEEFloat::opStatus
IEEEFloat::fusedMultiplyAdd(const IEEEFloat
&multiplicand
,
2230 const IEEEFloat
&addend
,
2231 roundingMode rounding_mode
) {
2234 /* Post-multiplication sign, before addition. */
2235 sign
^= multiplicand
.sign
;
2237 /* If and only if all arguments are normal do we need to do an
2238 extended-precision calculation. */
2239 if (isFiniteNonZero() &&
2240 multiplicand
.isFiniteNonZero() &&
2241 addend
.isFinite()) {
2242 lostFraction lost_fraction
;
2244 lost_fraction
= multiplySignificand(multiplicand
, addend
);
2245 fs
= normalize(rounding_mode
, lost_fraction
);
// A non-zero lost fraction forces opInexact into the status.
2246 if (lost_fraction
!= lfExactlyZero
)
2247 fs
= (opStatus
) (fs
| opInexact
);
2249 /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
2250 positive zero unless rounding to minus infinity, except that
2251 adding two like-signed zeroes gives that zero. */
// The zero result only gets its sign recomputed when no underflow was
// reported and the current sign differs from the sign of addend.
2252 if (category
== fcZero
&& !(fs
& opUnderflow
) && sign
!= addend
.sign
) {
2253 sign
= (rounding_mode
== rmTowardNegative
);
// NOTE(review): the statement guarded by the NegativeZero check below is not
// visible in this excerpt.
2254 if (semantics
->nanEncoding
== fltNanEncoding::NegativeZero
)
2258 fs
= multiplySpecials(multiplicand
);
2260 /* FS can only be opOK or opInvalidOp. There is no more work
2261 to do in the latter case. The IEEE-754R standard says it is
2262 implementation-defined in this case whether, if ADDEND is a
2263 quiet NaN, we raise invalid op; this implementation does so.
2265 If we need to do the addition we can do so with normal
2268 fs
= addOrSubtract(addend
, rounding_mode
, false);
2274 /* Rounding-mode correct round to integral value. */
// Rounds *this to an integral value in place under rounding_mode.
// Visible steps: a signaling-NaN check, an early exit when exponent + 1 is
// at least the precision, then add followed by subtract of MagicConstant
// (built from 1 shifted left by precision - 1, carrying the sign of *this),
// and finally a sign check against the saved input sign.
// NOTE(review): many lines are missing from this excerpt (the declaration of
// fs, the bodies of the special-case branches, the returns and the closing
// braces), so only the visible statements are described.
2275 IEEEFloat::opStatus
IEEEFloat::roundToIntegral(roundingMode rounding_mode
) {
2279 // [IEEE Std 754-2008 6.1]:
2280 // The behavior of infinity in floating-point arithmetic is derived from the
2281 // limiting cases of real arithmetic with operands of arbitrarily
2282 // large magnitude, when such a limit exists.
2284 // Operations on infinite operands are usually exact and therefore signal no
2289 if (isSignaling()) {
2290 // [IEEE Std 754-2008 6.2]:
2291 // Under default exception handling, any operation signaling an invalid
2292 // operation exception and for which a floating-point result is to be
2293 // delivered shall deliver a quiet NaN.
2295 // [IEEE Std 754-2008 6.2]:
2296 // Signaling NaNs shall be reserved operands that, under default exception
2297 // handling, signal the invalid operation exception(see 7.2) for every
2298 // general-computational and signaling-computational operation except for
2299 // the conversions described in 5.12.
2302 // [IEEE Std 754-2008 6.2]:
2303 // For an operation with quiet NaN inputs, other than maximum and minimum
2304 // operations, if a floating-point result is to be delivered the result
2305 // shall be a quiet NaN which should be one of the input NaNs.
2307 // Every general-computational and quiet-computational operation involving
2308 // one or more input NaNs, none of them signaling, shall signal no
2309 // exception, except fusedMultiplyAdd might signal the invalid operation
2310 // exception(see 7.2).
2316 // [IEEE Std 754-2008 6.3]:
2317 // ... the sign of the result of conversions, the quantize operation, the
2318 // roundToIntegral operations, and the roundToIntegralExact(see 5.3.1) is
2319 // the sign of the first or only operand.
2323 // If the exponent is large enough, we know that this value is already
2324 // integral, and the arithmetic below would potentially cause it to saturate
2325 // to +/-Inf. Bail out early instead.
2326 if (exponent
+1 >= (int)semanticsPrecision(*semantics
))
2329 // The algorithm here is quite simple: we add 2^(p-1), where p is the
2330 // precision of our format, and then subtract it back off again. The choice
2331 // of rounding modes for the addition/subtraction determines the rounding mode
2332 // for our integral rounding as well.
2333 // NOTE: When the input value is negative, we do subtraction followed by
2334 // addition instead.
// IntegerConstant is constructed with bit width NextPowerOf2(precision) and
// value 1, then shifted left by precision - 1.
2335 APInt
IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics
)), 1);
2336 IntegerConstant
<<= semanticsPrecision(*semantics
)-1;
2337 IEEEFloat
MagicConstant(*semantics
);
2338 fs
= MagicConstant
.convertFromAPInt(IntegerConstant
, false,
2339 rmNearestTiesToEven
);
// MagicConstant takes the sign of *this before it is added and subtracted.
2341 MagicConstant
.sign
= sign
;
2343 // Preserve the input sign so that we can handle the case of zero result
2345 bool inputSign
= isNegative();
2347 fs
= add(MagicConstant
, rounding_mode
);
2349 // Current value and 'MagicConstant' are both integers, so the result of the
2350 // subtraction is always exact according to Sterbenz' lemma.
// The status of this subtract call is not stored; fs keeps the add status.
2351 subtract(MagicConstant
, rounding_mode
);
2353 // Restore the input sign.
2354 if (inputSign
!= isNegative())
2361 /* Comparison requires normalized numbers. */
// Returns the ordering of *this relative to rhs as a cmpResult. Both operands
// must use the same semantics (asserted). The category pair is packed with
// PackCategoriesIntoKey and dispatched through a switch; every pairing that
// involves fcNaN returns cmpUnordered.
// NOTE(review): this excerpt is missing the sign tests and the alternative
// return lines of several case groups, as well as the declaration of result
// and the final return, so each group below shows only part of its branch.
2362 IEEEFloat::cmpResult
IEEEFloat::compare(const IEEEFloat
&rhs
) const {
2365 assert(semantics
== rhs
.semantics
);
2367 switch (PackCategoriesIntoKey(category
, rhs
.category
)) {
2369 llvm_unreachable(nullptr);
// Any NaN operand makes the comparison unordered.
2371 case PackCategoriesIntoKey(fcNaN
, fcZero
):
2372 case PackCategoriesIntoKey(fcNaN
, fcNormal
):
2373 case PackCategoriesIntoKey(fcNaN
, fcInfinity
):
2374 case PackCategoriesIntoKey(fcNaN
, fcNaN
):
2375 case PackCategoriesIntoKey(fcZero
, fcNaN
):
2376 case PackCategoriesIntoKey(fcNormal
, fcNaN
):
2377 case PackCategoriesIntoKey(fcInfinity
, fcNaN
):
2378 return cmpUnordered
;
// Pairings where the left category is the larger one of the two.
2380 case PackCategoriesIntoKey(fcInfinity
, fcNormal
):
2381 case PackCategoriesIntoKey(fcInfinity
, fcZero
):
2382 case PackCategoriesIntoKey(fcNormal
, fcZero
):
2386 return cmpGreaterThan
;
// Pairings where the right category is the larger one of the two.
2388 case PackCategoriesIntoKey(fcNormal
, fcInfinity
):
2389 case PackCategoriesIntoKey(fcZero
, fcInfinity
):
2390 case PackCategoriesIntoKey(fcZero
, fcNormal
):
2392 return cmpGreaterThan
;
2396 case PackCategoriesIntoKey(fcInfinity
, fcInfinity
):
2397 if (sign
== rhs
.sign
)
2402 return cmpGreaterThan
;
2404 case PackCategoriesIntoKey(fcZero
, fcZero
):
2407 case PackCategoriesIntoKey(fcNormal
, fcNormal
):
2411 /* Two normal numbers. Do they have the same sign? */
2412 if (sign
!= rhs
.sign
) {
2414 result
= cmpLessThan
;
2416 result
= cmpGreaterThan
;
2418 /* Compare absolute values; invert result if negative. */
2419 result
= compareAbsoluteValue(rhs
);
// The swap below exchanges cmpLessThan and cmpGreaterThan; other values of
// result are left unchanged.
2422 if (result
== cmpLessThan
)
2423 result
= cmpGreaterThan
;
2424 else if (result
== cmpGreaterThan
)
2425 result
= cmpLessThan
;
2432 /// IEEEFloat::convert - convert a value of one floating point type to another.
2433 /// The return value corresponds to the IEEE754 exceptions. *losesInfo
2434 /// records whether the transformation lost information, i.e. whether
2435 /// converting the result back to the original type will produce the
2436 /// original value (this is almost the same as return value==opOK, but there
2437 /// are edge cases where this is not so).
// Order of the visible steps: compute shift as the precision difference,
// flag x87 NaNs that other formats cannot represent, adjust exponent and
// shift for narrowing conversions of small values, shift right before the
// storage is narrowed, resize the significand storage, switch semantics,
// shift left after widening, then finish according to the category.
// NOTE(review): several lines are missing from this excerpt (the losesInfo
// parameter line, the declarations of fs and shift, some assignment targets,
// closing braces and the return), so only visible statements are described.
2439 IEEEFloat::opStatus
IEEEFloat::convert(const fltSemantics
&toSemantics
,
2440 roundingMode rounding_mode
,
2442 lostFraction lostFraction
;
2443 unsigned int newPartCount
, oldPartCount
;
2446 const fltSemantics
&fromSemantics
= *semantics
;
// Signaling state is sampled before the value is modified.
2447 bool is_signaling
= isSignaling();
2449 lostFraction
= lfExactlyZero
;
2450 newPartCount
= partCountForBits(toSemantics
.precision
+ 1);
2451 oldPartCount
= partCount();
// shift > 0 means the target has more precision, shift < 0 means less.
2452 shift
= toSemantics
.precision
- fromSemantics
.precision
;
2454 bool X86SpecialNan
= false;
// Only NaNs leaving semX87DoubleExtended are inspected; the test looks at the
// two top bits of the first significand part.
2455 if (&fromSemantics
== &semX87DoubleExtended
&&
2456 &toSemantics
!= &semX87DoubleExtended
&& category
== fcNaN
&&
2457 (!(*significandParts() & 0x8000000000000000ULL
) ||
2458 !(*significandParts() & 0x4000000000000000ULL
))) {
2459 // x86 has some unusual NaNs which cannot be represented in any other
2460 // format; note them here.
2461 X86SpecialNan
= true;
2464 // If this is a truncation of a denormal number, and the target semantics
2465 // has larger exponent range than the source semantics (this can happen
2466 // when truncating from PowerPC double-double to double format), the
2467 // right shift could lose result mantissa bits. Adjust exponent instead
2468 // of performing excessive shift.
2469 // Also do a similar trick in case shifting denormal would produce zero
2470 // significand as this case isn't handled correctly by normalize.
2471 if (shift
< 0 && isFiniteNonZero()) {
// omsb is significandMSB() + 1; exponentChange is clamped twice below before
// it is applied to both shift and exponent.
2472 int omsb
= significandMSB() + 1;
2473 int exponentChange
= omsb
- fromSemantics
.precision
;
2474 if (exponent
+ exponentChange
< toSemantics
.minExponent
)
2475 exponentChange
= toSemantics
.minExponent
- exponent
;
2476 if (exponentChange
< shift
)
2477 exponentChange
= shift
;
2478 if (exponentChange
< 0) {
2479 shift
-= exponentChange
;
2480 exponent
+= exponentChange
;
2481 } else if (omsb
<= -shift
) {
2482 exponentChange
= omsb
+ shift
- 1; // leave at least one bit set
2483 shift
-= exponentChange
;
2484 exponent
+= exponentChange
;
2488 // If this is a truncation, perform the shift before we narrow the storage.
2489 if (shift
< 0 && (isFiniteNonZero() ||
2490 (category
== fcNaN
&& semantics
->nonFiniteBehavior
!=
2491 fltNonfiniteBehavior::NanOnly
)))
2492 lostFraction
= shiftRight(significandParts(), oldPartCount
, -shift
);
2494 // Fix the storage so it can hold the new value.
2495 if (newPartCount
> oldPartCount
) {
2496 // The new type requires more storage; make it available.
// The new array is zeroed first; the old parts are copied in only for finite
// non-zero values and NaNs.
2497 integerPart
*newParts
;
2498 newParts
= new integerPart
[newPartCount
];
2499 APInt::tcSet(newParts
, 0, newPartCount
);
2500 if (isFiniteNonZero() || category
==fcNaN
)
2501 APInt::tcAssign(newParts
, significandParts(), oldPartCount
);
2503 significand
.parts
= newParts
;
2504 } else if (newPartCount
== 1 && oldPartCount
!= 1) {
2505 // Switch to built-in storage for a single part.
2506 integerPart newPart
= 0;
2507 if (isFiniteNonZero() || category
==fcNaN
)
2508 newPart
= significandParts()[0];
2510 significand
.part
= newPart
;
2513 // Now that we have the right storage, switch the semantics.
2514 semantics
= &toSemantics
;
2516 // If this is an extension, perform the shift now that the storage is
2518 if (shift
> 0 && (isFiniteNonZero() || category
==fcNaN
))
2519 APInt::tcShiftLeft(significandParts(), newPartCount
, shift
);
// Finite non-zero values are normalized; any status other than opOK counts
// as lost information.
2521 if (isFiniteNonZero()) {
2522 fs
= normalize(rounding_mode
, lostFraction
);
2523 *losesInfo
= (fs
!= opOK
);
2524 } else if (category
== fcNaN
) {
2525 if (semantics
->nonFiniteBehavior
== fltNonfiniteBehavior::NanOnly
) {
// NOTE(review): the assignment target of the expression below is on a line
// that is missing from this excerpt.
2527 fromSemantics
.nonFiniteBehavior
!= fltNonfiniteBehavior::NanOnly
;
2528 makeNaN(false, sign
);
2529 return is_signaling
? opInvalidOp
: opOK
;
2532 // If NaN is negative zero, we need to create a new NaN to avoid converting
2534 if (fromSemantics
.nanEncoding
== fltNanEncoding::NegativeZero
&&
2535 semantics
->nanEncoding
!= fltNanEncoding::NegativeZero
)
2536 makeNaN(false, false);
2538 *losesInfo
= lostFraction
!= lfExactlyZero
|| X86SpecialNan
;
2540 // For x87 extended precision, we want to make a NaN, not a special NaN if
2541 // the input wasn't special either.
2542 if (!X86SpecialNan
&& semantics
== &semX87DoubleExtended
)
2543 APInt::tcSetBit(significandParts(), semantics
->precision
- 1);
2545 // Convert of sNaN creates qNaN and raises an exception (invalid op).
2546 // This also guarantees that a sNaN does not become Inf on a truncation
2547 // that loses all payload bits.
// An infinity arriving in a NanOnly target format is replaced by a NaN that
// keeps the current sign.
2554 } else if (category
== fcInfinity
&&
2555 semantics
->nonFiniteBehavior
== fltNonfiniteBehavior::NanOnly
) {
2556 makeNaN(false, sign
);
2559 } else if (category
== fcZero
&&
2560 semantics
->nanEncoding
== fltNanEncoding::NegativeZero
) {
2561 // Negative zero loses info, but positive zero doesn't.
2563 fromSemantics
.nanEncoding
!= fltNanEncoding::NegativeZero
&& sign
;
2564 fs
= *losesInfo
? opInexact
: opOK
;
2565 // NaN is negative zero means -0 -> +0, which can lose information
2575 /* Convert a floating point number to an integer according to the
2576 rounding mode. If the rounded integer value is out of range this
2577 returns an invalid operation exception and the contents of the
2578 destination parts are unspecified. If the rounded value is in
2579 range but the floating point number is not the exact integer, the C
2580 standard doesn't require an inexact exception to be raised. IEEE
2581 854 does require it so we do that.
2583 Note that for conversions to integer type the C standard requires
2584 round-to-zero to always be used. */
// parts receives the result; partCountForBits(width) parts of it are used and
// that count must not exceed parts.size() (asserted). The body follows the
// three numbered steps named in the comments below.
// NOTE(review): this excerpt is missing many lines (branch conditions, the
// returns of the special cases, the writes to isExact, closing braces), so
// the added notes only describe statements that are visible.
2585 IEEEFloat::opStatus
IEEEFloat::convertToSignExtendedInteger(
2586 MutableArrayRef
<integerPart
> parts
, unsigned int width
, bool isSigned
,
2587 roundingMode rounding_mode
, bool *isExact
) const {
2588 lostFraction lost_fraction
;
2589 const integerPart
*src
;
2590 unsigned int dstPartsCount
, truncatedBits
;
2594 /* Handle the three special cases first. */
2595 if (category
== fcInfinity
|| category
== fcNaN
)
2598 dstPartsCount
= partCountForBits(width
);
2599 assert(dstPartsCount
<= parts
.size() && "Integer too big");
// A zero input clears the destination parts.
2601 if (category
== fcZero
) {
2602 APInt::tcSet(parts
.data(), 0, dstPartsCount
);
2603 // Negative zero can't be represented as an int.
2608 src
= significandParts();
2610 /* Step 1: place our absolute value, with any fraction truncated, in
2613 /* Our absolute value is less than one; truncate everything. */
2614 APInt::tcSet(parts
.data(), 0, dstPartsCount
);
2615 /* For exponent -1 the integer bit represents .5, look at that.
2616 For smaller exponents leftmost truncated bit is 0. */
2617 truncatedBits
= semantics
->precision
-1U - exponent
;
2619 /* We want the most significant (exponent + 1) bits; the rest are
2621 unsigned int bits
= exponent
+ 1U;
2623 /* Hopelessly large in magnitude? */
// When fewer than precision bits are wanted, the top bits are extracted and
// the remainder is counted in truncatedBits; otherwise the whole significand
// is extracted and then shifted left by the surplus.
2627 if (bits
< semantics
->precision
) {
2628 /* We truncate (semantics->precision - bits) bits. */
2629 truncatedBits
= semantics
->precision
- bits
;
2630 APInt::tcExtract(parts
.data(), dstPartsCount
, src
, bits
, truncatedBits
);
2632 /* We want at least as many bits as are available. */
2633 APInt::tcExtract(parts
.data(), dstPartsCount
, src
, semantics
->precision
,
2635 APInt::tcShiftLeft(parts
.data(), dstPartsCount
,
2636 bits
- semantics
->precision
);
2641 /* Step 2: work out any lost fraction, and increment the absolute
2642 value if we would round away from zero. */
2643 if (truncatedBits
) {
2644 lost_fraction
= lostFractionThroughTruncation(src
, partCount(),
// A carry out of tcIncrement is reported as opInvalidOp.
2646 if (lost_fraction
!= lfExactlyZero
&&
2647 roundAwayFromZero(rounding_mode
, lost_fraction
, truncatedBits
)) {
2648 if (APInt::tcIncrement(parts
.data(), dstPartsCount
))
2649 return opInvalidOp
; /* Overflow. */
2652 lost_fraction
= lfExactlyZero
;
2655 /* Step 3: check if we fit in the destination. */
// omsb is tcMSB of the destination parts plus one.
2656 unsigned int omsb
= APInt::tcMSB(parts
.data(), dstPartsCount
) + 1;
2660 /* Negative numbers cannot be represented as unsigned. */
2664 /* It takes omsb bits to represent the unsigned integer value.
2665 We lose a bit for the sign, but care is needed as the
2666 maximally negative integer is a special case. */
2667 if (omsb
== width
&&
2668 APInt::tcLSB(parts
.data(), dstPartsCount
) + 1 != omsb
)
2671 /* This case can happen because of rounding. */
2676 APInt::tcNegate (parts
.data(), dstPartsCount
);
// !isSigned contributes 1 for unsigned targets, so they are allowed one more
// bit than signed ones in this comparison.
2678 if (omsb
>= width
+ !isSigned
)
2682 if (lost_fraction
== lfExactlyZero
) {
2689 /* Same as convertToSignExtendedInteger, except we provide
2690 deterministic values in case of an invalid operation exception,
2691 namely zero for NaNs and the minimal or maximal value respectively
2692 for underflow or overflow.
2693 The *isExact output tells whether the result is exact, in the sense
2694 that converting it back to the original floating point type produces
2695 the original value. This is almost equivalent to result==opOK,
2696 except for negative zeroes.
// NOTE(review): the line that closes the block comment above, the return type
// line, the declaration of fs, the remaining assignments to bits and the
// return are missing from this excerpt.
// Visible behaviour: fs comes from convertToSignExtendedInteger. When it is
// opInvalidOp, the low bits of parts are set through
// tcSetLeastSignificantBits and, for a negative value with isSigned, the
// parts are shifted left by width - 1.
2699 IEEEFloat::convertToInteger(MutableArrayRef
<integerPart
> parts
,
2700 unsigned int width
, bool isSigned
,
2701 roundingMode rounding_mode
, bool *isExact
) const {
2704 fs
= convertToSignExtendedInteger(parts
, width
, isSigned
, rounding_mode
,
2707 if (fs
== opInvalidOp
) {
2708 unsigned int bits
, dstPartsCount
;
2710 dstPartsCount
= partCountForBits(width
);
2711 assert(dstPartsCount
<= parts
.size() && "Integer too big");
2713 if (category
== fcNaN
)
// One visible value for bits: width minus one when isSigned, else width.
2718 bits
= width
- isSigned
;
2720 tcSetLeastSignificantBits(parts
.data(), dstPartsCount
, bits
);
2721 if (sign
&& isSigned
)
2722 APInt::tcShiftLeft(parts
.data(), dstPartsCount
, width
- 1);
2728 /* Convert an unsigned integer SRC to a floating point number,
2729 rounding according to ROUNDING_MODE. The sign of the floating
2730 point number is not modified. */
// src points to srcCount parts. The category is set to fcNormal, the
// significand is filled from the top bits of src, and the status returned is
// the one produced by normalize.
// NOTE(review): the declaration of dst, the else line and the last argument
// of lostFractionThroughTruncation are missing from this excerpt.
2731 IEEEFloat::opStatus
IEEEFloat::convertFromUnsignedParts(
2732 const integerPart
*src
, unsigned int srcCount
, roundingMode rounding_mode
) {
2733 unsigned int omsb
, precision
, dstCount
;
2735 lostFraction lost_fraction
;
2737 category
= fcNormal
;
// omsb is tcMSB of src plus one.
2738 omsb
= APInt::tcMSB(src
, srcCount
) + 1;
2739 dst
= significandParts();
2740 dstCount
= partCount();
2741 precision
= semantics
->precision
;
2743 /* We want the most significant PRECISION bits of SRC. There may not
2744 be that many; extract what we can. */
// Enough bits: keep the top precision bits, starting at bit omsb - precision,
// and record what was cut off in lost_fraction.
2745 if (precision
<= omsb
) {
2746 exponent
= omsb
- 1;
2747 lost_fraction
= lostFractionThroughTruncation(src
, srcCount
,
2749 APInt::tcExtract(dst
, dstCount
, src
, precision
, omsb
- precision
);
// Not enough bits: copy all omsb bits from bit 0; nothing is lost.
2751 exponent
= precision
- 1;
2752 lost_fraction
= lfExactlyZero
;
2753 APInt::tcExtract(dst
, dstCount
, src
, omsb
, 0);
2756 return normalize(rounding_mode
, lost_fraction
);
// Sets *this from the integer Val under rounding_mode. partCount is the word
// count of Val, and the conversion itself is delegated to
// convertFromUnsignedParts on the raw data of api.
// NOTE(review): the declaration of api and the body of the branch taken for a
// signed negative value are missing from this excerpt.
2759 IEEEFloat::opStatus
IEEEFloat::convertFromAPInt(const APInt
&Val
, bool isSigned
,
2760 roundingMode rounding_mode
) {
2761 unsigned int partCount
= Val
.getNumWords();
2765 if (isSigned
&& api
.isNegative()) {
2770 return convertFromUnsignedParts(api
.getRawData(), partCount
, rounding_mode
);
2773 /* Convert a two's complement integer SRC to a floating point number,
2774 rounding according to ROUNDING_MODE. ISSIGNED is true if the
2775 integer is signed, in which case it must be sign-extended. */
// The top bit of the last part of src is tested with tcExtractBit. In the
// branch guarded by that test a heap copy of src is negated and converted;
// otherwise src is converted directly.
// NOTE(review): the return type line, the declarations of status and copy,
// the first half of the condition, any release of copy and the return are
// missing from this excerpt.
2777 IEEEFloat::convertFromSignExtendedInteger(const integerPart
*src
,
2778 unsigned int srcCount
, bool isSigned
,
2779 roundingMode rounding_mode
) {
2783 APInt::tcExtractBit(src
, srcCount
* integerPartWidth
- 1)) {
2786 /* If we're signed and negative negate a copy. */
2788 copy
= new integerPart
[srcCount
];
2789 APInt::tcAssign(copy
, src
, srcCount
);
2790 APInt::tcNegate(copy
, srcCount
);
2791 status
= convertFromUnsignedParts(copy
, srcCount
, rounding_mode
);
2795 status
= convertFromUnsignedParts(src
, srcCount
, rounding_mode
);
2801 /* FIXME: should this just take a const APInt reference? */
// Builds an APInt of the given width from partCountForBits(width) parts and
// converts it through convertFromUnsignedParts. Bit width - 1 of parts is
// tested when isSigned is set.
// NOTE(review): the return type line and the body of the branch taken for a
// set sign bit are missing from this excerpt.
2803 IEEEFloat::convertFromZeroExtendedInteger(const integerPart
*parts
,
2804 unsigned int width
, bool isSigned
,
2805 roundingMode rounding_mode
) {
2806 unsigned int partCount
= partCountForBits(width
);
2807 APInt api
= APInt(width
, ArrayRef(parts
, partCount
));
2810 if (isSigned
&& APInt::tcExtractBit(parts
, width
- 1)) {
2815 return convertFromUnsignedParts(api
.getRawData(), partCount
, rounding_mode
);
// Parses the hexadecimal significand and exponent in s into *this and
// finishes with normalize(rounding_mode, lost_fraction). Parse failures are
// returned as errors carrying the messages visible below.
// Hex digits are written into the significand from the most significant
// position downwards (bitPos starts at partsCount times integerPartWidth);
// digits that no longer fit feed trailingHexadecimalFraction once.
// NOTE(review): the loop header, several conditions, the zeroing of the
// significand and the closing braces are missing from this excerpt.
2818 Expected
<IEEEFloat::opStatus
>
2819 IEEEFloat::convertFromHexadecimalString(StringRef s
,
2820 roundingMode rounding_mode
) {
2821 lostFraction lost_fraction
= lfExactlyZero
;
2823 category
= fcNormal
;
2827 integerPart
*significand
= significandParts();
2828 unsigned partsCount
= partCount();
2829 unsigned bitPos
= partsCount
* integerPartWidth
;
2830 bool computedTrailingFraction
= false;
2832 // Skip leading zeroes and any (hexa)decimal point.
2833 StringRef::iterator begin
= s
.begin();
2834 StringRef::iterator end
= s
.end();
2835 StringRef::iterator dot
;
2836 auto PtrOrErr
= skipLeadingZeroesAndAnyDot(begin
, end
, &dot
);
2838 return PtrOrErr
.takeError();
2839 StringRef::iterator p
= *PtrOrErr
;
2840 StringRef::iterator firstSignificantDigit
= p
;
2843 integerPart hex_value
;
2847 return createError("String contains multiple dots");
// hexDigitValue yields UINT_MAX for the character tested below.
2852 hex_value
= hexDigitValue(*p
);
2853 if (hex_value
== UINT_MAX
)
2858 // Store the number while we have space.
2861 hex_value
<<= bitPos
% integerPartWidth
;
2862 significand
[bitPos
/ integerPartWidth
] |= hex_value
;
// The trailing fraction is computed at most once, on the first digit that
// is not stored.
2863 } else if (!computedTrailingFraction
) {
2864 auto FractOrErr
= trailingHexadecimalFraction(p
, end
, hex_value
);
2866 return FractOrErr
.takeError();
2867 lost_fraction
= *FractOrErr
;
2868 computedTrailingFraction
= true;
2872 /* Hex floats require an exponent but not a hexadecimal point. */
2874 return createError("Hex strings require an exponent");
2875 if (*p
!= 'p' && *p
!= 'P')
2876 return createError("Invalid character in significand");
2878 return createError("Significand has no digits");
2879 if (dot
!= end
&& p
- begin
== 1)
2880 return createError("Significand has no digits");
2882 /* Ignore the exponent if we are zero. */
2883 if (p
!= firstSignificantDigit
) {
2886 /* Implicit hexadecimal point? */
2890 /* Calculate the exponent adjustment implicit in the number of
2891 significant digits. */
// expAdjustment starts as a digit count relative to the dot; each hex digit
// accounts for 4 bits in the scaling below.
2892 expAdjustment
= static_cast<int>(dot
- firstSignificantDigit
);
2893 if (expAdjustment
< 0)
2895 expAdjustment
= expAdjustment
* 4 - 1;
2897 /* Adjust for writing the significand starting at the most
2898 significant nibble. */
2899 expAdjustment
+= semantics
->precision
;
2900 expAdjustment
-= partsCount
* integerPartWidth
;
2902 /* Adjust for the given exponent. */
// Parsing of the exponent starts one character after p.
2903 auto ExpOrErr
= totalExponent(p
+ 1, end
, expAdjustment
);
2905 return ExpOrErr
.takeError();
2906 exponent
= *ExpOrErr
;
2909 return normalize(rounding_mode
, lost_fraction
);
// Combines the integer decSigParts with a power of ten given by exp and
// stores the rounded result in *this. The power of five is computed once;
// the loop then works in a temporary format whose precision is
// parts times integerPartWidth minus one, doubling parts on each retry, until
// the half-ulp distance check (HUdistance >= HUerr) passes and the result is
// normalized and returned.
// NOTE(review): the return type line, the declaration of isNearest, the
// condition selecting multiply versus divide, trailing call arguments and
// closing braces are missing from this excerpt.
2913 IEEEFloat::roundSignificandWithExponent(const integerPart
*decSigParts
,
2914 unsigned sigPartCount
, int exp
,
2915 roundingMode rounding_mode
) {
2916 unsigned int parts
, pow5PartCount
;
// The precision field of calcSemantics is overwritten on every loop pass.
2917 fltSemantics calcSemantics
= { 32767, -32767, 0, 0 };
2918 integerPart pow5Parts
[maxPowerOfFiveParts
];
2921 isNearest
= (rounding_mode
== rmNearestTiesToEven
||
2922 rounding_mode
== rmNearestTiesToAway
);
2924 parts
= partCountForBits(semantics
->precision
+ 11);
2926 /* Calculate pow(5, abs(exp)). */
2927 pow5PartCount
= powerOf5(pow5Parts
, exp
>= 0 ? exp
: -exp
);
2929 for (;; parts
*= 2) {
2930 opStatus sigStatus
, powStatus
;
2931 unsigned int excessPrecision
, truncatedBits
;
// excessPrecision is how many more bits the working format has than the
// target format.
2933 calcSemantics
.precision
= parts
* integerPartWidth
- 1;
2934 excessPrecision
= calcSemantics
.precision
- semantics
->precision
;
2935 truncatedBits
= excessPrecision
;
2937 IEEEFloat
decSig(calcSemantics
, uninitialized
);
2938 decSig
.makeZero(sign
);
2939 IEEEFloat
pow5(calcSemantics
);
// Both conversions report whether loading into the working format was exact.
2941 sigStatus
= decSig
.convertFromUnsignedParts(decSigParts
, sigPartCount
,
2942 rmNearestTiesToEven
);
2943 powStatus
= pow5
.convertFromUnsignedParts(pow5Parts
, pow5PartCount
,
2944 rmNearestTiesToEven
);
2945 /* Add exp, as 10^n = 5^n * 2^n. */
2946 decSig
.exponent
+= exp
;
2948 lostFraction calcLostFraction
;
2949 integerPart HUerr
, HUdistance
;
2950 unsigned int powHUerr
;
2953 /* multiplySignificand leaves the precision-th bit set to 1. */
2954 calcLostFraction
= decSig
.multiplySignificand(pow5
);
2955 powHUerr
= powStatus
!= opOK
;
2957 calcLostFraction
= decSig
.divideSignificand(pow5
);
2958 /* Denormal numbers have less precision. */
2959 if (decSig
.exponent
< semantics
->minExponent
) {
2960 excessPrecision
+= (semantics
->minExponent
- decSig
.exponent
);
2961 truncatedBits
= excessPrecision
;
2962 if (excessPrecision
> calcSemantics
.precision
)
2963 excessPrecision
= calcSemantics
.precision
;
2965 /* Extra half-ulp lost in reciprocal of exponent. */
2966 powHUerr
= (powStatus
== opOK
&& calcLostFraction
== lfExactlyZero
) ? 0:2;
2969 /* Both multiplySignificand and divideSignificand return the
2970 result with the integer bit set. */
2971 assert(APInt::tcExtractBit
2972 (decSig
.significandParts(), calcSemantics
.precision
- 1) == 1);
2974 HUerr
= HUerrBound(calcLostFraction
!= lfExactlyZero
, sigStatus
!= opOK
,
2976 HUdistance
= 2 * ulpsFromBoundary(decSig
.significandParts(),
2977 excessPrecision
, isNearest
);
2979 /* Are we guaranteed to round correctly if we truncate? */
2980 if (HUdistance
>= HUerr
) {
2981 APInt::tcExtract(significandParts(), partCount(), decSig
.significandParts(),
2982 calcSemantics
.precision
- excessPrecision
,
2984 /* Take the exponent of decSig. If we tcExtract-ed fewer bits
2985 above we must adjust our exponent to compensate for the
2986 implicit right shift. */
2987 exponent
= (decSig
.exponent
+ semantics
->precision
2988 - (calcSemantics
.precision
- excessPrecision
));
2989 calcLostFraction
= lostFractionThroughTruncation(decSig
.significandParts(),
2992 return normalize(rounding_mode
, calcLostFraction
);
// Parses the decimal text in str into *this under rounding_mode.
// interpretDecimal fills D; its error, if any, is returned directly.
// The if and else-if chain below settles the quick cases first (no
// significant digits, certain overflow, certain underflow) using integer
// bounds on log 10 / log 2. The remaining case accumulates the digits into
// a heap buffer and hands it to roundSignificandWithExponent.
// NOTE(review): the declarations of D and fs, the zero-case statements, the
// loop headers, closing braces and the return are missing from this excerpt.
2997 Expected
<IEEEFloat::opStatus
>
2998 IEEEFloat::convertFromDecimalString(StringRef str
, roundingMode rounding_mode
) {
3002 /* Scan the text. */
3003 StringRef::iterator p
= str
.begin();
3004 if (Error Err
= interpretDecimal(p
, str
.end(), &D
))
3005 return std::move(Err
);
3007 /* Handle the quick cases. First the case of no significant digits,
3008 i.e. zero, and then exponents that are obviously too large or too
3009 small. Writing L for log 10 / log 2, a number d.ddddd*10^exp
3010 definitely overflows if
3012 (exp - 1) * L >= maxExponent
3014 and definitely underflows to zero where
3016 (exp + 1) * L <= minExponent - precision
3018 With integer arithmetic the tightest bounds for L are
3020 93/28 < L < 196/59 [ numerator <= 256 ]
3021 42039/12655 < L < 28738/8651 [ numerator <= 65536 ]
3024 // Test if we have a zero number allowing for strings with no null terminators
3025 // and zero decimals with non-zero exponents.
3027 // We computed firstSigDigit by ignoring all zeros and dots. Thus if
3028 // D->firstSigDigit equals str.end(), every digit must be a zero and there can
3029 // be at most one dot. On the other hand, if we have a zero with a non-zero
3030 // exponent, then we know that D.firstSigDigit will be non-numeric.
3031 if (D
.firstSigDigit
== str
.end() || decDigitValue(*D
.firstSigDigit
) >= 10U) {
3034 if (semantics
->nanEncoding
== fltNanEncoding::NegativeZero
)
3037 /* Check whether the normalized exponent is high enough to overflow
3038 max during the log-rebasing in the max-exponent check below. */
3039 } else if (D
.normalizedExponent
- 1 > INT_MAX
/ 42039) {
3040 fs
= handleOverflow(rounding_mode
);
3042 /* If it wasn't, then it also wasn't high enough to overflow max
3043 during the log-rebasing in the min-exponent check. Check that it
3044 won't overflow min in either check, then perform the min-exponent
3046 } else if (D
.normalizedExponent
- 1 < INT_MIN
/ 42039 ||
3047 (D
.normalizedExponent
+ 1) * 28738 <=
3048 8651 * (semantics
->minExponent
- (int) semantics
->precision
)) {
3049 /* Underflow to zero and round. */
3050 category
= fcNormal
;
3052 fs
= normalize(rounding_mode
, lfLessThanHalf
);
3054 /* We can finally safely perform the max-exponent check. */
3055 } else if ((D
.normalizedExponent
- 1) * 42039
3056 >= 12655 * semantics
->maxExponent
) {
3057 /* Overflow and round. */
3058 fs
= handleOverflow(rounding_mode
);
3060 integerPart
*decSignificand
;
3061 unsigned int partCount
;
3063 /* A tight upper bound on number of bits required to hold an
3064 N-digit decimal integer is N * 196 / 59. Allocate enough space
3065 to hold the full significand, and an extra part required by
// partCount first holds the digit span, then the number of parts needed for
// that many decimal digits; the buffer gets one part more than that.
3067 partCount
= static_cast<unsigned int>(D
.lastSigDigit
- D
.firstSigDigit
) + 1;
3068 partCount
= partCountForBits(1 + 196 * partCount
/ 59);
3069 decSignificand
= new integerPart
[partCount
+ 1];
3072 /* Convert to binary efficiently - we do almost all multiplication
3073 in an integerPart. Only when this would overflow do we do a single
3074 bignum multiplication, and then revert again to multiplication
3075 in an integerPart. */
3077 integerPart decValue
, val
, multiplier
;
3085 if (p
== str
.end()) {
3089 decValue
= decDigitValue(*p
++);
// The buffer is released before the error is returned.
3090 if (decValue
>= 10U) {
3091 delete[] decSignificand
;
3092 return createError("Invalid character in significand");
3095 val
= val
* 10 + decValue
;
3096 /* The maximum number that can be multiplied by ten with any
3097 digit added without overflowing an integerPart. */
3098 } while (p
<= D
.lastSigDigit
&& multiplier
<= (~ (integerPart
) 0 - 9) / 10);
3100 /* Multiply out the current part. */
3101 APInt::tcMultiplyPart(decSignificand
, decSignificand
, multiplier
, val
,
3102 partCount
, partCount
+ 1, false);
3104 /* If we used another part (likely but not guaranteed), increase
3106 if (decSignificand
[partCount
])
3108 } while (p
<= D
.lastSigDigit
);
3110 category
= fcNormal
;
3111 fs
= roundSignificandWithExponent(decSignificand
, partCount
,
3112 D
.exponent
, rounding_mode
);
3114 delete [] decSignificand
;
// Tries to interpret str as the text form of an infinity or a NaN.
// Visible order of checks: infinity spellings on the whole string, a leading
// minus sign, infinity spellings again, a leading s or S marking a signaling
// NaN, then a nan or NaN prefix followed by an optional payload that may be
// wrapped in parentheses and may carry a 0x radix prefix.
// NOTE(review): every return statement, the statements executed on a match,
// the guards around the two drop_front calls and the declaration of Payload
// are missing from this excerpt, so the returned values are not described.
3120 bool IEEEFloat::convertFromStringSpecials(StringRef str
) {
// Every recognised name has at least this many characters.
3121 const size_t MIN_NAME_SIZE
= 3;
3123 if (str
.size() < MIN_NAME_SIZE
)
3126 if (str
.equals("inf") || str
.equals("INFINITY") || str
.equals("+Inf")) {
3131 bool IsNegative
= str
.front() == '-';
3133 str
= str
.drop_front();
3134 if (str
.size() < MIN_NAME_SIZE
)
3137 if (str
.equals("inf") || str
.equals("INFINITY") || str
.equals("Inf")) {
3143 // If we have an 's' (or 'S') prefix, then this is a Signaling NaN.
3144 bool IsSignaling
= str
.front() == 's' || str
.front() == 'S';
3146 str
= str
.drop_front();
3147 if (str
.size() < MIN_NAME_SIZE
)
3151 if (str
.starts_with("nan") || str
.starts_with("NaN")) {
3152 str
= str
.drop_front(3);
3154 // A NaN without payload.
3156 makeNaN(IsSignaling
, IsNegative
);
3160 // Allow the payload to be inside parentheses.
3161 if (str
.front() == '(') {
3162 // Parentheses should be balanced (and not empty).
3163 if (str
.size() <= 2 || str
.back() != ')')
// Strip the surrounding parentheses.
3166 str
= str
.slice(1, str
.size() - 1);
3169 // Determine the payload number's radix.
3170 unsigned Radix
= 10;
3171 if (str
[0] == '0') {
3172 if (str
.size() > 1 && tolower(str
[1]) == 'x') {
3173 str
= str
.drop_front(2);
3179 // Parse the payload and make the NaN.
// getAsInteger is negated in the test, so makeNaN runs when it yields false.
3181 if (!str
.getAsInteger(Radix
, Payload
)) {
3182 makeNaN(IsSignaling
, IsNegative
, &Payload
);
// Entry point for text to float conversion. Special spellings are tried
// first through convertFromStringSpecials; otherwise the sign is taken from a
// leading minus, a 0x or 0X prefix selects convertFromHexadecimalString on
// the text after the prefix, and everything else goes to
// convertFromDecimalString.
// NOTE(review): the guards in front of the three createError calls, the
// statement after the specials check, the body of the sign branch and the
// closing braces are missing from this excerpt.
3190 Expected
<IEEEFloat::opStatus
>
3191 IEEEFloat::convertFromString(StringRef str
, roundingMode rounding_mode
) {
3193 return createError("Invalid string length");
3195 // Handle special cases.
3196 if (convertFromStringSpecials(str
))
3199 /* Handle a leading minus sign. */
3200 StringRef::iterator p
= str
.begin();
3201 size_t slen
= str
.size();
// sign becomes 1 only for a leading minus; a leading plus leaves it at 0.
3202 sign
= *p
== '-' ? 1 : 0;
3203 if (*p
== '-' || *p
== '+') {
3207 return createError("String has no digits");
3210 if (slen
>= 2 && p
[0] == '0' && (p
[1] == 'x' || p
[1] == 'X')) {
3212 return createError("Invalid string");
// The two prefix characters are excluded from the text passed on.
3213 return convertFromHexadecimalString(StringRef(p
+ 2, slen
- 2),
3217 return convertFromDecimalString(StringRef(p
, slen
), rounding_mode
);
3220 /* Write out a hexadecimal representation of the floating point value
3221 to DST, which must be of sufficient size, in the C99 form
3222 [-]0xh.hhhhp[+-]d. Return the number of characters written,
3223 excluding the terminating NUL.
3225 If UPPERCASE, the output is in upper case, otherwise in lower case.
3227 HEXDIGITS digits appear altogether, rounding the value if
3228 necessary. If HEXDIGITS is 0, the minimal precision to display the
3229 number precisely is used instead. If nothing would appear after
3230 the decimal point it is suppressed.
3232 The decimal exponent is always printed and has at least one digit.
3233 Zero values display an exponent of zero. Infinities and NaNs
3234 appear as "infinity" or "nan" respectively.
3236 The above rules are as specified by C99. There is ambiguity about
3237 what the leading hexadecimal digit should be. This implementation
3238 uses whatever is necessary so that the exponent is displayed as
3239 stored. This implies the exponent will fall within the IEEE format
3240 range, and the leading hexadecimal digit will be 0 (for denormals),
3241 1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
3242 any other digits zero).
// NOTE(review): the line that closes the block comment above, the upperCase
// parameter line, the declaration of p, the switch over the category and the
// closing braces are missing from this excerpt.
// Visible behaviour: infinity and NaN text is copied with memcpy and dst is
// advanced past it; the zero form writes the x marker, hexDigits - 1 zero
// characters when hexDigits exceeds 1, and the p marker; normal values are
// delegated to convertNormalToHexString. The return value is dst minus p.
3244 unsigned int IEEEFloat::convertToHexString(char *dst
, unsigned int hexDigits
,
3246 roundingMode rounding_mode
) const {
3255 memcpy (dst
, upperCase
? infinityU
: infinityL
, sizeof infinityU
- 1);
3256 dst
+= sizeof infinityL
- 1;
3260 memcpy (dst
, upperCase
? NaNU
: NaNL
, sizeof NaNU
- 1);
3261 dst
+= sizeof NaNU
- 1;
3266 *dst
++ = upperCase
? 'X': 'x';
3268 if (hexDigits
> 1) {
3270 memset (dst
, '0', hexDigits
- 1);
3271 dst
+= hexDigits
- 1;
3273 *dst
++ = upperCase
? 'P': 'p';
3278 dst
= convertNormalToHexString (dst
, hexDigits
, upperCase
, rounding_mode
);
3284 return static_cast<unsigned int>(dst
- p
);
3287 /* Does the hard work of outputting the correctly rounded hexadecimal
3288 form of a normal floating point number with the specified number of
3289 hexadecimal digits. If HEXDIGITS is zero the minimum number of
3290 digits necessary to print the value precisely is output. */
3291 char *IEEEFloat::convertNormalToHexString(char *dst
, unsigned int hexDigits
,
3293 roundingMode rounding_mode
) const {
3294 unsigned int count
, valueBits
, shift
, partsCount
, outputDigits
;
3295 const char *hexDigitChars
;
3296 const integerPart
*significand
;
3301 *dst
++ = upperCase
? 'X': 'x';
3304 hexDigitChars
= upperCase
? hexDigitsUpper
: hexDigitsLower
;
3306 significand
= significandParts();
3307 partsCount
= partCount();
3309 /* +3 because the first digit only uses the single integer bit, so
3310 we have 3 virtual zero most-significant-bits. */
3311 valueBits
= semantics
->precision
+ 3;
3312 shift
= integerPartWidth
- valueBits
% integerPartWidth
;
3314 /* The natural number of digits required ignoring trailing
3315 insignificant zeroes. */
3316 outputDigits
= (valueBits
- significandLSB () + 3) / 4;
3318 /* hexDigits of zero means use the required number for the
3319 precision. Otherwise, see if we are truncating. If we are,
3320 find out if we need to round away from zero. */
3322 if (hexDigits
< outputDigits
) {
3323 /* We are dropping non-zero bits, so need to check how to round.
3324 "bits" is the number of dropped bits. */
3326 lostFraction fraction
;
3328 bits
= valueBits
- hexDigits
* 4;
3329 fraction
= lostFractionThroughTruncation (significand
, partsCount
, bits
);
3330 roundUp
= roundAwayFromZero(rounding_mode
, fraction
, bits
);
3332 outputDigits
= hexDigits
;
3335 /* Write the digits consecutively, and start writing in the location
3336 of the hexadecimal point. We move the most significant digit
3337 left and add the hexadecimal point later. */
3340 count
= (valueBits
+ integerPartWidth
- 1) / integerPartWidth
;
3342 while (outputDigits
&& count
) {
3345 /* Put the most significant integerPartWidth bits in "part". */
3346 if (--count
== partsCount
)
3347 part
= 0; /* An imaginary higher zero part. */
3349 part
= significand
[count
] << shift
;
3352 part
|= significand
[count
- 1] >> (integerPartWidth
- shift
);
3354 /* Convert as much of "part" to hexdigits as we can. */
3355 unsigned int curDigits
= integerPartWidth
/ 4;
3357 if (curDigits
> outputDigits
)
3358 curDigits
= outputDigits
;
3359 dst
+= partAsHex (dst
, part
, curDigits
, hexDigitChars
);
3360 outputDigits
-= curDigits
;
3366 /* Note that hexDigitChars has a trailing '0'. */
3369 *q
= hexDigitChars
[hexDigitValue (*q
) + 1];
3370 } while (*q
== '0');
3373 /* Add trailing zeroes. */
3374 memset (dst
, '0', outputDigits
);
3375 dst
+= outputDigits
;
3378 /* Move the most significant digit to before the point, and if there
3379 is something after the decimal point add it. This must come
3380 after rounding above. */
3387 /* Finally output the exponent. */
3388 *dst
++ = upperCase
? 'P': 'p';
3390 return writeSignedDecimal (dst
, exponent
);
3393 hash_code
hash_value(const IEEEFloat
&Arg
) {
3394 if (!Arg
.isFiniteNonZero())
3395 return hash_combine((uint8_t)Arg
.category
,
3396 // NaN has no sign, fix it at zero.
3397 Arg
.isNaN() ? (uint8_t)0 : (uint8_t)Arg
.sign
,
3398 Arg
.semantics
->precision
);
3400 // Normal floats need their exponent and significand hashed.
3401 return hash_combine((uint8_t)Arg
.category
, (uint8_t)Arg
.sign
,
3402 Arg
.semantics
->precision
, Arg
.exponent
,
3404 Arg
.significandParts(),
3405 Arg
.significandParts() + Arg
.partCount()));
3408 // Conversion from APFloat to/from host float/double. It may eventually be
3409 // possible to eliminate these and have everybody deal with APFloats, but that
3410 // will take a while. This approach will not easily extend to long double.
3411 // Current implementation requires integerPartWidth==64, which is correct at
3412 // the moment but could be made more general.
// Denormals have exponent minExponent in APFloat, but minExponent-1 in
// the actual IEEE representations.  We compensate for that here.
3417 APInt
IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
3418 assert(semantics
== (const llvm::fltSemantics
*)&semX87DoubleExtended
);
3419 assert(partCount()==2);
3421 uint64_t myexponent
, mysignificand
;
3423 if (isFiniteNonZero()) {
3424 myexponent
= exponent
+16383; //bias
3425 mysignificand
= significandParts()[0];
3426 if (myexponent
==1 && !(mysignificand
& 0x8000000000000000ULL
))
3427 myexponent
= 0; // denormal
3428 } else if (category
==fcZero
) {
3431 } else if (category
==fcInfinity
) {
3432 myexponent
= 0x7fff;
3433 mysignificand
= 0x8000000000000000ULL
;
3435 assert(category
== fcNaN
&& "Unknown category");
3436 myexponent
= 0x7fff;
3437 mysignificand
= significandParts()[0];
3441 words
[0] = mysignificand
;
3442 words
[1] = ((uint64_t)(sign
& 1) << 15) |
3443 (myexponent
& 0x7fffLL
);
3444 return APInt(80, words
);
3447 APInt
IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
3448 assert(semantics
== (const llvm::fltSemantics
*)&semPPCDoubleDoubleLegacy
);
3449 assert(partCount()==2);
3455 // Convert number to double. To avoid spurious underflows, we re-
3456 // normalize against the "double" minExponent first, and only *then*
3457 // truncate the mantissa. The result of that second conversion
3458 // may be inexact, but should never underflow.
3459 // Declare fltSemantics before APFloat that uses it (and
3460 // saves pointer to it) to ensure correct destruction order.
3461 fltSemantics extendedSemantics
= *semantics
;
3462 extendedSemantics
.minExponent
= semIEEEdouble
.minExponent
;
3463 IEEEFloat
extended(*this);
3464 fs
= extended
.convert(extendedSemantics
, rmNearestTiesToEven
, &losesInfo
);
3465 assert(fs
== opOK
&& !losesInfo
);
3468 IEEEFloat
u(extended
);
3469 fs
= u
.convert(semIEEEdouble
, rmNearestTiesToEven
, &losesInfo
);
3470 assert(fs
== opOK
|| fs
== opInexact
);
3472 words
[0] = *u
.convertDoubleAPFloatToAPInt().getRawData();
3474 // If conversion was exact or resulted in a special case, we're done;
3475 // just set the second double to zero. Otherwise, re-convert back to
3476 // the extended format and compute the difference. This now should
3477 // convert exactly to double.
3478 if (u
.isFiniteNonZero() && losesInfo
) {
3479 fs
= u
.convert(extendedSemantics
, rmNearestTiesToEven
, &losesInfo
);
3480 assert(fs
== opOK
&& !losesInfo
);
3483 IEEEFloat
v(extended
);
3484 v
.subtract(u
, rmNearestTiesToEven
);
3485 fs
= v
.convert(semIEEEdouble
, rmNearestTiesToEven
, &losesInfo
);
3486 assert(fs
== opOK
&& !losesInfo
);
3488 words
[1] = *v
.convertDoubleAPFloatToAPInt().getRawData();
3493 return APInt(128, words
);
3496 template <const fltSemantics
&S
>
3497 APInt
IEEEFloat::convertIEEEFloatToAPInt() const {
3498 assert(semantics
== &S
);
3500 constexpr int bias
= -(S
.minExponent
- 1);
3501 constexpr unsigned int trailing_significand_bits
= S
.precision
- 1;
3502 constexpr int integer_bit_part
= trailing_significand_bits
/ integerPartWidth
;
3503 constexpr integerPart integer_bit
=
3504 integerPart
{1} << (trailing_significand_bits
% integerPartWidth
);
3505 constexpr uint64_t significand_mask
= integer_bit
- 1;
3506 constexpr unsigned int exponent_bits
=
3507 S
.sizeInBits
- 1 - trailing_significand_bits
;
3508 static_assert(exponent_bits
< 64);
3509 constexpr uint64_t exponent_mask
= (uint64_t{1} << exponent_bits
) - 1;
3511 uint64_t myexponent
;
3512 std::array
<integerPart
, partCountForBits(trailing_significand_bits
)>
3515 if (isFiniteNonZero()) {
3516 myexponent
= exponent
+ bias
;
3517 std::copy_n(significandParts(), mysignificand
.size(),
3518 mysignificand
.begin());
3519 if (myexponent
== 1 &&
3520 !(significandParts()[integer_bit_part
] & integer_bit
))
3521 myexponent
= 0; // denormal
3522 } else if (category
== fcZero
) {
3523 myexponent
= ::exponentZero(S
) + bias
;
3524 mysignificand
.fill(0);
3525 } else if (category
== fcInfinity
) {
3526 if (S
.nonFiniteBehavior
== fltNonfiniteBehavior::NanOnly
) {
3527 llvm_unreachable("semantics don't support inf!");
3529 myexponent
= ::exponentInf(S
) + bias
;
3530 mysignificand
.fill(0);
3532 assert(category
== fcNaN
&& "Unknown category!");
3533 myexponent
= ::exponentNaN(S
) + bias
;
3534 std::copy_n(significandParts(), mysignificand
.size(),
3535 mysignificand
.begin());
3537 std::array
<uint64_t, (S
.sizeInBits
+ 63) / 64> words
;
3539 std::copy_n(mysignificand
.begin(), mysignificand
.size(), words
.begin());
3540 if constexpr (significand_mask
!= 0) {
3541 // Clear the integer bit.
3542 words
[mysignificand
.size() - 1] &= significand_mask
;
3544 std::fill(words_iter
, words
.end(), uint64_t{0});
3545 constexpr size_t last_word
= words
.size() - 1;
3546 uint64_t shifted_sign
= static_cast<uint64_t>(sign
& 1)
3547 << ((S
.sizeInBits
- 1) % 64);
3548 words
[last_word
] |= shifted_sign
;
3549 uint64_t shifted_exponent
= (myexponent
& exponent_mask
)
3550 << (trailing_significand_bits
% 64);
3551 words
[last_word
] |= shifted_exponent
;
3552 if constexpr (last_word
== 0) {
3553 return APInt(S
.sizeInBits
, words
[0]);
3555 return APInt(S
.sizeInBits
, words
);
3558 APInt
IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
3559 assert(partCount() == 2);
3560 return convertIEEEFloatToAPInt
<semIEEEquad
>();
3563 APInt
IEEEFloat::convertDoubleAPFloatToAPInt() const {
3564 assert(partCount()==1);
3565 return convertIEEEFloatToAPInt
<semIEEEdouble
>();
3568 APInt
IEEEFloat::convertFloatAPFloatToAPInt() const {
3569 assert(partCount()==1);
3570 return convertIEEEFloatToAPInt
<semIEEEsingle
>();
3573 APInt
IEEEFloat::convertBFloatAPFloatToAPInt() const {
3574 assert(partCount() == 1);
3575 return convertIEEEFloatToAPInt
<semBFloat
>();
3578 APInt
IEEEFloat::convertHalfAPFloatToAPInt() const {
3579 assert(partCount()==1);
3580 return convertIEEEFloatToAPInt
<semIEEEhalf
>();
3583 APInt
IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
3584 assert(partCount() == 1);
3585 return convertIEEEFloatToAPInt
<semFloat8E5M2
>();
3588 APInt
IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
3589 assert(partCount() == 1);
3590 return convertIEEEFloatToAPInt
<semFloat8E5M2FNUZ
>();
3593 APInt
IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
3594 assert(partCount() == 1);
3595 return convertIEEEFloatToAPInt
<semFloat8E4M3FN
>();
3598 APInt
IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
3599 assert(partCount() == 1);
3600 return convertIEEEFloatToAPInt
<semFloat8E4M3FNUZ
>();
3603 APInt
IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
3604 assert(partCount() == 1);
3605 return convertIEEEFloatToAPInt
<semFloat8E4M3B11FNUZ
>();
3608 APInt
IEEEFloat::convertFloatTF32APFloatToAPInt() const {
3609 assert(partCount() == 1);
3610 return convertIEEEFloatToAPInt
<semFloatTF32
>();
3613 // This function creates an APInt that is just a bit map of the floating
3614 // point constant as it would appear in memory. It is not a conversion,
3615 // and treating the result as a normal integer is unlikely to be useful.
3617 APInt
IEEEFloat::bitcastToAPInt() const {
3618 if (semantics
== (const llvm::fltSemantics
*)&semIEEEhalf
)
3619 return convertHalfAPFloatToAPInt();
3621 if (semantics
== (const llvm::fltSemantics
*)&semBFloat
)
3622 return convertBFloatAPFloatToAPInt();
3624 if (semantics
== (const llvm::fltSemantics
*)&semIEEEsingle
)
3625 return convertFloatAPFloatToAPInt();
3627 if (semantics
== (const llvm::fltSemantics
*)&semIEEEdouble
)
3628 return convertDoubleAPFloatToAPInt();
3630 if (semantics
== (const llvm::fltSemantics
*)&semIEEEquad
)
3631 return convertQuadrupleAPFloatToAPInt();
3633 if (semantics
== (const llvm::fltSemantics
*)&semPPCDoubleDoubleLegacy
)
3634 return convertPPCDoubleDoubleAPFloatToAPInt();
3636 if (semantics
== (const llvm::fltSemantics
*)&semFloat8E5M2
)
3637 return convertFloat8E5M2APFloatToAPInt();
3639 if (semantics
== (const llvm::fltSemantics
*)&semFloat8E5M2FNUZ
)
3640 return convertFloat8E5M2FNUZAPFloatToAPInt();
3642 if (semantics
== (const llvm::fltSemantics
*)&semFloat8E4M3FN
)
3643 return convertFloat8E4M3FNAPFloatToAPInt();
3645 if (semantics
== (const llvm::fltSemantics
*)&semFloat8E4M3FNUZ
)
3646 return convertFloat8E4M3FNUZAPFloatToAPInt();
3648 if (semantics
== (const llvm::fltSemantics
*)&semFloat8E4M3B11FNUZ
)
3649 return convertFloat8E4M3B11FNUZAPFloatToAPInt();
3651 if (semantics
== (const llvm::fltSemantics
*)&semFloatTF32
)
3652 return convertFloatTF32APFloatToAPInt();
3654 assert(semantics
== (const llvm::fltSemantics
*)&semX87DoubleExtended
&&
3656 return convertF80LongDoubleAPFloatToAPInt();
3659 float IEEEFloat::convertToFloat() const {
3660 assert(semantics
== (const llvm::fltSemantics
*)&semIEEEsingle
&&
3661 "Float semantics are not IEEEsingle");
3662 APInt api
= bitcastToAPInt();
3663 return api
.bitsToFloat();
3666 double IEEEFloat::convertToDouble() const {
3667 assert(semantics
== (const llvm::fltSemantics
*)&semIEEEdouble
&&
3668 "Float semantics are not IEEEdouble");
3669 APInt api
= bitcastToAPInt();
3670 return api
.bitsToDouble();
3673 /// Integer bit is explicit in this format. Intel hardware (387 and later)
3674 /// does not support these bit patterns:
3675 /// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
3676 /// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
3677 /// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
3678 /// exponent = 0, integer bit 1 ("pseudodenormal")
3679 /// At the moment, the first three are treated as NaNs, the last one as Normal.
3680 void IEEEFloat::initFromF80LongDoubleAPInt(const APInt
&api
) {
3681 uint64_t i1
= api
.getRawData()[0];
3682 uint64_t i2
= api
.getRawData()[1];
3683 uint64_t myexponent
= (i2
& 0x7fff);
3684 uint64_t mysignificand
= i1
;
3685 uint8_t myintegerbit
= mysignificand
>> 63;
3687 initialize(&semX87DoubleExtended
);
3688 assert(partCount()==2);
3690 sign
= static_cast<unsigned int>(i2
>>15);
3691 if (myexponent
== 0 && mysignificand
== 0) {
3693 } else if (myexponent
==0x7fff && mysignificand
==0x8000000000000000ULL
) {
3695 } else if ((myexponent
== 0x7fff && mysignificand
!= 0x8000000000000000ULL
) ||
3696 (myexponent
!= 0x7fff && myexponent
!= 0 && myintegerbit
== 0)) {
3698 exponent
= exponentNaN();
3699 significandParts()[0] = mysignificand
;
3700 significandParts()[1] = 0;
3702 category
= fcNormal
;
3703 exponent
= myexponent
- 16383;
3704 significandParts()[0] = mysignificand
;
3705 significandParts()[1] = 0;
3706 if (myexponent
==0) // denormal
3711 void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt
&api
) {
3712 uint64_t i1
= api
.getRawData()[0];
3713 uint64_t i2
= api
.getRawData()[1];
3717 // Get the first double and convert to our format.
3718 initFromDoubleAPInt(APInt(64, i1
));
3719 fs
= convert(semPPCDoubleDoubleLegacy
, rmNearestTiesToEven
, &losesInfo
);
3720 assert(fs
== opOK
&& !losesInfo
);
3723 // Unless we have a special case, add in second double.
3724 if (isFiniteNonZero()) {
3725 IEEEFloat
v(semIEEEdouble
, APInt(64, i2
));
3726 fs
= v
.convert(semPPCDoubleDoubleLegacy
, rmNearestTiesToEven
, &losesInfo
);
3727 assert(fs
== opOK
&& !losesInfo
);
3730 add(v
, rmNearestTiesToEven
);
3734 template <const fltSemantics
&S
>
3735 void IEEEFloat::initFromIEEEAPInt(const APInt
&api
) {
3736 assert(api
.getBitWidth() == S
.sizeInBits
);
3737 constexpr integerPart integer_bit
= integerPart
{1}
3738 << ((S
.precision
- 1) % integerPartWidth
);
3739 constexpr uint64_t significand_mask
= integer_bit
- 1;
3740 constexpr unsigned int trailing_significand_bits
= S
.precision
- 1;
3741 constexpr unsigned int stored_significand_parts
=
3742 partCountForBits(trailing_significand_bits
);
3743 constexpr unsigned int exponent_bits
=
3744 S
.sizeInBits
- 1 - trailing_significand_bits
;
3745 static_assert(exponent_bits
< 64);
3746 constexpr uint64_t exponent_mask
= (uint64_t{1} << exponent_bits
) - 1;
3747 constexpr int bias
= -(S
.minExponent
- 1);
3749 // Copy the bits of the significand. We need to clear out the exponent and
3750 // sign bit in the last word.
3751 std::array
<integerPart
, stored_significand_parts
> mysignificand
;
3752 std::copy_n(api
.getRawData(), mysignificand
.size(), mysignificand
.begin());
3753 if constexpr (significand_mask
!= 0) {
3754 mysignificand
[mysignificand
.size() - 1] &= significand_mask
;
3757 // We assume the last word holds the sign bit, the exponent, and potentially
3758 // some of the trailing significand field.
3759 uint64_t last_word
= api
.getRawData()[api
.getNumWords() - 1];
3760 uint64_t myexponent
=
3761 (last_word
>> (trailing_significand_bits
% 64)) & exponent_mask
;
3764 assert(partCount() == mysignificand
.size());
3766 sign
= static_cast<unsigned int>(last_word
>> ((S
.sizeInBits
- 1) % 64));
3768 bool all_zero_significand
=
3769 llvm::all_of(mysignificand
, [](integerPart bits
) { return bits
== 0; });
3771 bool is_zero
= myexponent
== 0 && all_zero_significand
;
3773 if constexpr (S
.nonFiniteBehavior
== fltNonfiniteBehavior::IEEE754
) {
3774 if (myexponent
- bias
== ::exponentInf(S
) && all_zero_significand
) {
3780 bool is_nan
= false;
3782 if constexpr (S
.nanEncoding
== fltNanEncoding::IEEE
) {
3783 is_nan
= myexponent
- bias
== ::exponentNaN(S
) && !all_zero_significand
;
3784 } else if constexpr (S
.nanEncoding
== fltNanEncoding::AllOnes
) {
3785 bool all_ones_significand
=
3786 std::all_of(mysignificand
.begin(), mysignificand
.end() - 1,
3787 [](integerPart bits
) { return bits
== ~integerPart
{0}; }) &&
3788 (!significand_mask
||
3789 mysignificand
[mysignificand
.size() - 1] == significand_mask
);
3790 is_nan
= myexponent
- bias
== ::exponentNaN(S
) && all_ones_significand
;
3791 } else if constexpr (S
.nanEncoding
== fltNanEncoding::NegativeZero
) {
3792 is_nan
= is_zero
&& sign
;
3797 exponent
= ::exponentNaN(S
);
3798 std::copy_n(mysignificand
.begin(), mysignificand
.size(),
3799 significandParts());
3808 category
= fcNormal
;
3809 exponent
= myexponent
- bias
;
3810 std::copy_n(mysignificand
.begin(), mysignificand
.size(), significandParts());
3811 if (myexponent
== 0) // denormal
3812 exponent
= S
.minExponent
;
3814 significandParts()[mysignificand
.size()-1] |= integer_bit
; // integer bit
3817 void IEEEFloat::initFromQuadrupleAPInt(const APInt
&api
) {
3818 initFromIEEEAPInt
<semIEEEquad
>(api
);
3821 void IEEEFloat::initFromDoubleAPInt(const APInt
&api
) {
3822 initFromIEEEAPInt
<semIEEEdouble
>(api
);
3825 void IEEEFloat::initFromFloatAPInt(const APInt
&api
) {
3826 initFromIEEEAPInt
<semIEEEsingle
>(api
);
3829 void IEEEFloat::initFromBFloatAPInt(const APInt
&api
) {
3830 initFromIEEEAPInt
<semBFloat
>(api
);
3833 void IEEEFloat::initFromHalfAPInt(const APInt
&api
) {
3834 initFromIEEEAPInt
<semIEEEhalf
>(api
);
3837 void IEEEFloat::initFromFloat8E5M2APInt(const APInt
&api
) {
3838 initFromIEEEAPInt
<semFloat8E5M2
>(api
);
3841 void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt
&api
) {
3842 initFromIEEEAPInt
<semFloat8E5M2FNUZ
>(api
);
3845 void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt
&api
) {
3846 initFromIEEEAPInt
<semFloat8E4M3FN
>(api
);
3849 void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt
&api
) {
3850 initFromIEEEAPInt
<semFloat8E4M3FNUZ
>(api
);
3853 void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt
&api
) {
3854 initFromIEEEAPInt
<semFloat8E4M3B11FNUZ
>(api
);
3857 void IEEEFloat::initFromFloatTF32APInt(const APInt
&api
) {
3858 initFromIEEEAPInt
<semFloatTF32
>(api
);
3861 /// Treat api as containing the bits of a floating point number.
3862 void IEEEFloat::initFromAPInt(const fltSemantics
*Sem
, const APInt
&api
) {
3863 assert(api
.getBitWidth() == Sem
->sizeInBits
);
3864 if (Sem
== &semIEEEhalf
)
3865 return initFromHalfAPInt(api
);
3866 if (Sem
== &semBFloat
)
3867 return initFromBFloatAPInt(api
);
3868 if (Sem
== &semIEEEsingle
)
3869 return initFromFloatAPInt(api
);
3870 if (Sem
== &semIEEEdouble
)
3871 return initFromDoubleAPInt(api
);
3872 if (Sem
== &semX87DoubleExtended
)
3873 return initFromF80LongDoubleAPInt(api
);
3874 if (Sem
== &semIEEEquad
)
3875 return initFromQuadrupleAPInt(api
);
3876 if (Sem
== &semPPCDoubleDoubleLegacy
)
3877 return initFromPPCDoubleDoubleAPInt(api
);
3878 if (Sem
== &semFloat8E5M2
)
3879 return initFromFloat8E5M2APInt(api
);
3880 if (Sem
== &semFloat8E5M2FNUZ
)
3881 return initFromFloat8E5M2FNUZAPInt(api
);
3882 if (Sem
== &semFloat8E4M3FN
)
3883 return initFromFloat8E4M3FNAPInt(api
);
3884 if (Sem
== &semFloat8E4M3FNUZ
)
3885 return initFromFloat8E4M3FNUZAPInt(api
);
3886 if (Sem
== &semFloat8E4M3B11FNUZ
)
3887 return initFromFloat8E4M3B11FNUZAPInt(api
);
3888 if (Sem
== &semFloatTF32
)
3889 return initFromFloatTF32APInt(api
);
3891 llvm_unreachable(nullptr);
3894 /// Make this number the largest magnitude normal number in the given
3896 void IEEEFloat::makeLargest(bool Negative
) {
3897 // We want (in interchange format):
3898 // sign = {Negative}
3900 // significand = 1..1
3901 category
= fcNormal
;
3903 exponent
= semantics
->maxExponent
;
3905 // Use memset to set all but the highest integerPart to all ones.
3906 integerPart
*significand
= significandParts();
3907 unsigned PartCount
= partCount();
3908 memset(significand
, 0xFF, sizeof(integerPart
)*(PartCount
- 1));
3910 // Set the high integerPart especially setting all unused top bits for
3911 // internal consistency.
3912 const unsigned NumUnusedHighBits
=
3913 PartCount
*integerPartWidth
- semantics
->precision
;
3914 significand
[PartCount
- 1] = (NumUnusedHighBits
< integerPartWidth
)
3915 ? (~integerPart(0) >> NumUnusedHighBits
)
3918 if (semantics
->nonFiniteBehavior
== fltNonfiniteBehavior::NanOnly
&&
3919 semantics
->nanEncoding
== fltNanEncoding::AllOnes
)
3920 significand
[0] &= ~integerPart(1);
3923 /// Make this number the smallest magnitude denormal number in the given
3925 void IEEEFloat::makeSmallest(bool Negative
) {
3926 // We want (in interchange format):
3927 // sign = {Negative}
3929 // significand = 0..01
3930 category
= fcNormal
;
3932 exponent
= semantics
->minExponent
;
3933 APInt::tcSet(significandParts(), 1, partCount());
3936 void IEEEFloat::makeSmallestNormalized(bool Negative
) {
3937 // We want (in interchange format):
3938 // sign = {Negative}
3940 // significand = 10..0
3942 category
= fcNormal
;
3945 exponent
= semantics
->minExponent
;
3946 APInt::tcSetBit(significandParts(), semantics
->precision
- 1);
3949 IEEEFloat::IEEEFloat(const fltSemantics
&Sem
, const APInt
&API
) {
3950 initFromAPInt(&Sem
, API
);
3953 IEEEFloat::IEEEFloat(float f
) {
3954 initFromAPInt(&semIEEEsingle
, APInt::floatToBits(f
));
3957 IEEEFloat::IEEEFloat(double d
) {
3958 initFromAPInt(&semIEEEdouble
, APInt::doubleToBits(d
));
3962 void append(SmallVectorImpl
<char> &Buffer
, StringRef Str
) {
3963 Buffer
.append(Str
.begin(), Str
.end());
3966 /// Removes data from the given significand until it is no more
3967 /// precise than is required for the desired precision.
3968 void AdjustToPrecision(APInt
&significand
,
3969 int &exp
, unsigned FormatPrecision
) {
3970 unsigned bits
= significand
.getActiveBits();
3972 // 196/59 is a very slight overestimate of lg_2(10).
3973 unsigned bitsRequired
= (FormatPrecision
* 196 + 58) / 59;
3975 if (bits
<= bitsRequired
) return;
3977 unsigned tensRemovable
= (bits
- bitsRequired
) * 59 / 196;
3978 if (!tensRemovable
) return;
3980 exp
+= tensRemovable
;
3982 APInt
divisor(significand
.getBitWidth(), 1);
3983 APInt
powten(significand
.getBitWidth(), 10);
3985 if (tensRemovable
& 1)
3987 tensRemovable
>>= 1;
3988 if (!tensRemovable
) break;
3992 significand
= significand
.udiv(divisor
);
3994 // Truncate the significand down to its active bit count.
3995 significand
= significand
.trunc(significand
.getActiveBits());
3999 void AdjustToPrecision(SmallVectorImpl
<char> &buffer
,
4000 int &exp
, unsigned FormatPrecision
) {
4001 unsigned N
= buffer
.size();
4002 if (N
<= FormatPrecision
) return;
4004 // The most significant figures are the last ones in the buffer.
4005 unsigned FirstSignificant
= N
- FormatPrecision
;
4008 // FIXME: this probably shouldn't use 'round half up'.
4010 // Rounding down is just a truncation, except we also want to drop
4011 // trailing zeros from the new result.
4012 if (buffer
[FirstSignificant
- 1] < '5') {
4013 while (FirstSignificant
< N
&& buffer
[FirstSignificant
] == '0')
4016 exp
+= FirstSignificant
;
4017 buffer
.erase(&buffer
[0], &buffer
[FirstSignificant
]);
4021 // Rounding up requires a decimal add-with-carry. If we continue
4022 // the carry, the newly-introduced zeros will just be truncated.
4023 for (unsigned I
= FirstSignificant
; I
!= N
; ++I
) {
4024 if (buffer
[I
] == '9') {
4032 // If we carried through, we have exactly one digit of precision.
4033 if (FirstSignificant
== N
) {
4034 exp
+= FirstSignificant
;
4036 buffer
.push_back('1');
4040 exp
+= FirstSignificant
;
4041 buffer
.erase(&buffer
[0], &buffer
[FirstSignificant
]);
4045 void IEEEFloat::toString(SmallVectorImpl
<char> &Str
, unsigned FormatPrecision
,
4046 unsigned FormatMaxPadding
, bool TruncateZero
) const {
4050 return append(Str
, "-Inf");
4052 return append(Str
, "+Inf");
4054 case fcNaN
: return append(Str
, "NaN");
4060 if (!FormatMaxPadding
) {
4062 append(Str
, "0.0E+0");
4065 if (FormatPrecision
> 1)
4066 Str
.append(FormatPrecision
- 1, '0');
4067 append(Str
, "e+00");
4080 // Decompose the number into an APInt and an exponent.
4081 int exp
= exponent
- ((int) semantics
->precision
- 1);
4083 semantics
->precision
,
4084 ArrayRef(significandParts(), partCountForBits(semantics
->precision
)));
4086 // Set FormatPrecision if zero. We want to do this before we
4087 // truncate trailing zeros, as those are part of the precision.
4088 if (!FormatPrecision
) {
4089 // We use enough digits so the number can be round-tripped back to an
4090 // APFloat. The formula comes from "How to Print Floating-Point Numbers
4091 // Accurately" by Steele and White.
4092 // FIXME: Using a formula based purely on the precision is conservative;
4093 // we can print fewer digits depending on the actual value being printed.
4095 // FormatPrecision = 2 + floor(significandBits / lg_2(10))
4096 FormatPrecision
= 2 + semantics
->precision
* 59 / 196;
4099 // Ignore trailing binary zeros.
4100 int trailingZeros
= significand
.countr_zero();
4101 exp
+= trailingZeros
;
4102 significand
.lshrInPlace(trailingZeros
);
4104 // Change the exponent from 2^e to 10^e.
4107 } else if (exp
> 0) {
4109 significand
= significand
.zext(semantics
->precision
+ exp
);
4110 significand
<<= exp
;
4112 } else { /* exp < 0 */
4115 // We transform this using the identity:
4116 // (N)(2^-e) == (N)(5^e)(10^-e)
4117 // This means we have to multiply N (the significand) by 5^e.
4118 // To avoid overflow, we have to operate on numbers large
4119 // enough to store N * 5^e:
4120 // log2(N * 5^e) == log2(N) + e * log2(5)
4121 // <= semantics->precision + e * 137 / 59
4122 // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
4124 unsigned precision
= semantics
->precision
+ (137 * texp
+ 136) / 59;
4126 // Multiply significand by 5^e.
4127 // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
4128 significand
= significand
.zext(precision
);
4129 APInt
five_to_the_i(precision
, 5);
4131 if (texp
& 1) significand
*= five_to_the_i
;
4135 five_to_the_i
*= five_to_the_i
;
4139 AdjustToPrecision(significand
, exp
, FormatPrecision
);
4141 SmallVector
<char, 256> buffer
;
4144 unsigned precision
= significand
.getBitWidth();
4145 if (precision
< 4) {
4146 // We need enough precision to store the value 10.
4148 significand
= significand
.zext(precision
);
4150 APInt
ten(precision
, 10);
4151 APInt
digit(precision
, 0);
4153 bool inTrail
= true;
4154 while (significand
!= 0) {
4155 // digit <- significand % 10
4156 // significand <- significand / 10
4157 APInt::udivrem(significand
, ten
, significand
, digit
);
4159 unsigned d
= digit
.getZExtValue();
4161 // Drop trailing zeros.
4162 if (inTrail
&& !d
) exp
++;
4164 buffer
.push_back((char) ('0' + d
));
4169 assert(!buffer
.empty() && "no characters in buffer!");
4171 // Drop down to FormatPrecision.
4172 // TODO: don't do more precise calculations above than are required.
4173 AdjustToPrecision(buffer
, exp
, FormatPrecision
);
4175 unsigned NDigits
= buffer
.size();
4177 // Check whether we should use scientific notation.
4178 bool FormatScientific
;
4179 if (!FormatMaxPadding
)
4180 FormatScientific
= true;
4185 // But we shouldn't make the number look more precise than it is.
4186 FormatScientific
= ((unsigned) exp
> FormatMaxPadding
||
4187 NDigits
+ (unsigned) exp
> FormatPrecision
);
4189 // Power of the most significant digit.
4190 int MSD
= exp
+ (int) (NDigits
- 1);
4193 FormatScientific
= false;
4195 // 765e-5 == 0.00765
4197 FormatScientific
= ((unsigned) -MSD
) > FormatMaxPadding
;
4202 // Scientific formatting is pretty straightforward.
4203 if (FormatScientific
) {
4204 exp
+= (NDigits
- 1);
4206 Str
.push_back(buffer
[NDigits
-1]);
4208 if (NDigits
== 1 && TruncateZero
)
4211 for (unsigned I
= 1; I
!= NDigits
; ++I
)
4212 Str
.push_back(buffer
[NDigits
-1-I
]);
4213 // Fill with zeros up to FormatPrecision.
4214 if (!TruncateZero
&& FormatPrecision
> NDigits
- 1)
4215 Str
.append(FormatPrecision
- NDigits
+ 1, '0');
4216 // For !TruncateZero we use lower 'e'.
4217 Str
.push_back(TruncateZero
? 'E' : 'e');
4219 Str
.push_back(exp
>= 0 ? '+' : '-');
4220 if (exp
< 0) exp
= -exp
;
4221 SmallVector
<char, 6> expbuf
;
4223 expbuf
.push_back((char) ('0' + (exp
% 10)));
4226 // Exponent always at least two digits if we do not truncate zeros.
4227 if (!TruncateZero
&& expbuf
.size() < 2)
4228 expbuf
.push_back('0');
4229 for (unsigned I
= 0, E
= expbuf
.size(); I
!= E
; ++I
)
4230 Str
.push_back(expbuf
[E
-1-I
]);
4234 // Non-scientific, positive exponents.
4236 for (unsigned I
= 0; I
!= NDigits
; ++I
)
4237 Str
.push_back(buffer
[NDigits
-1-I
]);
4238 for (unsigned I
= 0; I
!= (unsigned) exp
; ++I
)
4243 // Non-scientific, negative exponents.
4245 // The number of digits to the left of the decimal point.
4246 int NWholeDigits
= exp
+ (int) NDigits
;
4249 if (NWholeDigits
> 0) {
4250 for (; I
!= (unsigned) NWholeDigits
; ++I
)
4251 Str
.push_back(buffer
[NDigits
-I
-1]);
4254 unsigned NZeros
= 1 + (unsigned) -NWholeDigits
;
4258 for (unsigned Z
= 1; Z
!= NZeros
; ++Z
)
4262 for (; I
!= NDigits
; ++I
)
4263 Str
.push_back(buffer
[NDigits
-I
-1]);
4266 bool IEEEFloat::getExactInverse(APFloat
*inv
) const {
4267 // Special floats and denormals have no exact inverse.
4268 if (!isFiniteNonZero())
4271 // Check that the number is a power of two by making sure that only the
4272 // integer bit is set in the significand.
4273 if (significandLSB() != semantics
->precision
- 1)
4277 IEEEFloat
reciprocal(*semantics
, 1ULL);
4278 if (reciprocal
.divide(*this, rmNearestTiesToEven
) != opOK
)
4281 // Avoid multiplication with a denormal, it is not safe on all platforms and
4282 // may be slower than a normal division.
4283 if (reciprocal
.isDenormal())
4286 assert(reciprocal
.isFiniteNonZero() &&
4287 reciprocal
.significandLSB() == reciprocal
.semantics
->precision
- 1);
4290 *inv
= APFloat(reciprocal
, *semantics
);
4295 int IEEEFloat::getExactLog2Abs() const {
4296 if (!isFinite() || isZero())
4299 const integerPart
*Parts
= significandParts();
4300 const int PartCount
= partCountForBits(semantics
->precision
);
4303 for (int i
= 0; i
< PartCount
; ++i
) {
4304 PopCount
+= llvm::popcount(Parts
[i
]);
4309 if (exponent
!= semantics
->minExponent
)
4312 int CountrParts
= 0;
4313 for (int i
= 0; i
< PartCount
;
4314 ++i
, CountrParts
+= APInt::APINT_BITS_PER_WORD
) {
4315 if (Parts
[i
] != 0) {
4316 return exponent
- semantics
->precision
+ CountrParts
+
4317 llvm::countr_zero(Parts
[i
]) + 1;
4321 llvm_unreachable("didn't find the set bit");
4324 bool IEEEFloat::isSignaling() const {
4327 if (semantics
->nonFiniteBehavior
== fltNonfiniteBehavior::NanOnly
)
4330 // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the
4331 // first bit of the trailing significand being 0.
4332 return !APInt::tcExtractBit(significandParts(), semantics
->precision
- 2);
4335 /// IEEE-754R 2008 5.3.1: nextUp/nextDown.
4337 /// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
4338 /// appropriate sign switching before/after the computation.
4339 IEEEFloat::opStatus
IEEEFloat::next(bool nextDown
) {
4340 // If we are performing nextDown, swap sign so we have -x.
4344 // Compute nextUp(x)
4345 opStatus result
= opOK
;
4347 // Handle each float category separately.
4350 // nextUp(+inf) = +inf
4353 // nextUp(-inf) = -getLargest()
4357 // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag.
4358 // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not
4359 // change the payload.
4360 if (isSignaling()) {
4361 result
= opInvalidOp
;
4362 // For consistency, propagate the sign of the sNaN to the qNaN.
4363 makeNaN(false, isNegative(), nullptr);
4367 // nextUp(pm 0) = +getSmallest()
4368 makeSmallest(false);
4371 // nextUp(-getSmallest()) = -0
4372 if (isSmallest() && isNegative()) {
4373 APInt::tcSet(significandParts(), 0, partCount());
4376 if (semantics
->nanEncoding
== fltNanEncoding::NegativeZero
)
4381 if (isLargest() && !isNegative()) {
4382 if (semantics
->nonFiniteBehavior
== fltNonfiniteBehavior::NanOnly
) {
4383 // nextUp(getLargest()) == NAN
4387 // nextUp(getLargest()) == INFINITY
4388 APInt::tcSet(significandParts(), 0, partCount());
4389 category
= fcInfinity
;
4390 exponent
= semantics
->maxExponent
+ 1;
4395 // nextUp(normal) == normal + inc.
4397 // If we are negative, we need to decrement the significand.
4399 // We only cross a binade boundary that requires adjusting the exponent
4401 // 1. exponent != semantics->minExponent. This implies we are not in the
4402 // smallest binade or are dealing with denormals.
4403 // 2. Our significand excluding the integral bit is all zeros.
4404 bool WillCrossBinadeBoundary
=
4405 exponent
!= semantics
->minExponent
&& isSignificandAllZeros();
4407 // Decrement the significand.
4409 // We always do this since:
4410 // 1. If we are dealing with a non-binade decrement, by definition we
4411 // just decrement the significand.
4412 // 2. If we are dealing with a normal -> normal binade decrement, since
4413 // we have an explicit integral bit the fact that all bits but the
4414 // integral bit are zero implies that subtracting one will yield a
4415 // significand with 0 integral bit and 1 in all other spots. Thus we
4416 // must just adjust the exponent and set the integral bit to 1.
4417 // 3. If we are dealing with a normal -> denormal binade decrement,
4418 // since we set the integral bit to 0 when we represent denormals, we
4419 // just decrement the significand.
4420 integerPart
*Parts
= significandParts();
4421 APInt::tcDecrement(Parts
, partCount());
4423 if (WillCrossBinadeBoundary
) {
4424 // Our result is a normal number. Do the following:
4425 // 1. Set the integral bit to 1.
4426 // 2. Decrement the exponent.
4427 APInt::tcSetBit(Parts
, semantics
->precision
- 1);
4431 // If we are positive, we need to increment the significand.
4433 // We only cross a binade boundary that requires adjusting the exponent if
4434 // the input is not a denormal and all of said input's significand bits
4435 // are set. If all of said conditions are true: clear the significand, set
4436 // the integral bit to 1, and increment the exponent. If we have a
4437 // denormal always increment since moving denormals and the numbers in the
4438 // smallest normal binade have the same exponent in our representation.
4439 bool WillCrossBinadeBoundary
= !isDenormal() && isSignificandAllOnes();
4441 if (WillCrossBinadeBoundary
) {
4442 integerPart
*Parts
= significandParts();
4443 APInt::tcSet(Parts
, 0, partCount());
4444 APInt::tcSetBit(Parts
, semantics
->precision
- 1);
4445 assert(exponent
!= semantics
->maxExponent
&&
4446 "We can not increment an exponent beyond the maxExponent allowed"
4447 " by the given floating point semantics.");
4450 incrementSignificand();
4456 // If we are performing nextDown, swap sign so we have -nextUp(-x)
4463 APFloatBase::ExponentType
IEEEFloat::exponentNaN() const {
4464 return ::exponentNaN(*semantics
);
4467 APFloatBase::ExponentType
IEEEFloat::exponentInf() const {
4468 return ::exponentInf(*semantics
);
4471 APFloatBase::ExponentType
IEEEFloat::exponentZero() const {
4472 return ::exponentZero(*semantics
);
4475 void IEEEFloat::makeInf(bool Negative
) {
4476 if (semantics
->nonFiniteBehavior
== fltNonfiniteBehavior::NanOnly
) {
4477 // There is no Inf, so make NaN instead.
4478 makeNaN(false, Negative
);
4481 category
= fcInfinity
;
4483 exponent
= exponentInf();
4484 APInt::tcSet(significandParts(), 0, partCount());
4487 void IEEEFloat::makeZero(bool Negative
) {
4490 if (semantics
->nanEncoding
== fltNanEncoding::NegativeZero
) {
4491 // Merge negative zero to positive because 0b10000...000 is used for NaN
4494 exponent
= exponentZero();
4495 APInt::tcSet(significandParts(), 0, partCount());
4498 void IEEEFloat::makeQuiet() {
4500 if (semantics
->nonFiniteBehavior
!= fltNonfiniteBehavior::NanOnly
)
4501 APInt::tcSetBit(significandParts(), semantics
->precision
- 2);
4504 int ilogb(const IEEEFloat
&Arg
) {
4506 return IEEEFloat::IEK_NaN
;
4508 return IEEEFloat::IEK_Zero
;
4509 if (Arg
.isInfinity())
4510 return IEEEFloat::IEK_Inf
;
4511 if (!Arg
.isDenormal())
4512 return Arg
.exponent
;
4514 IEEEFloat
Normalized(Arg
);
4515 int SignificandBits
= Arg
.getSemantics().precision
- 1;
4517 Normalized
.exponent
+= SignificandBits
;
4518 Normalized
.normalize(IEEEFloat::rmNearestTiesToEven
, lfExactlyZero
);
4519 return Normalized
.exponent
- SignificandBits
;
4522 IEEEFloat
scalbn(IEEEFloat X
, int Exp
, IEEEFloat::roundingMode RoundingMode
) {
4523 auto MaxExp
= X
.getSemantics().maxExponent
;
4524 auto MinExp
= X
.getSemantics().minExponent
;
4526 // If Exp is wildly out-of-scale, simply adding it to X.exponent will
4527 // overflow; clamp it to a safe range before adding, but ensure that the range
4528 // is large enough that the clamp does not change the result. The range we
4529 // need to support is the difference between the largest possible exponent and
4530 // the normalized exponent of half the smallest denormal.
4532 int SignificandBits
= X
.getSemantics().precision
- 1;
4533 int MaxIncrement
= MaxExp
- (MinExp
- SignificandBits
) + 1;
4535 // Clamp to one past the range ends to let normalize handle overlflow.
4536 X
.exponent
+= std::clamp(Exp
, -MaxIncrement
- 1, MaxIncrement
);
4537 X
.normalize(RoundingMode
, lfExactlyZero
);
4543 IEEEFloat
frexp(const IEEEFloat
&Val
, int &Exp
, IEEEFloat::roundingMode RM
) {
4546 // Quiet signalling nans.
4547 if (Exp
== IEEEFloat::IEK_NaN
) {
4548 IEEEFloat
Quiet(Val
);
4553 if (Exp
== IEEEFloat::IEK_Inf
)
4556 // 1 is added because frexp is defined to return a normalized fraction in
4557 // +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
4558 Exp
= Exp
== IEEEFloat::IEK_Zero
? 0 : Exp
+ 1;
4559 return scalbn(Val
, -Exp
, RM
);
4562 DoubleAPFloat::DoubleAPFloat(const fltSemantics
&S
)
4564 Floats(new APFloat
[2]{APFloat(semIEEEdouble
), APFloat(semIEEEdouble
)}) {
4565 assert(Semantics
== &semPPCDoubleDouble
);
4568 DoubleAPFloat::DoubleAPFloat(const fltSemantics
&S
, uninitializedTag
)
4570 Floats(new APFloat
[2]{APFloat(semIEEEdouble
, uninitialized
),
4571 APFloat(semIEEEdouble
, uninitialized
)}) {
4572 assert(Semantics
== &semPPCDoubleDouble
);
4575 DoubleAPFloat::DoubleAPFloat(const fltSemantics
&S
, integerPart I
)
4576 : Semantics(&S
), Floats(new APFloat
[2]{APFloat(semIEEEdouble
, I
),
4577 APFloat(semIEEEdouble
)}) {
4578 assert(Semantics
== &semPPCDoubleDouble
);
4581 DoubleAPFloat::DoubleAPFloat(const fltSemantics
&S
, const APInt
&I
)
4583 Floats(new APFloat
[2]{
4584 APFloat(semIEEEdouble
, APInt(64, I
.getRawData()[0])),
4585 APFloat(semIEEEdouble
, APInt(64, I
.getRawData()[1]))}) {
4586 assert(Semantics
== &semPPCDoubleDouble
);
4589 DoubleAPFloat::DoubleAPFloat(const fltSemantics
&S
, APFloat
&&First
,
4592 Floats(new APFloat
[2]{std::move(First
), std::move(Second
)}) {
4593 assert(Semantics
== &semPPCDoubleDouble
);
4594 assert(&Floats
[0].getSemantics() == &semIEEEdouble
);
4595 assert(&Floats
[1].getSemantics() == &semIEEEdouble
);
4598 DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat
&RHS
)
4599 : Semantics(RHS
.Semantics
),
4600 Floats(RHS
.Floats
? new APFloat
[2]{APFloat(RHS
.Floats
[0]),
4601 APFloat(RHS
.Floats
[1])}
4603 assert(Semantics
== &semPPCDoubleDouble
);
4606 DoubleAPFloat::DoubleAPFloat(DoubleAPFloat
&&RHS
)
4607 : Semantics(RHS
.Semantics
), Floats(std::move(RHS
.Floats
)) {
4608 RHS
.Semantics
= &semBogus
;
4609 assert(Semantics
== &semPPCDoubleDouble
);
4612 DoubleAPFloat
&DoubleAPFloat::operator=(const DoubleAPFloat
&RHS
) {
4613 if (Semantics
== RHS
.Semantics
&& RHS
.Floats
) {
4614 Floats
[0] = RHS
.Floats
[0];
4615 Floats
[1] = RHS
.Floats
[1];
4616 } else if (this != &RHS
) {
4617 this->~DoubleAPFloat();
4618 new (this) DoubleAPFloat(RHS
);
4623 // Implement addition, subtraction, multiplication and division based on:
4624 // "Software for Doubled-Precision Floating-Point Computations",
4625 // by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
4626 APFloat::opStatus
DoubleAPFloat::addImpl(const APFloat
&a
, const APFloat
&aa
,
4627 const APFloat
&c
, const APFloat
&cc
,
4631 Status
|= z
.add(c
, RM
);
4632 if (!z
.isFinite()) {
4633 if (!z
.isInfinity()) {
4634 Floats
[0] = std::move(z
);
4635 Floats
[1].makeZero(/* Neg = */ false);
4636 return (opStatus
)Status
;
4639 auto AComparedToC
= a
.compareAbsoluteValue(c
);
4641 Status
|= z
.add(aa
, RM
);
4642 if (AComparedToC
== APFloat::cmpGreaterThan
) {
4643 // z = cc + aa + c + a;
4644 Status
|= z
.add(c
, RM
);
4645 Status
|= z
.add(a
, RM
);
4647 // z = cc + aa + a + c;
4648 Status
|= z
.add(a
, RM
);
4649 Status
|= z
.add(c
, RM
);
4651 if (!z
.isFinite()) {
4652 Floats
[0] = std::move(z
);
4653 Floats
[1].makeZero(/* Neg = */ false);
4654 return (opStatus
)Status
;
4658 Status
|= zz
.add(cc
, RM
);
4659 if (AComparedToC
== APFloat::cmpGreaterThan
) {
4660 // Floats[1] = a - z + c + zz;
4662 Status
|= Floats
[1].subtract(z
, RM
);
4663 Status
|= Floats
[1].add(c
, RM
);
4664 Status
|= Floats
[1].add(zz
, RM
);
4666 // Floats[1] = c - z + a + zz;
4668 Status
|= Floats
[1].subtract(z
, RM
);
4669 Status
|= Floats
[1].add(a
, RM
);
4670 Status
|= Floats
[1].add(zz
, RM
);
4675 Status
|= q
.subtract(z
, RM
);
4677 // zz = q + c + (a - (q + z)) + aa + cc;
4678 // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
4680 Status
|= zz
.add(c
, RM
);
4681 Status
|= q
.add(z
, RM
);
4682 Status
|= q
.subtract(a
, RM
);
4684 Status
|= zz
.add(q
, RM
);
4685 Status
|= zz
.add(aa
, RM
);
4686 Status
|= zz
.add(cc
, RM
);
4687 if (zz
.isZero() && !zz
.isNegative()) {
4688 Floats
[0] = std::move(z
);
4689 Floats
[1].makeZero(/* Neg = */ false);
4693 Status
|= Floats
[0].add(zz
, RM
);
4694 if (!Floats
[0].isFinite()) {
4695 Floats
[1].makeZero(/* Neg = */ false);
4696 return (opStatus
)Status
;
4698 Floats
[1] = std::move(z
);
4699 Status
|= Floats
[1].subtract(Floats
[0], RM
);
4700 Status
|= Floats
[1].add(zz
, RM
);
4702 return (opStatus
)Status
;
4705 APFloat::opStatus
DoubleAPFloat::addWithSpecial(const DoubleAPFloat
&LHS
,
4706 const DoubleAPFloat
&RHS
,
4709 if (LHS
.getCategory() == fcNaN
) {
4713 if (RHS
.getCategory() == fcNaN
) {
4717 if (LHS
.getCategory() == fcZero
) {
4721 if (RHS
.getCategory() == fcZero
) {
4725 if (LHS
.getCategory() == fcInfinity
&& RHS
.getCategory() == fcInfinity
&&
4726 LHS
.isNegative() != RHS
.isNegative()) {
4727 Out
.makeNaN(false, Out
.isNegative(), nullptr);
4730 if (LHS
.getCategory() == fcInfinity
) {
4734 if (RHS
.getCategory() == fcInfinity
) {
4738 assert(LHS
.getCategory() == fcNormal
&& RHS
.getCategory() == fcNormal
);
4740 APFloat
A(LHS
.Floats
[0]), AA(LHS
.Floats
[1]), C(RHS
.Floats
[0]),
4742 assert(&A
.getSemantics() == &semIEEEdouble
);
4743 assert(&AA
.getSemantics() == &semIEEEdouble
);
4744 assert(&C
.getSemantics() == &semIEEEdouble
);
4745 assert(&CC
.getSemantics() == &semIEEEdouble
);
4746 assert(&Out
.Floats
[0].getSemantics() == &semIEEEdouble
);
4747 assert(&Out
.Floats
[1].getSemantics() == &semIEEEdouble
);
4748 return Out
.addImpl(A
, AA
, C
, CC
, RM
);
4751 APFloat::opStatus
DoubleAPFloat::add(const DoubleAPFloat
&RHS
,
4753 return addWithSpecial(*this, RHS
, *this, RM
);
4756 APFloat::opStatus
DoubleAPFloat::subtract(const DoubleAPFloat
&RHS
,
4759 auto Ret
= add(RHS
, RM
);
4764 APFloat::opStatus
DoubleAPFloat::multiply(const DoubleAPFloat
&RHS
,
4765 APFloat::roundingMode RM
) {
4766 const auto &LHS
= *this;
4768 /* Interesting observation: For special categories, finding the lowest
4769 common ancestor of the following layered graph gives the correct
4778 e.g. NaN * NaN = NaN
4780 Normal * Zero = Zero
4783 if (LHS
.getCategory() == fcNaN
) {
4787 if (RHS
.getCategory() == fcNaN
) {
4791 if ((LHS
.getCategory() == fcZero
&& RHS
.getCategory() == fcInfinity
) ||
4792 (LHS
.getCategory() == fcInfinity
&& RHS
.getCategory() == fcZero
)) {
4793 Out
.makeNaN(false, false, nullptr);
4796 if (LHS
.getCategory() == fcZero
|| LHS
.getCategory() == fcInfinity
) {
4800 if (RHS
.getCategory() == fcZero
|| RHS
.getCategory() == fcInfinity
) {
4804 assert(LHS
.getCategory() == fcNormal
&& RHS
.getCategory() == fcNormal
&&
4805 "Special cases not handled exhaustively");
4808 APFloat A
= Floats
[0], B
= Floats
[1], C
= RHS
.Floats
[0], D
= RHS
.Floats
[1];
4811 Status
|= T
.multiply(C
, RM
);
4812 if (!T
.isFiniteNonZero()) {
4814 Floats
[1].makeZero(/* Neg = */ false);
4815 return (opStatus
)Status
;
4818 // tau = fmsub(a, c, t), that is -fmadd(-a, c, t).
4821 Status
|= Tau
.fusedMultiplyAdd(C
, T
, RM
);
4826 Status
|= V
.multiply(D
, RM
);
4829 Status
|= W
.multiply(C
, RM
);
4830 Status
|= V
.add(W
, RM
);
4832 Status
|= Tau
.add(V
, RM
);
4836 Status
|= U
.add(Tau
, RM
);
4839 if (!U
.isFinite()) {
4840 Floats
[1].makeZero(/* Neg = */ false);
4842 // Floats[1] = (t - u) + tau
4843 Status
|= T
.subtract(U
, RM
);
4844 Status
|= T
.add(Tau
, RM
);
4847 return (opStatus
)Status
;
4850 APFloat::opStatus
DoubleAPFloat::divide(const DoubleAPFloat
&RHS
,
4851 APFloat::roundingMode RM
) {
4852 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4853 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
4855 Tmp
.divide(APFloat(semPPCDoubleDoubleLegacy
, RHS
.bitcastToAPInt()), RM
);
4856 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
4860 APFloat::opStatus
DoubleAPFloat::remainder(const DoubleAPFloat
&RHS
) {
4861 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4862 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
4864 Tmp
.remainder(APFloat(semPPCDoubleDoubleLegacy
, RHS
.bitcastToAPInt()));
4865 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
4869 APFloat::opStatus
DoubleAPFloat::mod(const DoubleAPFloat
&RHS
) {
4870 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4871 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
4872 auto Ret
= Tmp
.mod(APFloat(semPPCDoubleDoubleLegacy
, RHS
.bitcastToAPInt()));
4873 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
4878 DoubleAPFloat::fusedMultiplyAdd(const DoubleAPFloat
&Multiplicand
,
4879 const DoubleAPFloat
&Addend
,
4880 APFloat::roundingMode RM
) {
4881 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4882 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
4883 auto Ret
= Tmp
.fusedMultiplyAdd(
4884 APFloat(semPPCDoubleDoubleLegacy
, Multiplicand
.bitcastToAPInt()),
4885 APFloat(semPPCDoubleDoubleLegacy
, Addend
.bitcastToAPInt()), RM
);
4886 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
4890 APFloat::opStatus
DoubleAPFloat::roundToIntegral(APFloat::roundingMode RM
) {
4891 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4892 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
4893 auto Ret
= Tmp
.roundToIntegral(RM
);
4894 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
4898 void DoubleAPFloat::changeSign() {
4899 Floats
[0].changeSign();
4900 Floats
[1].changeSign();
4904 DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat
&RHS
) const {
4905 auto Result
= Floats
[0].compareAbsoluteValue(RHS
.Floats
[0]);
4906 if (Result
!= cmpEqual
)
4908 Result
= Floats
[1].compareAbsoluteValue(RHS
.Floats
[1]);
4909 if (Result
== cmpLessThan
|| Result
== cmpGreaterThan
) {
4910 auto Against
= Floats
[0].isNegative() ^ Floats
[1].isNegative();
4911 auto RHSAgainst
= RHS
.Floats
[0].isNegative() ^ RHS
.Floats
[1].isNegative();
4912 if (Against
&& !RHSAgainst
)
4914 if (!Against
&& RHSAgainst
)
4915 return cmpGreaterThan
;
4916 if (!Against
&& !RHSAgainst
)
4918 if (Against
&& RHSAgainst
)
4919 return (cmpResult
)(cmpLessThan
+ cmpGreaterThan
- Result
);
4924 APFloat::fltCategory
DoubleAPFloat::getCategory() const {
4925 return Floats
[0].getCategory();
4928 bool DoubleAPFloat::isNegative() const { return Floats
[0].isNegative(); }
4930 void DoubleAPFloat::makeInf(bool Neg
) {
4931 Floats
[0].makeInf(Neg
);
4932 Floats
[1].makeZero(/* Neg = */ false);
4935 void DoubleAPFloat::makeZero(bool Neg
) {
4936 Floats
[0].makeZero(Neg
);
4937 Floats
[1].makeZero(/* Neg = */ false);
4940 void DoubleAPFloat::makeLargest(bool Neg
) {
4941 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4942 Floats
[0] = APFloat(semIEEEdouble
, APInt(64, 0x7fefffffffffffffull
));
4943 Floats
[1] = APFloat(semIEEEdouble
, APInt(64, 0x7c8ffffffffffffeull
));
4948 void DoubleAPFloat::makeSmallest(bool Neg
) {
4949 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4950 Floats
[0].makeSmallest(Neg
);
4951 Floats
[1].makeZero(/* Neg = */ false);
4954 void DoubleAPFloat::makeSmallestNormalized(bool Neg
) {
4955 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4956 Floats
[0] = APFloat(semIEEEdouble
, APInt(64, 0x0360000000000000ull
));
4958 Floats
[0].changeSign();
4959 Floats
[1].makeZero(/* Neg = */ false);
4962 void DoubleAPFloat::makeNaN(bool SNaN
, bool Neg
, const APInt
*fill
) {
4963 Floats
[0].makeNaN(SNaN
, Neg
, fill
);
4964 Floats
[1].makeZero(/* Neg = */ false);
4967 APFloat::cmpResult
DoubleAPFloat::compare(const DoubleAPFloat
&RHS
) const {
4968 auto Result
= Floats
[0].compare(RHS
.Floats
[0]);
4969 // |Float[0]| > |Float[1]|
4970 if (Result
== APFloat::cmpEqual
)
4971 return Floats
[1].compare(RHS
.Floats
[1]);
4975 bool DoubleAPFloat::bitwiseIsEqual(const DoubleAPFloat
&RHS
) const {
4976 return Floats
[0].bitwiseIsEqual(RHS
.Floats
[0]) &&
4977 Floats
[1].bitwiseIsEqual(RHS
.Floats
[1]);
4980 hash_code
hash_value(const DoubleAPFloat
&Arg
) {
4982 return hash_combine(hash_value(Arg
.Floats
[0]), hash_value(Arg
.Floats
[1]));
4983 return hash_combine(Arg
.Semantics
);
4986 APInt
DoubleAPFloat::bitcastToAPInt() const {
4987 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4989 Floats
[0].bitcastToAPInt().getRawData()[0],
4990 Floats
[1].bitcastToAPInt().getRawData()[0],
4992 return APInt(128, 2, Data
);
4995 Expected
<APFloat::opStatus
> DoubleAPFloat::convertFromString(StringRef S
,
4997 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
4998 APFloat
Tmp(semPPCDoubleDoubleLegacy
);
4999 auto Ret
= Tmp
.convertFromString(S
, RM
);
5000 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
5004 APFloat::opStatus
DoubleAPFloat::next(bool nextDown
) {
5005 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
5006 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
5007 auto Ret
= Tmp
.next(nextDown
);
5008 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
5013 DoubleAPFloat::convertToInteger(MutableArrayRef
<integerPart
> Input
,
5014 unsigned int Width
, bool IsSigned
,
5015 roundingMode RM
, bool *IsExact
) const {
5016 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
5017 return APFloat(semPPCDoubleDoubleLegacy
, bitcastToAPInt())
5018 .convertToInteger(Input
, Width
, IsSigned
, RM
, IsExact
);
5021 APFloat::opStatus
DoubleAPFloat::convertFromAPInt(const APInt
&Input
,
5024 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
5025 APFloat
Tmp(semPPCDoubleDoubleLegacy
);
5026 auto Ret
= Tmp
.convertFromAPInt(Input
, IsSigned
, RM
);
5027 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
5032 DoubleAPFloat::convertFromSignExtendedInteger(const integerPart
*Input
,
5033 unsigned int InputSize
,
5034 bool IsSigned
, roundingMode RM
) {
5035 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
5036 APFloat
Tmp(semPPCDoubleDoubleLegacy
);
5037 auto Ret
= Tmp
.convertFromSignExtendedInteger(Input
, InputSize
, IsSigned
, RM
);
5038 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
5043 DoubleAPFloat::convertFromZeroExtendedInteger(const integerPart
*Input
,
5044 unsigned int InputSize
,
5045 bool IsSigned
, roundingMode RM
) {
5046 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
5047 APFloat
Tmp(semPPCDoubleDoubleLegacy
);
5048 auto Ret
= Tmp
.convertFromZeroExtendedInteger(Input
, InputSize
, IsSigned
, RM
);
5049 *this = DoubleAPFloat(semPPCDoubleDouble
, Tmp
.bitcastToAPInt());
5053 unsigned int DoubleAPFloat::convertToHexString(char *DST
,
5054 unsigned int HexDigits
,
5056 roundingMode RM
) const {
5057 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
5058 return APFloat(semPPCDoubleDoubleLegacy
, bitcastToAPInt())
5059 .convertToHexString(DST
, HexDigits
, UpperCase
, RM
);
5062 bool DoubleAPFloat::isDenormal() const {
5063 return getCategory() == fcNormal
&&
5064 (Floats
[0].isDenormal() || Floats
[1].isDenormal() ||
5065 // (double)(Hi + Lo) == Hi defines a normal number.
5066 Floats
[0] != Floats
[0] + Floats
[1]);
5069 bool DoubleAPFloat::isSmallest() const {
5070 if (getCategory() != fcNormal
)
5072 DoubleAPFloat
Tmp(*this);
5073 Tmp
.makeSmallest(this->isNegative());
5074 return Tmp
.compare(*this) == cmpEqual
;
5077 bool DoubleAPFloat::isSmallestNormalized() const {
5078 if (getCategory() != fcNormal
)
5081 DoubleAPFloat
Tmp(*this);
5082 Tmp
.makeSmallestNormalized(this->isNegative());
5083 return Tmp
.compare(*this) == cmpEqual
;
5086 bool DoubleAPFloat::isLargest() const {
5087 if (getCategory() != fcNormal
)
5089 DoubleAPFloat
Tmp(*this);
5090 Tmp
.makeLargest(this->isNegative());
5091 return Tmp
.compare(*this) == cmpEqual
;
5094 bool DoubleAPFloat::isInteger() const {
5095 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
5096 return Floats
[0].isInteger() && Floats
[1].isInteger();
5099 void DoubleAPFloat::toString(SmallVectorImpl
<char> &Str
,
5100 unsigned FormatPrecision
,
5101 unsigned FormatMaxPadding
,
5102 bool TruncateZero
) const {
5103 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
5104 APFloat(semPPCDoubleDoubleLegacy
, bitcastToAPInt())
5105 .toString(Str
, FormatPrecision
, FormatMaxPadding
, TruncateZero
);
5108 bool DoubleAPFloat::getExactInverse(APFloat
*inv
) const {
5109 assert(Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
5110 APFloat
Tmp(semPPCDoubleDoubleLegacy
, bitcastToAPInt());
5112 return Tmp
.getExactInverse(nullptr);
5113 APFloat
Inv(semPPCDoubleDoubleLegacy
);
5114 auto Ret
= Tmp
.getExactInverse(&Inv
);
5115 *inv
= APFloat(semPPCDoubleDouble
, Inv
.bitcastToAPInt());
5119 int DoubleAPFloat::getExactLog2() const {
5120 // TODO: Implement me
5124 int DoubleAPFloat::getExactLog2Abs() const {
5125 // TODO: Implement me
5129 DoubleAPFloat
scalbn(const DoubleAPFloat
&Arg
, int Exp
,
5130 APFloat::roundingMode RM
) {
5131 assert(Arg
.Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
5132 return DoubleAPFloat(semPPCDoubleDouble
, scalbn(Arg
.Floats
[0], Exp
, RM
),
5133 scalbn(Arg
.Floats
[1], Exp
, RM
));
5136 DoubleAPFloat
frexp(const DoubleAPFloat
&Arg
, int &Exp
,
5137 APFloat::roundingMode RM
) {
5138 assert(Arg
.Semantics
== &semPPCDoubleDouble
&& "Unexpected Semantics");
5139 APFloat First
= frexp(Arg
.Floats
[0], Exp
, RM
);
5140 APFloat Second
= Arg
.Floats
[1];
5141 if (Arg
.getCategory() == APFloat::fcNormal
)
5142 Second
= scalbn(Second
, -Exp
, RM
);
5143 return DoubleAPFloat(semPPCDoubleDouble
, std::move(First
), std::move(Second
));
5146 } // namespace detail
5148 APFloat::Storage::Storage(IEEEFloat F
, const fltSemantics
&Semantics
) {
5149 if (usesLayout
<IEEEFloat
>(Semantics
)) {
5150 new (&IEEE
) IEEEFloat(std::move(F
));
5153 if (usesLayout
<DoubleAPFloat
>(Semantics
)) {
5154 const fltSemantics
& S
= F
.getSemantics();
5156 DoubleAPFloat(Semantics
, APFloat(std::move(F
), S
),
5157 APFloat(semIEEEdouble
));
5160 llvm_unreachable("Unexpected semantics");
5163 Expected
<APFloat::opStatus
> APFloat::convertFromString(StringRef Str
,
5165 APFLOAT_DISPATCH_ON_SEMANTICS(convertFromString(Str
, RM
));
5168 hash_code
hash_value(const APFloat
&Arg
) {
5169 if (APFloat::usesLayout
<detail::IEEEFloat
>(Arg
.getSemantics()))
5170 return hash_value(Arg
.U
.IEEE
);
5171 if (APFloat::usesLayout
<detail::DoubleAPFloat
>(Arg
.getSemantics()))
5172 return hash_value(Arg
.U
.Double
);
5173 llvm_unreachable("Unexpected semantics");
5176 APFloat::APFloat(const fltSemantics
&Semantics
, StringRef S
)
5177 : APFloat(Semantics
) {
5178 auto StatusOrErr
= convertFromString(S
, rmNearestTiesToEven
);
5179 assert(StatusOrErr
&& "Invalid floating point representation");
5180 consumeError(StatusOrErr
.takeError());
5183 FPClassTest
APFloat::classify() const {
5185 return isNegative() ? fcNegZero
: fcPosZero
;
5187 return isNegative() ? fcNegNormal
: fcPosNormal
;
5189 return isNegative() ? fcNegSubnormal
: fcPosSubnormal
;
5191 return isNegative() ? fcNegInf
: fcPosInf
;
5192 assert(isNaN() && "Other class of FP constant");
5193 return isSignaling() ? fcSNan
: fcQNan
;
5196 APFloat::opStatus
APFloat::convert(const fltSemantics
&ToSemantics
,
5197 roundingMode RM
, bool *losesInfo
) {
5198 if (&getSemantics() == &ToSemantics
) {
5202 if (usesLayout
<IEEEFloat
>(getSemantics()) &&
5203 usesLayout
<IEEEFloat
>(ToSemantics
))
5204 return U
.IEEE
.convert(ToSemantics
, RM
, losesInfo
);
5205 if (usesLayout
<IEEEFloat
>(getSemantics()) &&
5206 usesLayout
<DoubleAPFloat
>(ToSemantics
)) {
5207 assert(&ToSemantics
== &semPPCDoubleDouble
);
5208 auto Ret
= U
.IEEE
.convert(semPPCDoubleDoubleLegacy
, RM
, losesInfo
);
5209 *this = APFloat(ToSemantics
, U
.IEEE
.bitcastToAPInt());
5212 if (usesLayout
<DoubleAPFloat
>(getSemantics()) &&
5213 usesLayout
<IEEEFloat
>(ToSemantics
)) {
5214 auto Ret
= getIEEE().convert(ToSemantics
, RM
, losesInfo
);
5215 *this = APFloat(std::move(getIEEE()), ToSemantics
);
5218 llvm_unreachable("Unexpected semantics");
5221 APFloat
APFloat::getAllOnesValue(const fltSemantics
&Semantics
) {
5222 return APFloat(Semantics
, APInt::getAllOnes(Semantics
.sizeInBits
));
5225 void APFloat::print(raw_ostream
&OS
) const {
5226 SmallVector
<char, 16> Buffer
;
5228 OS
<< Buffer
<< "\n";
5231 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
5232 LLVM_DUMP_METHOD
void APFloat::dump() const { print(dbgs()); }
5235 void APFloat::Profile(FoldingSetNodeID
&NID
) const {
5236 NID
.Add(bitcastToAPInt());
5239 /* Same as convertToInteger(integerPart*, ...), except the result is returned in
5240 an APSInt, whose initial bit-width and signed-ness are used to determine the
5241 precision of the conversion.
5243 APFloat::opStatus
APFloat::convertToInteger(APSInt
&result
,
5244 roundingMode rounding_mode
,
5245 bool *isExact
) const {
5246 unsigned bitWidth
= result
.getBitWidth();
5247 SmallVector
<uint64_t, 4> parts(result
.getNumWords());
5248 opStatus status
= convertToInteger(parts
, bitWidth
, result
.isSigned(),
5249 rounding_mode
, isExact
);
5250 // Keeps the original signed-ness.
5251 result
= APInt(bitWidth
, parts
);
5255 double APFloat::convertToDouble() const {
5256 if (&getSemantics() == (const llvm::fltSemantics
*)&semIEEEdouble
)
5257 return getIEEE().convertToDouble();
5258 assert(getSemantics().isRepresentableBy(semIEEEdouble
) &&
5259 "Float semantics is not representable by IEEEdouble");
5260 APFloat Temp
= *this;
5262 opStatus St
= Temp
.convert(semIEEEdouble
, rmNearestTiesToEven
, &LosesInfo
);
5263 assert(!(St
& opInexact
) && !LosesInfo
&& "Unexpected imprecision");
5265 return Temp
.getIEEE().convertToDouble();
5268 float APFloat::convertToFloat() const {
5269 if (&getSemantics() == (const llvm::fltSemantics
*)&semIEEEsingle
)
5270 return getIEEE().convertToFloat();
5271 assert(getSemantics().isRepresentableBy(semIEEEsingle
) &&
5272 "Float semantics is not representable by IEEEsingle");
5273 APFloat Temp
= *this;
5275 opStatus St
= Temp
.convert(semIEEEsingle
, rmNearestTiesToEven
, &LosesInfo
);
5276 assert(!(St
& opInexact
) && !LosesInfo
&& "Unexpected imprecision");
5278 return Temp
.getIEEE().convertToFloat();
5283 #undef APFLOAT_DISPATCH_ON_SEMANTICS